diff options
| author | Herbert Xu <herbert@gondor.apana.org.au> | 2013-09-06 22:53:35 -0400 |
|---|---|---|
| committer | Herbert Xu <herbert@gondor.apana.org.au> | 2013-09-06 22:53:35 -0400 |
| commit | eeca9fad52fc4bfdf42c38bfcf383e932eb3e9d6 (patch) | |
| tree | cc51c880459d41c0e8d7576405bef4c987bc7aa0 /kernel | |
| parent | ff6f83fc9d44db09997937c3475d525a6866fbb4 (diff) | |
| parent | b48a97be8e6c2afdba2f3b61fd88c3c7743fbd73 (diff) | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux
Merge upstream tree in order to reinstate crct10dif.
Diffstat (limited to 'kernel')
69 files changed, 2886 insertions, 1908 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 271fd3119af9..470839d1a30e 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
| @@ -9,7 +9,7 @@ obj-y = fork.o exec_domain.o panic.o printk.o \ | |||
| 9 | rcupdate.o extable.o params.o posix-timers.o \ | 9 | rcupdate.o extable.o params.o posix-timers.o \ |
| 10 | kthread.o wait.o sys_ni.o posix-cpu-timers.o mutex.o \ | 10 | kthread.o wait.o sys_ni.o posix-cpu-timers.o mutex.o \ |
| 11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ | 11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ |
| 12 | notifier.o ksysfs.o cred.o \ | 12 | notifier.o ksysfs.o cred.o reboot.o \ |
| 13 | async.o range.o groups.o lglock.o smpboot.o | 13 | async.o range.o groups.o lglock.o smpboot.o |
| 14 | 14 | ||
| 15 | ifdef CONFIG_FUNCTION_TRACER | 15 | ifdef CONFIG_FUNCTION_TRACER |
diff --git a/kernel/audit.h b/kernel/audit.h index 1c95131ef760..123c9b7c3979 100644 --- a/kernel/audit.h +++ b/kernel/audit.h | |||
| @@ -85,6 +85,7 @@ struct audit_names { | |||
| 85 | 85 | ||
| 86 | struct filename *name; | 86 | struct filename *name; |
| 87 | int name_len; /* number of chars to log */ | 87 | int name_len; /* number of chars to log */ |
| 88 | bool hidden; /* don't log this record */ | ||
| 88 | bool name_put; /* call __putname()? */ | 89 | bool name_put; /* call __putname()? */ |
| 89 | 90 | ||
| 90 | unsigned long ino; | 91 | unsigned long ino; |
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 6bd4a90d1991..f7aee8be7fb2 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c | |||
| @@ -423,7 +423,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, | |||
| 423 | f->lsm_rule = NULL; | 423 | f->lsm_rule = NULL; |
| 424 | 424 | ||
| 425 | /* Support legacy tests for a valid loginuid */ | 425 | /* Support legacy tests for a valid loginuid */ |
| 426 | if ((f->type == AUDIT_LOGINUID) && (f->val == 4294967295)) { | 426 | if ((f->type == AUDIT_LOGINUID) && (f->val == ~0U)) { |
| 427 | f->type = AUDIT_LOGINUID_SET; | 427 | f->type = AUDIT_LOGINUID_SET; |
| 428 | f->val = 0; | 428 | f->val = 0; |
| 429 | } | 429 | } |
| @@ -865,6 +865,12 @@ static inline int audit_add_rule(struct audit_entry *entry) | |||
| 865 | err = audit_add_watch(&entry->rule, &list); | 865 | err = audit_add_watch(&entry->rule, &list); |
| 866 | if (err) { | 866 | if (err) { |
| 867 | mutex_unlock(&audit_filter_mutex); | 867 | mutex_unlock(&audit_filter_mutex); |
| 868 | /* | ||
| 869 | * normally audit_add_tree_rule() will free it | ||
| 870 | * on failure | ||
| 871 | */ | ||
| 872 | if (tree) | ||
| 873 | audit_put_tree(tree); | ||
| 868 | goto error; | 874 | goto error; |
| 869 | } | 875 | } |
| 870 | } | 876 | } |
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 3c8a601324a2..9845cb32b60a 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c | |||
| @@ -1399,8 +1399,11 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts | |||
| 1399 | } | 1399 | } |
| 1400 | 1400 | ||
| 1401 | i = 0; | 1401 | i = 0; |
| 1402 | list_for_each_entry(n, &context->names_list, list) | 1402 | list_for_each_entry(n, &context->names_list, list) { |
| 1403 | if (n->hidden) | ||
| 1404 | continue; | ||
| 1403 | audit_log_name(context, n, NULL, i++, &call_panic); | 1405 | audit_log_name(context, n, NULL, i++, &call_panic); |
| 1406 | } | ||
| 1404 | 1407 | ||
| 1405 | /* Send end of event record to help user space know we are finished */ | 1408 | /* Send end of event record to help user space know we are finished */ |
| 1406 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE); | 1409 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE); |
| @@ -1769,14 +1772,15 @@ void audit_putname(struct filename *name) | |||
| 1769 | * __audit_inode - store the inode and device from a lookup | 1772 | * __audit_inode - store the inode and device from a lookup |
| 1770 | * @name: name being audited | 1773 | * @name: name being audited |
| 1771 | * @dentry: dentry being audited | 1774 | * @dentry: dentry being audited |
| 1772 | * @parent: does this dentry represent the parent? | 1775 | * @flags: attributes for this particular entry |
| 1773 | */ | 1776 | */ |
| 1774 | void __audit_inode(struct filename *name, const struct dentry *dentry, | 1777 | void __audit_inode(struct filename *name, const struct dentry *dentry, |
| 1775 | unsigned int parent) | 1778 | unsigned int flags) |
| 1776 | { | 1779 | { |
| 1777 | struct audit_context *context = current->audit_context; | 1780 | struct audit_context *context = current->audit_context; |
| 1778 | const struct inode *inode = dentry->d_inode; | 1781 | const struct inode *inode = dentry->d_inode; |
| 1779 | struct audit_names *n; | 1782 | struct audit_names *n; |
| 1783 | bool parent = flags & AUDIT_INODE_PARENT; | ||
| 1780 | 1784 | ||
| 1781 | if (!context->in_syscall) | 1785 | if (!context->in_syscall) |
| 1782 | return; | 1786 | return; |
| @@ -1831,6 +1835,8 @@ out: | |||
| 1831 | if (parent) { | 1835 | if (parent) { |
| 1832 | n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL; | 1836 | n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL; |
| 1833 | n->type = AUDIT_TYPE_PARENT; | 1837 | n->type = AUDIT_TYPE_PARENT; |
| 1838 | if (flags & AUDIT_INODE_HIDDEN) | ||
| 1839 | n->hidden = true; | ||
| 1834 | } else { | 1840 | } else { |
| 1835 | n->name_len = AUDIT_NAME_FULL; | 1841 | n->name_len = AUDIT_NAME_FULL; |
| 1836 | n->type = AUDIT_TYPE_NORMAL; | 1842 | n->type = AUDIT_TYPE_NORMAL; |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e5583d10a325..789ec4683db3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
| @@ -802,7 +802,6 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task, | |||
| 802 | */ | 802 | */ |
| 803 | 803 | ||
| 804 | static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); | 804 | static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); |
| 805 | static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int); | ||
| 806 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); | 805 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); |
| 807 | static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, | 806 | static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, |
| 808 | unsigned long subsys_mask); | 807 | unsigned long subsys_mask); |
| @@ -1846,36 +1845,43 @@ out: | |||
| 1846 | EXPORT_SYMBOL_GPL(cgroup_path); | 1845 | EXPORT_SYMBOL_GPL(cgroup_path); |
| 1847 | 1846 | ||
| 1848 | /** | 1847 | /** |
| 1849 | * task_cgroup_path_from_hierarchy - cgroup path of a task on a hierarchy | 1848 | * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy |
| 1850 | * @task: target task | 1849 | * @task: target task |
| 1851 | * @hierarchy_id: the hierarchy to look up @task's cgroup from | ||
| 1852 | * @buf: the buffer to write the path into | 1850 | * @buf: the buffer to write the path into |
| 1853 | * @buflen: the length of the buffer | 1851 | * @buflen: the length of the buffer |
| 1854 | * | 1852 | * |
| 1855 | * Determine @task's cgroup on the hierarchy specified by @hierarchy_id and | 1853 | * Determine @task's cgroup on the first (the one with the lowest non-zero |
| 1856 | * copy its path into @buf. This function grabs cgroup_mutex and shouldn't | 1854 | * hierarchy_id) cgroup hierarchy and copy its path into @buf. This |
| 1857 | * be used inside locks used by cgroup controller callbacks. | 1855 | * function grabs cgroup_mutex and shouldn't be used inside locks used by |
| 1856 | * cgroup controller callbacks. | ||
| 1857 | * | ||
| 1858 | * Returns 0 on success, fails with -%ENAMETOOLONG if @buflen is too short. | ||
| 1858 | */ | 1859 | */ |
| 1859 | int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id, | 1860 | int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) |
| 1860 | char *buf, size_t buflen) | ||
| 1861 | { | 1861 | { |
| 1862 | struct cgroupfs_root *root; | 1862 | struct cgroupfs_root *root; |
| 1863 | struct cgroup *cgrp = NULL; | 1863 | struct cgroup *cgrp; |
| 1864 | int ret = -ENOENT; | 1864 | int hierarchy_id = 1, ret = 0; |
| 1865 | |||
| 1866 | if (buflen < 2) | ||
| 1867 | return -ENAMETOOLONG; | ||
| 1865 | 1868 | ||
| 1866 | mutex_lock(&cgroup_mutex); | 1869 | mutex_lock(&cgroup_mutex); |
| 1867 | 1870 | ||
| 1868 | root = idr_find(&cgroup_hierarchy_idr, hierarchy_id); | 1871 | root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id); |
| 1872 | |||
| 1869 | if (root) { | 1873 | if (root) { |
| 1870 | cgrp = task_cgroup_from_root(task, root); | 1874 | cgrp = task_cgroup_from_root(task, root); |
| 1871 | ret = cgroup_path(cgrp, buf, buflen); | 1875 | ret = cgroup_path(cgrp, buf, buflen); |
| 1876 | } else { | ||
| 1877 | /* if no hierarchy exists, everyone is in "/" */ | ||
| 1878 | memcpy(buf, "/", 2); | ||
| 1872 | } | 1879 | } |
| 1873 | 1880 | ||
| 1874 | mutex_unlock(&cgroup_mutex); | 1881 | mutex_unlock(&cgroup_mutex); |
| 1875 | |||
| 1876 | return ret; | 1882 | return ret; |
| 1877 | } | 1883 | } |
| 1878 | EXPORT_SYMBOL_GPL(task_cgroup_path_from_hierarchy); | 1884 | EXPORT_SYMBOL_GPL(task_cgroup_path); |
| 1879 | 1885 | ||
| 1880 | /* | 1886 | /* |
| 1881 | * Control Group taskset | 1887 | * Control Group taskset |
| @@ -2642,7 +2648,7 @@ static const struct inode_operations cgroup_file_inode_operations = { | |||
| 2642 | }; | 2648 | }; |
| 2643 | 2649 | ||
| 2644 | static const struct inode_operations cgroup_dir_inode_operations = { | 2650 | static const struct inode_operations cgroup_dir_inode_operations = { |
| 2645 | .lookup = cgroup_lookup, | 2651 | .lookup = simple_lookup, |
| 2646 | .mkdir = cgroup_mkdir, | 2652 | .mkdir = cgroup_mkdir, |
| 2647 | .rmdir = cgroup_rmdir, | 2653 | .rmdir = cgroup_rmdir, |
| 2648 | .rename = cgroup_rename, | 2654 | .rename = cgroup_rename, |
| @@ -2652,14 +2658,6 @@ static const struct inode_operations cgroup_dir_inode_operations = { | |||
| 2652 | .removexattr = cgroup_removexattr, | 2658 | .removexattr = cgroup_removexattr, |
| 2653 | }; | 2659 | }; |
| 2654 | 2660 | ||
| 2655 | static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | ||
| 2656 | { | ||
| 2657 | if (dentry->d_name.len > NAME_MAX) | ||
| 2658 | return ERR_PTR(-ENAMETOOLONG); | ||
| 2659 | d_add(dentry, NULL); | ||
| 2660 | return NULL; | ||
| 2661 | } | ||
| 2662 | |||
| 2663 | /* | 2661 | /* |
| 2664 | * Check if a file is a control file | 2662 | * Check if a file is a control file |
| 2665 | */ | 2663 | */ |
diff --git a/kernel/cpu.c b/kernel/cpu.c index 198a38883e64..b2b227b82123 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
| @@ -366,7 +366,7 @@ EXPORT_SYMBOL(cpu_down); | |||
| 366 | #endif /*CONFIG_HOTPLUG_CPU*/ | 366 | #endif /*CONFIG_HOTPLUG_CPU*/ |
| 367 | 367 | ||
| 368 | /* Requires cpu_add_remove_lock to be held */ | 368 | /* Requires cpu_add_remove_lock to be held */ |
| 369 | static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) | 369 | static int _cpu_up(unsigned int cpu, int tasks_frozen) |
| 370 | { | 370 | { |
| 371 | int ret, nr_calls = 0; | 371 | int ret, nr_calls = 0; |
| 372 | void *hcpu = (void *)(long)cpu; | 372 | void *hcpu = (void *)(long)cpu; |
| @@ -419,7 +419,7 @@ out: | |||
| 419 | return ret; | 419 | return ret; |
| 420 | } | 420 | } |
| 421 | 421 | ||
| 422 | int __cpuinit cpu_up(unsigned int cpu) | 422 | int cpu_up(unsigned int cpu) |
| 423 | { | 423 | { |
| 424 | int err = 0; | 424 | int err = 0; |
| 425 | 425 | ||
| @@ -618,7 +618,7 @@ core_initcall(cpu_hotplug_pm_sync_init); | |||
| 618 | * It must be called by the arch code on the new cpu, before the new cpu | 618 | * It must be called by the arch code on the new cpu, before the new cpu |
| 619 | * enables interrupts and before the "boot" cpu returns from __cpu_up(). | 619 | * enables interrupts and before the "boot" cpu returns from __cpu_up(). |
| 620 | */ | 620 | */ |
| 621 | void __cpuinit notify_cpu_starting(unsigned int cpu) | 621 | void notify_cpu_starting(unsigned int cpu) |
| 622 | { | 622 | { |
| 623 | unsigned long val = CPU_STARTING; | 623 | unsigned long val = CPU_STARTING; |
| 624 | 624 | ||
diff --git a/kernel/events/core.c b/kernel/events/core.c index 1db3af933704..f86599e8c123 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
| @@ -182,7 +182,7 @@ void update_perf_cpu_limits(void) | |||
| 182 | u64 tmp = perf_sample_period_ns; | 182 | u64 tmp = perf_sample_period_ns; |
| 183 | 183 | ||
| 184 | tmp *= sysctl_perf_cpu_time_max_percent; | 184 | tmp *= sysctl_perf_cpu_time_max_percent; |
| 185 | tmp = do_div(tmp, 100); | 185 | do_div(tmp, 100); |
| 186 | atomic_set(&perf_sample_allowed_ns, tmp); | 186 | atomic_set(&perf_sample_allowed_ns, tmp); |
| 187 | } | 187 | } |
| 188 | 188 | ||
| @@ -232,7 +232,7 @@ DEFINE_PER_CPU(u64, running_sample_length); | |||
| 232 | void perf_sample_event_took(u64 sample_len_ns) | 232 | void perf_sample_event_took(u64 sample_len_ns) |
| 233 | { | 233 | { |
| 234 | u64 avg_local_sample_len; | 234 | u64 avg_local_sample_len; |
| 235 | u64 local_samples_len = __get_cpu_var(running_sample_length); | 235 | u64 local_samples_len; |
| 236 | 236 | ||
| 237 | if (atomic_read(&perf_sample_allowed_ns) == 0) | 237 | if (atomic_read(&perf_sample_allowed_ns) == 0) |
| 238 | return; | 238 | return; |
| @@ -947,8 +947,18 @@ perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags) | |||
| 947 | { | 947 | { |
| 948 | struct perf_event_context *ctx; | 948 | struct perf_event_context *ctx; |
| 949 | 949 | ||
| 950 | rcu_read_lock(); | ||
| 951 | retry: | 950 | retry: |
| 951 | /* | ||
| 952 | * One of the few rules of preemptible RCU is that one cannot do | ||
| 953 | * rcu_read_unlock() while holding a scheduler (or nested) lock when | ||
| 954 | * part of the read side critical section was preemptible -- see | ||
| 955 | * rcu_read_unlock_special(). | ||
| 956 | * | ||
| 957 | * Since ctx->lock nests under rq->lock we must ensure the entire read | ||
| 958 | * side critical section is non-preemptible. | ||
| 959 | */ | ||
| 960 | preempt_disable(); | ||
| 961 | rcu_read_lock(); | ||
| 952 | ctx = rcu_dereference(task->perf_event_ctxp[ctxn]); | 962 | ctx = rcu_dereference(task->perf_event_ctxp[ctxn]); |
| 953 | if (ctx) { | 963 | if (ctx) { |
| 954 | /* | 964 | /* |
| @@ -964,6 +974,8 @@ retry: | |||
| 964 | raw_spin_lock_irqsave(&ctx->lock, *flags); | 974 | raw_spin_lock_irqsave(&ctx->lock, *flags); |
| 965 | if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) { | 975 | if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) { |
| 966 | raw_spin_unlock_irqrestore(&ctx->lock, *flags); | 976 | raw_spin_unlock_irqrestore(&ctx->lock, *flags); |
| 977 | rcu_read_unlock(); | ||
| 978 | preempt_enable(); | ||
| 967 | goto retry; | 979 | goto retry; |
| 968 | } | 980 | } |
| 969 | 981 | ||
| @@ -973,6 +985,7 @@ retry: | |||
| 973 | } | 985 | } |
| 974 | } | 986 | } |
| 975 | rcu_read_unlock(); | 987 | rcu_read_unlock(); |
| 988 | preempt_enable(); | ||
| 976 | return ctx; | 989 | return ctx; |
| 977 | } | 990 | } |
| 978 | 991 | ||
| @@ -1950,7 +1963,16 @@ static int __perf_event_enable(void *info) | |||
| 1950 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | 1963 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); |
| 1951 | int err; | 1964 | int err; |
| 1952 | 1965 | ||
| 1953 | if (WARN_ON_ONCE(!ctx->is_active)) | 1966 | /* |
| 1967 | * There's a time window between 'ctx->is_active' check | ||
| 1968 | * in perf_event_enable function and this place having: | ||
| 1969 | * - IRQs on | ||
| 1970 | * - ctx->lock unlocked | ||
| 1971 | * | ||
| 1972 | * where the task could be killed and 'ctx' deactivated | ||
| 1973 | * by perf_event_exit_task. | ||
| 1974 | */ | ||
| 1975 | if (!ctx->is_active) | ||
| 1954 | return -EINVAL; | 1976 | return -EINVAL; |
| 1955 | 1977 | ||
| 1956 | raw_spin_lock(&ctx->lock); | 1978 | raw_spin_lock(&ctx->lock); |
| @@ -6212,8 +6234,6 @@ perf_event_mux_interval_ms_store(struct device *dev, | |||
| 6212 | return count; | 6234 | return count; |
| 6213 | } | 6235 | } |
| 6214 | 6236 | ||
| 6215 | #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) | ||
| 6216 | |||
| 6217 | static struct device_attribute pmu_dev_attrs[] = { | 6237 | static struct device_attribute pmu_dev_attrs[] = { |
| 6218 | __ATTR_RO(type), | 6238 | __ATTR_RO(type), |
| 6219 | __ATTR_RW(perf_event_mux_interval_ms), | 6239 | __ATTR_RW(perf_event_mux_interval_ms), |
| @@ -7465,7 +7485,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, | |||
| 7465 | * child. | 7485 | * child. |
| 7466 | */ | 7486 | */ |
| 7467 | 7487 | ||
| 7468 | child_ctx = alloc_perf_context(event->pmu, child); | 7488 | child_ctx = alloc_perf_context(parent_ctx->pmu, child); |
| 7469 | if (!child_ctx) | 7489 | if (!child_ctx) |
| 7470 | return -ENOMEM; | 7490 | return -ENOMEM; |
| 7471 | 7491 | ||
| @@ -7608,7 +7628,7 @@ static void __init perf_event_init_all_cpus(void) | |||
| 7608 | } | 7628 | } |
| 7609 | } | 7629 | } |
| 7610 | 7630 | ||
| 7611 | static void __cpuinit perf_event_init_cpu(int cpu) | 7631 | static void perf_event_init_cpu(int cpu) |
| 7612 | { | 7632 | { |
| 7613 | struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); | 7633 | struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); |
| 7614 | 7634 | ||
| @@ -7697,7 +7717,7 @@ static struct notifier_block perf_reboot_notifier = { | |||
| 7697 | .priority = INT_MIN, | 7717 | .priority = INT_MIN, |
| 7698 | }; | 7718 | }; |
| 7699 | 7719 | ||
| 7700 | static int __cpuinit | 7720 | static int |
| 7701 | perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | 7721 | perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) |
| 7702 | { | 7722 | { |
| 7703 | unsigned int cpu = (long)hcpu; | 7723 | unsigned int cpu = (long)hcpu; |
diff --git a/kernel/exit.c b/kernel/exit.c index fafe75d9e6f6..a949819055d5 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
| @@ -808,7 +808,7 @@ void do_exit(long code) | |||
| 808 | /* | 808 | /* |
| 809 | * FIXME: do that only when needed, using sched_exit tracepoint | 809 | * FIXME: do that only when needed, using sched_exit tracepoint |
| 810 | */ | 810 | */ |
| 811 | ptrace_put_breakpoints(tsk); | 811 | flush_ptrace_hw_breakpoint(tsk); |
| 812 | 812 | ||
| 813 | exit_notify(tsk, group_dead); | 813 | exit_notify(tsk, group_dead); |
| 814 | #ifdef CONFIG_NUMA | 814 | #ifdef CONFIG_NUMA |
diff --git a/kernel/fork.c b/kernel/fork.c index 6e6a1c11b3e5..403d2bb8a968 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -365,8 +365,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
| 365 | mm->locked_vm = 0; | 365 | mm->locked_vm = 0; |
| 366 | mm->mmap = NULL; | 366 | mm->mmap = NULL; |
| 367 | mm->mmap_cache = NULL; | 367 | mm->mmap_cache = NULL; |
| 368 | mm->free_area_cache = oldmm->mmap_base; | ||
| 369 | mm->cached_hole_size = ~0UL; | ||
| 370 | mm->map_count = 0; | 368 | mm->map_count = 0; |
| 371 | cpumask_clear(mm_cpumask(mm)); | 369 | cpumask_clear(mm_cpumask(mm)); |
| 372 | mm->mm_rb = RB_ROOT; | 370 | mm->mm_rb = RB_ROOT; |
| @@ -540,8 +538,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) | |||
| 540 | mm->nr_ptes = 0; | 538 | mm->nr_ptes = 0; |
| 541 | memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); | 539 | memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); |
| 542 | spin_lock_init(&mm->page_table_lock); | 540 | spin_lock_init(&mm->page_table_lock); |
| 543 | mm->free_area_cache = TASK_UNMAPPED_BASE; | ||
| 544 | mm->cached_hole_size = ~0UL; | ||
| 545 | mm_init_aio(mm); | 541 | mm_init_aio(mm); |
| 546 | mm_init_owner(mm, p); | 542 | mm_init_owner(mm, p); |
| 547 | 543 | ||
| @@ -1550,7 +1546,7 @@ static inline void init_idle_pids(struct pid_link *links) | |||
| 1550 | } | 1546 | } |
| 1551 | } | 1547 | } |
| 1552 | 1548 | ||
| 1553 | struct task_struct * __cpuinit fork_idle(int cpu) | 1549 | struct task_struct *fork_idle(int cpu) |
| 1554 | { | 1550 | { |
| 1555 | struct task_struct *task; | 1551 | struct task_struct *task; |
| 1556 | task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0); | 1552 | task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0); |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 3ee4d06c6fc2..383319bae3f7 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
| @@ -722,17 +722,20 @@ static int hrtimer_switch_to_hres(void) | |||
| 722 | return 1; | 722 | return 1; |
| 723 | } | 723 | } |
| 724 | 724 | ||
| 725 | static void clock_was_set_work(struct work_struct *work) | ||
| 726 | { | ||
| 727 | clock_was_set(); | ||
| 728 | } | ||
| 729 | |||
| 730 | static DECLARE_WORK(hrtimer_work, clock_was_set_work); | ||
| 731 | |||
| 725 | /* | 732 | /* |
| 726 | * Called from timekeeping code to reprogramm the hrtimer interrupt | 733 | * Called from timekeeping and resume code to reprogramm the hrtimer |
| 727 | * device. If called from the timer interrupt context we defer it to | 734 | * interrupt device on all cpus. |
| 728 | * softirq context. | ||
| 729 | */ | 735 | */ |
| 730 | void clock_was_set_delayed(void) | 736 | void clock_was_set_delayed(void) |
| 731 | { | 737 | { |
| 732 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); | 738 | schedule_work(&hrtimer_work); |
| 733 | |||
| 734 | cpu_base->clock_was_set = 1; | ||
| 735 | __raise_softirq_irqoff(HRTIMER_SOFTIRQ); | ||
| 736 | } | 739 | } |
| 737 | 740 | ||
| 738 | #else | 741 | #else |
| @@ -774,15 +777,19 @@ void clock_was_set(void) | |||
| 774 | 777 | ||
| 775 | /* | 778 | /* |
| 776 | * During resume we might have to reprogram the high resolution timer | 779 | * During resume we might have to reprogram the high resolution timer |
| 777 | * interrupt (on the local CPU): | 780 | * interrupt on all online CPUs. However, all other CPUs will be |
| 781 | * stopped with IRQs interrupts disabled so the clock_was_set() call | ||
| 782 | * must be deferred. | ||
| 778 | */ | 783 | */ |
| 779 | void hrtimers_resume(void) | 784 | void hrtimers_resume(void) |
| 780 | { | 785 | { |
| 781 | WARN_ONCE(!irqs_disabled(), | 786 | WARN_ONCE(!irqs_disabled(), |
| 782 | KERN_INFO "hrtimers_resume() called with IRQs enabled!"); | 787 | KERN_INFO "hrtimers_resume() called with IRQs enabled!"); |
| 783 | 788 | ||
| 789 | /* Retrigger on the local CPU */ | ||
| 784 | retrigger_next_event(NULL); | 790 | retrigger_next_event(NULL); |
| 785 | timerfd_clock_was_set(); | 791 | /* And schedule a retrigger for all others */ |
| 792 | clock_was_set_delayed(); | ||
| 786 | } | 793 | } |
| 787 | 794 | ||
| 788 | static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) | 795 | static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) |
| @@ -1433,13 +1440,6 @@ void hrtimer_peek_ahead_timers(void) | |||
| 1433 | 1440 | ||
| 1434 | static void run_hrtimer_softirq(struct softirq_action *h) | 1441 | static void run_hrtimer_softirq(struct softirq_action *h) |
| 1435 | { | 1442 | { |
| 1436 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); | ||
| 1437 | |||
| 1438 | if (cpu_base->clock_was_set) { | ||
| 1439 | cpu_base->clock_was_set = 0; | ||
| 1440 | clock_was_set(); | ||
| 1441 | } | ||
| 1442 | |||
| 1443 | hrtimer_peek_ahead_timers(); | 1443 | hrtimer_peek_ahead_timers(); |
| 1444 | } | 1444 | } |
| 1445 | 1445 | ||
| @@ -1659,7 +1659,7 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, | |||
| 1659 | /* | 1659 | /* |
| 1660 | * Functions related to boot-time initialization: | 1660 | * Functions related to boot-time initialization: |
| 1661 | */ | 1661 | */ |
| 1662 | static void __cpuinit init_hrtimers_cpu(int cpu) | 1662 | static void init_hrtimers_cpu(int cpu) |
| 1663 | { | 1663 | { |
| 1664 | struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); | 1664 | struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); |
| 1665 | int i; | 1665 | int i; |
| @@ -1740,7 +1740,7 @@ static void migrate_hrtimers(int scpu) | |||
| 1740 | 1740 | ||
| 1741 | #endif /* CONFIG_HOTPLUG_CPU */ | 1741 | #endif /* CONFIG_HOTPLUG_CPU */ |
| 1742 | 1742 | ||
| 1743 | static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, | 1743 | static int hrtimer_cpu_notify(struct notifier_block *self, |
| 1744 | unsigned long action, void *hcpu) | 1744 | unsigned long action, void *hcpu) |
| 1745 | { | 1745 | { |
| 1746 | int scpu = (long)hcpu; | 1746 | int scpu = (long)hcpu; |
| @@ -1773,7 +1773,7 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, | |||
| 1773 | return NOTIFY_OK; | 1773 | return NOTIFY_OK; |
| 1774 | } | 1774 | } |
| 1775 | 1775 | ||
| 1776 | static struct notifier_block __cpuinitdata hrtimers_nb = { | 1776 | static struct notifier_block hrtimers_nb = { |
| 1777 | .notifier_call = hrtimer_cpu_notify, | 1777 | .notifier_call = hrtimer_cpu_notify, |
| 1778 | }; | 1778 | }; |
| 1779 | 1779 | ||
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index e3544c19bdd2..452d6f2ba21d 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c | |||
| @@ -275,10 +275,7 @@ int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, | |||
| 275 | if (d->gc) | 275 | if (d->gc) |
| 276 | return -EBUSY; | 276 | return -EBUSY; |
| 277 | 277 | ||
| 278 | if (d->revmap_type != IRQ_DOMAIN_MAP_LINEAR) | 278 | numchips = DIV_ROUND_UP(d->revmap_size, irqs_per_chip); |
| 279 | return -EINVAL; | ||
| 280 | |||
| 281 | numchips = d->revmap_data.linear.size / irqs_per_chip; | ||
| 282 | if (!numchips) | 279 | if (!numchips) |
| 283 | return -EINVAL; | 280 | return -EINVAL; |
| 284 | 281 | ||
| @@ -310,6 +307,7 @@ int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, | |||
| 310 | /* Calc pointer to the next generic chip */ | 307 | /* Calc pointer to the next generic chip */ |
| 311 | tmp += sizeof(*gc) + num_ct * sizeof(struct irq_chip_type); | 308 | tmp += sizeof(*gc) + num_ct * sizeof(struct irq_chip_type); |
| 312 | } | 309 | } |
| 310 | d->name = name; | ||
| 313 | return 0; | 311 | return 0; |
| 314 | } | 312 | } |
| 315 | EXPORT_SYMBOL_GPL(irq_alloc_domain_generic_chips); | 313 | EXPORT_SYMBOL_GPL(irq_alloc_domain_generic_chips); |
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 1ed8dff17eb9..706724e9835d 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c | |||
| @@ -23,9 +23,11 @@ static DEFINE_MUTEX(revmap_trees_mutex); | |||
| 23 | static struct irq_domain *irq_default_domain; | 23 | static struct irq_domain *irq_default_domain; |
| 24 | 24 | ||
| 25 | /** | 25 | /** |
| 26 | * irq_domain_alloc() - Allocate a new irq_domain data structure | 26 | * __irq_domain_add() - Allocate a new irq_domain data structure |
| 27 | * @of_node: optional device-tree node of the interrupt controller | 27 | * @of_node: optional device-tree node of the interrupt controller |
| 28 | * @revmap_type: type of reverse mapping to use | 28 | * @size: Size of linear map; 0 for radix mapping only |
| 29 | * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no | ||
| 30 | * direct mapping | ||
| 29 | * @ops: map/unmap domain callbacks | 31 | * @ops: map/unmap domain callbacks |
| 30 | * @host_data: Controller private data pointer | 32 | * @host_data: Controller private data pointer |
| 31 | * | 33 | * |
| @@ -33,41 +35,35 @@ static struct irq_domain *irq_default_domain; | |||
| 33 | * register allocated irq_domain with irq_domain_register(). Returns pointer | 35 | * register allocated irq_domain with irq_domain_register(). Returns pointer |
| 34 | * to IRQ domain, or NULL on failure. | 36 | * to IRQ domain, or NULL on failure. |
| 35 | */ | 37 | */ |
| 36 | static struct irq_domain *irq_domain_alloc(struct device_node *of_node, | 38 | struct irq_domain *__irq_domain_add(struct device_node *of_node, int size, |
| 37 | unsigned int revmap_type, | 39 | irq_hw_number_t hwirq_max, int direct_max, |
| 38 | const struct irq_domain_ops *ops, | 40 | const struct irq_domain_ops *ops, |
| 39 | void *host_data) | 41 | void *host_data) |
| 40 | { | 42 | { |
| 41 | struct irq_domain *domain; | 43 | struct irq_domain *domain; |
| 42 | 44 | ||
| 43 | domain = kzalloc_node(sizeof(*domain), GFP_KERNEL, | 45 | domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size), |
| 44 | of_node_to_nid(of_node)); | 46 | GFP_KERNEL, of_node_to_nid(of_node)); |
| 45 | if (WARN_ON(!domain)) | 47 | if (WARN_ON(!domain)) |
| 46 | return NULL; | 48 | return NULL; |
| 47 | 49 | ||
| 48 | /* Fill structure */ | 50 | /* Fill structure */ |
| 49 | domain->revmap_type = revmap_type; | 51 | INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL); |
| 50 | domain->ops = ops; | 52 | domain->ops = ops; |
| 51 | domain->host_data = host_data; | 53 | domain->host_data = host_data; |
| 52 | domain->of_node = of_node_get(of_node); | 54 | domain->of_node = of_node_get(of_node); |
| 55 | domain->hwirq_max = hwirq_max; | ||
| 56 | domain->revmap_size = size; | ||
| 57 | domain->revmap_direct_max_irq = direct_max; | ||
| 53 | 58 | ||
| 54 | return domain; | ||
| 55 | } | ||
| 56 | |||
| 57 | static void irq_domain_free(struct irq_domain *domain) | ||
| 58 | { | ||
| 59 | of_node_put(domain->of_node); | ||
| 60 | kfree(domain); | ||
| 61 | } | ||
| 62 | |||
| 63 | static void irq_domain_add(struct irq_domain *domain) | ||
| 64 | { | ||
| 65 | mutex_lock(&irq_domain_mutex); | 59 | mutex_lock(&irq_domain_mutex); |
| 66 | list_add(&domain->link, &irq_domain_list); | 60 | list_add(&domain->link, &irq_domain_list); |
| 67 | mutex_unlock(&irq_domain_mutex); | 61 | mutex_unlock(&irq_domain_mutex); |
| 68 | pr_debug("Allocated domain of type %d @0x%p\n", | 62 | |
| 69 | domain->revmap_type, domain); | 63 | pr_debug("Added domain %s\n", domain->name); |
| 64 | return domain; | ||
| 70 | } | 65 | } |
| 66 | EXPORT_SYMBOL_GPL(__irq_domain_add); | ||
| 71 | 67 | ||
| 72 | /** | 68 | /** |
| 73 | * irq_domain_remove() - Remove an irq domain. | 69 | * irq_domain_remove() - Remove an irq domain. |
| @@ -81,29 +77,12 @@ void irq_domain_remove(struct irq_domain *domain) | |||
| 81 | { | 77 | { |
| 82 | mutex_lock(&irq_domain_mutex); | 78 | mutex_lock(&irq_domain_mutex); |
| 83 | 79 | ||
| 84 | switch (domain->revmap_type) { | 80 | /* |
| 85 | case IRQ_DOMAIN_MAP_LEGACY: | 81 | * radix_tree_delete() takes care of destroying the root |
| 86 | /* | 82 | * node when all entries are removed. Shout if there are |
| 87 | * Legacy domains don't manage their own irq_desc | 83 | * any mappings left. |
| 88 | * allocations, we expect the caller to handle irq_desc | 84 | */ |
| 89 | * freeing on their own. | 85 | WARN_ON(domain->revmap_tree.height); |
| 90 | */ | ||
| 91 | break; | ||
| 92 | case IRQ_DOMAIN_MAP_TREE: | ||
| 93 | /* | ||
| 94 | * radix_tree_delete() takes care of destroying the root | ||
| 95 | * node when all entries are removed. Shout if there are | ||
| 96 | * any mappings left. | ||
| 97 | */ | ||
| 98 | WARN_ON(domain->revmap_data.tree.height); | ||
| 99 | break; | ||
| 100 | case IRQ_DOMAIN_MAP_LINEAR: | ||
| 101 | kfree(domain->revmap_data.linear.revmap); | ||
| 102 | domain->revmap_data.linear.size = 0; | ||
| 103 | break; | ||
| 104 | case IRQ_DOMAIN_MAP_NOMAP: | ||
| 105 | break; | ||
| 106 | } | ||
| 107 | 86 | ||
| 108 | list_del(&domain->link); | 87 | list_del(&domain->link); |
| 109 | 88 | ||
| @@ -115,44 +94,30 @@ void irq_domain_remove(struct irq_domain *domain) | |||
| 115 | 94 | ||
| 116 | mutex_unlock(&irq_domain_mutex); | 95 | mutex_unlock(&irq_domain_mutex); |
| 117 | 96 | ||
| 118 | pr_debug("Removed domain of type %d @0x%p\n", | 97 | pr_debug("Removed domain %s\n", domain->name); |
| 119 | domain->revmap_type, domain); | ||
| 120 | 98 | ||
| 121 | irq_domain_free(domain); | 99 | of_node_put(domain->of_node); |
| 100 | kfree(domain); | ||
| 122 | } | 101 | } |
| 123 | EXPORT_SYMBOL_GPL(irq_domain_remove); | 102 | EXPORT_SYMBOL_GPL(irq_domain_remove); |
| 124 | 103 | ||
| 125 | static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain, | ||
| 126 | irq_hw_number_t hwirq) | ||
| 127 | { | ||
| 128 | irq_hw_number_t first_hwirq = domain->revmap_data.legacy.first_hwirq; | ||
| 129 | int size = domain->revmap_data.legacy.size; | ||
| 130 | |||
| 131 | if (WARN_ON(hwirq < first_hwirq || hwirq >= first_hwirq + size)) | ||
| 132 | return 0; | ||
| 133 | return hwirq - first_hwirq + domain->revmap_data.legacy.first_irq; | ||
| 134 | } | ||
| 135 | |||
| 136 | /** | 104 | /** |
| 137 | * irq_domain_add_simple() - Allocate and register a simple irq_domain. | 105 | * irq_domain_add_simple() - Register an irq_domain and optionally map a range of irqs |
| 138 | * @of_node: pointer to interrupt controller's device tree node. | 106 | * @of_node: pointer to interrupt controller's device tree node. |
| 139 | * @size: total number of irqs in mapping | 107 | * @size: total number of irqs in mapping |
| 140 | * @first_irq: first number of irq block assigned to the domain, | 108 | * @first_irq: first number of irq block assigned to the domain, |
| 141 | * pass zero to assign irqs on-the-fly. This will result in a | 109 | * pass zero to assign irqs on-the-fly. If first_irq is non-zero, then |
| 142 | * linear IRQ domain so it is important to use irq_create_mapping() | 110 | * pre-map all of the irqs in the domain to virqs starting at first_irq. |
| 143 | * for each used IRQ, especially when SPARSE_IRQ is enabled. | ||
| 144 | * @ops: map/unmap domain callbacks | 111 | * @ops: map/unmap domain callbacks |
| 145 | * @host_data: Controller private data pointer | 112 | * @host_data: Controller private data pointer |
| 146 | * | 113 | * |
| 147 | * Allocates a legacy irq_domain if irq_base is positive or a linear | 114 | * Allocates an irq_domain, and optionally if first_irq is positive then also |
| 148 | * domain otherwise. For the legacy domain, IRQ descriptors will also | 115 | * allocate irq_descs and map all of the hwirqs to virqs starting at first_irq. |
| 149 | * be allocated. | ||
| 150 | * | 116 | * |
| 151 | * This is intended to implement the expected behaviour for most | 117 | * This is intended to implement the expected behaviour for most |
| 152 | * interrupt controllers which is that a linear mapping should | 118 | * interrupt controllers. If device tree is used, then first_irq will be 0 and |
| 153 | * normally be used unless the system requires a legacy mapping in | 119 | * irqs get mapped dynamically on the fly. However, if the controller requires |
| 154 | * order to support supplying interrupt numbers during non-DT | 120 | * static virq assignments (non-DT boot) then it will set that up correctly. |
| 155 | * registration of devices. | ||
| 156 | */ | 121 | */ |
| 157 | struct irq_domain *irq_domain_add_simple(struct device_node *of_node, | 122 | struct irq_domain *irq_domain_add_simple(struct device_node *of_node, |
| 158 | unsigned int size, | 123 | unsigned int size, |
| @@ -160,33 +125,25 @@ struct irq_domain *irq_domain_add_simple(struct device_node *of_node, | |||
| 160 | const struct irq_domain_ops *ops, | 125 | const struct irq_domain_ops *ops, |
| 161 | void *host_data) | 126 | void *host_data) |
| 162 | { | 127 | { |
| 163 | if (first_irq > 0) { | 128 | struct irq_domain *domain; |
| 164 | int irq_base; | 129 | |
| 130 | domain = __irq_domain_add(of_node, size, size, 0, ops, host_data); | ||
| 131 | if (!domain) | ||
| 132 | return NULL; | ||
| 165 | 133 | ||
| 134 | if (first_irq > 0) { | ||
| 166 | if (IS_ENABLED(CONFIG_SPARSE_IRQ)) { | 135 | if (IS_ENABLED(CONFIG_SPARSE_IRQ)) { |
| 167 | /* | 136 | /* attempt to allocated irq_descs */ |
| 168 | * Set the descriptor allocator to search for a | 137 | int rc = irq_alloc_descs(first_irq, first_irq, size, |
| 169 | * 1-to-1 mapping, such as irq_alloc_desc_at(). | 138 | of_node_to_nid(of_node)); |
| 170 | * Use of_node_to_nid() which is defined to | 139 | if (rc < 0) |
| 171 | * numa_node_id() on platforms that have no custom | ||
| 172 | * implementation. | ||
| 173 | */ | ||
| 174 | irq_base = irq_alloc_descs(first_irq, first_irq, size, | ||
| 175 | of_node_to_nid(of_node)); | ||
| 176 | if (irq_base < 0) { | ||
| 177 | pr_info("Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n", | 140 | pr_info("Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n", |
| 178 | first_irq); | 141 | first_irq); |
| 179 | irq_base = first_irq; | 142 | } |
| 180 | } | 143 | irq_domain_associate_many(domain, first_irq, 0, size); |
| 181 | } else | ||
| 182 | irq_base = first_irq; | ||
| 183 | |||
| 184 | return irq_domain_add_legacy(of_node, size, irq_base, 0, | ||
| 185 | ops, host_data); | ||
| 186 | } | 144 | } |
| 187 | 145 | ||
| 188 | /* A linear domain is the default */ | 146 | return domain; |
| 189 | return irq_domain_add_linear(of_node, size, ops, host_data); | ||
| 190 | } | 147 | } |
| 191 | EXPORT_SYMBOL_GPL(irq_domain_add_simple); | 148 | EXPORT_SYMBOL_GPL(irq_domain_add_simple); |
| 192 | 149 | ||
| @@ -213,131 +170,19 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, | |||
| 213 | void *host_data) | 170 | void *host_data) |
| 214 | { | 171 | { |
| 215 | struct irq_domain *domain; | 172 | struct irq_domain *domain; |
| 216 | unsigned int i; | ||
| 217 | 173 | ||
| 218 | domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_LEGACY, ops, host_data); | 174 | domain = __irq_domain_add(of_node, first_hwirq + size, |
| 175 | first_hwirq + size, 0, ops, host_data); | ||
| 219 | if (!domain) | 176 | if (!domain) |
| 220 | return NULL; | 177 | return NULL; |
| 221 | 178 | ||
| 222 | domain->revmap_data.legacy.first_irq = first_irq; | 179 | irq_domain_associate_many(domain, first_irq, first_hwirq, size); |
| 223 | domain->revmap_data.legacy.first_hwirq = first_hwirq; | ||
| 224 | domain->revmap_data.legacy.size = size; | ||
| 225 | |||
| 226 | mutex_lock(&irq_domain_mutex); | ||
| 227 | /* Verify that all the irqs are available */ | ||
| 228 | for (i = 0; i < size; i++) { | ||
| 229 | int irq = first_irq + i; | ||
| 230 | struct irq_data *irq_data = irq_get_irq_data(irq); | ||
| 231 | |||
| 232 | if (WARN_ON(!irq_data || irq_data->domain)) { | ||
| 233 | mutex_unlock(&irq_domain_mutex); | ||
| 234 | irq_domain_free(domain); | ||
| 235 | return NULL; | ||
| 236 | } | ||
| 237 | } | ||
| 238 | |||
| 239 | /* Claim all of the irqs before registering a legacy domain */ | ||
| 240 | for (i = 0; i < size; i++) { | ||
| 241 | struct irq_data *irq_data = irq_get_irq_data(first_irq + i); | ||
| 242 | irq_data->hwirq = first_hwirq + i; | ||
| 243 | irq_data->domain = domain; | ||
| 244 | } | ||
| 245 | mutex_unlock(&irq_domain_mutex); | ||
| 246 | |||
| 247 | for (i = 0; i < size; i++) { | ||
| 248 | int irq = first_irq + i; | ||
| 249 | int hwirq = first_hwirq + i; | ||
| 250 | |||
| 251 | /* IRQ0 gets ignored */ | ||
| 252 | if (!irq) | ||
| 253 | continue; | ||
| 254 | |||
| 255 | /* Legacy flags are left to default at this point, | ||
| 256 | * one can then use irq_create_mapping() to | ||
| 257 | * explicitly change them | ||
| 258 | */ | ||
| 259 | if (ops->map) | ||
| 260 | ops->map(domain, irq, hwirq); | ||
| 261 | |||
| 262 | /* Clear norequest flags */ | ||
| 263 | irq_clear_status_flags(irq, IRQ_NOREQUEST); | ||
| 264 | } | ||
| 265 | 180 | ||
| 266 | irq_domain_add(domain); | ||
| 267 | return domain; | 181 | return domain; |
| 268 | } | 182 | } |
| 269 | EXPORT_SYMBOL_GPL(irq_domain_add_legacy); | 183 | EXPORT_SYMBOL_GPL(irq_domain_add_legacy); |
| 270 | 184 | ||
| 271 | /** | 185 | /** |
| 272 | * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain. | ||
| 273 | * @of_node: pointer to interrupt controller's device tree node. | ||
| 274 | * @size: Number of interrupts in the domain. | ||
| 275 | * @ops: map/unmap domain callbacks | ||
| 276 | * @host_data: Controller private data pointer | ||
| 277 | */ | ||
| 278 | struct irq_domain *irq_domain_add_linear(struct device_node *of_node, | ||
| 279 | unsigned int size, | ||
| 280 | const struct irq_domain_ops *ops, | ||
| 281 | void *host_data) | ||
| 282 | { | ||
| 283 | struct irq_domain *domain; | ||
| 284 | unsigned int *revmap; | ||
| 285 | |||
| 286 | revmap = kzalloc_node(sizeof(*revmap) * size, GFP_KERNEL, | ||
| 287 | of_node_to_nid(of_node)); | ||
| 288 | if (WARN_ON(!revmap)) | ||
| 289 | return NULL; | ||
| 290 | |||
| 291 | domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_LINEAR, ops, host_data); | ||
| 292 | if (!domain) { | ||
| 293 | kfree(revmap); | ||
| 294 | return NULL; | ||
| 295 | } | ||
| 296 | domain->revmap_data.linear.size = size; | ||
| 297 | domain->revmap_data.linear.revmap = revmap; | ||
| 298 | irq_domain_add(domain); | ||
| 299 | return domain; | ||
| 300 | } | ||
| 301 | EXPORT_SYMBOL_GPL(irq_domain_add_linear); | ||
| 302 | |||
| 303 | struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, | ||
| 304 | unsigned int max_irq, | ||
| 305 | const struct irq_domain_ops *ops, | ||
| 306 | void *host_data) | ||
| 307 | { | ||
| 308 | struct irq_domain *domain = irq_domain_alloc(of_node, | ||
| 309 | IRQ_DOMAIN_MAP_NOMAP, ops, host_data); | ||
| 310 | if (domain) { | ||
| 311 | domain->revmap_data.nomap.max_irq = max_irq ? max_irq : ~0; | ||
| 312 | irq_domain_add(domain); | ||
| 313 | } | ||
| 314 | return domain; | ||
| 315 | } | ||
| 316 | EXPORT_SYMBOL_GPL(irq_domain_add_nomap); | ||
| 317 | |||
| 318 | /** | ||
| 319 | * irq_domain_add_tree() | ||
| 320 | * @of_node: pointer to interrupt controller's device tree node. | ||
| 321 | * @ops: map/unmap domain callbacks | ||
| 322 | * | ||
| 323 | * Note: The radix tree will be allocated later during boot automatically | ||
| 324 | * (the reverse mapping will use the slow path until that happens). | ||
| 325 | */ | ||
| 326 | struct irq_domain *irq_domain_add_tree(struct device_node *of_node, | ||
| 327 | const struct irq_domain_ops *ops, | ||
| 328 | void *host_data) | ||
| 329 | { | ||
| 330 | struct irq_domain *domain = irq_domain_alloc(of_node, | ||
| 331 | IRQ_DOMAIN_MAP_TREE, ops, host_data); | ||
| 332 | if (domain) { | ||
| 333 | INIT_RADIX_TREE(&domain->revmap_data.tree, GFP_KERNEL); | ||
| 334 | irq_domain_add(domain); | ||
| 335 | } | ||
| 336 | return domain; | ||
| 337 | } | ||
| 338 | EXPORT_SYMBOL_GPL(irq_domain_add_tree); | ||
| 339 | |||
| 340 | /** | ||
| 341 | * irq_find_host() - Locates a domain for a given device node | 186 | * irq_find_host() - Locates a domain for a given device node |
| 342 | * @node: device-tree node of the interrupt controller | 187 | * @node: device-tree node of the interrupt controller |
| 343 | */ | 188 | */ |
| @@ -385,125 +230,108 @@ void irq_set_default_host(struct irq_domain *domain) | |||
| 385 | } | 230 | } |
| 386 | EXPORT_SYMBOL_GPL(irq_set_default_host); | 231 | EXPORT_SYMBOL_GPL(irq_set_default_host); |
| 387 | 232 | ||
| 388 | static void irq_domain_disassociate_many(struct irq_domain *domain, | 233 | static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) |
| 389 | unsigned int irq_base, int count) | ||
| 390 | { | 234 | { |
| 391 | /* | 235 | struct irq_data *irq_data = irq_get_irq_data(irq); |
| 392 | * disassociate in reverse order; | 236 | irq_hw_number_t hwirq; |
| 393 | * not strictly necessary, but nice for unwinding | ||
| 394 | */ | ||
| 395 | while (count--) { | ||
| 396 | int irq = irq_base + count; | ||
| 397 | struct irq_data *irq_data = irq_get_irq_data(irq); | ||
| 398 | irq_hw_number_t hwirq; | ||
| 399 | 237 | ||
| 400 | if (WARN_ON(!irq_data || irq_data->domain != domain)) | 238 | if (WARN(!irq_data || irq_data->domain != domain, |
| 401 | continue; | 239 | "virq%i doesn't exist; cannot disassociate\n", irq)) |
| 240 | return; | ||
| 402 | 241 | ||
| 403 | hwirq = irq_data->hwirq; | 242 | hwirq = irq_data->hwirq; |
| 404 | irq_set_status_flags(irq, IRQ_NOREQUEST); | 243 | irq_set_status_flags(irq, IRQ_NOREQUEST); |
| 405 | 244 | ||
| 406 | /* remove chip and handler */ | 245 | /* remove chip and handler */ |
| 407 | irq_set_chip_and_handler(irq, NULL, NULL); | 246 | irq_set_chip_and_handler(irq, NULL, NULL); |
| 408 | 247 | ||
| 409 | /* Make sure it's completed */ | 248 | /* Make sure it's completed */ |
| 410 | synchronize_irq(irq); | 249 | synchronize_irq(irq); |
| 411 | 250 | ||
| 412 | /* Tell the PIC about it */ | 251 | /* Tell the PIC about it */ |
| 413 | if (domain->ops->unmap) | 252 | if (domain->ops->unmap) |
| 414 | domain->ops->unmap(domain, irq); | 253 | domain->ops->unmap(domain, irq); |
| 415 | smp_mb(); | 254 | smp_mb(); |
| 416 | 255 | ||
| 417 | irq_data->domain = NULL; | 256 | irq_data->domain = NULL; |
| 418 | irq_data->hwirq = 0; | 257 | irq_data->hwirq = 0; |
| 419 | 258 | ||
| 420 | /* Clear reverse map */ | 259 | /* Clear reverse map for this hwirq */ |
| 421 | switch(domain->revmap_type) { | 260 | if (hwirq < domain->revmap_size) { |
| 422 | case IRQ_DOMAIN_MAP_LINEAR: | 261 | domain->linear_revmap[hwirq] = 0; |
| 423 | if (hwirq < domain->revmap_data.linear.size) | 262 | } else { |
| 424 | domain->revmap_data.linear.revmap[hwirq] = 0; | 263 | mutex_lock(&revmap_trees_mutex); |
| 425 | break; | 264 | radix_tree_delete(&domain->revmap_tree, hwirq); |
| 426 | case IRQ_DOMAIN_MAP_TREE: | 265 | mutex_unlock(&revmap_trees_mutex); |
| 427 | mutex_lock(&revmap_trees_mutex); | ||
| 428 | radix_tree_delete(&domain->revmap_data.tree, hwirq); | ||
| 429 | mutex_unlock(&revmap_trees_mutex); | ||
| 430 | break; | ||
| 431 | } | ||
| 432 | } | 266 | } |
| 433 | } | 267 | } |
| 434 | 268 | ||
| 435 | int irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, | 269 | int irq_domain_associate(struct irq_domain *domain, unsigned int virq, |
| 436 | irq_hw_number_t hwirq_base, int count) | 270 | irq_hw_number_t hwirq) |
| 437 | { | 271 | { |
| 438 | unsigned int virq = irq_base; | 272 | struct irq_data *irq_data = irq_get_irq_data(virq); |
| 439 | irq_hw_number_t hwirq = hwirq_base; | 273 | int ret; |
| 440 | int i, ret; | ||
| 441 | 274 | ||
| 442 | pr_debug("%s(%s, irqbase=%i, hwbase=%i, count=%i)\n", __func__, | 275 | if (WARN(hwirq >= domain->hwirq_max, |
| 443 | of_node_full_name(domain->of_node), irq_base, (int)hwirq_base, count); | 276 | "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain->name)) |
| 277 | return -EINVAL; | ||
| 278 | if (WARN(!irq_data, "error: virq%i is not allocated", virq)) | ||
| 279 | return -EINVAL; | ||
| 280 | if (WARN(irq_data->domain, "error: virq%i is already associated", virq)) | ||
| 281 | return -EINVAL; | ||
| 444 | 282 | ||
| 445 | for (i = 0; i < count; i++) { | 283 | mutex_lock(&irq_domain_mutex); |
| 446 | struct irq_data *irq_data = irq_get_irq_data(virq + i); | 284 | irq_data->hwirq = hwirq; |
| 447 | 285 | irq_data->domain = domain; | |
| 448 | if (WARN(!irq_data, "error: irq_desc not allocated; " | 286 | if (domain->ops->map) { |
| 449 | "irq=%i hwirq=0x%x\n", virq + i, (int)hwirq + i)) | 287 | ret = domain->ops->map(domain, virq, hwirq); |
| 450 | return -EINVAL; | 288 | if (ret != 0) { |
| 451 | if (WARN(irq_data->domain, "error: irq_desc already associated; " | 289 | /* |
| 452 | "irq=%i hwirq=0x%x\n", virq + i, (int)hwirq + i)) | 290 | * If map() returns -EPERM, this interrupt is protected |
| 453 | return -EINVAL; | 291 | * by the firmware or some other service and shall not |
| 454 | }; | 292 | * be mapped. Don't bother telling the user about it. |
| 455 | 293 | */ | |
| 456 | for (i = 0; i < count; i++, virq++, hwirq++) { | 294 | if (ret != -EPERM) { |
| 457 | struct irq_data *irq_data = irq_get_irq_data(virq); | 295 | pr_info("%s didn't like hwirq-0x%lx to VIRQ%i mapping (rc=%d)\n", |
| 458 | 296 | domain->name, hwirq, virq, ret); | |
| 459 | irq_data->hwirq = hwirq; | ||
| 460 | irq_data->domain = domain; | ||
| 461 | if (domain->ops->map) { | ||
| 462 | ret = domain->ops->map(domain, virq, hwirq); | ||
| 463 | if (ret != 0) { | ||
| 464 | /* | ||
| 465 | * If map() returns -EPERM, this interrupt is protected | ||
| 466 | * by the firmware or some other service and shall not | ||
| 467 | * be mapped. | ||
| 468 | * | ||
| 469 | * Since on some platforms we blindly try to map everything | ||
| 470 | * we end up with a log full of backtraces. | ||
| 471 | * | ||
| 472 | * So instead, we silently fail on -EPERM, it is the | ||
| 473 | * responsibility of the PIC driver to display a relevant | ||
| 474 | * message if needed. | ||
| 475 | */ | ||
| 476 | if (ret != -EPERM) { | ||
| 477 | pr_err("irq-%i==>hwirq-0x%lx mapping failed: %d\n", | ||
| 478 | virq, hwirq, ret); | ||
| 479 | WARN_ON(1); | ||
| 480 | } | ||
| 481 | irq_data->domain = NULL; | ||
| 482 | irq_data->hwirq = 0; | ||
| 483 | goto err_unmap; | ||
| 484 | } | 297 | } |
| 298 | irq_data->domain = NULL; | ||
| 299 | irq_data->hwirq = 0; | ||
| 300 | mutex_unlock(&irq_domain_mutex); | ||
| 301 | return ret; | ||
| 485 | } | 302 | } |
| 486 | 303 | ||
| 487 | switch (domain->revmap_type) { | 304 | /* If not already assigned, give the domain the chip's name */ |
| 488 | case IRQ_DOMAIN_MAP_LINEAR: | 305 | if (!domain->name && irq_data->chip) |
| 489 | if (hwirq < domain->revmap_data.linear.size) | 306 | domain->name = irq_data->chip->name; |
| 490 | domain->revmap_data.linear.revmap[hwirq] = virq; | 307 | } |
| 491 | break; | ||
| 492 | case IRQ_DOMAIN_MAP_TREE: | ||
| 493 | mutex_lock(&revmap_trees_mutex); | ||
| 494 | radix_tree_insert(&domain->revmap_data.tree, hwirq, irq_data); | ||
| 495 | mutex_unlock(&revmap_trees_mutex); | ||
| 496 | break; | ||
| 497 | } | ||
| 498 | 308 | ||
| 499 | irq_clear_status_flags(virq, IRQ_NOREQUEST); | 309 | if (hwirq < domain->revmap_size) { |
| 310 | domain->linear_revmap[hwirq] = virq; | ||
| 311 | } else { | ||
| 312 | mutex_lock(&revmap_trees_mutex); | ||
| 313 | radix_tree_insert(&domain->revmap_tree, hwirq, irq_data); | ||
| 314 | mutex_unlock(&revmap_trees_mutex); | ||
| 500 | } | 315 | } |
| 316 | mutex_unlock(&irq_domain_mutex); | ||
| 317 | |||
| 318 | irq_clear_status_flags(virq, IRQ_NOREQUEST); | ||
| 501 | 319 | ||
| 502 | return 0; | 320 | return 0; |
| 321 | } | ||
| 322 | EXPORT_SYMBOL_GPL(irq_domain_associate); | ||
| 503 | 323 | ||
| 504 | err_unmap: | 324 | void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, |
| 505 | irq_domain_disassociate_many(domain, irq_base, i); | 325 | irq_hw_number_t hwirq_base, int count) |
| 506 | return -EINVAL; | 326 | { |
| 327 | int i; | ||
| 328 | |||
| 329 | pr_debug("%s(%s, irqbase=%i, hwbase=%i, count=%i)\n", __func__, | ||
| 330 | of_node_full_name(domain->of_node), irq_base, (int)hwirq_base, count); | ||
| 331 | |||
| 332 | for (i = 0; i < count; i++) { | ||
| 333 | irq_domain_associate(domain, irq_base + i, hwirq_base + i); | ||
| 334 | } | ||
| 507 | } | 335 | } |
| 508 | EXPORT_SYMBOL_GPL(irq_domain_associate_many); | 336 | EXPORT_SYMBOL_GPL(irq_domain_associate_many); |
| 509 | 337 | ||
| @@ -513,7 +341,9 @@ EXPORT_SYMBOL_GPL(irq_domain_associate_many); | |||
| 513 | * | 341 | * |
| 514 | * This routine is used for irq controllers which can choose the hardware | 342 | * This routine is used for irq controllers which can choose the hardware |
| 515 | * interrupt numbers they generate. In such a case it's simplest to use | 343 | * interrupt numbers they generate. In such a case it's simplest to use |
| 516 | * the linux irq as the hardware interrupt number. | 344 | * the linux irq as the hardware interrupt number. It still uses the linear |
| 345 | * or radix tree to store the mapping, but the irq controller can optimize | ||
| 346 | * the revmap path by using the hwirq directly. | ||
| 517 | */ | 347 | */ |
| 518 | unsigned int irq_create_direct_mapping(struct irq_domain *domain) | 348 | unsigned int irq_create_direct_mapping(struct irq_domain *domain) |
| 519 | { | 349 | { |
| @@ -522,17 +352,14 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) | |||
| 522 | if (domain == NULL) | 352 | if (domain == NULL) |
| 523 | domain = irq_default_domain; | 353 | domain = irq_default_domain; |
| 524 | 354 | ||
| 525 | if (WARN_ON(!domain || domain->revmap_type != IRQ_DOMAIN_MAP_NOMAP)) | ||
| 526 | return 0; | ||
| 527 | |||
| 528 | virq = irq_alloc_desc_from(1, of_node_to_nid(domain->of_node)); | 355 | virq = irq_alloc_desc_from(1, of_node_to_nid(domain->of_node)); |
| 529 | if (!virq) { | 356 | if (!virq) { |
| 530 | pr_debug("create_direct virq allocation failed\n"); | 357 | pr_debug("create_direct virq allocation failed\n"); |
| 531 | return 0; | 358 | return 0; |
| 532 | } | 359 | } |
| 533 | if (virq >= domain->revmap_data.nomap.max_irq) { | 360 | if (virq >= domain->revmap_direct_max_irq) { |
| 534 | pr_err("ERROR: no free irqs available below %i maximum\n", | 361 | pr_err("ERROR: no free irqs available below %i maximum\n", |
| 535 | domain->revmap_data.nomap.max_irq); | 362 | domain->revmap_direct_max_irq); |
| 536 | irq_free_desc(virq); | 363 | irq_free_desc(virq); |
| 537 | return 0; | 364 | return 0; |
| 538 | } | 365 | } |
| @@ -569,9 +396,7 @@ unsigned int irq_create_mapping(struct irq_domain *domain, | |||
| 569 | if (domain == NULL) | 396 | if (domain == NULL) |
| 570 | domain = irq_default_domain; | 397 | domain = irq_default_domain; |
| 571 | if (domain == NULL) { | 398 | if (domain == NULL) { |
| 572 | pr_warning("irq_create_mapping called for" | 399 | WARN(1, "%s(, %lx) called with NULL domain\n", __func__, hwirq); |
| 573 | " NULL domain, hwirq=%lx\n", hwirq); | ||
| 574 | WARN_ON(1); | ||
| 575 | return 0; | 400 | return 0; |
| 576 | } | 401 | } |
| 577 | pr_debug("-> using domain @%p\n", domain); | 402 | pr_debug("-> using domain @%p\n", domain); |
| @@ -583,10 +408,6 @@ unsigned int irq_create_mapping(struct irq_domain *domain, | |||
| 583 | return virq; | 408 | return virq; |
| 584 | } | 409 | } |
| 585 | 410 | ||
| 586 | /* Get a virtual interrupt number */ | ||
| 587 | if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY) | ||
| 588 | return irq_domain_legacy_revmap(domain, hwirq); | ||
| 589 | |||
| 590 | /* Allocate a virtual interrupt number */ | 411 | /* Allocate a virtual interrupt number */ |
| 591 | hint = hwirq % nr_irqs; | 412 | hint = hwirq % nr_irqs; |
| 592 | if (hint == 0) | 413 | if (hint == 0) |
| @@ -639,12 +460,7 @@ int irq_create_strict_mappings(struct irq_domain *domain, unsigned int irq_base, | |||
| 639 | if (unlikely(ret < 0)) | 460 | if (unlikely(ret < 0)) |
| 640 | return ret; | 461 | return ret; |
| 641 | 462 | ||
| 642 | ret = irq_domain_associate_many(domain, irq_base, hwirq_base, count); | 463 | irq_domain_associate_many(domain, irq_base, hwirq_base, count); |
| 643 | if (unlikely(ret < 0)) { | ||
| 644 | irq_free_descs(irq_base, count); | ||
| 645 | return ret; | ||
| 646 | } | ||
| 647 | |||
| 648 | return 0; | 464 | return 0; |
| 649 | } | 465 | } |
| 650 | EXPORT_SYMBOL_GPL(irq_create_strict_mappings); | 466 | EXPORT_SYMBOL_GPL(irq_create_strict_mappings); |
| @@ -659,20 +475,8 @@ unsigned int irq_create_of_mapping(struct device_node *controller, | |||
| 659 | 475 | ||
| 660 | domain = controller ? irq_find_host(controller) : irq_default_domain; | 476 | domain = controller ? irq_find_host(controller) : irq_default_domain; |
| 661 | if (!domain) { | 477 | if (!domain) { |
| 662 | #ifdef CONFIG_MIPS | 478 | pr_warn("no irq domain found for %s !\n", |
| 663 | /* | 479 | of_node_full_name(controller)); |
| 664 | * Workaround to avoid breaking interrupt controller drivers | ||
| 665 | * that don't yet register an irq_domain. This is temporary | ||
| 666 | * code. ~~~gcl, Feb 24, 2012 | ||
| 667 | * | ||
| 668 | * Scheduled for removal in Linux v3.6. That should be enough | ||
| 669 | * time. | ||
| 670 | */ | ||
| 671 | if (intsize > 0) | ||
| 672 | return intspec[0]; | ||
| 673 | #endif | ||
| 674 | pr_warning("no irq domain found for %s !\n", | ||
| 675 | of_node_full_name(controller)); | ||
| 676 | return 0; | 480 | return 0; |
| 677 | } | 481 | } |
| 678 | 482 | ||
| @@ -714,11 +518,7 @@ void irq_dispose_mapping(unsigned int virq) | |||
| 714 | if (WARN_ON(domain == NULL)) | 518 | if (WARN_ON(domain == NULL)) |
| 715 | return; | 519 | return; |
| 716 | 520 | ||
| 717 | /* Never unmap legacy interrupts */ | 521 | irq_domain_disassociate(domain, virq); |
| 718 | if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY) | ||
| 719 | return; | ||
| 720 | |||
| 721 | irq_domain_disassociate_many(domain, virq, 1); | ||
| 722 | irq_free_desc(virq); | 522 | irq_free_desc(virq); |
| 723 | } | 523 | } |
| 724 | EXPORT_SYMBOL_GPL(irq_dispose_mapping); | 524 | EXPORT_SYMBOL_GPL(irq_dispose_mapping); |
| @@ -739,63 +539,51 @@ unsigned int irq_find_mapping(struct irq_domain *domain, | |||
| 739 | if (domain == NULL) | 539 | if (domain == NULL) |
| 740 | return 0; | 540 | return 0; |
| 741 | 541 | ||
| 742 | switch (domain->revmap_type) { | 542 | if (hwirq < domain->revmap_direct_max_irq) { |
| 743 | case IRQ_DOMAIN_MAP_LEGACY: | ||
| 744 | return irq_domain_legacy_revmap(domain, hwirq); | ||
| 745 | case IRQ_DOMAIN_MAP_LINEAR: | ||
| 746 | return irq_linear_revmap(domain, hwirq); | ||
| 747 | case IRQ_DOMAIN_MAP_TREE: | ||
| 748 | rcu_read_lock(); | ||
| 749 | data = radix_tree_lookup(&domain->revmap_data.tree, hwirq); | ||
| 750 | rcu_read_unlock(); | ||
| 751 | if (data) | ||
| 752 | return data->irq; | ||
| 753 | break; | ||
| 754 | case IRQ_DOMAIN_MAP_NOMAP: | ||
| 755 | data = irq_get_irq_data(hwirq); | 543 | data = irq_get_irq_data(hwirq); |
| 756 | if (data && (data->domain == domain) && (data->hwirq == hwirq)) | 544 | if (data && (data->domain == domain) && (data->hwirq == hwirq)) |
| 757 | return hwirq; | 545 | return hwirq; |
| 758 | break; | ||
| 759 | } | 546 | } |
| 760 | 547 | ||
| 761 | return 0; | 548 | /* Check if the hwirq is in the linear revmap. */ |
| 762 | } | 549 | if (hwirq < domain->revmap_size) |
| 763 | EXPORT_SYMBOL_GPL(irq_find_mapping); | 550 | return domain->linear_revmap[hwirq]; |
| 764 | 551 | ||
| 765 | /** | 552 | rcu_read_lock(); |
| 766 | * irq_linear_revmap() - Find a linux irq from a hw irq number. | 553 | data = radix_tree_lookup(&domain->revmap_tree, hwirq); |
| 767 | * @domain: domain owning this hardware interrupt | 554 | rcu_read_unlock(); |
| 768 | * @hwirq: hardware irq number in that domain space | 555 | return data ? data->irq : 0; |
| 769 | * | ||
| 770 | * This is a fast path that can be called directly by irq controller code to | ||
| 771 | * save a handful of instructions. | ||
| 772 | */ | ||
| 773 | unsigned int irq_linear_revmap(struct irq_domain *domain, | ||
| 774 | irq_hw_number_t hwirq) | ||
| 775 | { | ||
| 776 | BUG_ON(domain->revmap_type != IRQ_DOMAIN_MAP_LINEAR); | ||
| 777 | |||
| 778 | /* Check revmap bounds; complain if exceeded */ | ||
| 779 | if (WARN_ON(hwirq >= domain->revmap_data.linear.size)) | ||
| 780 | return 0; | ||
| 781 | |||
| 782 | return domain->revmap_data.linear.revmap[hwirq]; | ||
| 783 | } | 556 | } |
| 784 | EXPORT_SYMBOL_GPL(irq_linear_revmap); | 557 | EXPORT_SYMBOL_GPL(irq_find_mapping); |
| 785 | 558 | ||
| 786 | #ifdef CONFIG_IRQ_DOMAIN_DEBUG | 559 | #ifdef CONFIG_IRQ_DOMAIN_DEBUG |
| 787 | static int virq_debug_show(struct seq_file *m, void *private) | 560 | static int virq_debug_show(struct seq_file *m, void *private) |
| 788 | { | 561 | { |
| 789 | unsigned long flags; | 562 | unsigned long flags; |
| 790 | struct irq_desc *desc; | 563 | struct irq_desc *desc; |
| 791 | const char *p; | 564 | struct irq_domain *domain; |
| 792 | static const char none[] = "none"; | 565 | struct radix_tree_iter iter; |
| 793 | void *data; | 566 | void *data, **slot; |
| 794 | int i; | 567 | int i; |
| 795 | 568 | ||
| 796 | seq_printf(m, "%-5s %-7s %-15s %-*s %s\n", "irq", "hwirq", | 569 | seq_printf(m, " %-16s %-6s %-10s %-10s %s\n", |
| 570 | "name", "mapped", "linear-max", "direct-max", "devtree-node"); | ||
| 571 | mutex_lock(&irq_domain_mutex); | ||
| 572 | list_for_each_entry(domain, &irq_domain_list, link) { | ||
| 573 | int count = 0; | ||
| 574 | radix_tree_for_each_slot(slot, &domain->revmap_tree, &iter, 0) | ||
| 575 | count++; | ||
| 576 | seq_printf(m, "%c%-16s %6u %10u %10u %s\n", | ||
| 577 | domain == irq_default_domain ? '*' : ' ', domain->name, | ||
| 578 | domain->revmap_size + count, domain->revmap_size, | ||
| 579 | domain->revmap_direct_max_irq, | ||
| 580 | domain->of_node ? of_node_full_name(domain->of_node) : ""); | ||
| 581 | } | ||
| 582 | mutex_unlock(&irq_domain_mutex); | ||
| 583 | |||
| 584 | seq_printf(m, "%-5s %-7s %-15s %-*s %6s %-14s %s\n", "irq", "hwirq", | ||
| 797 | "chip name", (int)(2 * sizeof(void *) + 2), "chip data", | 585 | "chip name", (int)(2 * sizeof(void *) + 2), "chip data", |
| 798 | "domain name"); | 586 | "active", "type", "domain"); |
| 799 | 587 | ||
| 800 | for (i = 1; i < nr_irqs; i++) { | 588 | for (i = 1; i < nr_irqs; i++) { |
| 801 | desc = irq_to_desc(i); | 589 | desc = irq_to_desc(i); |
| @@ -803,28 +591,28 @@ static int virq_debug_show(struct seq_file *m, void *private) | |||
| 803 | continue; | 591 | continue; |
| 804 | 592 | ||
| 805 | raw_spin_lock_irqsave(&desc->lock, flags); | 593 | raw_spin_lock_irqsave(&desc->lock, flags); |
| 594 | domain = desc->irq_data.domain; | ||
| 806 | 595 | ||
| 807 | if (desc->action && desc->action->handler) { | 596 | if (domain) { |
| 808 | struct irq_chip *chip; | 597 | struct irq_chip *chip; |
| 598 | int hwirq = desc->irq_data.hwirq; | ||
| 599 | bool direct; | ||
| 809 | 600 | ||
| 810 | seq_printf(m, "%5d ", i); | 601 | seq_printf(m, "%5d ", i); |
| 811 | seq_printf(m, "0x%05lx ", desc->irq_data.hwirq); | 602 | seq_printf(m, "0x%05x ", hwirq); |
| 812 | 603 | ||
| 813 | chip = irq_desc_get_chip(desc); | 604 | chip = irq_desc_get_chip(desc); |
| 814 | if (chip && chip->name) | 605 | seq_printf(m, "%-15s ", (chip && chip->name) ? chip->name : "none"); |
| 815 | p = chip->name; | ||
| 816 | else | ||
| 817 | p = none; | ||
| 818 | seq_printf(m, "%-15s ", p); | ||
| 819 | 606 | ||
| 820 | data = irq_desc_get_chip_data(desc); | 607 | data = irq_desc_get_chip_data(desc); |
| 821 | seq_printf(m, data ? "0x%p " : " %p ", data); | 608 | seq_printf(m, data ? "0x%p " : " %p ", data); |
| 822 | 609 | ||
| 823 | if (desc->irq_data.domain) | 610 | seq_printf(m, " %c ", (desc->action && desc->action->handler) ? '*' : ' '); |
| 824 | p = of_node_full_name(desc->irq_data.domain->of_node); | 611 | direct = (i == hwirq) && (i < domain->revmap_direct_max_irq); |
| 825 | else | 612 | seq_printf(m, "%6s%-8s ", |
| 826 | p = none; | 613 | (hwirq < domain->revmap_size) ? "LINEAR" : "RADIX", |
| 827 | seq_printf(m, "%s\n", p); | 614 | direct ? "(DIRECT)" : ""); |
| 615 | seq_printf(m, "%s\n", desc->irq_data.domain->name); | ||
| 828 | } | 616 | } |
| 829 | 617 | ||
| 830 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 618 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
| @@ -921,18 +709,3 @@ const struct irq_domain_ops irq_domain_simple_ops = { | |||
| 921 | .xlate = irq_domain_xlate_onetwocell, | 709 | .xlate = irq_domain_xlate_onetwocell, |
| 922 | }; | 710 | }; |
| 923 | EXPORT_SYMBOL_GPL(irq_domain_simple_ops); | 711 | EXPORT_SYMBOL_GPL(irq_domain_simple_ops); |
| 924 | |||
| 925 | #ifdef CONFIG_OF_IRQ | ||
| 926 | void irq_domain_generate_simple(const struct of_device_id *match, | ||
| 927 | u64 phys_base, unsigned int irq_start) | ||
| 928 | { | ||
| 929 | struct device_node *node; | ||
| 930 | pr_debug("looking for phys_base=%llx, irq_start=%i\n", | ||
| 931 | (unsigned long long) phys_base, (int) irq_start); | ||
| 932 | node = of_find_matching_node_by_address(NULL, match, phys_base); | ||
| 933 | if (node) | ||
| 934 | irq_domain_add_legacy(node, 32, irq_start, 0, | ||
| 935 | &irq_domain_simple_ops, NULL); | ||
| 936 | } | ||
| 937 | EXPORT_SYMBOL_GPL(irq_domain_generate_simple); | ||
| 938 | #endif | ||
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 19ed5c425c3b..36f6ee181b0c 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
| @@ -462,6 +462,8 @@ int show_interrupts(struct seq_file *p, void *v) | |||
| 462 | } else { | 462 | } else { |
| 463 | seq_printf(p, " %8s", "None"); | 463 | seq_printf(p, " %8s", "None"); |
| 464 | } | 464 | } |
| 465 | if (desc->irq_data.domain) | ||
| 466 | seq_printf(p, " %*d", prec, (int) desc->irq_data.hwirq); | ||
| 465 | #ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL | 467 | #ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL |
| 466 | seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge"); | 468 | seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge"); |
| 467 | #endif | 469 | #endif |
diff --git a/kernel/module.c b/kernel/module.c index cab4bce49c23..206915830d29 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
| @@ -455,7 +455,7 @@ const struct kernel_symbol *find_symbol(const char *name, | |||
| 455 | EXPORT_SYMBOL_GPL(find_symbol); | 455 | EXPORT_SYMBOL_GPL(find_symbol); |
| 456 | 456 | ||
| 457 | /* Search for module by name: must hold module_mutex. */ | 457 | /* Search for module by name: must hold module_mutex. */ |
| 458 | static struct module *find_module_all(const char *name, | 458 | static struct module *find_module_all(const char *name, size_t len, |
| 459 | bool even_unformed) | 459 | bool even_unformed) |
| 460 | { | 460 | { |
| 461 | struct module *mod; | 461 | struct module *mod; |
| @@ -463,7 +463,7 @@ static struct module *find_module_all(const char *name, | |||
| 463 | list_for_each_entry(mod, &modules, list) { | 463 | list_for_each_entry(mod, &modules, list) { |
| 464 | if (!even_unformed && mod->state == MODULE_STATE_UNFORMED) | 464 | if (!even_unformed && mod->state == MODULE_STATE_UNFORMED) |
| 465 | continue; | 465 | continue; |
| 466 | if (strcmp(mod->name, name) == 0) | 466 | if (strlen(mod->name) == len && !memcmp(mod->name, name, len)) |
| 467 | return mod; | 467 | return mod; |
| 468 | } | 468 | } |
| 469 | return NULL; | 469 | return NULL; |
| @@ -471,7 +471,7 @@ static struct module *find_module_all(const char *name, | |||
| 471 | 471 | ||
| 472 | struct module *find_module(const char *name) | 472 | struct module *find_module(const char *name) |
| 473 | { | 473 | { |
| 474 | return find_module_all(name, false); | 474 | return find_module_all(name, strlen(name), false); |
| 475 | } | 475 | } |
| 476 | EXPORT_SYMBOL_GPL(find_module); | 476 | EXPORT_SYMBOL_GPL(find_module); |
| 477 | 477 | ||
| @@ -482,23 +482,28 @@ static inline void __percpu *mod_percpu(struct module *mod) | |||
| 482 | return mod->percpu; | 482 | return mod->percpu; |
| 483 | } | 483 | } |
| 484 | 484 | ||
| 485 | static int percpu_modalloc(struct module *mod, | 485 | static int percpu_modalloc(struct module *mod, struct load_info *info) |
| 486 | unsigned long size, unsigned long align) | ||
| 487 | { | 486 | { |
| 487 | Elf_Shdr *pcpusec = &info->sechdrs[info->index.pcpu]; | ||
| 488 | unsigned long align = pcpusec->sh_addralign; | ||
| 489 | |||
| 490 | if (!pcpusec->sh_size) | ||
| 491 | return 0; | ||
| 492 | |||
| 488 | if (align > PAGE_SIZE) { | 493 | if (align > PAGE_SIZE) { |
| 489 | printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", | 494 | printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", |
| 490 | mod->name, align, PAGE_SIZE); | 495 | mod->name, align, PAGE_SIZE); |
| 491 | align = PAGE_SIZE; | 496 | align = PAGE_SIZE; |
| 492 | } | 497 | } |
| 493 | 498 | ||
| 494 | mod->percpu = __alloc_reserved_percpu(size, align); | 499 | mod->percpu = __alloc_reserved_percpu(pcpusec->sh_size, align); |
| 495 | if (!mod->percpu) { | 500 | if (!mod->percpu) { |
| 496 | printk(KERN_WARNING | 501 | printk(KERN_WARNING |
| 497 | "%s: Could not allocate %lu bytes percpu data\n", | 502 | "%s: Could not allocate %lu bytes percpu data\n", |
| 498 | mod->name, size); | 503 | mod->name, (unsigned long)pcpusec->sh_size); |
| 499 | return -ENOMEM; | 504 | return -ENOMEM; |
| 500 | } | 505 | } |
| 501 | mod->percpu_size = size; | 506 | mod->percpu_size = pcpusec->sh_size; |
| 502 | return 0; | 507 | return 0; |
| 503 | } | 508 | } |
| 504 | 509 | ||
| @@ -563,10 +568,12 @@ static inline void __percpu *mod_percpu(struct module *mod) | |||
| 563 | { | 568 | { |
| 564 | return NULL; | 569 | return NULL; |
| 565 | } | 570 | } |
| 566 | static inline int percpu_modalloc(struct module *mod, | 571 | static int percpu_modalloc(struct module *mod, struct load_info *info) |
| 567 | unsigned long size, unsigned long align) | ||
| 568 | { | 572 | { |
| 569 | return -ENOMEM; | 573 | /* UP modules shouldn't have this section: ENOMEM isn't quite right */ |
| 574 | if (info->sechdrs[info->index.pcpu].sh_size != 0) | ||
| 575 | return -ENOMEM; | ||
| 576 | return 0; | ||
| 570 | } | 577 | } |
| 571 | static inline void percpu_modfree(struct module *mod) | 578 | static inline void percpu_modfree(struct module *mod) |
| 572 | { | 579 | { |
| @@ -2927,7 +2934,6 @@ static struct module *layout_and_allocate(struct load_info *info, int flags) | |||
| 2927 | { | 2934 | { |
| 2928 | /* Module within temporary copy. */ | 2935 | /* Module within temporary copy. */ |
| 2929 | struct module *mod; | 2936 | struct module *mod; |
| 2930 | Elf_Shdr *pcpusec; | ||
| 2931 | int err; | 2937 | int err; |
| 2932 | 2938 | ||
| 2933 | mod = setup_load_info(info, flags); | 2939 | mod = setup_load_info(info, flags); |
| @@ -2942,17 +2948,10 @@ static struct module *layout_and_allocate(struct load_info *info, int flags) | |||
| 2942 | err = module_frob_arch_sections(info->hdr, info->sechdrs, | 2948 | err = module_frob_arch_sections(info->hdr, info->sechdrs, |
| 2943 | info->secstrings, mod); | 2949 | info->secstrings, mod); |
| 2944 | if (err < 0) | 2950 | if (err < 0) |
| 2945 | goto out; | 2951 | return ERR_PTR(err); |
| 2946 | 2952 | ||
| 2947 | pcpusec = &info->sechdrs[info->index.pcpu]; | 2953 | /* We will do a special allocation for per-cpu sections later. */ |
| 2948 | if (pcpusec->sh_size) { | 2954 | info->sechdrs[info->index.pcpu].sh_flags &= ~(unsigned long)SHF_ALLOC; |
| 2949 | /* We have a special allocation for this section. */ | ||
| 2950 | err = percpu_modalloc(mod, | ||
| 2951 | pcpusec->sh_size, pcpusec->sh_addralign); | ||
| 2952 | if (err) | ||
| 2953 | goto out; | ||
| 2954 | pcpusec->sh_flags &= ~(unsigned long)SHF_ALLOC; | ||
| 2955 | } | ||
| 2956 | 2955 | ||
| 2957 | /* Determine total sizes, and put offsets in sh_entsize. For now | 2956 | /* Determine total sizes, and put offsets in sh_entsize. For now |
| 2958 | this is done generically; there doesn't appear to be any | 2957 | this is done generically; there doesn't appear to be any |
| @@ -2963,17 +2962,12 @@ static struct module *layout_and_allocate(struct load_info *info, int flags) | |||
| 2963 | /* Allocate and move to the final place */ | 2962 | /* Allocate and move to the final place */ |
| 2964 | err = move_module(mod, info); | 2963 | err = move_module(mod, info); |
| 2965 | if (err) | 2964 | if (err) |
| 2966 | goto free_percpu; | 2965 | return ERR_PTR(err); |
| 2967 | 2966 | ||
| 2968 | /* Module has been copied to its final place now: return it. */ | 2967 | /* Module has been copied to its final place now: return it. */ |
| 2969 | mod = (void *)info->sechdrs[info->index.mod].sh_addr; | 2968 | mod = (void *)info->sechdrs[info->index.mod].sh_addr; |
| 2970 | kmemleak_load_module(mod, info); | 2969 | kmemleak_load_module(mod, info); |
| 2971 | return mod; | 2970 | return mod; |
| 2972 | |||
| 2973 | free_percpu: | ||
| 2974 | percpu_modfree(mod); | ||
| 2975 | out: | ||
| 2976 | return ERR_PTR(err); | ||
| 2977 | } | 2971 | } |
| 2978 | 2972 | ||
| 2979 | /* mod is no longer valid after this! */ | 2973 | /* mod is no longer valid after this! */ |
| @@ -3014,7 +3008,7 @@ static bool finished_loading(const char *name) | |||
| 3014 | bool ret; | 3008 | bool ret; |
| 3015 | 3009 | ||
| 3016 | mutex_lock(&module_mutex); | 3010 | mutex_lock(&module_mutex); |
| 3017 | mod = find_module_all(name, true); | 3011 | mod = find_module_all(name, strlen(name), true); |
| 3018 | ret = !mod || mod->state == MODULE_STATE_LIVE | 3012 | ret = !mod || mod->state == MODULE_STATE_LIVE |
| 3019 | || mod->state == MODULE_STATE_GOING; | 3013 | || mod->state == MODULE_STATE_GOING; |
| 3020 | mutex_unlock(&module_mutex); | 3014 | mutex_unlock(&module_mutex); |
| @@ -3152,7 +3146,8 @@ static int add_unformed_module(struct module *mod) | |||
| 3152 | 3146 | ||
| 3153 | again: | 3147 | again: |
| 3154 | mutex_lock(&module_mutex); | 3148 | mutex_lock(&module_mutex); |
| 3155 | if ((old = find_module_all(mod->name, true)) != NULL) { | 3149 | old = find_module_all(mod->name, strlen(mod->name), true); |
| 3150 | if (old != NULL) { | ||
| 3156 | if (old->state == MODULE_STATE_COMING | 3151 | if (old->state == MODULE_STATE_COMING |
| 3157 | || old->state == MODULE_STATE_UNFORMED) { | 3152 | || old->state == MODULE_STATE_UNFORMED) { |
| 3158 | /* Wait in case it fails to load. */ | 3153 | /* Wait in case it fails to load. */ |
| @@ -3198,6 +3193,17 @@ out: | |||
| 3198 | return err; | 3193 | return err; |
| 3199 | } | 3194 | } |
| 3200 | 3195 | ||
| 3196 | static int unknown_module_param_cb(char *param, char *val, const char *modname) | ||
| 3197 | { | ||
| 3198 | /* Check for magic 'dyndbg' arg */ | ||
| 3199 | int ret = ddebug_dyndbg_module_param_cb(param, val, modname); | ||
| 3200 | if (ret != 0) { | ||
| 3201 | printk(KERN_WARNING "%s: unknown parameter '%s' ignored\n", | ||
| 3202 | modname, param); | ||
| 3203 | } | ||
| 3204 | return 0; | ||
| 3205 | } | ||
| 3206 | |||
| 3201 | /* Allocate and load the module: note that size of section 0 is always | 3207 | /* Allocate and load the module: note that size of section 0 is always |
| 3202 | zero, and we rely on this for optional sections. */ | 3208 | zero, and we rely on this for optional sections. */ |
| 3203 | static int load_module(struct load_info *info, const char __user *uargs, | 3209 | static int load_module(struct load_info *info, const char __user *uargs, |
| @@ -3237,6 +3243,11 @@ static int load_module(struct load_info *info, const char __user *uargs, | |||
| 3237 | } | 3243 | } |
| 3238 | #endif | 3244 | #endif |
| 3239 | 3245 | ||
| 3246 | /* To avoid stressing percpu allocator, do this once we're unique. */ | ||
| 3247 | err = percpu_modalloc(mod, info); | ||
| 3248 | if (err) | ||
| 3249 | goto unlink_mod; | ||
| 3250 | |||
| 3240 | /* Now module is in final location, initialize linked lists, etc. */ | 3251 | /* Now module is in final location, initialize linked lists, etc. */ |
| 3241 | err = module_unload_init(mod); | 3252 | err = module_unload_init(mod); |
| 3242 | if (err) | 3253 | if (err) |
| @@ -3284,7 +3295,7 @@ static int load_module(struct load_info *info, const char __user *uargs, | |||
| 3284 | 3295 | ||
| 3285 | /* Module is ready to execute: parsing args may do that. */ | 3296 | /* Module is ready to execute: parsing args may do that. */ |
| 3286 | err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, | 3297 | err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, |
| 3287 | -32768, 32767, &ddebug_dyndbg_module_param_cb); | 3298 | -32768, 32767, unknown_module_param_cb); |
| 3288 | if (err < 0) | 3299 | if (err < 0) |
| 3289 | goto bug_cleanup; | 3300 | goto bug_cleanup; |
| 3290 | 3301 | ||
| @@ -3563,10 +3574,8 @@ unsigned long module_kallsyms_lookup_name(const char *name) | |||
| 3563 | /* Don't lock: we're in enough trouble already. */ | 3574 | /* Don't lock: we're in enough trouble already. */ |
| 3564 | preempt_disable(); | 3575 | preempt_disable(); |
| 3565 | if ((colon = strchr(name, ':')) != NULL) { | 3576 | if ((colon = strchr(name, ':')) != NULL) { |
| 3566 | *colon = '\0'; | 3577 | if ((mod = find_module_all(name, colon - name, false)) != NULL) |
| 3567 | if ((mod = find_module(name)) != NULL) | ||
| 3568 | ret = mod_find_symname(mod, colon+1); | 3578 | ret = mod_find_symname(mod, colon+1); |
| 3569 | *colon = ':'; | ||
| 3570 | } else { | 3579 | } else { |
| 3571 | list_for_each_entry_rcu(mod, &modules, list) { | 3580 | list_for_each_entry_rcu(mod, &modules, list) { |
| 3572 | if (mod->state == MODULE_STATE_UNFORMED) | 3581 | if (mod->state == MODULE_STATE_UNFORMED) |
diff --git a/kernel/mutex.c b/kernel/mutex.c index e581ada5faf4..ff05f4bd86eb 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | * Also see Documentation/mutex-design.txt. | 18 | * Also see Documentation/mutex-design.txt. |
| 19 | */ | 19 | */ |
| 20 | #include <linux/mutex.h> | 20 | #include <linux/mutex.h> |
| 21 | #include <linux/ww_mutex.h> | ||
| 21 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
| 22 | #include <linux/sched/rt.h> | 23 | #include <linux/sched/rt.h> |
| 23 | #include <linux/export.h> | 24 | #include <linux/export.h> |
diff --git a/kernel/panic.c b/kernel/panic.c index 167ec097ce8b..801864600514 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include <linux/notifier.h> | 15 | #include <linux/notifier.h> |
| 16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
| 17 | #include <linux/random.h> | 17 | #include <linux/random.h> |
| 18 | #include <linux/ftrace.h> | ||
| 18 | #include <linux/reboot.h> | 19 | #include <linux/reboot.h> |
| 19 | #include <linux/delay.h> | 20 | #include <linux/delay.h> |
| 20 | #include <linux/kexec.h> | 21 | #include <linux/kexec.h> |
| @@ -399,8 +400,11 @@ struct slowpath_args { | |||
| 399 | static void warn_slowpath_common(const char *file, int line, void *caller, | 400 | static void warn_slowpath_common(const char *file, int line, void *caller, |
| 400 | unsigned taint, struct slowpath_args *args) | 401 | unsigned taint, struct slowpath_args *args) |
| 401 | { | 402 | { |
| 402 | printk(KERN_WARNING "------------[ cut here ]------------\n"); | 403 | disable_trace_on_warning(); |
| 403 | printk(KERN_WARNING "WARNING: at %s:%d %pS()\n", file, line, caller); | 404 | |
| 405 | pr_warn("------------[ cut here ]------------\n"); | ||
| 406 | pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS()\n", | ||
| 407 | raw_smp_processor_id(), current->pid, file, line, caller); | ||
| 404 | 408 | ||
| 405 | if (args) | 409 | if (args) |
| 406 | vprintk(args->fmt, args->args); | 410 | vprintk(args->fmt, args->args); |
diff --git a/kernel/params.c b/kernel/params.c index 53b958fcd639..440e65d1a544 100644 --- a/kernel/params.c +++ b/kernel/params.c | |||
| @@ -787,7 +787,7 @@ static void __init kernel_add_sysfs_param(const char *name, | |||
| 787 | } | 787 | } |
| 788 | 788 | ||
| 789 | /* | 789 | /* |
| 790 | * param_sysfs_builtin - add contents in /sys/parameters for built-in modules | 790 | * param_sysfs_builtin - add sysfs parameters for built-in modules |
| 791 | * | 791 | * |
| 792 | * Add module_parameters to sysfs for "modules" built into the kernel. | 792 | * Add module_parameters to sysfs for "modules" built into the kernel. |
| 793 | * | 793 | * |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 42670e9b44e0..c7f31aa272f7 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
| @@ -51,59 +51,28 @@ static int check_clock(const clockid_t which_clock) | |||
| 51 | return error; | 51 | return error; |
| 52 | } | 52 | } |
| 53 | 53 | ||
| 54 | static inline union cpu_time_count | 54 | static inline unsigned long long |
| 55 | timespec_to_sample(const clockid_t which_clock, const struct timespec *tp) | 55 | timespec_to_sample(const clockid_t which_clock, const struct timespec *tp) |
| 56 | { | 56 | { |
| 57 | union cpu_time_count ret; | 57 | unsigned long long ret; |
| 58 | ret.sched = 0; /* high half always zero when .cpu used */ | 58 | |
| 59 | ret = 0; /* high half always zero when .cpu used */ | ||
| 59 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { | 60 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { |
| 60 | ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec; | 61 | ret = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec; |
| 61 | } else { | 62 | } else { |
| 62 | ret.cpu = timespec_to_cputime(tp); | 63 | ret = cputime_to_expires(timespec_to_cputime(tp)); |
| 63 | } | 64 | } |
| 64 | return ret; | 65 | return ret; |
| 65 | } | 66 | } |
| 66 | 67 | ||
| 67 | static void sample_to_timespec(const clockid_t which_clock, | 68 | static void sample_to_timespec(const clockid_t which_clock, |
| 68 | union cpu_time_count cpu, | 69 | unsigned long long expires, |
| 69 | struct timespec *tp) | 70 | struct timespec *tp) |
| 70 | { | 71 | { |
| 71 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) | 72 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) |
| 72 | *tp = ns_to_timespec(cpu.sched); | 73 | *tp = ns_to_timespec(expires); |
| 73 | else | 74 | else |
| 74 | cputime_to_timespec(cpu.cpu, tp); | 75 | cputime_to_timespec((__force cputime_t)expires, tp); |
| 75 | } | ||
| 76 | |||
| 77 | static inline int cpu_time_before(const clockid_t which_clock, | ||
| 78 | union cpu_time_count now, | ||
| 79 | union cpu_time_count then) | ||
| 80 | { | ||
| 81 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { | ||
| 82 | return now.sched < then.sched; | ||
| 83 | } else { | ||
| 84 | return now.cpu < then.cpu; | ||
| 85 | } | ||
| 86 | } | ||
| 87 | static inline void cpu_time_add(const clockid_t which_clock, | ||
| 88 | union cpu_time_count *acc, | ||
| 89 | union cpu_time_count val) | ||
| 90 | { | ||
| 91 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { | ||
| 92 | acc->sched += val.sched; | ||
| 93 | } else { | ||
| 94 | acc->cpu += val.cpu; | ||
| 95 | } | ||
| 96 | } | ||
| 97 | static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock, | ||
| 98 | union cpu_time_count a, | ||
| 99 | union cpu_time_count b) | ||
| 100 | { | ||
| 101 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { | ||
| 102 | a.sched -= b.sched; | ||
| 103 | } else { | ||
| 104 | a.cpu -= b.cpu; | ||
| 105 | } | ||
| 106 | return a; | ||
| 107 | } | 76 | } |
| 108 | 77 | ||
| 109 | /* | 78 | /* |
| @@ -111,47 +80,31 @@ static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock, | |||
| 111 | * given the current clock sample. | 80 | * given the current clock sample. |
| 112 | */ | 81 | */ |
| 113 | static void bump_cpu_timer(struct k_itimer *timer, | 82 | static void bump_cpu_timer(struct k_itimer *timer, |
| 114 | union cpu_time_count now) | 83 | unsigned long long now) |
| 115 | { | 84 | { |
| 116 | int i; | 85 | int i; |
| 86 | unsigned long long delta, incr; | ||
| 117 | 87 | ||
| 118 | if (timer->it.cpu.incr.sched == 0) | 88 | if (timer->it.cpu.incr == 0) |
| 119 | return; | 89 | return; |
| 120 | 90 | ||
| 121 | if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { | 91 | if (now < timer->it.cpu.expires) |
| 122 | unsigned long long delta, incr; | 92 | return; |
| 123 | 93 | ||
| 124 | if (now.sched < timer->it.cpu.expires.sched) | 94 | incr = timer->it.cpu.incr; |
| 125 | return; | 95 | delta = now + incr - timer->it.cpu.expires; |
| 126 | incr = timer->it.cpu.incr.sched; | ||
| 127 | delta = now.sched + incr - timer->it.cpu.expires.sched; | ||
| 128 | /* Don't use (incr*2 < delta), incr*2 might overflow. */ | ||
| 129 | for (i = 0; incr < delta - incr; i++) | ||
| 130 | incr = incr << 1; | ||
| 131 | for (; i >= 0; incr >>= 1, i--) { | ||
| 132 | if (delta < incr) | ||
| 133 | continue; | ||
| 134 | timer->it.cpu.expires.sched += incr; | ||
| 135 | timer->it_overrun += 1 << i; | ||
| 136 | delta -= incr; | ||
| 137 | } | ||
| 138 | } else { | ||
| 139 | cputime_t delta, incr; | ||
| 140 | 96 | ||
| 141 | if (now.cpu < timer->it.cpu.expires.cpu) | 97 | /* Don't use (incr*2 < delta), incr*2 might overflow. */ |
| 142 | return; | 98 | for (i = 0; incr < delta - incr; i++) |
| 143 | incr = timer->it.cpu.incr.cpu; | 99 | incr = incr << 1; |
| 144 | delta = now.cpu + incr - timer->it.cpu.expires.cpu; | 100 | |
| 145 | /* Don't use (incr*2 < delta), incr*2 might overflow. */ | 101 | for (; i >= 0; incr >>= 1, i--) { |
| 146 | for (i = 0; incr < delta - incr; i++) | 102 | if (delta < incr) |
| 147 | incr += incr; | 103 | continue; |
| 148 | for (; i >= 0; incr = incr >> 1, i--) { | 104 | |
| 149 | if (delta < incr) | 105 | timer->it.cpu.expires += incr; |
| 150 | continue; | 106 | timer->it_overrun += 1 << i; |
| 151 | timer->it.cpu.expires.cpu += incr; | 107 | delta -= incr; |
| 152 | timer->it_overrun += 1 << i; | ||
| 153 | delta -= incr; | ||
| 154 | } | ||
| 155 | } | 108 | } |
| 156 | } | 109 | } |
| 157 | 110 | ||
| @@ -170,21 +123,21 @@ static inline int task_cputime_zero(const struct task_cputime *cputime) | |||
| 170 | return 0; | 123 | return 0; |
| 171 | } | 124 | } |
| 172 | 125 | ||
| 173 | static inline cputime_t prof_ticks(struct task_struct *p) | 126 | static inline unsigned long long prof_ticks(struct task_struct *p) |
| 174 | { | 127 | { |
| 175 | cputime_t utime, stime; | 128 | cputime_t utime, stime; |
| 176 | 129 | ||
| 177 | task_cputime(p, &utime, &stime); | 130 | task_cputime(p, &utime, &stime); |
| 178 | 131 | ||
| 179 | return utime + stime; | 132 | return cputime_to_expires(utime + stime); |
| 180 | } | 133 | } |
| 181 | static inline cputime_t virt_ticks(struct task_struct *p) | 134 | static inline unsigned long long virt_ticks(struct task_struct *p) |
| 182 | { | 135 | { |
| 183 | cputime_t utime; | 136 | cputime_t utime; |
| 184 | 137 | ||
| 185 | task_cputime(p, &utime, NULL); | 138 | task_cputime(p, &utime, NULL); |
| 186 | 139 | ||
| 187 | return utime; | 140 | return cputime_to_expires(utime); |
| 188 | } | 141 | } |
| 189 | 142 | ||
| 190 | static int | 143 | static int |
| @@ -225,19 +178,19 @@ posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp) | |||
| 225 | * Sample a per-thread clock for the given task. | 178 | * Sample a per-thread clock for the given task. |
| 226 | */ | 179 | */ |
| 227 | static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, | 180 | static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, |
| 228 | union cpu_time_count *cpu) | 181 | unsigned long long *sample) |
| 229 | { | 182 | { |
| 230 | switch (CPUCLOCK_WHICH(which_clock)) { | 183 | switch (CPUCLOCK_WHICH(which_clock)) { |
| 231 | default: | 184 | default: |
| 232 | return -EINVAL; | 185 | return -EINVAL; |
| 233 | case CPUCLOCK_PROF: | 186 | case CPUCLOCK_PROF: |
| 234 | cpu->cpu = prof_ticks(p); | 187 | *sample = prof_ticks(p); |
| 235 | break; | 188 | break; |
| 236 | case CPUCLOCK_VIRT: | 189 | case CPUCLOCK_VIRT: |
| 237 | cpu->cpu = virt_ticks(p); | 190 | *sample = virt_ticks(p); |
| 238 | break; | 191 | break; |
| 239 | case CPUCLOCK_SCHED: | 192 | case CPUCLOCK_SCHED: |
| 240 | cpu->sched = task_sched_runtime(p); | 193 | *sample = task_sched_runtime(p); |
| 241 | break; | 194 | break; |
| 242 | } | 195 | } |
| 243 | return 0; | 196 | return 0; |
| @@ -284,7 +237,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) | |||
| 284 | */ | 237 | */ |
| 285 | static int cpu_clock_sample_group(const clockid_t which_clock, | 238 | static int cpu_clock_sample_group(const clockid_t which_clock, |
| 286 | struct task_struct *p, | 239 | struct task_struct *p, |
| 287 | union cpu_time_count *cpu) | 240 | unsigned long long *sample) |
| 288 | { | 241 | { |
| 289 | struct task_cputime cputime; | 242 | struct task_cputime cputime; |
| 290 | 243 | ||
| @@ -293,15 +246,15 @@ static int cpu_clock_sample_group(const clockid_t which_clock, | |||
| 293 | return -EINVAL; | 246 | return -EINVAL; |
| 294 | case CPUCLOCK_PROF: | 247 | case CPUCLOCK_PROF: |
| 295 | thread_group_cputime(p, &cputime); | 248 | thread_group_cputime(p, &cputime); |
| 296 | cpu->cpu = cputime.utime + cputime.stime; | 249 | *sample = cputime_to_expires(cputime.utime + cputime.stime); |
| 297 | break; | 250 | break; |
| 298 | case CPUCLOCK_VIRT: | 251 | case CPUCLOCK_VIRT: |
| 299 | thread_group_cputime(p, &cputime); | 252 | thread_group_cputime(p, &cputime); |
| 300 | cpu->cpu = cputime.utime; | 253 | *sample = cputime_to_expires(cputime.utime); |
| 301 | break; | 254 | break; |
| 302 | case CPUCLOCK_SCHED: | 255 | case CPUCLOCK_SCHED: |
| 303 | thread_group_cputime(p, &cputime); | 256 | thread_group_cputime(p, &cputime); |
| 304 | cpu->sched = cputime.sum_exec_runtime; | 257 | *sample = cputime.sum_exec_runtime; |
| 305 | break; | 258 | break; |
| 306 | } | 259 | } |
| 307 | return 0; | 260 | return 0; |
| @@ -312,7 +265,7 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) | |||
| 312 | { | 265 | { |
| 313 | const pid_t pid = CPUCLOCK_PID(which_clock); | 266 | const pid_t pid = CPUCLOCK_PID(which_clock); |
| 314 | int error = -EINVAL; | 267 | int error = -EINVAL; |
| 315 | union cpu_time_count rtn; | 268 | unsigned long long rtn; |
| 316 | 269 | ||
| 317 | if (pid == 0) { | 270 | if (pid == 0) { |
| 318 | /* | 271 | /* |
| @@ -446,6 +399,15 @@ static int posix_cpu_timer_del(struct k_itimer *timer) | |||
| 446 | return ret; | 399 | return ret; |
| 447 | } | 400 | } |
| 448 | 401 | ||
| 402 | static void cleanup_timers_list(struct list_head *head, | ||
| 403 | unsigned long long curr) | ||
| 404 | { | ||
| 405 | struct cpu_timer_list *timer, *next; | ||
| 406 | |||
| 407 | list_for_each_entry_safe(timer, next, head, entry) | ||
| 408 | list_del_init(&timer->entry); | ||
| 409 | } | ||
| 410 | |||
| 449 | /* | 411 | /* |
| 450 | * Clean out CPU timers still ticking when a thread exited. The task | 412 | * Clean out CPU timers still ticking when a thread exited. The task |
| 451 | * pointer is cleared, and the expiry time is replaced with the residual | 413 | * pointer is cleared, and the expiry time is replaced with the residual |
| @@ -456,37 +418,12 @@ static void cleanup_timers(struct list_head *head, | |||
| 456 | cputime_t utime, cputime_t stime, | 418 | cputime_t utime, cputime_t stime, |
| 457 | unsigned long long sum_exec_runtime) | 419 | unsigned long long sum_exec_runtime) |
| 458 | { | 420 | { |
| 459 | struct cpu_timer_list *timer, *next; | ||
| 460 | cputime_t ptime = utime + stime; | ||
| 461 | |||
| 462 | list_for_each_entry_safe(timer, next, head, entry) { | ||
| 463 | list_del_init(&timer->entry); | ||
| 464 | if (timer->expires.cpu < ptime) { | ||
| 465 | timer->expires.cpu = 0; | ||
| 466 | } else { | ||
| 467 | timer->expires.cpu -= ptime; | ||
| 468 | } | ||
| 469 | } | ||
| 470 | 421 | ||
| 471 | ++head; | 422 | cputime_t ptime = utime + stime; |
| 472 | list_for_each_entry_safe(timer, next, head, entry) { | ||
| 473 | list_del_init(&timer->entry); | ||
| 474 | if (timer->expires.cpu < utime) { | ||
| 475 | timer->expires.cpu = 0; | ||
| 476 | } else { | ||
| 477 | timer->expires.cpu -= utime; | ||
| 478 | } | ||
| 479 | } | ||
| 480 | 423 | ||
| 481 | ++head; | 424 | cleanup_timers_list(head, cputime_to_expires(ptime)); |
| 482 | list_for_each_entry_safe(timer, next, head, entry) { | 425 | cleanup_timers_list(++head, cputime_to_expires(utime)); |
| 483 | list_del_init(&timer->entry); | 426 | cleanup_timers_list(++head, sum_exec_runtime); |
| 484 | if (timer->expires.sched < sum_exec_runtime) { | ||
| 485 | timer->expires.sched = 0; | ||
| 486 | } else { | ||
| 487 | timer->expires.sched -= sum_exec_runtime; | ||
| 488 | } | ||
| 489 | } | ||
| 490 | } | 427 | } |
| 491 | 428 | ||
| 492 | /* | 429 | /* |
| @@ -516,17 +453,21 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk) | |||
| 516 | tsk->se.sum_exec_runtime + sig->sum_sched_runtime); | 453 | tsk->se.sum_exec_runtime + sig->sum_sched_runtime); |
| 517 | } | 454 | } |
| 518 | 455 | ||
| 519 | static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) | 456 | static void clear_dead_task(struct k_itimer *itimer, unsigned long long now) |
| 520 | { | 457 | { |
| 458 | struct cpu_timer_list *timer = &itimer->it.cpu; | ||
| 459 | |||
| 521 | /* | 460 | /* |
| 522 | * That's all for this thread or process. | 461 | * That's all for this thread or process. |
| 523 | * We leave our residual in expires to be reported. | 462 | * We leave our residual in expires to be reported. |
| 524 | */ | 463 | */ |
| 525 | put_task_struct(timer->it.cpu.task); | 464 | put_task_struct(timer->task); |
| 526 | timer->it.cpu.task = NULL; | 465 | timer->task = NULL; |
| 527 | timer->it.cpu.expires = cpu_time_sub(timer->it_clock, | 466 | if (timer->expires < now) { |
| 528 | timer->it.cpu.expires, | 467 | timer->expires = 0; |
| 529 | now); | 468 | } else { |
| 469 | timer->expires -= now; | ||
| 470 | } | ||
| 530 | } | 471 | } |
| 531 | 472 | ||
| 532 | static inline int expires_gt(cputime_t expires, cputime_t new_exp) | 473 | static inline int expires_gt(cputime_t expires, cputime_t new_exp) |
| @@ -558,14 +499,14 @@ static void arm_timer(struct k_itimer *timer) | |||
| 558 | 499 | ||
| 559 | listpos = head; | 500 | listpos = head; |
| 560 | list_for_each_entry(next, head, entry) { | 501 | list_for_each_entry(next, head, entry) { |
| 561 | if (cpu_time_before(timer->it_clock, nt->expires, next->expires)) | 502 | if (nt->expires < next->expires) |
| 562 | break; | 503 | break; |
| 563 | listpos = &next->entry; | 504 | listpos = &next->entry; |
| 564 | } | 505 | } |
| 565 | list_add(&nt->entry, listpos); | 506 | list_add(&nt->entry, listpos); |
| 566 | 507 | ||
| 567 | if (listpos == head) { | 508 | if (listpos == head) { |
| 568 | union cpu_time_count *exp = &nt->expires; | 509 | unsigned long long exp = nt->expires; |
| 569 | 510 | ||
| 570 | /* | 511 | /* |
| 571 | * We are the new earliest-expiring POSIX 1.b timer, hence | 512 | * We are the new earliest-expiring POSIX 1.b timer, hence |
| @@ -576,17 +517,17 @@ static void arm_timer(struct k_itimer *timer) | |||
| 576 | 517 | ||
| 577 | switch (CPUCLOCK_WHICH(timer->it_clock)) { | 518 | switch (CPUCLOCK_WHICH(timer->it_clock)) { |
| 578 | case CPUCLOCK_PROF: | 519 | case CPUCLOCK_PROF: |
| 579 | if (expires_gt(cputime_expires->prof_exp, exp->cpu)) | 520 | if (expires_gt(cputime_expires->prof_exp, expires_to_cputime(exp))) |
| 580 | cputime_expires->prof_exp = exp->cpu; | 521 | cputime_expires->prof_exp = expires_to_cputime(exp); |
| 581 | break; | 522 | break; |
| 582 | case CPUCLOCK_VIRT: | 523 | case CPUCLOCK_VIRT: |
| 583 | if (expires_gt(cputime_expires->virt_exp, exp->cpu)) | 524 | if (expires_gt(cputime_expires->virt_exp, expires_to_cputime(exp))) |
| 584 | cputime_expires->virt_exp = exp->cpu; | 525 | cputime_expires->virt_exp = expires_to_cputime(exp); |
| 585 | break; | 526 | break; |
| 586 | case CPUCLOCK_SCHED: | 527 | case CPUCLOCK_SCHED: |
| 587 | if (cputime_expires->sched_exp == 0 || | 528 | if (cputime_expires->sched_exp == 0 || |
| 588 | cputime_expires->sched_exp > exp->sched) | 529 | cputime_expires->sched_exp > exp) |
| 589 | cputime_expires->sched_exp = exp->sched; | 530 | cputime_expires->sched_exp = exp; |
| 590 | break; | 531 | break; |
| 591 | } | 532 | } |
| 592 | } | 533 | } |
| @@ -601,20 +542,20 @@ static void cpu_timer_fire(struct k_itimer *timer) | |||
| 601 | /* | 542 | /* |
| 602 | * User don't want any signal. | 543 | * User don't want any signal. |
| 603 | */ | 544 | */ |
| 604 | timer->it.cpu.expires.sched = 0; | 545 | timer->it.cpu.expires = 0; |
| 605 | } else if (unlikely(timer->sigq == NULL)) { | 546 | } else if (unlikely(timer->sigq == NULL)) { |
| 606 | /* | 547 | /* |
| 607 | * This a special case for clock_nanosleep, | 548 | * This a special case for clock_nanosleep, |
| 608 | * not a normal timer from sys_timer_create. | 549 | * not a normal timer from sys_timer_create. |
| 609 | */ | 550 | */ |
| 610 | wake_up_process(timer->it_process); | 551 | wake_up_process(timer->it_process); |
| 611 | timer->it.cpu.expires.sched = 0; | 552 | timer->it.cpu.expires = 0; |
| 612 | } else if (timer->it.cpu.incr.sched == 0) { | 553 | } else if (timer->it.cpu.incr == 0) { |
| 613 | /* | 554 | /* |
| 614 | * One-shot timer. Clear it as soon as it's fired. | 555 | * One-shot timer. Clear it as soon as it's fired. |
| 615 | */ | 556 | */ |
| 616 | posix_timer_event(timer, 0); | 557 | posix_timer_event(timer, 0); |
| 617 | timer->it.cpu.expires.sched = 0; | 558 | timer->it.cpu.expires = 0; |
| 618 | } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) { | 559 | } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) { |
| 619 | /* | 560 | /* |
| 620 | * The signal did not get queued because the signal | 561 | * The signal did not get queued because the signal |
| @@ -632,7 +573,7 @@ static void cpu_timer_fire(struct k_itimer *timer) | |||
| 632 | */ | 573 | */ |
| 633 | static int cpu_timer_sample_group(const clockid_t which_clock, | 574 | static int cpu_timer_sample_group(const clockid_t which_clock, |
| 634 | struct task_struct *p, | 575 | struct task_struct *p, |
| 635 | union cpu_time_count *cpu) | 576 | unsigned long long *sample) |
| 636 | { | 577 | { |
| 637 | struct task_cputime cputime; | 578 | struct task_cputime cputime; |
| 638 | 579 | ||
| @@ -641,13 +582,13 @@ static int cpu_timer_sample_group(const clockid_t which_clock, | |||
| 641 | default: | 582 | default: |
| 642 | return -EINVAL; | 583 | return -EINVAL; |
| 643 | case CPUCLOCK_PROF: | 584 | case CPUCLOCK_PROF: |
| 644 | cpu->cpu = cputime.utime + cputime.stime; | 585 | *sample = cputime_to_expires(cputime.utime + cputime.stime); |
| 645 | break; | 586 | break; |
| 646 | case CPUCLOCK_VIRT: | 587 | case CPUCLOCK_VIRT: |
| 647 | cpu->cpu = cputime.utime; | 588 | *sample = cputime_to_expires(cputime.utime); |
| 648 | break; | 589 | break; |
| 649 | case CPUCLOCK_SCHED: | 590 | case CPUCLOCK_SCHED: |
| 650 | cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); | 591 | *sample = cputime.sum_exec_runtime + task_delta_exec(p); |
| 651 | break; | 592 | break; |
| 652 | } | 593 | } |
| 653 | return 0; | 594 | return 0; |
| @@ -694,7 +635,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 694 | struct itimerspec *new, struct itimerspec *old) | 635 | struct itimerspec *new, struct itimerspec *old) |
| 695 | { | 636 | { |
| 696 | struct task_struct *p = timer->it.cpu.task; | 637 | struct task_struct *p = timer->it.cpu.task; |
| 697 | union cpu_time_count old_expires, new_expires, old_incr, val; | 638 | unsigned long long old_expires, new_expires, old_incr, val; |
| 698 | int ret; | 639 | int ret; |
| 699 | 640 | ||
| 700 | if (unlikely(p == NULL)) { | 641 | if (unlikely(p == NULL)) { |
| @@ -749,7 +690,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 749 | } | 690 | } |
| 750 | 691 | ||
| 751 | if (old) { | 692 | if (old) { |
| 752 | if (old_expires.sched == 0) { | 693 | if (old_expires == 0) { |
| 753 | old->it_value.tv_sec = 0; | 694 | old->it_value.tv_sec = 0; |
| 754 | old->it_value.tv_nsec = 0; | 695 | old->it_value.tv_nsec = 0; |
| 755 | } else { | 696 | } else { |
| @@ -764,11 +705,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 764 | * new setting. | 705 | * new setting. |
| 765 | */ | 706 | */ |
| 766 | bump_cpu_timer(timer, val); | 707 | bump_cpu_timer(timer, val); |
| 767 | if (cpu_time_before(timer->it_clock, val, | 708 | if (val < timer->it.cpu.expires) { |
| 768 | timer->it.cpu.expires)) { | 709 | old_expires = timer->it.cpu.expires - val; |
| 769 | old_expires = cpu_time_sub( | ||
| 770 | timer->it_clock, | ||
| 771 | timer->it.cpu.expires, val); | ||
| 772 | sample_to_timespec(timer->it_clock, | 710 | sample_to_timespec(timer->it_clock, |
| 773 | old_expires, | 711 | old_expires, |
| 774 | &old->it_value); | 712 | &old->it_value); |
| @@ -791,8 +729,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 791 | goto out; | 729 | goto out; |
| 792 | } | 730 | } |
| 793 | 731 | ||
| 794 | if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) { | 732 | if (new_expires != 0 && !(flags & TIMER_ABSTIME)) { |
| 795 | cpu_time_add(timer->it_clock, &new_expires, val); | 733 | new_expires += val; |
| 796 | } | 734 | } |
| 797 | 735 | ||
| 798 | /* | 736 | /* |
| @@ -801,8 +739,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 801 | * arm the timer (we'll just fake it for timer_gettime). | 739 | * arm the timer (we'll just fake it for timer_gettime). |
| 802 | */ | 740 | */ |
| 803 | timer->it.cpu.expires = new_expires; | 741 | timer->it.cpu.expires = new_expires; |
| 804 | if (new_expires.sched != 0 && | 742 | if (new_expires != 0 && val < new_expires) { |
| 805 | cpu_time_before(timer->it_clock, val, new_expires)) { | ||
| 806 | arm_timer(timer); | 743 | arm_timer(timer); |
| 807 | } | 744 | } |
| 808 | 745 | ||
| @@ -826,8 +763,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 826 | timer->it_overrun_last = 0; | 763 | timer->it_overrun_last = 0; |
| 827 | timer->it_overrun = -1; | 764 | timer->it_overrun = -1; |
| 828 | 765 | ||
| 829 | if (new_expires.sched != 0 && | 766 | if (new_expires != 0 && !(val < new_expires)) { |
| 830 | !cpu_time_before(timer->it_clock, val, new_expires)) { | ||
| 831 | /* | 767 | /* |
| 832 | * The designated time already passed, so we notify | 768 | * The designated time already passed, so we notify |
| 833 | * immediately, even if the thread never runs to | 769 | * immediately, even if the thread never runs to |
| @@ -849,7 +785,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 849 | 785 | ||
| 850 | static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | 786 | static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) |
| 851 | { | 787 | { |
| 852 | union cpu_time_count now; | 788 | unsigned long long now; |
| 853 | struct task_struct *p = timer->it.cpu.task; | 789 | struct task_struct *p = timer->it.cpu.task; |
| 854 | int clear_dead; | 790 | int clear_dead; |
| 855 | 791 | ||
| @@ -859,7 +795,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | |||
| 859 | sample_to_timespec(timer->it_clock, | 795 | sample_to_timespec(timer->it_clock, |
| 860 | timer->it.cpu.incr, &itp->it_interval); | 796 | timer->it.cpu.incr, &itp->it_interval); |
| 861 | 797 | ||
| 862 | if (timer->it.cpu.expires.sched == 0) { /* Timer not armed at all. */ | 798 | if (timer->it.cpu.expires == 0) { /* Timer not armed at all. */ |
| 863 | itp->it_value.tv_sec = itp->it_value.tv_nsec = 0; | 799 | itp->it_value.tv_sec = itp->it_value.tv_nsec = 0; |
| 864 | return; | 800 | return; |
| 865 | } | 801 | } |
| @@ -891,7 +827,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | |||
| 891 | */ | 827 | */ |
| 892 | put_task_struct(p); | 828 | put_task_struct(p); |
| 893 | timer->it.cpu.task = NULL; | 829 | timer->it.cpu.task = NULL; |
| 894 | timer->it.cpu.expires.sched = 0; | 830 | timer->it.cpu.expires = 0; |
| 895 | read_unlock(&tasklist_lock); | 831 | read_unlock(&tasklist_lock); |
| 896 | goto dead; | 832 | goto dead; |
| 897 | } else { | 833 | } else { |
| @@ -912,10 +848,9 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | |||
| 912 | goto dead; | 848 | goto dead; |
| 913 | } | 849 | } |
| 914 | 850 | ||
| 915 | if (cpu_time_before(timer->it_clock, now, timer->it.cpu.expires)) { | 851 | if (now < timer->it.cpu.expires) { |
| 916 | sample_to_timespec(timer->it_clock, | 852 | sample_to_timespec(timer->it_clock, |
| 917 | cpu_time_sub(timer->it_clock, | 853 | timer->it.cpu.expires - now, |
| 918 | timer->it.cpu.expires, now), | ||
| 919 | &itp->it_value); | 854 | &itp->it_value); |
| 920 | } else { | 855 | } else { |
| 921 | /* | 856 | /* |
| @@ -927,6 +862,28 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | |||
| 927 | } | 862 | } |
| 928 | } | 863 | } |
| 929 | 864 | ||
| 865 | static unsigned long long | ||
| 866 | check_timers_list(struct list_head *timers, | ||
| 867 | struct list_head *firing, | ||
| 868 | unsigned long long curr) | ||
| 869 | { | ||
| 870 | int maxfire = 20; | ||
| 871 | |||
| 872 | while (!list_empty(timers)) { | ||
| 873 | struct cpu_timer_list *t; | ||
| 874 | |||
| 875 | t = list_first_entry(timers, struct cpu_timer_list, entry); | ||
| 876 | |||
| 877 | if (!--maxfire || curr < t->expires) | ||
| 878 | return t->expires; | ||
| 879 | |||
| 880 | t->firing = 1; | ||
| 881 | list_move_tail(&t->entry, firing); | ||
| 882 | } | ||
| 883 | |||
| 884 | return 0; | ||
| 885 | } | ||
| 886 | |||
| 930 | /* | 887 | /* |
| 931 | * Check for any per-thread CPU timers that have fired and move them off | 888 | * Check for any per-thread CPU timers that have fired and move them off |
| 932 | * the tsk->cpu_timers[N] list onto the firing list. Here we update the | 889 | * the tsk->cpu_timers[N] list onto the firing list. Here we update the |
| @@ -935,54 +892,20 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | |||
| 935 | static void check_thread_timers(struct task_struct *tsk, | 892 | static void check_thread_timers(struct task_struct *tsk, |
| 936 | struct list_head *firing) | 893 | struct list_head *firing) |
| 937 | { | 894 | { |
| 938 | int maxfire; | ||
| 939 | struct list_head *timers = tsk->cpu_timers; | 895 | struct list_head *timers = tsk->cpu_timers; |
| 940 | struct signal_struct *const sig = tsk->signal; | 896 | struct signal_struct *const sig = tsk->signal; |
| 897 | struct task_cputime *tsk_expires = &tsk->cputime_expires; | ||
| 898 | unsigned long long expires; | ||
| 941 | unsigned long soft; | 899 | unsigned long soft; |
| 942 | 900 | ||
| 943 | maxfire = 20; | 901 | expires = check_timers_list(timers, firing, prof_ticks(tsk)); |
| 944 | tsk->cputime_expires.prof_exp = 0; | 902 | tsk_expires->prof_exp = expires_to_cputime(expires); |
| 945 | while (!list_empty(timers)) { | ||
| 946 | struct cpu_timer_list *t = list_first_entry(timers, | ||
| 947 | struct cpu_timer_list, | ||
| 948 | entry); | ||
| 949 | if (!--maxfire || prof_ticks(tsk) < t->expires.cpu) { | ||
| 950 | tsk->cputime_expires.prof_exp = t->expires.cpu; | ||
| 951 | break; | ||
| 952 | } | ||
| 953 | t->firing = 1; | ||
| 954 | list_move_tail(&t->entry, firing); | ||
| 955 | } | ||
| 956 | 903 | ||
| 957 | ++timers; | 904 | expires = check_timers_list(++timers, firing, virt_ticks(tsk)); |
| 958 | maxfire = 20; | 905 | tsk_expires->virt_exp = expires_to_cputime(expires); |
| 959 | tsk->cputime_expires.virt_exp = 0; | ||
| 960 | while (!list_empty(timers)) { | ||
| 961 | struct cpu_timer_list *t = list_first_entry(timers, | ||
| 962 | struct cpu_timer_list, | ||
| 963 | entry); | ||
| 964 | if (!--maxfire || virt_ticks(tsk) < t->expires.cpu) { | ||
| 965 | tsk->cputime_expires.virt_exp = t->expires.cpu; | ||
| 966 | break; | ||
| 967 | } | ||
| 968 | t->firing = 1; | ||
| 969 | list_move_tail(&t->entry, firing); | ||
| 970 | } | ||
| 971 | 906 | ||
| 972 | ++timers; | 907 | tsk_expires->sched_exp = check_timers_list(++timers, firing, |
| 973 | maxfire = 20; | 908 | tsk->se.sum_exec_runtime); |
| 974 | tsk->cputime_expires.sched_exp = 0; | ||
| 975 | while (!list_empty(timers)) { | ||
| 976 | struct cpu_timer_list *t = list_first_entry(timers, | ||
| 977 | struct cpu_timer_list, | ||
| 978 | entry); | ||
| 979 | if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) { | ||
| 980 | tsk->cputime_expires.sched_exp = t->expires.sched; | ||
| 981 | break; | ||
| 982 | } | ||
| 983 | t->firing = 1; | ||
| 984 | list_move_tail(&t->entry, firing); | ||
| 985 | } | ||
| 986 | 909 | ||
| 987 | /* | 910 | /* |
| 988 | * Check for the special case thread timers. | 911 | * Check for the special case thread timers. |
| @@ -1030,7 +953,8 @@ static void stop_process_timers(struct signal_struct *sig) | |||
| 1030 | static u32 onecputick; | 953 | static u32 onecputick; |
| 1031 | 954 | ||
| 1032 | static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, | 955 | static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, |
| 1033 | cputime_t *expires, cputime_t cur_time, int signo) | 956 | unsigned long long *expires, |
| 957 | unsigned long long cur_time, int signo) | ||
| 1034 | { | 958 | { |
| 1035 | if (!it->expires) | 959 | if (!it->expires) |
| 1036 | return; | 960 | return; |
| @@ -1066,9 +990,8 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, | |||
| 1066 | static void check_process_timers(struct task_struct *tsk, | 990 | static void check_process_timers(struct task_struct *tsk, |
| 1067 | struct list_head *firing) | 991 | struct list_head *firing) |
| 1068 | { | 992 | { |
| 1069 | int maxfire; | ||
| 1070 | struct signal_struct *const sig = tsk->signal; | 993 | struct signal_struct *const sig = tsk->signal; |
| 1071 | cputime_t utime, ptime, virt_expires, prof_expires; | 994 | unsigned long long utime, ptime, virt_expires, prof_expires; |
| 1072 | unsigned long long sum_sched_runtime, sched_expires; | 995 | unsigned long long sum_sched_runtime, sched_expires; |
| 1073 | struct list_head *timers = sig->cpu_timers; | 996 | struct list_head *timers = sig->cpu_timers; |
| 1074 | struct task_cputime cputime; | 997 | struct task_cputime cputime; |
| @@ -1078,52 +1001,13 @@ static void check_process_timers(struct task_struct *tsk, | |||
| 1078 | * Collect the current process totals. | 1001 | * Collect the current process totals. |
| 1079 | */ | 1002 | */ |
| 1080 | thread_group_cputimer(tsk, &cputime); | 1003 | thread_group_cputimer(tsk, &cputime); |
| 1081 | utime = cputime.utime; | 1004 | utime = cputime_to_expires(cputime.utime); |
| 1082 | ptime = utime + cputime.stime; | 1005 | ptime = utime + cputime_to_expires(cputime.stime); |
| 1083 | sum_sched_runtime = cputime.sum_exec_runtime; | 1006 | sum_sched_runtime = cputime.sum_exec_runtime; |
| 1084 | maxfire = 20; | ||
| 1085 | prof_expires = 0; | ||
| 1086 | while (!list_empty(timers)) { | ||
| 1087 | struct cpu_timer_list *tl = list_first_entry(timers, | ||
| 1088 | struct cpu_timer_list, | ||
| 1089 | entry); | ||
| 1090 | if (!--maxfire || ptime < tl->expires.cpu) { | ||
| 1091 | prof_expires = tl->expires.cpu; | ||
| 1092 | break; | ||
| 1093 | } | ||
| 1094 | tl->firing = 1; | ||
| 1095 | list_move_tail(&tl->entry, firing); | ||
| 1096 | } | ||
| 1097 | 1007 | ||
| 1098 | ++timers; | 1008 | prof_expires = check_timers_list(timers, firing, ptime); |
| 1099 | maxfire = 20; | 1009 | virt_expires = check_timers_list(++timers, firing, utime); |
| 1100 | virt_expires = 0; | 1010 | sched_expires = check_timers_list(++timers, firing, sum_sched_runtime); |
| 1101 | while (!list_empty(timers)) { | ||
| 1102 | struct cpu_timer_list *tl = list_first_entry(timers, | ||
| 1103 | struct cpu_timer_list, | ||
| 1104 | entry); | ||
| 1105 | if (!--maxfire || utime < tl->expires.cpu) { | ||
| 1106 | virt_expires = tl->expires.cpu; | ||
| 1107 | break; | ||
| 1108 | } | ||
| 1109 | tl->firing = 1; | ||
| 1110 | list_move_tail(&tl->entry, firing); | ||
| 1111 | } | ||
| 1112 | |||
| 1113 | ++timers; | ||
| 1114 | maxfire = 20; | ||
| 1115 | sched_expires = 0; | ||
| 1116 | while (!list_empty(timers)) { | ||
| 1117 | struct cpu_timer_list *tl = list_first_entry(timers, | ||
| 1118 | struct cpu_timer_list, | ||
| 1119 | entry); | ||
| 1120 | if (!--maxfire || sum_sched_runtime < tl->expires.sched) { | ||
| 1121 | sched_expires = tl->expires.sched; | ||
| 1122 | break; | ||
| 1123 | } | ||
| 1124 | tl->firing = 1; | ||
| 1125 | list_move_tail(&tl->entry, firing); | ||
| 1126 | } | ||
| 1127 | 1011 | ||
| 1128 | /* | 1012 | /* |
| 1129 | * Check for the special case process timers. | 1013 | * Check for the special case process timers. |
| @@ -1162,8 +1046,8 @@ static void check_process_timers(struct task_struct *tsk, | |||
| 1162 | } | 1046 | } |
| 1163 | } | 1047 | } |
| 1164 | 1048 | ||
| 1165 | sig->cputime_expires.prof_exp = prof_expires; | 1049 | sig->cputime_expires.prof_exp = expires_to_cputime(prof_expires); |
| 1166 | sig->cputime_expires.virt_exp = virt_expires; | 1050 | sig->cputime_expires.virt_exp = expires_to_cputime(virt_expires); |
| 1167 | sig->cputime_expires.sched_exp = sched_expires; | 1051 | sig->cputime_expires.sched_exp = sched_expires; |
| 1168 | if (task_cputime_zero(&sig->cputime_expires)) | 1052 | if (task_cputime_zero(&sig->cputime_expires)) |
| 1169 | stop_process_timers(sig); | 1053 | stop_process_timers(sig); |
| @@ -1176,7 +1060,7 @@ static void check_process_timers(struct task_struct *tsk, | |||
| 1176 | void posix_cpu_timer_schedule(struct k_itimer *timer) | 1060 | void posix_cpu_timer_schedule(struct k_itimer *timer) |
| 1177 | { | 1061 | { |
| 1178 | struct task_struct *p = timer->it.cpu.task; | 1062 | struct task_struct *p = timer->it.cpu.task; |
| 1179 | union cpu_time_count now; | 1063 | unsigned long long now; |
| 1180 | 1064 | ||
| 1181 | if (unlikely(p == NULL)) | 1065 | if (unlikely(p == NULL)) |
| 1182 | /* | 1066 | /* |
| @@ -1205,7 +1089,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) | |||
| 1205 | */ | 1089 | */ |
| 1206 | put_task_struct(p); | 1090 | put_task_struct(p); |
| 1207 | timer->it.cpu.task = p = NULL; | 1091 | timer->it.cpu.task = p = NULL; |
| 1208 | timer->it.cpu.expires.sched = 0; | 1092 | timer->it.cpu.expires = 0; |
| 1209 | goto out_unlock; | 1093 | goto out_unlock; |
| 1210 | } else if (unlikely(p->exit_state) && thread_group_empty(p)) { | 1094 | } else if (unlikely(p->exit_state) && thread_group_empty(p)) { |
| 1211 | /* | 1095 | /* |
| @@ -1213,6 +1097,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) | |||
| 1213 | * not yet reaped. Take this opportunity to | 1097 | * not yet reaped. Take this opportunity to |
| 1214 | * drop our task ref. | 1098 | * drop our task ref. |
| 1215 | */ | 1099 | */ |
| 1100 | cpu_timer_sample_group(timer->it_clock, p, &now); | ||
| 1216 | clear_dead_task(timer, now); | 1101 | clear_dead_task(timer, now); |
| 1217 | goto out_unlock; | 1102 | goto out_unlock; |
| 1218 | } | 1103 | } |
| @@ -1387,7 +1272,7 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |||
| 1387 | void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | 1272 | void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, |
| 1388 | cputime_t *newval, cputime_t *oldval) | 1273 | cputime_t *newval, cputime_t *oldval) |
| 1389 | { | 1274 | { |
| 1390 | union cpu_time_count now; | 1275 | unsigned long long now; |
| 1391 | 1276 | ||
| 1392 | BUG_ON(clock_idx == CPUCLOCK_SCHED); | 1277 | BUG_ON(clock_idx == CPUCLOCK_SCHED); |
| 1393 | cpu_timer_sample_group(clock_idx, tsk, &now); | 1278 | cpu_timer_sample_group(clock_idx, tsk, &now); |
| @@ -1399,17 +1284,17 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | |||
| 1399 | * it to be absolute. | 1284 | * it to be absolute. |
| 1400 | */ | 1285 | */ |
| 1401 | if (*oldval) { | 1286 | if (*oldval) { |
| 1402 | if (*oldval <= now.cpu) { | 1287 | if (*oldval <= now) { |
| 1403 | /* Just about to fire. */ | 1288 | /* Just about to fire. */ |
| 1404 | *oldval = cputime_one_jiffy; | 1289 | *oldval = cputime_one_jiffy; |
| 1405 | } else { | 1290 | } else { |
| 1406 | *oldval -= now.cpu; | 1291 | *oldval -= now; |
| 1407 | } | 1292 | } |
| 1408 | } | 1293 | } |
| 1409 | 1294 | ||
| 1410 | if (!*newval) | 1295 | if (!*newval) |
| 1411 | goto out; | 1296 | goto out; |
| 1412 | *newval += now.cpu; | 1297 | *newval += now; |
| 1413 | } | 1298 | } |
| 1414 | 1299 | ||
| 1415 | /* | 1300 | /* |
| @@ -1459,7 +1344,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, | |||
| 1459 | } | 1344 | } |
| 1460 | 1345 | ||
| 1461 | while (!signal_pending(current)) { | 1346 | while (!signal_pending(current)) { |
| 1462 | if (timer.it.cpu.expires.sched == 0) { | 1347 | if (timer.it.cpu.expires == 0) { |
| 1463 | /* | 1348 | /* |
| 1464 | * Our timer fired and was reset, below | 1349 | * Our timer fired and was reset, below |
| 1465 | * deletion can not fail. | 1350 | * deletion can not fail. |
diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c index c6422ffeda9a..9012ecf7b814 100644 --- a/kernel/power/autosleep.c +++ b/kernel/power/autosleep.c | |||
| @@ -32,7 +32,8 @@ static void try_to_suspend(struct work_struct *work) | |||
| 32 | 32 | ||
| 33 | mutex_lock(&autosleep_lock); | 33 | mutex_lock(&autosleep_lock); |
| 34 | 34 | ||
| 35 | if (!pm_save_wakeup_count(initial_count)) { | 35 | if (!pm_save_wakeup_count(initial_count) || |
| 36 | system_state != SYSTEM_RUNNING) { | ||
| 36 | mutex_unlock(&autosleep_lock); | 37 | mutex_unlock(&autosleep_lock); |
| 37 | goto out; | 38 | goto out; |
| 38 | } | 39 | } |
diff --git a/kernel/printk.c b/kernel/printk.c index 8212c1aef125..69b0890ed7e5 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
| @@ -1369,9 +1369,9 @@ static int console_trylock_for_printk(unsigned int cpu) | |||
| 1369 | } | 1369 | } |
| 1370 | } | 1370 | } |
| 1371 | logbuf_cpu = UINT_MAX; | 1371 | logbuf_cpu = UINT_MAX; |
| 1372 | raw_spin_unlock(&logbuf_lock); | ||
| 1372 | if (wake) | 1373 | if (wake) |
| 1373 | up(&console_sem); | 1374 | up(&console_sem); |
| 1374 | raw_spin_unlock(&logbuf_lock); | ||
| 1375 | return retval; | 1375 | return retval; |
| 1376 | } | 1376 | } |
| 1377 | 1377 | ||
| @@ -1921,7 +1921,7 @@ void resume_console(void) | |||
| 1921 | * called when a new CPU comes online (or fails to come up), and ensures | 1921 | * called when a new CPU comes online (or fails to come up), and ensures |
| 1922 | * that any such output gets printed. | 1922 | * that any such output gets printed. |
| 1923 | */ | 1923 | */ |
| 1924 | static int __cpuinit console_cpu_notify(struct notifier_block *self, | 1924 | static int console_cpu_notify(struct notifier_block *self, |
| 1925 | unsigned long action, void *hcpu) | 1925 | unsigned long action, void *hcpu) |
| 1926 | { | 1926 | { |
| 1927 | switch (action) { | 1927 | switch (action) { |
diff --git a/kernel/profile.c b/kernel/profile.c index 0bf400737660..6631e1ef55ab 100644 --- a/kernel/profile.c +++ b/kernel/profile.c | |||
| @@ -331,7 +331,7 @@ out: | |||
| 331 | put_cpu(); | 331 | put_cpu(); |
| 332 | } | 332 | } |
| 333 | 333 | ||
| 334 | static int __cpuinit profile_cpu_callback(struct notifier_block *info, | 334 | static int profile_cpu_callback(struct notifier_block *info, |
| 335 | unsigned long action, void *__cpu) | 335 | unsigned long action, void *__cpu) |
| 336 | { | 336 | { |
| 337 | int node, cpu = (unsigned long)__cpu; | 337 | int node, cpu = (unsigned long)__cpu; |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index ba5e6cea181a..4041f5747e73 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
| @@ -469,6 +469,7 @@ static int ptrace_detach(struct task_struct *child, unsigned int data) | |||
| 469 | /* Architecture-specific hardware disable .. */ | 469 | /* Architecture-specific hardware disable .. */ |
| 470 | ptrace_disable(child); | 470 | ptrace_disable(child); |
| 471 | clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); | 471 | clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); |
| 472 | flush_ptrace_hw_breakpoint(child); | ||
| 472 | 473 | ||
| 473 | write_lock_irq(&tasklist_lock); | 474 | write_lock_irq(&tasklist_lock); |
| 474 | /* | 475 | /* |
| @@ -1221,19 +1222,3 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, | |||
| 1221 | return ret; | 1222 | return ret; |
| 1222 | } | 1223 | } |
| 1223 | #endif /* CONFIG_COMPAT */ | 1224 | #endif /* CONFIG_COMPAT */ |
| 1224 | |||
| 1225 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | ||
| 1226 | int ptrace_get_breakpoints(struct task_struct *tsk) | ||
| 1227 | { | ||
| 1228 | if (atomic_inc_not_zero(&tsk->ptrace_bp_refcnt)) | ||
| 1229 | return 0; | ||
| 1230 | |||
| 1231 | return -1; | ||
| 1232 | } | ||
| 1233 | |||
| 1234 | void ptrace_put_breakpoints(struct task_struct *tsk) | ||
| 1235 | { | ||
| 1236 | if (atomic_dec_and_test(&tsk->ptrace_bp_refcnt)) | ||
| 1237 | flush_ptrace_hw_breakpoint(tsk); | ||
| 1238 | } | ||
| 1239 | #endif /* CONFIG_HAVE_HW_BREAKPOINT */ | ||
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index b1fa5510388d..f4871e52c546 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
| @@ -1476,7 +1476,7 @@ rcu_torture_shutdown(void *arg) | |||
| 1476 | * Execute random CPU-hotplug operations at the interval specified | 1476 | * Execute random CPU-hotplug operations at the interval specified |
| 1477 | * by the onoff_interval. | 1477 | * by the onoff_interval. |
| 1478 | */ | 1478 | */ |
| 1479 | static int __cpuinit | 1479 | static int |
| 1480 | rcu_torture_onoff(void *arg) | 1480 | rcu_torture_onoff(void *arg) |
| 1481 | { | 1481 | { |
| 1482 | int cpu; | 1482 | int cpu; |
| @@ -1558,7 +1558,7 @@ rcu_torture_onoff(void *arg) | |||
| 1558 | return 0; | 1558 | return 0; |
| 1559 | } | 1559 | } |
| 1560 | 1560 | ||
| 1561 | static int __cpuinit | 1561 | static int |
| 1562 | rcu_torture_onoff_init(void) | 1562 | rcu_torture_onoff_init(void) |
| 1563 | { | 1563 | { |
| 1564 | int ret; | 1564 | int ret; |
| @@ -1601,7 +1601,7 @@ static void rcu_torture_onoff_cleanup(void) | |||
| 1601 | * CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then | 1601 | * CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then |
| 1602 | * induces a CPU stall for the time specified by stall_cpu. | 1602 | * induces a CPU stall for the time specified by stall_cpu. |
| 1603 | */ | 1603 | */ |
| 1604 | static int __cpuinit rcu_torture_stall(void *args) | 1604 | static int rcu_torture_stall(void *args) |
| 1605 | { | 1605 | { |
| 1606 | unsigned long stop_at; | 1606 | unsigned long stop_at; |
| 1607 | 1607 | ||
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index e08abb9461ac..068de3a93606 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
| @@ -2910,7 +2910,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
| 2910 | * can accept some slop in the rsp->completed access due to the fact | 2910 | * can accept some slop in the rsp->completed access due to the fact |
| 2911 | * that this CPU cannot possibly have any RCU callbacks in flight yet. | 2911 | * that this CPU cannot possibly have any RCU callbacks in flight yet. |
| 2912 | */ | 2912 | */ |
| 2913 | static void __cpuinit | 2913 | static void |
| 2914 | rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | 2914 | rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) |
| 2915 | { | 2915 | { |
| 2916 | unsigned long flags; | 2916 | unsigned long flags; |
| @@ -2962,7 +2962,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
| 2962 | mutex_unlock(&rsp->onoff_mutex); | 2962 | mutex_unlock(&rsp->onoff_mutex); |
| 2963 | } | 2963 | } |
| 2964 | 2964 | ||
| 2965 | static void __cpuinit rcu_prepare_cpu(int cpu) | 2965 | static void rcu_prepare_cpu(int cpu) |
| 2966 | { | 2966 | { |
| 2967 | struct rcu_state *rsp; | 2967 | struct rcu_state *rsp; |
| 2968 | 2968 | ||
| @@ -2974,7 +2974,7 @@ static void __cpuinit rcu_prepare_cpu(int cpu) | |||
| 2974 | /* | 2974 | /* |
| 2975 | * Handle CPU online/offline notification events. | 2975 | * Handle CPU online/offline notification events. |
| 2976 | */ | 2976 | */ |
| 2977 | static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | 2977 | static int rcu_cpu_notify(struct notifier_block *self, |
| 2978 | unsigned long action, void *hcpu) | 2978 | unsigned long action, void *hcpu) |
| 2979 | { | 2979 | { |
| 2980 | long cpu = (long)hcpu; | 2980 | long cpu = (long)hcpu; |
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 4a39d364493c..b3832581043c 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
| @@ -521,10 +521,10 @@ static void invoke_rcu_callbacks_kthread(void); | |||
| 521 | static bool rcu_is_callbacks_kthread(void); | 521 | static bool rcu_is_callbacks_kthread(void); |
| 522 | #ifdef CONFIG_RCU_BOOST | 522 | #ifdef CONFIG_RCU_BOOST |
| 523 | static void rcu_preempt_do_callbacks(void); | 523 | static void rcu_preempt_do_callbacks(void); |
| 524 | static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | 524 | static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, |
| 525 | struct rcu_node *rnp); | 525 | struct rcu_node *rnp); |
| 526 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 526 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
| 527 | static void __cpuinit rcu_prepare_kthreads(int cpu); | 527 | static void rcu_prepare_kthreads(int cpu); |
| 528 | static void rcu_cleanup_after_idle(int cpu); | 528 | static void rcu_cleanup_after_idle(int cpu); |
| 529 | static void rcu_prepare_for_idle(int cpu); | 529 | static void rcu_prepare_for_idle(int cpu); |
| 530 | static void rcu_idle_count_callbacks_posted(void); | 530 | static void rcu_idle_count_callbacks_posted(void); |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 63098a59216e..769e12e3151b 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
| @@ -1352,7 +1352,7 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) | |||
| 1352 | * already exist. We only create this kthread for preemptible RCU. | 1352 | * already exist. We only create this kthread for preemptible RCU. |
| 1353 | * Returns zero if all is well, a negated errno otherwise. | 1353 | * Returns zero if all is well, a negated errno otherwise. |
| 1354 | */ | 1354 | */ |
| 1355 | static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | 1355 | static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, |
| 1356 | struct rcu_node *rnp) | 1356 | struct rcu_node *rnp) |
| 1357 | { | 1357 | { |
| 1358 | int rnp_index = rnp - &rsp->node[0]; | 1358 | int rnp_index = rnp - &rsp->node[0]; |
| @@ -1507,7 +1507,7 @@ static int __init rcu_spawn_kthreads(void) | |||
| 1507 | } | 1507 | } |
| 1508 | early_initcall(rcu_spawn_kthreads); | 1508 | early_initcall(rcu_spawn_kthreads); |
| 1509 | 1509 | ||
| 1510 | static void __cpuinit rcu_prepare_kthreads(int cpu) | 1510 | static void rcu_prepare_kthreads(int cpu) |
| 1511 | { | 1511 | { |
| 1512 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | 1512 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); |
| 1513 | struct rcu_node *rnp = rdp->mynode; | 1513 | struct rcu_node *rnp = rdp->mynode; |
| @@ -1549,7 +1549,7 @@ static int __init rcu_scheduler_really_started(void) | |||
| 1549 | } | 1549 | } |
| 1550 | early_initcall(rcu_scheduler_really_started); | 1550 | early_initcall(rcu_scheduler_really_started); |
| 1551 | 1551 | ||
| 1552 | static void __cpuinit rcu_prepare_kthreads(int cpu) | 1552 | static void rcu_prepare_kthreads(int cpu) |
| 1553 | { | 1553 | { |
| 1554 | } | 1554 | } |
| 1555 | 1555 | ||
diff --git a/kernel/reboot.c b/kernel/reboot.c new file mode 100644 index 000000000000..269ed9384cc4 --- /dev/null +++ b/kernel/reboot.c | |||
| @@ -0,0 +1,419 @@ | |||
| 1 | /* | ||
| 2 | * linux/kernel/reboot.c | ||
| 3 | * | ||
| 4 | * Copyright (C) 2013 Linus Torvalds | ||
| 5 | */ | ||
| 6 | |||
| 7 | #define pr_fmt(fmt) "reboot: " fmt | ||
| 8 | |||
| 9 | #include <linux/ctype.h> | ||
| 10 | #include <linux/export.h> | ||
| 11 | #include <linux/kexec.h> | ||
| 12 | #include <linux/kmod.h> | ||
| 13 | #include <linux/kmsg_dump.h> | ||
| 14 | #include <linux/reboot.h> | ||
| 15 | #include <linux/suspend.h> | ||
| 16 | #include <linux/syscalls.h> | ||
| 17 | #include <linux/syscore_ops.h> | ||
| 18 | #include <linux/uaccess.h> | ||
| 19 | |||
| 20 | /* | ||
| 21 | * this indicates whether you can reboot with ctrl-alt-del: the default is yes | ||
| 22 | */ | ||
| 23 | |||
| 24 | int C_A_D = 1; | ||
| 25 | struct pid *cad_pid; | ||
| 26 | EXPORT_SYMBOL(cad_pid); | ||
| 27 | |||
| 28 | #if defined(CONFIG_ARM) || defined(CONFIG_UNICORE32) | ||
| 29 | #define DEFAULT_REBOOT_MODE = REBOOT_HARD | ||
| 30 | #else | ||
| 31 | #define DEFAULT_REBOOT_MODE | ||
| 32 | #endif | ||
| 33 | enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE; | ||
| 34 | |||
| 35 | int reboot_default; | ||
| 36 | int reboot_cpu; | ||
| 37 | enum reboot_type reboot_type = BOOT_ACPI; | ||
| 38 | int reboot_force; | ||
| 39 | |||
| 40 | /* | ||
| 41 | * If set, this is used for preparing the system to power off. | ||
| 42 | */ | ||
| 43 | |||
| 44 | void (*pm_power_off_prepare)(void); | ||
| 45 | |||
| 46 | /** | ||
| 47 | * emergency_restart - reboot the system | ||
| 48 | * | ||
| 49 | * Without shutting down any hardware or taking any locks | ||
| 50 | * reboot the system. This is called when we know we are in | ||
| 51 | * trouble so this is our best effort to reboot. This is | ||
| 52 | * safe to call in interrupt context. | ||
| 53 | */ | ||
| 54 | void emergency_restart(void) | ||
| 55 | { | ||
| 56 | kmsg_dump(KMSG_DUMP_EMERG); | ||
| 57 | machine_emergency_restart(); | ||
| 58 | } | ||
| 59 | EXPORT_SYMBOL_GPL(emergency_restart); | ||
| 60 | |||
| 61 | void kernel_restart_prepare(char *cmd) | ||
| 62 | { | ||
| 63 | blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); | ||
| 64 | system_state = SYSTEM_RESTART; | ||
| 65 | usermodehelper_disable(); | ||
| 66 | device_shutdown(); | ||
| 67 | } | ||
| 68 | |||
| 69 | /** | ||
| 70 | * register_reboot_notifier - Register function to be called at reboot time | ||
| 71 | * @nb: Info about notifier function to be called | ||
| 72 | * | ||
| 73 | * Registers a function with the list of functions | ||
| 74 | * to be called at reboot time. | ||
| 75 | * | ||
| 76 | * Currently always returns zero, as blocking_notifier_chain_register() | ||
| 77 | * always returns zero. | ||
| 78 | */ | ||
| 79 | int register_reboot_notifier(struct notifier_block *nb) | ||
| 80 | { | ||
| 81 | return blocking_notifier_chain_register(&reboot_notifier_list, nb); | ||
| 82 | } | ||
| 83 | EXPORT_SYMBOL(register_reboot_notifier); | ||
| 84 | |||
| 85 | /** | ||
| 86 | * unregister_reboot_notifier - Unregister previously registered reboot notifier | ||
| 87 | * @nb: Hook to be unregistered | ||
| 88 | * | ||
| 89 | * Unregisters a previously registered reboot | ||
| 90 | * notifier function. | ||
| 91 | * | ||
| 92 | * Returns zero on success, or %-ENOENT on failure. | ||
| 93 | */ | ||
| 94 | int unregister_reboot_notifier(struct notifier_block *nb) | ||
| 95 | { | ||
| 96 | return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); | ||
| 97 | } | ||
| 98 | EXPORT_SYMBOL(unregister_reboot_notifier); | ||
| 99 | |||
| 100 | static void migrate_to_reboot_cpu(void) | ||
| 101 | { | ||
| 102 | /* The boot cpu is always logical cpu 0 */ | ||
| 103 | int cpu = reboot_cpu; | ||
| 104 | |||
| 105 | cpu_hotplug_disable(); | ||
| 106 | |||
| 107 | /* Make certain the cpu I'm about to reboot on is online */ | ||
| 108 | if (!cpu_online(cpu)) | ||
| 109 | cpu = cpumask_first(cpu_online_mask); | ||
| 110 | |||
| 111 | /* Prevent races with other tasks migrating this task */ | ||
| 112 | current->flags |= PF_NO_SETAFFINITY; | ||
| 113 | |||
| 114 | /* Make certain I only run on the appropriate processor */ | ||
| 115 | set_cpus_allowed_ptr(current, cpumask_of(cpu)); | ||
| 116 | } | ||
| 117 | |||
| 118 | /** | ||
| 119 | * kernel_restart - reboot the system | ||
| 120 | * @cmd: pointer to buffer containing command to execute for restart | ||
| 121 | * or %NULL | ||
| 122 | * | ||
| 123 | * Shutdown everything and perform a clean reboot. | ||
| 124 | * This is not safe to call in interrupt context. | ||
| 125 | */ | ||
| 126 | void kernel_restart(char *cmd) | ||
| 127 | { | ||
| 128 | kernel_restart_prepare(cmd); | ||
| 129 | migrate_to_reboot_cpu(); | ||
| 130 | syscore_shutdown(); | ||
| 131 | if (!cmd) | ||
| 132 | pr_emerg("Restarting system\n"); | ||
| 133 | else | ||
| 134 | pr_emerg("Restarting system with command '%s'\n", cmd); | ||
| 135 | kmsg_dump(KMSG_DUMP_RESTART); | ||
| 136 | machine_restart(cmd); | ||
| 137 | } | ||
| 138 | EXPORT_SYMBOL_GPL(kernel_restart); | ||
| 139 | |||
| 140 | static void kernel_shutdown_prepare(enum system_states state) | ||
| 141 | { | ||
| 142 | blocking_notifier_call_chain(&reboot_notifier_list, | ||
| 143 | (state == SYSTEM_HALT) ? SYS_HALT : SYS_POWER_OFF, NULL); | ||
| 144 | system_state = state; | ||
| 145 | usermodehelper_disable(); | ||
| 146 | device_shutdown(); | ||
| 147 | } | ||
| 148 | /** | ||
| 149 | * kernel_halt - halt the system | ||
| 150 | * | ||
| 151 | * Shutdown everything and perform a clean system halt. | ||
| 152 | */ | ||
| 153 | void kernel_halt(void) | ||
| 154 | { | ||
| 155 | kernel_shutdown_prepare(SYSTEM_HALT); | ||
| 156 | migrate_to_reboot_cpu(); | ||
| 157 | syscore_shutdown(); | ||
| 158 | pr_emerg("System halted\n"); | ||
| 159 | kmsg_dump(KMSG_DUMP_HALT); | ||
| 160 | machine_halt(); | ||
| 161 | } | ||
| 162 | EXPORT_SYMBOL_GPL(kernel_halt); | ||
| 163 | |||
| 164 | /** | ||
| 165 | * kernel_power_off - power_off the system | ||
| 166 | * | ||
| 167 | * Shutdown everything and perform a clean system power_off. | ||
| 168 | */ | ||
| 169 | void kernel_power_off(void) | ||
| 170 | { | ||
| 171 | kernel_shutdown_prepare(SYSTEM_POWER_OFF); | ||
| 172 | if (pm_power_off_prepare) | ||
| 173 | pm_power_off_prepare(); | ||
| 174 | migrate_to_reboot_cpu(); | ||
| 175 | syscore_shutdown(); | ||
| 176 | pr_emerg("Power down\n"); | ||
| 177 | kmsg_dump(KMSG_DUMP_POWEROFF); | ||
| 178 | machine_power_off(); | ||
| 179 | } | ||
| 180 | EXPORT_SYMBOL_GPL(kernel_power_off); | ||
| 181 | |||
| 182 | static DEFINE_MUTEX(reboot_mutex); | ||
| 183 | |||
| 184 | /* | ||
| 185 | * Reboot system call: for obvious reasons only root may call it, | ||
| 186 | * and even root needs to set up some magic numbers in the registers | ||
| 187 | * so that some mistake won't make this reboot the whole machine. | ||
| 188 | * You can also set the meaning of the ctrl-alt-del-key here. | ||
| 189 | * | ||
| 190 | * reboot doesn't sync: do that yourself before calling this. | ||
| 191 | */ | ||
| 192 | SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, | ||
| 193 | void __user *, arg) | ||
| 194 | { | ||
| 195 | struct pid_namespace *pid_ns = task_active_pid_ns(current); | ||
| 196 | char buffer[256]; | ||
| 197 | int ret = 0; | ||
| 198 | |||
| 199 | /* We only trust the superuser with rebooting the system. */ | ||
| 200 | if (!ns_capable(pid_ns->user_ns, CAP_SYS_BOOT)) | ||
| 201 | return -EPERM; | ||
| 202 | |||
| 203 | /* For safety, we require "magic" arguments. */ | ||
| 204 | if (magic1 != LINUX_REBOOT_MAGIC1 || | ||
| 205 | (magic2 != LINUX_REBOOT_MAGIC2 && | ||
| 206 | magic2 != LINUX_REBOOT_MAGIC2A && | ||
| 207 | magic2 != LINUX_REBOOT_MAGIC2B && | ||
| 208 | magic2 != LINUX_REBOOT_MAGIC2C)) | ||
| 209 | return -EINVAL; | ||
| 210 | |||
| 211 | /* | ||
| 212 | * If pid namespaces are enabled and the current task is in a child | ||
| 213 | * pid_namespace, the command is handled by reboot_pid_ns() which will | ||
| 214 | * call do_exit(). | ||
| 215 | */ | ||
| 216 | ret = reboot_pid_ns(pid_ns, cmd); | ||
| 217 | if (ret) | ||
| 218 | return ret; | ||
| 219 | |||
| 220 | /* Instead of trying to make the power_off code look like | ||
| 221 | * halt when pm_power_off is not set do it the easy way. | ||
| 222 | */ | ||
| 223 | if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) | ||
| 224 | cmd = LINUX_REBOOT_CMD_HALT; | ||
| 225 | |||
| 226 | mutex_lock(&reboot_mutex); | ||
| 227 | switch (cmd) { | ||
| 228 | case LINUX_REBOOT_CMD_RESTART: | ||
| 229 | kernel_restart(NULL); | ||
| 230 | break; | ||
| 231 | |||
| 232 | case LINUX_REBOOT_CMD_CAD_ON: | ||
| 233 | C_A_D = 1; | ||
| 234 | break; | ||
| 235 | |||
| 236 | case LINUX_REBOOT_CMD_CAD_OFF: | ||
| 237 | C_A_D = 0; | ||
| 238 | break; | ||
| 239 | |||
| 240 | case LINUX_REBOOT_CMD_HALT: | ||
| 241 | kernel_halt(); | ||
| 242 | do_exit(0); | ||
| 243 | panic("cannot halt"); | ||
| 244 | |||
| 245 | case LINUX_REBOOT_CMD_POWER_OFF: | ||
| 246 | kernel_power_off(); | ||
| 247 | do_exit(0); | ||
| 248 | break; | ||
| 249 | |||
| 250 | case LINUX_REBOOT_CMD_RESTART2: | ||
| 251 | ret = strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1); | ||
| 252 | if (ret < 0) { | ||
| 253 | ret = -EFAULT; | ||
| 254 | break; | ||
| 255 | } | ||
| 256 | buffer[sizeof(buffer) - 1] = '\0'; | ||
| 257 | |||
| 258 | kernel_restart(buffer); | ||
| 259 | break; | ||
| 260 | |||
| 261 | #ifdef CONFIG_KEXEC | ||
| 262 | case LINUX_REBOOT_CMD_KEXEC: | ||
| 263 | ret = kernel_kexec(); | ||
| 264 | break; | ||
| 265 | #endif | ||
| 266 | |||
| 267 | #ifdef CONFIG_HIBERNATION | ||
| 268 | case LINUX_REBOOT_CMD_SW_SUSPEND: | ||
| 269 | ret = hibernate(); | ||
| 270 | break; | ||
| 271 | #endif | ||
| 272 | |||
| 273 | default: | ||
| 274 | ret = -EINVAL; | ||
| 275 | break; | ||
| 276 | } | ||
| 277 | mutex_unlock(&reboot_mutex); | ||
| 278 | return ret; | ||
| 279 | } | ||
| 280 | |||
| 281 | static void deferred_cad(struct work_struct *dummy) | ||
| 282 | { | ||
| 283 | kernel_restart(NULL); | ||
| 284 | } | ||
| 285 | |||
| 286 | /* | ||
| 287 | * This function gets called by ctrl-alt-del - ie the keyboard interrupt. | ||
| 288 | * As it's called within an interrupt, it may NOT sync: the only choice | ||
| 289 | * is whether to reboot at once, or just ignore the ctrl-alt-del. | ||
| 290 | */ | ||
| 291 | void ctrl_alt_del(void) | ||
| 292 | { | ||
| 293 | static DECLARE_WORK(cad_work, deferred_cad); | ||
| 294 | |||
| 295 | if (C_A_D) | ||
| 296 | schedule_work(&cad_work); | ||
| 297 | else | ||
| 298 | kill_cad_pid(SIGINT, 1); | ||
| 299 | } | ||
| 300 | |||
| 301 | char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; | ||
| 302 | |||
| 303 | static int __orderly_poweroff(bool force) | ||
| 304 | { | ||
| 305 | char **argv; | ||
| 306 | static char *envp[] = { | ||
| 307 | "HOME=/", | ||
| 308 | "PATH=/sbin:/bin:/usr/sbin:/usr/bin", | ||
| 309 | NULL | ||
| 310 | }; | ||
| 311 | int ret; | ||
| 312 | |||
| 313 | argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL); | ||
| 314 | if (argv) { | ||
| 315 | ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); | ||
| 316 | argv_free(argv); | ||
| 317 | } else { | ||
| 318 | ret = -ENOMEM; | ||
| 319 | } | ||
| 320 | |||
| 321 | if (ret && force) { | ||
| 322 | pr_warn("Failed to start orderly shutdown: forcing the issue\n"); | ||
| 323 | /* | ||
| 324 | * I guess this should try to kick off some daemon to sync and | ||
| 325 | * poweroff asap. Or not even bother syncing if we're doing an | ||
| 326 | * emergency shutdown? | ||
| 327 | */ | ||
| 328 | emergency_sync(); | ||
| 329 | kernel_power_off(); | ||
| 330 | } | ||
| 331 | |||
| 332 | return ret; | ||
| 333 | } | ||
| 334 | |||
| 335 | static bool poweroff_force; | ||
| 336 | |||
| 337 | static void poweroff_work_func(struct work_struct *work) | ||
| 338 | { | ||
| 339 | __orderly_poweroff(poweroff_force); | ||
| 340 | } | ||
| 341 | |||
| 342 | static DECLARE_WORK(poweroff_work, poweroff_work_func); | ||
| 343 | |||
| 344 | /** | ||
| 345 | * orderly_poweroff - Trigger an orderly system poweroff | ||
| 346 | * @force: force poweroff if command execution fails | ||
| 347 | * | ||
| 348 | * This may be called from any context to trigger a system shutdown. | ||
| 349 | * If the orderly shutdown fails, it will force an immediate shutdown. | ||
| 350 | */ | ||
| 351 | int orderly_poweroff(bool force) | ||
| 352 | { | ||
| 353 | if (force) /* do not override the pending "true" */ | ||
| 354 | poweroff_force = true; | ||
| 355 | schedule_work(&poweroff_work); | ||
| 356 | return 0; | ||
| 357 | } | ||
| 358 | EXPORT_SYMBOL_GPL(orderly_poweroff); | ||
| 359 | |||
| 360 | static int __init reboot_setup(char *str) | ||
| 361 | { | ||
| 362 | for (;;) { | ||
| 363 | /* | ||
| 364 | * Having anything passed on the command line via | ||
| 365 | * reboot= will cause us to disable DMI checking | ||
| 366 | * below. | ||
| 367 | */ | ||
| 368 | reboot_default = 0; | ||
| 369 | |||
| 370 | switch (*str) { | ||
| 371 | case 'w': | ||
| 372 | reboot_mode = REBOOT_WARM; | ||
| 373 | break; | ||
| 374 | |||
| 375 | case 'c': | ||
| 376 | reboot_mode = REBOOT_COLD; | ||
| 377 | break; | ||
| 378 | |||
| 379 | case 'h': | ||
| 380 | reboot_mode = REBOOT_HARD; | ||
| 381 | break; | ||
| 382 | |||
| 383 | case 's': | ||
| 384 | if (isdigit(*(str+1))) | ||
| 385 | reboot_cpu = simple_strtoul(str+1, NULL, 0); | ||
| 386 | else if (str[1] == 'm' && str[2] == 'p' && | ||
| 387 | isdigit(*(str+3))) | ||
| 388 | reboot_cpu = simple_strtoul(str+3, NULL, 0); | ||
| 389 | else | ||
| 390 | reboot_mode = REBOOT_SOFT; | ||
| 391 | break; | ||
| 392 | |||
| 393 | case 'g': | ||
| 394 | reboot_mode = REBOOT_GPIO; | ||
| 395 | break; | ||
| 396 | |||
| 397 | case 'b': | ||
| 398 | case 'a': | ||
| 399 | case 'k': | ||
| 400 | case 't': | ||
| 401 | case 'e': | ||
| 402 | case 'p': | ||
| 403 | reboot_type = *str; | ||
| 404 | break; | ||
| 405 | |||
| 406 | case 'f': | ||
| 407 | reboot_force = 1; | ||
| 408 | break; | ||
| 409 | } | ||
| 410 | |||
| 411 | str = strchr(str, ','); | ||
| 412 | if (str) | ||
| 413 | str++; | ||
| 414 | else | ||
| 415 | break; | ||
| 416 | } | ||
| 417 | return 1; | ||
| 418 | } | ||
| 419 | __setup("reboot=", reboot_setup); | ||
diff --git a/kernel/relay.c b/kernel/relay.c index b91488ba2e5a..5001c9887db1 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
| @@ -516,7 +516,7 @@ static void setup_callbacks(struct rchan *chan, | |||
| 516 | * | 516 | * |
| 517 | * Returns the success/failure of the operation. (%NOTIFY_OK, %NOTIFY_BAD) | 517 | * Returns the success/failure of the operation. (%NOTIFY_OK, %NOTIFY_BAD) |
| 518 | */ | 518 | */ |
| 519 | static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb, | 519 | static int relay_hotcpu_callback(struct notifier_block *nb, |
| 520 | unsigned long action, | 520 | unsigned long action, |
| 521 | void *hcpu) | 521 | void *hcpu) |
| 522 | { | 522 | { |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9b1f2e533b95..b7c32cb7bfeb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -370,13 +370,6 @@ static struct rq *this_rq_lock(void) | |||
| 370 | #ifdef CONFIG_SCHED_HRTICK | 370 | #ifdef CONFIG_SCHED_HRTICK |
| 371 | /* | 371 | /* |
| 372 | * Use HR-timers to deliver accurate preemption points. | 372 | * Use HR-timers to deliver accurate preemption points. |
| 373 | * | ||
| 374 | * Its all a bit involved since we cannot program an hrt while holding the | ||
| 375 | * rq->lock. So what we do is store a state in rq->hrtick_* and ask for a | ||
| 376 | * reschedule event. | ||
| 377 | * | ||
| 378 | * When we get rescheduled we reprogram the hrtick_timer outside of the | ||
| 379 | * rq->lock. | ||
| 380 | */ | 373 | */ |
| 381 | 374 | ||
| 382 | static void hrtick_clear(struct rq *rq) | 375 | static void hrtick_clear(struct rq *rq) |
| @@ -404,6 +397,15 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer) | |||
| 404 | } | 397 | } |
| 405 | 398 | ||
| 406 | #ifdef CONFIG_SMP | 399 | #ifdef CONFIG_SMP |
| 400 | |||
| 401 | static int __hrtick_restart(struct rq *rq) | ||
| 402 | { | ||
| 403 | struct hrtimer *timer = &rq->hrtick_timer; | ||
| 404 | ktime_t time = hrtimer_get_softexpires(timer); | ||
| 405 | |||
| 406 | return __hrtimer_start_range_ns(timer, time, 0, HRTIMER_MODE_ABS_PINNED, 0); | ||
| 407 | } | ||
| 408 | |||
| 407 | /* | 409 | /* |
| 408 | * called from hardirq (IPI) context | 410 | * called from hardirq (IPI) context |
| 409 | */ | 411 | */ |
| @@ -412,7 +414,7 @@ static void __hrtick_start(void *arg) | |||
| 412 | struct rq *rq = arg; | 414 | struct rq *rq = arg; |
| 413 | 415 | ||
| 414 | raw_spin_lock(&rq->lock); | 416 | raw_spin_lock(&rq->lock); |
| 415 | hrtimer_restart(&rq->hrtick_timer); | 417 | __hrtick_restart(rq); |
| 416 | rq->hrtick_csd_pending = 0; | 418 | rq->hrtick_csd_pending = 0; |
| 417 | raw_spin_unlock(&rq->lock); | 419 | raw_spin_unlock(&rq->lock); |
| 418 | } | 420 | } |
| @@ -430,7 +432,7 @@ void hrtick_start(struct rq *rq, u64 delay) | |||
| 430 | hrtimer_set_expires(timer, time); | 432 | hrtimer_set_expires(timer, time); |
| 431 | 433 | ||
| 432 | if (rq == this_rq()) { | 434 | if (rq == this_rq()) { |
| 433 | hrtimer_restart(timer); | 435 | __hrtick_restart(rq); |
| 434 | } else if (!rq->hrtick_csd_pending) { | 436 | } else if (!rq->hrtick_csd_pending) { |
| 435 | __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0); | 437 | __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0); |
| 436 | rq->hrtick_csd_pending = 1; | 438 | rq->hrtick_csd_pending = 1; |
| @@ -4131,7 +4133,7 @@ void show_state_filter(unsigned long state_filter) | |||
| 4131 | debug_show_all_locks(); | 4133 | debug_show_all_locks(); |
| 4132 | } | 4134 | } |
| 4133 | 4135 | ||
| 4134 | void __cpuinit init_idle_bootup_task(struct task_struct *idle) | 4136 | void init_idle_bootup_task(struct task_struct *idle) |
| 4135 | { | 4137 | { |
| 4136 | idle->sched_class = &idle_sched_class; | 4138 | idle->sched_class = &idle_sched_class; |
| 4137 | } | 4139 | } |
| @@ -4144,7 +4146,7 @@ void __cpuinit init_idle_bootup_task(struct task_struct *idle) | |||
| 4144 | * NOTE: this function does not set the idle thread's NEED_RESCHED | 4146 | * NOTE: this function does not set the idle thread's NEED_RESCHED |
| 4145 | * flag, to make booting more robust. | 4147 | * flag, to make booting more robust. |
| 4146 | */ | 4148 | */ |
| 4147 | void __cpuinit init_idle(struct task_struct *idle, int cpu) | 4149 | void init_idle(struct task_struct *idle, int cpu) |
| 4148 | { | 4150 | { |
| 4149 | struct rq *rq = cpu_rq(cpu); | 4151 | struct rq *rq = cpu_rq(cpu); |
| 4150 | unsigned long flags; | 4152 | unsigned long flags; |
| @@ -4628,7 +4630,7 @@ static void set_rq_offline(struct rq *rq) | |||
| 4628 | * migration_call - callback that gets triggered when a CPU is added. | 4630 | * migration_call - callback that gets triggered when a CPU is added. |
| 4629 | * Here we can start up the necessary migration thread for the new CPU. | 4631 | * Here we can start up the necessary migration thread for the new CPU. |
| 4630 | */ | 4632 | */ |
| 4631 | static int __cpuinit | 4633 | static int |
| 4632 | migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | 4634 | migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) |
| 4633 | { | 4635 | { |
| 4634 | int cpu = (long)hcpu; | 4636 | int cpu = (long)hcpu; |
| @@ -4682,12 +4684,12 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 4682 | * happens before everything else. This has to be lower priority than | 4684 | * happens before everything else. This has to be lower priority than |
| 4683 | * the notifier in the perf_event subsystem, though. | 4685 | * the notifier in the perf_event subsystem, though. |
| 4684 | */ | 4686 | */ |
| 4685 | static struct notifier_block __cpuinitdata migration_notifier = { | 4687 | static struct notifier_block migration_notifier = { |
| 4686 | .notifier_call = migration_call, | 4688 | .notifier_call = migration_call, |
| 4687 | .priority = CPU_PRI_MIGRATION, | 4689 | .priority = CPU_PRI_MIGRATION, |
| 4688 | }; | 4690 | }; |
| 4689 | 4691 | ||
| 4690 | static int __cpuinit sched_cpu_active(struct notifier_block *nfb, | 4692 | static int sched_cpu_active(struct notifier_block *nfb, |
| 4691 | unsigned long action, void *hcpu) | 4693 | unsigned long action, void *hcpu) |
| 4692 | { | 4694 | { |
| 4693 | switch (action & ~CPU_TASKS_FROZEN) { | 4695 | switch (action & ~CPU_TASKS_FROZEN) { |
| @@ -4700,7 +4702,7 @@ static int __cpuinit sched_cpu_active(struct notifier_block *nfb, | |||
| 4700 | } | 4702 | } |
| 4701 | } | 4703 | } |
| 4702 | 4704 | ||
| 4703 | static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb, | 4705 | static int sched_cpu_inactive(struct notifier_block *nfb, |
| 4704 | unsigned long action, void *hcpu) | 4706 | unsigned long action, void *hcpu) |
| 4705 | { | 4707 | { |
| 4706 | switch (action & ~CPU_TASKS_FROZEN) { | 4708 | switch (action & ~CPU_TASKS_FROZEN) { |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f77f9c527449..bb456f44b7b1 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
| @@ -5506,7 +5506,7 @@ void nohz_balance_enter_idle(int cpu) | |||
| 5506 | set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)); | 5506 | set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)); |
| 5507 | } | 5507 | } |
| 5508 | 5508 | ||
| 5509 | static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb, | 5509 | static int sched_ilb_notifier(struct notifier_block *nfb, |
| 5510 | unsigned long action, void *hcpu) | 5510 | unsigned long action, void *hcpu) |
| 5511 | { | 5511 | { |
| 5512 | switch (action & ~CPU_TASKS_FROZEN) { | 5512 | switch (action & ~CPU_TASKS_FROZEN) { |
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 17d7065c3872..5aef494fc8b4 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h | |||
| @@ -162,6 +162,39 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next) | |||
| 162 | */ | 162 | */ |
| 163 | 163 | ||
| 164 | /** | 164 | /** |
| 165 | * cputimer_running - return true if cputimer is running | ||
| 166 | * | ||
| 167 | * @tsk: Pointer to target task. | ||
| 168 | */ | ||
| 169 | static inline bool cputimer_running(struct task_struct *tsk) | ||
| 170 | |||
| 171 | { | ||
| 172 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; | ||
| 173 | |||
| 174 | if (!cputimer->running) | ||
| 175 | return false; | ||
| 176 | |||
| 177 | /* | ||
| 178 | * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime | ||
| 179 | * in __exit_signal(), we won't account to the signal struct further | ||
| 180 | * cputime consumed by that task, even though the task can still be | ||
| 181 | * ticking after __exit_signal(). | ||
| 182 | * | ||
| 183 | * In order to keep a consistent behaviour between thread group cputime | ||
| 184 | * and thread group cputimer accounting, lets also ignore the cputime | ||
| 185 | * elapsing after __exit_signal() in any thread group timer running. | ||
| 186 | * | ||
| 187 | * This makes sure that POSIX CPU clocks and timers are synchronized, so | ||
| 188 | * that a POSIX CPU timer won't expire while the corresponding POSIX CPU | ||
| 189 | * clock delta is behind the expiring timer value. | ||
| 190 | */ | ||
| 191 | if (unlikely(!tsk->sighand)) | ||
| 192 | return false; | ||
| 193 | |||
| 194 | return true; | ||
| 195 | } | ||
| 196 | |||
| 197 | /** | ||
| 165 | * account_group_user_time - Maintain utime for a thread group. | 198 | * account_group_user_time - Maintain utime for a thread group. |
| 166 | * | 199 | * |
| 167 | * @tsk: Pointer to task structure. | 200 | * @tsk: Pointer to task structure. |
| @@ -176,7 +209,7 @@ static inline void account_group_user_time(struct task_struct *tsk, | |||
| 176 | { | 209 | { |
| 177 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; | 210 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; |
| 178 | 211 | ||
| 179 | if (!cputimer->running) | 212 | if (!cputimer_running(tsk)) |
| 180 | return; | 213 | return; |
| 181 | 214 | ||
| 182 | raw_spin_lock(&cputimer->lock); | 215 | raw_spin_lock(&cputimer->lock); |
| @@ -199,7 +232,7 @@ static inline void account_group_system_time(struct task_struct *tsk, | |||
| 199 | { | 232 | { |
| 200 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; | 233 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; |
| 201 | 234 | ||
| 202 | if (!cputimer->running) | 235 | if (!cputimer_running(tsk)) |
| 203 | return; | 236 | return; |
| 204 | 237 | ||
| 205 | raw_spin_lock(&cputimer->lock); | 238 | raw_spin_lock(&cputimer->lock); |
| @@ -222,7 +255,7 @@ static inline void account_group_exec_runtime(struct task_struct *tsk, | |||
| 222 | { | 255 | { |
| 223 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; | 256 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; |
| 224 | 257 | ||
| 225 | if (!cputimer->running) | 258 | if (!cputimer_running(tsk)) |
| 226 | return; | 259 | return; |
| 227 | 260 | ||
| 228 | raw_spin_lock(&cputimer->lock); | 261 | raw_spin_lock(&cputimer->lock); |
diff --git a/kernel/smp.c b/kernel/smp.c index 4dba0f7b72ad..fe9f773d7114 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
| @@ -73,7 +73,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 73 | return NOTIFY_OK; | 73 | return NOTIFY_OK; |
| 74 | } | 74 | } |
| 75 | 75 | ||
| 76 | static struct notifier_block __cpuinitdata hotplug_cfd_notifier = { | 76 | static struct notifier_block hotplug_cfd_notifier = { |
| 77 | .notifier_call = hotplug_cfd, | 77 | .notifier_call = hotplug_cfd, |
| 78 | }; | 78 | }; |
| 79 | 79 | ||
diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 02fc5c933673..eb89e1807408 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c | |||
| @@ -24,7 +24,7 @@ | |||
| 24 | */ | 24 | */ |
| 25 | static DEFINE_PER_CPU(struct task_struct *, idle_threads); | 25 | static DEFINE_PER_CPU(struct task_struct *, idle_threads); |
| 26 | 26 | ||
| 27 | struct task_struct * __cpuinit idle_thread_get(unsigned int cpu) | 27 | struct task_struct *idle_thread_get(unsigned int cpu) |
| 28 | { | 28 | { |
| 29 | struct task_struct *tsk = per_cpu(idle_threads, cpu); | 29 | struct task_struct *tsk = per_cpu(idle_threads, cpu); |
| 30 | 30 | ||
diff --git a/kernel/softirq.c b/kernel/softirq.c index ca25e6e704a2..be3d3514c325 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -699,7 +699,7 @@ void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq) | |||
| 699 | } | 699 | } |
| 700 | EXPORT_SYMBOL(send_remote_softirq); | 700 | EXPORT_SYMBOL(send_remote_softirq); |
| 701 | 701 | ||
| 702 | static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self, | 702 | static int remote_softirq_cpu_notify(struct notifier_block *self, |
| 703 | unsigned long action, void *hcpu) | 703 | unsigned long action, void *hcpu) |
| 704 | { | 704 | { |
| 705 | /* | 705 | /* |
| @@ -728,7 +728,7 @@ static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self, | |||
| 728 | return NOTIFY_OK; | 728 | return NOTIFY_OK; |
| 729 | } | 729 | } |
| 730 | 730 | ||
| 731 | static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = { | 731 | static struct notifier_block remote_softirq_cpu_notifier = { |
| 732 | .notifier_call = remote_softirq_cpu_notify, | 732 | .notifier_call = remote_softirq_cpu_notify, |
| 733 | }; | 733 | }; |
| 734 | 734 | ||
| @@ -830,7 +830,7 @@ static void takeover_tasklets(unsigned int cpu) | |||
| 830 | } | 830 | } |
| 831 | #endif /* CONFIG_HOTPLUG_CPU */ | 831 | #endif /* CONFIG_HOTPLUG_CPU */ |
| 832 | 832 | ||
| 833 | static int __cpuinit cpu_callback(struct notifier_block *nfb, | 833 | static int cpu_callback(struct notifier_block *nfb, |
| 834 | unsigned long action, | 834 | unsigned long action, |
| 835 | void *hcpu) | 835 | void *hcpu) |
| 836 | { | 836 | { |
| @@ -845,7 +845,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, | |||
| 845 | return NOTIFY_OK; | 845 | return NOTIFY_OK; |
| 846 | } | 846 | } |
| 847 | 847 | ||
| 848 | static struct notifier_block __cpuinitdata cpu_nfb = { | 848 | static struct notifier_block cpu_nfb = { |
| 849 | .notifier_call = cpu_callback | 849 | .notifier_call = cpu_callback |
| 850 | }; | 850 | }; |
| 851 | 851 | ||
diff --git a/kernel/sys.c b/kernel/sys.c index 071de900c824..771129b299f8 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
| @@ -116,20 +116,6 @@ EXPORT_SYMBOL(fs_overflowuid); | |||
| 116 | EXPORT_SYMBOL(fs_overflowgid); | 116 | EXPORT_SYMBOL(fs_overflowgid); |
| 117 | 117 | ||
| 118 | /* | 118 | /* |
| 119 | * this indicates whether you can reboot with ctrl-alt-del: the default is yes | ||
| 120 | */ | ||
| 121 | |||
| 122 | int C_A_D = 1; | ||
| 123 | struct pid *cad_pid; | ||
| 124 | EXPORT_SYMBOL(cad_pid); | ||
| 125 | |||
| 126 | /* | ||
| 127 | * If set, this is used for preparing the system to power off. | ||
| 128 | */ | ||
| 129 | |||
| 130 | void (*pm_power_off_prepare)(void); | ||
| 131 | |||
| 132 | /* | ||
| 133 | * Returns true if current's euid is same as p's uid or euid, | 119 | * Returns true if current's euid is same as p's uid or euid, |
| 134 | * or has CAP_SYS_NICE to p's user_ns. | 120 | * or has CAP_SYS_NICE to p's user_ns. |
| 135 | * | 121 | * |
| @@ -308,266 +294,6 @@ out_unlock: | |||
| 308 | return retval; | 294 | return retval; |
| 309 | } | 295 | } |
| 310 | 296 | ||
| 311 | /** | ||
| 312 | * emergency_restart - reboot the system | ||
| 313 | * | ||
| 314 | * Without shutting down any hardware or taking any locks | ||
| 315 | * reboot the system. This is called when we know we are in | ||
| 316 | * trouble so this is our best effort to reboot. This is | ||
| 317 | * safe to call in interrupt context. | ||
| 318 | */ | ||
| 319 | void emergency_restart(void) | ||
| 320 | { | ||
| 321 | kmsg_dump(KMSG_DUMP_EMERG); | ||
| 322 | machine_emergency_restart(); | ||
| 323 | } | ||
| 324 | EXPORT_SYMBOL_GPL(emergency_restart); | ||
| 325 | |||
| 326 | void kernel_restart_prepare(char *cmd) | ||
| 327 | { | ||
| 328 | blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); | ||
| 329 | system_state = SYSTEM_RESTART; | ||
| 330 | usermodehelper_disable(); | ||
| 331 | device_shutdown(); | ||
| 332 | } | ||
| 333 | |||
| 334 | /** | ||
| 335 | * register_reboot_notifier - Register function to be called at reboot time | ||
| 336 | * @nb: Info about notifier function to be called | ||
| 337 | * | ||
| 338 | * Registers a function with the list of functions | ||
| 339 | * to be called at reboot time. | ||
| 340 | * | ||
| 341 | * Currently always returns zero, as blocking_notifier_chain_register() | ||
| 342 | * always returns zero. | ||
| 343 | */ | ||
| 344 | int register_reboot_notifier(struct notifier_block *nb) | ||
| 345 | { | ||
| 346 | return blocking_notifier_chain_register(&reboot_notifier_list, nb); | ||
| 347 | } | ||
| 348 | EXPORT_SYMBOL(register_reboot_notifier); | ||
| 349 | |||
| 350 | /** | ||
| 351 | * unregister_reboot_notifier - Unregister previously registered reboot notifier | ||
| 352 | * @nb: Hook to be unregistered | ||
| 353 | * | ||
| 354 | * Unregisters a previously registered reboot | ||
| 355 | * notifier function. | ||
| 356 | * | ||
| 357 | * Returns zero on success, or %-ENOENT on failure. | ||
| 358 | */ | ||
| 359 | int unregister_reboot_notifier(struct notifier_block *nb) | ||
| 360 | { | ||
| 361 | return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); | ||
| 362 | } | ||
| 363 | EXPORT_SYMBOL(unregister_reboot_notifier); | ||
| 364 | |||
| 365 | /* Add backwards compatibility for stable trees. */ | ||
| 366 | #ifndef PF_NO_SETAFFINITY | ||
| 367 | #define PF_NO_SETAFFINITY PF_THREAD_BOUND | ||
| 368 | #endif | ||
| 369 | |||
| 370 | static void migrate_to_reboot_cpu(void) | ||
| 371 | { | ||
| 372 | /* The boot cpu is always logical cpu 0 */ | ||
| 373 | int cpu = 0; | ||
| 374 | |||
| 375 | cpu_hotplug_disable(); | ||
| 376 | |||
| 377 | /* Make certain the cpu I'm about to reboot on is online */ | ||
| 378 | if (!cpu_online(cpu)) | ||
| 379 | cpu = cpumask_first(cpu_online_mask); | ||
| 380 | |||
| 381 | /* Prevent races with other tasks migrating this task */ | ||
| 382 | current->flags |= PF_NO_SETAFFINITY; | ||
| 383 | |||
| 384 | /* Make certain I only run on the appropriate processor */ | ||
| 385 | set_cpus_allowed_ptr(current, cpumask_of(cpu)); | ||
| 386 | } | ||
| 387 | |||
| 388 | /** | ||
| 389 | * kernel_restart - reboot the system | ||
| 390 | * @cmd: pointer to buffer containing command to execute for restart | ||
| 391 | * or %NULL | ||
| 392 | * | ||
| 393 | * Shutdown everything and perform a clean reboot. | ||
| 394 | * This is not safe to call in interrupt context. | ||
| 395 | */ | ||
| 396 | void kernel_restart(char *cmd) | ||
| 397 | { | ||
| 398 | kernel_restart_prepare(cmd); | ||
| 399 | migrate_to_reboot_cpu(); | ||
| 400 | syscore_shutdown(); | ||
| 401 | if (!cmd) | ||
| 402 | printk(KERN_EMERG "Restarting system.\n"); | ||
| 403 | else | ||
| 404 | printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); | ||
| 405 | kmsg_dump(KMSG_DUMP_RESTART); | ||
| 406 | machine_restart(cmd); | ||
| 407 | } | ||
| 408 | EXPORT_SYMBOL_GPL(kernel_restart); | ||
| 409 | |||
| 410 | static void kernel_shutdown_prepare(enum system_states state) | ||
| 411 | { | ||
| 412 | blocking_notifier_call_chain(&reboot_notifier_list, | ||
| 413 | (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); | ||
| 414 | system_state = state; | ||
| 415 | usermodehelper_disable(); | ||
| 416 | device_shutdown(); | ||
| 417 | } | ||
| 418 | /** | ||
| 419 | * kernel_halt - halt the system | ||
| 420 | * | ||
| 421 | * Shutdown everything and perform a clean system halt. | ||
| 422 | */ | ||
| 423 | void kernel_halt(void) | ||
| 424 | { | ||
| 425 | kernel_shutdown_prepare(SYSTEM_HALT); | ||
| 426 | migrate_to_reboot_cpu(); | ||
| 427 | syscore_shutdown(); | ||
| 428 | printk(KERN_EMERG "System halted.\n"); | ||
| 429 | kmsg_dump(KMSG_DUMP_HALT); | ||
| 430 | machine_halt(); | ||
| 431 | } | ||
| 432 | |||
| 433 | EXPORT_SYMBOL_GPL(kernel_halt); | ||
| 434 | |||
| 435 | /** | ||
| 436 | * kernel_power_off - power_off the system | ||
| 437 | * | ||
| 438 | * Shutdown everything and perform a clean system power_off. | ||
| 439 | */ | ||
| 440 | void kernel_power_off(void) | ||
| 441 | { | ||
| 442 | kernel_shutdown_prepare(SYSTEM_POWER_OFF); | ||
| 443 | if (pm_power_off_prepare) | ||
| 444 | pm_power_off_prepare(); | ||
| 445 | migrate_to_reboot_cpu(); | ||
| 446 | syscore_shutdown(); | ||
| 447 | printk(KERN_EMERG "Power down.\n"); | ||
| 448 | kmsg_dump(KMSG_DUMP_POWEROFF); | ||
| 449 | machine_power_off(); | ||
| 450 | } | ||
| 451 | EXPORT_SYMBOL_GPL(kernel_power_off); | ||
| 452 | |||
| 453 | static DEFINE_MUTEX(reboot_mutex); | ||
| 454 | |||
| 455 | /* | ||
| 456 | * Reboot system call: for obvious reasons only root may call it, | ||
| 457 | * and even root needs to set up some magic numbers in the registers | ||
| 458 | * so that some mistake won't make this reboot the whole machine. | ||
| 459 | * You can also set the meaning of the ctrl-alt-del-key here. | ||
| 460 | * | ||
| 461 | * reboot doesn't sync: do that yourself before calling this. | ||
| 462 | */ | ||
| 463 | SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, | ||
| 464 | void __user *, arg) | ||
| 465 | { | ||
| 466 | struct pid_namespace *pid_ns = task_active_pid_ns(current); | ||
| 467 | char buffer[256]; | ||
| 468 | int ret = 0; | ||
| 469 | |||
| 470 | /* We only trust the superuser with rebooting the system. */ | ||
| 471 | if (!ns_capable(pid_ns->user_ns, CAP_SYS_BOOT)) | ||
| 472 | return -EPERM; | ||
| 473 | |||
| 474 | /* For safety, we require "magic" arguments. */ | ||
| 475 | if (magic1 != LINUX_REBOOT_MAGIC1 || | ||
| 476 | (magic2 != LINUX_REBOOT_MAGIC2 && | ||
| 477 | magic2 != LINUX_REBOOT_MAGIC2A && | ||
| 478 | magic2 != LINUX_REBOOT_MAGIC2B && | ||
| 479 | magic2 != LINUX_REBOOT_MAGIC2C)) | ||
| 480 | return -EINVAL; | ||
| 481 | |||
| 482 | /* | ||
| 483 | * If pid namespaces are enabled and the current task is in a child | ||
| 484 | * pid_namespace, the command is handled by reboot_pid_ns() which will | ||
| 485 | * call do_exit(). | ||
| 486 | */ | ||
| 487 | ret = reboot_pid_ns(pid_ns, cmd); | ||
| 488 | if (ret) | ||
| 489 | return ret; | ||
| 490 | |||
| 491 | /* Instead of trying to make the power_off code look like | ||
| 492 | * halt when pm_power_off is not set do it the easy way. | ||
| 493 | */ | ||
| 494 | if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) | ||
| 495 | cmd = LINUX_REBOOT_CMD_HALT; | ||
| 496 | |||
| 497 | mutex_lock(&reboot_mutex); | ||
| 498 | switch (cmd) { | ||
| 499 | case LINUX_REBOOT_CMD_RESTART: | ||
| 500 | kernel_restart(NULL); | ||
| 501 | break; | ||
| 502 | |||
| 503 | case LINUX_REBOOT_CMD_CAD_ON: | ||
| 504 | C_A_D = 1; | ||
| 505 | break; | ||
| 506 | |||
| 507 | case LINUX_REBOOT_CMD_CAD_OFF: | ||
| 508 | C_A_D = 0; | ||
| 509 | break; | ||
| 510 | |||
| 511 | case LINUX_REBOOT_CMD_HALT: | ||
| 512 | kernel_halt(); | ||
| 513 | do_exit(0); | ||
| 514 | panic("cannot halt.\n"); | ||
| 515 | |||
| 516 | case LINUX_REBOOT_CMD_POWER_OFF: | ||
| 517 | kernel_power_off(); | ||
| 518 | do_exit(0); | ||
| 519 | break; | ||
| 520 | |||
| 521 | case LINUX_REBOOT_CMD_RESTART2: | ||
| 522 | if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) { | ||
| 523 | ret = -EFAULT; | ||
| 524 | break; | ||
| 525 | } | ||
| 526 | buffer[sizeof(buffer) - 1] = '\0'; | ||
| 527 | |||
| 528 | kernel_restart(buffer); | ||
| 529 | break; | ||
| 530 | |||
| 531 | #ifdef CONFIG_KEXEC | ||
| 532 | case LINUX_REBOOT_CMD_KEXEC: | ||
| 533 | ret = kernel_kexec(); | ||
| 534 | break; | ||
| 535 | #endif | ||
| 536 | |||
| 537 | #ifdef CONFIG_HIBERNATION | ||
| 538 | case LINUX_REBOOT_CMD_SW_SUSPEND: | ||
| 539 | ret = hibernate(); | ||
| 540 | break; | ||
| 541 | #endif | ||
| 542 | |||
| 543 | default: | ||
| 544 | ret = -EINVAL; | ||
| 545 | break; | ||
| 546 | } | ||
| 547 | mutex_unlock(&reboot_mutex); | ||
| 548 | return ret; | ||
| 549 | } | ||
| 550 | |||
| 551 | static void deferred_cad(struct work_struct *dummy) | ||
| 552 | { | ||
| 553 | kernel_restart(NULL); | ||
| 554 | } | ||
| 555 | |||
| 556 | /* | ||
| 557 | * This function gets called by ctrl-alt-del - ie the keyboard interrupt. | ||
| 558 | * As it's called within an interrupt, it may NOT sync: the only choice | ||
| 559 | * is whether to reboot at once, or just ignore the ctrl-alt-del. | ||
| 560 | */ | ||
| 561 | void ctrl_alt_del(void) | ||
| 562 | { | ||
| 563 | static DECLARE_WORK(cad_work, deferred_cad); | ||
| 564 | |||
| 565 | if (C_A_D) | ||
| 566 | schedule_work(&cad_work); | ||
| 567 | else | ||
| 568 | kill_cad_pid(SIGINT, 1); | ||
| 569 | } | ||
| 570 | |||
| 571 | /* | 297 | /* |
| 572 | * Unprivileged users may change the real gid to the effective gid | 298 | * Unprivileged users may change the real gid to the effective gid |
| 573 | * or vice versa. (BSD-style) | 299 | * or vice versa. (BSD-style) |
| @@ -2292,68 +2018,6 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, | |||
| 2292 | return err ? -EFAULT : 0; | 2018 | return err ? -EFAULT : 0; |
| 2293 | } | 2019 | } |
| 2294 | 2020 | ||
| 2295 | char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; | ||
| 2296 | |||
| 2297 | static int __orderly_poweroff(bool force) | ||
| 2298 | { | ||
| 2299 | char **argv; | ||
| 2300 | static char *envp[] = { | ||
| 2301 | "HOME=/", | ||
| 2302 | "PATH=/sbin:/bin:/usr/sbin:/usr/bin", | ||
| 2303 | NULL | ||
| 2304 | }; | ||
| 2305 | int ret; | ||
| 2306 | |||
| 2307 | argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL); | ||
| 2308 | if (argv) { | ||
| 2309 | ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); | ||
| 2310 | argv_free(argv); | ||
| 2311 | } else { | ||
| 2312 | printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", | ||
| 2313 | __func__, poweroff_cmd); | ||
| 2314 | ret = -ENOMEM; | ||
| 2315 | } | ||
| 2316 | |||
| 2317 | if (ret && force) { | ||
| 2318 | printk(KERN_WARNING "Failed to start orderly shutdown: " | ||
| 2319 | "forcing the issue\n"); | ||
| 2320 | /* | ||
| 2321 | * I guess this should try to kick off some daemon to sync and | ||
| 2322 | * poweroff asap. Or not even bother syncing if we're doing an | ||
| 2323 | * emergency shutdown? | ||
| 2324 | */ | ||
| 2325 | emergency_sync(); | ||
| 2326 | kernel_power_off(); | ||
| 2327 | } | ||
| 2328 | |||
| 2329 | return ret; | ||
| 2330 | } | ||
| 2331 | |||
| 2332 | static bool poweroff_force; | ||
| 2333 | |||
| 2334 | static void poweroff_work_func(struct work_struct *work) | ||
| 2335 | { | ||
| 2336 | __orderly_poweroff(poweroff_force); | ||
| 2337 | } | ||
| 2338 | |||
| 2339 | static DECLARE_WORK(poweroff_work, poweroff_work_func); | ||
| 2340 | |||
| 2341 | /** | ||
| 2342 | * orderly_poweroff - Trigger an orderly system poweroff | ||
| 2343 | * @force: force poweroff if command execution fails | ||
| 2344 | * | ||
| 2345 | * This may be called from any context to trigger a system shutdown. | ||
| 2346 | * If the orderly shutdown fails, it will force an immediate shutdown. | ||
| 2347 | */ | ||
| 2348 | int orderly_poweroff(bool force) | ||
| 2349 | { | ||
| 2350 | if (force) /* do not override the pending "true" */ | ||
| 2351 | poweroff_force = true; | ||
| 2352 | schedule_work(&poweroff_work); | ||
| 2353 | return 0; | ||
| 2354 | } | ||
| 2355 | EXPORT_SYMBOL_GPL(orderly_poweroff); | ||
| 2356 | |||
| 2357 | /** | 2021 | /** |
| 2358 | * do_sysinfo - fill in sysinfo struct | 2022 | * do_sysinfo - fill in sysinfo struct |
| 2359 | * @info: pointer to buffer to fill | 2023 | * @info: pointer to buffer to fill |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4ce13c3cedb9..ac09d98490aa 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -599,6 +599,13 @@ static struct ctl_table kern_table[] = { | |||
| 599 | .mode = 0644, | 599 | .mode = 0644, |
| 600 | .proc_handler = proc_dointvec, | 600 | .proc_handler = proc_dointvec, |
| 601 | }, | 601 | }, |
| 602 | { | ||
| 603 | .procname = "traceoff_on_warning", | ||
| 604 | .data = &__disable_trace_on_warning, | ||
| 605 | .maxlen = sizeof(__disable_trace_on_warning), | ||
| 606 | .mode = 0644, | ||
| 607 | .proc_handler = proc_dointvec, | ||
| 608 | }, | ||
| 602 | #endif | 609 | #endif |
| 603 | #ifdef CONFIG_MODULES | 610 | #ifdef CONFIG_MODULES |
| 604 | { | 611 | { |
| @@ -800,7 +807,7 @@ static struct ctl_table kern_table[] = { | |||
| 800 | #if defined(CONFIG_LOCKUP_DETECTOR) | 807 | #if defined(CONFIG_LOCKUP_DETECTOR) |
| 801 | { | 808 | { |
| 802 | .procname = "watchdog", | 809 | .procname = "watchdog", |
| 803 | .data = &watchdog_enabled, | 810 | .data = &watchdog_user_enabled, |
| 804 | .maxlen = sizeof (int), | 811 | .maxlen = sizeof (int), |
| 805 | .mode = 0644, | 812 | .mode = 0644, |
| 806 | .proc_handler = proc_dowatchdog, | 813 | .proc_handler = proc_dowatchdog, |
| @@ -827,7 +834,7 @@ static struct ctl_table kern_table[] = { | |||
| 827 | }, | 834 | }, |
| 828 | { | 835 | { |
| 829 | .procname = "nmi_watchdog", | 836 | .procname = "nmi_watchdog", |
| 830 | .data = &watchdog_enabled, | 837 | .data = &watchdog_user_enabled, |
| 831 | .maxlen = sizeof (int), | 838 | .maxlen = sizeof (int), |
| 832 | .mode = 0644, | 839 | .mode = 0644, |
| 833 | .proc_handler = proc_dowatchdog, | 840 | .proc_handler = proc_dowatchdog, |
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index aea4a9ea6fc8..b609213ca9a2 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c | |||
| @@ -3,7 +3,6 @@ | |||
| 3 | #include "../fs/xfs/xfs_sysctl.h" | 3 | #include "../fs/xfs/xfs_sysctl.h" |
| 4 | #include <linux/sunrpc/debug.h> | 4 | #include <linux/sunrpc/debug.h> |
| 5 | #include <linux/string.h> | 5 | #include <linux/string.h> |
| 6 | #include <net/ip_vs.h> | ||
| 7 | #include <linux/syscalls.h> | 6 | #include <linux/syscalls.h> |
| 8 | #include <linux/namei.h> | 7 | #include <linux/namei.h> |
| 9 | #include <linux/mount.h> | 8 | #include <linux/mount.h> |
diff --git a/kernel/time/Makefile b/kernel/time/Makefile index ff7d9d2ab504..9250130646f5 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile | |||
| @@ -4,6 +4,8 @@ obj-y += timeconv.o posix-clock.o alarmtimer.o | |||
| 4 | obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o | 4 | obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o |
| 5 | obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o | 5 | obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o |
| 6 | obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o | 6 | obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o |
| 7 | obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o | ||
| 7 | obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o | 8 | obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o |
| 8 | obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o | 9 | obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o |
| 9 | obj-$(CONFIG_TIMER_STATS) += timer_stats.o | 10 | obj-$(CONFIG_TIMER_STATS) += timer_stats.o |
| 11 | obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o | ||
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index f11d83b12949..eec50fcef9e4 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c | |||
| @@ -199,6 +199,13 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) | |||
| 199 | 199 | ||
| 200 | } | 200 | } |
| 201 | 201 | ||
| 202 | ktime_t alarm_expires_remaining(const struct alarm *alarm) | ||
| 203 | { | ||
| 204 | struct alarm_base *base = &alarm_bases[alarm->type]; | ||
| 205 | return ktime_sub(alarm->node.expires, base->gettime()); | ||
| 206 | } | ||
| 207 | EXPORT_SYMBOL_GPL(alarm_expires_remaining); | ||
| 208 | |||
| 202 | #ifdef CONFIG_RTC_CLASS | 209 | #ifdef CONFIG_RTC_CLASS |
| 203 | /** | 210 | /** |
| 204 | * alarmtimer_suspend - Suspend time callback | 211 | * alarmtimer_suspend - Suspend time callback |
| @@ -303,9 +310,10 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, | |||
| 303 | alarm->type = type; | 310 | alarm->type = type; |
| 304 | alarm->state = ALARMTIMER_STATE_INACTIVE; | 311 | alarm->state = ALARMTIMER_STATE_INACTIVE; |
| 305 | } | 312 | } |
| 313 | EXPORT_SYMBOL_GPL(alarm_init); | ||
| 306 | 314 | ||
| 307 | /** | 315 | /** |
| 308 | * alarm_start - Sets an alarm to fire | 316 | * alarm_start - Sets an absolute alarm to fire |
| 309 | * @alarm: ptr to alarm to set | 317 | * @alarm: ptr to alarm to set |
| 310 | * @start: time to run the alarm | 318 | * @start: time to run the alarm |
| 311 | */ | 319 | */ |
| @@ -323,6 +331,34 @@ int alarm_start(struct alarm *alarm, ktime_t start) | |||
| 323 | spin_unlock_irqrestore(&base->lock, flags); | 331 | spin_unlock_irqrestore(&base->lock, flags); |
| 324 | return ret; | 332 | return ret; |
| 325 | } | 333 | } |
| 334 | EXPORT_SYMBOL_GPL(alarm_start); | ||
| 335 | |||
| 336 | /** | ||
| 337 | * alarm_start_relative - Sets a relative alarm to fire | ||
| 338 | * @alarm: ptr to alarm to set | ||
| 339 | * @start: time relative to now to run the alarm | ||
| 340 | */ | ||
| 341 | int alarm_start_relative(struct alarm *alarm, ktime_t start) | ||
| 342 | { | ||
| 343 | struct alarm_base *base = &alarm_bases[alarm->type]; | ||
| 344 | |||
| 345 | start = ktime_add(start, base->gettime()); | ||
| 346 | return alarm_start(alarm, start); | ||
| 347 | } | ||
| 348 | EXPORT_SYMBOL_GPL(alarm_start_relative); | ||
| 349 | |||
| 350 | void alarm_restart(struct alarm *alarm) | ||
| 351 | { | ||
| 352 | struct alarm_base *base = &alarm_bases[alarm->type]; | ||
| 353 | unsigned long flags; | ||
| 354 | |||
| 355 | spin_lock_irqsave(&base->lock, flags); | ||
| 356 | hrtimer_set_expires(&alarm->timer, alarm->node.expires); | ||
| 357 | hrtimer_restart(&alarm->timer); | ||
| 358 | alarmtimer_enqueue(base, alarm); | ||
| 359 | spin_unlock_irqrestore(&base->lock, flags); | ||
| 360 | } | ||
| 361 | EXPORT_SYMBOL_GPL(alarm_restart); | ||
| 326 | 362 | ||
| 327 | /** | 363 | /** |
| 328 | * alarm_try_to_cancel - Tries to cancel an alarm timer | 364 | * alarm_try_to_cancel - Tries to cancel an alarm timer |
| @@ -344,6 +380,7 @@ int alarm_try_to_cancel(struct alarm *alarm) | |||
| 344 | spin_unlock_irqrestore(&base->lock, flags); | 380 | spin_unlock_irqrestore(&base->lock, flags); |
| 345 | return ret; | 381 | return ret; |
| 346 | } | 382 | } |
| 383 | EXPORT_SYMBOL_GPL(alarm_try_to_cancel); | ||
| 347 | 384 | ||
| 348 | 385 | ||
| 349 | /** | 386 | /** |
| @@ -361,6 +398,7 @@ int alarm_cancel(struct alarm *alarm) | |||
| 361 | cpu_relax(); | 398 | cpu_relax(); |
| 362 | } | 399 | } |
| 363 | } | 400 | } |
| 401 | EXPORT_SYMBOL_GPL(alarm_cancel); | ||
| 364 | 402 | ||
| 365 | 403 | ||
| 366 | u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) | 404 | u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) |
| @@ -393,8 +431,15 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) | |||
| 393 | alarm->node.expires = ktime_add(alarm->node.expires, interval); | 431 | alarm->node.expires = ktime_add(alarm->node.expires, interval); |
| 394 | return overrun; | 432 | return overrun; |
| 395 | } | 433 | } |
| 434 | EXPORT_SYMBOL_GPL(alarm_forward); | ||
| 396 | 435 | ||
| 436 | u64 alarm_forward_now(struct alarm *alarm, ktime_t interval) | ||
| 437 | { | ||
| 438 | struct alarm_base *base = &alarm_bases[alarm->type]; | ||
| 397 | 439 | ||
| 440 | return alarm_forward(alarm, base->gettime(), interval); | ||
| 441 | } | ||
| 442 | EXPORT_SYMBOL_GPL(alarm_forward_now); | ||
| 398 | 443 | ||
| 399 | 444 | ||
| 400 | /** | 445 | /** |
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index c6d6400ee137..38959c866789 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c | |||
| @@ -15,20 +15,23 @@ | |||
| 15 | #include <linux/hrtimer.h> | 15 | #include <linux/hrtimer.h> |
| 16 | #include <linux/init.h> | 16 | #include <linux/init.h> |
| 17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
| 18 | #include <linux/notifier.h> | ||
| 19 | #include <linux/smp.h> | 18 | #include <linux/smp.h> |
| 19 | #include <linux/device.h> | ||
| 20 | 20 | ||
| 21 | #include "tick-internal.h" | 21 | #include "tick-internal.h" |
| 22 | 22 | ||
| 23 | /* The registered clock event devices */ | 23 | /* The registered clock event devices */ |
| 24 | static LIST_HEAD(clockevent_devices); | 24 | static LIST_HEAD(clockevent_devices); |
| 25 | static LIST_HEAD(clockevents_released); | 25 | static LIST_HEAD(clockevents_released); |
| 26 | |||
| 27 | /* Notification for clock events */ | ||
| 28 | static RAW_NOTIFIER_HEAD(clockevents_chain); | ||
| 29 | |||
| 30 | /* Protection for the above */ | 26 | /* Protection for the above */ |
| 31 | static DEFINE_RAW_SPINLOCK(clockevents_lock); | 27 | static DEFINE_RAW_SPINLOCK(clockevents_lock); |
| 28 | /* Protection for unbind operations */ | ||
| 29 | static DEFINE_MUTEX(clockevents_mutex); | ||
| 30 | |||
| 31 | struct ce_unbind { | ||
| 32 | struct clock_event_device *ce; | ||
| 33 | int res; | ||
| 34 | }; | ||
| 32 | 35 | ||
| 33 | /** | 36 | /** |
| 34 | * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds | 37 | * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds |
| @@ -232,47 +235,107 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, | |||
| 232 | return (rc && force) ? clockevents_program_min_delta(dev) : rc; | 235 | return (rc && force) ? clockevents_program_min_delta(dev) : rc; |
| 233 | } | 236 | } |
| 234 | 237 | ||
| 235 | /** | 238 | /* |
| 236 | * clockevents_register_notifier - register a clock events change listener | 239 | * Called after a notify add to make devices available which were |
| 240 | * released from the notifier call. | ||
| 237 | */ | 241 | */ |
| 238 | int clockevents_register_notifier(struct notifier_block *nb) | 242 | static void clockevents_notify_released(void) |
| 239 | { | 243 | { |
| 240 | unsigned long flags; | 244 | struct clock_event_device *dev; |
| 241 | int ret; | ||
| 242 | 245 | ||
| 243 | raw_spin_lock_irqsave(&clockevents_lock, flags); | 246 | while (!list_empty(&clockevents_released)) { |
| 244 | ret = raw_notifier_chain_register(&clockevents_chain, nb); | 247 | dev = list_entry(clockevents_released.next, |
| 245 | raw_spin_unlock_irqrestore(&clockevents_lock, flags); | 248 | struct clock_event_device, list); |
| 249 | list_del(&dev->list); | ||
| 250 | list_add(&dev->list, &clockevent_devices); | ||
| 251 | tick_check_new_device(dev); | ||
| 252 | } | ||
| 253 | } | ||
| 246 | 254 | ||
| 247 | return ret; | 255 | /* |
| 256 | * Try to install a replacement clock event device | ||
| 257 | */ | ||
| 258 | static int clockevents_replace(struct clock_event_device *ced) | ||
| 259 | { | ||
| 260 | struct clock_event_device *dev, *newdev = NULL; | ||
| 261 | |||
| 262 | list_for_each_entry(dev, &clockevent_devices, list) { | ||
| 263 | if (dev == ced || dev->mode != CLOCK_EVT_MODE_UNUSED) | ||
| 264 | continue; | ||
| 265 | |||
| 266 | if (!tick_check_replacement(newdev, dev)) | ||
| 267 | continue; | ||
| 268 | |||
| 269 | if (!try_module_get(dev->owner)) | ||
| 270 | continue; | ||
| 271 | |||
| 272 | if (newdev) | ||
| 273 | module_put(newdev->owner); | ||
| 274 | newdev = dev; | ||
| 275 | } | ||
| 276 | if (newdev) { | ||
| 277 | tick_install_replacement(newdev); | ||
| 278 | list_del_init(&ced->list); | ||
| 279 | } | ||
| 280 | return newdev ? 0 : -EBUSY; | ||
| 248 | } | 281 | } |
| 249 | 282 | ||
| 250 | /* | 283 | /* |
| 251 | * Notify about a clock event change. Called with clockevents_lock | 284 | * Called with clockevents_mutex and clockevents_lock held |
| 252 | * held. | ||
| 253 | */ | 285 | */ |
| 254 | static void clockevents_do_notify(unsigned long reason, void *dev) | 286 | static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu) |
| 255 | { | 287 | { |
| 256 | raw_notifier_call_chain(&clockevents_chain, reason, dev); | 288 | /* Fast track. Device is unused */ |
| 289 | if (ced->mode == CLOCK_EVT_MODE_UNUSED) { | ||
| 290 | list_del_init(&ced->list); | ||
| 291 | return 0; | ||
| 292 | } | ||
| 293 | |||
| 294 | return ced == per_cpu(tick_cpu_device, cpu).evtdev ? -EAGAIN : -EBUSY; | ||
| 257 | } | 295 | } |
| 258 | 296 | ||
| 259 | /* | 297 | /* |
| 260 | * Called after a notify add to make devices available which were | 298 | * SMP function call to unbind a device |
| 261 | * released from the notifier call. | ||
| 262 | */ | 299 | */ |
| 263 | static void clockevents_notify_released(void) | 300 | static void __clockevents_unbind(void *arg) |
| 264 | { | 301 | { |
| 265 | struct clock_event_device *dev; | 302 | struct ce_unbind *cu = arg; |
| 303 | int res; | ||
| 304 | |||
| 305 | raw_spin_lock(&clockevents_lock); | ||
| 306 | res = __clockevents_try_unbind(cu->ce, smp_processor_id()); | ||
| 307 | if (res == -EAGAIN) | ||
| 308 | res = clockevents_replace(cu->ce); | ||
| 309 | cu->res = res; | ||
| 310 | raw_spin_unlock(&clockevents_lock); | ||
| 311 | } | ||
| 266 | 312 | ||
| 267 | while (!list_empty(&clockevents_released)) { | 313 | /* |
| 268 | dev = list_entry(clockevents_released.next, | 314 | * Issues smp function call to unbind a per cpu device. Called with |
| 269 | struct clock_event_device, list); | 315 | * clockevents_mutex held. |
| 270 | list_del(&dev->list); | 316 | */ |
| 271 | list_add(&dev->list, &clockevent_devices); | 317 | static int clockevents_unbind(struct clock_event_device *ced, int cpu) |
| 272 | clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); | 318 | { |
| 273 | } | 319 | struct ce_unbind cu = { .ce = ced, .res = -ENODEV }; |
| 320 | |||
| 321 | smp_call_function_single(cpu, __clockevents_unbind, &cu, 1); | ||
| 322 | return cu.res; | ||
| 274 | } | 323 | } |
| 275 | 324 | ||
| 325 | /* | ||
| 326 | * Unbind a clockevents device. | ||
| 327 | */ | ||
| 328 | int clockevents_unbind_device(struct clock_event_device *ced, int cpu) | ||
| 329 | { | ||
| 330 | int ret; | ||
| 331 | |||
| 332 | mutex_lock(&clockevents_mutex); | ||
| 333 | ret = clockevents_unbind(ced, cpu); | ||
| 334 | mutex_unlock(&clockevents_mutex); | ||
| 335 | return ret; | ||
| 336 | } | ||
| 337 | EXPORT_SYMBOL_GPL(clockevents_unbind); | ||
| 338 | |||
| 276 | /** | 339 | /** |
| 277 | * clockevents_register_device - register a clock event device | 340 | * clockevents_register_device - register a clock event device |
| 278 | * @dev: device to register | 341 | * @dev: device to register |
| @@ -290,7 +353,7 @@ void clockevents_register_device(struct clock_event_device *dev) | |||
| 290 | raw_spin_lock_irqsave(&clockevents_lock, flags); | 353 | raw_spin_lock_irqsave(&clockevents_lock, flags); |
| 291 | 354 | ||
| 292 | list_add(&dev->list, &clockevent_devices); | 355 | list_add(&dev->list, &clockevent_devices); |
| 293 | clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); | 356 | tick_check_new_device(dev); |
| 294 | clockevents_notify_released(); | 357 | clockevents_notify_released(); |
| 295 | 358 | ||
| 296 | raw_spin_unlock_irqrestore(&clockevents_lock, flags); | 359 | raw_spin_unlock_irqrestore(&clockevents_lock, flags); |
| @@ -386,6 +449,7 @@ void clockevents_exchange_device(struct clock_event_device *old, | |||
| 386 | * released list and do a notify add later. | 449 | * released list and do a notify add later. |
| 387 | */ | 450 | */ |
| 388 | if (old) { | 451 | if (old) { |
| 452 | module_put(old->owner); | ||
| 389 | clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); | 453 | clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); |
| 390 | list_del(&old->list); | 454 | list_del(&old->list); |
| 391 | list_add(&old->list, &clockevents_released); | 455 | list_add(&old->list, &clockevents_released); |
| @@ -433,10 +497,36 @@ void clockevents_notify(unsigned long reason, void *arg) | |||
| 433 | int cpu; | 497 | int cpu; |
| 434 | 498 | ||
| 435 | raw_spin_lock_irqsave(&clockevents_lock, flags); | 499 | raw_spin_lock_irqsave(&clockevents_lock, flags); |
| 436 | clockevents_do_notify(reason, arg); | ||
| 437 | 500 | ||
| 438 | switch (reason) { | 501 | switch (reason) { |
| 502 | case CLOCK_EVT_NOTIFY_BROADCAST_ON: | ||
| 503 | case CLOCK_EVT_NOTIFY_BROADCAST_OFF: | ||
| 504 | case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: | ||
| 505 | tick_broadcast_on_off(reason, arg); | ||
| 506 | break; | ||
| 507 | |||
| 508 | case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: | ||
| 509 | case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: | ||
| 510 | tick_broadcast_oneshot_control(reason); | ||
| 511 | break; | ||
| 512 | |||
| 513 | case CLOCK_EVT_NOTIFY_CPU_DYING: | ||
| 514 | tick_handover_do_timer(arg); | ||
| 515 | break; | ||
| 516 | |||
| 517 | case CLOCK_EVT_NOTIFY_SUSPEND: | ||
| 518 | tick_suspend(); | ||
| 519 | tick_suspend_broadcast(); | ||
| 520 | break; | ||
| 521 | |||
| 522 | case CLOCK_EVT_NOTIFY_RESUME: | ||
| 523 | tick_resume(); | ||
| 524 | break; | ||
| 525 | |||
| 439 | case CLOCK_EVT_NOTIFY_CPU_DEAD: | 526 | case CLOCK_EVT_NOTIFY_CPU_DEAD: |
| 527 | tick_shutdown_broadcast_oneshot(arg); | ||
| 528 | tick_shutdown_broadcast(arg); | ||
| 529 | tick_shutdown(arg); | ||
| 440 | /* | 530 | /* |
| 441 | * Unregister the clock event devices which were | 531 | * Unregister the clock event devices which were |
| 442 | * released from the users in the notify chain. | 532 | * released from the users in the notify chain. |
| @@ -462,4 +552,123 @@ void clockevents_notify(unsigned long reason, void *arg) | |||
| 462 | raw_spin_unlock_irqrestore(&clockevents_lock, flags); | 552 | raw_spin_unlock_irqrestore(&clockevents_lock, flags); |
| 463 | } | 553 | } |
| 464 | EXPORT_SYMBOL_GPL(clockevents_notify); | 554 | EXPORT_SYMBOL_GPL(clockevents_notify); |
| 555 | |||
| 556 | #ifdef CONFIG_SYSFS | ||
| 557 | struct bus_type clockevents_subsys = { | ||
| 558 | .name = "clockevents", | ||
| 559 | .dev_name = "clockevent", | ||
| 560 | }; | ||
| 561 | |||
| 562 | static DEFINE_PER_CPU(struct device, tick_percpu_dev); | ||
| 563 | static struct tick_device *tick_get_tick_dev(struct device *dev); | ||
| 564 | |||
| 565 | static ssize_t sysfs_show_current_tick_dev(struct device *dev, | ||
| 566 | struct device_attribute *attr, | ||
| 567 | char *buf) | ||
| 568 | { | ||
| 569 | struct tick_device *td; | ||
| 570 | ssize_t count = 0; | ||
| 571 | |||
| 572 | raw_spin_lock_irq(&clockevents_lock); | ||
| 573 | td = tick_get_tick_dev(dev); | ||
| 574 | if (td && td->evtdev) | ||
| 575 | count = snprintf(buf, PAGE_SIZE, "%s\n", td->evtdev->name); | ||
| 576 | raw_spin_unlock_irq(&clockevents_lock); | ||
| 577 | return count; | ||
| 578 | } | ||
| 579 | static DEVICE_ATTR(current_device, 0444, sysfs_show_current_tick_dev, NULL); | ||
| 580 | |||
| 581 | /* We don't support the abomination of removable broadcast devices */ | ||
| 582 | static ssize_t sysfs_unbind_tick_dev(struct device *dev, | ||
| 583 | struct device_attribute *attr, | ||
| 584 | const char *buf, size_t count) | ||
| 585 | { | ||
| 586 | char name[CS_NAME_LEN]; | ||
| 587 | size_t ret = sysfs_get_uname(buf, name, count); | ||
| 588 | struct clock_event_device *ce; | ||
| 589 | |||
| 590 | if (ret < 0) | ||
| 591 | return ret; | ||
| 592 | |||
| 593 | ret = -ENODEV; | ||
| 594 | mutex_lock(&clockevents_mutex); | ||
| 595 | raw_spin_lock_irq(&clockevents_lock); | ||
| 596 | list_for_each_entry(ce, &clockevent_devices, list) { | ||
| 597 | if (!strcmp(ce->name, name)) { | ||
| 598 | ret = __clockevents_try_unbind(ce, dev->id); | ||
| 599 | break; | ||
| 600 | } | ||
| 601 | } | ||
| 602 | raw_spin_unlock_irq(&clockevents_lock); | ||
| 603 | /* | ||
| 604 | * We hold clockevents_mutex, so ce can't go away | ||
| 605 | */ | ||
| 606 | if (ret == -EAGAIN) | ||
| 607 | ret = clockevents_unbind(ce, dev->id); | ||
| 608 | mutex_unlock(&clockevents_mutex); | ||
| 609 | return ret ? ret : count; | ||
| 610 | } | ||
| 611 | static DEVICE_ATTR(unbind_device, 0200, NULL, sysfs_unbind_tick_dev); | ||
| 612 | |||
| 613 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST | ||
| 614 | static struct device tick_bc_dev = { | ||
| 615 | .init_name = "broadcast", | ||
| 616 | .id = 0, | ||
| 617 | .bus = &clockevents_subsys, | ||
| 618 | }; | ||
| 619 | |||
| 620 | static struct tick_device *tick_get_tick_dev(struct device *dev) | ||
| 621 | { | ||
| 622 | return dev == &tick_bc_dev ? tick_get_broadcast_device() : | ||
| 623 | &per_cpu(tick_cpu_device, dev->id); | ||
| 624 | } | ||
| 625 | |||
| 626 | static __init int tick_broadcast_init_sysfs(void) | ||
| 627 | { | ||
| 628 | int err = device_register(&tick_bc_dev); | ||
| 629 | |||
| 630 | if (!err) | ||
| 631 | err = device_create_file(&tick_bc_dev, &dev_attr_current_device); | ||
| 632 | return err; | ||
| 633 | } | ||
| 634 | #else | ||
| 635 | static struct tick_device *tick_get_tick_dev(struct device *dev) | ||
| 636 | { | ||
| 637 | return &per_cpu(tick_cpu_device, dev->id); | ||
| 638 | } | ||
| 639 | static inline int tick_broadcast_init_sysfs(void) { return 0; } | ||
| 465 | #endif | 640 | #endif |
| 641 | |||
| 642 | static int __init tick_init_sysfs(void) | ||
| 643 | { | ||
| 644 | int cpu; | ||
| 645 | |||
| 646 | for_each_possible_cpu(cpu) { | ||
| 647 | struct device *dev = &per_cpu(tick_percpu_dev, cpu); | ||
| 648 | int err; | ||
| 649 | |||
| 650 | dev->id = cpu; | ||
| 651 | dev->bus = &clockevents_subsys; | ||
| 652 | err = device_register(dev); | ||
| 653 | if (!err) | ||
| 654 | err = device_create_file(dev, &dev_attr_current_device); | ||
| 655 | if (!err) | ||
| 656 | err = device_create_file(dev, &dev_attr_unbind_device); | ||
| 657 | if (err) | ||
| 658 | return err; | ||
| 659 | } | ||
| 660 | return tick_broadcast_init_sysfs(); | ||
| 661 | } | ||
| 662 | |||
| 663 | static int __init clockevents_init_sysfs(void) | ||
| 664 | { | ||
| 665 | int err = subsys_system_register(&clockevents_subsys, NULL); | ||
| 666 | |||
| 667 | if (!err) | ||
| 668 | err = tick_init_sysfs(); | ||
| 669 | return err; | ||
| 670 | } | ||
| 671 | device_initcall(clockevents_init_sysfs); | ||
| 672 | #endif /* SYSFS */ | ||
| 673 | |||
| 674 | #endif /* GENERIC_CLOCK_EVENTS */ | ||
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c9583382141a..50a8736757f3 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
| @@ -31,6 +31,8 @@ | |||
| 31 | #include <linux/tick.h> | 31 | #include <linux/tick.h> |
| 32 | #include <linux/kthread.h> | 32 | #include <linux/kthread.h> |
| 33 | 33 | ||
| 34 | #include "tick-internal.h" | ||
| 35 | |||
| 34 | void timecounter_init(struct timecounter *tc, | 36 | void timecounter_init(struct timecounter *tc, |
| 35 | const struct cyclecounter *cc, | 37 | const struct cyclecounter *cc, |
| 36 | u64 start_tstamp) | 38 | u64 start_tstamp) |
| @@ -174,11 +176,12 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec) | |||
| 174 | static struct clocksource *curr_clocksource; | 176 | static struct clocksource *curr_clocksource; |
| 175 | static LIST_HEAD(clocksource_list); | 177 | static LIST_HEAD(clocksource_list); |
| 176 | static DEFINE_MUTEX(clocksource_mutex); | 178 | static DEFINE_MUTEX(clocksource_mutex); |
| 177 | static char override_name[32]; | 179 | static char override_name[CS_NAME_LEN]; |
| 178 | static int finished_booting; | 180 | static int finished_booting; |
| 179 | 181 | ||
| 180 | #ifdef CONFIG_CLOCKSOURCE_WATCHDOG | 182 | #ifdef CONFIG_CLOCKSOURCE_WATCHDOG |
| 181 | static void clocksource_watchdog_work(struct work_struct *work); | 183 | static void clocksource_watchdog_work(struct work_struct *work); |
| 184 | static void clocksource_select(void); | ||
| 182 | 185 | ||
| 183 | static LIST_HEAD(watchdog_list); | 186 | static LIST_HEAD(watchdog_list); |
| 184 | static struct clocksource *watchdog; | 187 | static struct clocksource *watchdog; |
| @@ -299,13 +302,30 @@ static void clocksource_watchdog(unsigned long data) | |||
| 299 | if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && | 302 | if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && |
| 300 | (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && | 303 | (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && |
| 301 | (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) { | 304 | (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) { |
| 305 | /* Mark it valid for high-res. */ | ||
| 302 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | 306 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; |
| 307 | |||
| 308 | /* | ||
| 309 | * clocksource_done_booting() will sort it if | ||
| 310 | * finished_booting is not set yet. | ||
| 311 | */ | ||
| 312 | if (!finished_booting) | ||
| 313 | continue; | ||
| 314 | |||
| 303 | /* | 315 | /* |
| 304 | * We just marked the clocksource as highres-capable, | 316 | * If this is not the current clocksource let |
| 305 | * notify the rest of the system as well so that we | 317 | * the watchdog thread reselect it. Due to the |
| 306 | * transition into high-res mode: | 318 | * change to high res this clocksource might |
| 319 | * be preferred now. If it is the current | ||
| 320 | * clocksource let the tick code know about | ||
| 321 | * that change. | ||
| 307 | */ | 322 | */ |
| 308 | tick_clock_notify(); | 323 | if (cs != curr_clocksource) { |
| 324 | cs->flags |= CLOCK_SOURCE_RESELECT; | ||
| 325 | schedule_work(&watchdog_work); | ||
| 326 | } else { | ||
| 327 | tick_clock_notify(); | ||
| 328 | } | ||
| 309 | } | 329 | } |
| 310 | } | 330 | } |
| 311 | 331 | ||
| @@ -388,44 +408,39 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs) | |||
| 388 | 408 | ||
| 389 | static void clocksource_dequeue_watchdog(struct clocksource *cs) | 409 | static void clocksource_dequeue_watchdog(struct clocksource *cs) |
| 390 | { | 410 | { |
| 391 | struct clocksource *tmp; | ||
| 392 | unsigned long flags; | 411 | unsigned long flags; |
| 393 | 412 | ||
| 394 | spin_lock_irqsave(&watchdog_lock, flags); | 413 | spin_lock_irqsave(&watchdog_lock, flags); |
| 395 | if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { | 414 | if (cs != watchdog) { |
| 396 | /* cs is a watched clocksource. */ | 415 | if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { |
| 397 | list_del_init(&cs->wd_list); | 416 | /* cs is a watched clocksource. */ |
| 398 | } else if (cs == watchdog) { | 417 | list_del_init(&cs->wd_list); |
| 399 | /* Reset watchdog cycles */ | 418 | /* Check if the watchdog timer needs to be stopped. */ |
| 400 | clocksource_reset_watchdog(); | 419 | clocksource_stop_watchdog(); |
| 401 | /* Current watchdog is removed. Find an alternative. */ | ||
| 402 | watchdog = NULL; | ||
| 403 | list_for_each_entry(tmp, &clocksource_list, list) { | ||
| 404 | if (tmp == cs || tmp->flags & CLOCK_SOURCE_MUST_VERIFY) | ||
| 405 | continue; | ||
| 406 | if (!watchdog || tmp->rating > watchdog->rating) | ||
| 407 | watchdog = tmp; | ||
| 408 | } | 420 | } |
| 409 | } | 421 | } |
| 410 | cs->flags &= ~CLOCK_SOURCE_WATCHDOG; | ||
| 411 | /* Check if the watchdog timer needs to be stopped. */ | ||
| 412 | clocksource_stop_watchdog(); | ||
| 413 | spin_unlock_irqrestore(&watchdog_lock, flags); | 422 | spin_unlock_irqrestore(&watchdog_lock, flags); |
| 414 | } | 423 | } |
| 415 | 424 | ||
| 416 | static int clocksource_watchdog_kthread(void *data) | 425 | static int __clocksource_watchdog_kthread(void) |
| 417 | { | 426 | { |
| 418 | struct clocksource *cs, *tmp; | 427 | struct clocksource *cs, *tmp; |
| 419 | unsigned long flags; | 428 | unsigned long flags; |
| 420 | LIST_HEAD(unstable); | 429 | LIST_HEAD(unstable); |
| 430 | int select = 0; | ||
| 421 | 431 | ||
| 422 | mutex_lock(&clocksource_mutex); | ||
| 423 | spin_lock_irqsave(&watchdog_lock, flags); | 432 | spin_lock_irqsave(&watchdog_lock, flags); |
| 424 | list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) | 433 | list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { |
| 425 | if (cs->flags & CLOCK_SOURCE_UNSTABLE) { | 434 | if (cs->flags & CLOCK_SOURCE_UNSTABLE) { |
| 426 | list_del_init(&cs->wd_list); | 435 | list_del_init(&cs->wd_list); |
| 427 | list_add(&cs->wd_list, &unstable); | 436 | list_add(&cs->wd_list, &unstable); |
| 437 | select = 1; | ||
| 428 | } | 438 | } |
| 439 | if (cs->flags & CLOCK_SOURCE_RESELECT) { | ||
| 440 | cs->flags &= ~CLOCK_SOURCE_RESELECT; | ||
| 441 | select = 1; | ||
| 442 | } | ||
| 443 | } | ||
| 429 | /* Check if the watchdog timer needs to be stopped. */ | 444 | /* Check if the watchdog timer needs to be stopped. */ |
| 430 | clocksource_stop_watchdog(); | 445 | clocksource_stop_watchdog(); |
| 431 | spin_unlock_irqrestore(&watchdog_lock, flags); | 446 | spin_unlock_irqrestore(&watchdog_lock, flags); |
| @@ -435,10 +450,23 @@ static int clocksource_watchdog_kthread(void *data) | |||
| 435 | list_del_init(&cs->wd_list); | 450 | list_del_init(&cs->wd_list); |
| 436 | __clocksource_change_rating(cs, 0); | 451 | __clocksource_change_rating(cs, 0); |
| 437 | } | 452 | } |
| 453 | return select; | ||
| 454 | } | ||
| 455 | |||
| 456 | static int clocksource_watchdog_kthread(void *data) | ||
| 457 | { | ||
| 458 | mutex_lock(&clocksource_mutex); | ||
| 459 | if (__clocksource_watchdog_kthread()) | ||
| 460 | clocksource_select(); | ||
| 438 | mutex_unlock(&clocksource_mutex); | 461 | mutex_unlock(&clocksource_mutex); |
| 439 | return 0; | 462 | return 0; |
| 440 | } | 463 | } |
| 441 | 464 | ||
| 465 | static bool clocksource_is_watchdog(struct clocksource *cs) | ||
| 466 | { | ||
| 467 | return cs == watchdog; | ||
| 468 | } | ||
| 469 | |||
| 442 | #else /* CONFIG_CLOCKSOURCE_WATCHDOG */ | 470 | #else /* CONFIG_CLOCKSOURCE_WATCHDOG */ |
| 443 | 471 | ||
| 444 | static void clocksource_enqueue_watchdog(struct clocksource *cs) | 472 | static void clocksource_enqueue_watchdog(struct clocksource *cs) |
| @@ -449,7 +477,8 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs) | |||
| 449 | 477 | ||
| 450 | static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { } | 478 | static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { } |
| 451 | static inline void clocksource_resume_watchdog(void) { } | 479 | static inline void clocksource_resume_watchdog(void) { } |
| 452 | static inline int clocksource_watchdog_kthread(void *data) { return 0; } | 480 | static inline int __clocksource_watchdog_kthread(void) { return 0; } |
| 481 | static bool clocksource_is_watchdog(struct clocksource *cs) { return false; } | ||
| 453 | 482 | ||
| 454 | #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */ | 483 | #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */ |
| 455 | 484 | ||
| @@ -553,24 +582,42 @@ static u64 clocksource_max_deferment(struct clocksource *cs) | |||
| 553 | 582 | ||
| 554 | #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET | 583 | #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET |
| 555 | 584 | ||
| 556 | /** | 585 | static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur) |
| 557 | * clocksource_select - Select the best clocksource available | ||
| 558 | * | ||
| 559 | * Private function. Must hold clocksource_mutex when called. | ||
| 560 | * | ||
| 561 | * Select the clocksource with the best rating, or the clocksource, | ||
| 562 | * which is selected by userspace override. | ||
| 563 | */ | ||
| 564 | static void clocksource_select(void) | ||
| 565 | { | 586 | { |
| 566 | struct clocksource *best, *cs; | 587 | struct clocksource *cs; |
| 567 | 588 | ||
| 568 | if (!finished_booting || list_empty(&clocksource_list)) | 589 | if (!finished_booting || list_empty(&clocksource_list)) |
| 590 | return NULL; | ||
| 591 | |||
| 592 | /* | ||
| 593 | * We pick the clocksource with the highest rating. If oneshot | ||
| 594 | * mode is active, we pick the highres valid clocksource with | ||
| 595 | * the best rating. | ||
| 596 | */ | ||
| 597 | list_for_each_entry(cs, &clocksource_list, list) { | ||
| 598 | if (skipcur && cs == curr_clocksource) | ||
| 599 | continue; | ||
| 600 | if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES)) | ||
| 601 | continue; | ||
| 602 | return cs; | ||
| 603 | } | ||
| 604 | return NULL; | ||
| 605 | } | ||
| 606 | |||
| 607 | static void __clocksource_select(bool skipcur) | ||
| 608 | { | ||
| 609 | bool oneshot = tick_oneshot_mode_active(); | ||
| 610 | struct clocksource *best, *cs; | ||
| 611 | |||
| 612 | /* Find the best suitable clocksource */ | ||
| 613 | best = clocksource_find_best(oneshot, skipcur); | ||
| 614 | if (!best) | ||
| 569 | return; | 615 | return; |
| 570 | /* First clocksource on the list has the best rating. */ | 616 | |
| 571 | best = list_first_entry(&clocksource_list, struct clocksource, list); | ||
| 572 | /* Check for the override clocksource. */ | 617 | /* Check for the override clocksource. */ |
| 573 | list_for_each_entry(cs, &clocksource_list, list) { | 618 | list_for_each_entry(cs, &clocksource_list, list) { |
| 619 | if (skipcur && cs == curr_clocksource) | ||
| 620 | continue; | ||
| 574 | if (strcmp(cs->name, override_name) != 0) | 621 | if (strcmp(cs->name, override_name) != 0) |
| 575 | continue; | 622 | continue; |
| 576 | /* | 623 | /* |
| @@ -578,8 +625,7 @@ static void clocksource_select(void) | |||
| 578 | * capable clocksource if the tick code is in oneshot | 625 | * capable clocksource if the tick code is in oneshot |
| 579 | * mode (highres or nohz) | 626 | * mode (highres or nohz) |
| 580 | */ | 627 | */ |
| 581 | if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && | 628 | if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) { |
| 582 | tick_oneshot_mode_active()) { | ||
| 583 | /* Override clocksource cannot be used. */ | 629 | /* Override clocksource cannot be used. */ |
| 584 | printk(KERN_WARNING "Override clocksource %s is not " | 630 | printk(KERN_WARNING "Override clocksource %s is not " |
| 585 | "HRT compatible. Cannot switch while in " | 631 | "HRT compatible. Cannot switch while in " |
| @@ -590,16 +636,35 @@ static void clocksource_select(void) | |||
| 590 | best = cs; | 636 | best = cs; |
| 591 | break; | 637 | break; |
| 592 | } | 638 | } |
| 593 | if (curr_clocksource != best) { | 639 | |
| 594 | printk(KERN_INFO "Switching to clocksource %s\n", best->name); | 640 | if (curr_clocksource != best && !timekeeping_notify(best)) { |
| 641 | pr_info("Switched to clocksource %s\n", best->name); | ||
| 595 | curr_clocksource = best; | 642 | curr_clocksource = best; |
| 596 | timekeeping_notify(curr_clocksource); | ||
| 597 | } | 643 | } |
| 598 | } | 644 | } |
| 599 | 645 | ||
| 646 | /** | ||
| 647 | * clocksource_select - Select the best clocksource available | ||
| 648 | * | ||
| 649 | * Private function. Must hold clocksource_mutex when called. | ||
| 650 | * | ||
| 651 | * Select the clocksource with the best rating, or the clocksource, | ||
| 652 | * which is selected by userspace override. | ||
| 653 | */ | ||
| 654 | static void clocksource_select(void) | ||
| 655 | { | ||
| 656 | return __clocksource_select(false); | ||
| 657 | } | ||
| 658 | |||
| 659 | static void clocksource_select_fallback(void) | ||
| 660 | { | ||
| 661 | return __clocksource_select(true); | ||
| 662 | } | ||
| 663 | |||
| 600 | #else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */ | 664 | #else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */ |
| 601 | 665 | ||
| 602 | static inline void clocksource_select(void) { } | 666 | static inline void clocksource_select(void) { } |
| 667 | static inline void clocksource_select_fallback(void) { } | ||
| 603 | 668 | ||
| 604 | #endif | 669 | #endif |
| 605 | 670 | ||
| @@ -614,16 +679,11 @@ static int __init clocksource_done_booting(void) | |||
| 614 | { | 679 | { |
| 615 | mutex_lock(&clocksource_mutex); | 680 | mutex_lock(&clocksource_mutex); |
| 616 | curr_clocksource = clocksource_default_clock(); | 681 | curr_clocksource = clocksource_default_clock(); |
| 617 | mutex_unlock(&clocksource_mutex); | ||
| 618 | |||
| 619 | finished_booting = 1; | 682 | finished_booting = 1; |
| 620 | |||
| 621 | /* | 683 | /* |
| 622 | * Run the watchdog first to eliminate unstable clock sources | 684 | * Run the watchdog first to eliminate unstable clock sources |
| 623 | */ | 685 | */ |
| 624 | clocksource_watchdog_kthread(NULL); | 686 | __clocksource_watchdog_kthread(); |
| 625 | |||
| 626 | mutex_lock(&clocksource_mutex); | ||
| 627 | clocksource_select(); | 687 | clocksource_select(); |
| 628 | mutex_unlock(&clocksource_mutex); | 688 | mutex_unlock(&clocksource_mutex); |
| 629 | return 0; | 689 | return 0; |
| @@ -756,7 +816,6 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating) | |||
| 756 | list_del(&cs->list); | 816 | list_del(&cs->list); |
| 757 | cs->rating = rating; | 817 | cs->rating = rating; |
| 758 | clocksource_enqueue(cs); | 818 | clocksource_enqueue(cs); |
| 759 | clocksource_select(); | ||
| 760 | } | 819 | } |
| 761 | 820 | ||
| 762 | /** | 821 | /** |
| @@ -768,21 +827,47 @@ void clocksource_change_rating(struct clocksource *cs, int rating) | |||
| 768 | { | 827 | { |
| 769 | mutex_lock(&clocksource_mutex); | 828 | mutex_lock(&clocksource_mutex); |
| 770 | __clocksource_change_rating(cs, rating); | 829 | __clocksource_change_rating(cs, rating); |
| 830 | clocksource_select(); | ||
| 771 | mutex_unlock(&clocksource_mutex); | 831 | mutex_unlock(&clocksource_mutex); |
| 772 | } | 832 | } |
| 773 | EXPORT_SYMBOL(clocksource_change_rating); | 833 | EXPORT_SYMBOL(clocksource_change_rating); |
| 774 | 834 | ||
| 835 | /* | ||
| 836 | * Unbind clocksource @cs. Called with clocksource_mutex held | ||
| 837 | */ | ||
| 838 | static int clocksource_unbind(struct clocksource *cs) | ||
| 839 | { | ||
| 840 | /* | ||
| 841 | * I really can't convince myself to support this on hardware | ||
| 842 | * designed by lobotomized monkeys. | ||
| 843 | */ | ||
| 844 | if (clocksource_is_watchdog(cs)) | ||
| 845 | return -EBUSY; | ||
| 846 | |||
| 847 | if (cs == curr_clocksource) { | ||
| 848 | /* Select and try to install a replacement clock source */ | ||
| 849 | clocksource_select_fallback(); | ||
| 850 | if (curr_clocksource == cs) | ||
| 851 | return -EBUSY; | ||
| 852 | } | ||
| 853 | clocksource_dequeue_watchdog(cs); | ||
| 854 | list_del_init(&cs->list); | ||
| 855 | return 0; | ||
| 856 | } | ||
| 857 | |||
| 775 | /** | 858 | /** |
| 776 | * clocksource_unregister - remove a registered clocksource | 859 | * clocksource_unregister - remove a registered clocksource |
| 777 | * @cs: clocksource to be unregistered | 860 | * @cs: clocksource to be unregistered |
| 778 | */ | 861 | */ |
| 779 | void clocksource_unregister(struct clocksource *cs) | 862 | int clocksource_unregister(struct clocksource *cs) |
| 780 | { | 863 | { |
| 864 | int ret = 0; | ||
| 865 | |||
| 781 | mutex_lock(&clocksource_mutex); | 866 | mutex_lock(&clocksource_mutex); |
| 782 | clocksource_dequeue_watchdog(cs); | 867 | if (!list_empty(&cs->list)) |
| 783 | list_del(&cs->list); | 868 | ret = clocksource_unbind(cs); |
| 784 | clocksource_select(); | ||
| 785 | mutex_unlock(&clocksource_mutex); | 869 | mutex_unlock(&clocksource_mutex); |
| 870 | return ret; | ||
| 786 | } | 871 | } |
| 787 | EXPORT_SYMBOL(clocksource_unregister); | 872 | EXPORT_SYMBOL(clocksource_unregister); |
| 788 | 873 | ||
| @@ -808,6 +893,23 @@ sysfs_show_current_clocksources(struct device *dev, | |||
| 808 | return count; | 893 | return count; |
| 809 | } | 894 | } |
| 810 | 895 | ||
| 896 | size_t sysfs_get_uname(const char *buf, char *dst, size_t cnt) | ||
| 897 | { | ||
| 898 | size_t ret = cnt; | ||
| 899 | |||
| 900 | /* strings from sysfs write are not 0 terminated! */ | ||
| 901 | if (!cnt || cnt >= CS_NAME_LEN) | ||
| 902 | return -EINVAL; | ||
| 903 | |||
| 904 | /* strip of \n: */ | ||
| 905 | if (buf[cnt-1] == '\n') | ||
| 906 | cnt--; | ||
| 907 | if (cnt > 0) | ||
| 908 | memcpy(dst, buf, cnt); | ||
| 909 | dst[cnt] = 0; | ||
| 910 | return ret; | ||
| 911 | } | ||
| 912 | |||
| 811 | /** | 913 | /** |
| 812 | * sysfs_override_clocksource - interface for manually overriding clocksource | 914 | * sysfs_override_clocksource - interface for manually overriding clocksource |
| 813 | * @dev: unused | 915 | * @dev: unused |
| @@ -822,22 +924,13 @@ static ssize_t sysfs_override_clocksource(struct device *dev, | |||
| 822 | struct device_attribute *attr, | 924 | struct device_attribute *attr, |
| 823 | const char *buf, size_t count) | 925 | const char *buf, size_t count) |
| 824 | { | 926 | { |
| 825 | size_t ret = count; | 927 | size_t ret; |
| 826 | |||
| 827 | /* strings from sysfs write are not 0 terminated! */ | ||
| 828 | if (count >= sizeof(override_name)) | ||
| 829 | return -EINVAL; | ||
| 830 | |||
| 831 | /* strip of \n: */ | ||
| 832 | if (buf[count-1] == '\n') | ||
| 833 | count--; | ||
| 834 | 928 | ||
| 835 | mutex_lock(&clocksource_mutex); | 929 | mutex_lock(&clocksource_mutex); |
| 836 | 930 | ||
| 837 | if (count > 0) | 931 | ret = sysfs_get_uname(buf, override_name, count); |
| 838 | memcpy(override_name, buf, count); | 932 | if (ret >= 0) |
| 839 | override_name[count] = 0; | 933 | clocksource_select(); |
| 840 | clocksource_select(); | ||
| 841 | 934 | ||
| 842 | mutex_unlock(&clocksource_mutex); | 935 | mutex_unlock(&clocksource_mutex); |
| 843 | 936 | ||
| @@ -845,6 +938,40 @@ static ssize_t sysfs_override_clocksource(struct device *dev, | |||
| 845 | } | 938 | } |
| 846 | 939 | ||
| 847 | /** | 940 | /** |
| 941 | * sysfs_unbind_current_clocksource - interface for manually unbinding clocksource | ||
| 942 | * @dev: unused | ||
| 943 | * @attr: unused | ||
| 944 | * @buf: unused | ||
| 945 | * @count: length of buffer | ||
| 946 | * | ||
| 947 | * Takes input from sysfs interface for manually unbinding a clocksource. | ||
| 948 | */ | ||
| 949 | static ssize_t sysfs_unbind_clocksource(struct device *dev, | ||
| 950 | struct device_attribute *attr, | ||
| 951 | const char *buf, size_t count) | ||
| 952 | { | ||
| 953 | struct clocksource *cs; | ||
| 954 | char name[CS_NAME_LEN]; | ||
| 955 | size_t ret; | ||
| 956 | |||
| 957 | ret = sysfs_get_uname(buf, name, count); | ||
| 958 | if (ret < 0) | ||
| 959 | return ret; | ||
| 960 | |||
| 961 | ret = -ENODEV; | ||
| 962 | mutex_lock(&clocksource_mutex); | ||
| 963 | list_for_each_entry(cs, &clocksource_list, list) { | ||
| 964 | if (strcmp(cs->name, name)) | ||
| 965 | continue; | ||
| 966 | ret = clocksource_unbind(cs); | ||
| 967 | break; | ||
| 968 | } | ||
| 969 | mutex_unlock(&clocksource_mutex); | ||
| 970 | |||
| 971 | return ret ? ret : count; | ||
| 972 | } | ||
| 973 | |||
| 974 | /** | ||
| 848 | * sysfs_show_available_clocksources - sysfs interface for listing clocksource | 975 | * sysfs_show_available_clocksources - sysfs interface for listing clocksource |
| 849 | * @dev: unused | 976 | * @dev: unused |
| 850 | * @attr: unused | 977 | * @attr: unused |
| @@ -886,6 +1013,8 @@ sysfs_show_available_clocksources(struct device *dev, | |||
| 886 | static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources, | 1013 | static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources, |
| 887 | sysfs_override_clocksource); | 1014 | sysfs_override_clocksource); |
| 888 | 1015 | ||
| 1016 | static DEVICE_ATTR(unbind_clocksource, 0200, NULL, sysfs_unbind_clocksource); | ||
| 1017 | |||
| 889 | static DEVICE_ATTR(available_clocksource, 0444, | 1018 | static DEVICE_ATTR(available_clocksource, 0444, |
| 890 | sysfs_show_available_clocksources, NULL); | 1019 | sysfs_show_available_clocksources, NULL); |
| 891 | 1020 | ||
| @@ -910,6 +1039,9 @@ static int __init init_clocksource_sysfs(void) | |||
| 910 | &device_clocksource, | 1039 | &device_clocksource, |
| 911 | &dev_attr_current_clocksource); | 1040 | &dev_attr_current_clocksource); |
| 912 | if (!error) | 1041 | if (!error) |
| 1042 | error = device_create_file(&device_clocksource, | ||
| 1043 | &dev_attr_unbind_clocksource); | ||
| 1044 | if (!error) | ||
| 913 | error = device_create_file( | 1045 | error = device_create_file( |
| 914 | &device_clocksource, | 1046 | &device_clocksource, |
| 915 | &dev_attr_available_clocksource); | 1047 | &dev_attr_available_clocksource); |
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c new file mode 100644 index 000000000000..a326f27d7f09 --- /dev/null +++ b/kernel/time/sched_clock.c | |||
| @@ -0,0 +1,212 @@ | |||
| 1 | /* | ||
| 2 | * sched_clock.c: support for extending counters to full 64-bit ns counter | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License version 2 as | ||
| 6 | * published by the Free Software Foundation. | ||
| 7 | */ | ||
| 8 | #include <linux/clocksource.h> | ||
| 9 | #include <linux/init.h> | ||
| 10 | #include <linux/jiffies.h> | ||
| 11 | #include <linux/kernel.h> | ||
| 12 | #include <linux/moduleparam.h> | ||
| 13 | #include <linux/sched.h> | ||
| 14 | #include <linux/syscore_ops.h> | ||
| 15 | #include <linux/timer.h> | ||
| 16 | #include <linux/sched_clock.h> | ||
| 17 | |||
| 18 | struct clock_data { | ||
| 19 | u64 epoch_ns; | ||
| 20 | u32 epoch_cyc; | ||
| 21 | u32 epoch_cyc_copy; | ||
| 22 | unsigned long rate; | ||
| 23 | u32 mult; | ||
| 24 | u32 shift; | ||
| 25 | bool suspended; | ||
| 26 | }; | ||
| 27 | |||
| 28 | static void sched_clock_poll(unsigned long wrap_ticks); | ||
| 29 | static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0); | ||
| 30 | static int irqtime = -1; | ||
| 31 | |||
| 32 | core_param(irqtime, irqtime, int, 0400); | ||
| 33 | |||
| 34 | static struct clock_data cd = { | ||
| 35 | .mult = NSEC_PER_SEC / HZ, | ||
| 36 | }; | ||
| 37 | |||
| 38 | static u32 __read_mostly sched_clock_mask = 0xffffffff; | ||
| 39 | |||
| 40 | static u32 notrace jiffy_sched_clock_read(void) | ||
| 41 | { | ||
| 42 | return (u32)(jiffies - INITIAL_JIFFIES); | ||
| 43 | } | ||
| 44 | |||
| 45 | static u32 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read; | ||
| 46 | |||
| 47 | static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) | ||
| 48 | { | ||
| 49 | return (cyc * mult) >> shift; | ||
| 50 | } | ||
| 51 | |||
| 52 | static unsigned long long notrace sched_clock_32(void) | ||
| 53 | { | ||
| 54 | u64 epoch_ns; | ||
| 55 | u32 epoch_cyc; | ||
| 56 | u32 cyc; | ||
| 57 | |||
| 58 | if (cd.suspended) | ||
| 59 | return cd.epoch_ns; | ||
| 60 | |||
| 61 | /* | ||
| 62 | * Load the epoch_cyc and epoch_ns atomically. We do this by | ||
| 63 | * ensuring that we always write epoch_cyc, epoch_ns and | ||
| 64 | * epoch_cyc_copy in strict order, and read them in strict order. | ||
| 65 | * If epoch_cyc and epoch_cyc_copy are not equal, then we're in | ||
| 66 | * the middle of an update, and we should repeat the load. | ||
| 67 | */ | ||
| 68 | do { | ||
| 69 | epoch_cyc = cd.epoch_cyc; | ||
| 70 | smp_rmb(); | ||
| 71 | epoch_ns = cd.epoch_ns; | ||
| 72 | smp_rmb(); | ||
| 73 | } while (epoch_cyc != cd.epoch_cyc_copy); | ||
| 74 | |||
| 75 | cyc = read_sched_clock(); | ||
| 76 | cyc = (cyc - epoch_cyc) & sched_clock_mask; | ||
| 77 | return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift); | ||
| 78 | } | ||
| 79 | |||
| 80 | /* | ||
| 81 | * Atomically update the sched_clock epoch. | ||
| 82 | */ | ||
| 83 | static void notrace update_sched_clock(void) | ||
| 84 | { | ||
| 85 | unsigned long flags; | ||
| 86 | u32 cyc; | ||
| 87 | u64 ns; | ||
| 88 | |||
| 89 | cyc = read_sched_clock(); | ||
| 90 | ns = cd.epoch_ns + | ||
| 91 | cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, | ||
| 92 | cd.mult, cd.shift); | ||
| 93 | /* | ||
| 94 | * Write epoch_cyc and epoch_ns in a way that the update is | ||
| 95 | * detectable in cyc_to_fixed_sched_clock(). | ||
| 96 | */ | ||
| 97 | raw_local_irq_save(flags); | ||
| 98 | cd.epoch_cyc_copy = cyc; | ||
| 99 | smp_wmb(); | ||
| 100 | cd.epoch_ns = ns; | ||
| 101 | smp_wmb(); | ||
| 102 | cd.epoch_cyc = cyc; | ||
| 103 | raw_local_irq_restore(flags); | ||
| 104 | } | ||
| 105 | |||
| 106 | static void sched_clock_poll(unsigned long wrap_ticks) | ||
| 107 | { | ||
| 108 | mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks)); | ||
| 109 | update_sched_clock(); | ||
| 110 | } | ||
| 111 | |||
| 112 | void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate) | ||
| 113 | { | ||
| 114 | unsigned long r, w; | ||
| 115 | u64 res, wrap; | ||
| 116 | char r_unit; | ||
| 117 | |||
| 118 | if (cd.rate > rate) | ||
| 119 | return; | ||
| 120 | |||
| 121 | BUG_ON(bits > 32); | ||
| 122 | WARN_ON(!irqs_disabled()); | ||
| 123 | read_sched_clock = read; | ||
| 124 | sched_clock_mask = (1 << bits) - 1; | ||
| 125 | cd.rate = rate; | ||
| 126 | |||
| 127 | /* calculate the mult/shift to convert counter ticks to ns. */ | ||
| 128 | clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 0); | ||
| 129 | |||
| 130 | r = rate; | ||
| 131 | if (r >= 4000000) { | ||
| 132 | r /= 1000000; | ||
| 133 | r_unit = 'M'; | ||
| 134 | } else if (r >= 1000) { | ||
| 135 | r /= 1000; | ||
| 136 | r_unit = 'k'; | ||
| 137 | } else | ||
| 138 | r_unit = ' '; | ||
| 139 | |||
| 140 | /* calculate how many ns until we wrap */ | ||
| 141 | wrap = cyc_to_ns((1ULL << bits) - 1, cd.mult, cd.shift); | ||
| 142 | do_div(wrap, NSEC_PER_MSEC); | ||
| 143 | w = wrap; | ||
| 144 | |||
| 145 | /* calculate the ns resolution of this counter */ | ||
| 146 | res = cyc_to_ns(1ULL, cd.mult, cd.shift); | ||
| 147 | pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n", | ||
| 148 | bits, r, r_unit, res, w); | ||
| 149 | |||
| 150 | /* | ||
| 151 | * Start the timer to keep sched_clock() properly updated and | ||
| 152 | * sets the initial epoch. | ||
| 153 | */ | ||
| 154 | sched_clock_timer.data = msecs_to_jiffies(w - (w / 10)); | ||
| 155 | update_sched_clock(); | ||
| 156 | |||
| 157 | /* | ||
| 158 | * Ensure that sched_clock() starts off at 0ns | ||
| 159 | */ | ||
| 160 | cd.epoch_ns = 0; | ||
| 161 | |||
| 162 | /* Enable IRQ time accounting if we have a fast enough sched_clock */ | ||
| 163 | if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) | ||
| 164 | enable_sched_clock_irqtime(); | ||
| 165 | |||
| 166 | pr_debug("Registered %pF as sched_clock source\n", read); | ||
| 167 | } | ||
| 168 | |||
| 169 | unsigned long long __read_mostly (*sched_clock_func)(void) = sched_clock_32; | ||
| 170 | |||
| 171 | unsigned long long notrace sched_clock(void) | ||
| 172 | { | ||
| 173 | return sched_clock_func(); | ||
| 174 | } | ||
| 175 | |||
| 176 | void __init sched_clock_postinit(void) | ||
| 177 | { | ||
| 178 | /* | ||
| 179 | * If no sched_clock function has been provided at that point, | ||
| 180 | * make it the final one one. | ||
| 181 | */ | ||
| 182 | if (read_sched_clock == jiffy_sched_clock_read) | ||
| 183 | setup_sched_clock(jiffy_sched_clock_read, 32, HZ); | ||
| 184 | |||
| 185 | sched_clock_poll(sched_clock_timer.data); | ||
| 186 | } | ||
| 187 | |||
| 188 | static int sched_clock_suspend(void) | ||
| 189 | { | ||
| 190 | sched_clock_poll(sched_clock_timer.data); | ||
| 191 | cd.suspended = true; | ||
| 192 | return 0; | ||
| 193 | } | ||
| 194 | |||
| 195 | static void sched_clock_resume(void) | ||
| 196 | { | ||
| 197 | cd.epoch_cyc = read_sched_clock(); | ||
| 198 | cd.epoch_cyc_copy = cd.epoch_cyc; | ||
| 199 | cd.suspended = false; | ||
| 200 | } | ||
| 201 | |||
| 202 | static struct syscore_ops sched_clock_ops = { | ||
| 203 | .suspend = sched_clock_suspend, | ||
| 204 | .resume = sched_clock_resume, | ||
| 205 | }; | ||
| 206 | |||
| 207 | static int __init sched_clock_syscore_init(void) | ||
| 208 | { | ||
| 209 | register_syscore_ops(&sched_clock_ops); | ||
| 210 | return 0; | ||
| 211 | } | ||
| 212 | device_initcall(sched_clock_syscore_init); | ||
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 20d6fba70652..218bcb565fed 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include <linux/profile.h> | 19 | #include <linux/profile.h> |
| 20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
| 21 | #include <linux/smp.h> | 21 | #include <linux/smp.h> |
| 22 | #include <linux/module.h> | ||
| 22 | 23 | ||
| 23 | #include "tick-internal.h" | 24 | #include "tick-internal.h" |
| 24 | 25 | ||
| @@ -29,6 +30,7 @@ | |||
| 29 | 30 | ||
| 30 | static struct tick_device tick_broadcast_device; | 31 | static struct tick_device tick_broadcast_device; |
| 31 | static cpumask_var_t tick_broadcast_mask; | 32 | static cpumask_var_t tick_broadcast_mask; |
| 33 | static cpumask_var_t tick_broadcast_on; | ||
| 32 | static cpumask_var_t tmpmask; | 34 | static cpumask_var_t tmpmask; |
| 33 | static DEFINE_RAW_SPINLOCK(tick_broadcast_lock); | 35 | static DEFINE_RAW_SPINLOCK(tick_broadcast_lock); |
| 34 | static int tick_broadcast_force; | 36 | static int tick_broadcast_force; |
| @@ -64,17 +66,34 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc) | |||
| 64 | /* | 66 | /* |
| 65 | * Check, if the device can be utilized as broadcast device: | 67 | * Check, if the device can be utilized as broadcast device: |
| 66 | */ | 68 | */ |
| 67 | int tick_check_broadcast_device(struct clock_event_device *dev) | 69 | static bool tick_check_broadcast_device(struct clock_event_device *curdev, |
| 70 | struct clock_event_device *newdev) | ||
| 71 | { | ||
| 72 | if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) || | ||
| 73 | (newdev->features & CLOCK_EVT_FEAT_C3STOP)) | ||
| 74 | return false; | ||
| 75 | |||
| 76 | if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT && | ||
| 77 | !(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) | ||
| 78 | return false; | ||
| 79 | |||
| 80 | return !curdev || newdev->rating > curdev->rating; | ||
| 81 | } | ||
| 82 | |||
| 83 | /* | ||
| 84 | * Conditionally install/replace broadcast device | ||
| 85 | */ | ||
| 86 | void tick_install_broadcast_device(struct clock_event_device *dev) | ||
| 68 | { | 87 | { |
| 69 | struct clock_event_device *cur = tick_broadcast_device.evtdev; | 88 | struct clock_event_device *cur = tick_broadcast_device.evtdev; |
| 70 | 89 | ||
| 71 | if ((dev->features & CLOCK_EVT_FEAT_DUMMY) || | 90 | if (!tick_check_broadcast_device(cur, dev)) |
| 72 | (tick_broadcast_device.evtdev && | 91 | return; |
| 73 | tick_broadcast_device.evtdev->rating >= dev->rating) || | 92 | |
| 74 | (dev->features & CLOCK_EVT_FEAT_C3STOP)) | 93 | if (!try_module_get(dev->owner)) |
| 75 | return 0; | 94 | return; |
| 76 | 95 | ||
| 77 | clockevents_exchange_device(tick_broadcast_device.evtdev, dev); | 96 | clockevents_exchange_device(cur, dev); |
| 78 | if (cur) | 97 | if (cur) |
| 79 | cur->event_handler = clockevents_handle_noop; | 98 | cur->event_handler = clockevents_handle_noop; |
| 80 | tick_broadcast_device.evtdev = dev; | 99 | tick_broadcast_device.evtdev = dev; |
| @@ -90,7 +109,6 @@ int tick_check_broadcast_device(struct clock_event_device *dev) | |||
| 90 | */ | 109 | */ |
| 91 | if (dev->features & CLOCK_EVT_FEAT_ONESHOT) | 110 | if (dev->features & CLOCK_EVT_FEAT_ONESHOT) |
| 92 | tick_clock_notify(); | 111 | tick_clock_notify(); |
| 93 | return 1; | ||
| 94 | } | 112 | } |
| 95 | 113 | ||
| 96 | /* | 114 | /* |
| @@ -123,8 +141,9 @@ static void tick_device_setup_broadcast_func(struct clock_event_device *dev) | |||
| 123 | */ | 141 | */ |
| 124 | int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) | 142 | int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) |
| 125 | { | 143 | { |
| 144 | struct clock_event_device *bc = tick_broadcast_device.evtdev; | ||
| 126 | unsigned long flags; | 145 | unsigned long flags; |
| 127 | int ret = 0; | 146 | int ret; |
| 128 | 147 | ||
| 129 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); | 148 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
| 130 | 149 | ||
| @@ -138,20 +157,62 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) | |||
| 138 | dev->event_handler = tick_handle_periodic; | 157 | dev->event_handler = tick_handle_periodic; |
| 139 | tick_device_setup_broadcast_func(dev); | 158 | tick_device_setup_broadcast_func(dev); |
| 140 | cpumask_set_cpu(cpu, tick_broadcast_mask); | 159 | cpumask_set_cpu(cpu, tick_broadcast_mask); |
| 141 | tick_broadcast_start_periodic(tick_broadcast_device.evtdev); | 160 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) |
| 161 | tick_broadcast_start_periodic(bc); | ||
| 162 | else | ||
| 163 | tick_broadcast_setup_oneshot(bc); | ||
| 142 | ret = 1; | 164 | ret = 1; |
| 143 | } else { | 165 | } else { |
| 144 | /* | 166 | /* |
| 145 | * When the new device is not affected by the stop | 167 | * Clear the broadcast bit for this cpu if the |
| 146 | * feature and the cpu is marked in the broadcast mask | 168 | * device is not power state affected. |
| 147 | * then clear the broadcast bit. | ||
| 148 | */ | 169 | */ |
| 149 | if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) { | 170 | if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) |
| 150 | int cpu = smp_processor_id(); | ||
| 151 | cpumask_clear_cpu(cpu, tick_broadcast_mask); | 171 | cpumask_clear_cpu(cpu, tick_broadcast_mask); |
| 152 | tick_broadcast_clear_oneshot(cpu); | 172 | else |
| 153 | } else { | ||
| 154 | tick_device_setup_broadcast_func(dev); | 173 | tick_device_setup_broadcast_func(dev); |
| 174 | |||
| 175 | /* | ||
| 176 | * Clear the broadcast bit if the CPU is not in | ||
| 177 | * periodic broadcast on state. | ||
| 178 | */ | ||
| 179 | if (!cpumask_test_cpu(cpu, tick_broadcast_on)) | ||
| 180 | cpumask_clear_cpu(cpu, tick_broadcast_mask); | ||
| 181 | |||
| 182 | switch (tick_broadcast_device.mode) { | ||
| 183 | case TICKDEV_MODE_ONESHOT: | ||
| 184 | /* | ||
| 185 | * If the system is in oneshot mode we can | ||
| 186 | * unconditionally clear the oneshot mask bit, | ||
| 187 | * because the CPU is running and therefore | ||
| 188 | * not in an idle state which causes the power | ||
| 189 | * state affected device to stop. Let the | ||
| 190 | * caller initialize the device. | ||
| 191 | */ | ||
| 192 | tick_broadcast_clear_oneshot(cpu); | ||
| 193 | ret = 0; | ||
| 194 | break; | ||
| 195 | |||
| 196 | case TICKDEV_MODE_PERIODIC: | ||
| 197 | /* | ||
| 198 | * If the system is in periodic mode, check | ||
| 199 | * whether the broadcast device can be | ||
| 200 | * switched off now. | ||
| 201 | */ | ||
| 202 | if (cpumask_empty(tick_broadcast_mask) && bc) | ||
| 203 | clockevents_shutdown(bc); | ||
| 204 | /* | ||
| 205 | * If we kept the cpu in the broadcast mask, | ||
| 206 | * tell the caller to leave the per cpu device | ||
| 207 | * in shutdown state. The periodic interrupt | ||
| 208 | * is delivered by the broadcast device. | ||
| 209 | */ | ||
| 210 | ret = cpumask_test_cpu(cpu, tick_broadcast_mask); | ||
| 211 | break; | ||
| 212 | default: | ||
| 213 | /* Nothing to do */ | ||
| 214 | ret = 0; | ||
| 215 | break; | ||
| 155 | } | 216 | } |
| 156 | } | 217 | } |
| 157 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); | 218 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
| @@ -281,6 +342,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason) | |||
| 281 | switch (*reason) { | 342 | switch (*reason) { |
| 282 | case CLOCK_EVT_NOTIFY_BROADCAST_ON: | 343 | case CLOCK_EVT_NOTIFY_BROADCAST_ON: |
| 283 | case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: | 344 | case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: |
| 345 | cpumask_set_cpu(cpu, tick_broadcast_on); | ||
| 284 | if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { | 346 | if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { |
| 285 | if (tick_broadcast_device.mode == | 347 | if (tick_broadcast_device.mode == |
| 286 | TICKDEV_MODE_PERIODIC) | 348 | TICKDEV_MODE_PERIODIC) |
| @@ -290,8 +352,12 @@ static void tick_do_broadcast_on_off(unsigned long *reason) | |||
| 290 | tick_broadcast_force = 1; | 352 | tick_broadcast_force = 1; |
| 291 | break; | 353 | break; |
| 292 | case CLOCK_EVT_NOTIFY_BROADCAST_OFF: | 354 | case CLOCK_EVT_NOTIFY_BROADCAST_OFF: |
| 293 | if (!tick_broadcast_force && | 355 | if (tick_broadcast_force) |
| 294 | cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) { | 356 | break; |
| 357 | cpumask_clear_cpu(cpu, tick_broadcast_on); | ||
| 358 | if (!tick_device_is_functional(dev)) | ||
| 359 | break; | ||
| 360 | if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) { | ||
| 295 | if (tick_broadcast_device.mode == | 361 | if (tick_broadcast_device.mode == |
| 296 | TICKDEV_MODE_PERIODIC) | 362 | TICKDEV_MODE_PERIODIC) |
| 297 | tick_setup_periodic(dev, 0); | 363 | tick_setup_periodic(dev, 0); |
| @@ -349,6 +415,7 @@ void tick_shutdown_broadcast(unsigned int *cpup) | |||
| 349 | 415 | ||
| 350 | bc = tick_broadcast_device.evtdev; | 416 | bc = tick_broadcast_device.evtdev; |
| 351 | cpumask_clear_cpu(cpu, tick_broadcast_mask); | 417 | cpumask_clear_cpu(cpu, tick_broadcast_mask); |
| 418 | cpumask_clear_cpu(cpu, tick_broadcast_on); | ||
| 352 | 419 | ||
| 353 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { | 420 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { |
| 354 | if (bc && cpumask_empty(tick_broadcast_mask)) | 421 | if (bc && cpumask_empty(tick_broadcast_mask)) |
| @@ -475,7 +542,15 @@ void tick_check_oneshot_broadcast(int cpu) | |||
| 475 | if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) { | 542 | if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) { |
| 476 | struct tick_device *td = &per_cpu(tick_cpu_device, cpu); | 543 | struct tick_device *td = &per_cpu(tick_cpu_device, cpu); |
| 477 | 544 | ||
| 478 | clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT); | 545 | /* |
| 546 | * We might be in the middle of switching over from | ||
| 547 | * periodic to oneshot. If the CPU has not yet | ||
| 548 | * switched over, leave the device alone. | ||
| 549 | */ | ||
| 550 | if (td->mode == TICKDEV_MODE_ONESHOT) { | ||
| 551 | clockevents_set_mode(td->evtdev, | ||
| 552 | CLOCK_EVT_MODE_ONESHOT); | ||
| 553 | } | ||
| 479 | } | 554 | } |
| 480 | } | 555 | } |
| 481 | 556 | ||
| @@ -522,6 +597,13 @@ again: | |||
| 522 | cpumask_clear(tick_broadcast_force_mask); | 597 | cpumask_clear(tick_broadcast_force_mask); |
| 523 | 598 | ||
| 524 | /* | 599 | /* |
| 600 | * Sanity check. Catch the case where we try to broadcast to | ||
| 601 | * offline cpus. | ||
| 602 | */ | ||
| 603 | if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask))) | ||
| 604 | cpumask_and(tmpmask, tmpmask, cpu_online_mask); | ||
| 605 | |||
| 606 | /* | ||
| 525 | * Wakeup the cpus which have an expired event. | 607 | * Wakeup the cpus which have an expired event. |
| 526 | */ | 608 | */ |
| 527 | tick_do_broadcast(tmpmask); | 609 | tick_do_broadcast(tmpmask); |
| @@ -761,10 +843,12 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup) | |||
| 761 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); | 843 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
| 762 | 844 | ||
| 763 | /* | 845 | /* |
| 764 | * Clear the broadcast mask flag for the dead cpu, but do not | 846 | * Clear the broadcast masks for the dead cpu, but do not stop |
| 765 | * stop the broadcast device! | 847 | * the broadcast device! |
| 766 | */ | 848 | */ |
| 767 | cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); | 849 | cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); |
| 850 | cpumask_clear_cpu(cpu, tick_broadcast_pending_mask); | ||
| 851 | cpumask_clear_cpu(cpu, tick_broadcast_force_mask); | ||
| 768 | 852 | ||
| 769 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); | 853 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
| 770 | } | 854 | } |
| @@ -792,6 +876,7 @@ bool tick_broadcast_oneshot_available(void) | |||
| 792 | void __init tick_broadcast_init(void) | 876 | void __init tick_broadcast_init(void) |
| 793 | { | 877 | { |
| 794 | zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT); | 878 | zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT); |
| 879 | zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT); | ||
| 795 | zalloc_cpumask_var(&tmpmask, GFP_NOWAIT); | 880 | zalloc_cpumask_var(&tmpmask, GFP_NOWAIT); |
| 796 | #ifdef CONFIG_TICK_ONESHOT | 881 | #ifdef CONFIG_TICK_ONESHOT |
| 797 | zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT); | 882 | zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT); |
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 5d3fb100bc06..64522ecdfe0e 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <linux/percpu.h> | 18 | #include <linux/percpu.h> |
| 19 | #include <linux/profile.h> | 19 | #include <linux/profile.h> |
| 20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
| 21 | #include <linux/module.h> | ||
| 21 | 22 | ||
| 22 | #include <asm/irq_regs.h> | 23 | #include <asm/irq_regs.h> |
| 23 | 24 | ||
| @@ -33,7 +34,6 @@ DEFINE_PER_CPU(struct tick_device, tick_cpu_device); | |||
| 33 | ktime_t tick_next_period; | 34 | ktime_t tick_next_period; |
| 34 | ktime_t tick_period; | 35 | ktime_t tick_period; |
| 35 | int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT; | 36 | int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT; |
| 36 | static DEFINE_RAW_SPINLOCK(tick_device_lock); | ||
| 37 | 37 | ||
| 38 | /* | 38 | /* |
| 39 | * Debugging: see timer_list.c | 39 | * Debugging: see timer_list.c |
| @@ -194,7 +194,8 @@ static void tick_setup_device(struct tick_device *td, | |||
| 194 | * When global broadcasting is active, check if the current | 194 | * When global broadcasting is active, check if the current |
| 195 | * device is registered as a placeholder for broadcast mode. | 195 | * device is registered as a placeholder for broadcast mode. |
| 196 | * This allows us to handle this x86 misfeature in a generic | 196 | * This allows us to handle this x86 misfeature in a generic |
| 197 | * way. | 197 | * way. This function also returns !=0 when we keep the |
| 198 | * current active broadcast state for this CPU. | ||
| 198 | */ | 199 | */ |
| 199 | if (tick_device_uses_broadcast(newdev, cpu)) | 200 | if (tick_device_uses_broadcast(newdev, cpu)) |
| 200 | return; | 201 | return; |
| @@ -205,17 +206,75 @@ static void tick_setup_device(struct tick_device *td, | |||
| 205 | tick_setup_oneshot(newdev, handler, next_event); | 206 | tick_setup_oneshot(newdev, handler, next_event); |
| 206 | } | 207 | } |
| 207 | 208 | ||
| 209 | void tick_install_replacement(struct clock_event_device *newdev) | ||
| 210 | { | ||
| 211 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); | ||
| 212 | int cpu = smp_processor_id(); | ||
| 213 | |||
| 214 | clockevents_exchange_device(td->evtdev, newdev); | ||
| 215 | tick_setup_device(td, newdev, cpu, cpumask_of(cpu)); | ||
| 216 | if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) | ||
| 217 | tick_oneshot_notify(); | ||
| 218 | } | ||
| 219 | |||
| 220 | static bool tick_check_percpu(struct clock_event_device *curdev, | ||
| 221 | struct clock_event_device *newdev, int cpu) | ||
| 222 | { | ||
| 223 | if (!cpumask_test_cpu(cpu, newdev->cpumask)) | ||
| 224 | return false; | ||
| 225 | if (cpumask_equal(newdev->cpumask, cpumask_of(cpu))) | ||
| 226 | return true; | ||
| 227 | /* Check if irq affinity can be set */ | ||
| 228 | if (newdev->irq >= 0 && !irq_can_set_affinity(newdev->irq)) | ||
| 229 | return false; | ||
| 230 | /* Prefer an existing cpu local device */ | ||
| 231 | if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu))) | ||
| 232 | return false; | ||
| 233 | return true; | ||
| 234 | } | ||
| 235 | |||
| 236 | static bool tick_check_preferred(struct clock_event_device *curdev, | ||
| 237 | struct clock_event_device *newdev) | ||
| 238 | { | ||
| 239 | /* Prefer oneshot capable device */ | ||
| 240 | if (!(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) { | ||
| 241 | if (curdev && (curdev->features & CLOCK_EVT_FEAT_ONESHOT)) | ||
| 242 | return false; | ||
| 243 | if (tick_oneshot_mode_active()) | ||
| 244 | return false; | ||
| 245 | } | ||
| 246 | |||
| 247 | /* | ||
| 248 | * Use the higher rated one, but prefer a CPU local device with a lower | ||
| 249 | * rating than a non-CPU local device | ||
| 250 | */ | ||
| 251 | return !curdev || | ||
| 252 | newdev->rating > curdev->rating || | ||
| 253 | !cpumask_equal(curdev->cpumask, newdev->cpumask); | ||
| 254 | } | ||
| 255 | |||
| 256 | /* | ||
| 257 | * Check whether the new device is a better fit than curdev. curdev | ||
| 258 | * can be NULL ! | ||
| 259 | */ | ||
| 260 | bool tick_check_replacement(struct clock_event_device *curdev, | ||
| 261 | struct clock_event_device *newdev) | ||
| 262 | { | ||
| 263 | if (tick_check_percpu(curdev, newdev, smp_processor_id())) | ||
| 264 | return false; | ||
| 265 | |||
| 266 | return tick_check_preferred(curdev, newdev); | ||
| 267 | } | ||
| 268 | |||
| 208 | /* | 269 | /* |
| 209 | * Check, if the new registered device should be used. | 270 | * Check, if the new registered device should be used. Called with |
| 271 | * clockevents_lock held and interrupts disabled. | ||
| 210 | */ | 272 | */ |
| 211 | static int tick_check_new_device(struct clock_event_device *newdev) | 273 | void tick_check_new_device(struct clock_event_device *newdev) |
| 212 | { | 274 | { |
| 213 | struct clock_event_device *curdev; | 275 | struct clock_event_device *curdev; |
| 214 | struct tick_device *td; | 276 | struct tick_device *td; |
| 215 | int cpu, ret = NOTIFY_OK; | 277 | int cpu; |
| 216 | unsigned long flags; | ||
| 217 | |||
| 218 | raw_spin_lock_irqsave(&tick_device_lock, flags); | ||
| 219 | 278 | ||
| 220 | cpu = smp_processor_id(); | 279 | cpu = smp_processor_id(); |
| 221 | if (!cpumask_test_cpu(cpu, newdev->cpumask)) | 280 | if (!cpumask_test_cpu(cpu, newdev->cpumask)) |
| @@ -225,40 +284,15 @@ static int tick_check_new_device(struct clock_event_device *newdev) | |||
| 225 | curdev = td->evtdev; | 284 | curdev = td->evtdev; |
| 226 | 285 | ||
| 227 | /* cpu local device ? */ | 286 | /* cpu local device ? */ |
| 228 | if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) { | 287 | if (!tick_check_percpu(curdev, newdev, cpu)) |
| 229 | 288 | goto out_bc; | |
| 230 | /* | ||
| 231 | * If the cpu affinity of the device interrupt can not | ||
| 232 | * be set, ignore it. | ||
| 233 | */ | ||
| 234 | if (!irq_can_set_affinity(newdev->irq)) | ||
| 235 | goto out_bc; | ||
| 236 | 289 | ||
| 237 | /* | 290 | /* Preference decision */ |
| 238 | * If we have a cpu local device already, do not replace it | 291 | if (!tick_check_preferred(curdev, newdev)) |
| 239 | * by a non cpu local device | 292 | goto out_bc; |
| 240 | */ | ||
| 241 | if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu))) | ||
| 242 | goto out_bc; | ||
| 243 | } | ||
| 244 | 293 | ||
| 245 | /* | 294 | if (!try_module_get(newdev->owner)) |
| 246 | * If we have an active device, then check the rating and the oneshot | 295 | return; |
| 247 | * feature. | ||
| 248 | */ | ||
| 249 | if (curdev) { | ||
| 250 | /* | ||
| 251 | * Prefer one shot capable devices ! | ||
| 252 | */ | ||
| 253 | if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) && | ||
| 254 | !(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) | ||
| 255 | goto out_bc; | ||
| 256 | /* | ||
| 257 | * Check the rating | ||
| 258 | */ | ||
| 259 | if (curdev->rating >= newdev->rating) | ||
| 260 | goto out_bc; | ||
| 261 | } | ||
| 262 | 296 | ||
| 263 | /* | 297 | /* |
| 264 | * Replace the eventually existing device by the new | 298 | * Replace the eventually existing device by the new |
| @@ -273,20 +307,13 @@ static int tick_check_new_device(struct clock_event_device *newdev) | |||
| 273 | tick_setup_device(td, newdev, cpu, cpumask_of(cpu)); | 307 | tick_setup_device(td, newdev, cpu, cpumask_of(cpu)); |
| 274 | if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) | 308 | if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) |
| 275 | tick_oneshot_notify(); | 309 | tick_oneshot_notify(); |
| 276 | 310 | return; | |
| 277 | raw_spin_unlock_irqrestore(&tick_device_lock, flags); | ||
| 278 | return NOTIFY_STOP; | ||
| 279 | 311 | ||
| 280 | out_bc: | 312 | out_bc: |
| 281 | /* | 313 | /* |
| 282 | * Can the new device be used as a broadcast device ? | 314 | * Can the new device be used as a broadcast device ? |
| 283 | */ | 315 | */ |
| 284 | if (tick_check_broadcast_device(newdev)) | 316 | tick_install_broadcast_device(newdev); |
| 285 | ret = NOTIFY_STOP; | ||
| 286 | |||
| 287 | raw_spin_unlock_irqrestore(&tick_device_lock, flags); | ||
| 288 | |||
| 289 | return ret; | ||
| 290 | } | 317 | } |
| 291 | 318 | ||
| 292 | /* | 319 | /* |
| @@ -294,7 +321,7 @@ out_bc: | |||
| 294 | * | 321 | * |
| 295 | * Called with interrupts disabled. | 322 | * Called with interrupts disabled. |
| 296 | */ | 323 | */ |
| 297 | static void tick_handover_do_timer(int *cpup) | 324 | void tick_handover_do_timer(int *cpup) |
| 298 | { | 325 | { |
| 299 | if (*cpup == tick_do_timer_cpu) { | 326 | if (*cpup == tick_do_timer_cpu) { |
| 300 | int cpu = cpumask_first(cpu_online_mask); | 327 | int cpu = cpumask_first(cpu_online_mask); |
| @@ -311,13 +338,11 @@ static void tick_handover_do_timer(int *cpup) | |||
| 311 | * access the hardware device itself. | 338 | * access the hardware device itself. |
| 312 | * We just set the mode and remove it from the lists. | 339 | * We just set the mode and remove it from the lists. |
| 313 | */ | 340 | */ |
| 314 | static void tick_shutdown(unsigned int *cpup) | 341 | void tick_shutdown(unsigned int *cpup) |
| 315 | { | 342 | { |
| 316 | struct tick_device *td = &per_cpu(tick_cpu_device, *cpup); | 343 | struct tick_device *td = &per_cpu(tick_cpu_device, *cpup); |
| 317 | struct clock_event_device *dev = td->evtdev; | 344 | struct clock_event_device *dev = td->evtdev; |
| 318 | unsigned long flags; | ||
| 319 | 345 | ||
| 320 | raw_spin_lock_irqsave(&tick_device_lock, flags); | ||
| 321 | td->mode = TICKDEV_MODE_PERIODIC; | 346 | td->mode = TICKDEV_MODE_PERIODIC; |
| 322 | if (dev) { | 347 | if (dev) { |
| 323 | /* | 348 | /* |
| @@ -329,26 +354,20 @@ static void tick_shutdown(unsigned int *cpup) | |||
| 329 | dev->event_handler = clockevents_handle_noop; | 354 | dev->event_handler = clockevents_handle_noop; |
| 330 | td->evtdev = NULL; | 355 | td->evtdev = NULL; |
| 331 | } | 356 | } |
| 332 | raw_spin_unlock_irqrestore(&tick_device_lock, flags); | ||
| 333 | } | 357 | } |
| 334 | 358 | ||
| 335 | static void tick_suspend(void) | 359 | void tick_suspend(void) |
| 336 | { | 360 | { |
| 337 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); | 361 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); |
| 338 | unsigned long flags; | ||
| 339 | 362 | ||
| 340 | raw_spin_lock_irqsave(&tick_device_lock, flags); | ||
| 341 | clockevents_shutdown(td->evtdev); | 363 | clockevents_shutdown(td->evtdev); |
| 342 | raw_spin_unlock_irqrestore(&tick_device_lock, flags); | ||
| 343 | } | 364 | } |
| 344 | 365 | ||
| 345 | static void tick_resume(void) | 366 | void tick_resume(void) |
| 346 | { | 367 | { |
| 347 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); | 368 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); |
| 348 | unsigned long flags; | ||
| 349 | int broadcast = tick_resume_broadcast(); | 369 | int broadcast = tick_resume_broadcast(); |
| 350 | 370 | ||
| 351 | raw_spin_lock_irqsave(&tick_device_lock, flags); | ||
| 352 | clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME); | 371 | clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME); |
| 353 | 372 | ||
| 354 | if (!broadcast) { | 373 | if (!broadcast) { |
| @@ -357,68 +376,12 @@ static void tick_resume(void) | |||
| 357 | else | 376 | else |
| 358 | tick_resume_oneshot(); | 377 | tick_resume_oneshot(); |
| 359 | } | 378 | } |
| 360 | raw_spin_unlock_irqrestore(&tick_device_lock, flags); | ||
| 361 | } | 379 | } |
| 362 | 380 | ||
| 363 | /* | ||
| 364 | * Notification about clock event devices | ||
| 365 | */ | ||
| 366 | static int tick_notify(struct notifier_block *nb, unsigned long reason, | ||
| 367 | void *dev) | ||
| 368 | { | ||
| 369 | switch (reason) { | ||
| 370 | |||
| 371 | case CLOCK_EVT_NOTIFY_ADD: | ||
| 372 | return tick_check_new_device(dev); | ||
| 373 | |||
| 374 | case CLOCK_EVT_NOTIFY_BROADCAST_ON: | ||
| 375 | case CLOCK_EVT_NOTIFY_BROADCAST_OFF: | ||
| 376 | case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: | ||
| 377 | tick_broadcast_on_off(reason, dev); | ||
| 378 | break; | ||
| 379 | |||
| 380 | case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: | ||
| 381 | case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: | ||
| 382 | tick_broadcast_oneshot_control(reason); | ||
| 383 | break; | ||
| 384 | |||
| 385 | case CLOCK_EVT_NOTIFY_CPU_DYING: | ||
| 386 | tick_handover_do_timer(dev); | ||
| 387 | break; | ||
| 388 | |||
| 389 | case CLOCK_EVT_NOTIFY_CPU_DEAD: | ||
| 390 | tick_shutdown_broadcast_oneshot(dev); | ||
| 391 | tick_shutdown_broadcast(dev); | ||
| 392 | tick_shutdown(dev); | ||
| 393 | break; | ||
| 394 | |||
| 395 | case CLOCK_EVT_NOTIFY_SUSPEND: | ||
| 396 | tick_suspend(); | ||
| 397 | tick_suspend_broadcast(); | ||
| 398 | break; | ||
| 399 | |||
| 400 | case CLOCK_EVT_NOTIFY_RESUME: | ||
| 401 | tick_resume(); | ||
| 402 | break; | ||
| 403 | |||
| 404 | default: | ||
| 405 | break; | ||
| 406 | } | ||
| 407 | |||
| 408 | return NOTIFY_OK; | ||
| 409 | } | ||
| 410 | |||
| 411 | static struct notifier_block tick_notifier = { | ||
| 412 | .notifier_call = tick_notify, | ||
| 413 | }; | ||
| 414 | |||
| 415 | /** | 381 | /** |
| 416 | * tick_init - initialize the tick control | 382 | * tick_init - initialize the tick control |
| 417 | * | ||
| 418 | * Register the notifier with the clockevents framework | ||
| 419 | */ | 383 | */ |
| 420 | void __init tick_init(void) | 384 | void __init tick_init(void) |
| 421 | { | 385 | { |
| 422 | clockevents_register_notifier(&tick_notifier); | ||
| 423 | tick_broadcast_init(); | 386 | tick_broadcast_init(); |
| 424 | } | 387 | } |
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index f0299eae4602..bc906cad709b 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h | |||
| @@ -6,6 +6,8 @@ | |||
| 6 | 6 | ||
| 7 | extern seqlock_t jiffies_lock; | 7 | extern seqlock_t jiffies_lock; |
| 8 | 8 | ||
| 9 | #define CS_NAME_LEN 32 | ||
| 10 | |||
| 9 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD | 11 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD |
| 10 | 12 | ||
| 11 | #define TICK_DO_TIMER_NONE -1 | 13 | #define TICK_DO_TIMER_NONE -1 |
| @@ -18,9 +20,19 @@ extern int tick_do_timer_cpu __read_mostly; | |||
| 18 | 20 | ||
| 19 | extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); | 21 | extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); |
| 20 | extern void tick_handle_periodic(struct clock_event_device *dev); | 22 | extern void tick_handle_periodic(struct clock_event_device *dev); |
| 23 | extern void tick_check_new_device(struct clock_event_device *dev); | ||
| 24 | extern void tick_handover_do_timer(int *cpup); | ||
| 25 | extern void tick_shutdown(unsigned int *cpup); | ||
| 26 | extern void tick_suspend(void); | ||
| 27 | extern void tick_resume(void); | ||
| 28 | extern bool tick_check_replacement(struct clock_event_device *curdev, | ||
| 29 | struct clock_event_device *newdev); | ||
| 30 | extern void tick_install_replacement(struct clock_event_device *dev); | ||
| 21 | 31 | ||
| 22 | extern void clockevents_shutdown(struct clock_event_device *dev); | 32 | extern void clockevents_shutdown(struct clock_event_device *dev); |
| 23 | 33 | ||
| 34 | extern size_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); | ||
| 35 | |||
| 24 | /* | 36 | /* |
| 25 | * NO_HZ / high resolution timer shared code | 37 | * NO_HZ / high resolution timer shared code |
| 26 | */ | 38 | */ |
| @@ -90,7 +102,7 @@ static inline bool tick_broadcast_oneshot_available(void) { return false; } | |||
| 90 | */ | 102 | */ |
| 91 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST | 103 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST |
| 92 | extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); | 104 | extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); |
| 93 | extern int tick_check_broadcast_device(struct clock_event_device *dev); | 105 | extern void tick_install_broadcast_device(struct clock_event_device *dev); |
| 94 | extern int tick_is_broadcast_device(struct clock_event_device *dev); | 106 | extern int tick_is_broadcast_device(struct clock_event_device *dev); |
| 95 | extern void tick_broadcast_on_off(unsigned long reason, int *oncpu); | 107 | extern void tick_broadcast_on_off(unsigned long reason, int *oncpu); |
| 96 | extern void tick_shutdown_broadcast(unsigned int *cpup); | 108 | extern void tick_shutdown_broadcast(unsigned int *cpup); |
| @@ -102,9 +114,8 @@ tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); | |||
| 102 | 114 | ||
| 103 | #else /* !BROADCAST */ | 115 | #else /* !BROADCAST */ |
| 104 | 116 | ||
| 105 | static inline int tick_check_broadcast_device(struct clock_event_device *dev) | 117 | static inline void tick_install_broadcast_device(struct clock_event_device *dev) |
| 106 | { | 118 | { |
| 107 | return 0; | ||
| 108 | } | 119 | } |
| 109 | 120 | ||
| 110 | static inline int tick_is_broadcast_device(struct clock_event_device *dev) | 121 | static inline int tick_is_broadcast_device(struct clock_event_device *dev) |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 0cf1c1453181..e80183f4a6c4 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -178,6 +178,11 @@ static bool can_stop_full_tick(void) | |||
| 178 | */ | 178 | */ |
| 179 | if (!sched_clock_stable) { | 179 | if (!sched_clock_stable) { |
| 180 | trace_tick_stop(0, "unstable sched clock\n"); | 180 | trace_tick_stop(0, "unstable sched clock\n"); |
| 181 | /* | ||
| 182 | * Don't allow the user to think they can get | ||
| 183 | * full NO_HZ with this machine. | ||
| 184 | */ | ||
| 185 | WARN_ONCE(1, "NO_HZ FULL will not work with unstable sched clock"); | ||
| 181 | return false; | 186 | return false; |
| 182 | } | 187 | } |
| 183 | #endif | 188 | #endif |
| @@ -293,7 +298,7 @@ static int __init tick_nohz_full_setup(char *str) | |||
| 293 | } | 298 | } |
| 294 | __setup("nohz_full=", tick_nohz_full_setup); | 299 | __setup("nohz_full=", tick_nohz_full_setup); |
| 295 | 300 | ||
| 296 | static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, | 301 | static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, |
| 297 | unsigned long action, | 302 | unsigned long action, |
| 298 | void *hcpu) | 303 | void *hcpu) |
| 299 | { | 304 | { |
| @@ -346,16 +351,6 @@ void __init tick_nohz_init(void) | |||
| 346 | } | 351 | } |
| 347 | 352 | ||
| 348 | cpu_notifier(tick_nohz_cpu_down_callback, 0); | 353 | cpu_notifier(tick_nohz_cpu_down_callback, 0); |
| 349 | |||
| 350 | /* Make sure full dynticks CPU are also RCU nocbs */ | ||
| 351 | for_each_cpu(cpu, nohz_full_mask) { | ||
| 352 | if (!rcu_is_nocb_cpu(cpu)) { | ||
| 353 | pr_warning("NO_HZ: CPU %d is not RCU nocb: " | ||
| 354 | "cleared from nohz_full range", cpu); | ||
| 355 | cpumask_clear_cpu(cpu, nohz_full_mask); | ||
| 356 | } | ||
| 357 | } | ||
| 358 | |||
| 359 | cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); | 354 | cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); |
| 360 | pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); | 355 | pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); |
| 361 | } | 356 | } |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index baeeb5c87cf1..48b9fffabdc2 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
| @@ -25,6 +25,11 @@ | |||
| 25 | 25 | ||
| 26 | #include "tick-internal.h" | 26 | #include "tick-internal.h" |
| 27 | #include "ntp_internal.h" | 27 | #include "ntp_internal.h" |
| 28 | #include "timekeeping_internal.h" | ||
| 29 | |||
| 30 | #define TK_CLEAR_NTP (1 << 0) | ||
| 31 | #define TK_MIRROR (1 << 1) | ||
| 32 | #define TK_CLOCK_WAS_SET (1 << 2) | ||
| 28 | 33 | ||
| 29 | static struct timekeeper timekeeper; | 34 | static struct timekeeper timekeeper; |
| 30 | static DEFINE_RAW_SPINLOCK(timekeeper_lock); | 35 | static DEFINE_RAW_SPINLOCK(timekeeper_lock); |
| @@ -200,9 +205,9 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) | |||
| 200 | 205 | ||
| 201 | static RAW_NOTIFIER_HEAD(pvclock_gtod_chain); | 206 | static RAW_NOTIFIER_HEAD(pvclock_gtod_chain); |
| 202 | 207 | ||
| 203 | static void update_pvclock_gtod(struct timekeeper *tk) | 208 | static void update_pvclock_gtod(struct timekeeper *tk, bool was_set) |
| 204 | { | 209 | { |
| 205 | raw_notifier_call_chain(&pvclock_gtod_chain, 0, tk); | 210 | raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk); |
| 206 | } | 211 | } |
| 207 | 212 | ||
| 208 | /** | 213 | /** |
| @@ -216,7 +221,7 @@ int pvclock_gtod_register_notifier(struct notifier_block *nb) | |||
| 216 | 221 | ||
| 217 | raw_spin_lock_irqsave(&timekeeper_lock, flags); | 222 | raw_spin_lock_irqsave(&timekeeper_lock, flags); |
| 218 | ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb); | 223 | ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb); |
| 219 | update_pvclock_gtod(tk); | 224 | update_pvclock_gtod(tk, true); |
| 220 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); | 225 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); |
| 221 | 226 | ||
| 222 | return ret; | 227 | return ret; |
| @@ -241,16 +246,16 @@ int pvclock_gtod_unregister_notifier(struct notifier_block *nb) | |||
| 241 | EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier); | 246 | EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier); |
| 242 | 247 | ||
| 243 | /* must hold timekeeper_lock */ | 248 | /* must hold timekeeper_lock */ |
| 244 | static void timekeeping_update(struct timekeeper *tk, bool clearntp, bool mirror) | 249 | static void timekeeping_update(struct timekeeper *tk, unsigned int action) |
| 245 | { | 250 | { |
| 246 | if (clearntp) { | 251 | if (action & TK_CLEAR_NTP) { |
| 247 | tk->ntp_error = 0; | 252 | tk->ntp_error = 0; |
| 248 | ntp_clear(); | 253 | ntp_clear(); |
| 249 | } | 254 | } |
| 250 | update_vsyscall(tk); | 255 | update_vsyscall(tk); |
| 251 | update_pvclock_gtod(tk); | 256 | update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); |
| 252 | 257 | ||
| 253 | if (mirror) | 258 | if (action & TK_MIRROR) |
| 254 | memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper)); | 259 | memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper)); |
| 255 | } | 260 | } |
| 256 | 261 | ||
| @@ -508,7 +513,7 @@ int do_settimeofday(const struct timespec *tv) | |||
| 508 | 513 | ||
| 509 | tk_set_xtime(tk, tv); | 514 | tk_set_xtime(tk, tv); |
| 510 | 515 | ||
| 511 | timekeeping_update(tk, true, true); | 516 | timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); |
| 512 | 517 | ||
| 513 | write_seqcount_end(&timekeeper_seq); | 518 | write_seqcount_end(&timekeeper_seq); |
| 514 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); | 519 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); |
| @@ -552,7 +557,7 @@ int timekeeping_inject_offset(struct timespec *ts) | |||
| 552 | tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts)); | 557 | tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts)); |
| 553 | 558 | ||
| 554 | error: /* even if we error out, we forwarded the time, so call update */ | 559 | error: /* even if we error out, we forwarded the time, so call update */ |
| 555 | timekeeping_update(tk, true, true); | 560 | timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); |
| 556 | 561 | ||
| 557 | write_seqcount_end(&timekeeper_seq); | 562 | write_seqcount_end(&timekeeper_seq); |
| 558 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); | 563 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); |
| @@ -627,13 +632,22 @@ static int change_clocksource(void *data) | |||
| 627 | write_seqcount_begin(&timekeeper_seq); | 632 | write_seqcount_begin(&timekeeper_seq); |
| 628 | 633 | ||
| 629 | timekeeping_forward_now(tk); | 634 | timekeeping_forward_now(tk); |
| 630 | if (!new->enable || new->enable(new) == 0) { | 635 | /* |
| 631 | old = tk->clock; | 636 | * If the cs is in module, get a module reference. Succeeds |
| 632 | tk_setup_internals(tk, new); | 637 | * for built-in code (owner == NULL) as well. |
| 633 | if (old->disable) | 638 | */ |
| 634 | old->disable(old); | 639 | if (try_module_get(new->owner)) { |
| 640 | if (!new->enable || new->enable(new) == 0) { | ||
| 641 | old = tk->clock; | ||
| 642 | tk_setup_internals(tk, new); | ||
| 643 | if (old->disable) | ||
| 644 | old->disable(old); | ||
| 645 | module_put(old->owner); | ||
| 646 | } else { | ||
| 647 | module_put(new->owner); | ||
| 648 | } | ||
| 635 | } | 649 | } |
| 636 | timekeeping_update(tk, true, true); | 650 | timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); |
| 637 | 651 | ||
| 638 | write_seqcount_end(&timekeeper_seq); | 652 | write_seqcount_end(&timekeeper_seq); |
| 639 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); | 653 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); |
| @@ -648,14 +662,15 @@ static int change_clocksource(void *data) | |||
| 648 | * This function is called from clocksource.c after a new, better clock | 662 | * This function is called from clocksource.c after a new, better clock |
| 649 | * source has been registered. The caller holds the clocksource_mutex. | 663 | * source has been registered. The caller holds the clocksource_mutex. |
| 650 | */ | 664 | */ |
| 651 | void timekeeping_notify(struct clocksource *clock) | 665 | int timekeeping_notify(struct clocksource *clock) |
| 652 | { | 666 | { |
| 653 | struct timekeeper *tk = &timekeeper; | 667 | struct timekeeper *tk = &timekeeper; |
| 654 | 668 | ||
| 655 | if (tk->clock == clock) | 669 | if (tk->clock == clock) |
| 656 | return; | 670 | return 0; |
| 657 | stop_machine(change_clocksource, clock, NULL); | 671 | stop_machine(change_clocksource, clock, NULL); |
| 658 | tick_clock_notify(); | 672 | tick_clock_notify(); |
| 673 | return tk->clock == clock ? 0 : -1; | ||
| 659 | } | 674 | } |
| 660 | 675 | ||
| 661 | /** | 676 | /** |
| @@ -841,6 +856,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, | |||
| 841 | tk_xtime_add(tk, delta); | 856 | tk_xtime_add(tk, delta); |
| 842 | tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *delta)); | 857 | tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *delta)); |
| 843 | tk_set_sleep_time(tk, timespec_add(tk->total_sleep_time, *delta)); | 858 | tk_set_sleep_time(tk, timespec_add(tk->total_sleep_time, *delta)); |
| 859 | tk_debug_account_sleep_time(delta); | ||
| 844 | } | 860 | } |
| 845 | 861 | ||
| 846 | /** | 862 | /** |
| @@ -872,7 +888,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta) | |||
| 872 | 888 | ||
| 873 | __timekeeping_inject_sleeptime(tk, delta); | 889 | __timekeeping_inject_sleeptime(tk, delta); |
| 874 | 890 | ||
| 875 | timekeeping_update(tk, true, true); | 891 | timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); |
| 876 | 892 | ||
| 877 | write_seqcount_end(&timekeeper_seq); | 893 | write_seqcount_end(&timekeeper_seq); |
| 878 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); | 894 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); |
| @@ -954,7 +970,7 @@ static void timekeeping_resume(void) | |||
| 954 | tk->cycle_last = clock->cycle_last = cycle_now; | 970 | tk->cycle_last = clock->cycle_last = cycle_now; |
| 955 | tk->ntp_error = 0; | 971 | tk->ntp_error = 0; |
| 956 | timekeeping_suspended = 0; | 972 | timekeeping_suspended = 0; |
| 957 | timekeeping_update(tk, false, true); | 973 | timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); |
| 958 | write_seqcount_end(&timekeeper_seq); | 974 | write_seqcount_end(&timekeeper_seq); |
| 959 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); | 975 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); |
| 960 | 976 | ||
| @@ -1236,9 +1252,10 @@ out_adjust: | |||
| 1236 | * It also calls into the NTP code to handle leapsecond processing. | 1252 | * It also calls into the NTP code to handle leapsecond processing. |
| 1237 | * | 1253 | * |
| 1238 | */ | 1254 | */ |
| 1239 | static inline void accumulate_nsecs_to_secs(struct timekeeper *tk) | 1255 | static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) |
| 1240 | { | 1256 | { |
| 1241 | u64 nsecps = (u64)NSEC_PER_SEC << tk->shift; | 1257 | u64 nsecps = (u64)NSEC_PER_SEC << tk->shift; |
| 1258 | unsigned int action = 0; | ||
| 1242 | 1259 | ||
| 1243 | while (tk->xtime_nsec >= nsecps) { | 1260 | while (tk->xtime_nsec >= nsecps) { |
| 1244 | int leap; | 1261 | int leap; |
| @@ -1261,8 +1278,10 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk) | |||
| 1261 | __timekeeping_set_tai_offset(tk, tk->tai_offset - leap); | 1278 | __timekeeping_set_tai_offset(tk, tk->tai_offset - leap); |
| 1262 | 1279 | ||
| 1263 | clock_was_set_delayed(); | 1280 | clock_was_set_delayed(); |
| 1281 | action = TK_CLOCK_WAS_SET; | ||
| 1264 | } | 1282 | } |
| 1265 | } | 1283 | } |
| 1284 | return action; | ||
| 1266 | } | 1285 | } |
| 1267 | 1286 | ||
| 1268 | /** | 1287 | /** |
| @@ -1347,6 +1366,7 @@ static void update_wall_time(void) | |||
| 1347 | struct timekeeper *tk = &shadow_timekeeper; | 1366 | struct timekeeper *tk = &shadow_timekeeper; |
| 1348 | cycle_t offset; | 1367 | cycle_t offset; |
| 1349 | int shift = 0, maxshift; | 1368 | int shift = 0, maxshift; |
| 1369 | unsigned int action; | ||
| 1350 | unsigned long flags; | 1370 | unsigned long flags; |
| 1351 | 1371 | ||
| 1352 | raw_spin_lock_irqsave(&timekeeper_lock, flags); | 1372 | raw_spin_lock_irqsave(&timekeeper_lock, flags); |
| @@ -1399,7 +1419,7 @@ static void update_wall_time(void) | |||
| 1399 | * Finally, make sure that after the rounding | 1419 | * Finally, make sure that after the rounding |
| 1400 | * xtime_nsec isn't larger than NSEC_PER_SEC | 1420 | * xtime_nsec isn't larger than NSEC_PER_SEC |
| 1401 | */ | 1421 | */ |
| 1402 | accumulate_nsecs_to_secs(tk); | 1422 | action = accumulate_nsecs_to_secs(tk); |
| 1403 | 1423 | ||
| 1404 | write_seqcount_begin(&timekeeper_seq); | 1424 | write_seqcount_begin(&timekeeper_seq); |
| 1405 | /* Update clock->cycle_last with the new value */ | 1425 | /* Update clock->cycle_last with the new value */ |
| @@ -1415,7 +1435,7 @@ static void update_wall_time(void) | |||
| 1415 | * updating. | 1435 | * updating. |
| 1416 | */ | 1436 | */ |
| 1417 | memcpy(real_tk, tk, sizeof(*tk)); | 1437 | memcpy(real_tk, tk, sizeof(*tk)); |
| 1418 | timekeeping_update(real_tk, false, false); | 1438 | timekeeping_update(real_tk, action); |
| 1419 | write_seqcount_end(&timekeeper_seq); | 1439 | write_seqcount_end(&timekeeper_seq); |
| 1420 | out: | 1440 | out: |
| 1421 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); | 1441 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); |
| @@ -1677,6 +1697,7 @@ int do_adjtimex(struct timex *txc) | |||
| 1677 | 1697 | ||
| 1678 | if (tai != orig_tai) { | 1698 | if (tai != orig_tai) { |
| 1679 | __timekeeping_set_tai_offset(tk, tai); | 1699 | __timekeeping_set_tai_offset(tk, tai); |
| 1700 | update_pvclock_gtod(tk, true); | ||
| 1680 | clock_was_set_delayed(); | 1701 | clock_was_set_delayed(); |
| 1681 | } | 1702 | } |
| 1682 | write_seqcount_end(&timekeeper_seq); | 1703 | write_seqcount_end(&timekeeper_seq); |
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c new file mode 100644 index 000000000000..802433a4f5eb --- /dev/null +++ b/kernel/time/timekeeping_debug.c | |||
| @@ -0,0 +1,72 @@ | |||
| 1 | /* | ||
| 2 | * debugfs file to track time spent in suspend | ||
| 3 | * | ||
| 4 | * Copyright (c) 2011, Google, Inc. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License as published by | ||
| 8 | * the Free Software Foundation; either version 2 of the License, or | ||
| 9 | * (at your option) any later version. | ||
| 10 | * | ||
| 11 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 14 | * more details. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/debugfs.h> | ||
| 18 | #include <linux/err.h> | ||
| 19 | #include <linux/init.h> | ||
| 20 | #include <linux/kernel.h> | ||
| 21 | #include <linux/seq_file.h> | ||
| 22 | #include <linux/time.h> | ||
| 23 | |||
| 24 | static unsigned int sleep_time_bin[32] = {0}; | ||
| 25 | |||
| 26 | static int tk_debug_show_sleep_time(struct seq_file *s, void *data) | ||
| 27 | { | ||
| 28 | unsigned int bin; | ||
| 29 | seq_puts(s, " time (secs) count\n"); | ||
| 30 | seq_puts(s, "------------------------------\n"); | ||
| 31 | for (bin = 0; bin < 32; bin++) { | ||
| 32 | if (sleep_time_bin[bin] == 0) | ||
| 33 | continue; | ||
| 34 | seq_printf(s, "%10u - %-10u %4u\n", | ||
| 35 | bin ? 1 << (bin - 1) : 0, 1 << bin, | ||
| 36 | sleep_time_bin[bin]); | ||
| 37 | } | ||
| 38 | return 0; | ||
| 39 | } | ||
| 40 | |||
| 41 | static int tk_debug_sleep_time_open(struct inode *inode, struct file *file) | ||
| 42 | { | ||
| 43 | return single_open(file, tk_debug_show_sleep_time, NULL); | ||
| 44 | } | ||
| 45 | |||
| 46 | static const struct file_operations tk_debug_sleep_time_fops = { | ||
| 47 | .open = tk_debug_sleep_time_open, | ||
| 48 | .read = seq_read, | ||
| 49 | .llseek = seq_lseek, | ||
| 50 | .release = single_release, | ||
| 51 | }; | ||
| 52 | |||
| 53 | static int __init tk_debug_sleep_time_init(void) | ||
| 54 | { | ||
| 55 | struct dentry *d; | ||
| 56 | |||
| 57 | d = debugfs_create_file("sleep_time", 0444, NULL, NULL, | ||
| 58 | &tk_debug_sleep_time_fops); | ||
| 59 | if (!d) { | ||
| 60 | pr_err("Failed to create sleep_time debug file\n"); | ||
| 61 | return -ENOMEM; | ||
| 62 | } | ||
| 63 | |||
| 64 | return 0; | ||
| 65 | } | ||
| 66 | late_initcall(tk_debug_sleep_time_init); | ||
| 67 | |||
| 68 | void tk_debug_account_sleep_time(struct timespec *t) | ||
| 69 | { | ||
| 70 | sleep_time_bin[fls(t->tv_sec)]++; | ||
| 71 | } | ||
| 72 | |||
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h new file mode 100644 index 000000000000..13323ea08ffa --- /dev/null +++ b/kernel/time/timekeeping_internal.h | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | #ifndef _TIMEKEEPING_INTERNAL_H | ||
| 2 | #define _TIMEKEEPING_INTERNAL_H | ||
| 3 | /* | ||
| 4 | * timekeeping debug functions | ||
| 5 | */ | ||
| 6 | #include <linux/time.h> | ||
| 7 | |||
| 8 | #ifdef CONFIG_DEBUG_FS | ||
| 9 | extern void tk_debug_account_sleep_time(struct timespec *t); | ||
| 10 | #else | ||
| 11 | #define tk_debug_account_sleep_time(x) | ||
| 12 | #endif | ||
| 13 | |||
| 14 | #endif /* _TIMEKEEPING_INTERNAL_H */ | ||
diff --git a/kernel/timer.c b/kernel/timer.c index 15ffdb3f1948..4296d13db3d1 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
| @@ -149,9 +149,11 @@ static unsigned long round_jiffies_common(unsigned long j, int cpu, | |||
| 149 | /* now that we have rounded, subtract the extra skew again */ | 149 | /* now that we have rounded, subtract the extra skew again */ |
| 150 | j -= cpu * 3; | 150 | j -= cpu * 3; |
| 151 | 151 | ||
| 152 | if (j <= jiffies) /* rounding ate our timeout entirely; */ | 152 | /* |
| 153 | return original; | 153 | * Make sure j is still in the future. Otherwise return the |
| 154 | return j; | 154 | * unmodified value. |
| 155 | */ | ||
| 156 | return time_is_after_jiffies(j) ? j : original; | ||
| 155 | } | 157 | } |
| 156 | 158 | ||
| 157 | /** | 159 | /** |
| @@ -1503,11 +1505,11 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout) | |||
| 1503 | } | 1505 | } |
| 1504 | EXPORT_SYMBOL(schedule_timeout_uninterruptible); | 1506 | EXPORT_SYMBOL(schedule_timeout_uninterruptible); |
| 1505 | 1507 | ||
| 1506 | static int __cpuinit init_timers_cpu(int cpu) | 1508 | static int init_timers_cpu(int cpu) |
| 1507 | { | 1509 | { |
| 1508 | int j; | 1510 | int j; |
| 1509 | struct tvec_base *base; | 1511 | struct tvec_base *base; |
| 1510 | static char __cpuinitdata tvec_base_done[NR_CPUS]; | 1512 | static char tvec_base_done[NR_CPUS]; |
| 1511 | 1513 | ||
| 1512 | if (!tvec_base_done[cpu]) { | 1514 | if (!tvec_base_done[cpu]) { |
| 1513 | static char boot_done; | 1515 | static char boot_done; |
| @@ -1575,7 +1577,7 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea | |||
| 1575 | } | 1577 | } |
| 1576 | } | 1578 | } |
| 1577 | 1579 | ||
| 1578 | static void __cpuinit migrate_timers(int cpu) | 1580 | static void migrate_timers(int cpu) |
| 1579 | { | 1581 | { |
| 1580 | struct tvec_base *old_base; | 1582 | struct tvec_base *old_base; |
| 1581 | struct tvec_base *new_base; | 1583 | struct tvec_base *new_base; |
| @@ -1608,7 +1610,7 @@ static void __cpuinit migrate_timers(int cpu) | |||
| 1608 | } | 1610 | } |
| 1609 | #endif /* CONFIG_HOTPLUG_CPU */ | 1611 | #endif /* CONFIG_HOTPLUG_CPU */ |
| 1610 | 1612 | ||
| 1611 | static int __cpuinit timer_cpu_notify(struct notifier_block *self, | 1613 | static int timer_cpu_notify(struct notifier_block *self, |
| 1612 | unsigned long action, void *hcpu) | 1614 | unsigned long action, void *hcpu) |
| 1613 | { | 1615 | { |
| 1614 | long cpu = (long)hcpu; | 1616 | long cpu = (long)hcpu; |
| @@ -1633,7 +1635,7 @@ static int __cpuinit timer_cpu_notify(struct notifier_block *self, | |||
| 1633 | return NOTIFY_OK; | 1635 | return NOTIFY_OK; |
| 1634 | } | 1636 | } |
| 1635 | 1637 | ||
| 1636 | static struct notifier_block __cpuinitdata timers_nb = { | 1638 | static struct notifier_block timers_nb = { |
| 1637 | .notifier_call = timer_cpu_notify, | 1639 | .notifier_call = timer_cpu_notify, |
| 1638 | }; | 1640 | }; |
| 1639 | 1641 | ||
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6c508ff33c62..67708f46baae 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
| @@ -413,6 +413,17 @@ static int __register_ftrace_function(struct ftrace_ops *ops) | |||
| 413 | return 0; | 413 | return 0; |
| 414 | } | 414 | } |
| 415 | 415 | ||
| 416 | static void ftrace_sync(struct work_struct *work) | ||
| 417 | { | ||
| 418 | /* | ||
| 419 | * This function is just a stub to implement a hard force | ||
| 420 | * of synchronize_sched(). This requires synchronizing | ||
| 421 | * tasks even in userspace and idle. | ||
| 422 | * | ||
| 423 | * Yes, function tracing is rude. | ||
| 424 | */ | ||
| 425 | } | ||
| 426 | |||
| 416 | static int __unregister_ftrace_function(struct ftrace_ops *ops) | 427 | static int __unregister_ftrace_function(struct ftrace_ops *ops) |
| 417 | { | 428 | { |
| 418 | int ret; | 429 | int ret; |
| @@ -440,8 +451,12 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) | |||
| 440 | * so there'll be no new users. We must ensure | 451 | * so there'll be no new users. We must ensure |
| 441 | * all current users are done before we free | 452 | * all current users are done before we free |
| 442 | * the control data. | 453 | * the control data. |
| 454 | * Note synchronize_sched() is not enough, as we | ||
| 455 | * use preempt_disable() to do RCU, but the function | ||
| 456 | * tracer can be called where RCU is not active | ||
| 457 | * (before user_exit()). | ||
| 443 | */ | 458 | */ |
| 444 | synchronize_sched(); | 459 | schedule_on_each_cpu(ftrace_sync); |
| 445 | control_ops_free(ops); | 460 | control_ops_free(ops); |
| 446 | } | 461 | } |
| 447 | } else | 462 | } else |
| @@ -456,9 +471,13 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) | |||
| 456 | /* | 471 | /* |
| 457 | * Dynamic ops may be freed, we must make sure that all | 472 | * Dynamic ops may be freed, we must make sure that all |
| 458 | * callers are done before leaving this function. | 473 | * callers are done before leaving this function. |
| 474 | * | ||
| 475 | * Again, normal synchronize_sched() is not good enough. | ||
| 476 | * We need to do a hard force of sched synchronization. | ||
| 459 | */ | 477 | */ |
| 460 | if (ops->flags & FTRACE_OPS_FL_DYNAMIC) | 478 | if (ops->flags & FTRACE_OPS_FL_DYNAMIC) |
| 461 | synchronize_sched(); | 479 | schedule_on_each_cpu(ftrace_sync); |
| 480 | |||
| 462 | 481 | ||
| 463 | return 0; | 482 | return 0; |
| 464 | } | 483 | } |
| @@ -622,12 +641,18 @@ static int function_stat_show(struct seq_file *m, void *v) | |||
| 622 | if (rec->counter <= 1) | 641 | if (rec->counter <= 1) |
| 623 | stddev = 0; | 642 | stddev = 0; |
| 624 | else { | 643 | else { |
| 625 | stddev = rec->time_squared - rec->counter * avg * avg; | 644 | /* |
| 645 | * Apply Welford's method: | ||
| 646 | * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) | ||
| 647 | */ | ||
| 648 | stddev = rec->counter * rec->time_squared - | ||
| 649 | rec->time * rec->time; | ||
| 650 | |||
| 626 | /* | 651 | /* |
| 627 | * Divide only 1000 for ns^2 -> us^2 conversion. | 652 | * Divide only 1000 for ns^2 -> us^2 conversion. |
| 628 | * trace_print_graph_duration will divide 1000 again. | 653 | * trace_print_graph_duration will divide 1000 again. |
| 629 | */ | 654 | */ |
| 630 | do_div(stddev, (rec->counter - 1) * 1000); | 655 | do_div(stddev, rec->counter * (rec->counter - 1) * 1000); |
| 631 | } | 656 | } |
| 632 | 657 | ||
| 633 | trace_seq_init(&s); | 658 | trace_seq_init(&s); |
| @@ -3512,8 +3537,12 @@ EXPORT_SYMBOL_GPL(ftrace_set_global_notrace); | |||
| 3512 | static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata; | 3537 | static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata; |
| 3513 | static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata; | 3538 | static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata; |
| 3514 | 3539 | ||
| 3540 | /* Used by function selftest to not test if filter is set */ | ||
| 3541 | bool ftrace_filter_param __initdata; | ||
| 3542 | |||
| 3515 | static int __init set_ftrace_notrace(char *str) | 3543 | static int __init set_ftrace_notrace(char *str) |
| 3516 | { | 3544 | { |
| 3545 | ftrace_filter_param = true; | ||
| 3517 | strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE); | 3546 | strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE); |
| 3518 | return 1; | 3547 | return 1; |
| 3519 | } | 3548 | } |
| @@ -3521,6 +3550,7 @@ __setup("ftrace_notrace=", set_ftrace_notrace); | |||
| 3521 | 3550 | ||
| 3522 | static int __init set_ftrace_filter(char *str) | 3551 | static int __init set_ftrace_filter(char *str) |
| 3523 | { | 3552 | { |
| 3553 | ftrace_filter_param = true; | ||
| 3524 | strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); | 3554 | strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); |
| 3525 | return 1; | 3555 | return 1; |
| 3526 | } | 3556 | } |
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index e444ff88f0a4..cc2f66f68dc5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
| @@ -36,11 +36,11 @@ int ring_buffer_print_entry_header(struct trace_seq *s) | |||
| 36 | { | 36 | { |
| 37 | int ret; | 37 | int ret; |
| 38 | 38 | ||
| 39 | ret = trace_seq_printf(s, "# compressed entry header\n"); | 39 | ret = trace_seq_puts(s, "# compressed entry header\n"); |
| 40 | ret = trace_seq_printf(s, "\ttype_len : 5 bits\n"); | 40 | ret = trace_seq_puts(s, "\ttype_len : 5 bits\n"); |
| 41 | ret = trace_seq_printf(s, "\ttime_delta : 27 bits\n"); | 41 | ret = trace_seq_puts(s, "\ttime_delta : 27 bits\n"); |
| 42 | ret = trace_seq_printf(s, "\tarray : 32 bits\n"); | 42 | ret = trace_seq_puts(s, "\tarray : 32 bits\n"); |
| 43 | ret = trace_seq_printf(s, "\n"); | 43 | ret = trace_seq_putc(s, '\n'); |
| 44 | ret = trace_seq_printf(s, "\tpadding : type == %d\n", | 44 | ret = trace_seq_printf(s, "\tpadding : type == %d\n", |
| 45 | RINGBUF_TYPE_PADDING); | 45 | RINGBUF_TYPE_PADDING); |
| 46 | ret = trace_seq_printf(s, "\ttime_extend : type == %d\n", | 46 | ret = trace_seq_printf(s, "\ttime_extend : type == %d\n", |
| @@ -1066,7 +1066,7 @@ static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1066 | } | 1066 | } |
| 1067 | 1067 | ||
| 1068 | /** | 1068 | /** |
| 1069 | * check_pages - integrity check of buffer pages | 1069 | * rb_check_pages - integrity check of buffer pages |
| 1070 | * @cpu_buffer: CPU buffer with pages to test | 1070 | * @cpu_buffer: CPU buffer with pages to test |
| 1071 | * | 1071 | * |
| 1072 | * As a safety measure we check to make sure the data pages have not | 1072 | * As a safety measure we check to make sure the data pages have not |
| @@ -1258,7 +1258,7 @@ static int rb_cpu_notify(struct notifier_block *self, | |||
| 1258 | #endif | 1258 | #endif |
| 1259 | 1259 | ||
| 1260 | /** | 1260 | /** |
| 1261 | * ring_buffer_alloc - allocate a new ring_buffer | 1261 | * __ring_buffer_alloc - allocate a new ring_buffer |
| 1262 | * @size: the size in bytes per cpu that is needed. | 1262 | * @size: the size in bytes per cpu that is needed. |
| 1263 | * @flags: attributes to set for the ring buffer. | 1263 | * @flags: attributes to set for the ring buffer. |
| 1264 | * | 1264 | * |
| @@ -1607,6 +1607,7 @@ static void update_pages_handler(struct work_struct *work) | |||
| 1607 | * ring_buffer_resize - resize the ring buffer | 1607 | * ring_buffer_resize - resize the ring buffer |
| 1608 | * @buffer: the buffer to resize. | 1608 | * @buffer: the buffer to resize. |
| 1609 | * @size: the new size. | 1609 | * @size: the new size. |
| 1610 | * @cpu_id: the cpu buffer to resize | ||
| 1610 | * | 1611 | * |
| 1611 | * Minimum size is 2 * BUF_PAGE_SIZE. | 1612 | * Minimum size is 2 * BUF_PAGE_SIZE. |
| 1612 | * | 1613 | * |
| @@ -3956,11 +3957,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_consume); | |||
| 3956 | * expected. | 3957 | * expected. |
| 3957 | * | 3958 | * |
| 3958 | * After a sequence of ring_buffer_read_prepare calls, the user is | 3959 | * After a sequence of ring_buffer_read_prepare calls, the user is |
| 3959 | * expected to make at least one call to ring_buffer_prepare_sync. | 3960 | * expected to make at least one call to ring_buffer_read_prepare_sync. |
| 3960 | * Afterwards, ring_buffer_read_start is invoked to get things going | 3961 | * Afterwards, ring_buffer_read_start is invoked to get things going |
| 3961 | * for real. | 3962 | * for real. |
| 3962 | * | 3963 | * |
| 3963 | * This overall must be paired with ring_buffer_finish. | 3964 | * This overall must be paired with ring_buffer_read_finish. |
| 3964 | */ | 3965 | */ |
| 3965 | struct ring_buffer_iter * | 3966 | struct ring_buffer_iter * |
| 3966 | ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu) | 3967 | ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu) |
| @@ -4009,7 +4010,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync); | |||
| 4009 | * an intervening ring_buffer_read_prepare_sync must have been | 4010 | * an intervening ring_buffer_read_prepare_sync must have been |
| 4010 | * performed. | 4011 | * performed. |
| 4011 | * | 4012 | * |
| 4012 | * Must be paired with ring_buffer_finish. | 4013 | * Must be paired with ring_buffer_read_finish. |
| 4013 | */ | 4014 | */ |
| 4014 | void | 4015 | void |
| 4015 | ring_buffer_read_start(struct ring_buffer_iter *iter) | 4016 | ring_buffer_read_start(struct ring_buffer_iter *iter) |
| @@ -4031,7 +4032,7 @@ ring_buffer_read_start(struct ring_buffer_iter *iter) | |||
| 4031 | EXPORT_SYMBOL_GPL(ring_buffer_read_start); | 4032 | EXPORT_SYMBOL_GPL(ring_buffer_read_start); |
| 4032 | 4033 | ||
| 4033 | /** | 4034 | /** |
| 4034 | * ring_buffer_finish - finish reading the iterator of the buffer | 4035 | * ring_buffer_read_finish - finish reading the iterator of the buffer |
| 4035 | * @iter: The iterator retrieved by ring_buffer_start | 4036 | * @iter: The iterator retrieved by ring_buffer_start |
| 4036 | * | 4037 | * |
| 4037 | * This re-enables the recording to the buffer, and frees the | 4038 | * This re-enables the recording to the buffer, and frees the |
| @@ -4346,6 +4347,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); | |||
| 4346 | /** | 4347 | /** |
| 4347 | * ring_buffer_alloc_read_page - allocate a page to read from buffer | 4348 | * ring_buffer_alloc_read_page - allocate a page to read from buffer |
| 4348 | * @buffer: the buffer to allocate for. | 4349 | * @buffer: the buffer to allocate for. |
| 4350 | * @cpu: the cpu buffer to allocate. | ||
| 4349 | * | 4351 | * |
| 4350 | * This function is used in conjunction with ring_buffer_read_page. | 4352 | * This function is used in conjunction with ring_buffer_read_page. |
| 4351 | * When reading a full page from the ring buffer, these functions | 4353 | * When reading a full page from the ring buffer, these functions |
| @@ -4403,7 +4405,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); | |||
| 4403 | * to swap with a page in the ring buffer. | 4405 | * to swap with a page in the ring buffer. |
| 4404 | * | 4406 | * |
| 4405 | * for example: | 4407 | * for example: |
| 4406 | * rpage = ring_buffer_alloc_read_page(buffer); | 4408 | * rpage = ring_buffer_alloc_read_page(buffer, cpu); |
| 4407 | * if (!rpage) | 4409 | * if (!rpage) |
| 4408 | * return error; | 4410 | * return error; |
| 4409 | * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0); | 4411 | * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0); |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e71a8be4a6ee..3f2477713aca 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -115,6 +115,9 @@ cpumask_var_t __read_mostly tracing_buffer_mask; | |||
| 115 | 115 | ||
| 116 | enum ftrace_dump_mode ftrace_dump_on_oops; | 116 | enum ftrace_dump_mode ftrace_dump_on_oops; |
| 117 | 117 | ||
| 118 | /* When set, tracing will stop when a WARN*() is hit */ | ||
| 119 | int __disable_trace_on_warning; | ||
| 120 | |||
| 118 | static int tracing_set_tracer(const char *buf); | 121 | static int tracing_set_tracer(const char *buf); |
| 119 | 122 | ||
| 120 | #define MAX_TRACER_SIZE 100 | 123 | #define MAX_TRACER_SIZE 100 |
| @@ -149,6 +152,13 @@ static int __init set_ftrace_dump_on_oops(char *str) | |||
| 149 | } | 152 | } |
| 150 | __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); | 153 | __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); |
| 151 | 154 | ||
| 155 | static int __init stop_trace_on_warning(char *str) | ||
| 156 | { | ||
| 157 | __disable_trace_on_warning = 1; | ||
| 158 | return 1; | ||
| 159 | } | ||
| 160 | __setup("traceoff_on_warning=", stop_trace_on_warning); | ||
| 161 | |||
| 152 | static int __init boot_alloc_snapshot(char *str) | 162 | static int __init boot_alloc_snapshot(char *str) |
| 153 | { | 163 | { |
| 154 | allocate_snapshot = true; | 164 | allocate_snapshot = true; |
| @@ -170,6 +180,7 @@ static int __init set_trace_boot_options(char *str) | |||
| 170 | } | 180 | } |
| 171 | __setup("trace_options=", set_trace_boot_options); | 181 | __setup("trace_options=", set_trace_boot_options); |
| 172 | 182 | ||
| 183 | |||
| 173 | unsigned long long ns2usecs(cycle_t nsec) | 184 | unsigned long long ns2usecs(cycle_t nsec) |
| 174 | { | 185 | { |
| 175 | nsec += 500; | 186 | nsec += 500; |
| @@ -193,6 +204,37 @@ static struct trace_array global_trace; | |||
| 193 | 204 | ||
| 194 | LIST_HEAD(ftrace_trace_arrays); | 205 | LIST_HEAD(ftrace_trace_arrays); |
| 195 | 206 | ||
| 207 | int trace_array_get(struct trace_array *this_tr) | ||
| 208 | { | ||
| 209 | struct trace_array *tr; | ||
| 210 | int ret = -ENODEV; | ||
| 211 | |||
| 212 | mutex_lock(&trace_types_lock); | ||
| 213 | list_for_each_entry(tr, &ftrace_trace_arrays, list) { | ||
| 214 | if (tr == this_tr) { | ||
| 215 | tr->ref++; | ||
| 216 | ret = 0; | ||
| 217 | break; | ||
| 218 | } | ||
| 219 | } | ||
| 220 | mutex_unlock(&trace_types_lock); | ||
| 221 | |||
| 222 | return ret; | ||
| 223 | } | ||
| 224 | |||
| 225 | static void __trace_array_put(struct trace_array *this_tr) | ||
| 226 | { | ||
| 227 | WARN_ON(!this_tr->ref); | ||
| 228 | this_tr->ref--; | ||
| 229 | } | ||
| 230 | |||
| 231 | void trace_array_put(struct trace_array *this_tr) | ||
| 232 | { | ||
| 233 | mutex_lock(&trace_types_lock); | ||
| 234 | __trace_array_put(this_tr); | ||
| 235 | mutex_unlock(&trace_types_lock); | ||
| 236 | } | ||
| 237 | |||
| 196 | int filter_current_check_discard(struct ring_buffer *buffer, | 238 | int filter_current_check_discard(struct ring_buffer *buffer, |
| 197 | struct ftrace_event_call *call, void *rec, | 239 | struct ftrace_event_call *call, void *rec, |
| 198 | struct ring_buffer_event *event) | 240 | struct ring_buffer_event *event) |
| @@ -215,9 +257,24 @@ cycle_t ftrace_now(int cpu) | |||
| 215 | return ts; | 257 | return ts; |
| 216 | } | 258 | } |
| 217 | 259 | ||
| 260 | /** | ||
| 261 | * tracing_is_enabled - Show if global_trace has been disabled | ||
| 262 | * | ||
| 263 | * Shows if the global trace has been enabled or not. It uses the | ||
| 264 | * mirror flag "buffer_disabled" to be used in fast paths such as for | ||
| 265 | * the irqsoff tracer. But it may be inaccurate due to races. If you | ||
| 266 | * need to know the accurate state, use tracing_is_on() which is a little | ||
| 267 | * slower, but accurate. | ||
| 268 | */ | ||
| 218 | int tracing_is_enabled(void) | 269 | int tracing_is_enabled(void) |
| 219 | { | 270 | { |
| 220 | return tracing_is_on(); | 271 | /* |
| 272 | * For quick access (irqsoff uses this in fast path), just | ||
| 273 | * return the mirror variable of the state of the ring buffer. | ||
| 274 | * It's a little racy, but we don't really care. | ||
| 275 | */ | ||
| 276 | smp_rmb(); | ||
| 277 | return !global_trace.buffer_disabled; | ||
| 221 | } | 278 | } |
| 222 | 279 | ||
| 223 | /* | 280 | /* |
| @@ -240,7 +297,7 @@ static struct tracer *trace_types __read_mostly; | |||
| 240 | /* | 297 | /* |
| 241 | * trace_types_lock is used to protect the trace_types list. | 298 | * trace_types_lock is used to protect the trace_types list. |
| 242 | */ | 299 | */ |
| 243 | static DEFINE_MUTEX(trace_types_lock); | 300 | DEFINE_MUTEX(trace_types_lock); |
| 244 | 301 | ||
| 245 | /* | 302 | /* |
| 246 | * serialize the access of the ring buffer | 303 | * serialize the access of the ring buffer |
| @@ -330,6 +387,23 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | | |||
| 330 | TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | | 387 | TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | |
| 331 | TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION; | 388 | TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION; |
| 332 | 389 | ||
| 390 | static void tracer_tracing_on(struct trace_array *tr) | ||
| 391 | { | ||
| 392 | if (tr->trace_buffer.buffer) | ||
| 393 | ring_buffer_record_on(tr->trace_buffer.buffer); | ||
| 394 | /* | ||
| 395 | * This flag is looked at when buffers haven't been allocated | ||
| 396 | * yet, or by some tracers (like irqsoff), that just want to | ||
| 397 | * know if the ring buffer has been disabled, but it can handle | ||
| 398 | * races of where it gets disabled but we still do a record. | ||
| 399 | * As the check is in the fast path of the tracers, it is more | ||
| 400 | * important to be fast than accurate. | ||
| 401 | */ | ||
| 402 | tr->buffer_disabled = 0; | ||
| 403 | /* Make the flag seen by readers */ | ||
| 404 | smp_wmb(); | ||
| 405 | } | ||
| 406 | |||
| 333 | /** | 407 | /** |
| 334 | * tracing_on - enable tracing buffers | 408 | * tracing_on - enable tracing buffers |
| 335 | * | 409 | * |
| @@ -338,15 +412,7 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | | |||
| 338 | */ | 412 | */ |
| 339 | void tracing_on(void) | 413 | void tracing_on(void) |
| 340 | { | 414 | { |
| 341 | if (global_trace.trace_buffer.buffer) | 415 | tracer_tracing_on(&global_trace); |
| 342 | ring_buffer_record_on(global_trace.trace_buffer.buffer); | ||
| 343 | /* | ||
| 344 | * This flag is only looked at when buffers haven't been | ||
| 345 | * allocated yet. We don't really care about the race | ||
| 346 | * between setting this flag and actually turning | ||
| 347 | * on the buffer. | ||
| 348 | */ | ||
| 349 | global_trace.buffer_disabled = 0; | ||
| 350 | } | 416 | } |
| 351 | EXPORT_SYMBOL_GPL(tracing_on); | 417 | EXPORT_SYMBOL_GPL(tracing_on); |
| 352 | 418 | ||
| @@ -540,6 +606,23 @@ void tracing_snapshot_alloc(void) | |||
| 540 | EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); | 606 | EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); |
| 541 | #endif /* CONFIG_TRACER_SNAPSHOT */ | 607 | #endif /* CONFIG_TRACER_SNAPSHOT */ |
| 542 | 608 | ||
| 609 | static void tracer_tracing_off(struct trace_array *tr) | ||
| 610 | { | ||
| 611 | if (tr->trace_buffer.buffer) | ||
| 612 | ring_buffer_record_off(tr->trace_buffer.buffer); | ||
| 613 | /* | ||
| 614 | * This flag is looked at when buffers haven't been allocated | ||
| 615 | * yet, or by some tracers (like irqsoff), that just want to | ||
| 616 | * know if the ring buffer has been disabled, but it can handle | ||
| 617 | * races of where it gets disabled but we still do a record. | ||
| 618 | * As the check is in the fast path of the tracers, it is more | ||
| 619 | * important to be fast than accurate. | ||
| 620 | */ | ||
| 621 | tr->buffer_disabled = 1; | ||
| 622 | /* Make the flag seen by readers */ | ||
| 623 | smp_wmb(); | ||
| 624 | } | ||
| 625 | |||
| 543 | /** | 626 | /** |
| 544 | * tracing_off - turn off tracing buffers | 627 | * tracing_off - turn off tracing buffers |
| 545 | * | 628 | * |
| @@ -550,26 +633,35 @@ EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); | |||
| 550 | */ | 633 | */ |
| 551 | void tracing_off(void) | 634 | void tracing_off(void) |
| 552 | { | 635 | { |
| 553 | if (global_trace.trace_buffer.buffer) | 636 | tracer_tracing_off(&global_trace); |
| 554 | ring_buffer_record_off(global_trace.trace_buffer.buffer); | ||
| 555 | /* | ||
| 556 | * This flag is only looked at when buffers haven't been | ||
| 557 | * allocated yet. We don't really care about the race | ||
| 558 | * between setting this flag and actually turning | ||
| 559 | * on the buffer. | ||
| 560 | */ | ||
| 561 | global_trace.buffer_disabled = 1; | ||
| 562 | } | 637 | } |
| 563 | EXPORT_SYMBOL_GPL(tracing_off); | 638 | EXPORT_SYMBOL_GPL(tracing_off); |
| 564 | 639 | ||
| 640 | void disable_trace_on_warning(void) | ||
| 641 | { | ||
| 642 | if (__disable_trace_on_warning) | ||
| 643 | tracing_off(); | ||
| 644 | } | ||
| 645 | |||
| 646 | /** | ||
| 647 | * tracer_tracing_is_on - show real state of ring buffer enabled | ||
| 648 | * @tr : the trace array to know if ring buffer is enabled | ||
| 649 | * | ||
| 650 | * Shows real state of the ring buffer if it is enabled or not. | ||
| 651 | */ | ||
| 652 | static int tracer_tracing_is_on(struct trace_array *tr) | ||
| 653 | { | ||
| 654 | if (tr->trace_buffer.buffer) | ||
| 655 | return ring_buffer_record_is_on(tr->trace_buffer.buffer); | ||
| 656 | return !tr->buffer_disabled; | ||
| 657 | } | ||
| 658 | |||
| 565 | /** | 659 | /** |
| 566 | * tracing_is_on - show state of ring buffers enabled | 660 | * tracing_is_on - show state of ring buffers enabled |
| 567 | */ | 661 | */ |
| 568 | int tracing_is_on(void) | 662 | int tracing_is_on(void) |
| 569 | { | 663 | { |
| 570 | if (global_trace.trace_buffer.buffer) | 664 | return tracer_tracing_is_on(&global_trace); |
| 571 | return ring_buffer_record_is_on(global_trace.trace_buffer.buffer); | ||
| 572 | return !global_trace.buffer_disabled; | ||
| 573 | } | 665 | } |
| 574 | EXPORT_SYMBOL_GPL(tracing_is_on); | 666 | EXPORT_SYMBOL_GPL(tracing_is_on); |
| 575 | 667 | ||
| @@ -1543,15 +1635,6 @@ trace_function(struct trace_array *tr, | |||
| 1543 | __buffer_unlock_commit(buffer, event); | 1635 | __buffer_unlock_commit(buffer, event); |
| 1544 | } | 1636 | } |
| 1545 | 1637 | ||
| 1546 | void | ||
| 1547 | ftrace(struct trace_array *tr, struct trace_array_cpu *data, | ||
| 1548 | unsigned long ip, unsigned long parent_ip, unsigned long flags, | ||
| 1549 | int pc) | ||
| 1550 | { | ||
| 1551 | if (likely(!atomic_read(&data->disabled))) | ||
| 1552 | trace_function(tr, ip, parent_ip, flags, pc); | ||
| 1553 | } | ||
| 1554 | |||
| 1555 | #ifdef CONFIG_STACKTRACE | 1638 | #ifdef CONFIG_STACKTRACE |
| 1556 | 1639 | ||
| 1557 | #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long)) | 1640 | #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long)) |
| @@ -2768,10 +2851,9 @@ static const struct seq_operations tracer_seq_ops = { | |||
| 2768 | }; | 2851 | }; |
| 2769 | 2852 | ||
| 2770 | static struct trace_iterator * | 2853 | static struct trace_iterator * |
| 2771 | __tracing_open(struct inode *inode, struct file *file, bool snapshot) | 2854 | __tracing_open(struct trace_array *tr, struct trace_cpu *tc, |
| 2855 | struct inode *inode, struct file *file, bool snapshot) | ||
| 2772 | { | 2856 | { |
| 2773 | struct trace_cpu *tc = inode->i_private; | ||
| 2774 | struct trace_array *tr = tc->tr; | ||
| 2775 | struct trace_iterator *iter; | 2857 | struct trace_iterator *iter; |
| 2776 | int cpu; | 2858 | int cpu; |
| 2777 | 2859 | ||
| @@ -2850,8 +2932,6 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) | |||
| 2850 | tracing_iter_reset(iter, cpu); | 2932 | tracing_iter_reset(iter, cpu); |
| 2851 | } | 2933 | } |
| 2852 | 2934 | ||
| 2853 | tr->ref++; | ||
| 2854 | |||
| 2855 | mutex_unlock(&trace_types_lock); | 2935 | mutex_unlock(&trace_types_lock); |
| 2856 | 2936 | ||
| 2857 | return iter; | 2937 | return iter; |
| @@ -2874,6 +2954,43 @@ int tracing_open_generic(struct inode *inode, struct file *filp) | |||
| 2874 | return 0; | 2954 | return 0; |
| 2875 | } | 2955 | } |
| 2876 | 2956 | ||
| 2957 | /* | ||
| 2958 | * Open and update trace_array ref count. | ||
| 2959 | * Must have the current trace_array passed to it. | ||
| 2960 | */ | ||
| 2961 | static int tracing_open_generic_tr(struct inode *inode, struct file *filp) | ||
| 2962 | { | ||
| 2963 | struct trace_array *tr = inode->i_private; | ||
| 2964 | |||
| 2965 | if (tracing_disabled) | ||
| 2966 | return -ENODEV; | ||
| 2967 | |||
| 2968 | if (trace_array_get(tr) < 0) | ||
| 2969 | return -ENODEV; | ||
| 2970 | |||
| 2971 | filp->private_data = inode->i_private; | ||
| 2972 | |||
| 2973 | return 0; | ||
| 2974 | |||
| 2975 | } | ||
| 2976 | |||
| 2977 | static int tracing_open_generic_tc(struct inode *inode, struct file *filp) | ||
| 2978 | { | ||
| 2979 | struct trace_cpu *tc = inode->i_private; | ||
| 2980 | struct trace_array *tr = tc->tr; | ||
| 2981 | |||
| 2982 | if (tracing_disabled) | ||
| 2983 | return -ENODEV; | ||
| 2984 | |||
| 2985 | if (trace_array_get(tr) < 0) | ||
| 2986 | return -ENODEV; | ||
| 2987 | |||
| 2988 | filp->private_data = inode->i_private; | ||
| 2989 | |||
| 2990 | return 0; | ||
| 2991 | |||
| 2992 | } | ||
| 2993 | |||
| 2877 | static int tracing_release(struct inode *inode, struct file *file) | 2994 | static int tracing_release(struct inode *inode, struct file *file) |
| 2878 | { | 2995 | { |
| 2879 | struct seq_file *m = file->private_data; | 2996 | struct seq_file *m = file->private_data; |
| @@ -2881,17 +2998,19 @@ static int tracing_release(struct inode *inode, struct file *file) | |||
| 2881 | struct trace_array *tr; | 2998 | struct trace_array *tr; |
| 2882 | int cpu; | 2999 | int cpu; |
| 2883 | 3000 | ||
| 2884 | if (!(file->f_mode & FMODE_READ)) | 3001 | /* Writes do not use seq_file, need to grab tr from inode */ |
| 3002 | if (!(file->f_mode & FMODE_READ)) { | ||
| 3003 | struct trace_cpu *tc = inode->i_private; | ||
| 3004 | |||
| 3005 | trace_array_put(tc->tr); | ||
| 2885 | return 0; | 3006 | return 0; |
| 3007 | } | ||
| 2886 | 3008 | ||
| 2887 | iter = m->private; | 3009 | iter = m->private; |
| 2888 | tr = iter->tr; | 3010 | tr = iter->tr; |
| 2889 | 3011 | ||
| 2890 | mutex_lock(&trace_types_lock); | 3012 | mutex_lock(&trace_types_lock); |
| 2891 | 3013 | ||
| 2892 | WARN_ON(!tr->ref); | ||
| 2893 | tr->ref--; | ||
| 2894 | |||
| 2895 | for_each_tracing_cpu(cpu) { | 3014 | for_each_tracing_cpu(cpu) { |
| 2896 | if (iter->buffer_iter[cpu]) | 3015 | if (iter->buffer_iter[cpu]) |
| 2897 | ring_buffer_read_finish(iter->buffer_iter[cpu]); | 3016 | ring_buffer_read_finish(iter->buffer_iter[cpu]); |
| @@ -2903,6 +3022,9 @@ static int tracing_release(struct inode *inode, struct file *file) | |||
| 2903 | if (!iter->snapshot) | 3022 | if (!iter->snapshot) |
| 2904 | /* reenable tracing if it was previously enabled */ | 3023 | /* reenable tracing if it was previously enabled */ |
| 2905 | tracing_start_tr(tr); | 3024 | tracing_start_tr(tr); |
| 3025 | |||
| 3026 | __trace_array_put(tr); | ||
| 3027 | |||
| 2906 | mutex_unlock(&trace_types_lock); | 3028 | mutex_unlock(&trace_types_lock); |
| 2907 | 3029 | ||
| 2908 | mutex_destroy(&iter->mutex); | 3030 | mutex_destroy(&iter->mutex); |
| @@ -2910,20 +3032,49 @@ static int tracing_release(struct inode *inode, struct file *file) | |||
| 2910 | kfree(iter->trace); | 3032 | kfree(iter->trace); |
| 2911 | kfree(iter->buffer_iter); | 3033 | kfree(iter->buffer_iter); |
| 2912 | seq_release_private(inode, file); | 3034 | seq_release_private(inode, file); |
| 3035 | |||
| 2913 | return 0; | 3036 | return 0; |
| 2914 | } | 3037 | } |
| 2915 | 3038 | ||
| 3039 | static int tracing_release_generic_tr(struct inode *inode, struct file *file) | ||
| 3040 | { | ||
| 3041 | struct trace_array *tr = inode->i_private; | ||
| 3042 | |||
| 3043 | trace_array_put(tr); | ||
| 3044 | return 0; | ||
| 3045 | } | ||
| 3046 | |||
| 3047 | static int tracing_release_generic_tc(struct inode *inode, struct file *file) | ||
| 3048 | { | ||
| 3049 | struct trace_cpu *tc = inode->i_private; | ||
| 3050 | struct trace_array *tr = tc->tr; | ||
| 3051 | |||
| 3052 | trace_array_put(tr); | ||
| 3053 | return 0; | ||
| 3054 | } | ||
| 3055 | |||
| 3056 | static int tracing_single_release_tr(struct inode *inode, struct file *file) | ||
| 3057 | { | ||
| 3058 | struct trace_array *tr = inode->i_private; | ||
| 3059 | |||
| 3060 | trace_array_put(tr); | ||
| 3061 | |||
| 3062 | return single_release(inode, file); | ||
| 3063 | } | ||
| 3064 | |||
| 2916 | static int tracing_open(struct inode *inode, struct file *file) | 3065 | static int tracing_open(struct inode *inode, struct file *file) |
| 2917 | { | 3066 | { |
| 3067 | struct trace_cpu *tc = inode->i_private; | ||
| 3068 | struct trace_array *tr = tc->tr; | ||
| 2918 | struct trace_iterator *iter; | 3069 | struct trace_iterator *iter; |
| 2919 | int ret = 0; | 3070 | int ret = 0; |
| 2920 | 3071 | ||
| 3072 | if (trace_array_get(tr) < 0) | ||
| 3073 | return -ENODEV; | ||
| 3074 | |||
| 2921 | /* If this file was open for write, then erase contents */ | 3075 | /* If this file was open for write, then erase contents */ |
| 2922 | if ((file->f_mode & FMODE_WRITE) && | 3076 | if ((file->f_mode & FMODE_WRITE) && |
| 2923 | (file->f_flags & O_TRUNC)) { | 3077 | (file->f_flags & O_TRUNC)) { |
| 2924 | struct trace_cpu *tc = inode->i_private; | ||
| 2925 | struct trace_array *tr = tc->tr; | ||
| 2926 | |||
| 2927 | if (tc->cpu == RING_BUFFER_ALL_CPUS) | 3078 | if (tc->cpu == RING_BUFFER_ALL_CPUS) |
| 2928 | tracing_reset_online_cpus(&tr->trace_buffer); | 3079 | tracing_reset_online_cpus(&tr->trace_buffer); |
| 2929 | else | 3080 | else |
| @@ -2931,12 +3082,16 @@ static int tracing_open(struct inode *inode, struct file *file) | |||
| 2931 | } | 3082 | } |
| 2932 | 3083 | ||
| 2933 | if (file->f_mode & FMODE_READ) { | 3084 | if (file->f_mode & FMODE_READ) { |
| 2934 | iter = __tracing_open(inode, file, false); | 3085 | iter = __tracing_open(tr, tc, inode, file, false); |
| 2935 | if (IS_ERR(iter)) | 3086 | if (IS_ERR(iter)) |
| 2936 | ret = PTR_ERR(iter); | 3087 | ret = PTR_ERR(iter); |
| 2937 | else if (trace_flags & TRACE_ITER_LATENCY_FMT) | 3088 | else if (trace_flags & TRACE_ITER_LATENCY_FMT) |
| 2938 | iter->iter_flags |= TRACE_FILE_LAT_FMT; | 3089 | iter->iter_flags |= TRACE_FILE_LAT_FMT; |
| 2939 | } | 3090 | } |
| 3091 | |||
| 3092 | if (ret < 0) | ||
| 3093 | trace_array_put(tr); | ||
| 3094 | |||
| 2940 | return ret; | 3095 | return ret; |
| 2941 | } | 3096 | } |
| 2942 | 3097 | ||
| @@ -3293,17 +3448,27 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, | |||
| 3293 | 3448 | ||
| 3294 | static int tracing_trace_options_open(struct inode *inode, struct file *file) | 3449 | static int tracing_trace_options_open(struct inode *inode, struct file *file) |
| 3295 | { | 3450 | { |
| 3451 | struct trace_array *tr = inode->i_private; | ||
| 3452 | int ret; | ||
| 3453 | |||
| 3296 | if (tracing_disabled) | 3454 | if (tracing_disabled) |
| 3297 | return -ENODEV; | 3455 | return -ENODEV; |
| 3298 | 3456 | ||
| 3299 | return single_open(file, tracing_trace_options_show, inode->i_private); | 3457 | if (trace_array_get(tr) < 0) |
| 3458 | return -ENODEV; | ||
| 3459 | |||
| 3460 | ret = single_open(file, tracing_trace_options_show, inode->i_private); | ||
| 3461 | if (ret < 0) | ||
| 3462 | trace_array_put(tr); | ||
| 3463 | |||
| 3464 | return ret; | ||
| 3300 | } | 3465 | } |
| 3301 | 3466 | ||
| 3302 | static const struct file_operations tracing_iter_fops = { | 3467 | static const struct file_operations tracing_iter_fops = { |
| 3303 | .open = tracing_trace_options_open, | 3468 | .open = tracing_trace_options_open, |
| 3304 | .read = seq_read, | 3469 | .read = seq_read, |
| 3305 | .llseek = seq_lseek, | 3470 | .llseek = seq_lseek, |
| 3306 | .release = single_release, | 3471 | .release = tracing_single_release_tr, |
| 3307 | .write = tracing_trace_options_write, | 3472 | .write = tracing_trace_options_write, |
| 3308 | }; | 3473 | }; |
| 3309 | 3474 | ||
| @@ -3379,14 +3544,14 @@ static const char readme_msg[] = | |||
| 3379 | "\n snapshot\t\t- Like 'trace' but shows the content of the static snapshot buffer\n" | 3544 | "\n snapshot\t\t- Like 'trace' but shows the content of the static snapshot buffer\n" |
| 3380 | "\t\t\t Read the contents for more information\n" | 3545 | "\t\t\t Read the contents for more information\n" |
| 3381 | #endif | 3546 | #endif |
| 3382 | #ifdef CONFIG_STACKTRACE | 3547 | #ifdef CONFIG_STACK_TRACER |
| 3383 | " stack_trace\t\t- Shows the max stack trace when active\n" | 3548 | " stack_trace\t\t- Shows the max stack trace when active\n" |
| 3384 | " stack_max_size\t- Shows current max stack size that was traced\n" | 3549 | " stack_max_size\t- Shows current max stack size that was traced\n" |
| 3385 | "\t\t\t Write into this file to reset the max size (trigger a new trace)\n" | 3550 | "\t\t\t Write into this file to reset the max size (trigger a new trace)\n" |
| 3386 | #ifdef CONFIG_DYNAMIC_FTRACE | 3551 | #ifdef CONFIG_DYNAMIC_FTRACE |
| 3387 | " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace traces\n" | 3552 | " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace traces\n" |
| 3388 | #endif | 3553 | #endif |
| 3389 | #endif /* CONFIG_STACKTRACE */ | 3554 | #endif /* CONFIG_STACK_TRACER */ |
| 3390 | ; | 3555 | ; |
| 3391 | 3556 | ||
| 3392 | static ssize_t | 3557 | static ssize_t |
| @@ -3791,12 +3956,16 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) | |||
| 3791 | if (tracing_disabled) | 3956 | if (tracing_disabled) |
| 3792 | return -ENODEV; | 3957 | return -ENODEV; |
| 3793 | 3958 | ||
| 3959 | if (trace_array_get(tr) < 0) | ||
| 3960 | return -ENODEV; | ||
| 3961 | |||
| 3794 | mutex_lock(&trace_types_lock); | 3962 | mutex_lock(&trace_types_lock); |
| 3795 | 3963 | ||
| 3796 | /* create a buffer to store the information to pass to userspace */ | 3964 | /* create a buffer to store the information to pass to userspace */ |
| 3797 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); | 3965 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); |
| 3798 | if (!iter) { | 3966 | if (!iter) { |
| 3799 | ret = -ENOMEM; | 3967 | ret = -ENOMEM; |
| 3968 | __trace_array_put(tr); | ||
| 3800 | goto out; | 3969 | goto out; |
| 3801 | } | 3970 | } |
| 3802 | 3971 | ||
| @@ -3843,6 +4012,7 @@ out: | |||
| 3843 | fail: | 4012 | fail: |
| 3844 | kfree(iter->trace); | 4013 | kfree(iter->trace); |
| 3845 | kfree(iter); | 4014 | kfree(iter); |
| 4015 | __trace_array_put(tr); | ||
| 3846 | mutex_unlock(&trace_types_lock); | 4016 | mutex_unlock(&trace_types_lock); |
| 3847 | return ret; | 4017 | return ret; |
| 3848 | } | 4018 | } |
| @@ -3850,6 +4020,8 @@ fail: | |||
| 3850 | static int tracing_release_pipe(struct inode *inode, struct file *file) | 4020 | static int tracing_release_pipe(struct inode *inode, struct file *file) |
| 3851 | { | 4021 | { |
| 3852 | struct trace_iterator *iter = file->private_data; | 4022 | struct trace_iterator *iter = file->private_data; |
| 4023 | struct trace_cpu *tc = inode->i_private; | ||
| 4024 | struct trace_array *tr = tc->tr; | ||
| 3853 | 4025 | ||
| 3854 | mutex_lock(&trace_types_lock); | 4026 | mutex_lock(&trace_types_lock); |
| 3855 | 4027 | ||
| @@ -3863,6 +4035,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) | |||
| 3863 | kfree(iter->trace); | 4035 | kfree(iter->trace); |
| 3864 | kfree(iter); | 4036 | kfree(iter); |
| 3865 | 4037 | ||
| 4038 | trace_array_put(tr); | ||
| 4039 | |||
| 3866 | return 0; | 4040 | return 0; |
| 3867 | } | 4041 | } |
| 3868 | 4042 | ||
| @@ -3939,7 +4113,7 @@ static int tracing_wait_pipe(struct file *filp) | |||
| 3939 | * | 4113 | * |
| 3940 | * iter->pos will be 0 if we haven't read anything. | 4114 | * iter->pos will be 0 if we haven't read anything. |
| 3941 | */ | 4115 | */ |
| 3942 | if (!tracing_is_enabled() && iter->pos) | 4116 | if (!tracing_is_on() && iter->pos) |
| 3943 | break; | 4117 | break; |
| 3944 | } | 4118 | } |
| 3945 | 4119 | ||
| @@ -4320,6 +4494,8 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp) | |||
| 4320 | /* resize the ring buffer to 0 */ | 4494 | /* resize the ring buffer to 0 */ |
| 4321 | tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); | 4495 | tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); |
| 4322 | 4496 | ||
| 4497 | trace_array_put(tr); | ||
| 4498 | |||
| 4323 | return 0; | 4499 | return 0; |
| 4324 | } | 4500 | } |
| 4325 | 4501 | ||
| @@ -4328,6 +4504,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, | |||
| 4328 | size_t cnt, loff_t *fpos) | 4504 | size_t cnt, loff_t *fpos) |
| 4329 | { | 4505 | { |
| 4330 | unsigned long addr = (unsigned long)ubuf; | 4506 | unsigned long addr = (unsigned long)ubuf; |
| 4507 | struct trace_array *tr = filp->private_data; | ||
| 4331 | struct ring_buffer_event *event; | 4508 | struct ring_buffer_event *event; |
| 4332 | struct ring_buffer *buffer; | 4509 | struct ring_buffer *buffer; |
| 4333 | struct print_entry *entry; | 4510 | struct print_entry *entry; |
| @@ -4387,7 +4564,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, | |||
| 4387 | 4564 | ||
| 4388 | local_save_flags(irq_flags); | 4565 | local_save_flags(irq_flags); |
| 4389 | size = sizeof(*entry) + cnt + 2; /* possible \n added */ | 4566 | size = sizeof(*entry) + cnt + 2; /* possible \n added */ |
| 4390 | buffer = global_trace.trace_buffer.buffer; | 4567 | buffer = tr->trace_buffer.buffer; |
| 4391 | event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, | 4568 | event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, |
| 4392 | irq_flags, preempt_count()); | 4569 | irq_flags, preempt_count()); |
| 4393 | if (!event) { | 4570 | if (!event) { |
| @@ -4495,10 +4672,20 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, | |||
| 4495 | 4672 | ||
| 4496 | static int tracing_clock_open(struct inode *inode, struct file *file) | 4673 | static int tracing_clock_open(struct inode *inode, struct file *file) |
| 4497 | { | 4674 | { |
| 4675 | struct trace_array *tr = inode->i_private; | ||
| 4676 | int ret; | ||
| 4677 | |||
| 4498 | if (tracing_disabled) | 4678 | if (tracing_disabled) |
| 4499 | return -ENODEV; | 4679 | return -ENODEV; |
| 4500 | 4680 | ||
| 4501 | return single_open(file, tracing_clock_show, inode->i_private); | 4681 | if (trace_array_get(tr)) |
| 4682 | return -ENODEV; | ||
| 4683 | |||
| 4684 | ret = single_open(file, tracing_clock_show, inode->i_private); | ||
| 4685 | if (ret < 0) | ||
| 4686 | trace_array_put(tr); | ||
| 4687 | |||
| 4688 | return ret; | ||
| 4502 | } | 4689 | } |
| 4503 | 4690 | ||
| 4504 | struct ftrace_buffer_info { | 4691 | struct ftrace_buffer_info { |
| @@ -4511,30 +4698,40 @@ struct ftrace_buffer_info { | |||
| 4511 | static int tracing_snapshot_open(struct inode *inode, struct file *file) | 4698 | static int tracing_snapshot_open(struct inode *inode, struct file *file) |
| 4512 | { | 4699 | { |
| 4513 | struct trace_cpu *tc = inode->i_private; | 4700 | struct trace_cpu *tc = inode->i_private; |
| 4701 | struct trace_array *tr = tc->tr; | ||
| 4514 | struct trace_iterator *iter; | 4702 | struct trace_iterator *iter; |
| 4515 | struct seq_file *m; | 4703 | struct seq_file *m; |
| 4516 | int ret = 0; | 4704 | int ret = 0; |
| 4517 | 4705 | ||
| 4706 | if (trace_array_get(tr) < 0) | ||
| 4707 | return -ENODEV; | ||
| 4708 | |||
| 4518 | if (file->f_mode & FMODE_READ) { | 4709 | if (file->f_mode & FMODE_READ) { |
| 4519 | iter = __tracing_open(inode, file, true); | 4710 | iter = __tracing_open(tr, tc, inode, file, true); |
| 4520 | if (IS_ERR(iter)) | 4711 | if (IS_ERR(iter)) |
| 4521 | ret = PTR_ERR(iter); | 4712 | ret = PTR_ERR(iter); |
| 4522 | } else { | 4713 | } else { |
| 4523 | /* Writes still need the seq_file to hold the private data */ | 4714 | /* Writes still need the seq_file to hold the private data */ |
| 4715 | ret = -ENOMEM; | ||
| 4524 | m = kzalloc(sizeof(*m), GFP_KERNEL); | 4716 | m = kzalloc(sizeof(*m), GFP_KERNEL); |
| 4525 | if (!m) | 4717 | if (!m) |
| 4526 | return -ENOMEM; | 4718 | goto out; |
| 4527 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); | 4719 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); |
| 4528 | if (!iter) { | 4720 | if (!iter) { |
| 4529 | kfree(m); | 4721 | kfree(m); |
| 4530 | return -ENOMEM; | 4722 | goto out; |
| 4531 | } | 4723 | } |
| 4532 | iter->tr = tc->tr; | 4724 | ret = 0; |
| 4725 | |||
| 4726 | iter->tr = tr; | ||
| 4533 | iter->trace_buffer = &tc->tr->max_buffer; | 4727 | iter->trace_buffer = &tc->tr->max_buffer; |
| 4534 | iter->cpu_file = tc->cpu; | 4728 | iter->cpu_file = tc->cpu; |
| 4535 | m->private = iter; | 4729 | m->private = iter; |
| 4536 | file->private_data = m; | 4730 | file->private_data = m; |
| 4537 | } | 4731 | } |
| 4732 | out: | ||
| 4733 | if (ret < 0) | ||
| 4734 | trace_array_put(tr); | ||
| 4538 | 4735 | ||
| 4539 | return ret; | 4736 | return ret; |
| 4540 | } | 4737 | } |
| @@ -4616,9 +4813,12 @@ out: | |||
| 4616 | static int tracing_snapshot_release(struct inode *inode, struct file *file) | 4813 | static int tracing_snapshot_release(struct inode *inode, struct file *file) |
| 4617 | { | 4814 | { |
| 4618 | struct seq_file *m = file->private_data; | 4815 | struct seq_file *m = file->private_data; |
| 4816 | int ret; | ||
| 4817 | |||
| 4818 | ret = tracing_release(inode, file); | ||
| 4619 | 4819 | ||
| 4620 | if (file->f_mode & FMODE_READ) | 4820 | if (file->f_mode & FMODE_READ) |
| 4621 | return tracing_release(inode, file); | 4821 | return ret; |
| 4622 | 4822 | ||
| 4623 | /* If write only, the seq_file is just a stub */ | 4823 | /* If write only, the seq_file is just a stub */ |
| 4624 | if (m) | 4824 | if (m) |
| @@ -4684,34 +4884,38 @@ static const struct file_operations tracing_pipe_fops = { | |||
| 4684 | }; | 4884 | }; |
| 4685 | 4885 | ||
| 4686 | static const struct file_operations tracing_entries_fops = { | 4886 | static const struct file_operations tracing_entries_fops = { |
| 4687 | .open = tracing_open_generic, | 4887 | .open = tracing_open_generic_tc, |
| 4688 | .read = tracing_entries_read, | 4888 | .read = tracing_entries_read, |
| 4689 | .write = tracing_entries_write, | 4889 | .write = tracing_entries_write, |
| 4690 | .llseek = generic_file_llseek, | 4890 | .llseek = generic_file_llseek, |
| 4891 | .release = tracing_release_generic_tc, | ||
| 4691 | }; | 4892 | }; |
| 4692 | 4893 | ||
| 4693 | static const struct file_operations tracing_total_entries_fops = { | 4894 | static const struct file_operations tracing_total_entries_fops = { |
| 4694 | .open = tracing_open_generic, | 4895 | .open = tracing_open_generic_tr, |
| 4695 | .read = tracing_total_entries_read, | 4896 | .read = tracing_total_entries_read, |
| 4696 | .llseek = generic_file_llseek, | 4897 | .llseek = generic_file_llseek, |
| 4898 | .release = tracing_release_generic_tr, | ||
| 4697 | }; | 4899 | }; |
| 4698 | 4900 | ||
| 4699 | static const struct file_operations tracing_free_buffer_fops = { | 4901 | static const struct file_operations tracing_free_buffer_fops = { |
| 4902 | .open = tracing_open_generic_tr, | ||
| 4700 | .write = tracing_free_buffer_write, | 4903 | .write = tracing_free_buffer_write, |
| 4701 | .release = tracing_free_buffer_release, | 4904 | .release = tracing_free_buffer_release, |
| 4702 | }; | 4905 | }; |
| 4703 | 4906 | ||
| 4704 | static const struct file_operations tracing_mark_fops = { | 4907 | static const struct file_operations tracing_mark_fops = { |
| 4705 | .open = tracing_open_generic, | 4908 | .open = tracing_open_generic_tr, |
| 4706 | .write = tracing_mark_write, | 4909 | .write = tracing_mark_write, |
| 4707 | .llseek = generic_file_llseek, | 4910 | .llseek = generic_file_llseek, |
| 4911 | .release = tracing_release_generic_tr, | ||
| 4708 | }; | 4912 | }; |
| 4709 | 4913 | ||
| 4710 | static const struct file_operations trace_clock_fops = { | 4914 | static const struct file_operations trace_clock_fops = { |
| 4711 | .open = tracing_clock_open, | 4915 | .open = tracing_clock_open, |
| 4712 | .read = seq_read, | 4916 | .read = seq_read, |
| 4713 | .llseek = seq_lseek, | 4917 | .llseek = seq_lseek, |
| 4714 | .release = single_release, | 4918 | .release = tracing_single_release_tr, |
| 4715 | .write = tracing_clock_write, | 4919 | .write = tracing_clock_write, |
| 4716 | }; | 4920 | }; |
| 4717 | 4921 | ||
| @@ -4739,18 +4943,22 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) | |||
| 4739 | struct trace_cpu *tc = inode->i_private; | 4943 | struct trace_cpu *tc = inode->i_private; |
| 4740 | struct trace_array *tr = tc->tr; | 4944 | struct trace_array *tr = tc->tr; |
| 4741 | struct ftrace_buffer_info *info; | 4945 | struct ftrace_buffer_info *info; |
| 4946 | int ret; | ||
| 4742 | 4947 | ||
| 4743 | if (tracing_disabled) | 4948 | if (tracing_disabled) |
| 4744 | return -ENODEV; | 4949 | return -ENODEV; |
| 4745 | 4950 | ||
| 4951 | if (trace_array_get(tr) < 0) | ||
| 4952 | return -ENODEV; | ||
| 4953 | |||
| 4746 | info = kzalloc(sizeof(*info), GFP_KERNEL); | 4954 | info = kzalloc(sizeof(*info), GFP_KERNEL); |
| 4747 | if (!info) | 4955 | if (!info) { |
| 4956 | trace_array_put(tr); | ||
| 4748 | return -ENOMEM; | 4957 | return -ENOMEM; |
| 4958 | } | ||
| 4749 | 4959 | ||
| 4750 | mutex_lock(&trace_types_lock); | 4960 | mutex_lock(&trace_types_lock); |
| 4751 | 4961 | ||
| 4752 | tr->ref++; | ||
| 4753 | |||
| 4754 | info->iter.tr = tr; | 4962 | info->iter.tr = tr; |
| 4755 | info->iter.cpu_file = tc->cpu; | 4963 | info->iter.cpu_file = tc->cpu; |
| 4756 | info->iter.trace = tr->current_trace; | 4964 | info->iter.trace = tr->current_trace; |
| @@ -4763,7 +4971,11 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) | |||
| 4763 | 4971 | ||
| 4764 | mutex_unlock(&trace_types_lock); | 4972 | mutex_unlock(&trace_types_lock); |
| 4765 | 4973 | ||
| 4766 | return nonseekable_open(inode, filp); | 4974 | ret = nonseekable_open(inode, filp); |
| 4975 | if (ret < 0) | ||
| 4976 | trace_array_put(tr); | ||
| 4977 | |||
| 4978 | return ret; | ||
| 4767 | } | 4979 | } |
| 4768 | 4980 | ||
| 4769 | static unsigned int | 4981 | static unsigned int |
| @@ -4863,8 +5075,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) | |||
| 4863 | 5075 | ||
| 4864 | mutex_lock(&trace_types_lock); | 5076 | mutex_lock(&trace_types_lock); |
| 4865 | 5077 | ||
| 4866 | WARN_ON(!iter->tr->ref); | 5078 | __trace_array_put(iter->tr); |
| 4867 | iter->tr->ref--; | ||
| 4868 | 5079 | ||
| 4869 | if (info->spare) | 5080 | if (info->spare) |
| 4870 | ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare); | 5081 | ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare); |
| @@ -5126,9 +5337,10 @@ tracing_stats_read(struct file *filp, char __user *ubuf, | |||
| 5126 | } | 5337 | } |
| 5127 | 5338 | ||
| 5128 | static const struct file_operations tracing_stats_fops = { | 5339 | static const struct file_operations tracing_stats_fops = { |
| 5129 | .open = tracing_open_generic, | 5340 | .open = tracing_open_generic_tc, |
| 5130 | .read = tracing_stats_read, | 5341 | .read = tracing_stats_read, |
| 5131 | .llseek = generic_file_llseek, | 5342 | .llseek = generic_file_llseek, |
| 5343 | .release = tracing_release_generic_tc, | ||
| 5132 | }; | 5344 | }; |
| 5133 | 5345 | ||
| 5134 | #ifdef CONFIG_DYNAMIC_FTRACE | 5346 | #ifdef CONFIG_DYNAMIC_FTRACE |
| @@ -5612,15 +5824,10 @@ rb_simple_read(struct file *filp, char __user *ubuf, | |||
| 5612 | size_t cnt, loff_t *ppos) | 5824 | size_t cnt, loff_t *ppos) |
| 5613 | { | 5825 | { |
| 5614 | struct trace_array *tr = filp->private_data; | 5826 | struct trace_array *tr = filp->private_data; |
| 5615 | struct ring_buffer *buffer = tr->trace_buffer.buffer; | ||
| 5616 | char buf[64]; | 5827 | char buf[64]; |
| 5617 | int r; | 5828 | int r; |
| 5618 | 5829 | ||
| 5619 | if (buffer) | 5830 | r = tracer_tracing_is_on(tr); |
| 5620 | r = ring_buffer_record_is_on(buffer); | ||
| 5621 | else | ||
| 5622 | r = 0; | ||
| 5623 | |||
| 5624 | r = sprintf(buf, "%d\n", r); | 5831 | r = sprintf(buf, "%d\n", r); |
| 5625 | 5832 | ||
| 5626 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | 5833 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); |
| @@ -5642,11 +5849,11 @@ rb_simple_write(struct file *filp, const char __user *ubuf, | |||
| 5642 | if (buffer) { | 5849 | if (buffer) { |
| 5643 | mutex_lock(&trace_types_lock); | 5850 | mutex_lock(&trace_types_lock); |
| 5644 | if (val) { | 5851 | if (val) { |
| 5645 | ring_buffer_record_on(buffer); | 5852 | tracer_tracing_on(tr); |
| 5646 | if (tr->current_trace->start) | 5853 | if (tr->current_trace->start) |
| 5647 | tr->current_trace->start(tr); | 5854 | tr->current_trace->start(tr); |
| 5648 | } else { | 5855 | } else { |
| 5649 | ring_buffer_record_off(buffer); | 5856 | tracer_tracing_off(tr); |
| 5650 | if (tr->current_trace->stop) | 5857 | if (tr->current_trace->stop) |
| 5651 | tr->current_trace->stop(tr); | 5858 | tr->current_trace->stop(tr); |
| 5652 | } | 5859 | } |
| @@ -5659,9 +5866,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf, | |||
| 5659 | } | 5866 | } |
| 5660 | 5867 | ||
| 5661 | static const struct file_operations rb_simple_fops = { | 5868 | static const struct file_operations rb_simple_fops = { |
| 5662 | .open = tracing_open_generic, | 5869 | .open = tracing_open_generic_tr, |
| 5663 | .read = rb_simple_read, | 5870 | .read = rb_simple_read, |
| 5664 | .write = rb_simple_write, | 5871 | .write = rb_simple_write, |
| 5872 | .release = tracing_release_generic_tr, | ||
| 5665 | .llseek = default_llseek, | 5873 | .llseek = default_llseek, |
| 5666 | }; | 5874 | }; |
| 5667 | 5875 | ||
| @@ -5775,8 +5983,10 @@ static int new_instance_create(const char *name) | |||
| 5775 | goto out_free_tr; | 5983 | goto out_free_tr; |
| 5776 | 5984 | ||
| 5777 | ret = event_trace_add_tracer(tr->dir, tr); | 5985 | ret = event_trace_add_tracer(tr->dir, tr); |
| 5778 | if (ret) | 5986 | if (ret) { |
| 5987 | debugfs_remove_recursive(tr->dir); | ||
| 5779 | goto out_free_tr; | 5988 | goto out_free_tr; |
| 5989 | } | ||
| 5780 | 5990 | ||
| 5781 | init_tracer_debugfs(tr, tr->dir); | 5991 | init_tracer_debugfs(tr, tr->dir); |
| 5782 | 5992 | ||
| @@ -5933,7 +6143,7 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) | |||
| 5933 | trace_create_file("buffer_total_size_kb", 0444, d_tracer, | 6143 | trace_create_file("buffer_total_size_kb", 0444, d_tracer, |
| 5934 | tr, &tracing_total_entries_fops); | 6144 | tr, &tracing_total_entries_fops); |
| 5935 | 6145 | ||
| 5936 | trace_create_file("free_buffer", 0644, d_tracer, | 6146 | trace_create_file("free_buffer", 0200, d_tracer, |
| 5937 | tr, &tracing_free_buffer_fops); | 6147 | tr, &tracing_free_buffer_fops); |
| 5938 | 6148 | ||
| 5939 | trace_create_file("trace_marker", 0220, d_tracer, | 6149 | trace_create_file("trace_marker", 0220, d_tracer, |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 20572ed88c5c..e7d643b8a907 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
| @@ -214,7 +214,6 @@ struct trace_array { | |||
| 214 | struct dentry *event_dir; | 214 | struct dentry *event_dir; |
| 215 | struct list_head systems; | 215 | struct list_head systems; |
| 216 | struct list_head events; | 216 | struct list_head events; |
| 217 | struct task_struct *waiter; | ||
| 218 | int ref; | 217 | int ref; |
| 219 | }; | 218 | }; |
| 220 | 219 | ||
| @@ -224,6 +223,11 @@ enum { | |||
| 224 | 223 | ||
| 225 | extern struct list_head ftrace_trace_arrays; | 224 | extern struct list_head ftrace_trace_arrays; |
| 226 | 225 | ||
| 226 | extern struct mutex trace_types_lock; | ||
| 227 | |||
| 228 | extern int trace_array_get(struct trace_array *tr); | ||
| 229 | extern void trace_array_put(struct trace_array *tr); | ||
| 230 | |||
| 227 | /* | 231 | /* |
| 228 | * The global tracer (top) should be the first trace array added, | 232 | * The global tracer (top) should be the first trace array added, |
| 229 | * but we check the flag anyway. | 233 | * but we check the flag anyway. |
| @@ -554,11 +558,6 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu); | |||
| 554 | 558 | ||
| 555 | void poll_wait_pipe(struct trace_iterator *iter); | 559 | void poll_wait_pipe(struct trace_iterator *iter); |
| 556 | 560 | ||
| 557 | void ftrace(struct trace_array *tr, | ||
| 558 | struct trace_array_cpu *data, | ||
| 559 | unsigned long ip, | ||
| 560 | unsigned long parent_ip, | ||
| 561 | unsigned long flags, int pc); | ||
| 562 | void tracing_sched_switch_trace(struct trace_array *tr, | 561 | void tracing_sched_switch_trace(struct trace_array *tr, |
| 563 | struct task_struct *prev, | 562 | struct task_struct *prev, |
| 564 | struct task_struct *next, | 563 | struct task_struct *next, |
| @@ -680,6 +679,15 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace, | |||
| 680 | struct trace_array *tr); | 679 | struct trace_array *tr); |
| 681 | extern int trace_selftest_startup_branch(struct tracer *trace, | 680 | extern int trace_selftest_startup_branch(struct tracer *trace, |
| 682 | struct trace_array *tr); | 681 | struct trace_array *tr); |
| 682 | /* | ||
| 683 | * Tracer data references selftest functions that only occur | ||
| 684 | * on boot up. These can be __init functions. Thus, when selftests | ||
| 685 | * are enabled, then the tracers need to reference __init functions. | ||
| 686 | */ | ||
| 687 | #define __tracer_data __refdata | ||
| 688 | #else | ||
| 689 | /* Tracers are seldom changed. Optimize when selftests are disabled. */ | ||
| 690 | #define __tracer_data __read_mostly | ||
| 683 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ | 691 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ |
| 684 | 692 | ||
| 685 | extern void *head_page(struct trace_array_cpu *data); | 693 | extern void *head_page(struct trace_array_cpu *data); |
| @@ -774,6 +782,7 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) | |||
| 774 | extern struct list_head ftrace_pids; | 782 | extern struct list_head ftrace_pids; |
| 775 | 783 | ||
| 776 | #ifdef CONFIG_FUNCTION_TRACER | 784 | #ifdef CONFIG_FUNCTION_TRACER |
| 785 | extern bool ftrace_filter_param __initdata; | ||
| 777 | static inline int ftrace_trace_task(struct task_struct *task) | 786 | static inline int ftrace_trace_task(struct task_struct *task) |
| 778 | { | 787 | { |
| 779 | if (list_empty(&ftrace_pids)) | 788 | if (list_empty(&ftrace_pids)) |
| @@ -899,12 +908,6 @@ static inline void trace_branch_disable(void) | |||
| 899 | /* set ring buffers to default size if not already done so */ | 908 | /* set ring buffers to default size if not already done so */ |
| 900 | int tracing_update_buffers(void); | 909 | int tracing_update_buffers(void); |
| 901 | 910 | ||
| 902 | /* trace event type bit fields, not numeric */ | ||
| 903 | enum { | ||
| 904 | TRACE_EVENT_TYPE_PRINTF = 1, | ||
| 905 | TRACE_EVENT_TYPE_RAW = 2, | ||
| 906 | }; | ||
| 907 | |||
| 908 | struct ftrace_event_field { | 911 | struct ftrace_event_field { |
| 909 | struct list_head link; | 912 | struct list_head link; |
| 910 | const char *name; | 913 | const char *name; |
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 84b1e045faba..80c36bcf66e8 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c | |||
| @@ -236,6 +236,10 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, | |||
| 236 | 236 | ||
| 237 | BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); | 237 | BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); |
| 238 | 238 | ||
| 239 | if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, | ||
| 240 | "perf buffer not large enough")) | ||
| 241 | return NULL; | ||
| 242 | |||
| 239 | pc = preempt_count(); | 243 | pc = preempt_count(); |
| 240 | 244 | ||
| 241 | *rctxp = perf_swevent_get_recursion_context(); | 245 | *rctxp = perf_swevent_get_recursion_context(); |
| @@ -266,6 +270,10 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, | |||
| 266 | struct pt_regs regs; | 270 | struct pt_regs regs; |
| 267 | int rctx; | 271 | int rctx; |
| 268 | 272 | ||
| 273 | head = this_cpu_ptr(event_function.perf_events); | ||
| 274 | if (hlist_empty(head)) | ||
| 275 | return; | ||
| 276 | |||
| 269 | #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \ | 277 | #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \ |
| 270 | sizeof(u64)) - sizeof(u32)) | 278 | sizeof(u64)) - sizeof(u32)) |
| 271 | 279 | ||
| @@ -279,8 +287,6 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, | |||
| 279 | 287 | ||
| 280 | entry->ip = ip; | 288 | entry->ip = ip; |
| 281 | entry->parent_ip = parent_ip; | 289 | entry->parent_ip = parent_ip; |
| 282 | |||
| 283 | head = this_cpu_ptr(event_function.perf_events); | ||
| 284 | perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0, | 290 | perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0, |
| 285 | 1, ®s, head, NULL); | 291 | 1, ®s, head, NULL); |
| 286 | 292 | ||
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 27963e2bf4bf..898f868833f2 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
| @@ -41,6 +41,23 @@ static LIST_HEAD(ftrace_common_fields); | |||
| 41 | static struct kmem_cache *field_cachep; | 41 | static struct kmem_cache *field_cachep; |
| 42 | static struct kmem_cache *file_cachep; | 42 | static struct kmem_cache *file_cachep; |
| 43 | 43 | ||
| 44 | #define SYSTEM_FL_FREE_NAME (1 << 31) | ||
| 45 | |||
| 46 | static inline int system_refcount(struct event_subsystem *system) | ||
| 47 | { | ||
| 48 | return system->ref_count & ~SYSTEM_FL_FREE_NAME; | ||
| 49 | } | ||
| 50 | |||
| 51 | static int system_refcount_inc(struct event_subsystem *system) | ||
| 52 | { | ||
| 53 | return (system->ref_count++) & ~SYSTEM_FL_FREE_NAME; | ||
| 54 | } | ||
| 55 | |||
| 56 | static int system_refcount_dec(struct event_subsystem *system) | ||
| 57 | { | ||
| 58 | return (--system->ref_count) & ~SYSTEM_FL_FREE_NAME; | ||
| 59 | } | ||
| 60 | |||
| 44 | /* Double loops, do not use break, only goto's work */ | 61 | /* Double loops, do not use break, only goto's work */ |
| 45 | #define do_for_each_event_file(tr, file) \ | 62 | #define do_for_each_event_file(tr, file) \ |
| 46 | list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ | 63 | list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ |
| @@ -97,7 +114,7 @@ static int __trace_define_field(struct list_head *head, const char *type, | |||
| 97 | 114 | ||
| 98 | field = kmem_cache_alloc(field_cachep, GFP_TRACE); | 115 | field = kmem_cache_alloc(field_cachep, GFP_TRACE); |
| 99 | if (!field) | 116 | if (!field) |
| 100 | goto err; | 117 | return -ENOMEM; |
| 101 | 118 | ||
| 102 | field->name = name; | 119 | field->name = name; |
| 103 | field->type = type; | 120 | field->type = type; |
| @@ -114,11 +131,6 @@ static int __trace_define_field(struct list_head *head, const char *type, | |||
| 114 | list_add(&field->link, head); | 131 | list_add(&field->link, head); |
| 115 | 132 | ||
| 116 | return 0; | 133 | return 0; |
| 117 | |||
| 118 | err: | ||
| 119 | kmem_cache_free(field_cachep, field); | ||
| 120 | |||
| 121 | return -ENOMEM; | ||
| 122 | } | 134 | } |
| 123 | 135 | ||
| 124 | int trace_define_field(struct ftrace_event_call *call, const char *type, | 136 | int trace_define_field(struct ftrace_event_call *call, const char *type, |
| @@ -279,9 +291,11 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file, | |||
| 279 | } | 291 | } |
| 280 | call->class->reg(call, TRACE_REG_UNREGISTER, file); | 292 | call->class->reg(call, TRACE_REG_UNREGISTER, file); |
| 281 | } | 293 | } |
| 282 | /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT */ | 294 | /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */ |
| 283 | if (file->flags & FTRACE_EVENT_FL_SOFT_MODE) | 295 | if (file->flags & FTRACE_EVENT_FL_SOFT_MODE) |
| 284 | set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); | 296 | set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); |
| 297 | else | ||
| 298 | clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); | ||
| 285 | break; | 299 | break; |
| 286 | case 1: | 300 | case 1: |
| 287 | /* | 301 | /* |
| @@ -349,8 +363,8 @@ static void __put_system(struct event_subsystem *system) | |||
| 349 | { | 363 | { |
| 350 | struct event_filter *filter = system->filter; | 364 | struct event_filter *filter = system->filter; |
| 351 | 365 | ||
| 352 | WARN_ON_ONCE(system->ref_count == 0); | 366 | WARN_ON_ONCE(system_refcount(system) == 0); |
| 353 | if (--system->ref_count) | 367 | if (system_refcount_dec(system)) |
| 354 | return; | 368 | return; |
| 355 | 369 | ||
| 356 | list_del(&system->list); | 370 | list_del(&system->list); |
| @@ -359,13 +373,15 @@ static void __put_system(struct event_subsystem *system) | |||
| 359 | kfree(filter->filter_string); | 373 | kfree(filter->filter_string); |
| 360 | kfree(filter); | 374 | kfree(filter); |
| 361 | } | 375 | } |
| 376 | if (system->ref_count & SYSTEM_FL_FREE_NAME) | ||
| 377 | kfree(system->name); | ||
| 362 | kfree(system); | 378 | kfree(system); |
| 363 | } | 379 | } |
| 364 | 380 | ||
| 365 | static void __get_system(struct event_subsystem *system) | 381 | static void __get_system(struct event_subsystem *system) |
| 366 | { | 382 | { |
| 367 | WARN_ON_ONCE(system->ref_count == 0); | 383 | WARN_ON_ONCE(system_refcount(system) == 0); |
| 368 | system->ref_count++; | 384 | system_refcount_inc(system); |
| 369 | } | 385 | } |
| 370 | 386 | ||
| 371 | static void __get_system_dir(struct ftrace_subsystem_dir *dir) | 387 | static void __get_system_dir(struct ftrace_subsystem_dir *dir) |
| @@ -379,7 +395,7 @@ static void __put_system_dir(struct ftrace_subsystem_dir *dir) | |||
| 379 | { | 395 | { |
| 380 | WARN_ON_ONCE(dir->ref_count == 0); | 396 | WARN_ON_ONCE(dir->ref_count == 0); |
| 381 | /* If the subsystem is about to be freed, the dir must be too */ | 397 | /* If the subsystem is about to be freed, the dir must be too */ |
| 382 | WARN_ON_ONCE(dir->subsystem->ref_count == 1 && dir->ref_count != 1); | 398 | WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1); |
| 383 | 399 | ||
| 384 | __put_system(dir->subsystem); | 400 | __put_system(dir->subsystem); |
| 385 | if (!--dir->ref_count) | 401 | if (!--dir->ref_count) |
| @@ -394,16 +410,45 @@ static void put_system(struct ftrace_subsystem_dir *dir) | |||
| 394 | } | 410 | } |
| 395 | 411 | ||
| 396 | /* | 412 | /* |
| 413 | * Open and update trace_array ref count. | ||
| 414 | * Must have the current trace_array passed to it. | ||
| 415 | */ | ||
| 416 | static int tracing_open_generic_file(struct inode *inode, struct file *filp) | ||
| 417 | { | ||
| 418 | struct ftrace_event_file *file = inode->i_private; | ||
| 419 | struct trace_array *tr = file->tr; | ||
| 420 | int ret; | ||
| 421 | |||
| 422 | if (trace_array_get(tr) < 0) | ||
| 423 | return -ENODEV; | ||
| 424 | |||
| 425 | ret = tracing_open_generic(inode, filp); | ||
| 426 | if (ret < 0) | ||
| 427 | trace_array_put(tr); | ||
| 428 | return ret; | ||
| 429 | } | ||
| 430 | |||
| 431 | static int tracing_release_generic_file(struct inode *inode, struct file *filp) | ||
| 432 | { | ||
| 433 | struct ftrace_event_file *file = inode->i_private; | ||
| 434 | struct trace_array *tr = file->tr; | ||
| 435 | |||
| 436 | trace_array_put(tr); | ||
| 437 | |||
| 438 | return 0; | ||
| 439 | } | ||
| 440 | |||
| 441 | /* | ||
| 397 | * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. | 442 | * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. |
| 398 | */ | 443 | */ |
| 399 | static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, | 444 | static int |
| 400 | const char *sub, const char *event, int set) | 445 | __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match, |
| 446 | const char *sub, const char *event, int set) | ||
| 401 | { | 447 | { |
| 402 | struct ftrace_event_file *file; | 448 | struct ftrace_event_file *file; |
| 403 | struct ftrace_event_call *call; | 449 | struct ftrace_event_call *call; |
| 404 | int ret = -EINVAL; | 450 | int ret = -EINVAL; |
| 405 | 451 | ||
| 406 | mutex_lock(&event_mutex); | ||
| 407 | list_for_each_entry(file, &tr->events, list) { | 452 | list_for_each_entry(file, &tr->events, list) { |
| 408 | 453 | ||
| 409 | call = file->event_call; | 454 | call = file->event_call; |
| @@ -429,6 +474,17 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, | |||
| 429 | 474 | ||
| 430 | ret = 0; | 475 | ret = 0; |
| 431 | } | 476 | } |
| 477 | |||
| 478 | return ret; | ||
| 479 | } | ||
| 480 | |||
| 481 | static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, | ||
| 482 | const char *sub, const char *event, int set) | ||
| 483 | { | ||
| 484 | int ret; | ||
| 485 | |||
| 486 | mutex_lock(&event_mutex); | ||
| 487 | ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set); | ||
| 432 | mutex_unlock(&event_mutex); | 488 | mutex_unlock(&event_mutex); |
| 433 | 489 | ||
| 434 | return ret; | 490 | return ret; |
| @@ -624,17 +680,17 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, | |||
| 624 | loff_t *ppos) | 680 | loff_t *ppos) |
| 625 | { | 681 | { |
| 626 | struct ftrace_event_file *file = filp->private_data; | 682 | struct ftrace_event_file *file = filp->private_data; |
| 627 | char *buf; | 683 | char buf[4] = "0"; |
| 628 | 684 | ||
| 629 | if (file->flags & FTRACE_EVENT_FL_ENABLED) { | 685 | if (file->flags & FTRACE_EVENT_FL_ENABLED && |
| 630 | if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED) | 686 | !(file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)) |
| 631 | buf = "0*\n"; | 687 | strcpy(buf, "1"); |
| 632 | else if (file->flags & FTRACE_EVENT_FL_SOFT_MODE) | 688 | |
| 633 | buf = "1*\n"; | 689 | if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED || |
| 634 | else | 690 | file->flags & FTRACE_EVENT_FL_SOFT_MODE) |
| 635 | buf = "1\n"; | 691 | strcat(buf, "*"); |
| 636 | } else | 692 | |
| 637 | buf = "0\n"; | 693 | strcat(buf, "\n"); |
| 638 | 694 | ||
| 639 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf)); | 695 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf)); |
| 640 | } | 696 | } |
| @@ -770,59 +826,33 @@ enum { | |||
| 770 | static void *f_next(struct seq_file *m, void *v, loff_t *pos) | 826 | static void *f_next(struct seq_file *m, void *v, loff_t *pos) |
| 771 | { | 827 | { |
| 772 | struct ftrace_event_call *call = m->private; | 828 | struct ftrace_event_call *call = m->private; |
| 773 | struct ftrace_event_field *field; | ||
| 774 | struct list_head *common_head = &ftrace_common_fields; | 829 | struct list_head *common_head = &ftrace_common_fields; |
| 775 | struct list_head *head = trace_get_fields(call); | 830 | struct list_head *head = trace_get_fields(call); |
| 831 | struct list_head *node = v; | ||
| 776 | 832 | ||
| 777 | (*pos)++; | 833 | (*pos)++; |
| 778 | 834 | ||
| 779 | switch ((unsigned long)v) { | 835 | switch ((unsigned long)v) { |
| 780 | case FORMAT_HEADER: | 836 | case FORMAT_HEADER: |
| 781 | if (unlikely(list_empty(common_head))) | 837 | node = common_head; |
| 782 | return NULL; | 838 | break; |
| 783 | |||
| 784 | field = list_entry(common_head->prev, | ||
| 785 | struct ftrace_event_field, link); | ||
| 786 | return field; | ||
| 787 | 839 | ||
| 788 | case FORMAT_FIELD_SEPERATOR: | 840 | case FORMAT_FIELD_SEPERATOR: |
| 789 | if (unlikely(list_empty(head))) | 841 | node = head; |
| 790 | return NULL; | 842 | break; |
| 791 | |||
| 792 | field = list_entry(head->prev, struct ftrace_event_field, link); | ||
| 793 | return field; | ||
| 794 | 843 | ||
| 795 | case FORMAT_PRINTFMT: | 844 | case FORMAT_PRINTFMT: |
| 796 | /* all done */ | 845 | /* all done */ |
| 797 | return NULL; | 846 | return NULL; |
| 798 | } | 847 | } |
| 799 | 848 | ||
| 800 | field = v; | 849 | node = node->prev; |
| 801 | if (field->link.prev == common_head) | 850 | if (node == common_head) |
| 802 | return (void *)FORMAT_FIELD_SEPERATOR; | 851 | return (void *)FORMAT_FIELD_SEPERATOR; |
| 803 | else if (field->link.prev == head) | 852 | else if (node == head) |
| 804 | return (void *)FORMAT_PRINTFMT; | 853 | return (void *)FORMAT_PRINTFMT; |
| 805 | 854 | else | |
| 806 | field = list_entry(field->link.prev, struct ftrace_event_field, link); | 855 | return node; |
| 807 | |||
| 808 | return field; | ||
| 809 | } | ||
| 810 | |||
| 811 | static void *f_start(struct seq_file *m, loff_t *pos) | ||
| 812 | { | ||
| 813 | loff_t l = 0; | ||
| 814 | void *p; | ||
| 815 | |||
| 816 | /* Start by showing the header */ | ||
| 817 | if (!*pos) | ||
| 818 | return (void *)FORMAT_HEADER; | ||
| 819 | |||
| 820 | p = (void *)FORMAT_HEADER; | ||
| 821 | do { | ||
| 822 | p = f_next(m, p, &l); | ||
| 823 | } while (p && l < *pos); | ||
| 824 | |||
| 825 | return p; | ||
| 826 | } | 856 | } |
| 827 | 857 | ||
| 828 | static int f_show(struct seq_file *m, void *v) | 858 | static int f_show(struct seq_file *m, void *v) |
| @@ -848,8 +878,7 @@ static int f_show(struct seq_file *m, void *v) | |||
| 848 | return 0; | 878 | return 0; |
| 849 | } | 879 | } |
| 850 | 880 | ||
| 851 | field = v; | 881 | field = list_entry(v, struct ftrace_event_field, link); |
| 852 | |||
| 853 | /* | 882 | /* |
| 854 | * Smartly shows the array type(except dynamic array). | 883 | * Smartly shows the array type(except dynamic array). |
| 855 | * Normal: | 884 | * Normal: |
| @@ -876,6 +905,17 @@ static int f_show(struct seq_file *m, void *v) | |||
| 876 | return 0; | 905 | return 0; |
| 877 | } | 906 | } |
| 878 | 907 | ||
| 908 | static void *f_start(struct seq_file *m, loff_t *pos) | ||
| 909 | { | ||
| 910 | void *p = (void *)FORMAT_HEADER; | ||
| 911 | loff_t l = 0; | ||
| 912 | |||
| 913 | while (l < *pos && p) | ||
| 914 | p = f_next(m, p, &l); | ||
| 915 | |||
| 916 | return p; | ||
| 917 | } | ||
| 918 | |||
| 879 | static void f_stop(struct seq_file *m, void *p) | 919 | static void f_stop(struct seq_file *m, void *p) |
| 880 | { | 920 | { |
| 881 | } | 921 | } |
| @@ -907,23 +947,14 @@ static ssize_t | |||
| 907 | event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) | 947 | event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) |
| 908 | { | 948 | { |
| 909 | struct ftrace_event_call *call = filp->private_data; | 949 | struct ftrace_event_call *call = filp->private_data; |
| 910 | struct trace_seq *s; | 950 | char buf[32]; |
| 911 | int r; | 951 | int len; |
| 912 | 952 | ||
| 913 | if (*ppos) | 953 | if (*ppos) |
| 914 | return 0; | 954 | return 0; |
| 915 | 955 | ||
| 916 | s = kmalloc(sizeof(*s), GFP_KERNEL); | 956 | len = sprintf(buf, "%d\n", call->event.type); |
| 917 | if (!s) | 957 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); |
| 918 | return -ENOMEM; | ||
| 919 | |||
| 920 | trace_seq_init(s); | ||
| 921 | trace_seq_printf(s, "%d\n", call->event.type); | ||
| 922 | |||
| 923 | r = simple_read_from_buffer(ubuf, cnt, ppos, | ||
| 924 | s->buffer, s->len); | ||
| 925 | kfree(s); | ||
| 926 | return r; | ||
| 927 | } | 958 | } |
| 928 | 959 | ||
| 929 | static ssize_t | 960 | static ssize_t |
| @@ -992,6 +1023,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) | |||
| 992 | int ret; | 1023 | int ret; |
| 993 | 1024 | ||
| 994 | /* Make sure the system still exists */ | 1025 | /* Make sure the system still exists */ |
| 1026 | mutex_lock(&trace_types_lock); | ||
| 995 | mutex_lock(&event_mutex); | 1027 | mutex_lock(&event_mutex); |
| 996 | list_for_each_entry(tr, &ftrace_trace_arrays, list) { | 1028 | list_for_each_entry(tr, &ftrace_trace_arrays, list) { |
| 997 | list_for_each_entry(dir, &tr->systems, list) { | 1029 | list_for_each_entry(dir, &tr->systems, list) { |
| @@ -1007,6 +1039,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) | |||
| 1007 | } | 1039 | } |
| 1008 | exit_loop: | 1040 | exit_loop: |
| 1009 | mutex_unlock(&event_mutex); | 1041 | mutex_unlock(&event_mutex); |
| 1042 | mutex_unlock(&trace_types_lock); | ||
| 1010 | 1043 | ||
| 1011 | if (!system) | 1044 | if (!system) |
| 1012 | return -ENODEV; | 1045 | return -ENODEV; |
| @@ -1014,9 +1047,17 @@ static int subsystem_open(struct inode *inode, struct file *filp) | |||
| 1014 | /* Some versions of gcc think dir can be uninitialized here */ | 1047 | /* Some versions of gcc think dir can be uninitialized here */ |
| 1015 | WARN_ON(!dir); | 1048 | WARN_ON(!dir); |
| 1016 | 1049 | ||
| 1050 | /* Still need to increment the ref count of the system */ | ||
| 1051 | if (trace_array_get(tr) < 0) { | ||
| 1052 | put_system(dir); | ||
| 1053 | return -ENODEV; | ||
| 1054 | } | ||
| 1055 | |||
| 1017 | ret = tracing_open_generic(inode, filp); | 1056 | ret = tracing_open_generic(inode, filp); |
| 1018 | if (ret < 0) | 1057 | if (ret < 0) { |
| 1058 | trace_array_put(tr); | ||
| 1019 | put_system(dir); | 1059 | put_system(dir); |
| 1060 | } | ||
| 1020 | 1061 | ||
| 1021 | return ret; | 1062 | return ret; |
| 1022 | } | 1063 | } |
| @@ -1027,16 +1068,23 @@ static int system_tr_open(struct inode *inode, struct file *filp) | |||
| 1027 | struct trace_array *tr = inode->i_private; | 1068 | struct trace_array *tr = inode->i_private; |
| 1028 | int ret; | 1069 | int ret; |
| 1029 | 1070 | ||
| 1071 | if (trace_array_get(tr) < 0) | ||
| 1072 | return -ENODEV; | ||
| 1073 | |||
| 1030 | /* Make a temporary dir that has no system but points to tr */ | 1074 | /* Make a temporary dir that has no system but points to tr */ |
| 1031 | dir = kzalloc(sizeof(*dir), GFP_KERNEL); | 1075 | dir = kzalloc(sizeof(*dir), GFP_KERNEL); |
| 1032 | if (!dir) | 1076 | if (!dir) { |
| 1077 | trace_array_put(tr); | ||
| 1033 | return -ENOMEM; | 1078 | return -ENOMEM; |
| 1079 | } | ||
| 1034 | 1080 | ||
| 1035 | dir->tr = tr; | 1081 | dir->tr = tr; |
| 1036 | 1082 | ||
| 1037 | ret = tracing_open_generic(inode, filp); | 1083 | ret = tracing_open_generic(inode, filp); |
| 1038 | if (ret < 0) | 1084 | if (ret < 0) { |
| 1085 | trace_array_put(tr); | ||
| 1039 | kfree(dir); | 1086 | kfree(dir); |
| 1087 | } | ||
| 1040 | 1088 | ||
| 1041 | filp->private_data = dir; | 1089 | filp->private_data = dir; |
| 1042 | 1090 | ||
| @@ -1047,6 +1095,8 @@ static int subsystem_release(struct inode *inode, struct file *file) | |||
| 1047 | { | 1095 | { |
| 1048 | struct ftrace_subsystem_dir *dir = file->private_data; | 1096 | struct ftrace_subsystem_dir *dir = file->private_data; |
| 1049 | 1097 | ||
| 1098 | trace_array_put(dir->tr); | ||
| 1099 | |||
| 1050 | /* | 1100 | /* |
| 1051 | * If dir->subsystem is NULL, then this is a temporary | 1101 | * If dir->subsystem is NULL, then this is a temporary |
| 1052 | * descriptor that was made for a trace_array to enable | 1102 | * descriptor that was made for a trace_array to enable |
| @@ -1143,6 +1193,7 @@ show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) | |||
| 1143 | 1193 | ||
| 1144 | static int ftrace_event_avail_open(struct inode *inode, struct file *file); | 1194 | static int ftrace_event_avail_open(struct inode *inode, struct file *file); |
| 1145 | static int ftrace_event_set_open(struct inode *inode, struct file *file); | 1195 | static int ftrace_event_set_open(struct inode *inode, struct file *file); |
| 1196 | static int ftrace_event_release(struct inode *inode, struct file *file); | ||
| 1146 | 1197 | ||
| 1147 | static const struct seq_operations show_event_seq_ops = { | 1198 | static const struct seq_operations show_event_seq_ops = { |
| 1148 | .start = t_start, | 1199 | .start = t_start, |
| @@ -1170,13 +1221,14 @@ static const struct file_operations ftrace_set_event_fops = { | |||
| 1170 | .read = seq_read, | 1221 | .read = seq_read, |
| 1171 | .write = ftrace_event_write, | 1222 | .write = ftrace_event_write, |
| 1172 | .llseek = seq_lseek, | 1223 | .llseek = seq_lseek, |
| 1173 | .release = seq_release, | 1224 | .release = ftrace_event_release, |
| 1174 | }; | 1225 | }; |
| 1175 | 1226 | ||
| 1176 | static const struct file_operations ftrace_enable_fops = { | 1227 | static const struct file_operations ftrace_enable_fops = { |
| 1177 | .open = tracing_open_generic, | 1228 | .open = tracing_open_generic_file, |
| 1178 | .read = event_enable_read, | 1229 | .read = event_enable_read, |
| 1179 | .write = event_enable_write, | 1230 | .write = event_enable_write, |
| 1231 | .release = tracing_release_generic_file, | ||
| 1180 | .llseek = default_llseek, | 1232 | .llseek = default_llseek, |
| 1181 | }; | 1233 | }; |
| 1182 | 1234 | ||
| @@ -1247,6 +1299,15 @@ ftrace_event_open(struct inode *inode, struct file *file, | |||
| 1247 | return ret; | 1299 | return ret; |
| 1248 | } | 1300 | } |
| 1249 | 1301 | ||
| 1302 | static int ftrace_event_release(struct inode *inode, struct file *file) | ||
| 1303 | { | ||
| 1304 | struct trace_array *tr = inode->i_private; | ||
| 1305 | |||
| 1306 | trace_array_put(tr); | ||
| 1307 | |||
| 1308 | return seq_release(inode, file); | ||
| 1309 | } | ||
| 1310 | |||
| 1250 | static int | 1311 | static int |
| 1251 | ftrace_event_avail_open(struct inode *inode, struct file *file) | 1312 | ftrace_event_avail_open(struct inode *inode, struct file *file) |
| 1252 | { | 1313 | { |
| @@ -1260,12 +1321,19 @@ ftrace_event_set_open(struct inode *inode, struct file *file) | |||
| 1260 | { | 1321 | { |
| 1261 | const struct seq_operations *seq_ops = &show_set_event_seq_ops; | 1322 | const struct seq_operations *seq_ops = &show_set_event_seq_ops; |
| 1262 | struct trace_array *tr = inode->i_private; | 1323 | struct trace_array *tr = inode->i_private; |
| 1324 | int ret; | ||
| 1325 | |||
| 1326 | if (trace_array_get(tr) < 0) | ||
| 1327 | return -ENODEV; | ||
| 1263 | 1328 | ||
| 1264 | if ((file->f_mode & FMODE_WRITE) && | 1329 | if ((file->f_mode & FMODE_WRITE) && |
| 1265 | (file->f_flags & O_TRUNC)) | 1330 | (file->f_flags & O_TRUNC)) |
| 1266 | ftrace_clear_events(tr); | 1331 | ftrace_clear_events(tr); |
| 1267 | 1332 | ||
| 1268 | return ftrace_event_open(inode, file, seq_ops); | 1333 | ret = ftrace_event_open(inode, file, seq_ops); |
| 1334 | if (ret < 0) | ||
| 1335 | trace_array_put(tr); | ||
| 1336 | return ret; | ||
| 1269 | } | 1337 | } |
| 1270 | 1338 | ||
| 1271 | static struct event_subsystem * | 1339 | static struct event_subsystem * |
| @@ -1279,7 +1347,15 @@ create_new_subsystem(const char *name) | |||
| 1279 | return NULL; | 1347 | return NULL; |
| 1280 | 1348 | ||
| 1281 | system->ref_count = 1; | 1349 | system->ref_count = 1; |
| 1282 | system->name = name; | 1350 | |
| 1351 | /* Only allocate if dynamic (kprobes and modules) */ | ||
| 1352 | if (!core_kernel_data((unsigned long)name)) { | ||
| 1353 | system->ref_count |= SYSTEM_FL_FREE_NAME; | ||
| 1354 | system->name = kstrdup(name, GFP_KERNEL); | ||
| 1355 | if (!system->name) | ||
| 1356 | goto out_free; | ||
| 1357 | } else | ||
| 1358 | system->name = name; | ||
| 1283 | 1359 | ||
| 1284 | system->filter = NULL; | 1360 | system->filter = NULL; |
| 1285 | 1361 | ||
| @@ -1292,6 +1368,8 @@ create_new_subsystem(const char *name) | |||
| 1292 | return system; | 1368 | return system; |
| 1293 | 1369 | ||
| 1294 | out_free: | 1370 | out_free: |
| 1371 | if (system->ref_count & SYSTEM_FL_FREE_NAME) | ||
| 1372 | kfree(system->name); | ||
| 1295 | kfree(system); | 1373 | kfree(system); |
| 1296 | return NULL; | 1374 | return NULL; |
| 1297 | } | 1375 | } |
| @@ -1591,6 +1669,7 @@ static void __add_event_to_tracers(struct ftrace_event_call *call, | |||
| 1591 | int trace_add_event_call(struct ftrace_event_call *call) | 1669 | int trace_add_event_call(struct ftrace_event_call *call) |
| 1592 | { | 1670 | { |
| 1593 | int ret; | 1671 | int ret; |
| 1672 | mutex_lock(&trace_types_lock); | ||
| 1594 | mutex_lock(&event_mutex); | 1673 | mutex_lock(&event_mutex); |
| 1595 | 1674 | ||
| 1596 | ret = __register_event(call, NULL); | 1675 | ret = __register_event(call, NULL); |
| @@ -1598,11 +1677,13 @@ int trace_add_event_call(struct ftrace_event_call *call) | |||
| 1598 | __add_event_to_tracers(call, NULL); | 1677 | __add_event_to_tracers(call, NULL); |
| 1599 | 1678 | ||
| 1600 | mutex_unlock(&event_mutex); | 1679 | mutex_unlock(&event_mutex); |
| 1680 | mutex_unlock(&trace_types_lock); | ||
| 1601 | return ret; | 1681 | return ret; |
| 1602 | } | 1682 | } |
| 1603 | 1683 | ||
| 1604 | /* | 1684 | /* |
| 1605 | * Must be called under locking both of event_mutex and trace_event_sem. | 1685 | * Must be called under locking of trace_types_lock, event_mutex and |
| 1686 | * trace_event_sem. | ||
| 1606 | */ | 1687 | */ |
| 1607 | static void __trace_remove_event_call(struct ftrace_event_call *call) | 1688 | static void __trace_remove_event_call(struct ftrace_event_call *call) |
| 1608 | { | 1689 | { |
| @@ -1614,11 +1695,13 @@ static void __trace_remove_event_call(struct ftrace_event_call *call) | |||
| 1614 | /* Remove an event_call */ | 1695 | /* Remove an event_call */ |
| 1615 | void trace_remove_event_call(struct ftrace_event_call *call) | 1696 | void trace_remove_event_call(struct ftrace_event_call *call) |
| 1616 | { | 1697 | { |
| 1698 | mutex_lock(&trace_types_lock); | ||
| 1617 | mutex_lock(&event_mutex); | 1699 | mutex_lock(&event_mutex); |
| 1618 | down_write(&trace_event_sem); | 1700 | down_write(&trace_event_sem); |
| 1619 | __trace_remove_event_call(call); | 1701 | __trace_remove_event_call(call); |
| 1620 | up_write(&trace_event_sem); | 1702 | up_write(&trace_event_sem); |
| 1621 | mutex_unlock(&event_mutex); | 1703 | mutex_unlock(&event_mutex); |
| 1704 | mutex_unlock(&trace_types_lock); | ||
| 1622 | } | 1705 | } |
| 1623 | 1706 | ||
| 1624 | #define for_each_event(event, start, end) \ | 1707 | #define for_each_event(event, start, end) \ |
| @@ -1762,6 +1845,7 @@ static int trace_module_notify(struct notifier_block *self, | |||
| 1762 | { | 1845 | { |
| 1763 | struct module *mod = data; | 1846 | struct module *mod = data; |
| 1764 | 1847 | ||
| 1848 | mutex_lock(&trace_types_lock); | ||
| 1765 | mutex_lock(&event_mutex); | 1849 | mutex_lock(&event_mutex); |
| 1766 | switch (val) { | 1850 | switch (val) { |
| 1767 | case MODULE_STATE_COMING: | 1851 | case MODULE_STATE_COMING: |
| @@ -1772,6 +1856,7 @@ static int trace_module_notify(struct notifier_block *self, | |||
| 1772 | break; | 1856 | break; |
| 1773 | } | 1857 | } |
| 1774 | mutex_unlock(&event_mutex); | 1858 | mutex_unlock(&event_mutex); |
| 1859 | mutex_unlock(&trace_types_lock); | ||
| 1775 | 1860 | ||
| 1776 | return 0; | 1861 | return 0; |
| 1777 | } | 1862 | } |
| @@ -2011,10 +2096,7 @@ event_enable_func(struct ftrace_hash *hash, | |||
| 2011 | int ret; | 2096 | int ret; |
| 2012 | 2097 | ||
| 2013 | /* hash funcs only work with set_ftrace_filter */ | 2098 | /* hash funcs only work with set_ftrace_filter */ |
| 2014 | if (!enabled) | 2099 | if (!enabled || !param) |
| 2015 | return -EINVAL; | ||
| 2016 | |||
| 2017 | if (!param) | ||
| 2018 | return -EINVAL; | 2100 | return -EINVAL; |
| 2019 | 2101 | ||
| 2020 | system = strsep(¶m, ":"); | 2102 | system = strsep(¶m, ":"); |
| @@ -2329,11 +2411,11 @@ early_event_add_tracer(struct dentry *parent, struct trace_array *tr) | |||
| 2329 | 2411 | ||
| 2330 | int event_trace_del_tracer(struct trace_array *tr) | 2412 | int event_trace_del_tracer(struct trace_array *tr) |
| 2331 | { | 2413 | { |
| 2332 | /* Disable any running events */ | ||
| 2333 | __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0); | ||
| 2334 | |||
| 2335 | mutex_lock(&event_mutex); | 2414 | mutex_lock(&event_mutex); |
| 2336 | 2415 | ||
| 2416 | /* Disable any running events */ | ||
| 2417 | __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); | ||
| 2418 | |||
| 2337 | down_write(&trace_event_sem); | 2419 | down_write(&trace_event_sem); |
| 2338 | __trace_remove_event_dirs(tr); | 2420 | __trace_remove_event_dirs(tr); |
| 2339 | debugfs_remove_recursive(tr->event_dir); | 2421 | debugfs_remove_recursive(tr->event_dir); |
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index e1b653f7e1ca..0c7b75a8acc8 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c | |||
| @@ -44,6 +44,7 @@ enum filter_op_ids | |||
| 44 | OP_LE, | 44 | OP_LE, |
| 45 | OP_GT, | 45 | OP_GT, |
| 46 | OP_GE, | 46 | OP_GE, |
| 47 | OP_BAND, | ||
| 47 | OP_NONE, | 48 | OP_NONE, |
| 48 | OP_OPEN_PAREN, | 49 | OP_OPEN_PAREN, |
| 49 | }; | 50 | }; |
| @@ -54,6 +55,7 @@ struct filter_op { | |||
| 54 | int precedence; | 55 | int precedence; |
| 55 | }; | 56 | }; |
| 56 | 57 | ||
| 58 | /* Order must be the same as enum filter_op_ids above */ | ||
| 57 | static struct filter_op filter_ops[] = { | 59 | static struct filter_op filter_ops[] = { |
| 58 | { OP_OR, "||", 1 }, | 60 | { OP_OR, "||", 1 }, |
| 59 | { OP_AND, "&&", 2 }, | 61 | { OP_AND, "&&", 2 }, |
| @@ -64,6 +66,7 @@ static struct filter_op filter_ops[] = { | |||
| 64 | { OP_LE, "<=", 5 }, | 66 | { OP_LE, "<=", 5 }, |
| 65 | { OP_GT, ">", 5 }, | 67 | { OP_GT, ">", 5 }, |
| 66 | { OP_GE, ">=", 5 }, | 68 | { OP_GE, ">=", 5 }, |
| 69 | { OP_BAND, "&", 6 }, | ||
| 67 | { OP_NONE, "OP_NONE", 0 }, | 70 | { OP_NONE, "OP_NONE", 0 }, |
| 68 | { OP_OPEN_PAREN, "(", 0 }, | 71 | { OP_OPEN_PAREN, "(", 0 }, |
| 69 | }; | 72 | }; |
| @@ -156,6 +159,9 @@ static int filter_pred_##type(struct filter_pred *pred, void *event) \ | |||
| 156 | case OP_GE: \ | 159 | case OP_GE: \ |
| 157 | match = (*addr >= val); \ | 160 | match = (*addr >= val); \ |
| 158 | break; \ | 161 | break; \ |
| 162 | case OP_BAND: \ | ||
| 163 | match = (*addr & val); \ | ||
| 164 | break; \ | ||
| 159 | default: \ | 165 | default: \ |
| 160 | break; \ | 166 | break; \ |
| 161 | } \ | 167 | } \ |
| @@ -640,7 +646,7 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) | |||
| 640 | if (filter && filter->filter_string) | 646 | if (filter && filter->filter_string) |
| 641 | trace_seq_printf(s, "%s\n", filter->filter_string); | 647 | trace_seq_printf(s, "%s\n", filter->filter_string); |
| 642 | else | 648 | else |
| 643 | trace_seq_printf(s, "none\n"); | 649 | trace_seq_puts(s, "none\n"); |
| 644 | mutex_unlock(&event_mutex); | 650 | mutex_unlock(&event_mutex); |
| 645 | } | 651 | } |
| 646 | 652 | ||
| @@ -654,7 +660,7 @@ void print_subsystem_event_filter(struct event_subsystem *system, | |||
| 654 | if (filter && filter->filter_string) | 660 | if (filter && filter->filter_string) |
| 655 | trace_seq_printf(s, "%s\n", filter->filter_string); | 661 | trace_seq_printf(s, "%s\n", filter->filter_string); |
| 656 | else | 662 | else |
| 657 | trace_seq_printf(s, DEFAULT_SYS_FILTER_MESSAGE "\n"); | 663 | trace_seq_puts(s, DEFAULT_SYS_FILTER_MESSAGE "\n"); |
| 658 | mutex_unlock(&event_mutex); | 664 | mutex_unlock(&event_mutex); |
| 659 | } | 665 | } |
| 660 | 666 | ||
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index c4d6d7191988..38fe1483c508 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c | |||
| @@ -199,7 +199,7 @@ static int func_set_flag(u32 old_flags, u32 bit, int set) | |||
| 199 | return 0; | 199 | return 0; |
| 200 | } | 200 | } |
| 201 | 201 | ||
| 202 | static struct tracer function_trace __read_mostly = | 202 | static struct tracer function_trace __tracer_data = |
| 203 | { | 203 | { |
| 204 | .name = "function", | 204 | .name = "function", |
| 205 | .init = function_trace_init, | 205 | .init = function_trace_init, |
| @@ -290,6 +290,21 @@ ftrace_stacktrace_count(unsigned long ip, unsigned long parent_ip, void **data) | |||
| 290 | trace_dump_stack(STACK_SKIP); | 290 | trace_dump_stack(STACK_SKIP); |
| 291 | } | 291 | } |
| 292 | 292 | ||
| 293 | static void | ||
| 294 | ftrace_dump_probe(unsigned long ip, unsigned long parent_ip, void **data) | ||
| 295 | { | ||
| 296 | if (update_count(data)) | ||
| 297 | ftrace_dump(DUMP_ALL); | ||
| 298 | } | ||
| 299 | |||
| 300 | /* Only dump the current CPU buffer. */ | ||
| 301 | static void | ||
| 302 | ftrace_cpudump_probe(unsigned long ip, unsigned long parent_ip, void **data) | ||
| 303 | { | ||
| 304 | if (update_count(data)) | ||
| 305 | ftrace_dump(DUMP_ORIG); | ||
| 306 | } | ||
| 307 | |||
| 293 | static int | 308 | static int |
| 294 | ftrace_probe_print(const char *name, struct seq_file *m, | 309 | ftrace_probe_print(const char *name, struct seq_file *m, |
| 295 | unsigned long ip, void *data) | 310 | unsigned long ip, void *data) |
| @@ -327,6 +342,20 @@ ftrace_stacktrace_print(struct seq_file *m, unsigned long ip, | |||
| 327 | return ftrace_probe_print("stacktrace", m, ip, data); | 342 | return ftrace_probe_print("stacktrace", m, ip, data); |
| 328 | } | 343 | } |
| 329 | 344 | ||
| 345 | static int | ||
| 346 | ftrace_dump_print(struct seq_file *m, unsigned long ip, | ||
| 347 | struct ftrace_probe_ops *ops, void *data) | ||
| 348 | { | ||
| 349 | return ftrace_probe_print("dump", m, ip, data); | ||
| 350 | } | ||
| 351 | |||
| 352 | static int | ||
| 353 | ftrace_cpudump_print(struct seq_file *m, unsigned long ip, | ||
| 354 | struct ftrace_probe_ops *ops, void *data) | ||
| 355 | { | ||
| 356 | return ftrace_probe_print("cpudump", m, ip, data); | ||
| 357 | } | ||
| 358 | |||
| 330 | static struct ftrace_probe_ops traceon_count_probe_ops = { | 359 | static struct ftrace_probe_ops traceon_count_probe_ops = { |
| 331 | .func = ftrace_traceon_count, | 360 | .func = ftrace_traceon_count, |
| 332 | .print = ftrace_traceon_print, | 361 | .print = ftrace_traceon_print, |
| @@ -342,6 +371,16 @@ static struct ftrace_probe_ops stacktrace_count_probe_ops = { | |||
| 342 | .print = ftrace_stacktrace_print, | 371 | .print = ftrace_stacktrace_print, |
| 343 | }; | 372 | }; |
| 344 | 373 | ||
| 374 | static struct ftrace_probe_ops dump_probe_ops = { | ||
| 375 | .func = ftrace_dump_probe, | ||
| 376 | .print = ftrace_dump_print, | ||
| 377 | }; | ||
| 378 | |||
| 379 | static struct ftrace_probe_ops cpudump_probe_ops = { | ||
| 380 | .func = ftrace_cpudump_probe, | ||
| 381 | .print = ftrace_cpudump_print, | ||
| 382 | }; | ||
| 383 | |||
| 345 | static struct ftrace_probe_ops traceon_probe_ops = { | 384 | static struct ftrace_probe_ops traceon_probe_ops = { |
| 346 | .func = ftrace_traceon, | 385 | .func = ftrace_traceon, |
| 347 | .print = ftrace_traceon_print, | 386 | .print = ftrace_traceon_print, |
| @@ -425,6 +464,32 @@ ftrace_stacktrace_callback(struct ftrace_hash *hash, | |||
| 425 | param, enable); | 464 | param, enable); |
| 426 | } | 465 | } |
| 427 | 466 | ||
| 467 | static int | ||
| 468 | ftrace_dump_callback(struct ftrace_hash *hash, | ||
| 469 | char *glob, char *cmd, char *param, int enable) | ||
| 470 | { | ||
| 471 | struct ftrace_probe_ops *ops; | ||
| 472 | |||
| 473 | ops = &dump_probe_ops; | ||
| 474 | |||
| 475 | /* Only dump once. */ | ||
| 476 | return ftrace_trace_probe_callback(ops, hash, glob, cmd, | ||
| 477 | "1", enable); | ||
| 478 | } | ||
| 479 | |||
| 480 | static int | ||
| 481 | ftrace_cpudump_callback(struct ftrace_hash *hash, | ||
| 482 | char *glob, char *cmd, char *param, int enable) | ||
| 483 | { | ||
| 484 | struct ftrace_probe_ops *ops; | ||
| 485 | |||
| 486 | ops = &cpudump_probe_ops; | ||
| 487 | |||
| 488 | /* Only dump once. */ | ||
| 489 | return ftrace_trace_probe_callback(ops, hash, glob, cmd, | ||
| 490 | "1", enable); | ||
| 491 | } | ||
| 492 | |||
| 428 | static struct ftrace_func_command ftrace_traceon_cmd = { | 493 | static struct ftrace_func_command ftrace_traceon_cmd = { |
| 429 | .name = "traceon", | 494 | .name = "traceon", |
| 430 | .func = ftrace_trace_onoff_callback, | 495 | .func = ftrace_trace_onoff_callback, |
| @@ -440,6 +505,16 @@ static struct ftrace_func_command ftrace_stacktrace_cmd = { | |||
| 440 | .func = ftrace_stacktrace_callback, | 505 | .func = ftrace_stacktrace_callback, |
| 441 | }; | 506 | }; |
| 442 | 507 | ||
| 508 | static struct ftrace_func_command ftrace_dump_cmd = { | ||
| 509 | .name = "dump", | ||
| 510 | .func = ftrace_dump_callback, | ||
| 511 | }; | ||
| 512 | |||
| 513 | static struct ftrace_func_command ftrace_cpudump_cmd = { | ||
| 514 | .name = "cpudump", | ||
| 515 | .func = ftrace_cpudump_callback, | ||
| 516 | }; | ||
| 517 | |||
| 443 | static int __init init_func_cmd_traceon(void) | 518 | static int __init init_func_cmd_traceon(void) |
| 444 | { | 519 | { |
| 445 | int ret; | 520 | int ret; |
| @@ -450,13 +525,31 @@ static int __init init_func_cmd_traceon(void) | |||
| 450 | 525 | ||
| 451 | ret = register_ftrace_command(&ftrace_traceon_cmd); | 526 | ret = register_ftrace_command(&ftrace_traceon_cmd); |
| 452 | if (ret) | 527 | if (ret) |
| 453 | unregister_ftrace_command(&ftrace_traceoff_cmd); | 528 | goto out_free_traceoff; |
| 454 | 529 | ||
| 455 | ret = register_ftrace_command(&ftrace_stacktrace_cmd); | 530 | ret = register_ftrace_command(&ftrace_stacktrace_cmd); |
| 456 | if (ret) { | 531 | if (ret) |
| 457 | unregister_ftrace_command(&ftrace_traceoff_cmd); | 532 | goto out_free_traceon; |
| 458 | unregister_ftrace_command(&ftrace_traceon_cmd); | 533 | |
| 459 | } | 534 | ret = register_ftrace_command(&ftrace_dump_cmd); |
| 535 | if (ret) | ||
| 536 | goto out_free_stacktrace; | ||
| 537 | |||
| 538 | ret = register_ftrace_command(&ftrace_cpudump_cmd); | ||
| 539 | if (ret) | ||
| 540 | goto out_free_dump; | ||
| 541 | |||
| 542 | return 0; | ||
| 543 | |||
| 544 | out_free_dump: | ||
| 545 | unregister_ftrace_command(&ftrace_dump_cmd); | ||
| 546 | out_free_stacktrace: | ||
| 547 | unregister_ftrace_command(&ftrace_stacktrace_cmd); | ||
| 548 | out_free_traceon: | ||
| 549 | unregister_ftrace_command(&ftrace_traceon_cmd); | ||
| 550 | out_free_traceoff: | ||
| 551 | unregister_ftrace_command(&ftrace_traceoff_cmd); | ||
| 552 | |||
| 460 | return ret; | 553 | return ret; |
| 461 | } | 554 | } |
| 462 | #else | 555 | #else |
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 8388bc99f2ee..b5c09242683d 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
| @@ -446,7 +446,7 @@ print_graph_proc(struct trace_seq *s, pid_t pid) | |||
| 446 | 446 | ||
| 447 | /* First spaces to align center */ | 447 | /* First spaces to align center */ |
| 448 | for (i = 0; i < spaces / 2; i++) { | 448 | for (i = 0; i < spaces / 2; i++) { |
| 449 | ret = trace_seq_printf(s, " "); | 449 | ret = trace_seq_putc(s, ' '); |
| 450 | if (!ret) | 450 | if (!ret) |
| 451 | return TRACE_TYPE_PARTIAL_LINE; | 451 | return TRACE_TYPE_PARTIAL_LINE; |
| 452 | } | 452 | } |
| @@ -457,7 +457,7 @@ print_graph_proc(struct trace_seq *s, pid_t pid) | |||
| 457 | 457 | ||
| 458 | /* Last spaces to align center */ | 458 | /* Last spaces to align center */ |
| 459 | for (i = 0; i < spaces - (spaces / 2); i++) { | 459 | for (i = 0; i < spaces - (spaces / 2); i++) { |
| 460 | ret = trace_seq_printf(s, " "); | 460 | ret = trace_seq_putc(s, ' '); |
| 461 | if (!ret) | 461 | if (!ret) |
| 462 | return TRACE_TYPE_PARTIAL_LINE; | 462 | return TRACE_TYPE_PARTIAL_LINE; |
| 463 | } | 463 | } |
| @@ -503,7 +503,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) | |||
| 503 | ------------------------------------------ | 503 | ------------------------------------------ |
| 504 | 504 | ||
| 505 | */ | 505 | */ |
| 506 | ret = trace_seq_printf(s, | 506 | ret = trace_seq_puts(s, |
| 507 | " ------------------------------------------\n"); | 507 | " ------------------------------------------\n"); |
| 508 | if (!ret) | 508 | if (!ret) |
| 509 | return TRACE_TYPE_PARTIAL_LINE; | 509 | return TRACE_TYPE_PARTIAL_LINE; |
| @@ -516,7 +516,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) | |||
| 516 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 516 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
| 517 | return TRACE_TYPE_PARTIAL_LINE; | 517 | return TRACE_TYPE_PARTIAL_LINE; |
| 518 | 518 | ||
| 519 | ret = trace_seq_printf(s, " => "); | 519 | ret = trace_seq_puts(s, " => "); |
| 520 | if (!ret) | 520 | if (!ret) |
| 521 | return TRACE_TYPE_PARTIAL_LINE; | 521 | return TRACE_TYPE_PARTIAL_LINE; |
| 522 | 522 | ||
| @@ -524,7 +524,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) | |||
| 524 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 524 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
| 525 | return TRACE_TYPE_PARTIAL_LINE; | 525 | return TRACE_TYPE_PARTIAL_LINE; |
| 526 | 526 | ||
| 527 | ret = trace_seq_printf(s, | 527 | ret = trace_seq_puts(s, |
| 528 | "\n ------------------------------------------\n\n"); | 528 | "\n ------------------------------------------\n\n"); |
| 529 | if (!ret) | 529 | if (!ret) |
| 530 | return TRACE_TYPE_PARTIAL_LINE; | 530 | return TRACE_TYPE_PARTIAL_LINE; |
| @@ -645,7 +645,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, | |||
| 645 | ret = print_graph_proc(s, pid); | 645 | ret = print_graph_proc(s, pid); |
| 646 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 646 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
| 647 | return TRACE_TYPE_PARTIAL_LINE; | 647 | return TRACE_TYPE_PARTIAL_LINE; |
| 648 | ret = trace_seq_printf(s, " | "); | 648 | ret = trace_seq_puts(s, " | "); |
| 649 | if (!ret) | 649 | if (!ret) |
| 650 | return TRACE_TYPE_PARTIAL_LINE; | 650 | return TRACE_TYPE_PARTIAL_LINE; |
| 651 | } | 651 | } |
| @@ -657,9 +657,9 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, | |||
| 657 | return ret; | 657 | return ret; |
| 658 | 658 | ||
| 659 | if (type == TRACE_GRAPH_ENT) | 659 | if (type == TRACE_GRAPH_ENT) |
| 660 | ret = trace_seq_printf(s, "==========>"); | 660 | ret = trace_seq_puts(s, "==========>"); |
| 661 | else | 661 | else |
| 662 | ret = trace_seq_printf(s, "<=========="); | 662 | ret = trace_seq_puts(s, "<=========="); |
| 663 | 663 | ||
| 664 | if (!ret) | 664 | if (!ret) |
| 665 | return TRACE_TYPE_PARTIAL_LINE; | 665 | return TRACE_TYPE_PARTIAL_LINE; |
| @@ -668,7 +668,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, | |||
| 668 | if (ret != TRACE_TYPE_HANDLED) | 668 | if (ret != TRACE_TYPE_HANDLED) |
| 669 | return ret; | 669 | return ret; |
| 670 | 670 | ||
| 671 | ret = trace_seq_printf(s, "\n"); | 671 | ret = trace_seq_putc(s, '\n'); |
| 672 | 672 | ||
| 673 | if (!ret) | 673 | if (!ret) |
| 674 | return TRACE_TYPE_PARTIAL_LINE; | 674 | return TRACE_TYPE_PARTIAL_LINE; |
| @@ -705,13 +705,13 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) | |||
| 705 | len += strlen(nsecs_str); | 705 | len += strlen(nsecs_str); |
| 706 | } | 706 | } |
| 707 | 707 | ||
| 708 | ret = trace_seq_printf(s, " us "); | 708 | ret = trace_seq_puts(s, " us "); |
| 709 | if (!ret) | 709 | if (!ret) |
| 710 | return TRACE_TYPE_PARTIAL_LINE; | 710 | return TRACE_TYPE_PARTIAL_LINE; |
| 711 | 711 | ||
| 712 | /* Print remaining spaces to fit the row's width */ | 712 | /* Print remaining spaces to fit the row's width */ |
| 713 | for (i = len; i < 7; i++) { | 713 | for (i = len; i < 7; i++) { |
| 714 | ret = trace_seq_printf(s, " "); | 714 | ret = trace_seq_putc(s, ' '); |
| 715 | if (!ret) | 715 | if (!ret) |
| 716 | return TRACE_TYPE_PARTIAL_LINE; | 716 | return TRACE_TYPE_PARTIAL_LINE; |
| 717 | } | 717 | } |
| @@ -731,13 +731,13 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s, | |||
| 731 | /* No real adata, just filling the column with spaces */ | 731 | /* No real adata, just filling the column with spaces */ |
| 732 | switch (duration) { | 732 | switch (duration) { |
| 733 | case DURATION_FILL_FULL: | 733 | case DURATION_FILL_FULL: |
| 734 | ret = trace_seq_printf(s, " | "); | 734 | ret = trace_seq_puts(s, " | "); |
| 735 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | 735 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; |
| 736 | case DURATION_FILL_START: | 736 | case DURATION_FILL_START: |
| 737 | ret = trace_seq_printf(s, " "); | 737 | ret = trace_seq_puts(s, " "); |
| 738 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | 738 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; |
| 739 | case DURATION_FILL_END: | 739 | case DURATION_FILL_END: |
| 740 | ret = trace_seq_printf(s, " |"); | 740 | ret = trace_seq_puts(s, " |"); |
| 741 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | 741 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; |
| 742 | } | 742 | } |
| 743 | 743 | ||
| @@ -745,10 +745,10 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s, | |||
| 745 | if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { | 745 | if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { |
| 746 | /* Duration exceeded 100 msecs */ | 746 | /* Duration exceeded 100 msecs */ |
| 747 | if (duration > 100000ULL) | 747 | if (duration > 100000ULL) |
| 748 | ret = trace_seq_printf(s, "! "); | 748 | ret = trace_seq_puts(s, "! "); |
| 749 | /* Duration exceeded 10 msecs */ | 749 | /* Duration exceeded 10 msecs */ |
| 750 | else if (duration > 10000ULL) | 750 | else if (duration > 10000ULL) |
| 751 | ret = trace_seq_printf(s, "+ "); | 751 | ret = trace_seq_puts(s, "+ "); |
| 752 | } | 752 | } |
| 753 | 753 | ||
| 754 | /* | 754 | /* |
| @@ -757,7 +757,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s, | |||
| 757 | * to fill out the space. | 757 | * to fill out the space. |
| 758 | */ | 758 | */ |
| 759 | if (ret == -1) | 759 | if (ret == -1) |
| 760 | ret = trace_seq_printf(s, " "); | 760 | ret = trace_seq_puts(s, " "); |
| 761 | 761 | ||
| 762 | /* Catching here any failure happenned above */ | 762 | /* Catching here any failure happenned above */ |
| 763 | if (!ret) | 763 | if (!ret) |
| @@ -767,7 +767,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s, | |||
| 767 | if (ret != TRACE_TYPE_HANDLED) | 767 | if (ret != TRACE_TYPE_HANDLED) |
| 768 | return ret; | 768 | return ret; |
| 769 | 769 | ||
| 770 | ret = trace_seq_printf(s, "| "); | 770 | ret = trace_seq_puts(s, "| "); |
| 771 | if (!ret) | 771 | if (!ret) |
| 772 | return TRACE_TYPE_PARTIAL_LINE; | 772 | return TRACE_TYPE_PARTIAL_LINE; |
| 773 | 773 | ||
| @@ -817,7 +817,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, | |||
| 817 | 817 | ||
| 818 | /* Function */ | 818 | /* Function */ |
| 819 | for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { | 819 | for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { |
| 820 | ret = trace_seq_printf(s, " "); | 820 | ret = trace_seq_putc(s, ' '); |
| 821 | if (!ret) | 821 | if (!ret) |
| 822 | return TRACE_TYPE_PARTIAL_LINE; | 822 | return TRACE_TYPE_PARTIAL_LINE; |
| 823 | } | 823 | } |
| @@ -858,7 +858,7 @@ print_graph_entry_nested(struct trace_iterator *iter, | |||
| 858 | 858 | ||
| 859 | /* Function */ | 859 | /* Function */ |
| 860 | for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { | 860 | for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { |
| 861 | ret = trace_seq_printf(s, " "); | 861 | ret = trace_seq_putc(s, ' '); |
| 862 | if (!ret) | 862 | if (!ret) |
| 863 | return TRACE_TYPE_PARTIAL_LINE; | 863 | return TRACE_TYPE_PARTIAL_LINE; |
| 864 | } | 864 | } |
| @@ -917,7 +917,7 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, | |||
| 917 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 917 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
| 918 | return TRACE_TYPE_PARTIAL_LINE; | 918 | return TRACE_TYPE_PARTIAL_LINE; |
| 919 | 919 | ||
| 920 | ret = trace_seq_printf(s, " | "); | 920 | ret = trace_seq_puts(s, " | "); |
| 921 | if (!ret) | 921 | if (!ret) |
| 922 | return TRACE_TYPE_PARTIAL_LINE; | 922 | return TRACE_TYPE_PARTIAL_LINE; |
| 923 | } | 923 | } |
| @@ -1117,7 +1117,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, | |||
| 1117 | 1117 | ||
| 1118 | /* Closing brace */ | 1118 | /* Closing brace */ |
| 1119 | for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { | 1119 | for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { |
| 1120 | ret = trace_seq_printf(s, " "); | 1120 | ret = trace_seq_putc(s, ' '); |
| 1121 | if (!ret) | 1121 | if (!ret) |
| 1122 | return TRACE_TYPE_PARTIAL_LINE; | 1122 | return TRACE_TYPE_PARTIAL_LINE; |
| 1123 | } | 1123 | } |
| @@ -1129,7 +1129,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, | |||
| 1129 | * belongs to, write out the function name. | 1129 | * belongs to, write out the function name. |
| 1130 | */ | 1130 | */ |
| 1131 | if (func_match) { | 1131 | if (func_match) { |
| 1132 | ret = trace_seq_printf(s, "}\n"); | 1132 | ret = trace_seq_puts(s, "}\n"); |
| 1133 | if (!ret) | 1133 | if (!ret) |
| 1134 | return TRACE_TYPE_PARTIAL_LINE; | 1134 | return TRACE_TYPE_PARTIAL_LINE; |
| 1135 | } else { | 1135 | } else { |
| @@ -1179,13 +1179,13 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, | |||
| 1179 | /* Indentation */ | 1179 | /* Indentation */ |
| 1180 | if (depth > 0) | 1180 | if (depth > 0) |
| 1181 | for (i = 0; i < (depth + 1) * TRACE_GRAPH_INDENT; i++) { | 1181 | for (i = 0; i < (depth + 1) * TRACE_GRAPH_INDENT; i++) { |
| 1182 | ret = trace_seq_printf(s, " "); | 1182 | ret = trace_seq_putc(s, ' '); |
| 1183 | if (!ret) | 1183 | if (!ret) |
| 1184 | return TRACE_TYPE_PARTIAL_LINE; | 1184 | return TRACE_TYPE_PARTIAL_LINE; |
| 1185 | } | 1185 | } |
| 1186 | 1186 | ||
| 1187 | /* The comment */ | 1187 | /* The comment */ |
| 1188 | ret = trace_seq_printf(s, "/* "); | 1188 | ret = trace_seq_puts(s, "/* "); |
| 1189 | if (!ret) | 1189 | if (!ret) |
| 1190 | return TRACE_TYPE_PARTIAL_LINE; | 1190 | return TRACE_TYPE_PARTIAL_LINE; |
| 1191 | 1191 | ||
| @@ -1216,7 +1216,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, | |||
| 1216 | s->len--; | 1216 | s->len--; |
| 1217 | } | 1217 | } |
| 1218 | 1218 | ||
| 1219 | ret = trace_seq_printf(s, " */\n"); | 1219 | ret = trace_seq_puts(s, " */\n"); |
| 1220 | if (!ret) | 1220 | if (!ret) |
| 1221 | return TRACE_TYPE_PARTIAL_LINE; | 1221 | return TRACE_TYPE_PARTIAL_LINE; |
| 1222 | 1222 | ||
| @@ -1448,7 +1448,7 @@ static struct trace_event graph_trace_ret_event = { | |||
| 1448 | .funcs = &graph_functions | 1448 | .funcs = &graph_functions |
| 1449 | }; | 1449 | }; |
| 1450 | 1450 | ||
| 1451 | static struct tracer graph_trace __read_mostly = { | 1451 | static struct tracer graph_trace __tracer_data = { |
| 1452 | .name = "function_graph", | 1452 | .name = "function_graph", |
| 1453 | .open = graph_trace_open, | 1453 | .open = graph_trace_open, |
| 1454 | .pipe_open = graph_trace_open, | 1454 | .pipe_open = graph_trace_open, |
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index b19d065a28cb..2aefbee93a6d 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c | |||
| @@ -373,7 +373,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) | |||
| 373 | struct trace_array_cpu *data; | 373 | struct trace_array_cpu *data; |
| 374 | unsigned long flags; | 374 | unsigned long flags; |
| 375 | 375 | ||
| 376 | if (likely(!tracer_enabled)) | 376 | if (!tracer_enabled || !tracing_is_enabled()) |
| 377 | return; | 377 | return; |
| 378 | 378 | ||
| 379 | cpu = raw_smp_processor_id(); | 379 | cpu = raw_smp_processor_id(); |
| @@ -416,7 +416,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) | |||
| 416 | else | 416 | else |
| 417 | return; | 417 | return; |
| 418 | 418 | ||
| 419 | if (!tracer_enabled) | 419 | if (!tracer_enabled || !tracing_is_enabled()) |
| 420 | return; | 420 | return; |
| 421 | 421 | ||
| 422 | data = per_cpu_ptr(tr->trace_buffer.data, cpu); | 422 | data = per_cpu_ptr(tr->trace_buffer.data, cpu); |
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9f46e98ba8f2..3811487e7a7a 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
| @@ -35,12 +35,17 @@ struct trace_probe { | |||
| 35 | const char *symbol; /* symbol name */ | 35 | const char *symbol; /* symbol name */ |
| 36 | struct ftrace_event_class class; | 36 | struct ftrace_event_class class; |
| 37 | struct ftrace_event_call call; | 37 | struct ftrace_event_call call; |
| 38 | struct ftrace_event_file * __rcu *files; | 38 | struct list_head files; |
| 39 | ssize_t size; /* trace entry size */ | 39 | ssize_t size; /* trace entry size */ |
| 40 | unsigned int nr_args; | 40 | unsigned int nr_args; |
| 41 | struct probe_arg args[]; | 41 | struct probe_arg args[]; |
| 42 | }; | 42 | }; |
| 43 | 43 | ||
| 44 | struct event_file_link { | ||
| 45 | struct ftrace_event_file *file; | ||
| 46 | struct list_head list; | ||
| 47 | }; | ||
| 48 | |||
| 44 | #define SIZEOF_TRACE_PROBE(n) \ | 49 | #define SIZEOF_TRACE_PROBE(n) \ |
| 45 | (offsetof(struct trace_probe, args) + \ | 50 | (offsetof(struct trace_probe, args) + \ |
| 46 | (sizeof(struct probe_arg) * (n))) | 51 | (sizeof(struct probe_arg) * (n))) |
| @@ -150,6 +155,7 @@ static struct trace_probe *alloc_trace_probe(const char *group, | |||
| 150 | goto error; | 155 | goto error; |
| 151 | 156 | ||
| 152 | INIT_LIST_HEAD(&tp->list); | 157 | INIT_LIST_HEAD(&tp->list); |
| 158 | INIT_LIST_HEAD(&tp->files); | ||
| 153 | return tp; | 159 | return tp; |
| 154 | error: | 160 | error: |
| 155 | kfree(tp->call.name); | 161 | kfree(tp->call.name); |
| @@ -183,25 +189,6 @@ static struct trace_probe *find_trace_probe(const char *event, | |||
| 183 | return NULL; | 189 | return NULL; |
| 184 | } | 190 | } |
| 185 | 191 | ||
| 186 | static int trace_probe_nr_files(struct trace_probe *tp) | ||
| 187 | { | ||
| 188 | struct ftrace_event_file **file; | ||
| 189 | int ret = 0; | ||
| 190 | |||
| 191 | /* | ||
| 192 | * Since all tp->files updater is protected by probe_enable_lock, | ||
| 193 | * we don't need to lock an rcu_read_lock. | ||
| 194 | */ | ||
| 195 | file = rcu_dereference_raw(tp->files); | ||
| 196 | if (file) | ||
| 197 | while (*(file++)) | ||
| 198 | ret++; | ||
| 199 | |||
| 200 | return ret; | ||
| 201 | } | ||
| 202 | |||
| 203 | static DEFINE_MUTEX(probe_enable_lock); | ||
| 204 | |||
| 205 | /* | 192 | /* |
| 206 | * Enable trace_probe | 193 | * Enable trace_probe |
| 207 | * if the file is NULL, enable "perf" handler, or enable "trace" handler. | 194 | * if the file is NULL, enable "perf" handler, or enable "trace" handler. |
| @@ -211,67 +198,42 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) | |||
| 211 | { | 198 | { |
| 212 | int ret = 0; | 199 | int ret = 0; |
| 213 | 200 | ||
| 214 | mutex_lock(&probe_enable_lock); | ||
| 215 | |||
| 216 | if (file) { | 201 | if (file) { |
| 217 | struct ftrace_event_file **new, **old; | 202 | struct event_file_link *link; |
| 218 | int n = trace_probe_nr_files(tp); | 203 | |
| 219 | 204 | link = kmalloc(sizeof(*link), GFP_KERNEL); | |
| 220 | old = rcu_dereference_raw(tp->files); | 205 | if (!link) { |
| 221 | /* 1 is for new one and 1 is for stopper */ | ||
| 222 | new = kzalloc((n + 2) * sizeof(struct ftrace_event_file *), | ||
| 223 | GFP_KERNEL); | ||
| 224 | if (!new) { | ||
| 225 | ret = -ENOMEM; | 206 | ret = -ENOMEM; |
| 226 | goto out_unlock; | 207 | goto out; |
| 227 | } | 208 | } |
| 228 | memcpy(new, old, n * sizeof(struct ftrace_event_file *)); | ||
| 229 | new[n] = file; | ||
| 230 | /* The last one keeps a NULL */ | ||
| 231 | 209 | ||
| 232 | rcu_assign_pointer(tp->files, new); | 210 | link->file = file; |
| 233 | tp->flags |= TP_FLAG_TRACE; | 211 | list_add_tail_rcu(&link->list, &tp->files); |
| 234 | 212 | ||
| 235 | if (old) { | 213 | tp->flags |= TP_FLAG_TRACE; |
| 236 | /* Make sure the probe is done with old files */ | ||
| 237 | synchronize_sched(); | ||
| 238 | kfree(old); | ||
| 239 | } | ||
| 240 | } else | 214 | } else |
| 241 | tp->flags |= TP_FLAG_PROFILE; | 215 | tp->flags |= TP_FLAG_PROFILE; |
| 242 | 216 | ||
| 243 | if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) && | 217 | if (trace_probe_is_registered(tp) && !trace_probe_has_gone(tp)) { |
| 244 | !trace_probe_has_gone(tp)) { | ||
| 245 | if (trace_probe_is_return(tp)) | 218 | if (trace_probe_is_return(tp)) |
| 246 | ret = enable_kretprobe(&tp->rp); | 219 | ret = enable_kretprobe(&tp->rp); |
| 247 | else | 220 | else |
| 248 | ret = enable_kprobe(&tp->rp.kp); | 221 | ret = enable_kprobe(&tp->rp.kp); |
| 249 | } | 222 | } |
| 250 | 223 | out: | |
| 251 | out_unlock: | ||
| 252 | mutex_unlock(&probe_enable_lock); | ||
| 253 | |||
| 254 | return ret; | 224 | return ret; |
| 255 | } | 225 | } |
| 256 | 226 | ||
| 257 | static int | 227 | static struct event_file_link * |
| 258 | trace_probe_file_index(struct trace_probe *tp, struct ftrace_event_file *file) | 228 | find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) |
| 259 | { | 229 | { |
| 260 | struct ftrace_event_file **files; | 230 | struct event_file_link *link; |
| 261 | int i; | ||
| 262 | 231 | ||
| 263 | /* | 232 | list_for_each_entry(link, &tp->files, list) |
| 264 | * Since all tp->files updater is protected by probe_enable_lock, | 233 | if (link->file == file) |
| 265 | * we don't need to lock an rcu_read_lock. | 234 | return link; |
| 266 | */ | ||
| 267 | files = rcu_dereference_raw(tp->files); | ||
| 268 | if (files) { | ||
| 269 | for (i = 0; files[i]; i++) | ||
| 270 | if (files[i] == file) | ||
| 271 | return i; | ||
| 272 | } | ||
| 273 | 235 | ||
| 274 | return -1; | 236 | return NULL; |
| 275 | } | 237 | } |
| 276 | 238 | ||
| 277 | /* | 239 | /* |
| @@ -281,43 +243,23 @@ trace_probe_file_index(struct trace_probe *tp, struct ftrace_event_file *file) | |||
| 281 | static int | 243 | static int |
| 282 | disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) | 244 | disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) |
| 283 | { | 245 | { |
| 246 | struct event_file_link *link = NULL; | ||
| 247 | int wait = 0; | ||
| 284 | int ret = 0; | 248 | int ret = 0; |
| 285 | 249 | ||
| 286 | mutex_lock(&probe_enable_lock); | ||
| 287 | |||
| 288 | if (file) { | 250 | if (file) { |
| 289 | struct ftrace_event_file **new, **old; | 251 | link = find_event_file_link(tp, file); |
| 290 | int n = trace_probe_nr_files(tp); | 252 | if (!link) { |
| 291 | int i, j; | ||
| 292 | |||
| 293 | old = rcu_dereference_raw(tp->files); | ||
| 294 | if (n == 0 || trace_probe_file_index(tp, file) < 0) { | ||
| 295 | ret = -EINVAL; | 253 | ret = -EINVAL; |
| 296 | goto out_unlock; | 254 | goto out; |
| 297 | } | ||
| 298 | |||
| 299 | if (n == 1) { /* Remove the last file */ | ||
| 300 | tp->flags &= ~TP_FLAG_TRACE; | ||
| 301 | new = NULL; | ||
| 302 | } else { | ||
| 303 | new = kzalloc(n * sizeof(struct ftrace_event_file *), | ||
| 304 | GFP_KERNEL); | ||
| 305 | if (!new) { | ||
| 306 | ret = -ENOMEM; | ||
| 307 | goto out_unlock; | ||
| 308 | } | ||
| 309 | |||
| 310 | /* This copy & check loop copies the NULL stopper too */ | ||
| 311 | for (i = 0, j = 0; j < n && i < n + 1; i++) | ||
| 312 | if (old[i] != file) | ||
| 313 | new[j++] = old[i]; | ||
| 314 | } | 255 | } |
| 315 | 256 | ||
| 316 | rcu_assign_pointer(tp->files, new); | 257 | list_del_rcu(&link->list); |
| 258 | wait = 1; | ||
| 259 | if (!list_empty(&tp->files)) | ||
| 260 | goto out; | ||
| 317 | 261 | ||
| 318 | /* Make sure the probe is done with old files */ | 262 | tp->flags &= ~TP_FLAG_TRACE; |
| 319 | synchronize_sched(); | ||
| 320 | kfree(old); | ||
| 321 | } else | 263 | } else |
| 322 | tp->flags &= ~TP_FLAG_PROFILE; | 264 | tp->flags &= ~TP_FLAG_PROFILE; |
| 323 | 265 | ||
| @@ -326,10 +268,21 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) | |||
| 326 | disable_kretprobe(&tp->rp); | 268 | disable_kretprobe(&tp->rp); |
| 327 | else | 269 | else |
| 328 | disable_kprobe(&tp->rp.kp); | 270 | disable_kprobe(&tp->rp.kp); |
| 271 | wait = 1; | ||
| 272 | } | ||
| 273 | out: | ||
| 274 | if (wait) { | ||
| 275 | /* | ||
| 276 | * Synchronize with kprobe_trace_func/kretprobe_trace_func | ||
| 277 | * to ensure disabled (all running handlers are finished). | ||
| 278 | * This is not only for kfree(), but also the caller, | ||
| 279 | * trace_remove_event_call() supposes it for releasing | ||
| 280 | * event_call related objects, which will be accessed in | ||
| 281 | * the kprobe_trace_func/kretprobe_trace_func. | ||
| 282 | */ | ||
| 283 | synchronize_sched(); | ||
| 284 | kfree(link); /* Ignored if link == NULL */ | ||
| 329 | } | 285 | } |
| 330 | |||
| 331 | out_unlock: | ||
| 332 | mutex_unlock(&probe_enable_lock); | ||
| 333 | 286 | ||
| 334 | return ret; | 287 | return ret; |
| 335 | } | 288 | } |
| @@ -885,20 +838,10 @@ __kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs, | |||
| 885 | static __kprobes void | 838 | static __kprobes void |
| 886 | kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs) | 839 | kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs) |
| 887 | { | 840 | { |
| 888 | /* | 841 | struct event_file_link *link; |
| 889 | * Note: preempt is already disabled around the kprobe handler. | ||
| 890 | * However, we still need an smp_read_barrier_depends() corresponding | ||
| 891 | * to smp_wmb() in rcu_assign_pointer() to access the pointer. | ||
| 892 | */ | ||
| 893 | struct ftrace_event_file **file = rcu_dereference_raw(tp->files); | ||
| 894 | |||
| 895 | if (unlikely(!file)) | ||
| 896 | return; | ||
| 897 | 842 | ||
| 898 | while (*file) { | 843 | list_for_each_entry_rcu(link, &tp->files, list) |
| 899 | __kprobe_trace_func(tp, regs, *file); | 844 | __kprobe_trace_func(tp, regs, link->file); |
| 900 | file++; | ||
| 901 | } | ||
| 902 | } | 845 | } |
| 903 | 846 | ||
| 904 | /* Kretprobe handler */ | 847 | /* Kretprobe handler */ |
| @@ -945,20 +888,10 @@ static __kprobes void | |||
| 945 | kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, | 888 | kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, |
| 946 | struct pt_regs *regs) | 889 | struct pt_regs *regs) |
| 947 | { | 890 | { |
| 948 | /* | 891 | struct event_file_link *link; |
| 949 | * Note: preempt is already disabled around the kprobe handler. | ||
| 950 | * However, we still need an smp_read_barrier_depends() corresponding | ||
| 951 | * to smp_wmb() in rcu_assign_pointer() to access the pointer. | ||
| 952 | */ | ||
| 953 | struct ftrace_event_file **file = rcu_dereference_raw(tp->files); | ||
| 954 | |||
| 955 | if (unlikely(!file)) | ||
| 956 | return; | ||
| 957 | 892 | ||
| 958 | while (*file) { | 893 | list_for_each_entry_rcu(link, &tp->files, list) |
| 959 | __kretprobe_trace_func(tp, ri, regs, *file); | 894 | __kretprobe_trace_func(tp, ri, regs, link->file); |
| 960 | file++; | ||
| 961 | } | ||
| 962 | } | 895 | } |
| 963 | 896 | ||
| 964 | /* Event entry printers */ | 897 | /* Event entry printers */ |
| @@ -1157,13 +1090,14 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) | |||
| 1157 | int size, __size, dsize; | 1090 | int size, __size, dsize; |
| 1158 | int rctx; | 1091 | int rctx; |
| 1159 | 1092 | ||
| 1093 | head = this_cpu_ptr(call->perf_events); | ||
| 1094 | if (hlist_empty(head)) | ||
| 1095 | return; | ||
| 1096 | |||
| 1160 | dsize = __get_data_size(tp, regs); | 1097 | dsize = __get_data_size(tp, regs); |
| 1161 | __size = sizeof(*entry) + tp->size + dsize; | 1098 | __size = sizeof(*entry) + tp->size + dsize; |
| 1162 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); | 1099 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); |
| 1163 | size -= sizeof(u32); | 1100 | size -= sizeof(u32); |
| 1164 | if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, | ||
| 1165 | "profile buffer not large enough")) | ||
| 1166 | return; | ||
| 1167 | 1101 | ||
| 1168 | entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); | 1102 | entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); |
| 1169 | if (!entry) | 1103 | if (!entry) |
| @@ -1172,10 +1106,7 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) | |||
| 1172 | entry->ip = (unsigned long)tp->rp.kp.addr; | 1106 | entry->ip = (unsigned long)tp->rp.kp.addr; |
| 1173 | memset(&entry[1], 0, dsize); | 1107 | memset(&entry[1], 0, dsize); |
| 1174 | store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); | 1108 | store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); |
| 1175 | 1109 | perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); | |
| 1176 | head = this_cpu_ptr(call->perf_events); | ||
| 1177 | perf_trace_buf_submit(entry, size, rctx, | ||
| 1178 | entry->ip, 1, regs, head, NULL); | ||
| 1179 | } | 1110 | } |
| 1180 | 1111 | ||
| 1181 | /* Kretprobe profile handler */ | 1112 | /* Kretprobe profile handler */ |
| @@ -1189,13 +1120,14 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, | |||
| 1189 | int size, __size, dsize; | 1120 | int size, __size, dsize; |
| 1190 | int rctx; | 1121 | int rctx; |
| 1191 | 1122 | ||
| 1123 | head = this_cpu_ptr(call->perf_events); | ||
| 1124 | if (hlist_empty(head)) | ||
| 1125 | return; | ||
| 1126 | |||
| 1192 | dsize = __get_data_size(tp, regs); | 1127 | dsize = __get_data_size(tp, regs); |
| 1193 | __size = sizeof(*entry) + tp->size + dsize; | 1128 | __size = sizeof(*entry) + tp->size + dsize; |
| 1194 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); | 1129 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); |
| 1195 | size -= sizeof(u32); | 1130 | size -= sizeof(u32); |
| 1196 | if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, | ||
| 1197 | "profile buffer not large enough")) | ||
| 1198 | return; | ||
| 1199 | 1131 | ||
| 1200 | entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); | 1132 | entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); |
| 1201 | if (!entry) | 1133 | if (!entry) |
| @@ -1204,13 +1136,16 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, | |||
| 1204 | entry->func = (unsigned long)tp->rp.kp.addr; | 1136 | entry->func = (unsigned long)tp->rp.kp.addr; |
| 1205 | entry->ret_ip = (unsigned long)ri->ret_addr; | 1137 | entry->ret_ip = (unsigned long)ri->ret_addr; |
| 1206 | store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); | 1138 | store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); |
| 1207 | 1139 | perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); | |
| 1208 | head = this_cpu_ptr(call->perf_events); | ||
| 1209 | perf_trace_buf_submit(entry, size, rctx, | ||
| 1210 | entry->ret_ip, 1, regs, head, NULL); | ||
| 1211 | } | 1140 | } |
| 1212 | #endif /* CONFIG_PERF_EVENTS */ | 1141 | #endif /* CONFIG_PERF_EVENTS */ |
| 1213 | 1142 | ||
| 1143 | /* | ||
| 1144 | * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex. | ||
| 1145 | * | ||
| 1146 | * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe | ||
| 1147 | * lockless, but we can't race with this __init function. | ||
| 1148 | */ | ||
| 1214 | static __kprobes | 1149 | static __kprobes |
| 1215 | int kprobe_register(struct ftrace_event_call *event, | 1150 | int kprobe_register(struct ftrace_event_call *event, |
| 1216 | enum trace_reg type, void *data) | 1151 | enum trace_reg type, void *data) |
| @@ -1376,6 +1311,10 @@ find_trace_probe_file(struct trace_probe *tp, struct trace_array *tr) | |||
| 1376 | return NULL; | 1311 | return NULL; |
| 1377 | } | 1312 | } |
| 1378 | 1313 | ||
| 1314 | /* | ||
| 1315 | * Nobody but us can call enable_trace_probe/disable_trace_probe at this | ||
| 1316 | * stage, we can do this lockless. | ||
| 1317 | */ | ||
| 1379 | static __init int kprobe_trace_self_tests_init(void) | 1318 | static __init int kprobe_trace_self_tests_init(void) |
| 1380 | { | 1319 | { |
| 1381 | int ret, warn = 0; | 1320 | int ret, warn = 0; |
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index a5e8f4878bfa..b3dcfb2f0fef 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c | |||
| @@ -90,7 +90,7 @@ static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) | |||
| 90 | if (drv) | 90 | if (drv) |
| 91 | ret += trace_seq_printf(s, " %s\n", drv->name); | 91 | ret += trace_seq_printf(s, " %s\n", drv->name); |
| 92 | else | 92 | else |
| 93 | ret += trace_seq_printf(s, " \n"); | 93 | ret += trace_seq_puts(s, " \n"); |
| 94 | return ret; | 94 | return ret; |
| 95 | } | 95 | } |
| 96 | 96 | ||
| @@ -107,7 +107,7 @@ static void mmio_pipe_open(struct trace_iterator *iter) | |||
| 107 | struct header_iter *hiter; | 107 | struct header_iter *hiter; |
| 108 | struct trace_seq *s = &iter->seq; | 108 | struct trace_seq *s = &iter->seq; |
| 109 | 109 | ||
| 110 | trace_seq_printf(s, "VERSION 20070824\n"); | 110 | trace_seq_puts(s, "VERSION 20070824\n"); |
| 111 | 111 | ||
| 112 | hiter = kzalloc(sizeof(*hiter), GFP_KERNEL); | 112 | hiter = kzalloc(sizeof(*hiter), GFP_KERNEL); |
| 113 | if (!hiter) | 113 | if (!hiter) |
| @@ -209,7 +209,7 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter) | |||
| 209 | (rw->value >> 0) & 0xff, rw->pc, 0); | 209 | (rw->value >> 0) & 0xff, rw->pc, 0); |
| 210 | break; | 210 | break; |
| 211 | default: | 211 | default: |
| 212 | ret = trace_seq_printf(s, "rw what?\n"); | 212 | ret = trace_seq_puts(s, "rw what?\n"); |
| 213 | break; | 213 | break; |
| 214 | } | 214 | } |
| 215 | if (ret) | 215 | if (ret) |
| @@ -245,7 +245,7 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter) | |||
| 245 | secs, usec_rem, m->map_id, 0UL, 0); | 245 | secs, usec_rem, m->map_id, 0UL, 0); |
| 246 | break; | 246 | break; |
| 247 | default: | 247 | default: |
| 248 | ret = trace_seq_printf(s, "map what?\n"); | 248 | ret = trace_seq_puts(s, "map what?\n"); |
| 249 | break; | 249 | break; |
| 250 | } | 250 | } |
| 251 | if (ret) | 251 | if (ret) |
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index bb922d9ee51b..34e7cbac0c9c 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
| @@ -78,7 +78,7 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter) | |||
| 78 | 78 | ||
| 79 | trace_assign_type(field, entry); | 79 | trace_assign_type(field, entry); |
| 80 | 80 | ||
| 81 | ret = trace_seq_printf(s, "%s", field->buf); | 81 | ret = trace_seq_puts(s, field->buf); |
| 82 | if (!ret) | 82 | if (!ret) |
| 83 | return TRACE_TYPE_PARTIAL_LINE; | 83 | return TRACE_TYPE_PARTIAL_LINE; |
| 84 | 84 | ||
| @@ -558,14 +558,14 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, | |||
| 558 | if (ret) | 558 | if (ret) |
| 559 | ret = trace_seq_puts(s, "??"); | 559 | ret = trace_seq_puts(s, "??"); |
| 560 | if (ret) | 560 | if (ret) |
| 561 | ret = trace_seq_puts(s, "\n"); | 561 | ret = trace_seq_putc(s, '\n'); |
| 562 | continue; | 562 | continue; |
| 563 | } | 563 | } |
| 564 | if (!ret) | 564 | if (!ret) |
| 565 | break; | 565 | break; |
| 566 | if (ret) | 566 | if (ret) |
| 567 | ret = seq_print_user_ip(s, mm, ip, sym_flags); | 567 | ret = seq_print_user_ip(s, mm, ip, sym_flags); |
| 568 | ret = trace_seq_puts(s, "\n"); | 568 | ret = trace_seq_putc(s, '\n'); |
| 569 | } | 569 | } |
| 570 | 570 | ||
| 571 | if (mm) | 571 | if (mm) |
| @@ -579,7 +579,7 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) | |||
| 579 | int ret; | 579 | int ret; |
| 580 | 580 | ||
| 581 | if (!ip) | 581 | if (!ip) |
| 582 | return trace_seq_printf(s, "0"); | 582 | return trace_seq_putc(s, '0'); |
| 583 | 583 | ||
| 584 | if (sym_flags & TRACE_ITER_SYM_OFFSET) | 584 | if (sym_flags & TRACE_ITER_SYM_OFFSET) |
| 585 | ret = seq_print_sym_offset(s, "%s", ip); | 585 | ret = seq_print_sym_offset(s, "%s", ip); |
| @@ -964,14 +964,14 @@ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags, | |||
| 964 | goto partial; | 964 | goto partial; |
| 965 | 965 | ||
| 966 | if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) { | 966 | if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) { |
| 967 | if (!trace_seq_printf(s, " <-")) | 967 | if (!trace_seq_puts(s, " <-")) |
| 968 | goto partial; | 968 | goto partial; |
| 969 | if (!seq_print_ip_sym(s, | 969 | if (!seq_print_ip_sym(s, |
| 970 | field->parent_ip, | 970 | field->parent_ip, |
| 971 | flags)) | 971 | flags)) |
| 972 | goto partial; | 972 | goto partial; |
| 973 | } | 973 | } |
| 974 | if (!trace_seq_printf(s, "\n")) | 974 | if (!trace_seq_putc(s, '\n')) |
| 975 | goto partial; | 975 | goto partial; |
| 976 | 976 | ||
| 977 | return TRACE_TYPE_HANDLED; | 977 | return TRACE_TYPE_HANDLED; |
| @@ -1210,7 +1210,7 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter, | |||
| 1210 | 1210 | ||
| 1211 | if (!seq_print_ip_sym(s, *p, flags)) | 1211 | if (!seq_print_ip_sym(s, *p, flags)) |
| 1212 | goto partial; | 1212 | goto partial; |
| 1213 | if (!trace_seq_puts(s, "\n")) | 1213 | if (!trace_seq_putc(s, '\n')) |
| 1214 | goto partial; | 1214 | goto partial; |
| 1215 | } | 1215 | } |
| 1216 | 1216 | ||
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 2901e3b88590..a7329b7902f8 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c | |||
| @@ -640,13 +640,20 @@ out: | |||
| 640 | * Enable ftrace, sleep 1/10 second, and then read the trace | 640 | * Enable ftrace, sleep 1/10 second, and then read the trace |
| 641 | * buffer to see if all is in order. | 641 | * buffer to see if all is in order. |
| 642 | */ | 642 | */ |
| 643 | int | 643 | __init int |
| 644 | trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) | 644 | trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) |
| 645 | { | 645 | { |
| 646 | int save_ftrace_enabled = ftrace_enabled; | 646 | int save_ftrace_enabled = ftrace_enabled; |
| 647 | unsigned long count; | 647 | unsigned long count; |
| 648 | int ret; | 648 | int ret; |
| 649 | 649 | ||
| 650 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
| 651 | if (ftrace_filter_param) { | ||
| 652 | printk(KERN_CONT " ... kernel command line filter set: force PASS ... "); | ||
| 653 | return 0; | ||
| 654 | } | ||
| 655 | #endif | ||
| 656 | |||
| 650 | /* make sure msleep has been recorded */ | 657 | /* make sure msleep has been recorded */ |
| 651 | msleep(1); | 658 | msleep(1); |
| 652 | 659 | ||
| @@ -727,13 +734,20 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace) | |||
| 727 | * Pretty much the same than for the function tracer from which the selftest | 734 | * Pretty much the same than for the function tracer from which the selftest |
| 728 | * has been borrowed. | 735 | * has been borrowed. |
| 729 | */ | 736 | */ |
| 730 | int | 737 | __init int |
| 731 | trace_selftest_startup_function_graph(struct tracer *trace, | 738 | trace_selftest_startup_function_graph(struct tracer *trace, |
| 732 | struct trace_array *tr) | 739 | struct trace_array *tr) |
| 733 | { | 740 | { |
| 734 | int ret; | 741 | int ret; |
| 735 | unsigned long count; | 742 | unsigned long count; |
| 736 | 743 | ||
| 744 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
| 745 | if (ftrace_filter_param) { | ||
| 746 | printk(KERN_CONT " ... kernel command line filter set: force PASS ... "); | ||
| 747 | return 0; | ||
| 748 | } | ||
| 749 | #endif | ||
| 750 | |||
| 737 | /* | 751 | /* |
| 738 | * Simulate the init() callback but we attach a watchdog callback | 752 | * Simulate the init() callback but we attach a watchdog callback |
| 739 | * to detect and recover from possible hangs | 753 | * to detect and recover from possible hangs |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 8f2ac73c7a5f..8fd03657bc7d 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
| @@ -175,7 +175,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags, | |||
| 175 | entry = syscall_nr_to_meta(syscall); | 175 | entry = syscall_nr_to_meta(syscall); |
| 176 | 176 | ||
| 177 | if (!entry) { | 177 | if (!entry) { |
| 178 | trace_seq_printf(s, "\n"); | 178 | trace_seq_putc(s, '\n'); |
| 179 | return TRACE_TYPE_HANDLED; | 179 | return TRACE_TYPE_HANDLED; |
| 180 | } | 180 | } |
| 181 | 181 | ||
| @@ -306,6 +306,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) | |||
| 306 | struct syscall_metadata *sys_data; | 306 | struct syscall_metadata *sys_data; |
| 307 | struct ring_buffer_event *event; | 307 | struct ring_buffer_event *event; |
| 308 | struct ring_buffer *buffer; | 308 | struct ring_buffer *buffer; |
| 309 | unsigned long irq_flags; | ||
| 310 | int pc; | ||
| 309 | int syscall_nr; | 311 | int syscall_nr; |
| 310 | int size; | 312 | int size; |
| 311 | 313 | ||
| @@ -321,9 +323,12 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) | |||
| 321 | 323 | ||
| 322 | size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; | 324 | size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; |
| 323 | 325 | ||
| 326 | local_save_flags(irq_flags); | ||
| 327 | pc = preempt_count(); | ||
| 328 | |||
| 324 | buffer = tr->trace_buffer.buffer; | 329 | buffer = tr->trace_buffer.buffer; |
| 325 | event = trace_buffer_lock_reserve(buffer, | 330 | event = trace_buffer_lock_reserve(buffer, |
| 326 | sys_data->enter_event->event.type, size, 0, 0); | 331 | sys_data->enter_event->event.type, size, irq_flags, pc); |
| 327 | if (!event) | 332 | if (!event) |
| 328 | return; | 333 | return; |
| 329 | 334 | ||
| @@ -333,7 +338,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) | |||
| 333 | 338 | ||
| 334 | if (!filter_current_check_discard(buffer, sys_data->enter_event, | 339 | if (!filter_current_check_discard(buffer, sys_data->enter_event, |
| 335 | entry, event)) | 340 | entry, event)) |
| 336 | trace_current_buffer_unlock_commit(buffer, event, 0, 0); | 341 | trace_current_buffer_unlock_commit(buffer, event, |
| 342 | irq_flags, pc); | ||
| 337 | } | 343 | } |
| 338 | 344 | ||
| 339 | static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) | 345 | static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) |
| @@ -343,6 +349,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) | |||
| 343 | struct syscall_metadata *sys_data; | 349 | struct syscall_metadata *sys_data; |
| 344 | struct ring_buffer_event *event; | 350 | struct ring_buffer_event *event; |
| 345 | struct ring_buffer *buffer; | 351 | struct ring_buffer *buffer; |
| 352 | unsigned long irq_flags; | ||
| 353 | int pc; | ||
| 346 | int syscall_nr; | 354 | int syscall_nr; |
| 347 | 355 | ||
| 348 | syscall_nr = trace_get_syscall_nr(current, regs); | 356 | syscall_nr = trace_get_syscall_nr(current, regs); |
| @@ -355,9 +363,13 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) | |||
| 355 | if (!sys_data) | 363 | if (!sys_data) |
| 356 | return; | 364 | return; |
| 357 | 365 | ||
| 366 | local_save_flags(irq_flags); | ||
| 367 | pc = preempt_count(); | ||
| 368 | |||
| 358 | buffer = tr->trace_buffer.buffer; | 369 | buffer = tr->trace_buffer.buffer; |
| 359 | event = trace_buffer_lock_reserve(buffer, | 370 | event = trace_buffer_lock_reserve(buffer, |
| 360 | sys_data->exit_event->event.type, sizeof(*entry), 0, 0); | 371 | sys_data->exit_event->event.type, sizeof(*entry), |
| 372 | irq_flags, pc); | ||
| 361 | if (!event) | 373 | if (!event) |
| 362 | return; | 374 | return; |
| 363 | 375 | ||
| @@ -367,7 +379,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) | |||
| 367 | 379 | ||
| 368 | if (!filter_current_check_discard(buffer, sys_data->exit_event, | 380 | if (!filter_current_check_discard(buffer, sys_data->exit_event, |
| 369 | entry, event)) | 381 | entry, event)) |
| 370 | trace_current_buffer_unlock_commit(buffer, event, 0, 0); | 382 | trace_current_buffer_unlock_commit(buffer, event, |
| 383 | irq_flags, pc); | ||
| 371 | } | 384 | } |
| 372 | 385 | ||
| 373 | static int reg_event_syscall_enter(struct ftrace_event_file *file, | 386 | static int reg_event_syscall_enter(struct ftrace_event_file *file, |
| @@ -553,15 +566,15 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) | |||
| 553 | if (!sys_data) | 566 | if (!sys_data) |
| 554 | return; | 567 | return; |
| 555 | 568 | ||
| 569 | head = this_cpu_ptr(sys_data->enter_event->perf_events); | ||
| 570 | if (hlist_empty(head)) | ||
| 571 | return; | ||
| 572 | |||
| 556 | /* get the size after alignment with the u32 buffer size field */ | 573 | /* get the size after alignment with the u32 buffer size field */ |
| 557 | size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec); | 574 | size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec); |
| 558 | size = ALIGN(size + sizeof(u32), sizeof(u64)); | 575 | size = ALIGN(size + sizeof(u32), sizeof(u64)); |
| 559 | size -= sizeof(u32); | 576 | size -= sizeof(u32); |
| 560 | 577 | ||
| 561 | if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, | ||
| 562 | "perf buffer not large enough")) | ||
| 563 | return; | ||
| 564 | |||
| 565 | rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, | 578 | rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, |
| 566 | sys_data->enter_event->event.type, regs, &rctx); | 579 | sys_data->enter_event->event.type, regs, &rctx); |
| 567 | if (!rec) | 580 | if (!rec) |
| @@ -570,8 +583,6 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) | |||
| 570 | rec->nr = syscall_nr; | 583 | rec->nr = syscall_nr; |
| 571 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, | 584 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, |
| 572 | (unsigned long *)&rec->args); | 585 | (unsigned long *)&rec->args); |
| 573 | |||
| 574 | head = this_cpu_ptr(sys_data->enter_event->perf_events); | ||
| 575 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); | 586 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); |
| 576 | } | 587 | } |
| 577 | 588 | ||
| @@ -629,18 +640,14 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) | |||
| 629 | if (!sys_data) | 640 | if (!sys_data) |
| 630 | return; | 641 | return; |
| 631 | 642 | ||
| 643 | head = this_cpu_ptr(sys_data->exit_event->perf_events); | ||
| 644 | if (hlist_empty(head)) | ||
| 645 | return; | ||
| 646 | |||
| 632 | /* We can probably do that at build time */ | 647 | /* We can probably do that at build time */ |
| 633 | size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); | 648 | size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); |
| 634 | size -= sizeof(u32); | 649 | size -= sizeof(u32); |
| 635 | 650 | ||
| 636 | /* | ||
| 637 | * Impossible, but be paranoid with the future | ||
| 638 | * How to put this check outside runtime? | ||
| 639 | */ | ||
| 640 | if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, | ||
| 641 | "exit event has grown above perf buffer size")) | ||
| 642 | return; | ||
| 643 | |||
| 644 | rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, | 651 | rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, |
| 645 | sys_data->exit_event->event.type, regs, &rctx); | 652 | sys_data->exit_event->event.type, regs, &rctx); |
| 646 | if (!rec) | 653 | if (!rec) |
| @@ -648,8 +655,6 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) | |||
| 648 | 655 | ||
| 649 | rec->nr = syscall_nr; | 656 | rec->nr = syscall_nr; |
| 650 | rec->ret = syscall_get_return_value(current, regs); | 657 | rec->ret = syscall_get_return_value(current, regs); |
| 651 | |||
| 652 | head = this_cpu_ptr(sys_data->exit_event->perf_events); | ||
| 653 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); | 658 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); |
| 654 | } | 659 | } |
| 655 | 660 | ||
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 32494fb0ee64..a23d2d71188e 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c | |||
| @@ -283,8 +283,10 @@ static int create_trace_uprobe(int argc, char **argv) | |||
| 283 | return -EINVAL; | 283 | return -EINVAL; |
| 284 | } | 284 | } |
| 285 | arg = strchr(argv[1], ':'); | 285 | arg = strchr(argv[1], ':'); |
| 286 | if (!arg) | 286 | if (!arg) { |
| 287 | ret = -EINVAL; | ||
| 287 | goto fail_address_parse; | 288 | goto fail_address_parse; |
| 289 | } | ||
| 288 | 290 | ||
| 289 | *arg++ = '\0'; | 291 | *arg++ = '\0'; |
| 290 | filename = argv[1]; | 292 | filename = argv[1]; |
| @@ -816,8 +818,6 @@ static void uprobe_perf_print(struct trace_uprobe *tu, | |||
| 816 | 818 | ||
| 817 | size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); | 819 | size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); |
| 818 | size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32); | 820 | size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32); |
| 819 | if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) | ||
| 820 | return; | ||
| 821 | 821 | ||
| 822 | preempt_disable(); | 822 | preempt_disable(); |
| 823 | head = this_cpu_ptr(call->perf_events); | 823 | head = this_cpu_ptr(call->perf_events); |
diff --git a/kernel/wait.c b/kernel/wait.c index ce0daa320a26..dec68bd4e9d8 100644 --- a/kernel/wait.c +++ b/kernel/wait.c | |||
| @@ -333,7 +333,8 @@ int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q, | |||
| 333 | prepare_to_wait(wq, &q->wait, mode); | 333 | prepare_to_wait(wq, &q->wait, mode); |
| 334 | val = q->key.flags; | 334 | val = q->key.flags; |
| 335 | if (atomic_read(val) == 0) | 335 | if (atomic_read(val) == 0) |
| 336 | ret = (*action)(val); | 336 | break; |
| 337 | ret = (*action)(val); | ||
| 337 | } while (!ret && atomic_read(val) != 0); | 338 | } while (!ret && atomic_read(val) != 0); |
| 338 | finish_wait(wq, &q->wait); | 339 | finish_wait(wq, &q->wait); |
| 339 | return ret; | 340 | return ret; |
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 05039e348f07..1241d8c91d5e 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
| @@ -29,9 +29,9 @@ | |||
| 29 | #include <linux/kvm_para.h> | 29 | #include <linux/kvm_para.h> |
| 30 | #include <linux/perf_event.h> | 30 | #include <linux/perf_event.h> |
| 31 | 31 | ||
| 32 | int watchdog_enabled = 1; | 32 | int watchdog_user_enabled = 1; |
| 33 | int __read_mostly watchdog_thresh = 10; | 33 | int __read_mostly watchdog_thresh = 10; |
| 34 | static int __read_mostly watchdog_disabled; | 34 | static int __read_mostly watchdog_running; |
| 35 | static u64 __read_mostly sample_period; | 35 | static u64 __read_mostly sample_period; |
| 36 | 36 | ||
| 37 | static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); | 37 | static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); |
| @@ -63,7 +63,7 @@ static int __init hardlockup_panic_setup(char *str) | |||
| 63 | else if (!strncmp(str, "nopanic", 7)) | 63 | else if (!strncmp(str, "nopanic", 7)) |
| 64 | hardlockup_panic = 0; | 64 | hardlockup_panic = 0; |
| 65 | else if (!strncmp(str, "0", 1)) | 65 | else if (!strncmp(str, "0", 1)) |
| 66 | watchdog_enabled = 0; | 66 | watchdog_user_enabled = 0; |
| 67 | return 1; | 67 | return 1; |
| 68 | } | 68 | } |
| 69 | __setup("nmi_watchdog=", hardlockup_panic_setup); | 69 | __setup("nmi_watchdog=", hardlockup_panic_setup); |
| @@ -82,7 +82,7 @@ __setup("softlockup_panic=", softlockup_panic_setup); | |||
| 82 | 82 | ||
| 83 | static int __init nowatchdog_setup(char *str) | 83 | static int __init nowatchdog_setup(char *str) |
| 84 | { | 84 | { |
| 85 | watchdog_enabled = 0; | 85 | watchdog_user_enabled = 0; |
| 86 | return 1; | 86 | return 1; |
| 87 | } | 87 | } |
| 88 | __setup("nowatchdog", nowatchdog_setup); | 88 | __setup("nowatchdog", nowatchdog_setup); |
| @@ -90,7 +90,7 @@ __setup("nowatchdog", nowatchdog_setup); | |||
| 90 | /* deprecated */ | 90 | /* deprecated */ |
| 91 | static int __init nosoftlockup_setup(char *str) | 91 | static int __init nosoftlockup_setup(char *str) |
| 92 | { | 92 | { |
| 93 | watchdog_enabled = 0; | 93 | watchdog_user_enabled = 0; |
| 94 | return 1; | 94 | return 1; |
| 95 | } | 95 | } |
| 96 | __setup("nosoftlockup", nosoftlockup_setup); | 96 | __setup("nosoftlockup", nosoftlockup_setup); |
| @@ -158,7 +158,7 @@ void touch_all_softlockup_watchdogs(void) | |||
| 158 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | 158 | #ifdef CONFIG_HARDLOCKUP_DETECTOR |
| 159 | void touch_nmi_watchdog(void) | 159 | void touch_nmi_watchdog(void) |
| 160 | { | 160 | { |
| 161 | if (watchdog_enabled) { | 161 | if (watchdog_user_enabled) { |
| 162 | unsigned cpu; | 162 | unsigned cpu; |
| 163 | 163 | ||
| 164 | for_each_present_cpu(cpu) { | 164 | for_each_present_cpu(cpu) { |
| @@ -347,11 +347,6 @@ static void watchdog_enable(unsigned int cpu) | |||
| 347 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 347 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
| 348 | hrtimer->function = watchdog_timer_fn; | 348 | hrtimer->function = watchdog_timer_fn; |
| 349 | 349 | ||
| 350 | if (!watchdog_enabled) { | ||
| 351 | kthread_park(current); | ||
| 352 | return; | ||
| 353 | } | ||
| 354 | |||
| 355 | /* Enable the perf event */ | 350 | /* Enable the perf event */ |
| 356 | watchdog_nmi_enable(cpu); | 351 | watchdog_nmi_enable(cpu); |
| 357 | 352 | ||
| @@ -374,6 +369,11 @@ static void watchdog_disable(unsigned int cpu) | |||
| 374 | watchdog_nmi_disable(cpu); | 369 | watchdog_nmi_disable(cpu); |
| 375 | } | 370 | } |
| 376 | 371 | ||
| 372 | static void watchdog_cleanup(unsigned int cpu, bool online) | ||
| 373 | { | ||
| 374 | watchdog_disable(cpu); | ||
| 375 | } | ||
| 376 | |||
| 377 | static int watchdog_should_run(unsigned int cpu) | 377 | static int watchdog_should_run(unsigned int cpu) |
| 378 | { | 378 | { |
| 379 | return __this_cpu_read(hrtimer_interrupts) != | 379 | return __this_cpu_read(hrtimer_interrupts) != |
| @@ -475,28 +475,40 @@ static int watchdog_nmi_enable(unsigned int cpu) { return 0; } | |||
| 475 | static void watchdog_nmi_disable(unsigned int cpu) { return; } | 475 | static void watchdog_nmi_disable(unsigned int cpu) { return; } |
| 476 | #endif /* CONFIG_HARDLOCKUP_DETECTOR */ | 476 | #endif /* CONFIG_HARDLOCKUP_DETECTOR */ |
| 477 | 477 | ||
| 478 | /* prepare/enable/disable routines */ | 478 | static struct smp_hotplug_thread watchdog_threads = { |
| 479 | /* sysctl functions */ | 479 | .store = &softlockup_watchdog, |
| 480 | #ifdef CONFIG_SYSCTL | 480 | .thread_should_run = watchdog_should_run, |
| 481 | static void watchdog_enable_all_cpus(void) | 481 | .thread_fn = watchdog, |
| 482 | .thread_comm = "watchdog/%u", | ||
| 483 | .setup = watchdog_enable, | ||
| 484 | .cleanup = watchdog_cleanup, | ||
| 485 | .park = watchdog_disable, | ||
| 486 | .unpark = watchdog_enable, | ||
| 487 | }; | ||
| 488 | |||
| 489 | static int watchdog_enable_all_cpus(void) | ||
| 482 | { | 490 | { |
| 483 | unsigned int cpu; | 491 | int err = 0; |
| 484 | 492 | ||
| 485 | if (watchdog_disabled) { | 493 | if (!watchdog_running) { |
| 486 | watchdog_disabled = 0; | 494 | err = smpboot_register_percpu_thread(&watchdog_threads); |
| 487 | for_each_online_cpu(cpu) | 495 | if (err) |
| 488 | kthread_unpark(per_cpu(softlockup_watchdog, cpu)); | 496 | pr_err("Failed to create watchdog threads, disabled\n"); |
| 497 | else | ||
| 498 | watchdog_running = 1; | ||
| 489 | } | 499 | } |
| 500 | |||
| 501 | return err; | ||
| 490 | } | 502 | } |
| 491 | 503 | ||
| 504 | /* prepare/enable/disable routines */ | ||
| 505 | /* sysctl functions */ | ||
| 506 | #ifdef CONFIG_SYSCTL | ||
| 492 | static void watchdog_disable_all_cpus(void) | 507 | static void watchdog_disable_all_cpus(void) |
| 493 | { | 508 | { |
| 494 | unsigned int cpu; | 509 | if (watchdog_running) { |
| 495 | 510 | watchdog_running = 0; | |
| 496 | if (!watchdog_disabled) { | 511 | smpboot_unregister_percpu_thread(&watchdog_threads); |
| 497 | watchdog_disabled = 1; | ||
| 498 | for_each_online_cpu(cpu) | ||
| 499 | kthread_park(per_cpu(softlockup_watchdog, cpu)); | ||
| 500 | } | 512 | } |
| 501 | } | 513 | } |
| 502 | 514 | ||
| @@ -507,45 +519,48 @@ static void watchdog_disable_all_cpus(void) | |||
| 507 | int proc_dowatchdog(struct ctl_table *table, int write, | 519 | int proc_dowatchdog(struct ctl_table *table, int write, |
| 508 | void __user *buffer, size_t *lenp, loff_t *ppos) | 520 | void __user *buffer, size_t *lenp, loff_t *ppos) |
| 509 | { | 521 | { |
| 510 | int ret; | 522 | int err, old_thresh, old_enabled; |
| 511 | 523 | ||
| 512 | if (watchdog_disabled < 0) | 524 | old_thresh = ACCESS_ONCE(watchdog_thresh); |
| 513 | return -ENODEV; | 525 | old_enabled = ACCESS_ONCE(watchdog_user_enabled); |
| 514 | 526 | ||
| 515 | ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | 527 | err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
| 516 | if (ret || !write) | 528 | if (err || !write) |
| 517 | return ret; | 529 | return err; |
| 518 | 530 | ||
| 519 | set_sample_period(); | 531 | set_sample_period(); |
| 520 | /* | 532 | /* |
| 521 | * Watchdog threads shouldn't be enabled if they are | 533 | * Watchdog threads shouldn't be enabled if they are |
| 522 | * disabled. The 'watchdog_disabled' variable check in | 534 | * disabled. The 'watchdog_running' variable check in |
| 523 | * watchdog_*_all_cpus() function takes care of this. | 535 | * watchdog_*_all_cpus() function takes care of this. |
| 524 | */ | 536 | */ |
| 525 | if (watchdog_enabled && watchdog_thresh) | 537 | if (watchdog_user_enabled && watchdog_thresh) |
| 526 | watchdog_enable_all_cpus(); | 538 | err = watchdog_enable_all_cpus(); |
| 527 | else | 539 | else |
| 528 | watchdog_disable_all_cpus(); | 540 | watchdog_disable_all_cpus(); |
| 529 | 541 | ||
| 530 | return ret; | 542 | /* Restore old values on failure */ |
| 543 | if (err) { | ||
| 544 | watchdog_thresh = old_thresh; | ||
| 545 | watchdog_user_enabled = old_enabled; | ||
| 546 | } | ||
| 547 | |||
| 548 | return err; | ||
| 531 | } | 549 | } |
| 532 | #endif /* CONFIG_SYSCTL */ | 550 | #endif /* CONFIG_SYSCTL */ |
| 533 | 551 | ||
| 534 | static struct smp_hotplug_thread watchdog_threads = { | ||
| 535 | .store = &softlockup_watchdog, | ||
| 536 | .thread_should_run = watchdog_should_run, | ||
| 537 | .thread_fn = watchdog, | ||
| 538 | .thread_comm = "watchdog/%u", | ||
| 539 | .setup = watchdog_enable, | ||
| 540 | .park = watchdog_disable, | ||
| 541 | .unpark = watchdog_enable, | ||
| 542 | }; | ||
| 543 | |||
| 544 | void __init lockup_detector_init(void) | 552 | void __init lockup_detector_init(void) |
| 545 | { | 553 | { |
| 546 | set_sample_period(); | 554 | set_sample_period(); |
| 547 | if (smpboot_register_percpu_thread(&watchdog_threads)) { | 555 | |
| 548 | pr_err("Failed to create watchdog threads, disabled\n"); | 556 | #ifdef CONFIG_NO_HZ_FULL |
| 549 | watchdog_disabled = -ENODEV; | 557 | if (watchdog_user_enabled) { |
| 558 | watchdog_user_enabled = 0; | ||
| 559 | pr_warning("Disabled lockup detectors by default for full dynticks\n"); | ||
| 560 | pr_warning("You can reactivate it with 'sysctl -w kernel.watchdog=1'\n"); | ||
| 550 | } | 561 | } |
| 562 | #endif | ||
| 563 | |||
| 564 | if (watchdog_user_enabled) | ||
| 565 | watchdog_enable_all_cpus(); | ||
| 551 | } | 566 | } |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f02c4a4a0c3c..0b72e816b8d0 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -4644,7 +4644,7 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu) | |||
| 4644 | * Workqueues should be brought up before normal priority CPU notifiers. | 4644 | * Workqueues should be brought up before normal priority CPU notifiers. |
| 4645 | * This will be registered high priority CPU notifier. | 4645 | * This will be registered high priority CPU notifier. |
| 4646 | */ | 4646 | */ |
| 4647 | static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, | 4647 | static int workqueue_cpu_up_callback(struct notifier_block *nfb, |
| 4648 | unsigned long action, | 4648 | unsigned long action, |
| 4649 | void *hcpu) | 4649 | void *hcpu) |
| 4650 | { | 4650 | { |
| @@ -4697,7 +4697,7 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, | |||
| 4697 | * Workqueues should be brought down after normal priority CPU notifiers. | 4697 | * Workqueues should be brought down after normal priority CPU notifiers. |
| 4698 | * This will be registered as low priority CPU notifier. | 4698 | * This will be registered as low priority CPU notifier. |
| 4699 | */ | 4699 | */ |
| 4700 | static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb, | 4700 | static int workqueue_cpu_down_callback(struct notifier_block *nfb, |
| 4701 | unsigned long action, | 4701 | unsigned long action, |
| 4702 | void *hcpu) | 4702 | void *hcpu) |
| 4703 | { | 4703 | { |
