diff options
Diffstat (limited to 'kernel')
140 files changed, 5711 insertions, 2117 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index d06467fc8f7..e898c5b9d02 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -9,8 +9,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ | |||
9 | rcupdate.o extable.o params.o posix-timers.o \ | 9 | rcupdate.o extable.o params.o posix-timers.o \ |
10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ | 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ |
11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ | 11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ |
12 | notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ | 12 | notifier.o ksysfs.o sched_clock.o cred.o \ |
13 | async.o range.o jump_label.o | 13 | async.o range.o |
14 | obj-y += groups.o | 14 | obj-y += groups.o |
15 | 15 | ||
16 | ifdef CONFIG_FUNCTION_TRACER | 16 | ifdef CONFIG_FUNCTION_TRACER |
@@ -101,12 +101,14 @@ obj-$(CONFIG_RING_BUFFER) += trace/ | |||
101 | obj-$(CONFIG_TRACEPOINTS) += trace/ | 101 | obj-$(CONFIG_TRACEPOINTS) += trace/ |
102 | obj-$(CONFIG_SMP) += sched_cpupri.o | 102 | obj-$(CONFIG_SMP) += sched_cpupri.o |
103 | obj-$(CONFIG_IRQ_WORK) += irq_work.o | 103 | obj-$(CONFIG_IRQ_WORK) += irq_work.o |
104 | obj-$(CONFIG_CPU_PM) += cpu_pm.o | ||
104 | 105 | ||
105 | obj-$(CONFIG_PERF_EVENTS) += events/ | 106 | obj-$(CONFIG_PERF_EVENTS) += events/ |
106 | 107 | ||
107 | obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o | 108 | obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o |
108 | obj-$(CONFIG_PADATA) += padata.o | 109 | obj-$(CONFIG_PADATA) += padata.o |
109 | obj-$(CONFIG_CRASH_DUMP) += crash_dump.o | 110 | obj-$(CONFIG_CRASH_DUMP) += crash_dump.o |
111 | obj-$(CONFIG_JUMP_LABEL) += jump_label.o | ||
110 | 112 | ||
111 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) | 113 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) |
112 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is | 114 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is |
diff --git a/kernel/async.c b/kernel/async.c index d5fe7af0de2..80b74b88fef 100644 --- a/kernel/async.c +++ b/kernel/async.c | |||
@@ -51,7 +51,7 @@ asynchronous and synchronous parts of the kernel. | |||
51 | #include <linux/async.h> | 51 | #include <linux/async.h> |
52 | #include <linux/atomic.h> | 52 | #include <linux/atomic.h> |
53 | #include <linux/ktime.h> | 53 | #include <linux/ktime.h> |
54 | #include <linux/module.h> | 54 | #include <linux/export.h> |
55 | #include <linux/wait.h> | 55 | #include <linux/wait.h> |
56 | #include <linux/sched.h> | 56 | #include <linux/sched.h> |
57 | #include <linux/slab.h> | 57 | #include <linux/slab.h> |
@@ -120,7 +120,7 @@ static void async_run_entry_fn(struct work_struct *work) | |||
120 | struct async_entry *entry = | 120 | struct async_entry *entry = |
121 | container_of(work, struct async_entry, work); | 121 | container_of(work, struct async_entry, work); |
122 | unsigned long flags; | 122 | unsigned long flags; |
123 | ktime_t calltime, delta, rettime; | 123 | ktime_t uninitialized_var(calltime), delta, rettime; |
124 | 124 | ||
125 | /* 1) move self to the running queue */ | 125 | /* 1) move self to the running queue */ |
126 | spin_lock_irqsave(&async_lock, flags); | 126 | spin_lock_irqsave(&async_lock, flags); |
@@ -269,7 +269,7 @@ EXPORT_SYMBOL_GPL(async_synchronize_full_domain); | |||
269 | void async_synchronize_cookie_domain(async_cookie_t cookie, | 269 | void async_synchronize_cookie_domain(async_cookie_t cookie, |
270 | struct list_head *running) | 270 | struct list_head *running) |
271 | { | 271 | { |
272 | ktime_t starttime, delta, endtime; | 272 | ktime_t uninitialized_var(starttime), delta, endtime; |
273 | 273 | ||
274 | if (initcall_debug && system_state == SYSTEM_BOOTING) { | 274 | if (initcall_debug && system_state == SYSTEM_BOOTING) { |
275 | printk(KERN_DEBUG "async_waiting @ %i\n", task_pid_nr(current)); | 275 | printk(KERN_DEBUG "async_waiting @ %i\n", task_pid_nr(current)); |
diff --git a/kernel/audit.c b/kernel/audit.c index 0a1355ca3d7..09fae2677a4 100644 --- a/kernel/audit.c +++ b/kernel/audit.c | |||
@@ -45,7 +45,7 @@ | |||
45 | #include <asm/types.h> | 45 | #include <asm/types.h> |
46 | #include <linux/atomic.h> | 46 | #include <linux/atomic.h> |
47 | #include <linux/mm.h> | 47 | #include <linux/mm.h> |
48 | #include <linux/module.h> | 48 | #include <linux/export.h> |
49 | #include <linux/slab.h> | 49 | #include <linux/slab.h> |
50 | #include <linux/err.h> | 50 | #include <linux/err.h> |
51 | #include <linux/kthread.h> | 51 | #include <linux/kthread.h> |
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index ce4b054acee..47b7fc1ea89 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c | |||
@@ -48,7 +48,7 @@ | |||
48 | #include <linux/fs.h> | 48 | #include <linux/fs.h> |
49 | #include <linux/namei.h> | 49 | #include <linux/namei.h> |
50 | #include <linux/mm.h> | 50 | #include <linux/mm.h> |
51 | #include <linux/module.h> | 51 | #include <linux/export.h> |
52 | #include <linux/slab.h> | 52 | #include <linux/slab.h> |
53 | #include <linux/mount.h> | 53 | #include <linux/mount.h> |
54 | #include <linux/socket.h> | 54 | #include <linux/socket.h> |
diff --git a/kernel/capability.c b/kernel/capability.c index 283c529f8b1..b463871a4e6 100644 --- a/kernel/capability.c +++ b/kernel/capability.c | |||
@@ -10,7 +10,7 @@ | |||
10 | #include <linux/audit.h> | 10 | #include <linux/audit.h> |
11 | #include <linux/capability.h> | 11 | #include <linux/capability.h> |
12 | #include <linux/mm.h> | 12 | #include <linux/mm.h> |
13 | #include <linux/module.h> | 13 | #include <linux/export.h> |
14 | #include <linux/security.h> | 14 | #include <linux/security.h> |
15 | #include <linux/syscalls.h> | 15 | #include <linux/syscalls.h> |
16 | #include <linux/pid_namespace.h> | 16 | #include <linux/pid_namespace.h> |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 1d2b6ceea95..d9d5648f3cd 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -265,7 +265,7 @@ list_for_each_entry(_root, &roots, root_list) | |||
265 | /* the list of cgroups eligible for automatic release. Protected by | 265 | /* the list of cgroups eligible for automatic release. Protected by |
266 | * release_list_lock */ | 266 | * release_list_lock */ |
267 | static LIST_HEAD(release_list); | 267 | static LIST_HEAD(release_list); |
268 | static DEFINE_SPINLOCK(release_list_lock); | 268 | static DEFINE_RAW_SPINLOCK(release_list_lock); |
269 | static void cgroup_release_agent(struct work_struct *work); | 269 | static void cgroup_release_agent(struct work_struct *work); |
270 | static DECLARE_WORK(release_agent_work, cgroup_release_agent); | 270 | static DECLARE_WORK(release_agent_work, cgroup_release_agent); |
271 | static void check_for_release(struct cgroup *cgrp); | 271 | static void check_for_release(struct cgroup *cgrp); |
@@ -2027,7 +2027,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | |||
2027 | goto out_free_group_list; | 2027 | goto out_free_group_list; |
2028 | 2028 | ||
2029 | /* prevent changes to the threadgroup list while we take a snapshot. */ | 2029 | /* prevent changes to the threadgroup list while we take a snapshot. */ |
2030 | rcu_read_lock(); | 2030 | read_lock(&tasklist_lock); |
2031 | if (!thread_group_leader(leader)) { | 2031 | if (!thread_group_leader(leader)) { |
2032 | /* | 2032 | /* |
2033 | * a race with de_thread from another thread's exec() may strip | 2033 | * a race with de_thread from another thread's exec() may strip |
@@ -2036,7 +2036,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | |||
2036 | * throw this task away and try again (from cgroup_procs_write); | 2036 | * throw this task away and try again (from cgroup_procs_write); |
2037 | * this is "double-double-toil-and-trouble-check locking". | 2037 | * this is "double-double-toil-and-trouble-check locking". |
2038 | */ | 2038 | */ |
2039 | rcu_read_unlock(); | 2039 | read_unlock(&tasklist_lock); |
2040 | retval = -EAGAIN; | 2040 | retval = -EAGAIN; |
2041 | goto out_free_group_list; | 2041 | goto out_free_group_list; |
2042 | } | 2042 | } |
@@ -2057,7 +2057,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | |||
2057 | } while_each_thread(leader, tsk); | 2057 | } while_each_thread(leader, tsk); |
2058 | /* remember the number of threads in the array for later. */ | 2058 | /* remember the number of threads in the array for later. */ |
2059 | group_size = i; | 2059 | group_size = i; |
2060 | rcu_read_unlock(); | 2060 | read_unlock(&tasklist_lock); |
2061 | 2061 | ||
2062 | /* | 2062 | /* |
2063 | * step 1: check that we can legitimately attach to the cgroup. | 2063 | * step 1: check that we can legitimately attach to the cgroup. |
@@ -2135,14 +2135,17 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | |||
2135 | oldcgrp = task_cgroup_from_root(tsk, root); | 2135 | oldcgrp = task_cgroup_from_root(tsk, root); |
2136 | if (cgrp == oldcgrp) | 2136 | if (cgrp == oldcgrp) |
2137 | continue; | 2137 | continue; |
2138 | /* attach each task to each subsystem */ | ||
2139 | for_each_subsys(root, ss) { | ||
2140 | if (ss->attach_task) | ||
2141 | ss->attach_task(cgrp, tsk); | ||
2142 | } | ||
2143 | /* if the thread is PF_EXITING, it can just get skipped. */ | 2138 | /* if the thread is PF_EXITING, it can just get skipped. */ |
2144 | retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true); | 2139 | retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true); |
2145 | BUG_ON(retval != 0 && retval != -ESRCH); | 2140 | if (retval == 0) { |
2141 | /* attach each task to each subsystem */ | ||
2142 | for_each_subsys(root, ss) { | ||
2143 | if (ss->attach_task) | ||
2144 | ss->attach_task(cgrp, tsk); | ||
2145 | } | ||
2146 | } else { | ||
2147 | BUG_ON(retval != -ESRCH); | ||
2148 | } | ||
2146 | } | 2149 | } |
2147 | /* nothing is sensitive to fork() after this point. */ | 2150 | /* nothing is sensitive to fork() after this point. */ |
2148 | 2151 | ||
@@ -4014,11 +4017,11 @@ again: | |||
4014 | finish_wait(&cgroup_rmdir_waitq, &wait); | 4017 | finish_wait(&cgroup_rmdir_waitq, &wait); |
4015 | clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); | 4018 | clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); |
4016 | 4019 | ||
4017 | spin_lock(&release_list_lock); | 4020 | raw_spin_lock(&release_list_lock); |
4018 | set_bit(CGRP_REMOVED, &cgrp->flags); | 4021 | set_bit(CGRP_REMOVED, &cgrp->flags); |
4019 | if (!list_empty(&cgrp->release_list)) | 4022 | if (!list_empty(&cgrp->release_list)) |
4020 | list_del_init(&cgrp->release_list); | 4023 | list_del_init(&cgrp->release_list); |
4021 | spin_unlock(&release_list_lock); | 4024 | raw_spin_unlock(&release_list_lock); |
4022 | 4025 | ||
4023 | cgroup_lock_hierarchy(cgrp->root); | 4026 | cgroup_lock_hierarchy(cgrp->root); |
4024 | /* delete this cgroup from parent->children */ | 4027 | /* delete this cgroup from parent->children */ |
@@ -4671,13 +4674,13 @@ static void check_for_release(struct cgroup *cgrp) | |||
4671 | * already queued for a userspace notification, queue | 4674 | * already queued for a userspace notification, queue |
4672 | * it now */ | 4675 | * it now */ |
4673 | int need_schedule_work = 0; | 4676 | int need_schedule_work = 0; |
4674 | spin_lock(&release_list_lock); | 4677 | raw_spin_lock(&release_list_lock); |
4675 | if (!cgroup_is_removed(cgrp) && | 4678 | if (!cgroup_is_removed(cgrp) && |
4676 | list_empty(&cgrp->release_list)) { | 4679 | list_empty(&cgrp->release_list)) { |
4677 | list_add(&cgrp->release_list, &release_list); | 4680 | list_add(&cgrp->release_list, &release_list); |
4678 | need_schedule_work = 1; | 4681 | need_schedule_work = 1; |
4679 | } | 4682 | } |
4680 | spin_unlock(&release_list_lock); | 4683 | raw_spin_unlock(&release_list_lock); |
4681 | if (need_schedule_work) | 4684 | if (need_schedule_work) |
4682 | schedule_work(&release_agent_work); | 4685 | schedule_work(&release_agent_work); |
4683 | } | 4686 | } |
@@ -4729,7 +4732,7 @@ static void cgroup_release_agent(struct work_struct *work) | |||
4729 | { | 4732 | { |
4730 | BUG_ON(work != &release_agent_work); | 4733 | BUG_ON(work != &release_agent_work); |
4731 | mutex_lock(&cgroup_mutex); | 4734 | mutex_lock(&cgroup_mutex); |
4732 | spin_lock(&release_list_lock); | 4735 | raw_spin_lock(&release_list_lock); |
4733 | while (!list_empty(&release_list)) { | 4736 | while (!list_empty(&release_list)) { |
4734 | char *argv[3], *envp[3]; | 4737 | char *argv[3], *envp[3]; |
4735 | int i; | 4738 | int i; |
@@ -4738,7 +4741,7 @@ static void cgroup_release_agent(struct work_struct *work) | |||
4738 | struct cgroup, | 4741 | struct cgroup, |
4739 | release_list); | 4742 | release_list); |
4740 | list_del_init(&cgrp->release_list); | 4743 | list_del_init(&cgrp->release_list); |
4741 | spin_unlock(&release_list_lock); | 4744 | raw_spin_unlock(&release_list_lock); |
4742 | pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL); | 4745 | pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL); |
4743 | if (!pathbuf) | 4746 | if (!pathbuf) |
4744 | goto continue_free; | 4747 | goto continue_free; |
@@ -4768,9 +4771,9 @@ static void cgroup_release_agent(struct work_struct *work) | |||
4768 | continue_free: | 4771 | continue_free: |
4769 | kfree(pathbuf); | 4772 | kfree(pathbuf); |
4770 | kfree(agentbuf); | 4773 | kfree(agentbuf); |
4771 | spin_lock(&release_list_lock); | 4774 | raw_spin_lock(&release_list_lock); |
4772 | } | 4775 | } |
4773 | spin_unlock(&release_list_lock); | 4776 | raw_spin_unlock(&release_list_lock); |
4774 | mutex_unlock(&cgroup_mutex); | 4777 | mutex_unlock(&cgroup_mutex); |
4775 | } | 4778 | } |
4776 | 4779 | ||
@@ -4880,9 +4883,9 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) | |||
4880 | 4883 | ||
4881 | rcu_assign_pointer(id->css, NULL); | 4884 | rcu_assign_pointer(id->css, NULL); |
4882 | rcu_assign_pointer(css->id, NULL); | 4885 | rcu_assign_pointer(css->id, NULL); |
4883 | spin_lock(&ss->id_lock); | 4886 | write_lock(&ss->id_lock); |
4884 | idr_remove(&ss->idr, id->id); | 4887 | idr_remove(&ss->idr, id->id); |
4885 | spin_unlock(&ss->id_lock); | 4888 | write_unlock(&ss->id_lock); |
4886 | kfree_rcu(id, rcu_head); | 4889 | kfree_rcu(id, rcu_head); |
4887 | } | 4890 | } |
4888 | EXPORT_SYMBOL_GPL(free_css_id); | 4891 | EXPORT_SYMBOL_GPL(free_css_id); |
@@ -4908,10 +4911,10 @@ static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth) | |||
4908 | error = -ENOMEM; | 4911 | error = -ENOMEM; |
4909 | goto err_out; | 4912 | goto err_out; |
4910 | } | 4913 | } |
4911 | spin_lock(&ss->id_lock); | 4914 | write_lock(&ss->id_lock); |
4912 | /* Don't use 0. allocates an ID of 1-65535 */ | 4915 | /* Don't use 0. allocates an ID of 1-65535 */ |
4913 | error = idr_get_new_above(&ss->idr, newid, 1, &myid); | 4916 | error = idr_get_new_above(&ss->idr, newid, 1, &myid); |
4914 | spin_unlock(&ss->id_lock); | 4917 | write_unlock(&ss->id_lock); |
4915 | 4918 | ||
4916 | /* Returns error when there are no free spaces for new ID.*/ | 4919 | /* Returns error when there are no free spaces for new ID.*/ |
4917 | if (error) { | 4920 | if (error) { |
@@ -4926,9 +4929,9 @@ static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth) | |||
4926 | return newid; | 4929 | return newid; |
4927 | remove_idr: | 4930 | remove_idr: |
4928 | error = -ENOSPC; | 4931 | error = -ENOSPC; |
4929 | spin_lock(&ss->id_lock); | 4932 | write_lock(&ss->id_lock); |
4930 | idr_remove(&ss->idr, myid); | 4933 | idr_remove(&ss->idr, myid); |
4931 | spin_unlock(&ss->id_lock); | 4934 | write_unlock(&ss->id_lock); |
4932 | err_out: | 4935 | err_out: |
4933 | kfree(newid); | 4936 | kfree(newid); |
4934 | return ERR_PTR(error); | 4937 | return ERR_PTR(error); |
@@ -4940,7 +4943,7 @@ static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss, | |||
4940 | { | 4943 | { |
4941 | struct css_id *newid; | 4944 | struct css_id *newid; |
4942 | 4945 | ||
4943 | spin_lock_init(&ss->id_lock); | 4946 | rwlock_init(&ss->id_lock); |
4944 | idr_init(&ss->idr); | 4947 | idr_init(&ss->idr); |
4945 | 4948 | ||
4946 | newid = get_new_cssid(ss, 0); | 4949 | newid = get_new_cssid(ss, 0); |
@@ -5035,9 +5038,9 @@ css_get_next(struct cgroup_subsys *ss, int id, | |||
5035 | * scan next entry from bitmap(tree), tmpid is updated after | 5038 | * scan next entry from bitmap(tree), tmpid is updated after |
5036 | * idr_get_next(). | 5039 | * idr_get_next(). |
5037 | */ | 5040 | */ |
5038 | spin_lock(&ss->id_lock); | 5041 | read_lock(&ss->id_lock); |
5039 | tmp = idr_get_next(&ss->idr, &tmpid); | 5042 | tmp = idr_get_next(&ss->idr, &tmpid); |
5040 | spin_unlock(&ss->id_lock); | 5043 | read_unlock(&ss->id_lock); |
5041 | 5044 | ||
5042 | if (!tmp) | 5045 | if (!tmp) |
5043 | break; | 5046 | break; |
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index e691818d7e4..213c0351dad 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c | |||
@@ -14,7 +14,7 @@ | |||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <linux/module.h> | 17 | #include <linux/export.h> |
18 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
19 | #include <linux/cgroup.h> | 19 | #include <linux/cgroup.h> |
20 | #include <linux/fs.h> | 20 | #include <linux/fs.h> |
@@ -153,6 +153,13 @@ static void freezer_destroy(struct cgroup_subsys *ss, | |||
153 | kfree(cgroup_freezer(cgroup)); | 153 | kfree(cgroup_freezer(cgroup)); |
154 | } | 154 | } |
155 | 155 | ||
156 | /* task is frozen or will freeze immediately when next it gets woken */ | ||
157 | static bool is_task_frozen_enough(struct task_struct *task) | ||
158 | { | ||
159 | return frozen(task) || | ||
160 | (task_is_stopped_or_traced(task) && freezing(task)); | ||
161 | } | ||
162 | |||
156 | /* | 163 | /* |
157 | * The call to cgroup_lock() in the freezer.state write method prevents | 164 | * The call to cgroup_lock() in the freezer.state write method prevents |
158 | * a write to that file racing against an attach, and hence the | 165 | * a write to that file racing against an attach, and hence the |
@@ -231,7 +238,7 @@ static void update_if_frozen(struct cgroup *cgroup, | |||
231 | cgroup_iter_start(cgroup, &it); | 238 | cgroup_iter_start(cgroup, &it); |
232 | while ((task = cgroup_iter_next(cgroup, &it))) { | 239 | while ((task = cgroup_iter_next(cgroup, &it))) { |
233 | ntotal++; | 240 | ntotal++; |
234 | if (frozen(task)) | 241 | if (is_task_frozen_enough(task)) |
235 | nfrozen++; | 242 | nfrozen++; |
236 | } | 243 | } |
237 | 244 | ||
@@ -284,7 +291,7 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) | |||
284 | while ((task = cgroup_iter_next(cgroup, &it))) { | 291 | while ((task = cgroup_iter_next(cgroup, &it))) { |
285 | if (!freeze_task(task, true)) | 292 | if (!freeze_task(task, true)) |
286 | continue; | 293 | continue; |
287 | if (frozen(task)) | 294 | if (is_task_frozen_enough(task)) |
288 | continue; | 295 | continue; |
289 | if (!freezing(task) && !freezer_should_skip(task)) | 296 | if (!freezing(task) && !freezer_should_skip(task)) |
290 | num_cant_freeze_now++; | 297 | num_cant_freeze_now++; |
diff --git a/kernel/compat.c b/kernel/compat.c index e2435ee9993..f346cedfe24 100644 --- a/kernel/compat.c +++ b/kernel/compat.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/unistd.h> | 21 | #include <linux/unistd.h> |
22 | #include <linux/security.h> | 22 | #include <linux/security.h> |
23 | #include <linux/timex.h> | 23 | #include <linux/timex.h> |
24 | #include <linux/export.h> | ||
24 | #include <linux/migrate.h> | 25 | #include <linux/migrate.h> |
25 | #include <linux/posix-timers.h> | 26 | #include <linux/posix-timers.h> |
26 | #include <linux/times.h> | 27 | #include <linux/times.h> |
diff --git a/kernel/cpu.c b/kernel/cpu.c index 12b7458f23b..563f1360947 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -10,11 +10,12 @@ | |||
10 | #include <linux/sched.h> | 10 | #include <linux/sched.h> |
11 | #include <linux/unistd.h> | 11 | #include <linux/unistd.h> |
12 | #include <linux/cpu.h> | 12 | #include <linux/cpu.h> |
13 | #include <linux/module.h> | 13 | #include <linux/export.h> |
14 | #include <linux/kthread.h> | 14 | #include <linux/kthread.h> |
15 | #include <linux/stop_machine.h> | 15 | #include <linux/stop_machine.h> |
16 | #include <linux/mutex.h> | 16 | #include <linux/mutex.h> |
17 | #include <linux/gfp.h> | 17 | #include <linux/gfp.h> |
18 | #include <linux/suspend.h> | ||
18 | 19 | ||
19 | #ifdef CONFIG_SMP | 20 | #ifdef CONFIG_SMP |
20 | /* Serializes the updates to cpu_online_mask, cpu_present_mask */ | 21 | /* Serializes the updates to cpu_online_mask, cpu_present_mask */ |
@@ -476,6 +477,79 @@ static int alloc_frozen_cpus(void) | |||
476 | return 0; | 477 | return 0; |
477 | } | 478 | } |
478 | core_initcall(alloc_frozen_cpus); | 479 | core_initcall(alloc_frozen_cpus); |
480 | |||
481 | /* | ||
482 | * Prevent regular CPU hotplug from racing with the freezer, by disabling CPU | ||
483 | * hotplug when tasks are about to be frozen. Also, don't allow the freezer | ||
484 | * to continue until any currently running CPU hotplug operation gets | ||
485 | * completed. | ||
486 | * To modify the 'cpu_hotplug_disabled' flag, we need to acquire the | ||
487 | * 'cpu_add_remove_lock'. And this same lock is also taken by the regular | ||
488 | * CPU hotplug path and released only after it is complete. Thus, we | ||
489 | * (and hence the freezer) will block here until any currently running CPU | ||
490 | * hotplug operation gets completed. | ||
491 | */ | ||
492 | void cpu_hotplug_disable_before_freeze(void) | ||
493 | { | ||
494 | cpu_maps_update_begin(); | ||
495 | cpu_hotplug_disabled = 1; | ||
496 | cpu_maps_update_done(); | ||
497 | } | ||
498 | |||
499 | |||
500 | /* | ||
501 | * When tasks have been thawed, re-enable regular CPU hotplug (which had been | ||
502 | * disabled while beginning to freeze tasks). | ||
503 | */ | ||
504 | void cpu_hotplug_enable_after_thaw(void) | ||
505 | { | ||
506 | cpu_maps_update_begin(); | ||
507 | cpu_hotplug_disabled = 0; | ||
508 | cpu_maps_update_done(); | ||
509 | } | ||
510 | |||
511 | /* | ||
512 | * When callbacks for CPU hotplug notifications are being executed, we must | ||
513 | * ensure that the state of the system with respect to the tasks being frozen | ||
514 | * or not, as reported by the notification, remains unchanged *throughout the | ||
515 | * duration* of the execution of the callbacks. | ||
516 | * Hence we need to prevent the freezer from racing with regular CPU hotplug. | ||
517 | * | ||
518 | * This synchronization is implemented by mutually excluding regular CPU | ||
519 | * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/ | ||
520 | * Hibernate notifications. | ||
521 | */ | ||
522 | static int | ||
523 | cpu_hotplug_pm_callback(struct notifier_block *nb, | ||
524 | unsigned long action, void *ptr) | ||
525 | { | ||
526 | switch (action) { | ||
527 | |||
528 | case PM_SUSPEND_PREPARE: | ||
529 | case PM_HIBERNATION_PREPARE: | ||
530 | cpu_hotplug_disable_before_freeze(); | ||
531 | break; | ||
532 | |||
533 | case PM_POST_SUSPEND: | ||
534 | case PM_POST_HIBERNATION: | ||
535 | cpu_hotplug_enable_after_thaw(); | ||
536 | break; | ||
537 | |||
538 | default: | ||
539 | return NOTIFY_DONE; | ||
540 | } | ||
541 | |||
542 | return NOTIFY_OK; | ||
543 | } | ||
544 | |||
545 | |||
546 | int cpu_hotplug_pm_sync_init(void) | ||
547 | { | ||
548 | pm_notifier(cpu_hotplug_pm_callback, 0); | ||
549 | return 0; | ||
550 | } | ||
551 | core_initcall(cpu_hotplug_pm_sync_init); | ||
552 | |||
479 | #endif /* CONFIG_PM_SLEEP_SMP */ | 553 | #endif /* CONFIG_PM_SLEEP_SMP */ |
480 | 554 | ||
481 | /** | 555 | /** |
diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c new file mode 100644 index 00000000000..249152e1530 --- /dev/null +++ b/kernel/cpu_pm.c | |||
@@ -0,0 +1,233 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2011 Google, Inc. | ||
3 | * | ||
4 | * Author: | ||
5 | * Colin Cross <ccross@android.com> | ||
6 | * | ||
7 | * This software is licensed under the terms of the GNU General Public | ||
8 | * License version 2, as published by the Free Software Foundation, and | ||
9 | * may be copied, distributed, and modified under those terms. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | */ | ||
17 | |||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/cpu_pm.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/notifier.h> | ||
22 | #include <linux/spinlock.h> | ||
23 | #include <linux/syscore_ops.h> | ||
24 | |||
25 | static DEFINE_RWLOCK(cpu_pm_notifier_lock); | ||
26 | static RAW_NOTIFIER_HEAD(cpu_pm_notifier_chain); | ||
27 | |||
28 | static int cpu_pm_notify(enum cpu_pm_event event, int nr_to_call, int *nr_calls) | ||
29 | { | ||
30 | int ret; | ||
31 | |||
32 | ret = __raw_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL, | ||
33 | nr_to_call, nr_calls); | ||
34 | |||
35 | return notifier_to_errno(ret); | ||
36 | } | ||
37 | |||
38 | /** | ||
39 | * cpu_pm_register_notifier - register a driver with cpu_pm | ||
40 | * @nb: notifier block to register | ||
41 | * | ||
42 | * Add a driver to a list of drivers that are notified about | ||
43 | * CPU and CPU cluster low power entry and exit. | ||
44 | * | ||
45 | * This function may sleep, and has the same return conditions as | ||
46 | * raw_notifier_chain_register. | ||
47 | */ | ||
48 | int cpu_pm_register_notifier(struct notifier_block *nb) | ||
49 | { | ||
50 | unsigned long flags; | ||
51 | int ret; | ||
52 | |||
53 | write_lock_irqsave(&cpu_pm_notifier_lock, flags); | ||
54 | ret = raw_notifier_chain_register(&cpu_pm_notifier_chain, nb); | ||
55 | write_unlock_irqrestore(&cpu_pm_notifier_lock, flags); | ||
56 | |||
57 | return ret; | ||
58 | } | ||
59 | EXPORT_SYMBOL_GPL(cpu_pm_register_notifier); | ||
60 | |||
61 | /** | ||
62 | * cpu_pm_unregister_notifier - unregister a driver with cpu_pm | ||
63 | * @nb: notifier block to be unregistered | ||
64 | * | ||
65 | * Remove a driver from the CPU PM notifier list. | ||
66 | * | ||
67 | * This function may sleep, and has the same return conditions as | ||
68 | * raw_notifier_chain_unregister. | ||
69 | */ | ||
70 | int cpu_pm_unregister_notifier(struct notifier_block *nb) | ||
71 | { | ||
72 | unsigned long flags; | ||
73 | int ret; | ||
74 | |||
75 | write_lock_irqsave(&cpu_pm_notifier_lock, flags); | ||
76 | ret = raw_notifier_chain_unregister(&cpu_pm_notifier_chain, nb); | ||
77 | write_unlock_irqrestore(&cpu_pm_notifier_lock, flags); | ||
78 | |||
79 | return ret; | ||
80 | } | ||
81 | EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier); | ||
82 | |||
83 | /** | ||
84 | * cpu_pm_enter - CPU low power entry notifier | ||
85 | * | ||
86 | * Notifies listeners that a single CPU is entering a low power state that may | ||
87 | * cause some blocks in the same power domain as the cpu to reset. | ||
88 | * | ||
89 | * Must be called on the affected CPU with interrupts disabled. Platform is | ||
90 | * responsible for ensuring that cpu_pm_enter is not called twice on the same | ||
91 | * CPU before cpu_pm_exit is called. Notified drivers can include VFP | ||
92 | * co-processor, interrupt controller and its PM extensions, local CPU | ||
93 | * timers context save/restore which shouldn't be interrupted. Hence it | ||
94 | * must be called with interrupts disabled. | ||
95 | * | ||
96 | * Return conditions are same as __raw_notifier_call_chain. | ||
97 | */ | ||
98 | int cpu_pm_enter(void) | ||
99 | { | ||
100 | int nr_calls; | ||
101 | int ret = 0; | ||
102 | |||
103 | read_lock(&cpu_pm_notifier_lock); | ||
104 | ret = cpu_pm_notify(CPU_PM_ENTER, -1, &nr_calls); | ||
105 | if (ret) | ||
106 | /* | ||
107 | * Inform the (nr_calls - 1) listeners that were already notified | ||
108 | * to prepare for CPU PM entry that the entry has failed. | ||
109 | */ | ||
110 | cpu_pm_notify(CPU_PM_ENTER_FAILED, nr_calls - 1, NULL); | ||
111 | read_unlock(&cpu_pm_notifier_lock); | ||
112 | |||
113 | return ret; | ||
114 | } | ||
115 | EXPORT_SYMBOL_GPL(cpu_pm_enter); | ||
116 | |||
117 | /** | ||
118 | * cpu_pm_exit - CPU low power exit notifier | ||
119 | * | ||
120 | * Notifies listeners that a single CPU is exiting a low power state that may | ||
121 | * have caused some blocks in the same power domain as the cpu to reset. | ||
122 | * | ||
123 | * Notified drivers can include VFP co-processor, interrupt controller | ||
124 | * and its PM extensions, local CPU timers context save/restore which | ||
125 | * shouldn't be interrupted. Hence it must be called with interrupts disabled. | ||
126 | * | ||
127 | * Return conditions are same as __raw_notifier_call_chain. | ||
128 | */ | ||
129 | int cpu_pm_exit(void) | ||
130 | { | ||
131 | int ret; | ||
132 | |||
133 | read_lock(&cpu_pm_notifier_lock); | ||
134 | ret = cpu_pm_notify(CPU_PM_EXIT, -1, NULL); | ||
135 | read_unlock(&cpu_pm_notifier_lock); | ||
136 | |||
137 | return ret; | ||
138 | } | ||
139 | EXPORT_SYMBOL_GPL(cpu_pm_exit); | ||
140 | |||
141 | /** | ||
142 | * cpu_cluster_pm_enter - CPU cluster low power entry notifier | ||
143 | * | ||
144 | * Notifies listeners that all cpus in a power domain are entering a low power | ||
145 | * state that may cause some blocks in the same power domain to reset. | ||
146 | * | ||
147 | * Must be called after cpu_pm_enter has been called on all cpus in the power | ||
148 | * domain, and before cpu_pm_exit has been called on any cpu in the power | ||
149 | * domain. Notified drivers can include VFP co-processor, interrupt controller | ||
150 | * and its PM extensions, local CPU timers context save/restore which | ||
151 | * shouldn't be interrupted. Hence it must be called with interrupts disabled. | ||
152 | * | ||
153 | * Must be called with interrupts disabled. | ||
154 | * | ||
155 | * Return conditions are same as __raw_notifier_call_chain. | ||
156 | */ | ||
157 | int cpu_cluster_pm_enter(void) | ||
158 | { | ||
159 | int nr_calls; | ||
160 | int ret = 0; | ||
161 | |||
162 | read_lock(&cpu_pm_notifier_lock); | ||
163 | ret = cpu_pm_notify(CPU_CLUSTER_PM_ENTER, -1, &nr_calls); | ||
164 | if (ret) | ||
165 | /* | ||
166 | * Inform listeners (nr_calls - 1) about failure of CPU cluster | ||
167 | * PM entry who are notified earlier to prepare for it. | ||
168 | */ | ||
169 | cpu_pm_notify(CPU_CLUSTER_PM_ENTER_FAILED, nr_calls - 1, NULL); | ||
170 | read_unlock(&cpu_pm_notifier_lock); | ||
171 | |||
172 | return ret; | ||
173 | } | ||
174 | EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter); | ||
175 | |||
176 | /** | ||
177 | * cpu_cluster_pm_exit - CPU cluster low power exit notifier | ||
178 | * | ||
179 | * Notifies listeners that all cpus in a power domain are exiting from a | ||
180 | * low power state that may have caused some blocks in the same power domain | ||
181 | * to reset. | ||
182 | * | ||
183 | * Must be called after cpu_cluster_pm_enter has been called for the power | ||
184 | * domain, and before cpu_pm_exit has been called on any cpu in the power | ||
185 | * domain. Notified drivers can include VFP co-processor, interrupt controller | ||
186 | * and its PM extensions, local CPU timers context save/restore which | ||
187 | * shouldn't be interrupted. Hence it must be called with interrupts disabled. | ||
188 | * | ||
189 | * Return conditions are same as __raw_notifier_call_chain. | ||
190 | */ | ||
191 | int cpu_cluster_pm_exit(void) | ||
192 | { | ||
193 | int ret; | ||
194 | |||
195 | read_lock(&cpu_pm_notifier_lock); | ||
196 | ret = cpu_pm_notify(CPU_CLUSTER_PM_EXIT, -1, NULL); | ||
197 | read_unlock(&cpu_pm_notifier_lock); | ||
198 | |||
199 | return ret; | ||
200 | } | ||
201 | EXPORT_SYMBOL_GPL(cpu_cluster_pm_exit); | ||
202 | |||
203 | #ifdef CONFIG_PM | ||
204 | static int cpu_pm_suspend(void) | ||
205 | { | ||
206 | int ret; | ||
207 | |||
208 | ret = cpu_pm_enter(); | ||
209 | if (ret) | ||
210 | return ret; | ||
211 | |||
212 | ret = cpu_cluster_pm_enter(); | ||
213 | return ret; | ||
214 | } | ||
215 | |||
216 | static void cpu_pm_resume(void) | ||
217 | { | ||
218 | cpu_cluster_pm_exit(); | ||
219 | cpu_pm_exit(); | ||
220 | } | ||
221 | |||
222 | static struct syscore_ops cpu_pm_syscore_ops = { | ||
223 | .suspend = cpu_pm_suspend, | ||
224 | .resume = cpu_pm_resume, | ||
225 | }; | ||
226 | |||
227 | static int cpu_pm_init(void) | ||
228 | { | ||
229 | register_syscore_ops(&cpu_pm_syscore_ops); | ||
230 | return 0; | ||
231 | } | ||
232 | core_initcall(cpu_pm_init); | ||
233 | #endif | ||
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 10131fdaff7..9fe58c46a42 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -37,7 +37,7 @@ | |||
37 | #include <linux/mempolicy.h> | 37 | #include <linux/mempolicy.h> |
38 | #include <linux/mm.h> | 38 | #include <linux/mm.h> |
39 | #include <linux/memory.h> | 39 | #include <linux/memory.h> |
40 | #include <linux/module.h> | 40 | #include <linux/export.h> |
41 | #include <linux/mount.h> | 41 | #include <linux/mount.h> |
42 | #include <linux/namei.h> | 42 | #include <linux/namei.h> |
43 | #include <linux/pagemap.h> | 43 | #include <linux/pagemap.h> |
@@ -949,6 +949,8 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, | |||
949 | static void cpuset_change_task_nodemask(struct task_struct *tsk, | 949 | static void cpuset_change_task_nodemask(struct task_struct *tsk, |
950 | nodemask_t *newmems) | 950 | nodemask_t *newmems) |
951 | { | 951 | { |
952 | bool masks_disjoint = !nodes_intersects(*newmems, tsk->mems_allowed); | ||
953 | |||
952 | repeat: | 954 | repeat: |
953 | /* | 955 | /* |
954 | * Allow tasks that have access to memory reserves because they have | 956 | * Allow tasks that have access to memory reserves because they have |
@@ -963,7 +965,6 @@ repeat: | |||
963 | nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); | 965 | nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); |
964 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); | 966 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); |
965 | 967 | ||
966 | |||
967 | /* | 968 | /* |
968 | * ensure checking ->mems_allowed_change_disable after setting all new | 969 | * ensure checking ->mems_allowed_change_disable after setting all new |
969 | * allowed nodes. | 970 | * allowed nodes. |
@@ -980,9 +981,11 @@ repeat: | |||
980 | 981 | ||
981 | /* | 982 | /* |
982 | * Allocation of memory is very fast, we needn't sleep when waiting | 983 | * Allocation of memory is very fast, we needn't sleep when waiting |
983 | * for the read-side. | 984 | * for the read-side. No wait is necessary, however, if at least one |
985 | * node remains unchanged. | ||
984 | */ | 986 | */ |
985 | while (ACCESS_ONCE(tsk->mems_allowed_change_disable)) { | 987 | while (masks_disjoint && |
988 | ACCESS_ONCE(tsk->mems_allowed_change_disable)) { | ||
986 | task_unlock(tsk); | 989 | task_unlock(tsk); |
987 | if (!task_curr(tsk)) | 990 | if (!task_curr(tsk)) |
988 | yield(); | 991 | yield(); |
diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c index 5f85690285d..c766ee54c0b 100644 --- a/kernel/crash_dump.c +++ b/kernel/crash_dump.c | |||
@@ -2,7 +2,7 @@ | |||
2 | #include <linux/crash_dump.h> | 2 | #include <linux/crash_dump.h> |
3 | #include <linux/init.h> | 3 | #include <linux/init.h> |
4 | #include <linux/errno.h> | 4 | #include <linux/errno.h> |
5 | #include <linux/module.h> | 5 | #include <linux/export.h> |
6 | 6 | ||
7 | /* | 7 | /* |
8 | * If we have booted due to a crash, max_pfn will be a very low value. We need | 8 | * If we have booted due to a crash, max_pfn will be a very low value. We need |
@@ -20,8 +20,15 @@ unsigned long saved_max_pfn; | |||
20 | unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; | 20 | unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; |
21 | 21 | ||
22 | /* | 22 | /* |
23 | * stores the size of elf header of crash image | ||
24 | */ | ||
25 | unsigned long long elfcorehdr_size; | ||
26 | |||
27 | /* | ||
23 | * elfcorehdr= specifies the location of elf core header stored by the crashed | 28 | * elfcorehdr= specifies the location of elf core header stored by the crashed |
24 | * kernel. This option will be passed by kexec loader to the capture kernel. | 29 | * kernel. This option will be passed by kexec loader to the capture kernel. |
30 | * | ||
31 | * Syntax: elfcorehdr=[size[KMG]@]offset[KMG] | ||
25 | */ | 32 | */ |
26 | static int __init setup_elfcorehdr(char *arg) | 33 | static int __init setup_elfcorehdr(char *arg) |
27 | { | 34 | { |
@@ -29,6 +36,10 @@ static int __init setup_elfcorehdr(char *arg) | |||
29 | if (!arg) | 36 | if (!arg) |
30 | return -EINVAL; | 37 | return -EINVAL; |
31 | elfcorehdr_addr = memparse(arg, &end); | 38 | elfcorehdr_addr = memparse(arg, &end); |
39 | if (*end == '@') { | ||
40 | elfcorehdr_size = elfcorehdr_addr; | ||
41 | elfcorehdr_addr = memparse(end + 1, &end); | ||
42 | } | ||
32 | return end > arg ? 0 : -EINVAL; | 43 | return end > arg ? 0 : -EINVAL; |
33 | } | 44 | } |
34 | early_param("elfcorehdr", setup_elfcorehdr); | 45 | early_param("elfcorehdr", setup_elfcorehdr); |
diff --git a/kernel/cred.c b/kernel/cred.c index 174fa84eca3..5791612a404 100644 --- a/kernel/cred.c +++ b/kernel/cred.c | |||
@@ -8,7 +8,7 @@ | |||
8 | * as published by the Free Software Foundation; either version | 8 | * as published by the Free Software Foundation; either version |
9 | * 2 of the Licence, or (at your option) any later version. | 9 | * 2 of the Licence, or (at your option) any later version. |
10 | */ | 10 | */ |
11 | #include <linux/module.h> | 11 | #include <linux/export.h> |
12 | #include <linux/cred.h> | 12 | #include <linux/cred.h> |
13 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
14 | #include <linux/sched.h> | 14 | #include <linux/sched.h> |
@@ -508,10 +508,8 @@ int commit_creds(struct cred *new) | |||
508 | key_fsgid_changed(task); | 508 | key_fsgid_changed(task); |
509 | 509 | ||
510 | /* do it | 510 | /* do it |
511 | * - What if a process setreuid()'s and this brings the | 511 | * RLIMIT_NPROC limits on user->processes have already been checked |
512 | * new uid over his NPROC rlimit? We can check this now | 512 | * in set_user(). |
513 | * cheaply with the new uid cache, so if it matters | ||
514 | * we should be checking for it. -DaveM | ||
515 | */ | 513 | */ |
516 | alter_cred_subscribers(new, 2); | 514 | alter_cred_subscribers(new, 2); |
517 | if (new->user != old->user) | 515 | if (new->user != old->user) |
@@ -646,6 +644,9 @@ void __init cred_init(void) | |||
646 | */ | 644 | */ |
647 | struct cred *prepare_kernel_cred(struct task_struct *daemon) | 645 | struct cred *prepare_kernel_cred(struct task_struct *daemon) |
648 | { | 646 | { |
647 | #ifdef CONFIG_KEYS | ||
648 | struct thread_group_cred *tgcred; | ||
649 | #endif | ||
649 | const struct cred *old; | 650 | const struct cred *old; |
650 | struct cred *new; | 651 | struct cred *new; |
651 | 652 | ||
@@ -653,6 +654,14 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) | |||
653 | if (!new) | 654 | if (!new) |
654 | return NULL; | 655 | return NULL; |
655 | 656 | ||
657 | #ifdef CONFIG_KEYS | ||
658 | tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); | ||
659 | if (!tgcred) { | ||
660 | kmem_cache_free(cred_jar, new); | ||
661 | return NULL; | ||
662 | } | ||
663 | #endif | ||
664 | |||
656 | kdebug("prepare_kernel_cred() alloc %p", new); | 665 | kdebug("prepare_kernel_cred() alloc %p", new); |
657 | 666 | ||
658 | if (daemon) | 667 | if (daemon) |
@@ -669,8 +678,11 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) | |||
669 | get_group_info(new->group_info); | 678 | get_group_info(new->group_info); |
670 | 679 | ||
671 | #ifdef CONFIG_KEYS | 680 | #ifdef CONFIG_KEYS |
672 | atomic_inc(&init_tgcred.usage); | 681 | atomic_set(&tgcred->usage, 1); |
673 | new->tgcred = &init_tgcred; | 682 | spin_lock_init(&tgcred->lock); |
683 | tgcred->process_keyring = NULL; | ||
684 | tgcred->session_keyring = NULL; | ||
685 | new->tgcred = tgcred; | ||
674 | new->request_key_auth = NULL; | 686 | new->request_key_auth = NULL; |
675 | new->thread_keyring = NULL; | 687 | new->thread_keyring = NULL; |
676 | new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; | 688 | new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; |
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index 34872482315..c22d8c28ad8 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c | |||
@@ -217,7 +217,7 @@ void gdbstub_msg_write(const char *s, int len) | |||
217 | 217 | ||
218 | /* Pack in hex chars */ | 218 | /* Pack in hex chars */ |
219 | for (i = 0; i < wcount; i++) | 219 | for (i = 0; i < wcount; i++) |
220 | bufptr = pack_hex_byte(bufptr, s[i]); | 220 | bufptr = hex_byte_pack(bufptr, s[i]); |
221 | *bufptr = '\0'; | 221 | *bufptr = '\0'; |
222 | 222 | ||
223 | /* Move up */ | 223 | /* Move up */ |
@@ -249,7 +249,7 @@ char *kgdb_mem2hex(char *mem, char *buf, int count) | |||
249 | if (err) | 249 | if (err) |
250 | return NULL; | 250 | return NULL; |
251 | while (count > 0) { | 251 | while (count > 0) { |
252 | buf = pack_hex_byte(buf, *tmp); | 252 | buf = hex_byte_pack(buf, *tmp); |
253 | tmp++; | 253 | tmp++; |
254 | count--; | 254 | count--; |
255 | } | 255 | } |
@@ -411,14 +411,14 @@ static char *pack_threadid(char *pkt, unsigned char *id) | |||
411 | limit = id + (BUF_THREAD_ID_SIZE / 2); | 411 | limit = id + (BUF_THREAD_ID_SIZE / 2); |
412 | while (id < limit) { | 412 | while (id < limit) { |
413 | if (!lzero || *id != 0) { | 413 | if (!lzero || *id != 0) { |
414 | pkt = pack_hex_byte(pkt, *id); | 414 | pkt = hex_byte_pack(pkt, *id); |
415 | lzero = 0; | 415 | lzero = 0; |
416 | } | 416 | } |
417 | id++; | 417 | id++; |
418 | } | 418 | } |
419 | 419 | ||
420 | if (lzero) | 420 | if (lzero) |
421 | pkt = pack_hex_byte(pkt, 0); | 421 | pkt = hex_byte_pack(pkt, 0); |
422 | 422 | ||
423 | return pkt; | 423 | return pkt; |
424 | } | 424 | } |
@@ -486,7 +486,7 @@ static void gdb_cmd_status(struct kgdb_state *ks) | |||
486 | dbg_remove_all_break(); | 486 | dbg_remove_all_break(); |
487 | 487 | ||
488 | remcom_out_buffer[0] = 'S'; | 488 | remcom_out_buffer[0] = 'S'; |
489 | pack_hex_byte(&remcom_out_buffer[1], ks->signo); | 489 | hex_byte_pack(&remcom_out_buffer[1], ks->signo); |
490 | } | 490 | } |
491 | 491 | ||
492 | static void gdb_get_regs_helper(struct kgdb_state *ks) | 492 | static void gdb_get_regs_helper(struct kgdb_state *ks) |
@@ -954,7 +954,7 @@ int gdb_serial_stub(struct kgdb_state *ks) | |||
954 | /* Reply to host that an exception has occurred */ | 954 | /* Reply to host that an exception has occurred */ |
955 | ptr = remcom_out_buffer; | 955 | ptr = remcom_out_buffer; |
956 | *ptr++ = 'T'; | 956 | *ptr++ = 'T'; |
957 | ptr = pack_hex_byte(ptr, ks->signo); | 957 | ptr = hex_byte_pack(ptr, ks->signo); |
958 | ptr += strlen(strcpy(ptr, "thread:")); | 958 | ptr += strlen(strcpy(ptr, "thread:")); |
959 | int_to_threadref(thref, shadow_pid(current->pid)); | 959 | int_to_threadref(thref, shadow_pid(current->pid)); |
960 | ptr = pack_threadid(ptr, thref); | 960 | ptr = pack_threadid(ptr, thref); |
diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c index d9ca9aa481e..8b68ce78ff1 100644 --- a/kernel/debug/kdb/kdb_debugger.c +++ b/kernel/debug/kdb/kdb_debugger.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/kgdb.h> | 11 | #include <linux/kgdb.h> |
12 | #include <linux/kdb.h> | 12 | #include <linux/kdb.h> |
13 | #include <linux/kdebug.h> | 13 | #include <linux/kdebug.h> |
14 | #include <linux/export.h> | ||
14 | #include "kdb_private.h" | 15 | #include "kdb_private.h" |
15 | #include "../debug_core.h" | 16 | #include "../debug_core.h" |
16 | 17 | ||
diff --git a/kernel/dma.c b/kernel/dma.c index f903189c530..68a2306522c 100644 --- a/kernel/dma.c +++ b/kernel/dma.c | |||
@@ -9,7 +9,7 @@ | |||
9 | * [It also happened to remove the sizeof(char *) == sizeof(int) | 9 | * [It also happened to remove the sizeof(char *) == sizeof(int) |
10 | * assumption introduced because of those /proc/dma patches. -- Hennus] | 10 | * assumption introduced because of those /proc/dma patches. -- Hennus] |
11 | */ | 11 | */ |
12 | #include <linux/module.h> | 12 | #include <linux/export.h> |
13 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
14 | #include <linux/errno.h> | 14 | #include <linux/errno.h> |
15 | #include <linux/spinlock.h> | 15 | #include <linux/spinlock.h> |
diff --git a/kernel/events/core.c b/kernel/events/core.c index b8785e26ee1..0e8457da6f9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/reboot.h> | 25 | #include <linux/reboot.h> |
26 | #include <linux/vmstat.h> | 26 | #include <linux/vmstat.h> |
27 | #include <linux/device.h> | 27 | #include <linux/device.h> |
28 | #include <linux/export.h> | ||
28 | #include <linux/vmalloc.h> | 29 | #include <linux/vmalloc.h> |
29 | #include <linux/hardirq.h> | 30 | #include <linux/hardirq.h> |
30 | #include <linux/rculist.h> | 31 | #include <linux/rculist.h> |
@@ -399,14 +400,54 @@ void perf_cgroup_switch(struct task_struct *task, int mode) | |||
399 | local_irq_restore(flags); | 400 | local_irq_restore(flags); |
400 | } | 401 | } |
401 | 402 | ||
402 | static inline void perf_cgroup_sched_out(struct task_struct *task) | 403 | static inline void perf_cgroup_sched_out(struct task_struct *task, |
404 | struct task_struct *next) | ||
403 | { | 405 | { |
404 | perf_cgroup_switch(task, PERF_CGROUP_SWOUT); | 406 | struct perf_cgroup *cgrp1; |
407 | struct perf_cgroup *cgrp2 = NULL; | ||
408 | |||
409 | /* | ||
410 | * we come here when we know perf_cgroup_events > 0 | ||
411 | */ | ||
412 | cgrp1 = perf_cgroup_from_task(task); | ||
413 | |||
414 | /* | ||
415 | * next is NULL when called from perf_event_enable_on_exec() | ||
416 | * that will systematically cause a cgroup_switch() | ||
417 | */ | ||
418 | if (next) | ||
419 | cgrp2 = perf_cgroup_from_task(next); | ||
420 | |||
421 | /* | ||
422 | * only schedule out current cgroup events if we know | ||
423 | * that we are switching to a different cgroup. Otherwise, | ||
425 | * do not touch the cgroup events. | ||
425 | */ | ||
426 | if (cgrp1 != cgrp2) | ||
427 | perf_cgroup_switch(task, PERF_CGROUP_SWOUT); | ||
405 | } | 428 | } |
406 | 429 | ||
407 | static inline void perf_cgroup_sched_in(struct task_struct *task) | 430 | static inline void perf_cgroup_sched_in(struct task_struct *prev, |
431 | struct task_struct *task) | ||
408 | { | 432 | { |
409 | perf_cgroup_switch(task, PERF_CGROUP_SWIN); | 433 | struct perf_cgroup *cgrp1; |
434 | struct perf_cgroup *cgrp2 = NULL; | ||
435 | |||
436 | /* | ||
437 | * we come here when we know perf_cgroup_events > 0 | ||
438 | */ | ||
439 | cgrp1 = perf_cgroup_from_task(task); | ||
440 | |||
441 | /* prev can never be NULL */ | ||
442 | cgrp2 = perf_cgroup_from_task(prev); | ||
443 | |||
444 | /* | ||
445 | * only need to schedule in cgroup events if we are changing | ||
446 | * cgroup during ctxsw. Cgroup events were not scheduled | ||
449 | * out at ctxsw out if that was not the case. | ||
448 | */ | ||
449 | if (cgrp1 != cgrp2) | ||
450 | perf_cgroup_switch(task, PERF_CGROUP_SWIN); | ||
410 | } | 451 | } |
411 | 452 | ||
412 | static inline int perf_cgroup_connect(int fd, struct perf_event *event, | 453 | static inline int perf_cgroup_connect(int fd, struct perf_event *event, |
@@ -518,11 +559,13 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) | |||
518 | { | 559 | { |
519 | } | 560 | } |
520 | 561 | ||
521 | static inline void perf_cgroup_sched_out(struct task_struct *task) | 562 | static inline void perf_cgroup_sched_out(struct task_struct *task, |
563 | struct task_struct *next) | ||
522 | { | 564 | { |
523 | } | 565 | } |
524 | 566 | ||
525 | static inline void perf_cgroup_sched_in(struct task_struct *task) | 567 | static inline void perf_cgroup_sched_in(struct task_struct *prev, |
568 | struct task_struct *task) | ||
526 | { | 569 | { |
527 | } | 570 | } |
528 | 571 | ||
@@ -1988,7 +2031,7 @@ void __perf_event_task_sched_out(struct task_struct *task, | |||
1988 | * cgroup event are system-wide mode only | 2031 | * cgroup event are system-wide mode only |
1989 | */ | 2032 | */ |
1990 | if (atomic_read(&__get_cpu_var(perf_cgroup_events))) | 2033 | if (atomic_read(&__get_cpu_var(perf_cgroup_events))) |
1991 | perf_cgroup_sched_out(task); | 2034 | perf_cgroup_sched_out(task, next); |
1992 | } | 2035 | } |
1993 | 2036 | ||
1994 | static void task_ctx_sched_out(struct perf_event_context *ctx) | 2037 | static void task_ctx_sched_out(struct perf_event_context *ctx) |
@@ -2153,7 +2196,8 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, | |||
2153 | * accessing the event control register. If a NMI hits, then it will | 2196 | * accessing the event control register. If a NMI hits, then it will |
2154 | * keep the event running. | 2197 | * keep the event running. |
2155 | */ | 2198 | */ |
2156 | void __perf_event_task_sched_in(struct task_struct *task) | 2199 | void __perf_event_task_sched_in(struct task_struct *prev, |
2200 | struct task_struct *task) | ||
2157 | { | 2201 | { |
2158 | struct perf_event_context *ctx; | 2202 | struct perf_event_context *ctx; |
2159 | int ctxn; | 2203 | int ctxn; |
@@ -2171,7 +2215,7 @@ void __perf_event_task_sched_in(struct task_struct *task) | |||
2171 | * cgroup event are system-wide mode only | 2215 | * cgroup event are system-wide mode only |
2172 | */ | 2216 | */ |
2173 | if (atomic_read(&__get_cpu_var(perf_cgroup_events))) | 2217 | if (atomic_read(&__get_cpu_var(perf_cgroup_events))) |
2174 | perf_cgroup_sched_in(task); | 2218 | perf_cgroup_sched_in(prev, task); |
2175 | } | 2219 | } |
2176 | 2220 | ||
2177 | static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) | 2221 | static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) |
@@ -2427,7 +2471,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) | |||
2427 | * ctxswin cgroup events which are already scheduled | 2471 | * ctxswin cgroup events which are already scheduled |
2428 | * in. | 2472 | * in. |
2429 | */ | 2473 | */ |
2430 | perf_cgroup_sched_out(current); | 2474 | perf_cgroup_sched_out(current, NULL); |
2431 | 2475 | ||
2432 | raw_spin_lock(&ctx->lock); | 2476 | raw_spin_lock(&ctx->lock); |
2433 | task_ctx_sched_out(ctx); | 2477 | task_ctx_sched_out(ctx); |
@@ -3353,8 +3397,8 @@ static int perf_event_index(struct perf_event *event) | |||
3353 | } | 3397 | } |
3354 | 3398 | ||
3355 | static void calc_timer_values(struct perf_event *event, | 3399 | static void calc_timer_values(struct perf_event *event, |
3356 | u64 *running, | 3400 | u64 *enabled, |
3357 | u64 *enabled) | 3401 | u64 *running) |
3358 | { | 3402 | { |
3359 | u64 now, ctx_time; | 3403 | u64 now, ctx_time; |
3360 | 3404 | ||
@@ -3500,7 +3544,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) | |||
3500 | struct ring_buffer *rb = event->rb; | 3544 | struct ring_buffer *rb = event->rb; |
3501 | 3545 | ||
3502 | atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); | 3546 | atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); |
3503 | vma->vm_mm->locked_vm -= event->mmap_locked; | 3547 | vma->vm_mm->pinned_vm -= event->mmap_locked; |
3504 | rcu_assign_pointer(event->rb, NULL); | 3548 | rcu_assign_pointer(event->rb, NULL); |
3505 | mutex_unlock(&event->mmap_mutex); | 3549 | mutex_unlock(&event->mmap_mutex); |
3506 | 3550 | ||
@@ -3581,7 +3625,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
3581 | 3625 | ||
3582 | lock_limit = rlimit(RLIMIT_MEMLOCK); | 3626 | lock_limit = rlimit(RLIMIT_MEMLOCK); |
3583 | lock_limit >>= PAGE_SHIFT; | 3627 | lock_limit >>= PAGE_SHIFT; |
3584 | locked = vma->vm_mm->locked_vm + extra; | 3628 | locked = vma->vm_mm->pinned_vm + extra; |
3585 | 3629 | ||
3586 | if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() && | 3630 | if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() && |
3587 | !capable(CAP_IPC_LOCK)) { | 3631 | !capable(CAP_IPC_LOCK)) { |
@@ -3607,7 +3651,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
3607 | atomic_long_add(user_extra, &user->locked_vm); | 3651 | atomic_long_add(user_extra, &user->locked_vm); |
3608 | event->mmap_locked = extra; | 3652 | event->mmap_locked = extra; |
3609 | event->mmap_user = get_current_user(); | 3653 | event->mmap_user = get_current_user(); |
3610 | vma->vm_mm->locked_vm += event->mmap_locked; | 3654 | vma->vm_mm->pinned_vm += event->mmap_locked; |
3611 | 3655 | ||
3612 | unlock: | 3656 | unlock: |
3613 | if (!ret) | 3657 | if (!ret) |
@@ -5715,6 +5759,7 @@ struct pmu *perf_init_event(struct perf_event *event) | |||
5715 | pmu = idr_find(&pmu_idr, event->attr.type); | 5759 | pmu = idr_find(&pmu_idr, event->attr.type); |
5716 | rcu_read_unlock(); | 5760 | rcu_read_unlock(); |
5717 | if (pmu) { | 5761 | if (pmu) { |
5762 | event->pmu = pmu; | ||
5718 | ret = pmu->event_init(event); | 5763 | ret = pmu->event_init(event); |
5719 | if (ret) | 5764 | if (ret) |
5720 | pmu = ERR_PTR(ret); | 5765 | pmu = ERR_PTR(ret); |
@@ -5722,6 +5767,7 @@ struct pmu *perf_init_event(struct perf_event *event) | |||
5722 | } | 5767 | } |
5723 | 5768 | ||
5724 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 5769 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
5770 | event->pmu = pmu; | ||
5725 | ret = pmu->event_init(event); | 5771 | ret = pmu->event_init(event); |
5726 | if (!ret) | 5772 | if (!ret) |
5727 | goto unlock; | 5773 | goto unlock; |
@@ -5848,8 +5894,6 @@ done: | |||
5848 | return ERR_PTR(err); | 5894 | return ERR_PTR(err); |
5849 | } | 5895 | } |
5850 | 5896 | ||
5851 | event->pmu = pmu; | ||
5852 | |||
5853 | if (!event->parent) { | 5897 | if (!event->parent) { |
5854 | if (event->attach_state & PERF_ATTACH_TASK) | 5898 | if (event->attach_state & PERF_ATTACH_TASK) |
5855 | jump_label_inc(&perf_sched_events); | 5899 | jump_label_inc(&perf_sched_events); |
diff --git a/kernel/exit.c b/kernel/exit.c index 2913b3509d4..d0b7d988f87 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -681,8 +681,6 @@ static void exit_mm(struct task_struct * tsk) | |||
681 | enter_lazy_tlb(mm, current); | 681 | enter_lazy_tlb(mm, current); |
682 | /* We don't want this task to be frozen prematurely */ | 682 | /* We don't want this task to be frozen prematurely */ |
683 | clear_freeze_flag(tsk); | 683 | clear_freeze_flag(tsk); |
684 | if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | ||
685 | atomic_dec(&mm->oom_disable_count); | ||
686 | task_unlock(tsk); | 684 | task_unlock(tsk); |
687 | mm_update_next_owner(mm); | 685 | mm_update_next_owner(mm); |
688 | mmput(mm); | 686 | mmput(mm); |
diff --git a/kernel/fork.c b/kernel/fork.c index e7ceaca8960..da4a6a10d08 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -162,7 +162,6 @@ static void account_kernel_stack(struct thread_info *ti, int account) | |||
162 | 162 | ||
163 | void free_task(struct task_struct *tsk) | 163 | void free_task(struct task_struct *tsk) |
164 | { | 164 | { |
165 | prop_local_destroy_single(&tsk->dirties); | ||
166 | account_kernel_stack(tsk->stack, -1); | 165 | account_kernel_stack(tsk->stack, -1); |
167 | free_thread_info(tsk->stack); | 166 | free_thread_info(tsk->stack); |
168 | rt_mutex_debug_task_free(tsk); | 167 | rt_mutex_debug_task_free(tsk); |
@@ -274,10 +273,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
274 | 273 | ||
275 | tsk->stack = ti; | 274 | tsk->stack = ti; |
276 | 275 | ||
277 | err = prop_local_init_single(&tsk->dirties); | ||
278 | if (err) | ||
279 | goto out; | ||
280 | |||
281 | setup_thread_stack(tsk, orig); | 276 | setup_thread_stack(tsk, orig); |
282 | clear_user_return_notifier(tsk); | 277 | clear_user_return_notifier(tsk); |
283 | clear_tsk_need_resched(tsk); | 278 | clear_tsk_need_resched(tsk); |
@@ -501,7 +496,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) | |||
501 | mm->cached_hole_size = ~0UL; | 496 | mm->cached_hole_size = ~0UL; |
502 | mm_init_aio(mm); | 497 | mm_init_aio(mm); |
503 | mm_init_owner(mm, p); | 498 | mm_init_owner(mm, p); |
504 | atomic_set(&mm->oom_disable_count, 0); | ||
505 | 499 | ||
506 | if (likely(!mm_alloc_pgd(mm))) { | 500 | if (likely(!mm_alloc_pgd(mm))) { |
507 | mm->def_flags = 0; | 501 | mm->def_flags = 0; |
@@ -816,8 +810,6 @@ good_mm: | |||
816 | /* Initializing for Swap token stuff */ | 810 | /* Initializing for Swap token stuff */ |
817 | mm->token_priority = 0; | 811 | mm->token_priority = 0; |
818 | mm->last_interval = 0; | 812 | mm->last_interval = 0; |
819 | if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | ||
820 | atomic_inc(&mm->oom_disable_count); | ||
821 | 813 | ||
822 | tsk->mm = mm; | 814 | tsk->mm = mm; |
823 | tsk->active_mm = mm; | 815 | tsk->active_mm = mm; |
@@ -1111,6 +1103,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1111 | p->real_cred->user != INIT_USER) | 1103 | p->real_cred->user != INIT_USER) |
1112 | goto bad_fork_free; | 1104 | goto bad_fork_free; |
1113 | } | 1105 | } |
1106 | current->flags &= ~PF_NPROC_EXCEEDED; | ||
1114 | 1107 | ||
1115 | retval = copy_creds(p, clone_flags); | 1108 | retval = copy_creds(p, clone_flags); |
1116 | if (retval < 0) | 1109 | if (retval < 0) |
@@ -1301,6 +1294,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1301 | p->pdeath_signal = 0; | 1294 | p->pdeath_signal = 0; |
1302 | p->exit_state = 0; | 1295 | p->exit_state = 0; |
1303 | 1296 | ||
1297 | p->nr_dirtied = 0; | ||
1298 | p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); | ||
1299 | |||
1304 | /* | 1300 | /* |
1305 | * Ok, make it visible to the rest of the system. | 1301 | * Ok, make it visible to the rest of the system. |
1306 | * We dont wake it up yet. | 1302 | * We dont wake it up yet. |
@@ -1390,13 +1386,8 @@ bad_fork_cleanup_io: | |||
1390 | bad_fork_cleanup_namespaces: | 1386 | bad_fork_cleanup_namespaces: |
1391 | exit_task_namespaces(p); | 1387 | exit_task_namespaces(p); |
1392 | bad_fork_cleanup_mm: | 1388 | bad_fork_cleanup_mm: |
1393 | if (p->mm) { | 1389 | if (p->mm) |
1394 | task_lock(p); | ||
1395 | if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | ||
1396 | atomic_dec(&p->mm->oom_disable_count); | ||
1397 | task_unlock(p); | ||
1398 | mmput(p->mm); | 1390 | mmput(p->mm); |
1399 | } | ||
1400 | bad_fork_cleanup_signal: | 1391 | bad_fork_cleanup_signal: |
1401 | if (!(clone_flags & CLONE_THREAD)) | 1392 | if (!(clone_flags & CLONE_THREAD)) |
1402 | free_signal_struct(p->signal); | 1393 | free_signal_struct(p->signal); |
diff --git a/kernel/freezer.c b/kernel/freezer.c index 7b01de98bb6..7be56c53439 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c | |||
@@ -6,7 +6,7 @@ | |||
6 | 6 | ||
7 | #include <linux/interrupt.h> | 7 | #include <linux/interrupt.h> |
8 | #include <linux/suspend.h> | 8 | #include <linux/suspend.h> |
9 | #include <linux/module.h> | 9 | #include <linux/export.h> |
10 | #include <linux/syscalls.h> | 10 | #include <linux/syscalls.h> |
11 | #include <linux/freezer.h> | 11 | #include <linux/freezer.h> |
12 | 12 | ||
diff --git a/kernel/futex.c b/kernel/futex.c index 11cbe052b2e..ea87f4d2f45 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -55,7 +55,7 @@ | |||
55 | #include <linux/pagemap.h> | 55 | #include <linux/pagemap.h> |
56 | #include <linux/syscalls.h> | 56 | #include <linux/syscalls.h> |
57 | #include <linux/signal.h> | 57 | #include <linux/signal.h> |
58 | #include <linux/module.h> | 58 | #include <linux/export.h> |
59 | #include <linux/magic.h> | 59 | #include <linux/magic.h> |
60 | #include <linux/pid.h> | 60 | #include <linux/pid.h> |
61 | #include <linux/nsproxy.h> | 61 | #include <linux/nsproxy.h> |
@@ -854,7 +854,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) | |||
854 | { | 854 | { |
855 | struct task_struct *new_owner; | 855 | struct task_struct *new_owner; |
856 | struct futex_pi_state *pi_state = this->pi_state; | 856 | struct futex_pi_state *pi_state = this->pi_state; |
857 | u32 curval, newval; | 857 | u32 uninitialized_var(curval), newval; |
858 | 858 | ||
859 | if (!pi_state) | 859 | if (!pi_state) |
860 | return -EINVAL; | 860 | return -EINVAL; |
@@ -916,7 +916,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) | |||
916 | 916 | ||
917 | static int unlock_futex_pi(u32 __user *uaddr, u32 uval) | 917 | static int unlock_futex_pi(u32 __user *uaddr, u32 uval) |
918 | { | 918 | { |
919 | u32 oldval; | 919 | u32 uninitialized_var(oldval); |
920 | 920 | ||
921 | /* | 921 | /* |
922 | * There is no waiter, so we unlock the futex. The owner died | 922 | * There is no waiter, so we unlock the futex. The owner died |
@@ -1576,7 +1576,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, | |||
1576 | u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; | 1576 | u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; |
1577 | struct futex_pi_state *pi_state = q->pi_state; | 1577 | struct futex_pi_state *pi_state = q->pi_state; |
1578 | struct task_struct *oldowner = pi_state->owner; | 1578 | struct task_struct *oldowner = pi_state->owner; |
1579 | u32 uval, curval, newval; | 1579 | u32 uval, uninitialized_var(curval), newval; |
1580 | int ret; | 1580 | int ret; |
1581 | 1581 | ||
1582 | /* Owner died? */ | 1582 | /* Owner died? */ |
@@ -1793,7 +1793,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, | |||
1793 | * | 1793 | * |
1794 | * Returns: | 1794 | * Returns: |
1795 | * 0 - uaddr contains val and hb has been locked | 1795 | * 0 - uaddr contains val and hb has been locked |
1796 | * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlcoked | 1796 | * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked |
1797 | */ | 1797 | */ |
1798 | static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, | 1798 | static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, |
1799 | struct futex_q *q, struct futex_hash_bucket **hb) | 1799 | struct futex_q *q, struct futex_hash_bucket **hb) |
@@ -2481,7 +2481,7 @@ err_unlock: | |||
2481 | */ | 2481 | */ |
2482 | int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) | 2482 | int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) |
2483 | { | 2483 | { |
2484 | u32 uval, nval, mval; | 2484 | u32 uval, uninitialized_var(nval), mval; |
2485 | 2485 | ||
2486 | retry: | 2486 | retry: |
2487 | if (get_user(uval, uaddr)) | 2487 | if (get_user(uval, uaddr)) |
diff --git a/kernel/groups.c b/kernel/groups.c index 1cc476d52dd..99b53d1eb7e 100644 --- a/kernel/groups.c +++ b/kernel/groups.c | |||
@@ -2,7 +2,7 @@ | |||
2 | * Supplementary group IDs | 2 | * Supplementary group IDs |
3 | */ | 3 | */ |
4 | #include <linux/cred.h> | 4 | #include <linux/cred.h> |
5 | #include <linux/module.h> | 5 | #include <linux/export.h> |
6 | #include <linux/slab.h> | 6 | #include <linux/slab.h> |
7 | #include <linux/security.h> | 7 | #include <linux/security.h> |
8 | #include <linux/syscalls.h> | 8 | #include <linux/syscalls.h> |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 2043c08d36c..ae34bf51682 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -32,7 +32,7 @@ | |||
32 | */ | 32 | */ |
33 | 33 | ||
34 | #include <linux/cpu.h> | 34 | #include <linux/cpu.h> |
35 | #include <linux/module.h> | 35 | #include <linux/export.h> |
36 | #include <linux/percpu.h> | 36 | #include <linux/percpu.h> |
37 | #include <linux/hrtimer.h> | 37 | #include <linux/hrtimer.h> |
38 | #include <linux/notifier.h> | 38 | #include <linux/notifier.h> |
diff --git a/kernel/hung_task.c b/kernel/hung_task.c index ea640120ab8..8b1748d0172 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c | |||
@@ -13,7 +13,7 @@ | |||
13 | #include <linux/freezer.h> | 13 | #include <linux/freezer.h> |
14 | #include <linux/kthread.h> | 14 | #include <linux/kthread.h> |
15 | #include <linux/lockdep.h> | 15 | #include <linux/lockdep.h> |
16 | #include <linux/module.h> | 16 | #include <linux/export.h> |
17 | #include <linux/sysctl.h> | 17 | #include <linux/sysctl.h> |
18 | 18 | ||
19 | /* | 19 | /* |
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index d5a3009da71..f7c543a801d 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c | |||
@@ -26,7 +26,7 @@ | |||
26 | int irq_set_chip(unsigned int irq, struct irq_chip *chip) | 26 | int irq_set_chip(unsigned int irq, struct irq_chip *chip) |
27 | { | 27 | { |
28 | unsigned long flags; | 28 | unsigned long flags; |
29 | struct irq_desc *desc = irq_get_desc_lock(irq, &flags); | 29 | struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); |
30 | 30 | ||
31 | if (!desc) | 31 | if (!desc) |
32 | return -EINVAL; | 32 | return -EINVAL; |
@@ -54,7 +54,7 @@ EXPORT_SYMBOL(irq_set_chip); | |||
54 | int irq_set_irq_type(unsigned int irq, unsigned int type) | 54 | int irq_set_irq_type(unsigned int irq, unsigned int type) |
55 | { | 55 | { |
56 | unsigned long flags; | 56 | unsigned long flags; |
57 | struct irq_desc *desc = irq_get_desc_buslock(irq, &flags); | 57 | struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); |
58 | int ret = 0; | 58 | int ret = 0; |
59 | 59 | ||
60 | if (!desc) | 60 | if (!desc) |
@@ -78,7 +78,7 @@ EXPORT_SYMBOL(irq_set_irq_type); | |||
78 | int irq_set_handler_data(unsigned int irq, void *data) | 78 | int irq_set_handler_data(unsigned int irq, void *data) |
79 | { | 79 | { |
80 | unsigned long flags; | 80 | unsigned long flags; |
81 | struct irq_desc *desc = irq_get_desc_lock(irq, &flags); | 81 | struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); |
82 | 82 | ||
83 | if (!desc) | 83 | if (!desc) |
84 | return -EINVAL; | 84 | return -EINVAL; |
@@ -98,7 +98,7 @@ EXPORT_SYMBOL(irq_set_handler_data); | |||
98 | int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry) | 98 | int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry) |
99 | { | 99 | { |
100 | unsigned long flags; | 100 | unsigned long flags; |
101 | struct irq_desc *desc = irq_get_desc_lock(irq, &flags); | 101 | struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); |
102 | 102 | ||
103 | if (!desc) | 103 | if (!desc) |
104 | return -EINVAL; | 104 | return -EINVAL; |
@@ -119,7 +119,7 @@ int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry) | |||
119 | int irq_set_chip_data(unsigned int irq, void *data) | 119 | int irq_set_chip_data(unsigned int irq, void *data) |
120 | { | 120 | { |
121 | unsigned long flags; | 121 | unsigned long flags; |
122 | struct irq_desc *desc = irq_get_desc_lock(irq, &flags); | 122 | struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); |
123 | 123 | ||
124 | if (!desc) | 124 | if (!desc) |
125 | return -EINVAL; | 125 | return -EINVAL; |
@@ -178,7 +178,7 @@ void irq_shutdown(struct irq_desc *desc) | |||
178 | desc->depth = 1; | 178 | desc->depth = 1; |
179 | if (desc->irq_data.chip->irq_shutdown) | 179 | if (desc->irq_data.chip->irq_shutdown) |
180 | desc->irq_data.chip->irq_shutdown(&desc->irq_data); | 180 | desc->irq_data.chip->irq_shutdown(&desc->irq_data); |
181 | if (desc->irq_data.chip->irq_disable) | 181 | else if (desc->irq_data.chip->irq_disable) |
182 | desc->irq_data.chip->irq_disable(&desc->irq_data); | 182 | desc->irq_data.chip->irq_disable(&desc->irq_data); |
183 | else | 183 | else |
184 | desc->irq_data.chip->irq_mask(&desc->irq_data); | 184 | desc->irq_data.chip->irq_mask(&desc->irq_data); |
@@ -204,6 +204,24 @@ void irq_disable(struct irq_desc *desc) | |||
204 | } | 204 | } |
205 | } | 205 | } |
206 | 206 | ||
207 | void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu) | ||
208 | { | ||
209 | if (desc->irq_data.chip->irq_enable) | ||
210 | desc->irq_data.chip->irq_enable(&desc->irq_data); | ||
211 | else | ||
212 | desc->irq_data.chip->irq_unmask(&desc->irq_data); | ||
213 | cpumask_set_cpu(cpu, desc->percpu_enabled); | ||
214 | } | ||
215 | |||
216 | void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu) | ||
217 | { | ||
218 | if (desc->irq_data.chip->irq_disable) | ||
219 | desc->irq_data.chip->irq_disable(&desc->irq_data); | ||
220 | else | ||
221 | desc->irq_data.chip->irq_mask(&desc->irq_data); | ||
222 | cpumask_clear_cpu(cpu, desc->percpu_enabled); | ||
223 | } | ||
224 | |||
207 | static inline void mask_ack_irq(struct irq_desc *desc) | 225 | static inline void mask_ack_irq(struct irq_desc *desc) |
208 | { | 226 | { |
209 | if (desc->irq_data.chip->irq_mask_ack) | 227 | if (desc->irq_data.chip->irq_mask_ack) |
@@ -544,12 +562,44 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) | |||
544 | chip->irq_eoi(&desc->irq_data); | 562 | chip->irq_eoi(&desc->irq_data); |
545 | } | 563 | } |
546 | 564 | ||
565 | /** | ||
566 | * handle_percpu_devid_irq - Per CPU local irq handler with per cpu dev ids | ||
567 | * @irq: the interrupt number | ||
568 | * @desc: the interrupt description structure for this irq | ||
569 | * | ||
570 | * Per CPU interrupts on SMP machines without locking requirements. Same as | ||
571 | * handle_percpu_irq() above but with the following extras: | ||
572 | * | ||
573 | * action->percpu_dev_id is a pointer to percpu variables which | ||
574 | * contain the real device id for the cpu on which this handler is | ||
575 | * called | ||
576 | */ | ||
577 | void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc) | ||
578 | { | ||
579 | struct irq_chip *chip = irq_desc_get_chip(desc); | ||
580 | struct irqaction *action = desc->action; | ||
581 | void *dev_id = __this_cpu_ptr(action->percpu_dev_id); | ||
582 | irqreturn_t res; | ||
583 | |||
584 | kstat_incr_irqs_this_cpu(irq, desc); | ||
585 | |||
586 | if (chip->irq_ack) | ||
587 | chip->irq_ack(&desc->irq_data); | ||
588 | |||
589 | trace_irq_handler_entry(irq, action); | ||
590 | res = action->handler(irq, dev_id); | ||
591 | trace_irq_handler_exit(irq, action, res); | ||
592 | |||
593 | if (chip->irq_eoi) | ||
594 | chip->irq_eoi(&desc->irq_data); | ||
595 | } | ||
596 | |||
547 | void | 597 | void |
548 | __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, | 598 | __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, |
549 | const char *name) | 599 | const char *name) |
550 | { | 600 | { |
551 | unsigned long flags; | 601 | unsigned long flags; |
552 | struct irq_desc *desc = irq_get_desc_buslock(irq, &flags); | 602 | struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, 0); |
553 | 603 | ||
554 | if (!desc) | 604 | if (!desc) |
555 | return; | 605 | return; |
@@ -593,7 +643,7 @@ irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, | |||
593 | void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) | 643 | void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) |
594 | { | 644 | { |
595 | unsigned long flags; | 645 | unsigned long flags; |
596 | struct irq_desc *desc = irq_get_desc_lock(irq, &flags); | 646 | struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); |
597 | 647 | ||
598 | if (!desc) | 648 | if (!desc) |
599 | return; | 649 | return; |
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index 3a2cab407b9..c89295a8f66 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/io.h> | 6 | #include <linux/io.h> |
7 | #include <linux/irq.h> | 7 | #include <linux/irq.h> |
8 | #include <linux/slab.h> | 8 | #include <linux/slab.h> |
9 | #include <linux/export.h> | ||
9 | #include <linux/interrupt.h> | 10 | #include <linux/interrupt.h> |
10 | #include <linux/kernel_stat.h> | 11 | #include <linux/kernel_stat.h> |
11 | #include <linux/syscore_ops.h> | 12 | #include <linux/syscore_ops.h> |
@@ -211,6 +212,7 @@ irq_alloc_generic_chip(const char *name, int num_ct, unsigned int irq_base, | |||
211 | } | 212 | } |
212 | return gc; | 213 | return gc; |
213 | } | 214 | } |
215 | EXPORT_SYMBOL_GPL(irq_alloc_generic_chip); | ||
214 | 216 | ||
215 | /* | 217 | /* |
216 | * Separate lockdep class for interrupt chip which can nest irq_desc | 218 | * Separate lockdep class for interrupt chip which can nest irq_desc |
@@ -246,7 +248,7 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, | |||
246 | gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask); | 248 | gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask); |
247 | 249 | ||
248 | for (i = gc->irq_base; msk; msk >>= 1, i++) { | 250 | for (i = gc->irq_base; msk; msk >>= 1, i++) { |
249 | if (!msk & 0x01) | 251 | if (!(msk & 0x01)) |
250 | continue; | 252 | continue; |
251 | 253 | ||
252 | if (flags & IRQ_GC_INIT_NESTED_LOCK) | 254 | if (flags & IRQ_GC_INIT_NESTED_LOCK) |
@@ -258,6 +260,7 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, | |||
258 | } | 260 | } |
259 | gc->irq_cnt = i - gc->irq_base; | 261 | gc->irq_cnt = i - gc->irq_base; |
260 | } | 262 | } |
263 | EXPORT_SYMBOL_GPL(irq_setup_generic_chip); | ||
261 | 264 | ||
262 | /** | 265 | /** |
263 | * irq_setup_alt_chip - Switch to alternative chip | 266 | * irq_setup_alt_chip - Switch to alternative chip |
@@ -281,6 +284,7 @@ int irq_setup_alt_chip(struct irq_data *d, unsigned int type) | |||
281 | } | 284 | } |
282 | return -EINVAL; | 285 | return -EINVAL; |
283 | } | 286 | } |
287 | EXPORT_SYMBOL_GPL(irq_setup_alt_chip); | ||
284 | 288 | ||
285 | /** | 289 | /** |
286 | * irq_remove_generic_chip - Remove a chip | 290 | * irq_remove_generic_chip - Remove a chip |
@@ -301,7 +305,7 @@ void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, | |||
301 | raw_spin_unlock(&gc_lock); | 305 | raw_spin_unlock(&gc_lock); |
302 | 306 | ||
303 | for (; msk; msk >>= 1, i++) { | 307 | for (; msk; msk >>= 1, i++) { |
304 | if (!msk & 0x01) | 308 | if (!(msk & 0x01)) |
305 | continue; | 309 | continue; |
306 | 310 | ||
307 | /* Remove handler first. That will mask the irq line */ | 311 | /* Remove handler first. That will mask the irq line */ |
@@ -311,6 +315,7 @@ void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, | |||
311 | irq_modify_status(i, clr, set); | 315 | irq_modify_status(i, clr, set); |
312 | } | 316 | } |
313 | } | 317 | } |
318 | EXPORT_SYMBOL_GPL(irq_remove_generic_chip); | ||
314 | 319 | ||
315 | #ifdef CONFIG_PM | 320 | #ifdef CONFIG_PM |
316 | static int irq_gc_suspend(void) | 321 | static int irq_gc_suspend(void) |
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 6546431447d..a73dd6c7372 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h | |||
@@ -71,6 +71,8 @@ extern int irq_startup(struct irq_desc *desc); | |||
71 | extern void irq_shutdown(struct irq_desc *desc); | 71 | extern void irq_shutdown(struct irq_desc *desc); |
72 | extern void irq_enable(struct irq_desc *desc); | 72 | extern void irq_enable(struct irq_desc *desc); |
73 | extern void irq_disable(struct irq_desc *desc); | 73 | extern void irq_disable(struct irq_desc *desc); |
74 | extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu); | ||
75 | extern void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu); | ||
74 | extern void mask_irq(struct irq_desc *desc); | 76 | extern void mask_irq(struct irq_desc *desc); |
75 | extern void unmask_irq(struct irq_desc *desc); | 77 | extern void unmask_irq(struct irq_desc *desc); |
76 | 78 | ||
@@ -114,14 +116,21 @@ static inline void chip_bus_sync_unlock(struct irq_desc *desc) | |||
114 | desc->irq_data.chip->irq_bus_sync_unlock(&desc->irq_data); | 116 | desc->irq_data.chip->irq_bus_sync_unlock(&desc->irq_data); |
115 | } | 117 | } |
116 | 118 | ||
119 | #define _IRQ_DESC_CHECK (1 << 0) | ||
120 | #define _IRQ_DESC_PERCPU (1 << 1) | ||
121 | |||
122 | #define IRQ_GET_DESC_CHECK_GLOBAL (_IRQ_DESC_CHECK) | ||
123 | #define IRQ_GET_DESC_CHECK_PERCPU (_IRQ_DESC_CHECK | _IRQ_DESC_PERCPU) | ||
124 | |||
117 | struct irq_desc * | 125 | struct irq_desc * |
118 | __irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus); | 126 | __irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus, |
127 | unsigned int check); | ||
119 | void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus); | 128 | void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus); |
120 | 129 | ||
121 | static inline struct irq_desc * | 130 | static inline struct irq_desc * |
122 | irq_get_desc_buslock(unsigned int irq, unsigned long *flags) | 131 | irq_get_desc_buslock(unsigned int irq, unsigned long *flags, unsigned int check) |
123 | { | 132 | { |
124 | return __irq_get_desc_lock(irq, flags, true); | 133 | return __irq_get_desc_lock(irq, flags, true, check); |
125 | } | 134 | } |
126 | 135 | ||
127 | static inline void | 136 | static inline void |
@@ -131,9 +140,9 @@ irq_put_desc_busunlock(struct irq_desc *desc, unsigned long flags) | |||
131 | } | 140 | } |
132 | 141 | ||
133 | static inline struct irq_desc * | 142 | static inline struct irq_desc * |
134 | irq_get_desc_lock(unsigned int irq, unsigned long *flags) | 143 | irq_get_desc_lock(unsigned int irq, unsigned long *flags, unsigned int check) |
135 | { | 144 | { |
136 | return __irq_get_desc_lock(irq, flags, false); | 145 | return __irq_get_desc_lock(irq, flags, false, check); |
137 | } | 146 | } |
138 | 147 | ||
139 | static inline void | 148 | static inline void |
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 4c60a50e66b..d86e254b95e 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c | |||
@@ -9,7 +9,7 @@ | |||
9 | */ | 9 | */ |
10 | #include <linux/irq.h> | 10 | #include <linux/irq.h> |
11 | #include <linux/slab.h> | 11 | #include <linux/slab.h> |
12 | #include <linux/module.h> | 12 | #include <linux/export.h> |
13 | #include <linux/interrupt.h> | 13 | #include <linux/interrupt.h> |
14 | #include <linux/kernel_stat.h> | 14 | #include <linux/kernel_stat.h> |
15 | #include <linux/radix-tree.h> | 15 | #include <linux/radix-tree.h> |
@@ -70,7 +70,8 @@ static inline void desc_smp_init(struct irq_desc *desc, int node) { } | |||
70 | static inline int desc_node(struct irq_desc *desc) { return 0; } | 70 | static inline int desc_node(struct irq_desc *desc) { return 0; } |
71 | #endif | 71 | #endif |
72 | 72 | ||
73 | static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) | 73 | static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, |
74 | struct module *owner) | ||
74 | { | 75 | { |
75 | int cpu; | 76 | int cpu; |
76 | 77 | ||
@@ -86,6 +87,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) | |||
86 | desc->irq_count = 0; | 87 | desc->irq_count = 0; |
87 | desc->irqs_unhandled = 0; | 88 | desc->irqs_unhandled = 0; |
88 | desc->name = NULL; | 89 | desc->name = NULL; |
90 | desc->owner = owner; | ||
89 | for_each_possible_cpu(cpu) | 91 | for_each_possible_cpu(cpu) |
90 | *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; | 92 | *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; |
91 | desc_smp_init(desc, node); | 93 | desc_smp_init(desc, node); |
@@ -128,7 +130,7 @@ static void free_masks(struct irq_desc *desc) | |||
128 | static inline void free_masks(struct irq_desc *desc) { } | 130 | static inline void free_masks(struct irq_desc *desc) { } |
129 | #endif | 131 | #endif |
130 | 132 | ||
131 | static struct irq_desc *alloc_desc(int irq, int node) | 133 | static struct irq_desc *alloc_desc(int irq, int node, struct module *owner) |
132 | { | 134 | { |
133 | struct irq_desc *desc; | 135 | struct irq_desc *desc; |
134 | gfp_t gfp = GFP_KERNEL; | 136 | gfp_t gfp = GFP_KERNEL; |
@@ -147,7 +149,7 @@ static struct irq_desc *alloc_desc(int irq, int node) | |||
147 | raw_spin_lock_init(&desc->lock); | 149 | raw_spin_lock_init(&desc->lock); |
148 | lockdep_set_class(&desc->lock, &irq_desc_lock_class); | 150 | lockdep_set_class(&desc->lock, &irq_desc_lock_class); |
149 | 151 | ||
150 | desc_set_defaults(irq, desc, node); | 152 | desc_set_defaults(irq, desc, node, owner); |
151 | 153 | ||
152 | return desc; | 154 | return desc; |
153 | 155 | ||
@@ -173,13 +175,14 @@ static void free_desc(unsigned int irq) | |||
173 | kfree(desc); | 175 | kfree(desc); |
174 | } | 176 | } |
175 | 177 | ||
176 | static int alloc_descs(unsigned int start, unsigned int cnt, int node) | 178 | static int alloc_descs(unsigned int start, unsigned int cnt, int node, |
179 | struct module *owner) | ||
177 | { | 180 | { |
178 | struct irq_desc *desc; | 181 | struct irq_desc *desc; |
179 | int i; | 182 | int i; |
180 | 183 | ||
181 | for (i = 0; i < cnt; i++) { | 184 | for (i = 0; i < cnt; i++) { |
182 | desc = alloc_desc(start + i, node); | 185 | desc = alloc_desc(start + i, node, owner); |
183 | if (!desc) | 186 | if (!desc) |
184 | goto err; | 187 | goto err; |
185 | mutex_lock(&sparse_irq_lock); | 188 | mutex_lock(&sparse_irq_lock); |
@@ -227,7 +230,7 @@ int __init early_irq_init(void) | |||
227 | nr_irqs = initcnt; | 230 | nr_irqs = initcnt; |
228 | 231 | ||
229 | for (i = 0; i < initcnt; i++) { | 232 | for (i = 0; i < initcnt; i++) { |
230 | desc = alloc_desc(i, node); | 233 | desc = alloc_desc(i, node, NULL); |
231 | set_bit(i, allocated_irqs); | 234 | set_bit(i, allocated_irqs); |
232 | irq_insert_desc(i, desc); | 235 | irq_insert_desc(i, desc); |
233 | } | 236 | } |
@@ -261,7 +264,7 @@ int __init early_irq_init(void) | |||
261 | alloc_masks(&desc[i], GFP_KERNEL, node); | 264 | alloc_masks(&desc[i], GFP_KERNEL, node); |
262 | raw_spin_lock_init(&desc[i].lock); | 265 | raw_spin_lock_init(&desc[i].lock); |
263 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); | 266 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); |
264 | desc_set_defaults(i, &desc[i], node); | 267 | desc_set_defaults(i, &desc[i], node, NULL); |
265 | } | 268 | } |
266 | return arch_early_irq_init(); | 269 | return arch_early_irq_init(); |
267 | } | 270 | } |
@@ -276,8 +279,16 @@ static void free_desc(unsigned int irq) | |||
276 | dynamic_irq_cleanup(irq); | 279 | dynamic_irq_cleanup(irq); |
277 | } | 280 | } |
278 | 281 | ||
279 | static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) | 282 | static inline int alloc_descs(unsigned int start, unsigned int cnt, int node, |
283 | struct module *owner) | ||
280 | { | 284 | { |
285 | u32 i; | ||
286 | |||
287 | for (i = 0; i < cnt; i++) { | ||
288 | struct irq_desc *desc = irq_to_desc(start + i); | ||
289 | |||
290 | desc->owner = owner; | ||
291 | } | ||
281 | return start; | 292 | return start; |
282 | } | 293 | } |
283 | 294 | ||
@@ -333,11 +344,13 @@ EXPORT_SYMBOL_GPL(irq_free_descs); | |||
333 | * @from: Start the search from this irq number | 344 | * @from: Start the search from this irq number |
334 | * @cnt: Number of consecutive irqs to allocate. | 345 | * @cnt: Number of consecutive irqs to allocate. |
335 | * @node: Preferred node on which the irq descriptor should be allocated | 346 | * @node: Preferred node on which the irq descriptor should be allocated |
347 | * @owner: Owning module (can be NULL) | ||
336 | * | 348 | * |
337 | * Returns the first irq number or error code | 349 | * Returns the first irq number or error code |
338 | */ | 350 | */ |
339 | int __ref | 351 | int __ref |
340 | irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) | 352 | __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, |
353 | struct module *owner) | ||
341 | { | 354 | { |
342 | int start, ret; | 355 | int start, ret; |
343 | 356 | ||
@@ -366,13 +379,13 @@ irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) | |||
366 | 379 | ||
367 | bitmap_set(allocated_irqs, start, cnt); | 380 | bitmap_set(allocated_irqs, start, cnt); |
368 | mutex_unlock(&sparse_irq_lock); | 381 | mutex_unlock(&sparse_irq_lock); |
369 | return alloc_descs(start, cnt, node); | 382 | return alloc_descs(start, cnt, node, owner); |
370 | 383 | ||
371 | err: | 384 | err: |
372 | mutex_unlock(&sparse_irq_lock); | 385 | mutex_unlock(&sparse_irq_lock); |
373 | return ret; | 386 | return ret; |
374 | } | 387 | } |
375 | EXPORT_SYMBOL_GPL(irq_alloc_descs); | 388 | EXPORT_SYMBOL_GPL(__irq_alloc_descs); |
376 | 389 | ||
377 | /** | 390 | /** |
378 | * irq_reserve_irqs - mark irqs allocated | 391 | * irq_reserve_irqs - mark irqs allocated |
@@ -411,11 +424,22 @@ unsigned int irq_get_next_irq(unsigned int offset) | |||
411 | } | 424 | } |
412 | 425 | ||
413 | struct irq_desc * | 426 | struct irq_desc * |
414 | __irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus) | 427 | __irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus, |
428 | unsigned int check) | ||
415 | { | 429 | { |
416 | struct irq_desc *desc = irq_to_desc(irq); | 430 | struct irq_desc *desc = irq_to_desc(irq); |
417 | 431 | ||
418 | if (desc) { | 432 | if (desc) { |
433 | if (check & _IRQ_DESC_CHECK) { | ||
434 | if ((check & _IRQ_DESC_PERCPU) && | ||
435 | !irq_settings_is_per_cpu_devid(desc)) | ||
436 | return NULL; | ||
437 | |||
438 | if (!(check & _IRQ_DESC_PERCPU) && | ||
439 | irq_settings_is_per_cpu_devid(desc)) | ||
440 | return NULL; | ||
441 | } | ||
442 | |||
419 | if (bus) | 443 | if (bus) |
420 | chip_bus_lock(desc); | 444 | chip_bus_lock(desc); |
421 | raw_spin_lock_irqsave(&desc->lock, *flags); | 445 | raw_spin_lock_irqsave(&desc->lock, *flags); |
@@ -430,6 +454,25 @@ void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus) | |||
430 | chip_bus_sync_unlock(desc); | 454 | chip_bus_sync_unlock(desc); |
431 | } | 455 | } |
432 | 456 | ||
457 | int irq_set_percpu_devid(unsigned int irq) | ||
458 | { | ||
459 | struct irq_desc *desc = irq_to_desc(irq); | ||
460 | |||
461 | if (!desc) | ||
462 | return -EINVAL; | ||
463 | |||
464 | if (desc->percpu_enabled) | ||
465 | return -EINVAL; | ||
466 | |||
467 | desc->percpu_enabled = kzalloc(sizeof(*desc->percpu_enabled), GFP_KERNEL); | ||
468 | |||
469 | if (!desc->percpu_enabled) | ||
470 | return -ENOMEM; | ||
471 | |||
472 | irq_set_percpu_devid_flags(irq); | ||
473 | return 0; | ||
474 | } | ||
475 | |||
433 | /** | 476 | /** |
434 | * dynamic_irq_cleanup - cleanup a dynamically allocated irq | 477 | * dynamic_irq_cleanup - cleanup a dynamically allocated irq |
435 | * @irq: irq number to initialize | 478 | * @irq: irq number to initialize |
@@ -440,7 +483,7 @@ void dynamic_irq_cleanup(unsigned int irq) | |||
440 | unsigned long flags; | 483 | unsigned long flags; |
441 | 484 | ||
442 | raw_spin_lock_irqsave(&desc->lock, flags); | 485 | raw_spin_lock_irqsave(&desc->lock, flags); |
443 | desc_set_defaults(irq, desc, desc_node(desc)); | 486 | desc_set_defaults(irq, desc, desc_node(desc), NULL); |
444 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 487 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
445 | } | 488 | } |
446 | 489 | ||
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index d5828da3fd3..200ce832c58 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c | |||
@@ -20,16 +20,20 @@ static DEFINE_MUTEX(irq_domain_mutex); | |||
20 | void irq_domain_add(struct irq_domain *domain) | 20 | void irq_domain_add(struct irq_domain *domain) |
21 | { | 21 | { |
22 | struct irq_data *d; | 22 | struct irq_data *d; |
23 | int hwirq; | 23 | int hwirq, irq; |
24 | 24 | ||
25 | /* | 25 | /* |
26 | * This assumes that the irq_domain owner has already allocated | 26 | * This assumes that the irq_domain owner has already allocated |
27 | * the irq_descs. This block will be removed when support for dynamic | 27 | * the irq_descs. This block will be removed when support for dynamic |
28 | * allocation of irq_descs is added to irq_domain. | 28 | * allocation of irq_descs is added to irq_domain. |
29 | */ | 29 | */ |
30 | for (hwirq = 0; hwirq < domain->nr_irq; hwirq++) { | 30 | irq_domain_for_each_irq(domain, hwirq, irq) { |
31 | d = irq_get_irq_data(irq_domain_to_irq(domain, hwirq)); | 31 | d = irq_get_irq_data(irq); |
32 | if (d || d->domain) { | 32 | if (!d) { |
33 | WARN(1, "error: assigning domain to non existant irq_desc"); | ||
34 | return; | ||
35 | } | ||
36 | if (d->domain) { | ||
33 | /* things are broken; just report, don't clean up */ | 37 | /* things are broken; just report, don't clean up */ |
34 | WARN(1, "error: irq_desc already assigned to a domain"); | 38 | WARN(1, "error: irq_desc already assigned to a domain"); |
35 | return; | 39 | return; |
@@ -50,15 +54,15 @@ void irq_domain_add(struct irq_domain *domain) | |||
50 | void irq_domain_del(struct irq_domain *domain) | 54 | void irq_domain_del(struct irq_domain *domain) |
51 | { | 55 | { |
52 | struct irq_data *d; | 56 | struct irq_data *d; |
53 | int hwirq; | 57 | int hwirq, irq; |
54 | 58 | ||
55 | mutex_lock(&irq_domain_mutex); | 59 | mutex_lock(&irq_domain_mutex); |
56 | list_del(&domain->list); | 60 | list_del(&domain->list); |
57 | mutex_unlock(&irq_domain_mutex); | 61 | mutex_unlock(&irq_domain_mutex); |
58 | 62 | ||
59 | /* Clear the irq_domain assignments */ | 63 | /* Clear the irq_domain assignments */ |
60 | for (hwirq = 0; hwirq < domain->nr_irq; hwirq++) { | 64 | irq_domain_for_each_irq(domain, hwirq, irq) { |
61 | d = irq_get_irq_data(irq_domain_to_irq(domain, hwirq)); | 65 | d = irq_get_irq_data(irq); |
62 | d->domain = NULL; | 66 | d->domain = NULL; |
63 | } | 67 | } |
64 | } | 68 | } |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0a7840aeb0f..1da999f5e74 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -195,7 +195,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
195 | int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) | 195 | int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) |
196 | { | 196 | { |
197 | unsigned long flags; | 197 | unsigned long flags; |
198 | struct irq_desc *desc = irq_get_desc_lock(irq, &flags); | 198 | struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); |
199 | 199 | ||
200 | if (!desc) | 200 | if (!desc) |
201 | return -EINVAL; | 201 | return -EINVAL; |
@@ -356,7 +356,7 @@ void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend) | |||
356 | static int __disable_irq_nosync(unsigned int irq) | 356 | static int __disable_irq_nosync(unsigned int irq) |
357 | { | 357 | { |
358 | unsigned long flags; | 358 | unsigned long flags; |
359 | struct irq_desc *desc = irq_get_desc_buslock(irq, &flags); | 359 | struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); |
360 | 360 | ||
361 | if (!desc) | 361 | if (!desc) |
362 | return -EINVAL; | 362 | return -EINVAL; |
@@ -448,7 +448,7 @@ void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume) | |||
448 | void enable_irq(unsigned int irq) | 448 | void enable_irq(unsigned int irq) |
449 | { | 449 | { |
450 | unsigned long flags; | 450 | unsigned long flags; |
451 | struct irq_desc *desc = irq_get_desc_buslock(irq, &flags); | 451 | struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); |
452 | 452 | ||
453 | if (!desc) | 453 | if (!desc) |
454 | return; | 454 | return; |
@@ -467,6 +467,9 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on) | |||
467 | struct irq_desc *desc = irq_to_desc(irq); | 467 | struct irq_desc *desc = irq_to_desc(irq); |
468 | int ret = -ENXIO; | 468 | int ret = -ENXIO; |
469 | 469 | ||
470 | if (irq_desc_get_chip(desc)->flags & IRQCHIP_SKIP_SET_WAKE) | ||
471 | return 0; | ||
472 | |||
470 | if (desc->irq_data.chip->irq_set_wake) | 473 | if (desc->irq_data.chip->irq_set_wake) |
471 | ret = desc->irq_data.chip->irq_set_wake(&desc->irq_data, on); | 474 | ret = desc->irq_data.chip->irq_set_wake(&desc->irq_data, on); |
472 | 475 | ||
@@ -488,7 +491,7 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on) | |||
488 | int irq_set_irq_wake(unsigned int irq, unsigned int on) | 491 | int irq_set_irq_wake(unsigned int irq, unsigned int on) |
489 | { | 492 | { |
490 | unsigned long flags; | 493 | unsigned long flags; |
491 | struct irq_desc *desc = irq_get_desc_buslock(irq, &flags); | 494 | struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); |
492 | int ret = 0; | 495 | int ret = 0; |
493 | 496 | ||
494 | if (!desc) | 497 | if (!desc) |
@@ -529,7 +532,7 @@ EXPORT_SYMBOL(irq_set_irq_wake); | |||
529 | int can_request_irq(unsigned int irq, unsigned long irqflags) | 532 | int can_request_irq(unsigned int irq, unsigned long irqflags) |
530 | { | 533 | { |
531 | unsigned long flags; | 534 | unsigned long flags; |
532 | struct irq_desc *desc = irq_get_desc_lock(irq, &flags); | 535 | struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); |
533 | int canrequest = 0; | 536 | int canrequest = 0; |
534 | 537 | ||
535 | if (!desc) | 538 | if (!desc) |
@@ -620,8 +623,9 @@ static irqreturn_t irq_nested_primary_handler(int irq, void *dev_id) | |||
620 | 623 | ||
621 | static int irq_wait_for_interrupt(struct irqaction *action) | 624 | static int irq_wait_for_interrupt(struct irqaction *action) |
622 | { | 625 | { |
626 | set_current_state(TASK_INTERRUPTIBLE); | ||
627 | |||
623 | while (!kthread_should_stop()) { | 628 | while (!kthread_should_stop()) { |
624 | set_current_state(TASK_INTERRUPTIBLE); | ||
625 | 629 | ||
626 | if (test_and_clear_bit(IRQTF_RUNTHREAD, | 630 | if (test_and_clear_bit(IRQTF_RUNTHREAD, |
627 | &action->thread_flags)) { | 631 | &action->thread_flags)) { |
@@ -629,7 +633,9 @@ static int irq_wait_for_interrupt(struct irqaction *action) | |||
629 | return 0; | 633 | return 0; |
630 | } | 634 | } |
631 | schedule(); | 635 | schedule(); |
636 | set_current_state(TASK_INTERRUPTIBLE); | ||
632 | } | 637 | } |
638 | __set_current_state(TASK_RUNNING); | ||
633 | return -1; | 639 | return -1; |
634 | } | 640 | } |
635 | 641 | ||
@@ -883,6 +889,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
883 | 889 | ||
884 | if (desc->irq_data.chip == &no_irq_chip) | 890 | if (desc->irq_data.chip == &no_irq_chip) |
885 | return -ENOSYS; | 891 | return -ENOSYS; |
892 | if (!try_module_get(desc->owner)) | ||
893 | return -ENODEV; | ||
886 | /* | 894 | /* |
887 | * Some drivers like serial.c use request_irq() heavily, | 895 | * Some drivers like serial.c use request_irq() heavily, |
888 | * so we have to be careful not to interfere with a | 896 | * so we have to be careful not to interfere with a |
@@ -906,8 +914,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
906 | */ | 914 | */ |
907 | nested = irq_settings_is_nested_thread(desc); | 915 | nested = irq_settings_is_nested_thread(desc); |
908 | if (nested) { | 916 | if (nested) { |
909 | if (!new->thread_fn) | 917 | if (!new->thread_fn) { |
910 | return -EINVAL; | 918 | ret = -EINVAL; |
919 | goto out_mput; | ||
920 | } | ||
911 | /* | 921 | /* |
912 | * Replace the primary handler which was provided from | 922 | * Replace the primary handler which was provided from |
913 | * the driver for non nested interrupt handling by the | 923 | * the driver for non nested interrupt handling by the |
@@ -929,8 +939,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
929 | 939 | ||
930 | t = kthread_create(irq_thread, new, "irq/%d-%s", irq, | 940 | t = kthread_create(irq_thread, new, "irq/%d-%s", irq, |
931 | new->name); | 941 | new->name); |
932 | if (IS_ERR(t)) | 942 | if (IS_ERR(t)) { |
933 | return PTR_ERR(t); | 943 | ret = PTR_ERR(t); |
944 | goto out_mput; | ||
945 | } | ||
934 | /* | 946 | /* |
935 | * We keep the reference to the task struct even if | 947 | * We keep the reference to the task struct even if |
936 | * the thread dies to avoid that the interrupt code | 948 | * the thread dies to avoid that the interrupt code |
@@ -1095,6 +1107,8 @@ out_thread: | |||
1095 | kthread_stop(t); | 1107 | kthread_stop(t); |
1096 | put_task_struct(t); | 1108 | put_task_struct(t); |
1097 | } | 1109 | } |
1110 | out_mput: | ||
1111 | module_put(desc->owner); | ||
1098 | return ret; | 1112 | return ret; |
1099 | } | 1113 | } |
1100 | 1114 | ||
@@ -1110,6 +1124,8 @@ int setup_irq(unsigned int irq, struct irqaction *act) | |||
1110 | int retval; | 1124 | int retval; |
1111 | struct irq_desc *desc = irq_to_desc(irq); | 1125 | struct irq_desc *desc = irq_to_desc(irq); |
1112 | 1126 | ||
1127 | if (WARN_ON(irq_settings_is_per_cpu_devid(desc))) | ||
1128 | return -EINVAL; | ||
1113 | chip_bus_lock(desc); | 1129 | chip_bus_lock(desc); |
1114 | retval = __setup_irq(irq, desc, act); | 1130 | retval = __setup_irq(irq, desc, act); |
1115 | chip_bus_sync_unlock(desc); | 1131 | chip_bus_sync_unlock(desc); |
@@ -1118,7 +1134,7 @@ int setup_irq(unsigned int irq, struct irqaction *act) | |||
1118 | } | 1134 | } |
1119 | EXPORT_SYMBOL_GPL(setup_irq); | 1135 | EXPORT_SYMBOL_GPL(setup_irq); |
1120 | 1136 | ||
1121 | /* | 1137 | /* |
1122 | * Internal function to unregister an irqaction - used to free | 1138 | * Internal function to unregister an irqaction - used to free |
1123 | * regular and special interrupts that are part of the architecture. | 1139 | * regular and special interrupts that are part of the architecture. |
1124 | */ | 1140 | */ |
@@ -1203,6 +1219,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) | |||
1203 | put_task_struct(action->thread); | 1219 | put_task_struct(action->thread); |
1204 | } | 1220 | } |
1205 | 1221 | ||
1222 | module_put(desc->owner); | ||
1206 | return action; | 1223 | return action; |
1207 | } | 1224 | } |
1208 | 1225 | ||
@@ -1215,7 +1232,10 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) | |||
1215 | */ | 1232 | */ |
1216 | void remove_irq(unsigned int irq, struct irqaction *act) | 1233 | void remove_irq(unsigned int irq, struct irqaction *act) |
1217 | { | 1234 | { |
1218 | __free_irq(irq, act->dev_id); | 1235 | struct irq_desc *desc = irq_to_desc(irq); |
1236 | |||
1237 | if (desc && !WARN_ON(irq_settings_is_per_cpu_devid(desc))) | ||
1238 | __free_irq(irq, act->dev_id); | ||
1219 | } | 1239 | } |
1220 | EXPORT_SYMBOL_GPL(remove_irq); | 1240 | EXPORT_SYMBOL_GPL(remove_irq); |
1221 | 1241 | ||
@@ -1237,7 +1257,7 @@ void free_irq(unsigned int irq, void *dev_id) | |||
1237 | { | 1257 | { |
1238 | struct irq_desc *desc = irq_to_desc(irq); | 1258 | struct irq_desc *desc = irq_to_desc(irq); |
1239 | 1259 | ||
1240 | if (!desc) | 1260 | if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc))) |
1241 | return; | 1261 | return; |
1242 | 1262 | ||
1243 | #ifdef CONFIG_SMP | 1263 | #ifdef CONFIG_SMP |
@@ -1315,7 +1335,8 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, | |||
1315 | if (!desc) | 1335 | if (!desc) |
1316 | return -EINVAL; | 1336 | return -EINVAL; |
1317 | 1337 | ||
1318 | if (!irq_settings_can_request(desc)) | 1338 | if (!irq_settings_can_request(desc) || |
1339 | WARN_ON(irq_settings_is_per_cpu_devid(desc))) | ||
1319 | return -EINVAL; | 1340 | return -EINVAL; |
1320 | 1341 | ||
1321 | if (!handler) { | 1342 | if (!handler) { |
@@ -1400,3 +1421,194 @@ int request_any_context_irq(unsigned int irq, irq_handler_t handler, | |||
1400 | return !ret ? IRQC_IS_HARDIRQ : ret; | 1421 | return !ret ? IRQC_IS_HARDIRQ : ret; |
1401 | } | 1422 | } |
1402 | EXPORT_SYMBOL_GPL(request_any_context_irq); | 1423 | EXPORT_SYMBOL_GPL(request_any_context_irq); |
1424 | |||
1425 | void enable_percpu_irq(unsigned int irq, unsigned int type) | ||
1426 | { | ||
1427 | unsigned int cpu = smp_processor_id(); | ||
1428 | unsigned long flags; | ||
1429 | struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU); | ||
1430 | |||
1431 | if (!desc) | ||
1432 | return; | ||
1433 | |||
1434 | type &= IRQ_TYPE_SENSE_MASK; | ||
1435 | if (type != IRQ_TYPE_NONE) { | ||
1436 | int ret; | ||
1437 | |||
1438 | ret = __irq_set_trigger(desc, irq, type); | ||
1439 | |||
1440 | if (ret) { | ||
1441 | WARN(1, "failed to set type for IRQ%d\n", irq); | ||
1442 | goto out; | ||
1443 | } | ||
1444 | } | ||
1445 | |||
1446 | irq_percpu_enable(desc, cpu); | ||
1447 | out: | ||
1448 | irq_put_desc_unlock(desc, flags); | ||
1449 | } | ||
1450 | |||
1451 | void disable_percpu_irq(unsigned int irq) | ||
1452 | { | ||
1453 | unsigned int cpu = smp_processor_id(); | ||
1454 | unsigned long flags; | ||
1455 | struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU); | ||
1456 | |||
1457 | if (!desc) | ||
1458 | return; | ||
1459 | |||
1460 | irq_percpu_disable(desc, cpu); | ||
1461 | irq_put_desc_unlock(desc, flags); | ||
1462 | } | ||
1463 | |||
1464 | /* | ||
1465 | * Internal function to unregister a percpu irqaction. | ||
1466 | */ | ||
1467 | static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_id) | ||
1468 | { | ||
1469 | struct irq_desc *desc = irq_to_desc(irq); | ||
1470 | struct irqaction *action; | ||
1471 | unsigned long flags; | ||
1472 | |||
1473 | WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); | ||
1474 | |||
1475 | if (!desc) | ||
1476 | return NULL; | ||
1477 | |||
1478 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
1479 | |||
1480 | action = desc->action; | ||
1481 | if (!action || action->percpu_dev_id != dev_id) { | ||
1482 | WARN(1, "Trying to free already-free IRQ %d\n", irq); | ||
1483 | goto bad; | ||
1484 | } | ||
1485 | |||
1486 | if (!cpumask_empty(desc->percpu_enabled)) { | ||
1487 | WARN(1, "percpu IRQ %d still enabled on CPU%d!\n", | ||
1488 | irq, cpumask_first(desc->percpu_enabled)); | ||
1489 | goto bad; | ||
1490 | } | ||
1491 | |||
1492 | /* Found it - now remove it from the list of entries: */ | ||
1493 | desc->action = NULL; | ||
1494 | |||
1495 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
1496 | |||
1497 | unregister_handler_proc(irq, action); | ||
1498 | |||
1499 | module_put(desc->owner); | ||
1500 | return action; | ||
1501 | |||
1502 | bad: | ||
1503 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
1504 | return NULL; | ||
1505 | } | ||
1506 | |||
1507 | /** | ||
1508 | * remove_percpu_irq - free a per-cpu interrupt | ||
1509 | * @irq: Interrupt line to free | ||
1510 | * @act: irqaction for the interrupt | ||
1511 | * | ||
1512 | * Used to remove interrupts statically set up by the early boot process. | ||
1513 | */ | ||
1514 | void remove_percpu_irq(unsigned int irq, struct irqaction *act) | ||
1515 | { | ||
1516 | struct irq_desc *desc = irq_to_desc(irq); | ||
1517 | |||
1518 | if (desc && irq_settings_is_per_cpu_devid(desc)) | ||
1519 | __free_percpu_irq(irq, act->percpu_dev_id); | ||
1520 | } | ||
1521 | |||
1522 | /** | ||
1523 | * free_percpu_irq - free an interrupt allocated with request_percpu_irq | ||
1524 | * @irq: Interrupt line to free | ||
1525 | * @dev_id: Device identity to free | ||
1526 | * | ||
1527 | * Remove a percpu interrupt handler. The handler is removed, but | ||
1528 | * the interrupt line is not disabled. This must be done on each | ||
1529 | * CPU before calling this function. The function does not return | ||
1530 | * until any executing interrupts for this IRQ have completed. | ||
1531 | * | ||
1532 | * This function must not be called from interrupt context. | ||
1533 | */ | ||
1534 | void free_percpu_irq(unsigned int irq, void __percpu *dev_id) | ||
1535 | { | ||
1536 | struct irq_desc *desc = irq_to_desc(irq); | ||
1537 | |||
1538 | if (!desc || !irq_settings_is_per_cpu_devid(desc)) | ||
1539 | return; | ||
1540 | |||
1541 | chip_bus_lock(desc); | ||
1542 | kfree(__free_percpu_irq(irq, dev_id)); | ||
1543 | chip_bus_sync_unlock(desc); | ||
1544 | } | ||
1545 | |||
1546 | /** | ||
1547 | * setup_percpu_irq - setup a per-cpu interrupt | ||
1548 | * @irq: Interrupt line to setup | ||
1549 | * @act: irqaction for the interrupt | ||
1550 | * | ||
1551 | * Used to statically set up per-cpu interrupts in the early boot process. | ||
1552 | */ | ||
1553 | int setup_percpu_irq(unsigned int irq, struct irqaction *act) | ||
1554 | { | ||
1555 | struct irq_desc *desc = irq_to_desc(irq); | ||
1556 | int retval; | ||
1557 | |||
1558 | if (!desc || !irq_settings_is_per_cpu_devid(desc)) | ||
1559 | return -EINVAL; | ||
1560 | chip_bus_lock(desc); | ||
1561 | retval = __setup_irq(irq, desc, act); | ||
1562 | chip_bus_sync_unlock(desc); | ||
1563 | |||
1564 | return retval; | ||
1565 | } | ||
1566 | |||
1567 | /** | ||
1568 | * request_percpu_irq - allocate a percpu interrupt line | ||
1569 | * @irq: Interrupt line to allocate | ||
1570 | * @handler: Function to be called when the IRQ occurs. | ||
1571 | * @devname: An ASCII name for the claiming device | ||
1572 | * @dev_id: A percpu cookie passed back to the handler function | ||
1573 | * | ||
1574 | * This call allocates interrupt resources, but doesn't | ||
1575 | * automatically enable the interrupt. It has to be done on each | ||
1576 | * CPU using enable_percpu_irq(). | ||
1577 | * | ||
1578 | * Dev_id must be globally unique. It is a per-cpu variable, and | ||
1579 | * the handler gets called with the interrupted CPU's instance of | ||
1580 | * that variable. | ||
1581 | */ | ||
1582 | int request_percpu_irq(unsigned int irq, irq_handler_t handler, | ||
1583 | const char *devname, void __percpu *dev_id) | ||
1584 | { | ||
1585 | struct irqaction *action; | ||
1586 | struct irq_desc *desc; | ||
1587 | int retval; | ||
1588 | |||
1589 | if (!dev_id) | ||
1590 | return -EINVAL; | ||
1591 | |||
1592 | desc = irq_to_desc(irq); | ||
1593 | if (!desc || !irq_settings_can_request(desc) || | ||
1594 | !irq_settings_is_per_cpu_devid(desc)) | ||
1595 | return -EINVAL; | ||
1596 | |||
1597 | action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); | ||
1598 | if (!action) | ||
1599 | return -ENOMEM; | ||
1600 | |||
1601 | action->handler = handler; | ||
1602 | action->flags = IRQF_PERCPU | IRQF_NO_SUSPEND; | ||
1603 | action->name = devname; | ||
1604 | action->percpu_dev_id = dev_id; | ||
1605 | |||
1606 | chip_bus_lock(desc); | ||
1607 | retval = __setup_irq(irq, desc, action); | ||
1608 | chip_bus_sync_unlock(desc); | ||
1609 | |||
1610 | if (retval) | ||
1611 | kfree(action); | ||
1612 | |||
1613 | return retval; | ||
1614 | } | ||
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index f76fc00c987..15e53b1766a 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/irq.h> | 9 | #include <linux/irq.h> |
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/interrupt.h> | 11 | #include <linux/interrupt.h> |
12 | #include <linux/syscore_ops.h> | ||
12 | 13 | ||
13 | #include "internals.h" | 14 | #include "internals.h" |
14 | 15 | ||
@@ -39,25 +40,58 @@ void suspend_device_irqs(void) | |||
39 | } | 40 | } |
40 | EXPORT_SYMBOL_GPL(suspend_device_irqs); | 41 | EXPORT_SYMBOL_GPL(suspend_device_irqs); |
41 | 42 | ||
42 | /** | 43 | static void resume_irqs(bool want_early) |
43 | * resume_device_irqs - enable interrupt lines disabled by suspend_device_irqs() | ||
44 | * | ||
45 | * Enable all interrupt lines previously disabled by suspend_device_irqs() that | ||
46 | * have the IRQS_SUSPENDED flag set. | ||
47 | */ | ||
48 | void resume_device_irqs(void) | ||
49 | { | 44 | { |
50 | struct irq_desc *desc; | 45 | struct irq_desc *desc; |
51 | int irq; | 46 | int irq; |
52 | 47 | ||
53 | for_each_irq_desc(irq, desc) { | 48 | for_each_irq_desc(irq, desc) { |
54 | unsigned long flags; | 49 | unsigned long flags; |
50 | bool is_early = desc->action && | ||
51 | desc->action->flags & IRQF_EARLY_RESUME; | ||
52 | |||
53 | if (is_early != want_early) | ||
54 | continue; | ||
55 | 55 | ||
56 | raw_spin_lock_irqsave(&desc->lock, flags); | 56 | raw_spin_lock_irqsave(&desc->lock, flags); |
57 | __enable_irq(desc, irq, true); | 57 | __enable_irq(desc, irq, true); |
58 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 58 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
59 | } | 59 | } |
60 | } | 60 | } |
61 | |||
62 | /** | ||
63 | * irq_pm_syscore_resume - enable interrupt lines early | ||
64 | * | ||
65 | * Enable all interrupt lines with %IRQF_EARLY_RESUME set. | ||
66 | */ | ||
67 | static void irq_pm_syscore_resume(void) | ||
68 | { | ||
69 | resume_irqs(true); | ||
70 | } | ||
71 | |||
72 | static struct syscore_ops irq_pm_syscore_ops = { | ||
73 | .resume = irq_pm_syscore_resume, | ||
74 | }; | ||
75 | |||
76 | static int __init irq_pm_init_ops(void) | ||
77 | { | ||
78 | register_syscore_ops(&irq_pm_syscore_ops); | ||
79 | return 0; | ||
80 | } | ||
81 | |||
82 | device_initcall(irq_pm_init_ops); | ||
83 | |||
84 | /** | ||
85 | * resume_device_irqs - enable interrupt lines disabled by suspend_device_irqs() | ||
86 | * | ||
87 | * Enable all non-%IRQF_EARLY_RESUME interrupt lines previously | ||
88 | * disabled by suspend_device_irqs() that have the IRQS_SUSPENDED flag | ||
89 | * set as well as those with %IRQF_FORCE_RESUME. | ||
90 | */ | ||
91 | void resume_device_irqs(void) | ||
92 | { | ||
93 | resume_irqs(false); | ||
94 | } | ||
61 | EXPORT_SYMBOL_GPL(resume_device_irqs); | 95 | EXPORT_SYMBOL_GPL(resume_device_irqs); |
62 | 96 | ||
63 | /** | 97 | /** |
diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h index f1667833d44..1162f1030f1 100644 --- a/kernel/irq/settings.h +++ b/kernel/irq/settings.h | |||
@@ -13,6 +13,7 @@ enum { | |||
13 | _IRQ_MOVE_PCNTXT = IRQ_MOVE_PCNTXT, | 13 | _IRQ_MOVE_PCNTXT = IRQ_MOVE_PCNTXT, |
14 | _IRQ_NO_BALANCING = IRQ_NO_BALANCING, | 14 | _IRQ_NO_BALANCING = IRQ_NO_BALANCING, |
15 | _IRQ_NESTED_THREAD = IRQ_NESTED_THREAD, | 15 | _IRQ_NESTED_THREAD = IRQ_NESTED_THREAD, |
16 | _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID, | ||
16 | _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, | 17 | _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, |
17 | }; | 18 | }; |
18 | 19 | ||
@@ -24,6 +25,7 @@ enum { | |||
24 | #define IRQ_NOTHREAD GOT_YOU_MORON | 25 | #define IRQ_NOTHREAD GOT_YOU_MORON |
25 | #define IRQ_NOAUTOEN GOT_YOU_MORON | 26 | #define IRQ_NOAUTOEN GOT_YOU_MORON |
26 | #define IRQ_NESTED_THREAD GOT_YOU_MORON | 27 | #define IRQ_NESTED_THREAD GOT_YOU_MORON |
28 | #define IRQ_PER_CPU_DEVID GOT_YOU_MORON | ||
27 | #undef IRQF_MODIFY_MASK | 29 | #undef IRQF_MODIFY_MASK |
28 | #define IRQF_MODIFY_MASK GOT_YOU_MORON | 30 | #define IRQF_MODIFY_MASK GOT_YOU_MORON |
29 | 31 | ||
@@ -39,6 +41,11 @@ static inline bool irq_settings_is_per_cpu(struct irq_desc *desc) | |||
39 | return desc->status_use_accessors & _IRQ_PER_CPU; | 41 | return desc->status_use_accessors & _IRQ_PER_CPU; |
40 | } | 42 | } |
41 | 43 | ||
44 | static inline bool irq_settings_is_per_cpu_devid(struct irq_desc *desc) | ||
45 | { | ||
46 | return desc->status_use_accessors & _IRQ_PER_CPU_DEVID; | ||
47 | } | ||
48 | |||
42 | static inline void irq_settings_set_per_cpu(struct irq_desc *desc) | 49 | static inline void irq_settings_set_per_cpu(struct irq_desc *desc) |
43 | { | 50 | { |
44 | desc->status_use_accessors |= _IRQ_PER_CPU; | 51 | desc->status_use_accessors |= _IRQ_PER_CPU; |
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index aa57d5da18c..dc813a948be 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c | |||
@@ -84,7 +84,9 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force) | |||
84 | */ | 84 | */ |
85 | action = desc->action; | 85 | action = desc->action; |
86 | if (!action || !(action->flags & IRQF_SHARED) || | 86 | if (!action || !(action->flags & IRQF_SHARED) || |
87 | (action->flags & __IRQF_TIMER) || !action->next) | 87 | (action->flags & __IRQF_TIMER) || |
88 | (action->handler(irq, action->dev_id) == IRQ_HANDLED) || | ||
89 | !action->next) | ||
88 | goto out; | 90 | goto out; |
89 | 91 | ||
90 | /* Already running on another processor */ | 92 | /* Already running on another processor */ |
@@ -115,7 +117,7 @@ static int misrouted_irq(int irq) | |||
115 | struct irq_desc *desc; | 117 | struct irq_desc *desc; |
116 | int i, ok = 0; | 118 | int i, ok = 0; |
117 | 119 | ||
118 | if (atomic_inc_return(&irq_poll_active) == 1) | 120 | if (atomic_inc_return(&irq_poll_active) != 1) |
119 | goto out; | 121 | goto out; |
120 | 122 | ||
121 | irq_poll_cpu = smp_processor_id(); | 123 | irq_poll_cpu = smp_processor_id(); |
diff --git a/kernel/irq_work.c b/kernel/irq_work.c index c58fa7da8ae..c3c46c72046 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c | |||
@@ -6,9 +6,11 @@ | |||
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/kernel.h> | 8 | #include <linux/kernel.h> |
9 | #include <linux/module.h> | 9 | #include <linux/export.h> |
10 | #include <linux/irq_work.h> | 10 | #include <linux/irq_work.h> |
11 | #include <linux/percpu.h> | ||
11 | #include <linux/hardirq.h> | 12 | #include <linux/hardirq.h> |
13 | #include <asm/processor.h> | ||
12 | 14 | ||
13 | /* | 15 | /* |
14 | * An entry can be in one of four states: | 16 | * An entry can be in one of four states: |
@@ -17,54 +19,34 @@ | |||
17 | * claimed NULL, 3 -> {pending} : claimed to be enqueued | 19 | * claimed NULL, 3 -> {pending} : claimed to be enqueued |
18 | * pending next, 3 -> {busy} : queued, pending callback | 20 | * pending next, 3 -> {busy} : queued, pending callback |
19 | * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed | 21 | * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed |
20 | * | ||
21 | * We use the lower two bits of the next pointer to keep PENDING and BUSY | ||
22 | * flags. | ||
23 | */ | 22 | */ |
24 | 23 | ||
25 | #define IRQ_WORK_PENDING 1UL | 24 | #define IRQ_WORK_PENDING 1UL |
26 | #define IRQ_WORK_BUSY 2UL | 25 | #define IRQ_WORK_BUSY 2UL |
27 | #define IRQ_WORK_FLAGS 3UL | 26 | #define IRQ_WORK_FLAGS 3UL |
28 | 27 | ||
29 | static inline bool irq_work_is_set(struct irq_work *entry, int flags) | 28 | static DEFINE_PER_CPU(struct llist_head, irq_work_list); |
30 | { | ||
31 | return (unsigned long)entry->next & flags; | ||
32 | } | ||
33 | |||
34 | static inline struct irq_work *irq_work_next(struct irq_work *entry) | ||
35 | { | ||
36 | unsigned long next = (unsigned long)entry->next; | ||
37 | next &= ~IRQ_WORK_FLAGS; | ||
38 | return (struct irq_work *)next; | ||
39 | } | ||
40 | |||
41 | static inline struct irq_work *next_flags(struct irq_work *entry, int flags) | ||
42 | { | ||
43 | unsigned long next = (unsigned long)entry; | ||
44 | next |= flags; | ||
45 | return (struct irq_work *)next; | ||
46 | } | ||
47 | |||
48 | static DEFINE_PER_CPU(struct irq_work *, irq_work_list); | ||
49 | 29 | ||
50 | /* | 30 | /* |
51 | * Claim the entry so that no one else will poke at it. | 31 | * Claim the entry so that no one else will poke at it. |
52 | */ | 32 | */ |
53 | static bool irq_work_claim(struct irq_work *entry) | 33 | static bool irq_work_claim(struct irq_work *work) |
54 | { | 34 | { |
55 | struct irq_work *next, *nflags; | 35 | unsigned long flags, nflags; |
56 | 36 | ||
57 | do { | 37 | for (;;) { |
58 | next = entry->next; | 38 | flags = work->flags; |
59 | if ((unsigned long)next & IRQ_WORK_PENDING) | 39 | if (flags & IRQ_WORK_PENDING) |
60 | return false; | 40 | return false; |
61 | nflags = next_flags(next, IRQ_WORK_FLAGS); | 41 | nflags = flags | IRQ_WORK_FLAGS; |
62 | } while (cmpxchg(&entry->next, next, nflags) != next); | 42 | if (cmpxchg(&work->flags, flags, nflags) == flags) |
43 | break; | ||
44 | cpu_relax(); | ||
45 | } | ||
63 | 46 | ||
64 | return true; | 47 | return true; |
65 | } | 48 | } |
66 | 49 | ||
67 | |||
68 | void __weak arch_irq_work_raise(void) | 50 | void __weak arch_irq_work_raise(void) |
69 | { | 51 | { |
70 | /* | 52 | /* |
@@ -75,20 +57,15 @@ void __weak arch_irq_work_raise(void) | |||
75 | /* | 57 | /* |
76 | * Queue the entry and raise the IPI if needed. | 58 | * Queue the entry and raise the IPI if needed. |
77 | */ | 59 | */ |
78 | static void __irq_work_queue(struct irq_work *entry) | 60 | static void __irq_work_queue(struct irq_work *work) |
79 | { | 61 | { |
80 | struct irq_work *next; | 62 | bool empty; |
81 | 63 | ||
82 | preempt_disable(); | 64 | preempt_disable(); |
83 | 65 | ||
84 | do { | 66 | empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list)); |
85 | next = __this_cpu_read(irq_work_list); | ||
86 | /* Can assign non-atomic because we keep the flags set. */ | ||
87 | entry->next = next_flags(next, IRQ_WORK_FLAGS); | ||
88 | } while (this_cpu_cmpxchg(irq_work_list, next, entry) != next); | ||
89 | |||
90 | /* The list was empty, raise self-interrupt to start processing. */ | 67 | /* The list was empty, raise self-interrupt to start processing. */ |
91 | if (!irq_work_next(entry)) | 68 | if (empty) |
92 | arch_irq_work_raise(); | 69 | arch_irq_work_raise(); |
93 | 70 | ||
94 | preempt_enable(); | 71 | preempt_enable(); |
@@ -100,16 +77,16 @@ static void __irq_work_queue(struct irq_work *entry) | |||
100 | * | 77 | * |
101 | * Can be re-enqueued while the callback is still in progress. | 78 | * Can be re-enqueued while the callback is still in progress. |
102 | */ | 79 | */ |
103 | bool irq_work_queue(struct irq_work *entry) | 80 | bool irq_work_queue(struct irq_work *work) |
104 | { | 81 | { |
105 | if (!irq_work_claim(entry)) { | 82 | if (!irq_work_claim(work)) { |
106 | /* | 83 | /* |
107 | * Already enqueued, can't do! | 84 | * Already enqueued, can't do! |
108 | */ | 85 | */ |
109 | return false; | 86 | return false; |
110 | } | 87 | } |
111 | 88 | ||
112 | __irq_work_queue(entry); | 89 | __irq_work_queue(work); |
113 | return true; | 90 | return true; |
114 | } | 91 | } |
115 | EXPORT_SYMBOL_GPL(irq_work_queue); | 92 | EXPORT_SYMBOL_GPL(irq_work_queue); |
@@ -120,34 +97,34 @@ EXPORT_SYMBOL_GPL(irq_work_queue); | |||
120 | */ | 97 | */ |
121 | void irq_work_run(void) | 98 | void irq_work_run(void) |
122 | { | 99 | { |
123 | struct irq_work *list; | 100 | struct irq_work *work; |
101 | struct llist_head *this_list; | ||
102 | struct llist_node *llnode; | ||
124 | 103 | ||
125 | if (this_cpu_read(irq_work_list) == NULL) | 104 | this_list = &__get_cpu_var(irq_work_list); |
105 | if (llist_empty(this_list)) | ||
126 | return; | 106 | return; |
127 | 107 | ||
128 | BUG_ON(!in_irq()); | 108 | BUG_ON(!in_irq()); |
129 | BUG_ON(!irqs_disabled()); | 109 | BUG_ON(!irqs_disabled()); |
130 | 110 | ||
131 | list = this_cpu_xchg(irq_work_list, NULL); | 111 | llnode = llist_del_all(this_list); |
132 | 112 | while (llnode != NULL) { | |
133 | while (list != NULL) { | 113 | work = llist_entry(llnode, struct irq_work, llnode); |
134 | struct irq_work *entry = list; | ||
135 | 114 | ||
136 | list = irq_work_next(list); | 115 | llnode = llist_next(llnode); |
137 | 116 | ||
138 | /* | 117 | /* |
139 | * Clear the PENDING bit, after this point the @entry | 118 | * Clear the PENDING bit, after this point the @work |
140 | * can be re-used. | 119 | * can be re-used. |
141 | */ | 120 | */ |
142 | entry->next = next_flags(NULL, IRQ_WORK_BUSY); | 121 | work->flags = IRQ_WORK_BUSY; |
143 | entry->func(entry); | 122 | work->func(work); |
144 | /* | 123 | /* |
145 | * Clear the BUSY bit and return to the free state if | 124 | * Clear the BUSY bit and return to the free state if |
146 | * no-one else claimed it meanwhile. | 125 | * no-one else claimed it meanwhile. |
147 | */ | 126 | */ |
148 | (void)cmpxchg(&entry->next, | 127 | (void)cmpxchg(&work->flags, IRQ_WORK_BUSY, 0); |
149 | next_flags(NULL, IRQ_WORK_BUSY), | ||
150 | NULL); | ||
151 | } | 128 | } |
152 | } | 129 | } |
153 | EXPORT_SYMBOL_GPL(irq_work_run); | 130 | EXPORT_SYMBOL_GPL(irq_work_run); |
@@ -156,11 +133,11 @@ EXPORT_SYMBOL_GPL(irq_work_run); | |||
156 | * Synchronize against the irq_work @entry, ensures the entry is not | 133 | * Synchronize against the irq_work @entry, ensures the entry is not |
157 | * currently in use. | 134 | * currently in use. |
158 | */ | 135 | */ |
159 | void irq_work_sync(struct irq_work *entry) | 136 | void irq_work_sync(struct irq_work *work) |
160 | { | 137 | { |
161 | WARN_ON_ONCE(irqs_disabled()); | 138 | WARN_ON_ONCE(irqs_disabled()); |
162 | 139 | ||
163 | while (irq_work_is_set(entry, IRQ_WORK_BUSY)) | 140 | while (work->flags & IRQ_WORK_BUSY) |
164 | cpu_relax(); | 141 | cpu_relax(); |
165 | } | 142 | } |
166 | EXPORT_SYMBOL_GPL(irq_work_sync); | 143 | EXPORT_SYMBOL_GPL(irq_work_sync); |
diff --git a/kernel/jump_label.c b/kernel/jump_label.c index a8ce45097f3..bbdfe2a462a 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c | |||
@@ -104,6 +104,18 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start, | |||
104 | return 0; | 104 | return 0; |
105 | } | 105 | } |
106 | 106 | ||
107 | /* | ||
108 | * Update code which is definitely not currently executing. | ||
109 | * Architectures which need heavyweight synchronization to modify | ||
110 | * running code can override this to make the non-live update case | ||
111 | * cheaper. | ||
112 | */ | ||
113 | void __weak arch_jump_label_transform_static(struct jump_entry *entry, | ||
114 | enum jump_label_type type) | ||
115 | { | ||
116 | arch_jump_label_transform(entry, type); | ||
117 | } | ||
118 | |||
107 | static void __jump_label_update(struct jump_label_key *key, | 119 | static void __jump_label_update(struct jump_label_key *key, |
108 | struct jump_entry *entry, | 120 | struct jump_entry *entry, |
109 | struct jump_entry *stop, int enable) | 121 | struct jump_entry *stop, int enable) |
@@ -121,14 +133,7 @@ static void __jump_label_update(struct jump_label_key *key, | |||
121 | } | 133 | } |
122 | } | 134 | } |
123 | 135 | ||
124 | /* | 136 | void __init jump_label_init(void) |
125 | * Not all archs need this. | ||
126 | */ | ||
127 | void __weak arch_jump_label_text_poke_early(jump_label_t addr) | ||
128 | { | ||
129 | } | ||
130 | |||
131 | static __init int jump_label_init(void) | ||
132 | { | 137 | { |
133 | struct jump_entry *iter_start = __start___jump_table; | 138 | struct jump_entry *iter_start = __start___jump_table; |
134 | struct jump_entry *iter_stop = __stop___jump_table; | 139 | struct jump_entry *iter_stop = __stop___jump_table; |
@@ -139,22 +144,22 @@ static __init int jump_label_init(void) | |||
139 | jump_label_sort_entries(iter_start, iter_stop); | 144 | jump_label_sort_entries(iter_start, iter_stop); |
140 | 145 | ||
141 | for (iter = iter_start; iter < iter_stop; iter++) { | 146 | for (iter = iter_start; iter < iter_stop; iter++) { |
142 | arch_jump_label_text_poke_early(iter->code); | 147 | struct jump_label_key *iterk; |
143 | if (iter->key == (jump_label_t)(unsigned long)key) | 148 | |
149 | iterk = (struct jump_label_key *)(unsigned long)iter->key; | ||
150 | arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ? | ||
151 | JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE); | ||
152 | if (iterk == key) | ||
144 | continue; | 153 | continue; |
145 | 154 | ||
146 | key = (struct jump_label_key *)(unsigned long)iter->key; | 155 | key = iterk; |
147 | atomic_set(&key->enabled, 0); | ||
148 | key->entries = iter; | 156 | key->entries = iter; |
149 | #ifdef CONFIG_MODULES | 157 | #ifdef CONFIG_MODULES |
150 | key->next = NULL; | 158 | key->next = NULL; |
151 | #endif | 159 | #endif |
152 | } | 160 | } |
153 | jump_label_unlock(); | 161 | jump_label_unlock(); |
154 | |||
155 | return 0; | ||
156 | } | 162 | } |
157 | early_initcall(jump_label_init); | ||
158 | 163 | ||
159 | #ifdef CONFIG_MODULES | 164 | #ifdef CONFIG_MODULES |
160 | 165 | ||
@@ -212,7 +217,7 @@ void jump_label_apply_nops(struct module *mod) | |||
212 | return; | 217 | return; |
213 | 218 | ||
214 | for (iter = iter_start; iter < iter_stop; iter++) | 219 | for (iter = iter_start; iter < iter_stop; iter++) |
215 | arch_jump_label_text_poke_early(iter->code); | 220 | arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE); |
216 | } | 221 | } |
217 | 222 | ||
218 | static int jump_label_add_module(struct module *mod) | 223 | static int jump_label_add_module(struct module *mod) |
diff --git a/kernel/kexec.c b/kernel/kexec.c index 296fbc84d65..dc7bc082928 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
@@ -498,7 +498,7 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image, | |||
498 | while (hole_end <= crashk_res.end) { | 498 | while (hole_end <= crashk_res.end) { |
499 | unsigned long i; | 499 | unsigned long i; |
500 | 500 | ||
501 | if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT) | 501 | if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT) |
502 | break; | 502 | break; |
503 | if (hole_end > crashk_res.end) | 503 | if (hole_end > crashk_res.end) |
504 | break; | 504 | break; |
@@ -999,6 +999,7 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, | |||
999 | kimage_free(xchg(&kexec_crash_image, NULL)); | 999 | kimage_free(xchg(&kexec_crash_image, NULL)); |
1000 | result = kimage_crash_alloc(&image, entry, | 1000 | result = kimage_crash_alloc(&image, entry, |
1001 | nr_segments, segments); | 1001 | nr_segments, segments); |
1002 | crash_map_reserved_pages(); | ||
1002 | } | 1003 | } |
1003 | if (result) | 1004 | if (result) |
1004 | goto out; | 1005 | goto out; |
@@ -1015,6 +1016,8 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, | |||
1015 | goto out; | 1016 | goto out; |
1016 | } | 1017 | } |
1017 | kimage_terminate(image); | 1018 | kimage_terminate(image); |
1019 | if (flags & KEXEC_ON_CRASH) | ||
1020 | crash_unmap_reserved_pages(); | ||
1018 | } | 1021 | } |
1019 | /* Install the new kernel, and Uninstall the old */ | 1022 | /* Install the new kernel, and Uninstall the old */ |
1020 | image = xchg(dest_image, image); | 1023 | image = xchg(dest_image, image); |
@@ -1026,6 +1029,18 @@ out: | |||
1026 | return result; | 1029 | return result; |
1027 | } | 1030 | } |
1028 | 1031 | ||
1032 | /* | ||
1033 | * Add and remove page tables for crashkernel memory | ||
1034 | * | ||
1035 | * Provide an empty default implementation here -- architecture | ||
1036 | * code may override this | ||
1037 | */ | ||
1038 | void __weak crash_map_reserved_pages(void) | ||
1039 | {} | ||
1040 | |||
1041 | void __weak crash_unmap_reserved_pages(void) | ||
1042 | {} | ||
1043 | |||
1029 | #ifdef CONFIG_COMPAT | 1044 | #ifdef CONFIG_COMPAT |
1030 | asmlinkage long compat_sys_kexec_load(unsigned long entry, | 1045 | asmlinkage long compat_sys_kexec_load(unsigned long entry, |
1031 | unsigned long nr_segments, | 1046 | unsigned long nr_segments, |
@@ -1134,14 +1149,16 @@ int crash_shrink_memory(unsigned long new_size) | |||
1134 | goto unlock; | 1149 | goto unlock; |
1135 | } | 1150 | } |
1136 | 1151 | ||
1137 | start = roundup(start, PAGE_SIZE); | 1152 | start = roundup(start, KEXEC_CRASH_MEM_ALIGN); |
1138 | end = roundup(start + new_size, PAGE_SIZE); | 1153 | end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN); |
1139 | 1154 | ||
1155 | crash_map_reserved_pages(); | ||
1140 | crash_free_reserved_phys_range(end, crashk_res.end); | 1156 | crash_free_reserved_phys_range(end, crashk_res.end); |
1141 | 1157 | ||
1142 | if ((start == end) && (crashk_res.parent != NULL)) | 1158 | if ((start == end) && (crashk_res.parent != NULL)) |
1143 | release_resource(&crashk_res); | 1159 | release_resource(&crashk_res); |
1144 | crashk_res.end = end - 1; | 1160 | crashk_res.end = end - 1; |
1161 | crash_unmap_reserved_pages(); | ||
1145 | 1162 | ||
1146 | unlock: | 1163 | unlock: |
1147 | mutex_unlock(&kexec_mutex); | 1164 | mutex_unlock(&kexec_mutex); |
@@ -1380,24 +1397,23 @@ int __init parse_crashkernel(char *cmdline, | |||
1380 | } | 1397 | } |
1381 | 1398 | ||
1382 | 1399 | ||
1383 | 1400 | static void update_vmcoreinfo_note(void) | |
1384 | void crash_save_vmcoreinfo(void) | ||
1385 | { | 1401 | { |
1386 | u32 *buf; | 1402 | u32 *buf = vmcoreinfo_note; |
1387 | 1403 | ||
1388 | if (!vmcoreinfo_size) | 1404 | if (!vmcoreinfo_size) |
1389 | return; | 1405 | return; |
1390 | |||
1391 | vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds()); | ||
1392 | |||
1393 | buf = (u32 *)vmcoreinfo_note; | ||
1394 | |||
1395 | buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, | 1406 | buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, |
1396 | vmcoreinfo_size); | 1407 | vmcoreinfo_size); |
1397 | |||
1398 | final_note(buf); | 1408 | final_note(buf); |
1399 | } | 1409 | } |
1400 | 1410 | ||
1411 | void crash_save_vmcoreinfo(void) | ||
1412 | { | ||
1413 | vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds()); | ||
1414 | update_vmcoreinfo_note(); | ||
1415 | } | ||
1416 | |||
1401 | void vmcoreinfo_append_str(const char *fmt, ...) | 1417 | void vmcoreinfo_append_str(const char *fmt, ...) |
1402 | { | 1418 | { |
1403 | va_list args; | 1419 | va_list args; |
@@ -1483,6 +1499,7 @@ static int __init crash_save_vmcoreinfo_init(void) | |||
1483 | VMCOREINFO_NUMBER(PG_swapcache); | 1499 | VMCOREINFO_NUMBER(PG_swapcache); |
1484 | 1500 | ||
1485 | arch_crash_save_vmcoreinfo(); | 1501 | arch_crash_save_vmcoreinfo(); |
1502 | update_vmcoreinfo_note(); | ||
1486 | 1503 | ||
1487 | return 0; | 1504 | return 0; |
1488 | } | 1505 | } |
diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 01a0700e873..c744b88c44e 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c | |||
@@ -20,7 +20,7 @@ | |||
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/kernel.h> | 22 | #include <linux/kernel.h> |
23 | #include <linux/module.h> | 23 | #include <linux/export.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/err.h> | 25 | #include <linux/err.h> |
26 | #include <linux/log2.h> | 26 | #include <linux/log2.h> |
diff --git a/kernel/kmod.c b/kernel/kmod.c index ddc7644c130..a4bea97c75b 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
@@ -114,10 +114,12 @@ int __request_module(bool wait, const char *fmt, ...) | |||
114 | atomic_inc(&kmod_concurrent); | 114 | atomic_inc(&kmod_concurrent); |
115 | if (atomic_read(&kmod_concurrent) > max_modprobes) { | 115 | if (atomic_read(&kmod_concurrent) > max_modprobes) { |
116 | /* We may be blaming an innocent here, but unlikely */ | 116 | /* We may be blaming an innocent here, but unlikely */ |
117 | if (kmod_loop_msg++ < 5) | 117 | if (kmod_loop_msg < 5) { |
118 | printk(KERN_ERR | 118 | printk(KERN_ERR |
119 | "request_module: runaway loop modprobe %s\n", | 119 | "request_module: runaway loop modprobe %s\n", |
120 | module_name); | 120 | module_name); |
121 | kmod_loop_msg++; | ||
122 | } | ||
121 | atomic_dec(&kmod_concurrent); | 123 | atomic_dec(&kmod_concurrent); |
122 | return -ENOMEM; | 124 | return -ENOMEM; |
123 | } | 125 | } |
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index b30fd54eb98..e5d84644823 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -36,7 +36,7 @@ | |||
36 | #include <linux/init.h> | 36 | #include <linux/init.h> |
37 | #include <linux/slab.h> | 37 | #include <linux/slab.h> |
38 | #include <linux/stddef.h> | 38 | #include <linux/stddef.h> |
39 | #include <linux/module.h> | 39 | #include <linux/export.h> |
40 | #include <linux/moduleloader.h> | 40 | #include <linux/moduleloader.h> |
41 | #include <linux/kallsyms.h> | 41 | #include <linux/kallsyms.h> |
42 | #include <linux/freezer.h> | 42 | #include <linux/freezer.h> |
@@ -78,10 +78,10 @@ static bool kprobes_all_disarmed; | |||
78 | static DEFINE_MUTEX(kprobe_mutex); | 78 | static DEFINE_MUTEX(kprobe_mutex); |
79 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; | 79 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; |
80 | static struct { | 80 | static struct { |
81 | spinlock_t lock ____cacheline_aligned_in_smp; | 81 | raw_spinlock_t lock ____cacheline_aligned_in_smp; |
82 | } kretprobe_table_locks[KPROBE_TABLE_SIZE]; | 82 | } kretprobe_table_locks[KPROBE_TABLE_SIZE]; |
83 | 83 | ||
84 | static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) | 84 | static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) |
85 | { | 85 | { |
86 | return &(kretprobe_table_locks[hash].lock); | 86 | return &(kretprobe_table_locks[hash].lock); |
87 | } | 87 | } |
@@ -1013,9 +1013,9 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri, | |||
1013 | hlist_del(&ri->hlist); | 1013 | hlist_del(&ri->hlist); |
1014 | INIT_HLIST_NODE(&ri->hlist); | 1014 | INIT_HLIST_NODE(&ri->hlist); |
1015 | if (likely(rp)) { | 1015 | if (likely(rp)) { |
1016 | spin_lock(&rp->lock); | 1016 | raw_spin_lock(&rp->lock); |
1017 | hlist_add_head(&ri->hlist, &rp->free_instances); | 1017 | hlist_add_head(&ri->hlist, &rp->free_instances); |
1018 | spin_unlock(&rp->lock); | 1018 | raw_spin_unlock(&rp->lock); |
1019 | } else | 1019 | } else |
1020 | /* Unregistering */ | 1020 | /* Unregistering */ |
1021 | hlist_add_head(&ri->hlist, head); | 1021 | hlist_add_head(&ri->hlist, head); |
@@ -1026,19 +1026,19 @@ void __kprobes kretprobe_hash_lock(struct task_struct *tsk, | |||
1026 | __acquires(hlist_lock) | 1026 | __acquires(hlist_lock) |
1027 | { | 1027 | { |
1028 | unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); | 1028 | unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); |
1029 | spinlock_t *hlist_lock; | 1029 | raw_spinlock_t *hlist_lock; |
1030 | 1030 | ||
1031 | *head = &kretprobe_inst_table[hash]; | 1031 | *head = &kretprobe_inst_table[hash]; |
1032 | hlist_lock = kretprobe_table_lock_ptr(hash); | 1032 | hlist_lock = kretprobe_table_lock_ptr(hash); |
1033 | spin_lock_irqsave(hlist_lock, *flags); | 1033 | raw_spin_lock_irqsave(hlist_lock, *flags); |
1034 | } | 1034 | } |
1035 | 1035 | ||
1036 | static void __kprobes kretprobe_table_lock(unsigned long hash, | 1036 | static void __kprobes kretprobe_table_lock(unsigned long hash, |
1037 | unsigned long *flags) | 1037 | unsigned long *flags) |
1038 | __acquires(hlist_lock) | 1038 | __acquires(hlist_lock) |
1039 | { | 1039 | { |
1040 | spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); | 1040 | raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); |
1041 | spin_lock_irqsave(hlist_lock, *flags); | 1041 | raw_spin_lock_irqsave(hlist_lock, *flags); |
1042 | } | 1042 | } |
1043 | 1043 | ||
1044 | void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, | 1044 | void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, |
@@ -1046,18 +1046,18 @@ void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, | |||
1046 | __releases(hlist_lock) | 1046 | __releases(hlist_lock) |
1047 | { | 1047 | { |
1048 | unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); | 1048 | unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); |
1049 | spinlock_t *hlist_lock; | 1049 | raw_spinlock_t *hlist_lock; |
1050 | 1050 | ||
1051 | hlist_lock = kretprobe_table_lock_ptr(hash); | 1051 | hlist_lock = kretprobe_table_lock_ptr(hash); |
1052 | spin_unlock_irqrestore(hlist_lock, *flags); | 1052 | raw_spin_unlock_irqrestore(hlist_lock, *flags); |
1053 | } | 1053 | } |
1054 | 1054 | ||
1055 | static void __kprobes kretprobe_table_unlock(unsigned long hash, | 1055 | static void __kprobes kretprobe_table_unlock(unsigned long hash, |
1056 | unsigned long *flags) | 1056 | unsigned long *flags) |
1057 | __releases(hlist_lock) | 1057 | __releases(hlist_lock) |
1058 | { | 1058 | { |
1059 | spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); | 1059 | raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); |
1060 | spin_unlock_irqrestore(hlist_lock, *flags); | 1060 | raw_spin_unlock_irqrestore(hlist_lock, *flags); |
1061 | } | 1061 | } |
1062 | 1062 | ||
1063 | /* | 1063 | /* |
@@ -1663,12 +1663,12 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, | |||
1663 | 1663 | ||
1664 | /*TODO: consider to only swap the RA after the last pre_handler fired */ | 1664 | /*TODO: consider to only swap the RA after the last pre_handler fired */ |
1665 | hash = hash_ptr(current, KPROBE_HASH_BITS); | 1665 | hash = hash_ptr(current, KPROBE_HASH_BITS); |
1666 | spin_lock_irqsave(&rp->lock, flags); | 1666 | raw_spin_lock_irqsave(&rp->lock, flags); |
1667 | if (!hlist_empty(&rp->free_instances)) { | 1667 | if (!hlist_empty(&rp->free_instances)) { |
1668 | ri = hlist_entry(rp->free_instances.first, | 1668 | ri = hlist_entry(rp->free_instances.first, |
1669 | struct kretprobe_instance, hlist); | 1669 | struct kretprobe_instance, hlist); |
1670 | hlist_del(&ri->hlist); | 1670 | hlist_del(&ri->hlist); |
1671 | spin_unlock_irqrestore(&rp->lock, flags); | 1671 | raw_spin_unlock_irqrestore(&rp->lock, flags); |
1672 | 1672 | ||
1673 | ri->rp = rp; | 1673 | ri->rp = rp; |
1674 | ri->task = current; | 1674 | ri->task = current; |
@@ -1685,7 +1685,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, | |||
1685 | kretprobe_table_unlock(hash, &flags); | 1685 | kretprobe_table_unlock(hash, &flags); |
1686 | } else { | 1686 | } else { |
1687 | rp->nmissed++; | 1687 | rp->nmissed++; |
1688 | spin_unlock_irqrestore(&rp->lock, flags); | 1688 | raw_spin_unlock_irqrestore(&rp->lock, flags); |
1689 | } | 1689 | } |
1690 | return 0; | 1690 | return 0; |
1691 | } | 1691 | } |
@@ -1721,7 +1721,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp) | |||
1721 | rp->maxactive = num_possible_cpus(); | 1721 | rp->maxactive = num_possible_cpus(); |
1722 | #endif | 1722 | #endif |
1723 | } | 1723 | } |
1724 | spin_lock_init(&rp->lock); | 1724 | raw_spin_lock_init(&rp->lock); |
1725 | INIT_HLIST_HEAD(&rp->free_instances); | 1725 | INIT_HLIST_HEAD(&rp->free_instances); |
1726 | for (i = 0; i < rp->maxactive; i++) { | 1726 | for (i = 0; i < rp->maxactive; i++) { |
1727 | inst = kmalloc(sizeof(struct kretprobe_instance) + | 1727 | inst = kmalloc(sizeof(struct kretprobe_instance) + |
@@ -1959,7 +1959,7 @@ static int __init init_kprobes(void) | |||
1959 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | 1959 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
1960 | INIT_HLIST_HEAD(&kprobe_table[i]); | 1960 | INIT_HLIST_HEAD(&kprobe_table[i]); |
1961 | INIT_HLIST_HEAD(&kretprobe_inst_table[i]); | 1961 | INIT_HLIST_HEAD(&kretprobe_inst_table[i]); |
1962 | spin_lock_init(&(kretprobe_table_locks[i].lock)); | 1962 | raw_spin_lock_init(&(kretprobe_table_locks[i].lock)); |
1963 | } | 1963 | } |
1964 | 1964 | ||
1965 | /* | 1965 | /* |
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 3b053c04dd8..4e316e1acf5 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c | |||
@@ -11,10 +11,11 @@ | |||
11 | #include <linux/kobject.h> | 11 | #include <linux/kobject.h> |
12 | #include <linux/string.h> | 12 | #include <linux/string.h> |
13 | #include <linux/sysfs.h> | 13 | #include <linux/sysfs.h> |
14 | #include <linux/module.h> | 14 | #include <linux/export.h> |
15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
16 | #include <linux/kexec.h> | 16 | #include <linux/kexec.h> |
17 | #include <linux/profile.h> | 17 | #include <linux/profile.h> |
18 | #include <linux/stat.h> | ||
18 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
19 | #include <linux/capability.h> | 20 | #include <linux/capability.h> |
20 | 21 | ||
diff --git a/kernel/kthread.c b/kernel/kthread.c index 4ba7cccb499..b6d216a9263 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -12,7 +12,7 @@ | |||
12 | #include <linux/cpuset.h> | 12 | #include <linux/cpuset.h> |
13 | #include <linux/unistd.h> | 13 | #include <linux/unistd.h> |
14 | #include <linux/file.h> | 14 | #include <linux/file.h> |
15 | #include <linux/module.h> | 15 | #include <linux/export.h> |
16 | #include <linux/mutex.h> | 16 | #include <linux/mutex.h> |
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/freezer.h> | 18 | #include <linux/freezer.h> |
diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 376066e1041..a462b317f9a 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c | |||
@@ -53,12 +53,12 @@ | |||
53 | #include <linux/notifier.h> | 53 | #include <linux/notifier.h> |
54 | #include <linux/spinlock.h> | 54 | #include <linux/spinlock.h> |
55 | #include <linux/proc_fs.h> | 55 | #include <linux/proc_fs.h> |
56 | #include <linux/module.h> | 56 | #include <linux/export.h> |
57 | #include <linux/sched.h> | 57 | #include <linux/sched.h> |
58 | #include <linux/list.h> | 58 | #include <linux/list.h> |
59 | #include <linux/stacktrace.h> | 59 | #include <linux/stacktrace.h> |
60 | 60 | ||
61 | static DEFINE_SPINLOCK(latency_lock); | 61 | static DEFINE_RAW_SPINLOCK(latency_lock); |
62 | 62 | ||
63 | #define MAXLR 128 | 63 | #define MAXLR 128 |
64 | static struct latency_record latency_record[MAXLR]; | 64 | static struct latency_record latency_record[MAXLR]; |
@@ -72,19 +72,19 @@ void clear_all_latency_tracing(struct task_struct *p) | |||
72 | if (!latencytop_enabled) | 72 | if (!latencytop_enabled) |
73 | return; | 73 | return; |
74 | 74 | ||
75 | spin_lock_irqsave(&latency_lock, flags); | 75 | raw_spin_lock_irqsave(&latency_lock, flags); |
76 | memset(&p->latency_record, 0, sizeof(p->latency_record)); | 76 | memset(&p->latency_record, 0, sizeof(p->latency_record)); |
77 | p->latency_record_count = 0; | 77 | p->latency_record_count = 0; |
78 | spin_unlock_irqrestore(&latency_lock, flags); | 78 | raw_spin_unlock_irqrestore(&latency_lock, flags); |
79 | } | 79 | } |
80 | 80 | ||
81 | static void clear_global_latency_tracing(void) | 81 | static void clear_global_latency_tracing(void) |
82 | { | 82 | { |
83 | unsigned long flags; | 83 | unsigned long flags; |
84 | 84 | ||
85 | spin_lock_irqsave(&latency_lock, flags); | 85 | raw_spin_lock_irqsave(&latency_lock, flags); |
86 | memset(&latency_record, 0, sizeof(latency_record)); | 86 | memset(&latency_record, 0, sizeof(latency_record)); |
87 | spin_unlock_irqrestore(&latency_lock, flags); | 87 | raw_spin_unlock_irqrestore(&latency_lock, flags); |
88 | } | 88 | } |
89 | 89 | ||
90 | static void __sched | 90 | static void __sched |
@@ -190,7 +190,7 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter) | |||
190 | lat.max = usecs; | 190 | lat.max = usecs; |
191 | store_stacktrace(tsk, &lat); | 191 | store_stacktrace(tsk, &lat); |
192 | 192 | ||
193 | spin_lock_irqsave(&latency_lock, flags); | 193 | raw_spin_lock_irqsave(&latency_lock, flags); |
194 | 194 | ||
195 | account_global_scheduler_latency(tsk, &lat); | 195 | account_global_scheduler_latency(tsk, &lat); |
196 | 196 | ||
@@ -231,7 +231,7 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter) | |||
231 | memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record)); | 231 | memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record)); |
232 | 232 | ||
233 | out_unlock: | 233 | out_unlock: |
234 | spin_unlock_irqrestore(&latency_lock, flags); | 234 | raw_spin_unlock_irqrestore(&latency_lock, flags); |
235 | } | 235 | } |
236 | 236 | ||
237 | static int lstats_show(struct seq_file *m, void *v) | 237 | static int lstats_show(struct seq_file *m, void *v) |
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 8c24294e477..e69434b070d 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
@@ -96,8 +96,13 @@ static int graph_lock(void) | |||
96 | 96 | ||
97 | static inline int graph_unlock(void) | 97 | static inline int graph_unlock(void) |
98 | { | 98 | { |
99 | if (debug_locks && !arch_spin_is_locked(&lockdep_lock)) | 99 | if (debug_locks && !arch_spin_is_locked(&lockdep_lock)) { |
100 | /* | ||
101 | * The lockdep graph lock isn't locked while we expect it to | ||
102 | * be, we're confused now, bye! | ||
103 | */ | ||
100 | return DEBUG_LOCKS_WARN_ON(1); | 104 | return DEBUG_LOCKS_WARN_ON(1); |
105 | } | ||
101 | 106 | ||
102 | current->lockdep_recursion--; | 107 | current->lockdep_recursion--; |
103 | arch_spin_unlock(&lockdep_lock); | 108 | arch_spin_unlock(&lockdep_lock); |
@@ -134,6 +139,9 @@ static struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; | |||
134 | static inline struct lock_class *hlock_class(struct held_lock *hlock) | 139 | static inline struct lock_class *hlock_class(struct held_lock *hlock) |
135 | { | 140 | { |
136 | if (!hlock->class_idx) { | 141 | if (!hlock->class_idx) { |
142 | /* | ||
143 | * Someone passed in garbage, we give up. | ||
144 | */ | ||
137 | DEBUG_LOCKS_WARN_ON(1); | 145 | DEBUG_LOCKS_WARN_ON(1); |
138 | return NULL; | 146 | return NULL; |
139 | } | 147 | } |
@@ -687,6 +695,10 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) | |||
687 | */ | 695 | */ |
688 | list_for_each_entry(class, hash_head, hash_entry) { | 696 | list_for_each_entry(class, hash_head, hash_entry) { |
689 | if (class->key == key) { | 697 | if (class->key == key) { |
698 | /* | ||
699 | * Huh! same key, different name? Did someone trample | ||
700 | * on some memory? We're most confused. | ||
701 | */ | ||
690 | WARN_ON_ONCE(class->name != lock->name); | 702 | WARN_ON_ONCE(class->name != lock->name); |
691 | return class; | 703 | return class; |
692 | } | 704 | } |
@@ -800,6 +812,10 @@ out_unlock_set: | |||
800 | else if (subclass < NR_LOCKDEP_CACHING_CLASSES) | 812 | else if (subclass < NR_LOCKDEP_CACHING_CLASSES) |
801 | lock->class_cache[subclass] = class; | 813 | lock->class_cache[subclass] = class; |
802 | 814 | ||
815 | /* | ||
816 | * Hash collision, did we smoke some? We found a class with a matching | ||
817 | * hash but the subclass -- which is hashed in -- didn't match. | ||
818 | */ | ||
803 | if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) | 819 | if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) |
804 | return NULL; | 820 | return NULL; |
805 | 821 | ||
@@ -926,7 +942,7 @@ static inline void mark_lock_accessed(struct lock_list *lock, | |||
926 | unsigned long nr; | 942 | unsigned long nr; |
927 | 943 | ||
928 | nr = lock - list_entries; | 944 | nr = lock - list_entries; |
929 | WARN_ON(nr >= nr_list_entries); | 945 | WARN_ON(nr >= nr_list_entries); /* Out-of-bounds, input fail */ |
930 | lock->parent = parent; | 946 | lock->parent = parent; |
931 | lock->class->dep_gen_id = lockdep_dependency_gen_id; | 947 | lock->class->dep_gen_id = lockdep_dependency_gen_id; |
932 | } | 948 | } |
@@ -936,7 +952,7 @@ static inline unsigned long lock_accessed(struct lock_list *lock) | |||
936 | unsigned long nr; | 952 | unsigned long nr; |
937 | 953 | ||
938 | nr = lock - list_entries; | 954 | nr = lock - list_entries; |
939 | WARN_ON(nr >= nr_list_entries); | 955 | WARN_ON(nr >= nr_list_entries); /* Out-of-bounds, input fail */ |
940 | return lock->class->dep_gen_id == lockdep_dependency_gen_id; | 956 | return lock->class->dep_gen_id == lockdep_dependency_gen_id; |
941 | } | 957 | } |
942 | 958 | ||
@@ -1129,10 +1145,11 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth, | |||
1129 | if (debug_locks_silent) | 1145 | if (debug_locks_silent) |
1130 | return 0; | 1146 | return 0; |
1131 | 1147 | ||
1132 | printk("\n=======================================================\n"); | 1148 | printk("\n"); |
1133 | printk( "[ INFO: possible circular locking dependency detected ]\n"); | 1149 | printk("======================================================\n"); |
1150 | printk("[ INFO: possible circular locking dependency detected ]\n"); | ||
1134 | print_kernel_version(); | 1151 | print_kernel_version(); |
1135 | printk( "-------------------------------------------------------\n"); | 1152 | printk("-------------------------------------------------------\n"); |
1136 | printk("%s/%d is trying to acquire lock:\n", | 1153 | printk("%s/%d is trying to acquire lock:\n", |
1137 | curr->comm, task_pid_nr(curr)); | 1154 | curr->comm, task_pid_nr(curr)); |
1138 | print_lock(check_src); | 1155 | print_lock(check_src); |
@@ -1196,6 +1213,9 @@ static noinline int print_bfs_bug(int ret) | |||
1196 | if (!debug_locks_off_graph_unlock()) | 1213 | if (!debug_locks_off_graph_unlock()) |
1197 | return 0; | 1214 | return 0; |
1198 | 1215 | ||
1216 | /* | ||
1217 | * Breadth-first-search failed, graph got corrupted? | ||
1218 | */ | ||
1199 | WARN(1, "lockdep bfs error:%d\n", ret); | 1219 | WARN(1, "lockdep bfs error:%d\n", ret); |
1200 | 1220 | ||
1201 | return 0; | 1221 | return 0; |
@@ -1463,11 +1483,12 @@ print_bad_irq_dependency(struct task_struct *curr, | |||
1463 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | 1483 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) |
1464 | return 0; | 1484 | return 0; |
1465 | 1485 | ||
1466 | printk("\n======================================================\n"); | 1486 | printk("\n"); |
1467 | printk( "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", | 1487 | printk("======================================================\n"); |
1488 | printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", | ||
1468 | irqclass, irqclass); | 1489 | irqclass, irqclass); |
1469 | print_kernel_version(); | 1490 | print_kernel_version(); |
1470 | printk( "------------------------------------------------------\n"); | 1491 | printk("------------------------------------------------------\n"); |
1471 | printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", | 1492 | printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", |
1472 | curr->comm, task_pid_nr(curr), | 1493 | curr->comm, task_pid_nr(curr), |
1473 | curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, | 1494 | curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, |
@@ -1692,10 +1713,11 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, | |||
1692 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | 1713 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) |
1693 | return 0; | 1714 | return 0; |
1694 | 1715 | ||
1695 | printk("\n=============================================\n"); | 1716 | printk("\n"); |
1696 | printk( "[ INFO: possible recursive locking detected ]\n"); | 1717 | printk("=============================================\n"); |
1718 | printk("[ INFO: possible recursive locking detected ]\n"); | ||
1697 | print_kernel_version(); | 1719 | print_kernel_version(); |
1698 | printk( "---------------------------------------------\n"); | 1720 | printk("---------------------------------------------\n"); |
1699 | printk("%s/%d is trying to acquire lock:\n", | 1721 | printk("%s/%d is trying to acquire lock:\n", |
1700 | curr->comm, task_pid_nr(curr)); | 1722 | curr->comm, task_pid_nr(curr)); |
1701 | print_lock(next); | 1723 | print_lock(next); |
@@ -1944,6 +1966,11 @@ out_bug: | |||
1944 | if (!debug_locks_off_graph_unlock()) | 1966 | if (!debug_locks_off_graph_unlock()) |
1945 | return 0; | 1967 | return 0; |
1946 | 1968 | ||
1969 | /* | ||
1970 | * Clearly we all shouldn't be here, but since we made it we | ||
1971 | * can reliable say we messed up our state. See the above two | ||
1972 | * gotos for reasons why we could possibly end up here. | ||
1973 | */ | ||
1947 | WARN_ON(1); | 1974 | WARN_ON(1); |
1948 | 1975 | ||
1949 | return 0; | 1976 | return 0; |
@@ -1975,6 +2002,11 @@ static inline int lookup_chain_cache(struct task_struct *curr, | |||
1975 | struct held_lock *hlock_curr, *hlock_next; | 2002 | struct held_lock *hlock_curr, *hlock_next; |
1976 | int i, j; | 2003 | int i, j; |
1977 | 2004 | ||
2005 | /* | ||
2006 | * We might need to take the graph lock, ensure we've got IRQs | ||
2007 | * disabled to make this an IRQ-safe lock.. for recursion reasons | ||
2008 | * lockdep won't complain about its own locking errors. | ||
2009 | */ | ||
1978 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | 2010 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) |
1979 | return 0; | 2011 | return 0; |
1980 | /* | 2012 | /* |
@@ -2126,6 +2158,10 @@ static void check_chain_key(struct task_struct *curr) | |||
2126 | hlock = curr->held_locks + i; | 2158 | hlock = curr->held_locks + i; |
2127 | if (chain_key != hlock->prev_chain_key) { | 2159 | if (chain_key != hlock->prev_chain_key) { |
2128 | debug_locks_off(); | 2160 | debug_locks_off(); |
2161 | /* | ||
2162 | * We got mighty confused, our chain keys don't match | ||
2163 | * with what we expect, did someone trample on our task state? | ||
2164 | */ | ||
2129 | WARN(1, "hm#1, depth: %u [%u], %016Lx != %016Lx\n", | 2165 | WARN(1, "hm#1, depth: %u [%u], %016Lx != %016Lx\n", |
2130 | curr->lockdep_depth, i, | 2166 | curr->lockdep_depth, i, |
2131 | (unsigned long long)chain_key, | 2167 | (unsigned long long)chain_key, |
@@ -2133,6 +2169,9 @@ static void check_chain_key(struct task_struct *curr) | |||
2133 | return; | 2169 | return; |
2134 | } | 2170 | } |
2135 | id = hlock->class_idx - 1; | 2171 | id = hlock->class_idx - 1; |
2172 | /* | ||
2173 | * Whoops ran out of static storage again? | ||
2174 | */ | ||
2136 | if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) | 2175 | if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) |
2137 | return; | 2176 | return; |
2138 | 2177 | ||
@@ -2144,6 +2183,10 @@ static void check_chain_key(struct task_struct *curr) | |||
2144 | } | 2183 | } |
2145 | if (chain_key != curr->curr_chain_key) { | 2184 | if (chain_key != curr->curr_chain_key) { |
2146 | debug_locks_off(); | 2185 | debug_locks_off(); |
2186 | /* | ||
2187 | * More smoking hash instead of calculating it, damn see these | ||
2188 | * numbers float.. I bet that a pink elephant stepped on my memory. | ||
2189 | */ | ||
2147 | WARN(1, "hm#2, depth: %u [%u], %016Lx != %016Lx\n", | 2190 | WARN(1, "hm#2, depth: %u [%u], %016Lx != %016Lx\n", |
2148 | curr->lockdep_depth, i, | 2191 | curr->lockdep_depth, i, |
2149 | (unsigned long long)chain_key, | 2192 | (unsigned long long)chain_key, |
@@ -2177,10 +2220,11 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, | |||
2177 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | 2220 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) |
2178 | return 0; | 2221 | return 0; |
2179 | 2222 | ||
2180 | printk("\n=================================\n"); | 2223 | printk("\n"); |
2181 | printk( "[ INFO: inconsistent lock state ]\n"); | 2224 | printk("=================================\n"); |
2225 | printk("[ INFO: inconsistent lock state ]\n"); | ||
2182 | print_kernel_version(); | 2226 | print_kernel_version(); |
2183 | printk( "---------------------------------\n"); | 2227 | printk("---------------------------------\n"); |
2184 | 2228 | ||
2185 | printk("inconsistent {%s} -> {%s} usage.\n", | 2229 | printk("inconsistent {%s} -> {%s} usage.\n", |
2186 | usage_str[prev_bit], usage_str[new_bit]); | 2230 | usage_str[prev_bit], usage_str[new_bit]); |
@@ -2241,10 +2285,11 @@ print_irq_inversion_bug(struct task_struct *curr, | |||
2241 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | 2285 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) |
2242 | return 0; | 2286 | return 0; |
2243 | 2287 | ||
2244 | printk("\n=========================================================\n"); | 2288 | printk("\n"); |
2245 | printk( "[ INFO: possible irq lock inversion dependency detected ]\n"); | 2289 | printk("=========================================================\n"); |
2290 | printk("[ INFO: possible irq lock inversion dependency detected ]\n"); | ||
2246 | print_kernel_version(); | 2291 | print_kernel_version(); |
2247 | printk( "---------------------------------------------------------\n"); | 2292 | printk("---------------------------------------------------------\n"); |
2248 | printk("%s/%d just changed the state of lock:\n", | 2293 | printk("%s/%d just changed the state of lock:\n", |
2249 | curr->comm, task_pid_nr(curr)); | 2294 | curr->comm, task_pid_nr(curr)); |
2250 | print_lock(this); | 2295 | print_lock(this); |
@@ -2525,12 +2570,24 @@ void trace_hardirqs_on_caller(unsigned long ip) | |||
2525 | return; | 2570 | return; |
2526 | } | 2571 | } |
2527 | 2572 | ||
2573 | /* | ||
2574 | * We're enabling irqs and according to our state above irqs weren't | ||
2575 | * already enabled, yet we find the hardware thinks they are in fact | ||
2576 | * enabled.. someone messed up their IRQ state tracing. | ||
2577 | */ | ||
2528 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | 2578 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) |
2529 | return; | 2579 | return; |
2530 | 2580 | ||
2581 | /* | ||
2582 | * See the fine text that goes along with this variable definition. | ||
2583 | */ | ||
2531 | if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) | 2584 | if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) |
2532 | return; | 2585 | return; |
2533 | 2586 | ||
2587 | /* | ||
2588 | * Can't allow enabling interrupts while in an interrupt handler, | ||
2589 | * that's general bad form and such. Recursion, limited stack etc.. | ||
2590 | */ | ||
2534 | if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) | 2591 | if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) |
2535 | return; | 2592 | return; |
2536 | 2593 | ||
@@ -2558,6 +2615,10 @@ void trace_hardirqs_off_caller(unsigned long ip) | |||
2558 | if (unlikely(!debug_locks || current->lockdep_recursion)) | 2615 | if (unlikely(!debug_locks || current->lockdep_recursion)) |
2559 | return; | 2616 | return; |
2560 | 2617 | ||
2618 | /* | ||
2619 | * So we're supposed to get called after you mask local IRQs, but for | ||
2620 | * some reason the hardware doesn't quite think you did a proper job. | ||
2621 | */ | ||
2561 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | 2622 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) |
2562 | return; | 2623 | return; |
2563 | 2624 | ||
@@ -2590,6 +2651,10 @@ void trace_softirqs_on(unsigned long ip) | |||
2590 | if (unlikely(!debug_locks || current->lockdep_recursion)) | 2651 | if (unlikely(!debug_locks || current->lockdep_recursion)) |
2591 | return; | 2652 | return; |
2592 | 2653 | ||
2654 | /* | ||
2655 | * We fancy IRQs being disabled here, see softirq.c, avoids | ||
2656 | * funny state and nesting things. | ||
2657 | */ | ||
2593 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | 2658 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) |
2594 | return; | 2659 | return; |
2595 | 2660 | ||
@@ -2626,6 +2691,9 @@ void trace_softirqs_off(unsigned long ip) | |||
2626 | if (unlikely(!debug_locks || current->lockdep_recursion)) | 2691 | if (unlikely(!debug_locks || current->lockdep_recursion)) |
2627 | return; | 2692 | return; |
2628 | 2693 | ||
2694 | /* | ||
2695 | * We fancy IRQs being disabled here, see softirq.c | ||
2696 | */ | ||
2629 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | 2697 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) |
2630 | return; | 2698 | return; |
2631 | 2699 | ||
@@ -2637,6 +2705,9 @@ void trace_softirqs_off(unsigned long ip) | |||
2637 | curr->softirq_disable_ip = ip; | 2705 | curr->softirq_disable_ip = ip; |
2638 | curr->softirq_disable_event = ++curr->irq_events; | 2706 | curr->softirq_disable_event = ++curr->irq_events; |
2639 | debug_atomic_inc(softirqs_off_events); | 2707 | debug_atomic_inc(softirqs_off_events); |
2708 | /* | ||
2709 | * Whoops, we wanted softirqs off, so why aren't they? | ||
2710 | */ | ||
2640 | DEBUG_LOCKS_WARN_ON(!softirq_count()); | 2711 | DEBUG_LOCKS_WARN_ON(!softirq_count()); |
2641 | } else | 2712 | } else |
2642 | debug_atomic_inc(redundant_softirqs_off); | 2713 | debug_atomic_inc(redundant_softirqs_off); |
@@ -2661,6 +2732,9 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags) | |||
2661 | if (!(gfp_mask & __GFP_FS)) | 2732 | if (!(gfp_mask & __GFP_FS)) |
2662 | return; | 2733 | return; |
2663 | 2734 | ||
2735 | /* | ||
2736 | * Oi! Can't be having __GFP_FS allocations with IRQs disabled. | ||
2737 | */ | ||
2664 | if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))) | 2738 | if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))) |
2665 | return; | 2739 | return; |
2666 | 2740 | ||
@@ -2773,13 +2847,13 @@ static int separate_irq_context(struct task_struct *curr, | |||
2773 | return 0; | 2847 | return 0; |
2774 | } | 2848 | } |
2775 | 2849 | ||
2776 | #else | 2850 | #else /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */ |
2777 | 2851 | ||
2778 | static inline | 2852 | static inline |
2779 | int mark_lock_irq(struct task_struct *curr, struct held_lock *this, | 2853 | int mark_lock_irq(struct task_struct *curr, struct held_lock *this, |
2780 | enum lock_usage_bit new_bit) | 2854 | enum lock_usage_bit new_bit) |
2781 | { | 2855 | { |
2782 | WARN_ON(1); | 2856 | WARN_ON(1); /* Impossible innit? when we don't have TRACE_IRQFLAG */ |
2783 | return 1; | 2857 | return 1; |
2784 | } | 2858 | } |
2785 | 2859 | ||
@@ -2799,7 +2873,7 @@ void lockdep_trace_alloc(gfp_t gfp_mask) | |||
2799 | { | 2873 | { |
2800 | } | 2874 | } |
2801 | 2875 | ||
2802 | #endif | 2876 | #endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */ |
2803 | 2877 | ||
2804 | /* | 2878 | /* |
2805 | * Mark a lock with a usage bit, and validate the state transition: | 2879 | * Mark a lock with a usage bit, and validate the state transition: |
@@ -2880,6 +2954,9 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, | |||
2880 | lock->cpu = raw_smp_processor_id(); | 2954 | lock->cpu = raw_smp_processor_id(); |
2881 | #endif | 2955 | #endif |
2882 | 2956 | ||
2957 | /* | ||
2958 | * Can't be having no nameless bastards around this place! | ||
2959 | */ | ||
2883 | if (DEBUG_LOCKS_WARN_ON(!name)) { | 2960 | if (DEBUG_LOCKS_WARN_ON(!name)) { |
2884 | lock->name = "NULL"; | 2961 | lock->name = "NULL"; |
2885 | return; | 2962 | return; |
@@ -2887,6 +2964,9 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, | |||
2887 | 2964 | ||
2888 | lock->name = name; | 2965 | lock->name = name; |
2889 | 2966 | ||
2967 | /* | ||
2968 | * No key, no joy, we need to hash something. | ||
2969 | */ | ||
2890 | if (DEBUG_LOCKS_WARN_ON(!key)) | 2970 | if (DEBUG_LOCKS_WARN_ON(!key)) |
2891 | return; | 2971 | return; |
2892 | /* | 2972 | /* |
@@ -2894,6 +2974,9 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, | |||
2894 | */ | 2974 | */ |
2895 | if (!static_obj(key)) { | 2975 | if (!static_obj(key)) { |
2896 | printk("BUG: key %p not in .data!\n", key); | 2976 | printk("BUG: key %p not in .data!\n", key); |
2977 | /* | ||
2978 | * What it says above ^^^^^, I suggest you read it. | ||
2979 | */ | ||
2897 | DEBUG_LOCKS_WARN_ON(1); | 2980 | DEBUG_LOCKS_WARN_ON(1); |
2898 | return; | 2981 | return; |
2899 | } | 2982 | } |
@@ -2932,6 +3015,11 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
2932 | if (unlikely(!debug_locks)) | 3015 | if (unlikely(!debug_locks)) |
2933 | return 0; | 3016 | return 0; |
2934 | 3017 | ||
3018 | /* | ||
3019 | * Lockdep should run with IRQs disabled, otherwise we could | ||
3020 | * get an interrupt which would want to take locks, which would | ||
3021 | * end up in lockdep and have you got a head-ache already? | ||
3022 | */ | ||
2935 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | 3023 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) |
2936 | return 0; | 3024 | return 0; |
2937 | 3025 | ||
@@ -2963,6 +3051,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
2963 | * dependency checks are done) | 3051 | * dependency checks are done) |
2964 | */ | 3052 | */ |
2965 | depth = curr->lockdep_depth; | 3053 | depth = curr->lockdep_depth; |
3054 | /* | ||
3055 | * Ran out of static storage for our per-task lock stack again have we? | ||
3056 | */ | ||
2966 | if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH)) | 3057 | if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH)) |
2967 | return 0; | 3058 | return 0; |
2968 | 3059 | ||
@@ -2981,6 +3072,10 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
2981 | } | 3072 | } |
2982 | 3073 | ||
2983 | hlock = curr->held_locks + depth; | 3074 | hlock = curr->held_locks + depth; |
3075 | /* | ||
3076 | * Plain impossible, we just registered it and checked it weren't no | ||
3077 | * NULL like.. I bet this mushroom I ate was good! | ||
3078 | */ | ||
2984 | if (DEBUG_LOCKS_WARN_ON(!class)) | 3079 | if (DEBUG_LOCKS_WARN_ON(!class)) |
2985 | return 0; | 3080 | return 0; |
2986 | hlock->class_idx = class_idx; | 3081 | hlock->class_idx = class_idx; |
@@ -3015,11 +3110,17 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
3015 | * the hash, not class->key. | 3110 | * the hash, not class->key. |
3016 | */ | 3111 | */ |
3017 | id = class - lock_classes; | 3112 | id = class - lock_classes; |
3113 | /* | ||
3114 | * Whoops, we did it again.. ran straight out of our static allocation. | ||
3115 | */ | ||
3018 | if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) | 3116 | if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) |
3019 | return 0; | 3117 | return 0; |
3020 | 3118 | ||
3021 | chain_key = curr->curr_chain_key; | 3119 | chain_key = curr->curr_chain_key; |
3022 | if (!depth) { | 3120 | if (!depth) { |
3121 | /* | ||
3122 | * How can we have a chain hash when we ain't got no keys?! | ||
3123 | */ | ||
3023 | if (DEBUG_LOCKS_WARN_ON(chain_key != 0)) | 3124 | if (DEBUG_LOCKS_WARN_ON(chain_key != 0)) |
3024 | return 0; | 3125 | return 0; |
3025 | chain_head = 1; | 3126 | chain_head = 1; |
@@ -3065,9 +3166,10 @@ print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock, | |||
3065 | if (debug_locks_silent) | 3166 | if (debug_locks_silent) |
3066 | return 0; | 3167 | return 0; |
3067 | 3168 | ||
3068 | printk("\n=====================================\n"); | 3169 | printk("\n"); |
3069 | printk( "[ BUG: bad unlock balance detected! ]\n"); | 3170 | printk("=====================================\n"); |
3070 | printk( "-------------------------------------\n"); | 3171 | printk("[ BUG: bad unlock balance detected! ]\n"); |
3172 | printk("-------------------------------------\n"); | ||
3071 | printk("%s/%d is trying to release lock (", | 3173 | printk("%s/%d is trying to release lock (", |
3072 | curr->comm, task_pid_nr(curr)); | 3174 | curr->comm, task_pid_nr(curr)); |
3073 | print_lockdep_cache(lock); | 3175 | print_lockdep_cache(lock); |
@@ -3091,6 +3193,9 @@ static int check_unlock(struct task_struct *curr, struct lockdep_map *lock, | |||
3091 | { | 3193 | { |
3092 | if (unlikely(!debug_locks)) | 3194 | if (unlikely(!debug_locks)) |
3093 | return 0; | 3195 | return 0; |
3196 | /* | ||
3197 | * Lockdep should run with IRQs disabled, recursion, head-ache, etc.. | ||
3198 | */ | ||
3094 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | 3199 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) |
3095 | return 0; | 3200 | return 0; |
3096 | 3201 | ||
@@ -3111,9 +3216,20 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock) | |||
3111 | if (!class) | 3216 | if (!class) |
3112 | class = look_up_lock_class(lock, 0); | 3217 | class = look_up_lock_class(lock, 0); |
3113 | 3218 | ||
3114 | if (DEBUG_LOCKS_WARN_ON(!class)) | 3219 | /* |
3220 | * If look_up_lock_class() failed to find a class, we're trying | ||
3221 | * to test if we hold a lock that has never yet been acquired. | ||
3222 | * Clearly if the lock hasn't been acquired _ever_, we're not | ||
3223 | * holding it either, so report failure. | ||
3224 | */ | ||
3225 | if (!class) | ||
3115 | return 0; | 3226 | return 0; |
3116 | 3227 | ||
3228 | /* | ||
3229 | * References, but not a lock we're actually ref-counting? | ||
3230 | * State got messed up, follow the sites that change ->references | ||
3231 | * and try to make sense of it. | ||
3232 | */ | ||
3117 | if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock)) | 3233 | if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock)) |
3118 | return 0; | 3234 | return 0; |
3119 | 3235 | ||
@@ -3136,6 +3252,10 @@ __lock_set_class(struct lockdep_map *lock, const char *name, | |||
3136 | int i; | 3252 | int i; |
3137 | 3253 | ||
3138 | depth = curr->lockdep_depth; | 3254 | depth = curr->lockdep_depth; |
3255 | /* | ||
3256 | * This function is about (re)setting the class of a held lock, | ||
3257 | * yet we're not actually holding any locks. Naughty user! | ||
3258 | */ | ||
3139 | if (DEBUG_LOCKS_WARN_ON(!depth)) | 3259 | if (DEBUG_LOCKS_WARN_ON(!depth)) |
3140 | return 0; | 3260 | return 0; |
3141 | 3261 | ||
@@ -3171,6 +3291,10 @@ found_it: | |||
3171 | return 0; | 3291 | return 0; |
3172 | } | 3292 | } |
3173 | 3293 | ||
3294 | /* | ||
3295 | * I took it apart and put it back together again, except now I have | ||
3296 | * these 'spare' parts.. where shall I put them. | ||
3297 | */ | ||
3174 | if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth)) | 3298 | if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth)) |
3175 | return 0; | 3299 | return 0; |
3176 | return 1; | 3300 | return 1; |
@@ -3195,6 +3319,10 @@ lock_release_non_nested(struct task_struct *curr, | |||
3195 | * of held locks: | 3319 | * of held locks: |
3196 | */ | 3320 | */ |
3197 | depth = curr->lockdep_depth; | 3321 | depth = curr->lockdep_depth; |
3322 | /* | ||
3323 | * So we're all set to release this lock.. wait what lock? We don't | ||
3324 | * own any locks, you've been drinking again? | ||
3325 | */ | ||
3198 | if (DEBUG_LOCKS_WARN_ON(!depth)) | 3326 | if (DEBUG_LOCKS_WARN_ON(!depth)) |
3199 | return 0; | 3327 | return 0; |
3200 | 3328 | ||
@@ -3247,6 +3375,10 @@ found_it: | |||
3247 | return 0; | 3375 | return 0; |
3248 | } | 3376 | } |
3249 | 3377 | ||
3378 | /* | ||
3379 | * We had N bottles of beer on the wall, we drank one, but now | ||
3380 | * there's not N-1 bottles of beer left on the wall... | ||
3381 | */ | ||
3250 | if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1)) | 3382 | if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1)) |
3251 | return 0; | 3383 | return 0; |
3252 | return 1; | 3384 | return 1; |
@@ -3277,6 +3409,9 @@ static int lock_release_nested(struct task_struct *curr, | |||
3277 | return lock_release_non_nested(curr, lock, ip); | 3409 | return lock_release_non_nested(curr, lock, ip); |
3278 | curr->lockdep_depth--; | 3410 | curr->lockdep_depth--; |
3279 | 3411 | ||
3412 | /* | ||
3413 | * No more locks, but somehow we've got hash left over, who left it? | ||
3414 | */ | ||
3280 | if (DEBUG_LOCKS_WARN_ON(!depth && (hlock->prev_chain_key != 0))) | 3415 | if (DEBUG_LOCKS_WARN_ON(!depth && (hlock->prev_chain_key != 0))) |
3281 | return 0; | 3416 | return 0; |
3282 | 3417 | ||
@@ -3359,10 +3494,13 @@ static void check_flags(unsigned long flags) | |||
3359 | * check if not in hardirq contexts: | 3494 | * check if not in hardirq contexts: |
3360 | */ | 3495 | */ |
3361 | if (!hardirq_count()) { | 3496 | if (!hardirq_count()) { |
3362 | if (softirq_count()) | 3497 | if (softirq_count()) { |
3498 | /* like the above, but with softirqs */ | ||
3363 | DEBUG_LOCKS_WARN_ON(current->softirqs_enabled); | 3499 | DEBUG_LOCKS_WARN_ON(current->softirqs_enabled); |
3364 | else | 3500 | } else { |
3501 | /* lick the above, does it taste good? */ | ||
3365 | DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); | 3502 | DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); |
3503 | } | ||
3366 | } | 3504 | } |
3367 | 3505 | ||
3368 | if (!debug_locks) | 3506 | if (!debug_locks) |
@@ -3472,9 +3610,10 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, | |||
3472 | if (debug_locks_silent) | 3610 | if (debug_locks_silent) |
3473 | return 0; | 3611 | return 0; |
3474 | 3612 | ||
3475 | printk("\n=================================\n"); | 3613 | printk("\n"); |
3476 | printk( "[ BUG: bad contention detected! ]\n"); | 3614 | printk("=================================\n"); |
3477 | printk( "---------------------------------\n"); | 3615 | printk("[ BUG: bad contention detected! ]\n"); |
3616 | printk("---------------------------------\n"); | ||
3478 | printk("%s/%d is trying to contend lock (", | 3617 | printk("%s/%d is trying to contend lock (", |
3479 | curr->comm, task_pid_nr(curr)); | 3618 | curr->comm, task_pid_nr(curr)); |
3480 | print_lockdep_cache(lock); | 3619 | print_lockdep_cache(lock); |
@@ -3500,6 +3639,10 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) | |||
3500 | int i, contention_point, contending_point; | 3639 | int i, contention_point, contending_point; |
3501 | 3640 | ||
3502 | depth = curr->lockdep_depth; | 3641 | depth = curr->lockdep_depth; |
3642 | /* | ||
3643 | * Whee, we contended on this lock, except it seems we're not | ||
3644 | * actually trying to acquire anything much at all.. | ||
3645 | */ | ||
3503 | if (DEBUG_LOCKS_WARN_ON(!depth)) | 3646 | if (DEBUG_LOCKS_WARN_ON(!depth)) |
3504 | return; | 3647 | return; |
3505 | 3648 | ||
@@ -3549,6 +3692,10 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip) | |||
3549 | int i, cpu; | 3692 | int i, cpu; |
3550 | 3693 | ||
3551 | depth = curr->lockdep_depth; | 3694 | depth = curr->lockdep_depth; |
3695 | /* | ||
3696 | * Yay, we acquired ownership of this lock we didn't try to | ||
3697 | * acquire, how the heck did that happen? | ||
3698 | */ | ||
3552 | if (DEBUG_LOCKS_WARN_ON(!depth)) | 3699 | if (DEBUG_LOCKS_WARN_ON(!depth)) |
3553 | return; | 3700 | return; |
3554 | 3701 | ||
@@ -3753,8 +3900,12 @@ void lockdep_reset_lock(struct lockdep_map *lock) | |||
3753 | match |= class == lock->class_cache[j]; | 3900 | match |= class == lock->class_cache[j]; |
3754 | 3901 | ||
3755 | if (unlikely(match)) { | 3902 | if (unlikely(match)) { |
3756 | if (debug_locks_off_graph_unlock()) | 3903 | if (debug_locks_off_graph_unlock()) { |
3904 | /* | ||
3905 | * We all just reset everything, how did it match? | ||
3906 | */ | ||
3757 | WARN_ON(1); | 3907 | WARN_ON(1); |
3908 | } | ||
3758 | goto out_restore; | 3909 | goto out_restore; |
3759 | } | 3910 | } |
3760 | } | 3911 | } |
@@ -3833,9 +3984,10 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from, | |||
3833 | if (debug_locks_silent) | 3984 | if (debug_locks_silent) |
3834 | return; | 3985 | return; |
3835 | 3986 | ||
3836 | printk("\n=========================\n"); | 3987 | printk("\n"); |
3837 | printk( "[ BUG: held lock freed! ]\n"); | 3988 | printk("=========================\n"); |
3838 | printk( "-------------------------\n"); | 3989 | printk("[ BUG: held lock freed! ]\n"); |
3990 | printk("-------------------------\n"); | ||
3839 | printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", | 3991 | printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", |
3840 | curr->comm, task_pid_nr(curr), mem_from, mem_to-1); | 3992 | curr->comm, task_pid_nr(curr), mem_from, mem_to-1); |
3841 | print_lock(hlock); | 3993 | print_lock(hlock); |
@@ -3889,9 +4041,10 @@ static void print_held_locks_bug(struct task_struct *curr) | |||
3889 | if (debug_locks_silent) | 4041 | if (debug_locks_silent) |
3890 | return; | 4042 | return; |
3891 | 4043 | ||
3892 | printk("\n=====================================\n"); | 4044 | printk("\n"); |
3893 | printk( "[ BUG: lock held at task exit time! ]\n"); | 4045 | printk("=====================================\n"); |
3894 | printk( "-------------------------------------\n"); | 4046 | printk("[ BUG: lock held at task exit time! ]\n"); |
4047 | printk("-------------------------------------\n"); | ||
3895 | printk("%s/%d is exiting with locks still held!\n", | 4048 | printk("%s/%d is exiting with locks still held!\n", |
3896 | curr->comm, task_pid_nr(curr)); | 4049 | curr->comm, task_pid_nr(curr)); |
3897 | lockdep_print_held_locks(curr); | 4050 | lockdep_print_held_locks(curr); |
@@ -3985,16 +4138,17 @@ void lockdep_sys_exit(void) | |||
3985 | if (unlikely(curr->lockdep_depth)) { | 4138 | if (unlikely(curr->lockdep_depth)) { |
3986 | if (!debug_locks_off()) | 4139 | if (!debug_locks_off()) |
3987 | return; | 4140 | return; |
3988 | printk("\n================================================\n"); | 4141 | printk("\n"); |
3989 | printk( "[ BUG: lock held when returning to user space! ]\n"); | 4142 | printk("================================================\n"); |
3990 | printk( "------------------------------------------------\n"); | 4143 | printk("[ BUG: lock held when returning to user space! ]\n"); |
4144 | printk("------------------------------------------------\n"); | ||
3991 | printk("%s/%d is leaving the kernel with locks still held!\n", | 4145 | printk("%s/%d is leaving the kernel with locks still held!\n", |
3992 | curr->comm, curr->pid); | 4146 | curr->comm, curr->pid); |
3993 | lockdep_print_held_locks(curr); | 4147 | lockdep_print_held_locks(curr); |
3994 | } | 4148 | } |
3995 | } | 4149 | } |
3996 | 4150 | ||
3997 | void lockdep_rcu_dereference(const char *file, const int line) | 4151 | void lockdep_rcu_suspicious(const char *file, const int line, const char *s) |
3998 | { | 4152 | { |
3999 | struct task_struct *curr = current; | 4153 | struct task_struct *curr = current; |
4000 | 4154 | ||
@@ -4003,15 +4157,15 @@ void lockdep_rcu_dereference(const char *file, const int line) | |||
4003 | return; | 4157 | return; |
4004 | #endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ | 4158 | #endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ |
4005 | /* Note: the following can be executed concurrently, so be careful. */ | 4159 | /* Note: the following can be executed concurrently, so be careful. */ |
4006 | printk("\n===================================================\n"); | 4160 | printk("\n"); |
4007 | printk( "[ INFO: suspicious rcu_dereference_check() usage. ]\n"); | 4161 | printk("===============================\n"); |
4008 | printk( "---------------------------------------------------\n"); | 4162 | printk("[ INFO: suspicious RCU usage. ]\n"); |
4009 | printk("%s:%d invoked rcu_dereference_check() without protection!\n", | 4163 | printk("-------------------------------\n"); |
4010 | file, line); | 4164 | printk("%s:%d %s!\n", file, line, s); |
4011 | printk("\nother info that might help us debug this:\n\n"); | 4165 | printk("\nother info that might help us debug this:\n\n"); |
4012 | printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks); | 4166 | printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks); |
4013 | lockdep_print_held_locks(curr); | 4167 | lockdep_print_held_locks(curr); |
4014 | printk("\nstack backtrace:\n"); | 4168 | printk("\nstack backtrace:\n"); |
4015 | dump_stack(); | 4169 | dump_stack(); |
4016 | } | 4170 | } |
4017 | EXPORT_SYMBOL_GPL(lockdep_rcu_dereference); | 4171 | EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); |
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 71edd2f60c0..91c32a0b612 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c | |||
@@ -11,7 +11,7 @@ | |||
11 | * Code for /proc/lockdep and /proc/lockdep_stats: | 11 | * Code for /proc/lockdep and /proc/lockdep_stats: |
12 | * | 12 | * |
13 | */ | 13 | */ |
14 | #include <linux/module.h> | 14 | #include <linux/export.h> |
15 | #include <linux/proc_fs.h> | 15 | #include <linux/proc_fs.h> |
16 | #include <linux/seq_file.h> | 16 | #include <linux/seq_file.h> |
17 | #include <linux/kallsyms.h> | 17 | #include <linux/kallsyms.h> |
diff --git a/kernel/module.c b/kernel/module.c index 04379f92f84..178333c48d1 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -16,7 +16,7 @@ | |||
16 | along with this program; if not, write to the Free Software | 16 | along with this program; if not, write to the Free Software |
17 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | #include <linux/module.h> | 19 | #include <linux/export.h> |
20 | #include <linux/moduleloader.h> | 20 | #include <linux/moduleloader.h> |
21 | #include <linux/ftrace_event.h> | 21 | #include <linux/ftrace_event.h> |
22 | #include <linux/init.h> | 22 | #include <linux/init.h> |
@@ -2487,6 +2487,9 @@ static int check_modinfo(struct module *mod, struct load_info *info) | |||
2487 | return -ENOEXEC; | 2487 | return -ENOEXEC; |
2488 | } | 2488 | } |
2489 | 2489 | ||
2490 | if (!get_modinfo(info, "intree")) | ||
2491 | add_taint_module(mod, TAINT_OOT_MODULE); | ||
2492 | |||
2490 | if (get_modinfo(info, "staging")) { | 2493 | if (get_modinfo(info, "staging")) { |
2491 | add_taint_module(mod, TAINT_CRAP); | 2494 | add_taint_module(mod, TAINT_CRAP); |
2492 | printk(KERN_WARNING "%s: module is from the staging directory," | 2495 | printk(KERN_WARNING "%s: module is from the staging directory," |
@@ -2878,8 +2881,7 @@ static struct module *load_module(void __user *umod, | |||
2878 | } | 2881 | } |
2879 | 2882 | ||
2880 | /* This has to be done once we're sure module name is unique. */ | 2883 | /* This has to be done once we're sure module name is unique. */ |
2881 | if (!mod->taints || mod->taints == (1U<<TAINT_CRAP)) | 2884 | dynamic_debug_setup(info.debug, info.num_debug); |
2882 | dynamic_debug_setup(info.debug, info.num_debug); | ||
2883 | 2885 | ||
2884 | /* Find duplicate symbols */ | 2886 | /* Find duplicate symbols */ |
2885 | err = verify_export_symbols(mod); | 2887 | err = verify_export_symbols(mod); |
@@ -2915,8 +2917,7 @@ static struct module *load_module(void __user *umod, | |||
2915 | module_bug_cleanup(mod); | 2917 | module_bug_cleanup(mod); |
2916 | 2918 | ||
2917 | ddebug: | 2919 | ddebug: |
2918 | if (!mod->taints || mod->taints == (1U<<TAINT_CRAP)) | 2920 | dynamic_debug_remove(info.debug); |
2919 | dynamic_debug_remove(info.debug); | ||
2920 | unlock: | 2921 | unlock: |
2921 | mutex_unlock(&module_mutex); | 2922 | mutex_unlock(&module_mutex); |
2922 | synchronize_sched(); | 2923 | synchronize_sched(); |
@@ -3257,6 +3258,8 @@ static char *module_flags(struct module *mod, char *buf) | |||
3257 | buf[bx++] = '('; | 3258 | buf[bx++] = '('; |
3258 | if (mod->taints & (1 << TAINT_PROPRIETARY_MODULE)) | 3259 | if (mod->taints & (1 << TAINT_PROPRIETARY_MODULE)) |
3259 | buf[bx++] = 'P'; | 3260 | buf[bx++] = 'P'; |
3261 | else if (mod->taints & (1 << TAINT_OOT_MODULE)) | ||
3262 | buf[bx++] = 'O'; | ||
3260 | if (mod->taints & (1 << TAINT_FORCED_MODULE)) | 3263 | if (mod->taints & (1 << TAINT_FORCED_MODULE)) |
3261 | buf[bx++] = 'F'; | 3264 | buf[bx++] = 'F'; |
3262 | if (mod->taints & (1 << TAINT_CRAP)) | 3265 | if (mod->taints & (1 << TAINT_CRAP)) |
@@ -3487,50 +3490,3 @@ void module_layout(struct module *mod, | |||
3487 | } | 3490 | } |
3488 | EXPORT_SYMBOL(module_layout); | 3491 | EXPORT_SYMBOL(module_layout); |
3489 | #endif | 3492 | #endif |
3490 | |||
3491 | #ifdef CONFIG_TRACEPOINTS | ||
3492 | void module_update_tracepoints(void) | ||
3493 | { | ||
3494 | struct module *mod; | ||
3495 | |||
3496 | mutex_lock(&module_mutex); | ||
3497 | list_for_each_entry(mod, &modules, list) | ||
3498 | if (!mod->taints) | ||
3499 | tracepoint_update_probe_range(mod->tracepoints_ptrs, | ||
3500 | mod->tracepoints_ptrs + mod->num_tracepoints); | ||
3501 | mutex_unlock(&module_mutex); | ||
3502 | } | ||
3503 | |||
3504 | /* | ||
3505 | * Returns 0 if current not found. | ||
3506 | * Returns 1 if current found. | ||
3507 | */ | ||
3508 | int module_get_iter_tracepoints(struct tracepoint_iter *iter) | ||
3509 | { | ||
3510 | struct module *iter_mod; | ||
3511 | int found = 0; | ||
3512 | |||
3513 | mutex_lock(&module_mutex); | ||
3514 | list_for_each_entry(iter_mod, &modules, list) { | ||
3515 | if (!iter_mod->taints) { | ||
3516 | /* | ||
3517 | * Sorted module list | ||
3518 | */ | ||
3519 | if (iter_mod < iter->module) | ||
3520 | continue; | ||
3521 | else if (iter_mod > iter->module) | ||
3522 | iter->tracepoint = NULL; | ||
3523 | found = tracepoint_get_iter_range(&iter->tracepoint, | ||
3524 | iter_mod->tracepoints_ptrs, | ||
3525 | iter_mod->tracepoints_ptrs | ||
3526 | + iter_mod->num_tracepoints); | ||
3527 | if (found) { | ||
3528 | iter->module = iter_mod; | ||
3529 | break; | ||
3530 | } | ||
3531 | } | ||
3532 | } | ||
3533 | mutex_unlock(&module_mutex); | ||
3534 | return found; | ||
3535 | } | ||
3536 | #endif | ||
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index 73da83aff41..7e3443fe1f4 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c | |||
@@ -14,7 +14,7 @@ | |||
14 | */ | 14 | */ |
15 | #include <linux/mutex.h> | 15 | #include <linux/mutex.h> |
16 | #include <linux/delay.h> | 16 | #include <linux/delay.h> |
17 | #include <linux/module.h> | 17 | #include <linux/export.h> |
18 | #include <linux/poison.h> | 18 | #include <linux/poison.h> |
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/spinlock.h> | 20 | #include <linux/spinlock.h> |
diff --git a/kernel/mutex.c b/kernel/mutex.c index d607ed5dd44..89096dd8786 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
@@ -19,7 +19,7 @@ | |||
19 | */ | 19 | */ |
20 | #include <linux/mutex.h> | 20 | #include <linux/mutex.h> |
21 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
22 | #include <linux/module.h> | 22 | #include <linux/export.h> |
23 | #include <linux/spinlock.h> | 23 | #include <linux/spinlock.h> |
24 | #include <linux/interrupt.h> | 24 | #include <linux/interrupt.h> |
25 | #include <linux/debug_locks.h> | 25 | #include <linux/debug_locks.h> |
diff --git a/kernel/notifier.c b/kernel/notifier.c index 8d7b435806c..2d5cc4ccff7 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c | |||
@@ -1,6 +1,6 @@ | |||
1 | #include <linux/kdebug.h> | 1 | #include <linux/kdebug.h> |
2 | #include <linux/kprobes.h> | 2 | #include <linux/kprobes.h> |
3 | #include <linux/module.h> | 3 | #include <linux/export.h> |
4 | #include <linux/notifier.h> | 4 | #include <linux/notifier.h> |
5 | #include <linux/rcupdate.h> | 5 | #include <linux/rcupdate.h> |
6 | #include <linux/vmalloc.h> | 6 | #include <linux/vmalloc.h> |
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 9aeab4b98c6..b576f7f14bc 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
@@ -14,7 +14,7 @@ | |||
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/module.h> | 17 | #include <linux/export.h> |
18 | #include <linux/nsproxy.h> | 18 | #include <linux/nsproxy.h> |
19 | #include <linux/init_task.h> | 19 | #include <linux/init_task.h> |
20 | #include <linux/mnt_namespace.h> | 20 | #include <linux/mnt_namespace.h> |
diff --git a/kernel/padata.c b/kernel/padata.c index b91941df5e6..b4525993151 100644 --- a/kernel/padata.c +++ b/kernel/padata.c | |||
@@ -18,7 +18,7 @@ | |||
18 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | 18 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include <linux/module.h> | 21 | #include <linux/export.h> |
22 | #include <linux/cpumask.h> | 22 | #include <linux/cpumask.h> |
23 | #include <linux/err.h> | 23 | #include <linux/err.h> |
24 | #include <linux/cpu.h> | 24 | #include <linux/cpu.h> |
diff --git a/kernel/panic.c b/kernel/panic.c index d7bb6974efb..b2659360421 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -177,6 +177,7 @@ static const struct tnt tnts[] = { | |||
177 | { TAINT_WARN, 'W', ' ' }, | 177 | { TAINT_WARN, 'W', ' ' }, |
178 | { TAINT_CRAP, 'C', ' ' }, | 178 | { TAINT_CRAP, 'C', ' ' }, |
179 | { TAINT_FIRMWARE_WORKAROUND, 'I', ' ' }, | 179 | { TAINT_FIRMWARE_WORKAROUND, 'I', ' ' }, |
180 | { TAINT_OOT_MODULE, 'O', ' ' }, | ||
180 | }; | 181 | }; |
181 | 182 | ||
182 | /** | 183 | /** |
@@ -194,6 +195,7 @@ static const struct tnt tnts[] = { | |||
194 | * 'W' - Taint on warning. | 195 | * 'W' - Taint on warning. |
195 | * 'C' - modules from drivers/staging are loaded. | 196 | * 'C' - modules from drivers/staging are loaded. |
196 | * 'I' - Working around severe firmware bug. | 197 | * 'I' - Working around severe firmware bug. |
198 | * 'O' - Out-of-tree module has been loaded. | ||
197 | * | 199 | * |
198 | * The string is overwritten by the next call to print_tainted(). | 200 | * The string is overwritten by the next call to print_tainted(). |
199 | */ | 201 | */ |
diff --git a/kernel/params.c b/kernel/params.c index 22df3e0d142..65aae11eb93 100644 --- a/kernel/params.c +++ b/kernel/params.c | |||
@@ -15,7 +15,7 @@ | |||
15 | along with this program; if not, write to the Free Software | 15 | along with this program; if not, write to the Free Software |
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | #include <linux/moduleparam.h> | 18 | #include <linux/module.h> |
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/string.h> | 20 | #include <linux/string.h> |
21 | #include <linux/errno.h> | 21 | #include <linux/errno.h> |
@@ -67,20 +67,27 @@ static void maybe_kfree_parameter(void *param) | |||
67 | } | 67 | } |
68 | } | 68 | } |
69 | 69 | ||
70 | static inline char dash2underscore(char c) | 70 | static char dash2underscore(char c) |
71 | { | 71 | { |
72 | if (c == '-') | 72 | if (c == '-') |
73 | return '_'; | 73 | return '_'; |
74 | return c; | 74 | return c; |
75 | } | 75 | } |
76 | 76 | ||
77 | static inline int parameq(const char *input, const char *paramname) | 77 | bool parameqn(const char *a, const char *b, size_t n) |
78 | { | 78 | { |
79 | unsigned int i; | 79 | size_t i; |
80 | for (i = 0; dash2underscore(input[i]) == paramname[i]; i++) | 80 | |
81 | if (input[i] == '\0') | 81 | for (i = 0; i < n; i++) { |
82 | return 1; | 82 | if (dash2underscore(a[i]) != dash2underscore(b[i])) |
83 | return 0; | 83 | return false; |
84 | } | ||
85 | return true; | ||
86 | } | ||
87 | |||
88 | bool parameq(const char *a, const char *b) | ||
89 | { | ||
90 | return parameqn(a, b, strlen(a)+1); | ||
84 | } | 91 | } |
85 | 92 | ||
86 | static int parse_one(char *param, | 93 | static int parse_one(char *param, |
diff --git a/kernel/pid.c b/kernel/pid.c index e432057f3b2..fa5f72227e5 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -27,7 +27,7 @@ | |||
27 | */ | 27 | */ |
28 | 28 | ||
29 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
30 | #include <linux/module.h> | 30 | #include <linux/export.h> |
31 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
32 | #include <linux/init.h> | 32 | #include <linux/init.h> |
33 | #include <linux/rculist.h> | 33 | #include <linux/rculist.h> |
@@ -418,7 +418,9 @@ EXPORT_SYMBOL(pid_task); | |||
418 | */ | 418 | */ |
419 | struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) | 419 | struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) |
420 | { | 420 | { |
421 | rcu_lockdep_assert(rcu_read_lock_held()); | 421 | rcu_lockdep_assert(rcu_read_lock_held(), |
422 | "find_task_by_pid_ns() needs rcu_read_lock()" | ||
423 | " protection"); | ||
422 | return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); | 424 | return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); |
423 | } | 425 | } |
424 | 426 | ||
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index c8008dd58ef..e7cb76dc18f 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -274,9 +274,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) | |||
274 | struct task_cputime sum; | 274 | struct task_cputime sum; |
275 | unsigned long flags; | 275 | unsigned long flags; |
276 | 276 | ||
277 | spin_lock_irqsave(&cputimer->lock, flags); | ||
278 | if (!cputimer->running) { | 277 | if (!cputimer->running) { |
279 | cputimer->running = 1; | ||
280 | /* | 278 | /* |
281 | * The POSIX timer interface allows for absolute time expiry | 279 | * The POSIX timer interface allows for absolute time expiry |
282 | * values through the TIMER_ABSTIME flag, therefore we have | 280 | * values through the TIMER_ABSTIME flag, therefore we have |
@@ -284,10 +282,13 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) | |||
284 | * it. | 282 | * it. |
285 | */ | 283 | */ |
286 | thread_group_cputime(tsk, &sum); | 284 | thread_group_cputime(tsk, &sum); |
285 | raw_spin_lock_irqsave(&cputimer->lock, flags); | ||
286 | cputimer->running = 1; | ||
287 | update_gt_cputime(&cputimer->cputime, &sum); | 287 | update_gt_cputime(&cputimer->cputime, &sum); |
288 | } | 288 | } else |
289 | raw_spin_lock_irqsave(&cputimer->lock, flags); | ||
289 | *times = cputimer->cputime; | 290 | *times = cputimer->cputime; |
290 | spin_unlock_irqrestore(&cputimer->lock, flags); | 291 | raw_spin_unlock_irqrestore(&cputimer->lock, flags); |
291 | } | 292 | } |
292 | 293 | ||
293 | /* | 294 | /* |
@@ -998,9 +999,9 @@ static void stop_process_timers(struct signal_struct *sig) | |||
998 | struct thread_group_cputimer *cputimer = &sig->cputimer; | 999 | struct thread_group_cputimer *cputimer = &sig->cputimer; |
999 | unsigned long flags; | 1000 | unsigned long flags; |
1000 | 1001 | ||
1001 | spin_lock_irqsave(&cputimer->lock, flags); | 1002 | raw_spin_lock_irqsave(&cputimer->lock, flags); |
1002 | cputimer->running = 0; | 1003 | cputimer->running = 0; |
1003 | spin_unlock_irqrestore(&cputimer->lock, flags); | 1004 | raw_spin_unlock_irqrestore(&cputimer->lock, flags); |
1004 | } | 1005 | } |
1005 | 1006 | ||
1006 | static u32 onecputick; | 1007 | static u32 onecputick; |
@@ -1290,9 +1291,9 @@ static inline int fastpath_timer_check(struct task_struct *tsk) | |||
1290 | if (sig->cputimer.running) { | 1291 | if (sig->cputimer.running) { |
1291 | struct task_cputime group_sample; | 1292 | struct task_cputime group_sample; |
1292 | 1293 | ||
1293 | spin_lock(&sig->cputimer.lock); | 1294 | raw_spin_lock(&sig->cputimer.lock); |
1294 | group_sample = sig->cputimer.cputime; | 1295 | group_sample = sig->cputimer.cputime; |
1295 | spin_unlock(&sig->cputimer.lock); | 1296 | raw_spin_unlock(&sig->cputimer.lock); |
1296 | 1297 | ||
1297 | if (task_cputime_expired(&group_sample, &sig->cputime_expires)) | 1298 | if (task_cputime_expired(&group_sample, &sig->cputime_expires)) |
1298 | return 1; | 1299 | return 1; |
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 4556182527f..69185ae6b70 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c | |||
@@ -46,7 +46,7 @@ | |||
46 | #include <linux/syscalls.h> | 46 | #include <linux/syscalls.h> |
47 | #include <linux/wait.h> | 47 | #include <linux/wait.h> |
48 | #include <linux/workqueue.h> | 48 | #include <linux/workqueue.h> |
49 | #include <linux/module.h> | 49 | #include <linux/export.h> |
50 | 50 | ||
51 | /* | 51 | /* |
52 | * Management arrays for POSIX timers. Timers are kept in slab memory | 52 | * Management arrays for POSIX timers. Timers are kept in slab memory |
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index b1914cb9095..deb5461e321 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
@@ -27,6 +27,7 @@ config HIBERNATION | |||
27 | select HIBERNATE_CALLBACKS | 27 | select HIBERNATE_CALLBACKS |
28 | select LZO_COMPRESS | 28 | select LZO_COMPRESS |
29 | select LZO_DECOMPRESS | 29 | select LZO_DECOMPRESS |
30 | select CRC32 | ||
30 | ---help--- | 31 | ---help--- |
31 | Enable the suspend to disk (STD) functionality, which is usually | 32 | Enable the suspend to disk (STD) functionality, which is usually |
32 | called "hibernation" in user interfaces. STD checkpoints the | 33 | called "hibernation" in user interfaces. STD checkpoints the |
@@ -65,6 +66,9 @@ config HIBERNATION | |||
65 | 66 | ||
66 | For more information take a look at <file:Documentation/power/swsusp.txt>. | 67 | For more information take a look at <file:Documentation/power/swsusp.txt>. |
67 | 68 | ||
69 | config ARCH_SAVE_PAGE_KEYS | ||
70 | bool | ||
71 | |||
68 | config PM_STD_PARTITION | 72 | config PM_STD_PARTITION |
69 | string "Default resume partition" | 73 | string "Default resume partition" |
70 | depends on HIBERNATION | 74 | depends on HIBERNATION |
@@ -231,3 +235,11 @@ config PM_CLK | |||
231 | config PM_GENERIC_DOMAINS | 235 | config PM_GENERIC_DOMAINS |
232 | bool | 236 | bool |
233 | depends on PM | 237 | depends on PM |
238 | |||
239 | config PM_GENERIC_DOMAINS_RUNTIME | ||
240 | def_bool y | ||
241 | depends on PM_RUNTIME && PM_GENERIC_DOMAINS | ||
242 | |||
243 | config CPU_PM | ||
244 | bool | ||
245 | depends on SUSPEND || CPU_IDLE | ||
diff --git a/kernel/power/Makefile b/kernel/power/Makefile index c5ebc6a9064..07e0e28ffba 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile | |||
@@ -1,8 +1,8 @@ | |||
1 | 1 | ||
2 | ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG | 2 | ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG |
3 | 3 | ||
4 | obj-$(CONFIG_PM) += main.o | 4 | obj-$(CONFIG_PM) += main.o qos.o |
5 | obj-$(CONFIG_PM_SLEEP) += console.o | 5 | obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o |
6 | obj-$(CONFIG_FREEZER) += process.o | 6 | obj-$(CONFIG_FREEZER) += process.o |
7 | obj-$(CONFIG_SUSPEND) += suspend.o | 7 | obj-$(CONFIG_SUSPEND) += suspend.o |
8 | obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o | 8 | obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o |
diff --git a/kernel/power/console.c b/kernel/power/console.c index 218e5af9015..b1dc456474b 100644 --- a/kernel/power/console.c +++ b/kernel/power/console.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * drivers/power/process.c - Functions for saving/restoring console. | 2 | * Functions for saving/restoring console. |
3 | * | 3 | * |
4 | * Originally from swsusp. | 4 | * Originally from swsusp. |
5 | */ | 5 | */ |
@@ -10,7 +10,6 @@ | |||
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include "power.h" | 11 | #include "power.h" |
12 | 12 | ||
13 | #if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE) | ||
14 | #define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) | 13 | #define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) |
15 | 14 | ||
16 | static int orig_fgconsole, orig_kmsg; | 15 | static int orig_fgconsole, orig_kmsg; |
@@ -32,4 +31,3 @@ void pm_restore_console(void) | |||
32 | vt_kmsg_redirect(orig_kmsg); | 31 | vt_kmsg_redirect(orig_kmsg); |
33 | } | 32 | } |
34 | } | 33 | } |
35 | #endif | ||
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 8f7b1db1ece..a6b0503574e 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c | |||
@@ -9,11 +9,13 @@ | |||
9 | * This file is released under the GPLv2. | 9 | * This file is released under the GPLv2. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/export.h> | ||
12 | #include <linux/suspend.h> | 13 | #include <linux/suspend.h> |
13 | #include <linux/syscalls.h> | 14 | #include <linux/syscalls.h> |
14 | #include <linux/reboot.h> | 15 | #include <linux/reboot.h> |
15 | #include <linux/string.h> | 16 | #include <linux/string.h> |
16 | #include <linux/device.h> | 17 | #include <linux/device.h> |
18 | #include <linux/async.h> | ||
17 | #include <linux/kmod.h> | 19 | #include <linux/kmod.h> |
18 | #include <linux/delay.h> | 20 | #include <linux/delay.h> |
19 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
@@ -29,12 +31,14 @@ | |||
29 | #include "power.h" | 31 | #include "power.h" |
30 | 32 | ||
31 | 33 | ||
32 | static int nocompress = 0; | 34 | static int nocompress; |
33 | static int noresume = 0; | 35 | static int noresume; |
36 | static int resume_wait; | ||
37 | static int resume_delay; | ||
34 | static char resume_file[256] = CONFIG_PM_STD_PARTITION; | 38 | static char resume_file[256] = CONFIG_PM_STD_PARTITION; |
35 | dev_t swsusp_resume_device; | 39 | dev_t swsusp_resume_device; |
36 | sector_t swsusp_resume_block; | 40 | sector_t swsusp_resume_block; |
37 | int in_suspend __nosavedata = 0; | 41 | int in_suspend __nosavedata; |
38 | 42 | ||
39 | enum { | 43 | enum { |
40 | HIBERNATION_INVALID, | 44 | HIBERNATION_INVALID, |
@@ -51,6 +55,8 @@ enum { | |||
51 | 55 | ||
52 | static int hibernation_mode = HIBERNATION_SHUTDOWN; | 56 | static int hibernation_mode = HIBERNATION_SHUTDOWN; |
53 | 57 | ||
58 | static bool freezer_test_done; | ||
59 | |||
54 | static const struct platform_hibernation_ops *hibernation_ops; | 60 | static const struct platform_hibernation_ops *hibernation_ops; |
55 | 61 | ||
56 | /** | 62 | /** |
@@ -334,14 +340,31 @@ int hibernation_snapshot(int platform_mode) | |||
334 | if (error) | 340 | if (error) |
335 | goto Close; | 341 | goto Close; |
336 | 342 | ||
337 | error = dpm_prepare(PMSG_FREEZE); | ||
338 | if (error) | ||
339 | goto Complete_devices; | ||
340 | |||
341 | /* Preallocate image memory before shutting down devices. */ | 343 | /* Preallocate image memory before shutting down devices. */ |
342 | error = hibernate_preallocate_memory(); | 344 | error = hibernate_preallocate_memory(); |
343 | if (error) | 345 | if (error) |
344 | goto Complete_devices; | 346 | goto Close; |
347 | |||
348 | error = freeze_kernel_threads(); | ||
349 | if (error) | ||
350 | goto Cleanup; | ||
351 | |||
352 | if (hibernation_test(TEST_FREEZER) || | ||
353 | hibernation_testmode(HIBERNATION_TESTPROC)) { | ||
354 | |||
355 | /* | ||
356 | * Indicate to the caller that we are returning due to a | ||
357 | * successful freezer test. | ||
358 | */ | ||
359 | freezer_test_done = true; | ||
360 | goto Cleanup; | ||
361 | } | ||
362 | |||
363 | error = dpm_prepare(PMSG_FREEZE); | ||
364 | if (error) { | ||
365 | dpm_complete(msg); | ||
366 | goto Cleanup; | ||
367 | } | ||
345 | 368 | ||
346 | suspend_console(); | 369 | suspend_console(); |
347 | pm_restrict_gfp_mask(); | 370 | pm_restrict_gfp_mask(); |
@@ -370,8 +393,6 @@ int hibernation_snapshot(int platform_mode) | |||
370 | pm_restore_gfp_mask(); | 393 | pm_restore_gfp_mask(); |
371 | 394 | ||
372 | resume_console(); | 395 | resume_console(); |
373 | |||
374 | Complete_devices: | ||
375 | dpm_complete(msg); | 396 | dpm_complete(msg); |
376 | 397 | ||
377 | Close: | 398 | Close: |
@@ -381,6 +402,10 @@ int hibernation_snapshot(int platform_mode) | |||
381 | Recover_platform: | 402 | Recover_platform: |
382 | platform_recover(platform_mode); | 403 | platform_recover(platform_mode); |
383 | goto Resume_devices; | 404 | goto Resume_devices; |
405 | |||
406 | Cleanup: | ||
407 | swsusp_free(); | ||
408 | goto Close; | ||
384 | } | 409 | } |
385 | 410 | ||
386 | /** | 411 | /** |
@@ -463,7 +488,7 @@ static int resume_target_kernel(bool platform_mode) | |||
463 | * @platform_mode: If set, use platform driver to prepare for the transition. | 488 | * @platform_mode: If set, use platform driver to prepare for the transition. |
464 | * | 489 | * |
465 | * This routine must be called with pm_mutex held. If it is successful, control | 490 | * This routine must be called with pm_mutex held. If it is successful, control |
466 | * reappears in the restored target kernel in hibernation_snaphot(). | 491 | * reappears in the restored target kernel in hibernation_snapshot(). |
467 | */ | 492 | */ |
468 | int hibernation_restore(int platform_mode) | 493 | int hibernation_restore(int platform_mode) |
469 | { | 494 | { |
@@ -633,15 +658,13 @@ int hibernate(void) | |||
633 | if (error) | 658 | if (error) |
634 | goto Finish; | 659 | goto Finish; |
635 | 660 | ||
636 | if (hibernation_test(TEST_FREEZER)) | ||
637 | goto Thaw; | ||
638 | |||
639 | if (hibernation_testmode(HIBERNATION_TESTPROC)) | ||
640 | goto Thaw; | ||
641 | |||
642 | error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); | 661 | error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); |
643 | if (error) | 662 | if (error) |
644 | goto Thaw; | 663 | goto Thaw; |
664 | if (freezer_test_done) { | ||
665 | freezer_test_done = false; | ||
666 | goto Thaw; | ||
667 | } | ||
645 | 668 | ||
646 | if (in_suspend) { | 669 | if (in_suspend) { |
647 | unsigned int flags = 0; | 670 | unsigned int flags = 0; |
@@ -650,6 +673,9 @@ int hibernate(void) | |||
650 | flags |= SF_PLATFORM_MODE; | 673 | flags |= SF_PLATFORM_MODE; |
651 | if (nocompress) | 674 | if (nocompress) |
652 | flags |= SF_NOCOMPRESS_MODE; | 675 | flags |= SF_NOCOMPRESS_MODE; |
676 | else | ||
677 | flags |= SF_CRC32_MODE; | ||
678 | |||
653 | pr_debug("PM: writing image.\n"); | 679 | pr_debug("PM: writing image.\n"); |
654 | error = swsusp_write(flags); | 680 | error = swsusp_write(flags); |
655 | swsusp_free(); | 681 | swsusp_free(); |
@@ -724,6 +750,12 @@ static int software_resume(void) | |||
724 | 750 | ||
725 | pr_debug("PM: Checking hibernation image partition %s\n", resume_file); | 751 | pr_debug("PM: Checking hibernation image partition %s\n", resume_file); |
726 | 752 | ||
753 | if (resume_delay) { | ||
754 | printk(KERN_INFO "Waiting %dsec before reading resume device...\n", | ||
755 | resume_delay); | ||
756 | ssleep(resume_delay); | ||
757 | } | ||
758 | |||
727 | /* Check if the device is there */ | 759 | /* Check if the device is there */ |
728 | swsusp_resume_device = name_to_dev_t(resume_file); | 760 | swsusp_resume_device = name_to_dev_t(resume_file); |
729 | if (!swsusp_resume_device) { | 761 | if (!swsusp_resume_device) { |
@@ -732,6 +764,13 @@ static int software_resume(void) | |||
732 | * to wait for this to finish. | 764 | * to wait for this to finish. |
733 | */ | 765 | */ |
734 | wait_for_device_probe(); | 766 | wait_for_device_probe(); |
767 | |||
768 | if (resume_wait) { | ||
769 | while ((swsusp_resume_device = name_to_dev_t(resume_file)) == 0) | ||
770 | msleep(10); | ||
771 | async_synchronize_full(); | ||
772 | } | ||
773 | |||
735 | /* | 774 | /* |
736 | * We can't depend on SCSI devices being available after loading | 775 | * We can't depend on SCSI devices being available after loading |
737 | * one of their modules until scsi_complete_async_scans() is | 776 | * one of their modules until scsi_complete_async_scans() is |
@@ -1060,7 +1099,21 @@ static int __init noresume_setup(char *str) | |||
1060 | return 1; | 1099 | return 1; |
1061 | } | 1100 | } |
1062 | 1101 | ||
1102 | static int __init resumewait_setup(char *str) | ||
1103 | { | ||
1104 | resume_wait = 1; | ||
1105 | return 1; | ||
1106 | } | ||
1107 | |||
1108 | static int __init resumedelay_setup(char *str) | ||
1109 | { | ||
1110 | resume_delay = simple_strtoul(str, NULL, 0); | ||
1111 | return 1; | ||
1112 | } | ||
1113 | |||
1063 | __setup("noresume", noresume_setup); | 1114 | __setup("noresume", noresume_setup); |
1064 | __setup("resume_offset=", resume_offset_setup); | 1115 | __setup("resume_offset=", resume_offset_setup); |
1065 | __setup("resume=", resume_setup); | 1116 | __setup("resume=", resume_setup); |
1066 | __setup("hibernate=", hibernate_setup); | 1117 | __setup("hibernate=", hibernate_setup); |
1118 | __setup("resumewait", resumewait_setup); | ||
1119 | __setup("resumedelay=", resumedelay_setup); | ||
diff --git a/kernel/power/main.c b/kernel/power/main.c index 6c601f87196..36e0f0903c3 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
@@ -8,10 +8,13 @@ | |||
8 | * | 8 | * |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/export.h> | ||
11 | #include <linux/kobject.h> | 12 | #include <linux/kobject.h> |
12 | #include <linux/string.h> | 13 | #include <linux/string.h> |
13 | #include <linux/resume-trace.h> | 14 | #include <linux/resume-trace.h> |
14 | #include <linux/workqueue.h> | 15 | #include <linux/workqueue.h> |
16 | #include <linux/debugfs.h> | ||
17 | #include <linux/seq_file.h> | ||
15 | 18 | ||
16 | #include "power.h" | 19 | #include "power.h" |
17 | 20 | ||
@@ -131,6 +134,101 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, | |||
131 | power_attr(pm_test); | 134 | power_attr(pm_test); |
132 | #endif /* CONFIG_PM_DEBUG */ | 135 | #endif /* CONFIG_PM_DEBUG */ |
133 | 136 | ||
137 | #ifdef CONFIG_DEBUG_FS | ||
138 | static char *suspend_step_name(enum suspend_stat_step step) | ||
139 | { | ||
140 | switch (step) { | ||
141 | case SUSPEND_FREEZE: | ||
142 | return "freeze"; | ||
143 | case SUSPEND_PREPARE: | ||
144 | return "prepare"; | ||
145 | case SUSPEND_SUSPEND: | ||
146 | return "suspend"; | ||
147 | case SUSPEND_SUSPEND_NOIRQ: | ||
148 | return "suspend_noirq"; | ||
149 | case SUSPEND_RESUME_NOIRQ: | ||
150 | return "resume_noirq"; | ||
151 | case SUSPEND_RESUME: | ||
152 | return "resume"; | ||
153 | default: | ||
154 | return ""; | ||
155 | } | ||
156 | } | ||
157 | |||
158 | static int suspend_stats_show(struct seq_file *s, void *unused) | ||
159 | { | ||
160 | int i, index, last_dev, last_errno, last_step; | ||
161 | |||
162 | last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; | ||
163 | last_dev %= REC_FAILED_NUM; | ||
164 | last_errno = suspend_stats.last_failed_errno + REC_FAILED_NUM - 1; | ||
165 | last_errno %= REC_FAILED_NUM; | ||
166 | last_step = suspend_stats.last_failed_step + REC_FAILED_NUM - 1; | ||
167 | last_step %= REC_FAILED_NUM; | ||
168 | seq_printf(s, "%s: %d\n%s: %d\n%s: %d\n%s: %d\n" | ||
169 | "%s: %d\n%s: %d\n%s: %d\n%s: %d\n", | ||
170 | "success", suspend_stats.success, | ||
171 | "fail", suspend_stats.fail, | ||
172 | "failed_freeze", suspend_stats.failed_freeze, | ||
173 | "failed_prepare", suspend_stats.failed_prepare, | ||
174 | "failed_suspend", suspend_stats.failed_suspend, | ||
175 | "failed_suspend_noirq", | ||
176 | suspend_stats.failed_suspend_noirq, | ||
177 | "failed_resume", suspend_stats.failed_resume, | ||
178 | "failed_resume_noirq", | ||
179 | suspend_stats.failed_resume_noirq); | ||
180 | seq_printf(s, "failures:\n last_failed_dev:\t%-s\n", | ||
181 | suspend_stats.failed_devs[last_dev]); | ||
182 | for (i = 1; i < REC_FAILED_NUM; i++) { | ||
183 | index = last_dev + REC_FAILED_NUM - i; | ||
184 | index %= REC_FAILED_NUM; | ||
185 | seq_printf(s, "\t\t\t%-s\n", | ||
186 | suspend_stats.failed_devs[index]); | ||
187 | } | ||
188 | seq_printf(s, " last_failed_errno:\t%-d\n", | ||
189 | suspend_stats.errno[last_errno]); | ||
190 | for (i = 1; i < REC_FAILED_NUM; i++) { | ||
191 | index = last_errno + REC_FAILED_NUM - i; | ||
192 | index %= REC_FAILED_NUM; | ||
193 | seq_printf(s, "\t\t\t%-d\n", | ||
194 | suspend_stats.errno[index]); | ||
195 | } | ||
196 | seq_printf(s, " last_failed_step:\t%-s\n", | ||
197 | suspend_step_name( | ||
198 | suspend_stats.failed_steps[last_step])); | ||
199 | for (i = 1; i < REC_FAILED_NUM; i++) { | ||
200 | index = last_step + REC_FAILED_NUM - i; | ||
201 | index %= REC_FAILED_NUM; | ||
202 | seq_printf(s, "\t\t\t%-s\n", | ||
203 | suspend_step_name( | ||
204 | suspend_stats.failed_steps[index])); | ||
205 | } | ||
206 | |||
207 | return 0; | ||
208 | } | ||
209 | |||
210 | static int suspend_stats_open(struct inode *inode, struct file *file) | ||
211 | { | ||
212 | return single_open(file, suspend_stats_show, NULL); | ||
213 | } | ||
214 | |||
215 | static const struct file_operations suspend_stats_operations = { | ||
216 | .open = suspend_stats_open, | ||
217 | .read = seq_read, | ||
218 | .llseek = seq_lseek, | ||
219 | .release = single_release, | ||
220 | }; | ||
221 | |||
222 | static int __init pm_debugfs_init(void) | ||
223 | { | ||
224 | debugfs_create_file("suspend_stats", S_IFREG | S_IRUGO, | ||
225 | NULL, NULL, &suspend_stats_operations); | ||
226 | return 0; | ||
227 | } | ||
228 | |||
229 | late_initcall(pm_debugfs_init); | ||
230 | #endif /* CONFIG_DEBUG_FS */ | ||
231 | |||
134 | #endif /* CONFIG_PM_SLEEP */ | 232 | #endif /* CONFIG_PM_SLEEP */ |
135 | 233 | ||
136 | struct kobject *power_kobj; | 234 | struct kobject *power_kobj; |
@@ -192,8 +290,14 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, | |||
192 | if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) | 290 | if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) |
193 | break; | 291 | break; |
194 | } | 292 | } |
195 | if (state < PM_SUSPEND_MAX && *s) | 293 | if (state < PM_SUSPEND_MAX && *s) { |
196 | error = enter_state(state); | 294 | error = enter_state(state); |
295 | if (error) { | ||
296 | suspend_stats.fail++; | ||
297 | dpm_save_failed_errno(error); | ||
298 | } else | ||
299 | suspend_stats.success++; | ||
300 | } | ||
197 | #endif | 301 | #endif |
198 | 302 | ||
199 | Exit: | 303 | Exit: |
diff --git a/kernel/power/power.h b/kernel/power/power.h index 9a00a0a2628..23a2db1ec44 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h | |||
@@ -146,6 +146,7 @@ extern int swsusp_swap_in_use(void); | |||
146 | */ | 146 | */ |
147 | #define SF_PLATFORM_MODE 1 | 147 | #define SF_PLATFORM_MODE 1 |
148 | #define SF_NOCOMPRESS_MODE 2 | 148 | #define SF_NOCOMPRESS_MODE 2 |
149 | #define SF_CRC32_MODE 4 | ||
149 | 150 | ||
150 | /* kernel/power/hibernate.c */ | 151 | /* kernel/power/hibernate.c */ |
151 | extern int swsusp_check(void); | 152 | extern int swsusp_check(void); |
@@ -228,7 +229,8 @@ extern int pm_test_level; | |||
228 | #ifdef CONFIG_SUSPEND_FREEZER | 229 | #ifdef CONFIG_SUSPEND_FREEZER |
229 | static inline int suspend_freeze_processes(void) | 230 | static inline int suspend_freeze_processes(void) |
230 | { | 231 | { |
231 | return freeze_processes(); | 232 | int error = freeze_processes(); |
233 | return error ? : freeze_kernel_threads(); | ||
232 | } | 234 | } |
233 | 235 | ||
234 | static inline void suspend_thaw_processes(void) | 236 | static inline void suspend_thaw_processes(void) |
diff --git a/kernel/power/process.c b/kernel/power/process.c index 0cf3a27a6c9..addbbe5531b 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c | |||
@@ -135,7 +135,7 @@ static int try_to_freeze_tasks(bool sig_only) | |||
135 | } | 135 | } |
136 | 136 | ||
137 | /** | 137 | /** |
138 | * freeze_processes - tell processes to enter the refrigerator | 138 | * freeze_processes - Signal user space processes to enter the refrigerator. |
139 | */ | 139 | */ |
140 | int freeze_processes(void) | 140 | int freeze_processes(void) |
141 | { | 141 | { |
@@ -143,20 +143,30 @@ int freeze_processes(void) | |||
143 | 143 | ||
144 | printk("Freezing user space processes ... "); | 144 | printk("Freezing user space processes ... "); |
145 | error = try_to_freeze_tasks(true); | 145 | error = try_to_freeze_tasks(true); |
146 | if (error) | 146 | if (!error) { |
147 | goto Exit; | 147 | printk("done."); |
148 | printk("done.\n"); | 148 | oom_killer_disable(); |
149 | } | ||
150 | printk("\n"); | ||
151 | BUG_ON(in_atomic()); | ||
152 | |||
153 | return error; | ||
154 | } | ||
155 | |||
156 | /** | ||
157 | * freeze_kernel_threads - Make freezable kernel threads go to the refrigerator. | ||
158 | */ | ||
159 | int freeze_kernel_threads(void) | ||
160 | { | ||
161 | int error; | ||
149 | 162 | ||
150 | printk("Freezing remaining freezable tasks ... "); | 163 | printk("Freezing remaining freezable tasks ... "); |
151 | error = try_to_freeze_tasks(false); | 164 | error = try_to_freeze_tasks(false); |
152 | if (error) | 165 | if (!error) |
153 | goto Exit; | 166 | printk("done."); |
154 | printk("done."); | ||
155 | 167 | ||
156 | oom_killer_disable(); | ||
157 | Exit: | ||
158 | BUG_ON(in_atomic()); | ||
159 | printk("\n"); | 168 | printk("\n"); |
169 | BUG_ON(in_atomic()); | ||
160 | 170 | ||
161 | return error; | 171 | return error; |
162 | } | 172 | } |
diff --git a/kernel/pm_qos_params.c b/kernel/power/qos.c index 37f05d0f079..995e3bd3417 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/power/qos.c | |||
@@ -29,7 +29,7 @@ | |||
29 | 29 | ||
30 | /*#define DEBUG*/ | 30 | /*#define DEBUG*/ |
31 | 31 | ||
32 | #include <linux/pm_qos_params.h> | 32 | #include <linux/pm_qos.h> |
33 | #include <linux/sched.h> | 33 | #include <linux/sched.h> |
34 | #include <linux/spinlock.h> | 34 | #include <linux/spinlock.h> |
35 | #include <linux/slab.h> | 35 | #include <linux/slab.h> |
@@ -43,64 +43,61 @@ | |||
43 | #include <linux/kernel.h> | 43 | #include <linux/kernel.h> |
44 | 44 | ||
45 | #include <linux/uaccess.h> | 45 | #include <linux/uaccess.h> |
46 | #include <linux/export.h> | ||
46 | 47 | ||
47 | /* | 48 | /* |
48 | * locking rule: all changes to requests or notifiers lists | 49 | * locking rule: all changes to constraints or notifiers lists |
49 | * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock | 50 | * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock |
50 | * held, taken with _irqsave. One lock to rule them all | 51 | * held, taken with _irqsave. One lock to rule them all |
51 | */ | 52 | */ |
52 | enum pm_qos_type { | ||
53 | PM_QOS_MAX, /* return the largest value */ | ||
54 | PM_QOS_MIN /* return the smallest value */ | ||
55 | }; | ||
56 | |||
57 | /* | ||
58 | * Note: The lockless read path depends on the CPU accessing | ||
59 | * target_value atomically. Atomic access is only guaranteed on all CPU | ||
60 | * types linux supports for 32 bit quantites | ||
61 | */ | ||
62 | struct pm_qos_object { | 53 | struct pm_qos_object { |
63 | struct plist_head requests; | 54 | struct pm_qos_constraints *constraints; |
64 | struct blocking_notifier_head *notifiers; | ||
65 | struct miscdevice pm_qos_power_miscdev; | 55 | struct miscdevice pm_qos_power_miscdev; |
66 | char *name; | 56 | char *name; |
67 | s32 target_value; /* Do not change to 64 bit */ | ||
68 | s32 default_value; | ||
69 | enum pm_qos_type type; | ||
70 | }; | 57 | }; |
71 | 58 | ||
72 | static DEFINE_SPINLOCK(pm_qos_lock); | 59 | static DEFINE_SPINLOCK(pm_qos_lock); |
73 | 60 | ||
74 | static struct pm_qos_object null_pm_qos; | 61 | static struct pm_qos_object null_pm_qos; |
62 | |||
75 | static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier); | 63 | static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier); |
76 | static struct pm_qos_object cpu_dma_pm_qos = { | 64 | static struct pm_qos_constraints cpu_dma_constraints = { |
77 | .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests), | 65 | .list = PLIST_HEAD_INIT(cpu_dma_constraints.list), |
78 | .notifiers = &cpu_dma_lat_notifier, | ||
79 | .name = "cpu_dma_latency", | ||
80 | .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, | 66 | .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, |
81 | .default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, | 67 | .default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, |
82 | .type = PM_QOS_MIN, | 68 | .type = PM_QOS_MIN, |
69 | .notifiers = &cpu_dma_lat_notifier, | ||
70 | }; | ||
71 | static struct pm_qos_object cpu_dma_pm_qos = { | ||
72 | .constraints = &cpu_dma_constraints, | ||
73 | .name = "cpu_dma_latency", | ||
83 | }; | 74 | }; |
84 | 75 | ||
85 | static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); | 76 | static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); |
86 | static struct pm_qos_object network_lat_pm_qos = { | 77 | static struct pm_qos_constraints network_lat_constraints = { |
87 | .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests), | 78 | .list = PLIST_HEAD_INIT(network_lat_constraints.list), |
88 | .notifiers = &network_lat_notifier, | ||
89 | .name = "network_latency", | ||
90 | .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, | 79 | .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, |
91 | .default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, | 80 | .default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, |
92 | .type = PM_QOS_MIN | 81 | .type = PM_QOS_MIN, |
82 | .notifiers = &network_lat_notifier, | ||
83 | }; | ||
84 | static struct pm_qos_object network_lat_pm_qos = { | ||
85 | .constraints = &network_lat_constraints, | ||
86 | .name = "network_latency", | ||
93 | }; | 87 | }; |
94 | 88 | ||
95 | 89 | ||
96 | static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier); | 90 | static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier); |
97 | static struct pm_qos_object network_throughput_pm_qos = { | 91 | static struct pm_qos_constraints network_tput_constraints = { |
98 | .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests), | 92 | .list = PLIST_HEAD_INIT(network_tput_constraints.list), |
99 | .notifiers = &network_throughput_notifier, | ||
100 | .name = "network_throughput", | ||
101 | .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, | 93 | .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, |
102 | .default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, | 94 | .default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, |
103 | .type = PM_QOS_MAX, | 95 | .type = PM_QOS_MAX, |
96 | .notifiers = &network_throughput_notifier, | ||
97 | }; | ||
98 | static struct pm_qos_object network_throughput_pm_qos = { | ||
99 | .constraints = &network_tput_constraints, | ||
100 | .name = "network_throughput", | ||
104 | }; | 101 | }; |
105 | 102 | ||
106 | 103 | ||
@@ -127,17 +124,17 @@ static const struct file_operations pm_qos_power_fops = { | |||
127 | }; | 124 | }; |
128 | 125 | ||
129 | /* unlocked internal variant */ | 126 | /* unlocked internal variant */ |
130 | static inline int pm_qos_get_value(struct pm_qos_object *o) | 127 | static inline int pm_qos_get_value(struct pm_qos_constraints *c) |
131 | { | 128 | { |
132 | if (plist_head_empty(&o->requests)) | 129 | if (plist_head_empty(&c->list)) |
133 | return o->default_value; | 130 | return c->default_value; |
134 | 131 | ||
135 | switch (o->type) { | 132 | switch (c->type) { |
136 | case PM_QOS_MIN: | 133 | case PM_QOS_MIN: |
137 | return plist_first(&o->requests)->prio; | 134 | return plist_first(&c->list)->prio; |
138 | 135 | ||
139 | case PM_QOS_MAX: | 136 | case PM_QOS_MAX: |
140 | return plist_last(&o->requests)->prio; | 137 | return plist_last(&c->list)->prio; |
141 | 138 | ||
142 | default: | 139 | default: |
143 | /* runtime check for not using enum */ | 140 | /* runtime check for not using enum */ |
@@ -145,69 +142,73 @@ static inline int pm_qos_get_value(struct pm_qos_object *o) | |||
145 | } | 142 | } |
146 | } | 143 | } |
147 | 144 | ||
148 | static inline s32 pm_qos_read_value(struct pm_qos_object *o) | 145 | s32 pm_qos_read_value(struct pm_qos_constraints *c) |
149 | { | 146 | { |
150 | return o->target_value; | 147 | return c->target_value; |
151 | } | 148 | } |
152 | 149 | ||
153 | static inline void pm_qos_set_value(struct pm_qos_object *o, s32 value) | 150 | static inline void pm_qos_set_value(struct pm_qos_constraints *c, s32 value) |
154 | { | 151 | { |
155 | o->target_value = value; | 152 | c->target_value = value; |
156 | } | 153 | } |
157 | 154 | ||
158 | static void update_target(struct pm_qos_object *o, struct plist_node *node, | 155 | /** |
159 | int del, int value) | 156 | * pm_qos_update_target - manages the constraints list and calls the notifiers |
157 | * if needed | ||
158 | * @c: constraints data struct | ||
159 | * @node: request to add to the list, to update or to remove | ||
160 | * @action: action to take on the constraints list | ||
161 | * @value: value of the request to add or update | ||
162 | * | ||
163 | * This function returns 1 if the aggregated constraint value has changed, 0 | ||
164 | * otherwise. | ||
165 | */ | ||
166 | int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, | ||
167 | enum pm_qos_req_action action, int value) | ||
160 | { | 168 | { |
161 | unsigned long flags; | 169 | unsigned long flags; |
162 | int prev_value, curr_value; | 170 | int prev_value, curr_value, new_value; |
163 | 171 | ||
164 | spin_lock_irqsave(&pm_qos_lock, flags); | 172 | spin_lock_irqsave(&pm_qos_lock, flags); |
165 | prev_value = pm_qos_get_value(o); | 173 | prev_value = pm_qos_get_value(c); |
166 | /* PM_QOS_DEFAULT_VALUE is a signal that the value is unchanged */ | 174 | if (value == PM_QOS_DEFAULT_VALUE) |
167 | if (value != PM_QOS_DEFAULT_VALUE) { | 175 | new_value = c->default_value; |
176 | else | ||
177 | new_value = value; | ||
178 | |||
179 | switch (action) { | ||
180 | case PM_QOS_REMOVE_REQ: | ||
181 | plist_del(node, &c->list); | ||
182 | break; | ||
183 | case PM_QOS_UPDATE_REQ: | ||
168 | /* | 184 | /* |
169 | * to change the list, we atomically remove, reinit | 185 | * to change the list, we atomically remove, reinit |
170 | * with new value and add, then see if the extremal | 186 | * with new value and add, then see if the extremal |
171 | * changed | 187 | * changed |
172 | */ | 188 | */ |
173 | plist_del(node, &o->requests); | 189 | plist_del(node, &c->list); |
174 | plist_node_init(node, value); | 190 | case PM_QOS_ADD_REQ: |
175 | plist_add(node, &o->requests); | 191 | plist_node_init(node, new_value); |
176 | } else if (del) { | 192 | plist_add(node, &c->list); |
177 | plist_del(node, &o->requests); | 193 | break; |
178 | } else { | 194 | default: |
179 | plist_add(node, &o->requests); | 195 | /* no action */ |
196 | ; | ||
180 | } | 197 | } |
181 | curr_value = pm_qos_get_value(o); | 198 | |
182 | pm_qos_set_value(o, curr_value); | 199 | curr_value = pm_qos_get_value(c); |
200 | pm_qos_set_value(c, curr_value); | ||
201 | |||
183 | spin_unlock_irqrestore(&pm_qos_lock, flags); | 202 | spin_unlock_irqrestore(&pm_qos_lock, flags); |
184 | 203 | ||
185 | if (prev_value != curr_value) | 204 | if (prev_value != curr_value) { |
186 | blocking_notifier_call_chain(o->notifiers, | 205 | blocking_notifier_call_chain(c->notifiers, |
187 | (unsigned long)curr_value, | 206 | (unsigned long)curr_value, |
188 | NULL); | 207 | NULL); |
189 | } | 208 | return 1; |
190 | 209 | } else { | |
191 | static int register_pm_qos_misc(struct pm_qos_object *qos) | 210 | return 0; |
192 | { | ||
193 | qos->pm_qos_power_miscdev.minor = MISC_DYNAMIC_MINOR; | ||
194 | qos->pm_qos_power_miscdev.name = qos->name; | ||
195 | qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops; | ||
196 | |||
197 | return misc_register(&qos->pm_qos_power_miscdev); | ||
198 | } | ||
199 | |||
200 | static int find_pm_qos_object_by_minor(int minor) | ||
201 | { | ||
202 | int pm_qos_class; | ||
203 | |||
204 | for (pm_qos_class = 0; | ||
205 | pm_qos_class < PM_QOS_NUM_CLASSES; pm_qos_class++) { | ||
206 | if (minor == | ||
207 | pm_qos_array[pm_qos_class]->pm_qos_power_miscdev.minor) | ||
208 | return pm_qos_class; | ||
209 | } | 211 | } |
210 | return -1; | ||
211 | } | 212 | } |
212 | 213 | ||
213 | /** | 214 | /** |
@@ -218,11 +219,11 @@ static int find_pm_qos_object_by_minor(int minor) | |||
218 | */ | 219 | */ |
219 | int pm_qos_request(int pm_qos_class) | 220 | int pm_qos_request(int pm_qos_class) |
220 | { | 221 | { |
221 | return pm_qos_read_value(pm_qos_array[pm_qos_class]); | 222 | return pm_qos_read_value(pm_qos_array[pm_qos_class]->constraints); |
222 | } | 223 | } |
223 | EXPORT_SYMBOL_GPL(pm_qos_request); | 224 | EXPORT_SYMBOL_GPL(pm_qos_request); |
224 | 225 | ||
225 | int pm_qos_request_active(struct pm_qos_request_list *req) | 226 | int pm_qos_request_active(struct pm_qos_request *req) |
226 | { | 227 | { |
227 | return req->pm_qos_class != 0; | 228 | return req->pm_qos_class != 0; |
228 | } | 229 | } |
@@ -230,40 +231,36 @@ EXPORT_SYMBOL_GPL(pm_qos_request_active); | |||
230 | 231 | ||
231 | /** | 232 | /** |
232 | * pm_qos_add_request - inserts new qos request into the list | 233 | * pm_qos_add_request - inserts new qos request into the list |
233 | * @dep: pointer to a preallocated handle | 234 | * @req: pointer to a preallocated handle |
234 | * @pm_qos_class: identifies which list of qos request to use | 235 | * @pm_qos_class: identifies which list of qos request to use |
235 | * @value: defines the qos request | 236 | * @value: defines the qos request |
236 | * | 237 | * |
237 | * This function inserts a new entry in the pm_qos_class list of requested qos | 238 | * This function inserts a new entry in the pm_qos_class list of requested qos |
238 | * performance characteristics. It recomputes the aggregate QoS expectations | 239 | * performance characteristics. It recomputes the aggregate QoS expectations |
239 | * for the pm_qos_class of parameters and initializes the pm_qos_request_list | 240 | * for the pm_qos_class of parameters and initializes the pm_qos_request |
240 | * handle. Caller needs to save this handle for later use in updates and | 241 | * handle. Caller needs to save this handle for later use in updates and |
241 | * removal. | 242 | * removal. |
242 | */ | 243 | */ |
243 | 244 | ||
244 | void pm_qos_add_request(struct pm_qos_request_list *dep, | 245 | void pm_qos_add_request(struct pm_qos_request *req, |
245 | int pm_qos_class, s32 value) | 246 | int pm_qos_class, s32 value) |
246 | { | 247 | { |
247 | struct pm_qos_object *o = pm_qos_array[pm_qos_class]; | 248 | if (!req) /*guard against callers passing in null */ |
248 | int new_value; | 249 | return; |
249 | 250 | ||
250 | if (pm_qos_request_active(dep)) { | 251 | if (pm_qos_request_active(req)) { |
251 | WARN(1, KERN_ERR "pm_qos_add_request() called for already added request\n"); | 252 | WARN(1, KERN_ERR "pm_qos_add_request() called for already added request\n"); |
252 | return; | 253 | return; |
253 | } | 254 | } |
254 | if (value == PM_QOS_DEFAULT_VALUE) | 255 | req->pm_qos_class = pm_qos_class; |
255 | new_value = o->default_value; | 256 | pm_qos_update_target(pm_qos_array[pm_qos_class]->constraints, |
256 | else | 257 | &req->node, PM_QOS_ADD_REQ, value); |
257 | new_value = value; | ||
258 | plist_node_init(&dep->list, new_value); | ||
259 | dep->pm_qos_class = pm_qos_class; | ||
260 | update_target(o, &dep->list, 0, PM_QOS_DEFAULT_VALUE); | ||
261 | } | 258 | } |
262 | EXPORT_SYMBOL_GPL(pm_qos_add_request); | 259 | EXPORT_SYMBOL_GPL(pm_qos_add_request); |
263 | 260 | ||
264 | /** | 261 | /** |
265 | * pm_qos_update_request - modifies an existing qos request | 262 | * pm_qos_update_request - modifies an existing qos request |
266 | * @pm_qos_req : handle to list element holding a pm_qos request to use | 263 | * @req : handle to list element holding a pm_qos request to use |
267 | * @value: defines the qos request | 264 | * @value: defines the qos request |
268 | * | 265 | * |
269 | * Updates an existing qos request for the pm_qos_class of parameters along | 266 | * Updates an existing qos request for the pm_qos_class of parameters along |
@@ -271,56 +268,47 @@ EXPORT_SYMBOL_GPL(pm_qos_add_request); | |||
271 | * | 268 | * |
272 | * Attempts are made to make this code callable on hot code paths. | 269 | * Attempts are made to make this code callable on hot code paths. |
273 | */ | 270 | */ |
274 | void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req, | 271 | void pm_qos_update_request(struct pm_qos_request *req, |
275 | s32 new_value) | 272 | s32 new_value) |
276 | { | 273 | { |
277 | s32 temp; | 274 | if (!req) /*guard against callers passing in null */ |
278 | struct pm_qos_object *o; | ||
279 | |||
280 | if (!pm_qos_req) /*guard against callers passing in null */ | ||
281 | return; | 275 | return; |
282 | 276 | ||
283 | if (!pm_qos_request_active(pm_qos_req)) { | 277 | if (!pm_qos_request_active(req)) { |
284 | WARN(1, KERN_ERR "pm_qos_update_request() called for unknown object\n"); | 278 | WARN(1, KERN_ERR "pm_qos_update_request() called for unknown object\n"); |
285 | return; | 279 | return; |
286 | } | 280 | } |
287 | 281 | ||
288 | o = pm_qos_array[pm_qos_req->pm_qos_class]; | 282 | if (new_value != req->node.prio) |
289 | 283 | pm_qos_update_target( | |
290 | if (new_value == PM_QOS_DEFAULT_VALUE) | 284 | pm_qos_array[req->pm_qos_class]->constraints, |
291 | temp = o->default_value; | 285 | &req->node, PM_QOS_UPDATE_REQ, new_value); |
292 | else | ||
293 | temp = new_value; | ||
294 | |||
295 | if (temp != pm_qos_req->list.prio) | ||
296 | update_target(o, &pm_qos_req->list, 0, temp); | ||
297 | } | 286 | } |
298 | EXPORT_SYMBOL_GPL(pm_qos_update_request); | 287 | EXPORT_SYMBOL_GPL(pm_qos_update_request); |
299 | 288 | ||
300 | /** | 289 | /** |
301 | * pm_qos_remove_request - modifies an existing qos request | 290 | * pm_qos_remove_request - modifies an existing qos request |
302 | * @pm_qos_req: handle to request list element | 291 | * @req: handle to request list element |
303 | * | 292 | * |
304 | * Will remove pm qos request from the list of requests and | 293 | * Will remove pm qos request from the list of constraints and |
305 | * recompute the current target value for the pm_qos_class. Call this | 294 | * recompute the current target value for the pm_qos_class. Call this |
306 | * on slow code paths. | 295 | * on slow code paths. |
307 | */ | 296 | */ |
308 | void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req) | 297 | void pm_qos_remove_request(struct pm_qos_request *req) |
309 | { | 298 | { |
310 | struct pm_qos_object *o; | 299 | if (!req) /*guard against callers passing in null */ |
311 | |||
312 | if (pm_qos_req == NULL) | ||
313 | return; | 300 | return; |
314 | /* silent return to keep pcm code cleaner */ | 301 | /* silent return to keep pcm code cleaner */ |
315 | 302 | ||
316 | if (!pm_qos_request_active(pm_qos_req)) { | 303 | if (!pm_qos_request_active(req)) { |
317 | WARN(1, KERN_ERR "pm_qos_remove_request() called for unknown object\n"); | 304 | WARN(1, KERN_ERR "pm_qos_remove_request() called for unknown object\n"); |
318 | return; | 305 | return; |
319 | } | 306 | } |
320 | 307 | ||
321 | o = pm_qos_array[pm_qos_req->pm_qos_class]; | 308 | pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints, |
322 | update_target(o, &pm_qos_req->list, 1, PM_QOS_DEFAULT_VALUE); | 309 | &req->node, PM_QOS_REMOVE_REQ, |
323 | memset(pm_qos_req, 0, sizeof(*pm_qos_req)); | 310 | PM_QOS_DEFAULT_VALUE); |
311 | memset(req, 0, sizeof(*req)); | ||
324 | } | 312 | } |
325 | EXPORT_SYMBOL_GPL(pm_qos_remove_request); | 313 | EXPORT_SYMBOL_GPL(pm_qos_remove_request); |
326 | 314 | ||
@@ -337,7 +325,8 @@ int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) | |||
337 | int retval; | 325 | int retval; |
338 | 326 | ||
339 | retval = blocking_notifier_chain_register( | 327 | retval = blocking_notifier_chain_register( |
340 | pm_qos_array[pm_qos_class]->notifiers, notifier); | 328 | pm_qos_array[pm_qos_class]->constraints->notifiers, |
329 | notifier); | ||
341 | 330 | ||
342 | return retval; | 331 | return retval; |
343 | } | 332 | } |
@@ -356,34 +345,57 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) | |||
356 | int retval; | 345 | int retval; |
357 | 346 | ||
358 | retval = blocking_notifier_chain_unregister( | 347 | retval = blocking_notifier_chain_unregister( |
359 | pm_qos_array[pm_qos_class]->notifiers, notifier); | 348 | pm_qos_array[pm_qos_class]->constraints->notifiers, |
349 | notifier); | ||
360 | 350 | ||
361 | return retval; | 351 | return retval; |
362 | } | 352 | } |
363 | EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); | 353 | EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); |
364 | 354 | ||
355 | /* User space interface to PM QoS classes via misc devices */ | ||
356 | static int register_pm_qos_misc(struct pm_qos_object *qos) | ||
357 | { | ||
358 | qos->pm_qos_power_miscdev.minor = MISC_DYNAMIC_MINOR; | ||
359 | qos->pm_qos_power_miscdev.name = qos->name; | ||
360 | qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops; | ||
361 | |||
362 | return misc_register(&qos->pm_qos_power_miscdev); | ||
363 | } | ||
364 | |||
365 | static int find_pm_qos_object_by_minor(int minor) | ||
366 | { | ||
367 | int pm_qos_class; | ||
368 | |||
369 | for (pm_qos_class = 0; | ||
370 | pm_qos_class < PM_QOS_NUM_CLASSES; pm_qos_class++) { | ||
371 | if (minor == | ||
372 | pm_qos_array[pm_qos_class]->pm_qos_power_miscdev.minor) | ||
373 | return pm_qos_class; | ||
374 | } | ||
375 | return -1; | ||
376 | } | ||
377 | |||
365 | static int pm_qos_power_open(struct inode *inode, struct file *filp) | 378 | static int pm_qos_power_open(struct inode *inode, struct file *filp) |
366 | { | 379 | { |
367 | long pm_qos_class; | 380 | long pm_qos_class; |
368 | 381 | ||
369 | pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); | 382 | pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); |
370 | if (pm_qos_class >= 0) { | 383 | if (pm_qos_class >= 0) { |
371 | struct pm_qos_request_list *req = kzalloc(sizeof(*req), GFP_KERNEL); | 384 | struct pm_qos_request *req = kzalloc(sizeof(*req), GFP_KERNEL); |
372 | if (!req) | 385 | if (!req) |
373 | return -ENOMEM; | 386 | return -ENOMEM; |
374 | 387 | ||
375 | pm_qos_add_request(req, pm_qos_class, PM_QOS_DEFAULT_VALUE); | 388 | pm_qos_add_request(req, pm_qos_class, PM_QOS_DEFAULT_VALUE); |
376 | filp->private_data = req; | 389 | filp->private_data = req; |
377 | 390 | ||
378 | if (filp->private_data) | 391 | return 0; |
379 | return 0; | ||
380 | } | 392 | } |
381 | return -EPERM; | 393 | return -EPERM; |
382 | } | 394 | } |
383 | 395 | ||
384 | static int pm_qos_power_release(struct inode *inode, struct file *filp) | 396 | static int pm_qos_power_release(struct inode *inode, struct file *filp) |
385 | { | 397 | { |
386 | struct pm_qos_request_list *req; | 398 | struct pm_qos_request *req; |
387 | 399 | ||
388 | req = filp->private_data; | 400 | req = filp->private_data; |
389 | pm_qos_remove_request(req); | 401 | pm_qos_remove_request(req); |
@@ -398,17 +410,15 @@ static ssize_t pm_qos_power_read(struct file *filp, char __user *buf, | |||
398 | { | 410 | { |
399 | s32 value; | 411 | s32 value; |
400 | unsigned long flags; | 412 | unsigned long flags; |
401 | struct pm_qos_object *o; | 413 | struct pm_qos_request *req = filp->private_data; |
402 | struct pm_qos_request_list *pm_qos_req = filp->private_data; | ||
403 | 414 | ||
404 | if (!pm_qos_req) | 415 | if (!req) |
405 | return -EINVAL; | 416 | return -EINVAL; |
406 | if (!pm_qos_request_active(pm_qos_req)) | 417 | if (!pm_qos_request_active(req)) |
407 | return -EINVAL; | 418 | return -EINVAL; |
408 | 419 | ||
409 | o = pm_qos_array[pm_qos_req->pm_qos_class]; | ||
410 | spin_lock_irqsave(&pm_qos_lock, flags); | 420 | spin_lock_irqsave(&pm_qos_lock, flags); |
411 | value = pm_qos_get_value(o); | 421 | value = pm_qos_get_value(pm_qos_array[req->pm_qos_class]->constraints); |
412 | spin_unlock_irqrestore(&pm_qos_lock, flags); | 422 | spin_unlock_irqrestore(&pm_qos_lock, flags); |
413 | 423 | ||
414 | return simple_read_from_buffer(buf, count, f_pos, &value, sizeof(s32)); | 424 | return simple_read_from_buffer(buf, count, f_pos, &value, sizeof(s32)); |
@@ -418,7 +428,7 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, | |||
418 | size_t count, loff_t *f_pos) | 428 | size_t count, loff_t *f_pos) |
419 | { | 429 | { |
420 | s32 value; | 430 | s32 value; |
421 | struct pm_qos_request_list *pm_qos_req; | 431 | struct pm_qos_request *req; |
422 | 432 | ||
423 | if (count == sizeof(s32)) { | 433 | if (count == sizeof(s32)) { |
424 | if (copy_from_user(&value, buf, sizeof(s32))) | 434 | if (copy_from_user(&value, buf, sizeof(s32))) |
@@ -449,8 +459,8 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, | |||
449 | return -EINVAL; | 459 | return -EINVAL; |
450 | } | 460 | } |
451 | 461 | ||
452 | pm_qos_req = filp->private_data; | 462 | req = filp->private_data; |
453 | pm_qos_update_request(pm_qos_req, value); | 463 | pm_qos_update_request(req, value); |
454 | 464 | ||
455 | return count; | 465 | return count; |
456 | } | 466 | } |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 06efa54f93d..cbe2c144139 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -1339,6 +1339,9 @@ int hibernate_preallocate_memory(void) | |||
1339 | count += highmem; | 1339 | count += highmem; |
1340 | count -= totalreserve_pages; | 1340 | count -= totalreserve_pages; |
1341 | 1341 | ||
1342 | /* Add number of pages required for page keys (s390 only). */ | ||
1343 | size += page_key_additional_pages(saveable); | ||
1344 | |||
1342 | /* Compute the maximum number of saveable pages to leave in memory. */ | 1345 | /* Compute the maximum number of saveable pages to leave in memory. */ |
1343 | max_size = (count - (size + PAGES_FOR_IO)) / 2 | 1346 | max_size = (count - (size + PAGES_FOR_IO)) / 2 |
1344 | - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE); | 1347 | - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE); |
@@ -1662,6 +1665,8 @@ pack_pfns(unsigned long *buf, struct memory_bitmap *bm) | |||
1662 | buf[j] = memory_bm_next_pfn(bm); | 1665 | buf[j] = memory_bm_next_pfn(bm); |
1663 | if (unlikely(buf[j] == BM_END_OF_MAP)) | 1666 | if (unlikely(buf[j] == BM_END_OF_MAP)) |
1664 | break; | 1667 | break; |
1668 | /* Save page key for data page (s390 only). */ | ||
1669 | page_key_read(buf + j); | ||
1665 | } | 1670 | } |
1666 | } | 1671 | } |
1667 | 1672 | ||
@@ -1821,6 +1826,9 @@ static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) | |||
1821 | if (unlikely(buf[j] == BM_END_OF_MAP)) | 1826 | if (unlikely(buf[j] == BM_END_OF_MAP)) |
1822 | break; | 1827 | break; |
1823 | 1828 | ||
1829 | /* Extract and buffer page key for data page (s390 only). */ | ||
1830 | page_key_memorize(buf + j); | ||
1831 | |||
1824 | if (memory_bm_pfn_present(bm, buf[j])) | 1832 | if (memory_bm_pfn_present(bm, buf[j])) |
1825 | memory_bm_set_bit(bm, buf[j]); | 1833 | memory_bm_set_bit(bm, buf[j]); |
1826 | else | 1834 | else |
@@ -2223,6 +2231,11 @@ int snapshot_write_next(struct snapshot_handle *handle) | |||
2223 | if (error) | 2231 | if (error) |
2224 | return error; | 2232 | return error; |
2225 | 2233 | ||
2234 | /* Allocate buffer for page keys. */ | ||
2235 | error = page_key_alloc(nr_copy_pages); | ||
2236 | if (error) | ||
2237 | return error; | ||
2238 | |||
2226 | } else if (handle->cur <= nr_meta_pages + 1) { | 2239 | } else if (handle->cur <= nr_meta_pages + 1) { |
2227 | error = unpack_orig_pfns(buffer, &copy_bm); | 2240 | error = unpack_orig_pfns(buffer, &copy_bm); |
2228 | if (error) | 2241 | if (error) |
@@ -2243,6 +2256,8 @@ int snapshot_write_next(struct snapshot_handle *handle) | |||
2243 | } | 2256 | } |
2244 | } else { | 2257 | } else { |
2245 | copy_last_highmem_page(); | 2258 | copy_last_highmem_page(); |
2259 | /* Restore page key for data page (s390 only). */ | ||
2260 | page_key_write(handle->buffer); | ||
2246 | handle->buffer = get_buffer(&orig_bm, &ca); | 2261 | handle->buffer = get_buffer(&orig_bm, &ca); |
2247 | if (IS_ERR(handle->buffer)) | 2262 | if (IS_ERR(handle->buffer)) |
2248 | return PTR_ERR(handle->buffer); | 2263 | return PTR_ERR(handle->buffer); |
@@ -2264,6 +2279,9 @@ int snapshot_write_next(struct snapshot_handle *handle) | |||
2264 | void snapshot_write_finalize(struct snapshot_handle *handle) | 2279 | void snapshot_write_finalize(struct snapshot_handle *handle) |
2265 | { | 2280 | { |
2266 | copy_last_highmem_page(); | 2281 | copy_last_highmem_page(); |
2282 | /* Restore page key for data page (s390 only). */ | ||
2283 | page_key_write(handle->buffer); | ||
2284 | page_key_free(); | ||
2267 | /* Free only if we have loaded the image entirely */ | 2285 | /* Free only if we have loaded the image entirely */ |
2268 | if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) { | 2286 | if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) { |
2269 | memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); | 2287 | memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); |
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index b6b71ad2208..4953dc054c5 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/delay.h> | 12 | #include <linux/delay.h> |
13 | #include <linux/errno.h> | 13 | #include <linux/errno.h> |
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/kmod.h> | ||
15 | #include <linux/console.h> | 16 | #include <linux/console.h> |
16 | #include <linux/cpu.h> | 17 | #include <linux/cpu.h> |
17 | #include <linux/syscalls.h> | 18 | #include <linux/syscalls.h> |
@@ -21,6 +22,7 @@ | |||
21 | #include <linux/list.h> | 22 | #include <linux/list.h> |
22 | #include <linux/mm.h> | 23 | #include <linux/mm.h> |
23 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/export.h> | ||
24 | #include <linux/suspend.h> | 26 | #include <linux/suspend.h> |
25 | #include <linux/syscore_ops.h> | 27 | #include <linux/syscore_ops.h> |
26 | #include <trace/events/power.h> | 28 | #include <trace/events/power.h> |
@@ -104,7 +106,10 @@ static int suspend_prepare(void) | |||
104 | goto Finish; | 106 | goto Finish; |
105 | 107 | ||
106 | error = suspend_freeze_processes(); | 108 | error = suspend_freeze_processes(); |
107 | if (!error) | 109 | if (error) { |
110 | suspend_stats.failed_freeze++; | ||
111 | dpm_save_failed_step(SUSPEND_FREEZE); | ||
112 | } else | ||
108 | return 0; | 113 | return 0; |
109 | 114 | ||
110 | suspend_thaw_processes(); | 115 | suspend_thaw_processes(); |
@@ -315,8 +320,16 @@ int enter_state(suspend_state_t state) | |||
315 | */ | 320 | */ |
316 | int pm_suspend(suspend_state_t state) | 321 | int pm_suspend(suspend_state_t state) |
317 | { | 322 | { |
318 | if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX) | 323 | int ret; |
319 | return enter_state(state); | 324 | if (state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX) { |
325 | ret = enter_state(state); | ||
326 | if (ret) { | ||
327 | suspend_stats.fail++; | ||
328 | dpm_save_failed_errno(ret); | ||
329 | } else | ||
330 | suspend_stats.success++; | ||
331 | return ret; | ||
332 | } | ||
320 | return -EINVAL; | 333 | return -EINVAL; |
321 | } | 334 | } |
322 | EXPORT_SYMBOL(pm_suspend); | 335 | EXPORT_SYMBOL(pm_suspend); |
diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 7c97c3a0eee..11a594c4ba2 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c | |||
@@ -27,6 +27,10 @@ | |||
27 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
28 | #include <linux/lzo.h> | 28 | #include <linux/lzo.h> |
29 | #include <linux/vmalloc.h> | 29 | #include <linux/vmalloc.h> |
30 | #include <linux/cpumask.h> | ||
31 | #include <linux/atomic.h> | ||
32 | #include <linux/kthread.h> | ||
33 | #include <linux/crc32.h> | ||
30 | 34 | ||
31 | #include "power.h" | 35 | #include "power.h" |
32 | 36 | ||
@@ -43,8 +47,7 @@ | |||
43 | * allocated and populated one at a time, so we only need one memory | 47 | * allocated and populated one at a time, so we only need one memory |
44 | * page to set up the entire structure. | 48 | * page to set up the entire structure. |
45 | * | 49 | * |
46 | * During resume we also only need to use one swap_map_page structure | 50 | * During resume we pick up all swap_map_page structures into a list. |
47 | * at a time. | ||
48 | */ | 51 | */ |
49 | 52 | ||
50 | #define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1) | 53 | #define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1) |
@@ -54,6 +57,11 @@ struct swap_map_page { | |||
54 | sector_t next_swap; | 57 | sector_t next_swap; |
55 | }; | 58 | }; |
56 | 59 | ||
60 | struct swap_map_page_list { | ||
61 | struct swap_map_page *map; | ||
62 | struct swap_map_page_list *next; | ||
63 | }; | ||
64 | |||
57 | /** | 65 | /** |
58 | * The swap_map_handle structure is used for handling swap in | 66 | * The swap_map_handle structure is used for handling swap in |
59 | * a file-alike way | 67 | * a file-alike way |
@@ -61,13 +69,18 @@ struct swap_map_page { | |||
61 | 69 | ||
62 | struct swap_map_handle { | 70 | struct swap_map_handle { |
63 | struct swap_map_page *cur; | 71 | struct swap_map_page *cur; |
72 | struct swap_map_page_list *maps; | ||
64 | sector_t cur_swap; | 73 | sector_t cur_swap; |
65 | sector_t first_sector; | 74 | sector_t first_sector; |
66 | unsigned int k; | 75 | unsigned int k; |
76 | unsigned long nr_free_pages, written; | ||
77 | u32 crc32; | ||
67 | }; | 78 | }; |
68 | 79 | ||
69 | struct swsusp_header { | 80 | struct swsusp_header { |
70 | char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)]; | 81 | char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int) - |
82 | sizeof(u32)]; | ||
83 | u32 crc32; | ||
71 | sector_t image; | 84 | sector_t image; |
72 | unsigned int flags; /* Flags to pass to the "boot" kernel */ | 85 | unsigned int flags; /* Flags to pass to the "boot" kernel */ |
73 | char orig_sig[10]; | 86 | char orig_sig[10]; |
@@ -199,6 +212,8 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) | |||
199 | memcpy(swsusp_header->sig, HIBERNATE_SIG, 10); | 212 | memcpy(swsusp_header->sig, HIBERNATE_SIG, 10); |
200 | swsusp_header->image = handle->first_sector; | 213 | swsusp_header->image = handle->first_sector; |
201 | swsusp_header->flags = flags; | 214 | swsusp_header->flags = flags; |
215 | if (flags & SF_CRC32_MODE) | ||
216 | swsusp_header->crc32 = handle->crc32; | ||
202 | error = hib_bio_write_page(swsusp_resume_block, | 217 | error = hib_bio_write_page(swsusp_resume_block, |
203 | swsusp_header, NULL); | 218 | swsusp_header, NULL); |
204 | } else { | 219 | } else { |
@@ -245,6 +260,7 @@ static int swsusp_swap_check(void) | |||
245 | static int write_page(void *buf, sector_t offset, struct bio **bio_chain) | 260 | static int write_page(void *buf, sector_t offset, struct bio **bio_chain) |
246 | { | 261 | { |
247 | void *src; | 262 | void *src; |
263 | int ret; | ||
248 | 264 | ||
249 | if (!offset) | 265 | if (!offset) |
250 | return -ENOSPC; | 266 | return -ENOSPC; |
@@ -254,9 +270,17 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain) | |||
254 | if (src) { | 270 | if (src) { |
255 | copy_page(src, buf); | 271 | copy_page(src, buf); |
256 | } else { | 272 | } else { |
257 | WARN_ON_ONCE(1); | 273 | ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */ |
258 | bio_chain = NULL; /* Go synchronous */ | 274 | if (ret) |
259 | src = buf; | 275 | return ret; |
276 | src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); | ||
277 | if (src) { | ||
278 | copy_page(src, buf); | ||
279 | } else { | ||
280 | WARN_ON_ONCE(1); | ||
281 | bio_chain = NULL; /* Go synchronous */ | ||
282 | src = buf; | ||
283 | } | ||
260 | } | 284 | } |
261 | } else { | 285 | } else { |
262 | src = buf; | 286 | src = buf; |
@@ -293,6 +317,8 @@ static int get_swap_writer(struct swap_map_handle *handle) | |||
293 | goto err_rel; | 317 | goto err_rel; |
294 | } | 318 | } |
295 | handle->k = 0; | 319 | handle->k = 0; |
320 | handle->nr_free_pages = nr_free_pages() >> 1; | ||
321 | handle->written = 0; | ||
296 | handle->first_sector = handle->cur_swap; | 322 | handle->first_sector = handle->cur_swap; |
297 | return 0; | 323 | return 0; |
298 | err_rel: | 324 | err_rel: |
@@ -316,20 +342,23 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, | |||
316 | return error; | 342 | return error; |
317 | handle->cur->entries[handle->k++] = offset; | 343 | handle->cur->entries[handle->k++] = offset; |
318 | if (handle->k >= MAP_PAGE_ENTRIES) { | 344 | if (handle->k >= MAP_PAGE_ENTRIES) { |
319 | error = hib_wait_on_bio_chain(bio_chain); | ||
320 | if (error) | ||
321 | goto out; | ||
322 | offset = alloc_swapdev_block(root_swap); | 345 | offset = alloc_swapdev_block(root_swap); |
323 | if (!offset) | 346 | if (!offset) |
324 | return -ENOSPC; | 347 | return -ENOSPC; |
325 | handle->cur->next_swap = offset; | 348 | handle->cur->next_swap = offset; |
326 | error = write_page(handle->cur, handle->cur_swap, NULL); | 349 | error = write_page(handle->cur, handle->cur_swap, bio_chain); |
327 | if (error) | 350 | if (error) |
328 | goto out; | 351 | goto out; |
329 | clear_page(handle->cur); | 352 | clear_page(handle->cur); |
330 | handle->cur_swap = offset; | 353 | handle->cur_swap = offset; |
331 | handle->k = 0; | 354 | handle->k = 0; |
332 | } | 355 | } |
356 | if (bio_chain && ++handle->written > handle->nr_free_pages) { | ||
357 | error = hib_wait_on_bio_chain(bio_chain); | ||
358 | if (error) | ||
359 | goto out; | ||
360 | handle->written = 0; | ||
361 | } | ||
333 | out: | 362 | out: |
334 | return error; | 363 | return error; |
335 | } | 364 | } |
@@ -372,6 +401,13 @@ static int swap_writer_finish(struct swap_map_handle *handle, | |||
372 | LZO_HEADER, PAGE_SIZE) | 401 | LZO_HEADER, PAGE_SIZE) |
373 | #define LZO_CMP_SIZE (LZO_CMP_PAGES * PAGE_SIZE) | 402 | #define LZO_CMP_SIZE (LZO_CMP_PAGES * PAGE_SIZE) |
374 | 403 | ||
404 | /* Maximum number of threads for compression/decompression. */ | ||
405 | #define LZO_THREADS 3 | ||
406 | |||
407 | /* Maximum number of pages for read buffering. */ | ||
408 | #define LZO_READ_PAGES (MAP_PAGE_ENTRIES * 8) | ||
409 | |||
410 | |||
375 | /** | 411 | /** |
376 | * save_image - save the suspend image data | 412 | * save_image - save the suspend image data |
377 | */ | 413 | */ |
@@ -419,6 +455,92 @@ static int save_image(struct swap_map_handle *handle, | |||
419 | return ret; | 455 | return ret; |
420 | } | 456 | } |
421 | 457 | ||
458 | /** | ||
459 | * Structure used for CRC32. | ||
460 | */ | ||
461 | struct crc_data { | ||
462 | struct task_struct *thr; /* thread */ | ||
463 | atomic_t ready; /* ready to start flag */ | ||
464 | atomic_t stop; /* ready to stop flag */ | ||
465 | unsigned run_threads; /* nr current threads */ | ||
466 | wait_queue_head_t go; /* start crc update */ | ||
467 | wait_queue_head_t done; /* crc update done */ | ||
468 | u32 *crc32; /* points to handle's crc32 */ | ||
469 | size_t *unc_len[LZO_THREADS]; /* uncompressed lengths */ | ||
470 | unsigned char *unc[LZO_THREADS]; /* uncompressed data */ | ||
471 | }; | ||
472 | |||
473 | /** | ||
474 | * CRC32 update function that runs in its own thread. | ||
475 | */ | ||
476 | static int crc32_threadfn(void *data) | ||
477 | { | ||
478 | struct crc_data *d = data; | ||
479 | unsigned i; | ||
480 | |||
481 | while (1) { | ||
482 | wait_event(d->go, atomic_read(&d->ready) || | ||
483 | kthread_should_stop()); | ||
484 | if (kthread_should_stop()) { | ||
485 | d->thr = NULL; | ||
486 | atomic_set(&d->stop, 1); | ||
487 | wake_up(&d->done); | ||
488 | break; | ||
489 | } | ||
490 | atomic_set(&d->ready, 0); | ||
491 | |||
492 | for (i = 0; i < d->run_threads; i++) | ||
493 | *d->crc32 = crc32_le(*d->crc32, | ||
494 | d->unc[i], *d->unc_len[i]); | ||
495 | atomic_set(&d->stop, 1); | ||
496 | wake_up(&d->done); | ||
497 | } | ||
498 | return 0; | ||
499 | } | ||
500 | /** | ||
501 | * Structure used for LZO data compression. | ||
502 | */ | ||
503 | struct cmp_data { | ||
504 | struct task_struct *thr; /* thread */ | ||
505 | atomic_t ready; /* ready to start flag */ | ||
506 | atomic_t stop; /* ready to stop flag */ | ||
507 | int ret; /* return code */ | ||
508 | wait_queue_head_t go; /* start compression */ | ||
509 | wait_queue_head_t done; /* compression done */ | ||
510 | size_t unc_len; /* uncompressed length */ | ||
511 | size_t cmp_len; /* compressed length */ | ||
512 | unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */ | ||
513 | unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */ | ||
514 | unsigned char wrk[LZO1X_1_MEM_COMPRESS]; /* compression workspace */ | ||
515 | }; | ||
516 | |||
517 | /** | ||
518 | * Compression function that runs in its own thread. | ||
519 | */ | ||
520 | static int lzo_compress_threadfn(void *data) | ||
521 | { | ||
522 | struct cmp_data *d = data; | ||
523 | |||
524 | while (1) { | ||
525 | wait_event(d->go, atomic_read(&d->ready) || | ||
526 | kthread_should_stop()); | ||
527 | if (kthread_should_stop()) { | ||
528 | d->thr = NULL; | ||
529 | d->ret = -1; | ||
530 | atomic_set(&d->stop, 1); | ||
531 | wake_up(&d->done); | ||
532 | break; | ||
533 | } | ||
534 | atomic_set(&d->ready, 0); | ||
535 | |||
536 | d->ret = lzo1x_1_compress(d->unc, d->unc_len, | ||
537 | d->cmp + LZO_HEADER, &d->cmp_len, | ||
538 | d->wrk); | ||
539 | atomic_set(&d->stop, 1); | ||
540 | wake_up(&d->done); | ||
541 | } | ||
542 | return 0; | ||
543 | } | ||
422 | 544 | ||
423 | /** | 545 | /** |
424 | * save_image_lzo - Save the suspend image data compressed with LZO. | 546 | * save_image_lzo - Save the suspend image data compressed with LZO. |
@@ -437,42 +559,93 @@ static int save_image_lzo(struct swap_map_handle *handle, | |||
437 | struct bio *bio; | 559 | struct bio *bio; |
438 | struct timeval start; | 560 | struct timeval start; |
439 | struct timeval stop; | 561 | struct timeval stop; |
440 | size_t off, unc_len, cmp_len; | 562 | size_t off; |
441 | unsigned char *unc, *cmp, *wrk, *page; | 563 | unsigned thr, run_threads, nr_threads; |
564 | unsigned char *page = NULL; | ||
565 | struct cmp_data *data = NULL; | ||
566 | struct crc_data *crc = NULL; | ||
567 | |||
568 | /* | ||
569 | * We'll limit the number of threads for compression to limit memory | ||
570 | * footprint. | ||
571 | */ | ||
572 | nr_threads = num_online_cpus() - 1; | ||
573 | nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); | ||
442 | 574 | ||
443 | page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); | 575 | page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); |
444 | if (!page) { | 576 | if (!page) { |
445 | printk(KERN_ERR "PM: Failed to allocate LZO page\n"); | 577 | printk(KERN_ERR "PM: Failed to allocate LZO page\n"); |
446 | return -ENOMEM; | 578 | ret = -ENOMEM; |
579 | goto out_clean; | ||
447 | } | 580 | } |
448 | 581 | ||
449 | wrk = vmalloc(LZO1X_1_MEM_COMPRESS); | 582 | data = vmalloc(sizeof(*data) * nr_threads); |
450 | if (!wrk) { | 583 | if (!data) { |
451 | printk(KERN_ERR "PM: Failed to allocate LZO workspace\n"); | 584 | printk(KERN_ERR "PM: Failed to allocate LZO data\n"); |
452 | free_page((unsigned long)page); | 585 | ret = -ENOMEM; |
453 | return -ENOMEM; | 586 | goto out_clean; |
454 | } | 587 | } |
588 | for (thr = 0; thr < nr_threads; thr++) | ||
589 | memset(&data[thr], 0, offsetof(struct cmp_data, go)); | ||
455 | 590 | ||
456 | unc = vmalloc(LZO_UNC_SIZE); | 591 | crc = kmalloc(sizeof(*crc), GFP_KERNEL); |
457 | if (!unc) { | 592 | if (!crc) { |
458 | printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n"); | 593 | printk(KERN_ERR "PM: Failed to allocate crc\n"); |
459 | vfree(wrk); | 594 | ret = -ENOMEM; |
460 | free_page((unsigned long)page); | 595 | goto out_clean; |
461 | return -ENOMEM; | 596 | } |
597 | memset(crc, 0, offsetof(struct crc_data, go)); | ||
598 | |||
599 | /* | ||
600 | * Start the compression threads. | ||
601 | */ | ||
602 | for (thr = 0; thr < nr_threads; thr++) { | ||
603 | init_waitqueue_head(&data[thr].go); | ||
604 | init_waitqueue_head(&data[thr].done); | ||
605 | |||
606 | data[thr].thr = kthread_run(lzo_compress_threadfn, | ||
607 | &data[thr], | ||
608 | "image_compress/%u", thr); | ||
609 | if (IS_ERR(data[thr].thr)) { | ||
610 | data[thr].thr = NULL; | ||
611 | printk(KERN_ERR | ||
612 | "PM: Cannot start compression threads\n"); | ||
613 | ret = -ENOMEM; | ||
614 | goto out_clean; | ||
615 | } | ||
462 | } | 616 | } |
463 | 617 | ||
464 | cmp = vmalloc(LZO_CMP_SIZE); | 618 | /* |
465 | if (!cmp) { | 619 | * Adjust number of free pages after all allocations have been done. |
466 | printk(KERN_ERR "PM: Failed to allocate LZO compressed\n"); | 620 | * We don't want to run out of pages when writing. |
467 | vfree(unc); | 621 | */ |
468 | vfree(wrk); | 622 | handle->nr_free_pages = nr_free_pages() >> 1; |
469 | free_page((unsigned long)page); | 623 | |
470 | return -ENOMEM; | 624 | /* |
625 | * Start the CRC32 thread. | ||
626 | */ | ||
627 | init_waitqueue_head(&crc->go); | ||
628 | init_waitqueue_head(&crc->done); | ||
629 | |||
630 | handle->crc32 = 0; | ||
631 | crc->crc32 = &handle->crc32; | ||
632 | for (thr = 0; thr < nr_threads; thr++) { | ||
633 | crc->unc[thr] = data[thr].unc; | ||
634 | crc->unc_len[thr] = &data[thr].unc_len; | ||
635 | } | ||
636 | |||
637 | crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32"); | ||
638 | if (IS_ERR(crc->thr)) { | ||
639 | crc->thr = NULL; | ||
640 | printk(KERN_ERR "PM: Cannot start CRC32 thread\n"); | ||
641 | ret = -ENOMEM; | ||
642 | goto out_clean; | ||
471 | } | 643 | } |
472 | 644 | ||
473 | printk(KERN_INFO | 645 | printk(KERN_INFO |
646 | "PM: Using %u thread(s) for compression.\n" | ||
474 | "PM: Compressing and saving image data (%u pages) ... ", | 647 | "PM: Compressing and saving image data (%u pages) ... ", |
475 | nr_to_write); | 648 | nr_threads, nr_to_write); |
476 | m = nr_to_write / 100; | 649 | m = nr_to_write / 100; |
477 | if (!m) | 650 | if (!m) |
478 | m = 1; | 651 | m = 1; |
@@ -480,55 +653,83 @@ static int save_image_lzo(struct swap_map_handle *handle, | |||
480 | bio = NULL; | 653 | bio = NULL; |
481 | do_gettimeofday(&start); | 654 | do_gettimeofday(&start); |
482 | for (;;) { | 655 | for (;;) { |
483 | for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) { | 656 | for (thr = 0; thr < nr_threads; thr++) { |
484 | ret = snapshot_read_next(snapshot); | 657 | for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) { |
485 | if (ret < 0) | 658 | ret = snapshot_read_next(snapshot); |
486 | goto out_finish; | 659 | if (ret < 0) |
487 | 660 | goto out_finish; | |
488 | if (!ret) | 661 | |
662 | if (!ret) | ||
663 | break; | ||
664 | |||
665 | memcpy(data[thr].unc + off, | ||
666 | data_of(*snapshot), PAGE_SIZE); | ||
667 | |||
668 | if (!(nr_pages % m)) | ||
669 | printk(KERN_CONT "\b\b\b\b%3d%%", | ||
670 | nr_pages / m); | ||
671 | nr_pages++; | ||
672 | } | ||
673 | if (!off) | ||
489 | break; | 674 | break; |
490 | 675 | ||
491 | memcpy(unc + off, data_of(*snapshot), PAGE_SIZE); | 676 | data[thr].unc_len = off; |
492 | 677 | ||
493 | if (!(nr_pages % m)) | 678 | atomic_set(&data[thr].ready, 1); |
494 | printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m); | 679 | wake_up(&data[thr].go); |
495 | nr_pages++; | ||
496 | } | 680 | } |
497 | 681 | ||
498 | if (!off) | 682 | if (!thr) |
499 | break; | 683 | break; |
500 | 684 | ||
501 | unc_len = off; | 685 | crc->run_threads = thr; |
502 | ret = lzo1x_1_compress(unc, unc_len, | 686 | atomic_set(&crc->ready, 1); |
503 | cmp + LZO_HEADER, &cmp_len, wrk); | 687 | wake_up(&crc->go); |
504 | if (ret < 0) { | ||
505 | printk(KERN_ERR "PM: LZO compression failed\n"); | ||
506 | break; | ||
507 | } | ||
508 | 688 | ||
509 | if (unlikely(!cmp_len || | 689 | for (run_threads = thr, thr = 0; thr < run_threads; thr++) { |
510 | cmp_len > lzo1x_worst_compress(unc_len))) { | 690 | wait_event(data[thr].done, |
511 | printk(KERN_ERR "PM: Invalid LZO compressed length\n"); | 691 | atomic_read(&data[thr].stop)); |
512 | ret = -1; | 692 | atomic_set(&data[thr].stop, 0); |
513 | break; | ||
514 | } | ||
515 | 693 | ||
516 | *(size_t *)cmp = cmp_len; | 694 | ret = data[thr].ret; |
517 | 695 | ||
518 | /* | 696 | if (ret < 0) { |
519 | * Given we are writing one page at a time to disk, we copy | 697 | printk(KERN_ERR "PM: LZO compression failed\n"); |
520 | * that much from the buffer, although the last bit will likely | 698 | goto out_finish; |
521 | * be smaller than full page. This is OK - we saved the length | 699 | } |
522 | * of the compressed data, so any garbage at the end will be | ||
523 | * discarded when we read it. | ||
524 | */ | ||
525 | for (off = 0; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) { | ||
526 | memcpy(page, cmp + off, PAGE_SIZE); | ||
527 | 700 | ||
528 | ret = swap_write_page(handle, page, &bio); | 701 | if (unlikely(!data[thr].cmp_len || |
529 | if (ret) | 702 | data[thr].cmp_len > |
703 | lzo1x_worst_compress(data[thr].unc_len))) { | ||
704 | printk(KERN_ERR | ||
705 | "PM: Invalid LZO compressed length\n"); | ||
706 | ret = -1; | ||
530 | goto out_finish; | 707 | goto out_finish; |
708 | } | ||
709 | |||
710 | *(size_t *)data[thr].cmp = data[thr].cmp_len; | ||
711 | |||
712 | /* | ||
713 | * Given we are writing one page at a time to disk, we | ||
714 | * copy that much from the buffer, although the last | ||
715 | * bit will likely be smaller than full page. This is | ||
716 | * OK - we saved the length of the compressed data, so | ||
717 | * any garbage at the end will be discarded when we | ||
718 | * read it. | ||
719 | */ | ||
720 | for (off = 0; | ||
721 | off < LZO_HEADER + data[thr].cmp_len; | ||
722 | off += PAGE_SIZE) { | ||
723 | memcpy(page, data[thr].cmp + off, PAGE_SIZE); | ||
724 | |||
725 | ret = swap_write_page(handle, page, &bio); | ||
726 | if (ret) | ||
727 | goto out_finish; | ||
728 | } | ||
531 | } | 729 | } |
730 | |||
731 | wait_event(crc->done, atomic_read(&crc->stop)); | ||
732 | atomic_set(&crc->stop, 0); | ||
532 | } | 733 | } |
533 | 734 | ||
534 | out_finish: | 735 | out_finish: |
@@ -536,16 +737,25 @@ out_finish: | |||
536 | do_gettimeofday(&stop); | 737 | do_gettimeofday(&stop); |
537 | if (!ret) | 738 | if (!ret) |
538 | ret = err2; | 739 | ret = err2; |
539 | if (!ret) | 740 | if (!ret) { |
540 | printk(KERN_CONT "\b\b\b\bdone\n"); | 741 | printk(KERN_CONT "\b\b\b\bdone\n"); |
541 | else | 742 | } else { |
542 | printk(KERN_CONT "\n"); | 743 | printk(KERN_CONT "\n"); |
744 | } | ||
543 | swsusp_show_speed(&start, &stop, nr_to_write, "Wrote"); | 745 | swsusp_show_speed(&start, &stop, nr_to_write, "Wrote"); |
544 | 746 | out_clean: | |
545 | vfree(cmp); | 747 | if (crc) { |
546 | vfree(unc); | 748 | if (crc->thr) |
547 | vfree(wrk); | 749 | kthread_stop(crc->thr); |
548 | free_page((unsigned long)page); | 750 | kfree(crc); |
751 | } | ||
752 | if (data) { | ||
753 | for (thr = 0; thr < nr_threads; thr++) | ||
754 | if (data[thr].thr) | ||
755 | kthread_stop(data[thr].thr); | ||
756 | vfree(data); | ||
757 | } | ||
758 | if (page) free_page((unsigned long)page); | ||
549 | 759 | ||
550 | return ret; | 760 | return ret; |
551 | } | 761 | } |
@@ -625,8 +835,15 @@ out_finish: | |||
625 | 835 | ||
626 | static void release_swap_reader(struct swap_map_handle *handle) | 836 | static void release_swap_reader(struct swap_map_handle *handle) |
627 | { | 837 | { |
628 | if (handle->cur) | 838 | struct swap_map_page_list *tmp; |
629 | free_page((unsigned long)handle->cur); | 839 | |
840 | while (handle->maps) { | ||
841 | if (handle->maps->map) | ||
842 | free_page((unsigned long)handle->maps->map); | ||
843 | tmp = handle->maps; | ||
844 | handle->maps = handle->maps->next; | ||
845 | kfree(tmp); | ||
846 | } | ||
630 | handle->cur = NULL; | 847 | handle->cur = NULL; |
631 | } | 848 | } |
632 | 849 | ||
@@ -634,22 +851,46 @@ static int get_swap_reader(struct swap_map_handle *handle, | |||
634 | unsigned int *flags_p) | 851 | unsigned int *flags_p) |
635 | { | 852 | { |
636 | int error; | 853 | int error; |
854 | struct swap_map_page_list *tmp, *last; | ||
855 | sector_t offset; | ||
637 | 856 | ||
638 | *flags_p = swsusp_header->flags; | 857 | *flags_p = swsusp_header->flags; |
639 | 858 | ||
640 | if (!swsusp_header->image) /* how can this happen? */ | 859 | if (!swsusp_header->image) /* how can this happen? */ |
641 | return -EINVAL; | 860 | return -EINVAL; |
642 | 861 | ||
643 | handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH); | 862 | handle->cur = NULL; |
644 | if (!handle->cur) | 863 | last = handle->maps = NULL; |
645 | return -ENOMEM; | 864 | offset = swsusp_header->image; |
865 | while (offset) { | ||
866 | tmp = kmalloc(sizeof(*handle->maps), GFP_KERNEL); | ||
867 | if (!tmp) { | ||
868 | release_swap_reader(handle); | ||
869 | return -ENOMEM; | ||
870 | } | ||
871 | memset(tmp, 0, sizeof(*tmp)); | ||
872 | if (!handle->maps) | ||
873 | handle->maps = tmp; | ||
874 | if (last) | ||
875 | last->next = tmp; | ||
876 | last = tmp; | ||
877 | |||
878 | tmp->map = (struct swap_map_page *) | ||
879 | __get_free_page(__GFP_WAIT | __GFP_HIGH); | ||
880 | if (!tmp->map) { | ||
881 | release_swap_reader(handle); | ||
882 | return -ENOMEM; | ||
883 | } | ||
646 | 884 | ||
647 | error = hib_bio_read_page(swsusp_header->image, handle->cur, NULL); | 885 | error = hib_bio_read_page(offset, tmp->map, NULL); |
648 | if (error) { | 886 | if (error) { |
649 | release_swap_reader(handle); | 887 | release_swap_reader(handle); |
650 | return error; | 888 | return error; |
889 | } | ||
890 | offset = tmp->map->next_swap; | ||
651 | } | 891 | } |
652 | handle->k = 0; | 892 | handle->k = 0; |
893 | handle->cur = handle->maps->map; | ||
653 | return 0; | 894 | return 0; |
654 | } | 895 | } |
655 | 896 | ||
@@ -658,6 +899,7 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf, | |||
658 | { | 899 | { |
659 | sector_t offset; | 900 | sector_t offset; |
660 | int error; | 901 | int error; |
902 | struct swap_map_page_list *tmp; | ||
661 | 903 | ||
662 | if (!handle->cur) | 904 | if (!handle->cur) |
663 | return -EINVAL; | 905 | return -EINVAL; |
@@ -668,13 +910,15 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf, | |||
668 | if (error) | 910 | if (error) |
669 | return error; | 911 | return error; |
670 | if (++handle->k >= MAP_PAGE_ENTRIES) { | 912 | if (++handle->k >= MAP_PAGE_ENTRIES) { |
671 | error = hib_wait_on_bio_chain(bio_chain); | ||
672 | handle->k = 0; | 913 | handle->k = 0; |
673 | offset = handle->cur->next_swap; | 914 | free_page((unsigned long)handle->maps->map); |
674 | if (!offset) | 915 | tmp = handle->maps; |
916 | handle->maps = handle->maps->next; | ||
917 | kfree(tmp); | ||
918 | if (!handle->maps) | ||
675 | release_swap_reader(handle); | 919 | release_swap_reader(handle); |
676 | else if (!error) | 920 | else |
677 | error = hib_bio_read_page(offset, handle->cur, NULL); | 921 | handle->cur = handle->maps->map; |
678 | } | 922 | } |
679 | return error; | 923 | return error; |
680 | } | 924 | } |
@@ -697,7 +941,7 @@ static int load_image(struct swap_map_handle *handle, | |||
697 | unsigned int nr_to_read) | 941 | unsigned int nr_to_read) |
698 | { | 942 | { |
699 | unsigned int m; | 943 | unsigned int m; |
700 | int error = 0; | 944 | int ret = 0; |
701 | struct timeval start; | 945 | struct timeval start; |
702 | struct timeval stop; | 946 | struct timeval stop; |
703 | struct bio *bio; | 947 | struct bio *bio; |
@@ -713,15 +957,15 @@ static int load_image(struct swap_map_handle *handle, | |||
713 | bio = NULL; | 957 | bio = NULL; |
714 | do_gettimeofday(&start); | 958 | do_gettimeofday(&start); |
715 | for ( ; ; ) { | 959 | for ( ; ; ) { |
716 | error = snapshot_write_next(snapshot); | 960 | ret = snapshot_write_next(snapshot); |
717 | if (error <= 0) | 961 | if (ret <= 0) |
718 | break; | 962 | break; |
719 | error = swap_read_page(handle, data_of(*snapshot), &bio); | 963 | ret = swap_read_page(handle, data_of(*snapshot), &bio); |
720 | if (error) | 964 | if (ret) |
721 | break; | 965 | break; |
722 | if (snapshot->sync_read) | 966 | if (snapshot->sync_read) |
723 | error = hib_wait_on_bio_chain(&bio); | 967 | ret = hib_wait_on_bio_chain(&bio); |
724 | if (error) | 968 | if (ret) |
725 | break; | 969 | break; |
726 | if (!(nr_pages % m)) | 970 | if (!(nr_pages % m)) |
727 | printk("\b\b\b\b%3d%%", nr_pages / m); | 971 | printk("\b\b\b\b%3d%%", nr_pages / m); |
@@ -729,17 +973,61 @@ static int load_image(struct swap_map_handle *handle, | |||
729 | } | 973 | } |
730 | err2 = hib_wait_on_bio_chain(&bio); | 974 | err2 = hib_wait_on_bio_chain(&bio); |
731 | do_gettimeofday(&stop); | 975 | do_gettimeofday(&stop); |
732 | if (!error) | 976 | if (!ret) |
733 | error = err2; | 977 | ret = err2; |
734 | if (!error) { | 978 | if (!ret) { |
735 | printk("\b\b\b\bdone\n"); | 979 | printk("\b\b\b\bdone\n"); |
736 | snapshot_write_finalize(snapshot); | 980 | snapshot_write_finalize(snapshot); |
737 | if (!snapshot_image_loaded(snapshot)) | 981 | if (!snapshot_image_loaded(snapshot)) |
738 | error = -ENODATA; | 982 | ret = -ENODATA; |
739 | } else | 983 | } else |
740 | printk("\n"); | 984 | printk("\n"); |
741 | swsusp_show_speed(&start, &stop, nr_to_read, "Read"); | 985 | swsusp_show_speed(&start, &stop, nr_to_read, "Read"); |
742 | return error; | 986 | return ret; |
987 | } | ||
988 | |||
989 | /** | ||
990 | * Structure used for LZO data decompression. | ||
991 | */ | ||
992 | struct dec_data { | ||
993 | struct task_struct *thr; /* thread */ | ||
994 | atomic_t ready; /* ready to start flag */ | ||
995 | atomic_t stop; /* ready to stop flag */ | ||
996 | int ret; /* return code */ | ||
997 | wait_queue_head_t go; /* start decompression */ | ||
998 | wait_queue_head_t done; /* decompression done */ | ||
999 | size_t unc_len; /* uncompressed length */ | ||
1000 | size_t cmp_len; /* compressed length */ | ||
1001 | unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */ | ||
1002 | unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */ | ||
1003 | }; | ||
1004 | |||
1005 | /** | ||
1006 | * Decompression function that runs in its own thread. | ||
1007 | */ | ||
1008 | static int lzo_decompress_threadfn(void *data) | ||
1009 | { | ||
1010 | struct dec_data *d = data; | ||
1011 | |||
1012 | while (1) { | ||
1013 | wait_event(d->go, atomic_read(&d->ready) || | ||
1014 | kthread_should_stop()); | ||
1015 | if (kthread_should_stop()) { | ||
1016 | d->thr = NULL; | ||
1017 | d->ret = -1; | ||
1018 | atomic_set(&d->stop, 1); | ||
1019 | wake_up(&d->done); | ||
1020 | break; | ||
1021 | } | ||
1022 | atomic_set(&d->ready, 0); | ||
1023 | |||
1024 | d->unc_len = LZO_UNC_SIZE; | ||
1025 | d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len, | ||
1026 | d->unc, &d->unc_len); | ||
1027 | atomic_set(&d->stop, 1); | ||
1028 | wake_up(&d->done); | ||
1029 | } | ||
1030 | return 0; | ||
743 | } | 1031 | } |
744 | 1032 | ||
745 | /** | 1033 | /** |
@@ -753,50 +1041,120 @@ static int load_image_lzo(struct swap_map_handle *handle, | |||
753 | unsigned int nr_to_read) | 1041 | unsigned int nr_to_read) |
754 | { | 1042 | { |
755 | unsigned int m; | 1043 | unsigned int m; |
756 | int error = 0; | 1044 | int ret = 0; |
1045 | int eof = 0; | ||
757 | struct bio *bio; | 1046 | struct bio *bio; |
758 | struct timeval start; | 1047 | struct timeval start; |
759 | struct timeval stop; | 1048 | struct timeval stop; |
760 | unsigned nr_pages; | 1049 | unsigned nr_pages; |
761 | size_t i, off, unc_len, cmp_len; | 1050 | size_t off; |
762 | unsigned char *unc, *cmp, *page[LZO_CMP_PAGES]; | 1051 | unsigned i, thr, run_threads, nr_threads; |
763 | 1052 | unsigned ring = 0, pg = 0, ring_size = 0, | |
764 | for (i = 0; i < LZO_CMP_PAGES; i++) { | 1053 | have = 0, want, need, asked = 0; |
765 | page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); | 1054 | unsigned long read_pages; |
766 | if (!page[i]) { | 1055 | unsigned char **page = NULL; |
767 | printk(KERN_ERR "PM: Failed to allocate LZO page\n"); | 1056 | struct dec_data *data = NULL; |
1057 | struct crc_data *crc = NULL; | ||
1058 | |||
1059 | /* | ||
1060 | * We'll limit the number of threads for decompression to limit memory | ||
1061 | * footprint. | ||
1062 | */ | ||
1063 | nr_threads = num_online_cpus() - 1; | ||
1064 | nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); | ||
1065 | |||
1066 | page = vmalloc(sizeof(*page) * LZO_READ_PAGES); | ||
1067 | if (!page) { | ||
1068 | printk(KERN_ERR "PM: Failed to allocate LZO page\n"); | ||
1069 | ret = -ENOMEM; | ||
1070 | goto out_clean; | ||
1071 | } | ||
768 | 1072 | ||
769 | while (i) | 1073 | data = vmalloc(sizeof(*data) * nr_threads); |
770 | free_page((unsigned long)page[--i]); | 1074 | if (!data) { |
1075 | printk(KERN_ERR "PM: Failed to allocate LZO data\n"); | ||
1076 | ret = -ENOMEM; | ||
1077 | goto out_clean; | ||
1078 | } | ||
1079 | for (thr = 0; thr < nr_threads; thr++) | ||
1080 | memset(&data[thr], 0, offsetof(struct dec_data, go)); | ||
771 | 1081 | ||
772 | return -ENOMEM; | 1082 | crc = kmalloc(sizeof(*crc), GFP_KERNEL); |
1083 | if (!crc) { | ||
1084 | printk(KERN_ERR "PM: Failed to allocate crc\n"); | ||
1085 | ret = -ENOMEM; | ||
1086 | goto out_clean; | ||
1087 | } | ||
1088 | memset(crc, 0, offsetof(struct crc_data, go)); | ||
1089 | |||
1090 | /* | ||
1091 | * Start the decompression threads. | ||
1092 | */ | ||
1093 | for (thr = 0; thr < nr_threads; thr++) { | ||
1094 | init_waitqueue_head(&data[thr].go); | ||
1095 | init_waitqueue_head(&data[thr].done); | ||
1096 | |||
1097 | data[thr].thr = kthread_run(lzo_decompress_threadfn, | ||
1098 | &data[thr], | ||
1099 | "image_decompress/%u", thr); | ||
1100 | if (IS_ERR(data[thr].thr)) { | ||
1101 | data[thr].thr = NULL; | ||
1102 | printk(KERN_ERR | ||
1103 | "PM: Cannot start decompression threads\n"); | ||
1104 | ret = -ENOMEM; | ||
1105 | goto out_clean; | ||
773 | } | 1106 | } |
774 | } | 1107 | } |
775 | 1108 | ||
776 | unc = vmalloc(LZO_UNC_SIZE); | 1109 | /* |
777 | if (!unc) { | 1110 | * Start the CRC32 thread. |
778 | printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n"); | 1111 | */ |
779 | 1112 | init_waitqueue_head(&crc->go); | |
780 | for (i = 0; i < LZO_CMP_PAGES; i++) | 1113 | init_waitqueue_head(&crc->done); |
781 | free_page((unsigned long)page[i]); | 1114 | |
782 | 1115 | handle->crc32 = 0; | |
783 | return -ENOMEM; | 1116 | crc->crc32 = &handle->crc32; |
1117 | for (thr = 0; thr < nr_threads; thr++) { | ||
1118 | crc->unc[thr] = data[thr].unc; | ||
1119 | crc->unc_len[thr] = &data[thr].unc_len; | ||
784 | } | 1120 | } |
785 | 1121 | ||
786 | cmp = vmalloc(LZO_CMP_SIZE); | 1122 | crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32"); |
787 | if (!cmp) { | 1123 | if (IS_ERR(crc->thr)) { |
788 | printk(KERN_ERR "PM: Failed to allocate LZO compressed\n"); | 1124 | crc->thr = NULL; |
1125 | printk(KERN_ERR "PM: Cannot start CRC32 thread\n"); | ||
1126 | ret = -ENOMEM; | ||
1127 | goto out_clean; | ||
1128 | } | ||
789 | 1129 | ||
790 | vfree(unc); | 1130 | /* |
791 | for (i = 0; i < LZO_CMP_PAGES; i++) | 1131 | * Adjust number of pages for read buffering, in case we are short. |
792 | free_page((unsigned long)page[i]); | 1132 | */ |
1133 | read_pages = (nr_free_pages() - snapshot_get_image_size()) >> 1; | ||
1134 | read_pages = clamp_val(read_pages, LZO_CMP_PAGES, LZO_READ_PAGES); | ||
793 | 1135 | ||
794 | return -ENOMEM; | 1136 | for (i = 0; i < read_pages; i++) { |
1137 | page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ? | ||
1138 | __GFP_WAIT | __GFP_HIGH : | ||
1139 | __GFP_WAIT); | ||
1140 | if (!page[i]) { | ||
1141 | if (i < LZO_CMP_PAGES) { | ||
1142 | ring_size = i; | ||
1143 | printk(KERN_ERR | ||
1144 | "PM: Failed to allocate LZO pages\n"); | ||
1145 | ret = -ENOMEM; | ||
1146 | goto out_clean; | ||
1147 | } else { | ||
1148 | break; | ||
1149 | } | ||
1150 | } | ||
795 | } | 1151 | } |
1152 | want = ring_size = i; | ||
796 | 1153 | ||
797 | printk(KERN_INFO | 1154 | printk(KERN_INFO |
1155 | "PM: Using %u thread(s) for decompression.\n" | ||
798 | "PM: Loading and decompressing image data (%u pages) ... ", | 1156 | "PM: Loading and decompressing image data (%u pages) ... ", |
799 | nr_to_read); | 1157 | nr_threads, nr_to_read); |
800 | m = nr_to_read / 100; | 1158 | m = nr_to_read / 100; |
801 | if (!m) | 1159 | if (!m) |
802 | m = 1; | 1160 | m = 1; |
@@ -804,85 +1162,189 @@ static int load_image_lzo(struct swap_map_handle *handle, | |||
804 | bio = NULL; | 1162 | bio = NULL; |
805 | do_gettimeofday(&start); | 1163 | do_gettimeofday(&start); |
806 | 1164 | ||
807 | error = snapshot_write_next(snapshot); | 1165 | ret = snapshot_write_next(snapshot); |
808 | if (error <= 0) | 1166 | if (ret <= 0) |
809 | goto out_finish; | 1167 | goto out_finish; |
810 | 1168 | ||
811 | for (;;) { | 1169 | for(;;) { |
812 | error = swap_read_page(handle, page[0], NULL); /* sync */ | 1170 | for (i = 0; !eof && i < want; i++) { |
813 | if (error) | 1171 | ret = swap_read_page(handle, page[ring], &bio); |
814 | break; | 1172 | if (ret) { |
815 | 1173 | /* | |
816 | cmp_len = *(size_t *)page[0]; | 1174 | * On real read error, finish. On end of data, |
817 | if (unlikely(!cmp_len || | 1175 | * set EOF flag and just exit the read loop. |
818 | cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) { | 1176 | */ |
819 | printk(KERN_ERR "PM: Invalid LZO compressed length\n"); | 1177 | if (handle->cur && |
820 | error = -1; | 1178 | handle->cur->entries[handle->k]) { |
821 | break; | 1179 | goto out_finish; |
1180 | } else { | ||
1181 | eof = 1; | ||
1182 | break; | ||
1183 | } | ||
1184 | } | ||
1185 | if (++ring >= ring_size) | ||
1186 | ring = 0; | ||
822 | } | 1187 | } |
1188 | asked += i; | ||
1189 | want -= i; | ||
823 | 1190 | ||
824 | for (off = PAGE_SIZE, i = 1; | 1191 | /* |
825 | off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) { | 1192 | * We are out of data, wait for some more. |
826 | error = swap_read_page(handle, page[i], &bio); | 1193 | */ |
827 | if (error) | 1194 | if (!have) { |
1195 | if (!asked) | ||
1196 | break; | ||
1197 | |||
1198 | ret = hib_wait_on_bio_chain(&bio); | ||
1199 | if (ret) | ||
828 | goto out_finish; | 1200 | goto out_finish; |
1201 | have += asked; | ||
1202 | asked = 0; | ||
1203 | if (eof) | ||
1204 | eof = 2; | ||
829 | } | 1205 | } |
830 | 1206 | ||
831 | error = hib_wait_on_bio_chain(&bio); /* need all data now */ | 1207 | if (crc->run_threads) { |
832 | if (error) | 1208 | wait_event(crc->done, atomic_read(&crc->stop)); |
833 | goto out_finish; | 1209 | atomic_set(&crc->stop, 0); |
834 | 1210 | crc->run_threads = 0; | |
835 | for (off = 0, i = 0; | ||
836 | off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) { | ||
837 | memcpy(cmp + off, page[i], PAGE_SIZE); | ||
838 | } | 1211 | } |
839 | 1212 | ||
840 | unc_len = LZO_UNC_SIZE; | 1213 | for (thr = 0; have && thr < nr_threads; thr++) { |
841 | error = lzo1x_decompress_safe(cmp + LZO_HEADER, cmp_len, | 1214 | data[thr].cmp_len = *(size_t *)page[pg]; |
842 | unc, &unc_len); | 1215 | if (unlikely(!data[thr].cmp_len || |
843 | if (error < 0) { | 1216 | data[thr].cmp_len > |
844 | printk(KERN_ERR "PM: LZO decompression failed\n"); | 1217 | lzo1x_worst_compress(LZO_UNC_SIZE))) { |
845 | break; | 1218 | printk(KERN_ERR |
1219 | "PM: Invalid LZO compressed length\n"); | ||
1220 | ret = -1; | ||
1221 | goto out_finish; | ||
1222 | } | ||
1223 | |||
1224 | need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER, | ||
1225 | PAGE_SIZE); | ||
1226 | if (need > have) { | ||
1227 | if (eof > 1) { | ||
1228 | ret = -1; | ||
1229 | goto out_finish; | ||
1230 | } | ||
1231 | break; | ||
1232 | } | ||
1233 | |||
1234 | for (off = 0; | ||
1235 | off < LZO_HEADER + data[thr].cmp_len; | ||
1236 | off += PAGE_SIZE) { | ||
1237 | memcpy(data[thr].cmp + off, | ||
1238 | page[pg], PAGE_SIZE); | ||
1239 | have--; | ||
1240 | want++; | ||
1241 | if (++pg >= ring_size) | ||
1242 | pg = 0; | ||
1243 | } | ||
1244 | |||
1245 | atomic_set(&data[thr].ready, 1); | ||
1246 | wake_up(&data[thr].go); | ||
846 | } | 1247 | } |
847 | 1248 | ||
848 | if (unlikely(!unc_len || | 1249 | /* |
849 | unc_len > LZO_UNC_SIZE || | 1250 | * Wait for more data while we are decompressing. |
850 | unc_len & (PAGE_SIZE - 1))) { | 1251 | */ |
851 | printk(KERN_ERR "PM: Invalid LZO uncompressed length\n"); | 1252 | if (have < LZO_CMP_PAGES && asked) { |
852 | error = -1; | 1253 | ret = hib_wait_on_bio_chain(&bio); |
853 | break; | 1254 | if (ret) |
1255 | goto out_finish; | ||
1256 | have += asked; | ||
1257 | asked = 0; | ||
1258 | if (eof) | ||
1259 | eof = 2; | ||
854 | } | 1260 | } |
855 | 1261 | ||
856 | for (off = 0; off < unc_len; off += PAGE_SIZE) { | 1262 | for (run_threads = thr, thr = 0; thr < run_threads; thr++) { |
857 | memcpy(data_of(*snapshot), unc + off, PAGE_SIZE); | 1263 | wait_event(data[thr].done, |
1264 | atomic_read(&data[thr].stop)); | ||
1265 | atomic_set(&data[thr].stop, 0); | ||
1266 | |||
1267 | ret = data[thr].ret; | ||
858 | 1268 | ||
859 | if (!(nr_pages % m)) | 1269 | if (ret < 0) { |
860 | printk("\b\b\b\b%3d%%", nr_pages / m); | 1270 | printk(KERN_ERR |
861 | nr_pages++; | 1271 | "PM: LZO decompression failed\n"); |
1272 | goto out_finish; | ||
1273 | } | ||
862 | 1274 | ||
863 | error = snapshot_write_next(snapshot); | 1275 | if (unlikely(!data[thr].unc_len || |
864 | if (error <= 0) | 1276 | data[thr].unc_len > LZO_UNC_SIZE || |
1277 | data[thr].unc_len & (PAGE_SIZE - 1))) { | ||
1278 | printk(KERN_ERR | ||
1279 | "PM: Invalid LZO uncompressed length\n"); | ||
1280 | ret = -1; | ||
865 | goto out_finish; | 1281 | goto out_finish; |
1282 | } | ||
1283 | |||
1284 | for (off = 0; | ||
1285 | off < data[thr].unc_len; off += PAGE_SIZE) { | ||
1286 | memcpy(data_of(*snapshot), | ||
1287 | data[thr].unc + off, PAGE_SIZE); | ||
1288 | |||
1289 | if (!(nr_pages % m)) | ||
1290 | printk("\b\b\b\b%3d%%", nr_pages / m); | ||
1291 | nr_pages++; | ||
1292 | |||
1293 | ret = snapshot_write_next(snapshot); | ||
1294 | if (ret <= 0) { | ||
1295 | crc->run_threads = thr + 1; | ||
1296 | atomic_set(&crc->ready, 1); | ||
1297 | wake_up(&crc->go); | ||
1298 | goto out_finish; | ||
1299 | } | ||
1300 | } | ||
866 | } | 1301 | } |
1302 | |||
1303 | crc->run_threads = thr; | ||
1304 | atomic_set(&crc->ready, 1); | ||
1305 | wake_up(&crc->go); | ||
867 | } | 1306 | } |
868 | 1307 | ||
869 | out_finish: | 1308 | out_finish: |
1309 | if (crc->run_threads) { | ||
1310 | wait_event(crc->done, atomic_read(&crc->stop)); | ||
1311 | atomic_set(&crc->stop, 0); | ||
1312 | } | ||
870 | do_gettimeofday(&stop); | 1313 | do_gettimeofday(&stop); |
871 | if (!error) { | 1314 | if (!ret) { |
872 | printk("\b\b\b\bdone\n"); | 1315 | printk("\b\b\b\bdone\n"); |
873 | snapshot_write_finalize(snapshot); | 1316 | snapshot_write_finalize(snapshot); |
874 | if (!snapshot_image_loaded(snapshot)) | 1317 | if (!snapshot_image_loaded(snapshot)) |
875 | error = -ENODATA; | 1318 | ret = -ENODATA; |
1319 | if (!ret) { | ||
1320 | if (swsusp_header->flags & SF_CRC32_MODE) { | ||
1321 | if(handle->crc32 != swsusp_header->crc32) { | ||
1322 | printk(KERN_ERR | ||
1323 | "PM: Invalid image CRC32!\n"); | ||
1324 | ret = -ENODATA; | ||
1325 | } | ||
1326 | } | ||
1327 | } | ||
876 | } else | 1328 | } else |
877 | printk("\n"); | 1329 | printk("\n"); |
878 | swsusp_show_speed(&start, &stop, nr_to_read, "Read"); | 1330 | swsusp_show_speed(&start, &stop, nr_to_read, "Read"); |
879 | 1331 | out_clean: | |
880 | vfree(cmp); | 1332 | for (i = 0; i < ring_size; i++) |
881 | vfree(unc); | ||
882 | for (i = 0; i < LZO_CMP_PAGES; i++) | ||
883 | free_page((unsigned long)page[i]); | 1333 | free_page((unsigned long)page[i]); |
1334 | if (crc) { | ||
1335 | if (crc->thr) | ||
1336 | kthread_stop(crc->thr); | ||
1337 | kfree(crc); | ||
1338 | } | ||
1339 | if (data) { | ||
1340 | for (thr = 0; thr < nr_threads; thr++) | ||
1341 | if (data[thr].thr) | ||
1342 | kthread_stop(data[thr].thr); | ||
1343 | vfree(data); | ||
1344 | } | ||
1345 | if (page) vfree(page); | ||
884 | 1346 | ||
885 | return error; | 1347 | return ret; |
886 | } | 1348 | } |
887 | 1349 | ||
888 | /** | 1350 | /** |
diff --git a/kernel/power/user.c b/kernel/power/user.c index 42ddbc6f0de..6d8f535c2b8 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/suspend.h> | 12 | #include <linux/suspend.h> |
13 | #include <linux/syscalls.h> | 13 | #include <linux/syscalls.h> |
14 | #include <linux/reboot.h> | 14 | #include <linux/reboot.h> |
15 | #include <linux/kmod.h> | ||
15 | #include <linux/string.h> | 16 | #include <linux/string.h> |
16 | #include <linux/device.h> | 17 | #include <linux/device.h> |
17 | #include <linux/miscdevice.h> | 18 | #include <linux/miscdevice.h> |
diff --git a/kernel/printk.c b/kernel/printk.c index 37dff3429ad..1455a0d4eed 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -100,7 +100,7 @@ static int console_locked, console_suspended; | |||
100 | * It is also used in interesting ways to provide interlocking in | 100 | * It is also used in interesting ways to provide interlocking in |
101 | * console_unlock();. | 101 | * console_unlock();. |
102 | */ | 102 | */ |
103 | static DEFINE_SPINLOCK(logbuf_lock); | 103 | static DEFINE_RAW_SPINLOCK(logbuf_lock); |
104 | 104 | ||
105 | #define LOG_BUF_MASK (log_buf_len-1) | 105 | #define LOG_BUF_MASK (log_buf_len-1) |
106 | #define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK]) | 106 | #define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK]) |
@@ -212,7 +212,7 @@ void __init setup_log_buf(int early) | |||
212 | return; | 212 | return; |
213 | } | 213 | } |
214 | 214 | ||
215 | spin_lock_irqsave(&logbuf_lock, flags); | 215 | raw_spin_lock_irqsave(&logbuf_lock, flags); |
216 | log_buf_len = new_log_buf_len; | 216 | log_buf_len = new_log_buf_len; |
217 | log_buf = new_log_buf; | 217 | log_buf = new_log_buf; |
218 | new_log_buf_len = 0; | 218 | new_log_buf_len = 0; |
@@ -230,7 +230,7 @@ void __init setup_log_buf(int early) | |||
230 | log_start -= offset; | 230 | log_start -= offset; |
231 | con_start -= offset; | 231 | con_start -= offset; |
232 | log_end -= offset; | 232 | log_end -= offset; |
233 | spin_unlock_irqrestore(&logbuf_lock, flags); | 233 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); |
234 | 234 | ||
235 | pr_info("log_buf_len: %d\n", log_buf_len); | 235 | pr_info("log_buf_len: %d\n", log_buf_len); |
236 | pr_info("early log buf free: %d(%d%%)\n", | 236 | pr_info("early log buf free: %d(%d%%)\n", |
@@ -318,8 +318,10 @@ static int check_syslog_permissions(int type, bool from_file) | |||
318 | return 0; | 318 | return 0; |
319 | /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */ | 319 | /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */ |
320 | if (capable(CAP_SYS_ADMIN)) { | 320 | if (capable(CAP_SYS_ADMIN)) { |
321 | WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN " | 321 | printk_once(KERN_WARNING "%s (%d): " |
322 | "but no CAP_SYSLOG (deprecated).\n"); | 322 | "Attempt to access syslog with CAP_SYS_ADMIN " |
323 | "but no CAP_SYSLOG (deprecated).\n", | ||
324 | current->comm, task_pid_nr(current)); | ||
323 | return 0; | 325 | return 0; |
324 | } | 326 | } |
325 | return -EPERM; | 327 | return -EPERM; |
@@ -363,18 +365,18 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) | |||
363 | if (error) | 365 | if (error) |
364 | goto out; | 366 | goto out; |
365 | i = 0; | 367 | i = 0; |
366 | spin_lock_irq(&logbuf_lock); | 368 | raw_spin_lock_irq(&logbuf_lock); |
367 | while (!error && (log_start != log_end) && i < len) { | 369 | while (!error && (log_start != log_end) && i < len) { |
368 | c = LOG_BUF(log_start); | 370 | c = LOG_BUF(log_start); |
369 | log_start++; | 371 | log_start++; |
370 | spin_unlock_irq(&logbuf_lock); | 372 | raw_spin_unlock_irq(&logbuf_lock); |
371 | error = __put_user(c,buf); | 373 | error = __put_user(c,buf); |
372 | buf++; | 374 | buf++; |
373 | i++; | 375 | i++; |
374 | cond_resched(); | 376 | cond_resched(); |
375 | spin_lock_irq(&logbuf_lock); | 377 | raw_spin_lock_irq(&logbuf_lock); |
376 | } | 378 | } |
377 | spin_unlock_irq(&logbuf_lock); | 379 | raw_spin_unlock_irq(&logbuf_lock); |
378 | if (!error) | 380 | if (!error) |
379 | error = i; | 381 | error = i; |
380 | break; | 382 | break; |
@@ -397,7 +399,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) | |||
397 | count = len; | 399 | count = len; |
398 | if (count > log_buf_len) | 400 | if (count > log_buf_len) |
399 | count = log_buf_len; | 401 | count = log_buf_len; |
400 | spin_lock_irq(&logbuf_lock); | 402 | raw_spin_lock_irq(&logbuf_lock); |
401 | if (count > logged_chars) | 403 | if (count > logged_chars) |
402 | count = logged_chars; | 404 | count = logged_chars; |
403 | if (do_clear) | 405 | if (do_clear) |
@@ -414,12 +416,12 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) | |||
414 | if (j + log_buf_len < log_end) | 416 | if (j + log_buf_len < log_end) |
415 | break; | 417 | break; |
416 | c = LOG_BUF(j); | 418 | c = LOG_BUF(j); |
417 | spin_unlock_irq(&logbuf_lock); | 419 | raw_spin_unlock_irq(&logbuf_lock); |
418 | error = __put_user(c,&buf[count-1-i]); | 420 | error = __put_user(c,&buf[count-1-i]); |
419 | cond_resched(); | 421 | cond_resched(); |
420 | spin_lock_irq(&logbuf_lock); | 422 | raw_spin_lock_irq(&logbuf_lock); |
421 | } | 423 | } |
422 | spin_unlock_irq(&logbuf_lock); | 424 | raw_spin_unlock_irq(&logbuf_lock); |
423 | if (error) | 425 | if (error) |
424 | break; | 426 | break; |
425 | error = i; | 427 | error = i; |
@@ -530,6 +532,9 @@ static int __init ignore_loglevel_setup(char *str) | |||
530 | } | 532 | } |
531 | 533 | ||
532 | early_param("ignore_loglevel", ignore_loglevel_setup); | 534 | early_param("ignore_loglevel", ignore_loglevel_setup); |
535 | module_param_named(ignore_loglevel, ignore_loglevel, bool, S_IRUGO | S_IWUSR); | ||
536 | MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to" | ||
537 | "print all kernel messages to the console."); | ||
533 | 538 | ||
534 | /* | 539 | /* |
535 | * Write out chars from start to end - 1 inclusive | 540 | * Write out chars from start to end - 1 inclusive |
@@ -590,9 +595,6 @@ static size_t log_prefix(const char *p, unsigned int *level, char *special) | |||
590 | /* multi digit including the level and facility number */ | 595 | /* multi digit including the level and facility number */ |
591 | char *endp = NULL; | 596 | char *endp = NULL; |
592 | 597 | ||
593 | if (p[1] < '0' && p[1] > '9') | ||
594 | return 0; | ||
595 | |||
596 | lev = (simple_strtoul(&p[1], &endp, 10) & 7); | 598 | lev = (simple_strtoul(&p[1], &endp, 10) & 7); |
597 | if (endp == NULL || endp[0] != '>') | 599 | if (endp == NULL || endp[0] != '>') |
598 | return 0; | 600 | return 0; |
@@ -687,7 +689,7 @@ static void zap_locks(void) | |||
687 | oops_timestamp = jiffies; | 689 | oops_timestamp = jiffies; |
688 | 690 | ||
689 | /* If a crash is occurring, make sure we can't deadlock */ | 691 | /* If a crash is occurring, make sure we can't deadlock */ |
690 | spin_lock_init(&logbuf_lock); | 692 | raw_spin_lock_init(&logbuf_lock); |
691 | /* And make sure that we print immediately */ | 693 | /* And make sure that we print immediately */ |
692 | sema_init(&console_sem, 1); | 694 | sema_init(&console_sem, 1); |
693 | } | 695 | } |
@@ -800,9 +802,9 @@ static int console_trylock_for_printk(unsigned int cpu) | |||
800 | } | 802 | } |
801 | } | 803 | } |
802 | printk_cpu = UINT_MAX; | 804 | printk_cpu = UINT_MAX; |
803 | spin_unlock(&logbuf_lock); | ||
804 | if (wake) | 805 | if (wake) |
805 | up(&console_sem); | 806 | up(&console_sem); |
807 | raw_spin_unlock(&logbuf_lock); | ||
806 | return retval; | 808 | return retval; |
807 | } | 809 | } |
808 | static const char recursion_bug_msg [] = | 810 | static const char recursion_bug_msg [] = |
@@ -862,7 +864,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) | |||
862 | } | 864 | } |
863 | 865 | ||
864 | lockdep_off(); | 866 | lockdep_off(); |
865 | spin_lock(&logbuf_lock); | 867 | raw_spin_lock(&logbuf_lock); |
866 | printk_cpu = this_cpu; | 868 | printk_cpu = this_cpu; |
867 | 869 | ||
868 | if (recursion_bug) { | 870 | if (recursion_bug) { |
@@ -1106,6 +1108,10 @@ static int __init console_suspend_disable(char *str) | |||
1106 | return 1; | 1108 | return 1; |
1107 | } | 1109 | } |
1108 | __setup("no_console_suspend", console_suspend_disable); | 1110 | __setup("no_console_suspend", console_suspend_disable); |
1111 | module_param_named(console_suspend, console_suspend_enabled, | ||
1112 | bool, S_IRUGO | S_IWUSR); | ||
1113 | MODULE_PARM_DESC(console_suspend, "suspend console during suspend" | ||
1114 | " and hibernate operations"); | ||
1109 | 1115 | ||
1110 | /** | 1116 | /** |
1111 | * suspend_console - suspend the console subsystem | 1117 | * suspend_console - suspend the console subsystem |
@@ -1255,14 +1261,14 @@ void console_unlock(void) | |||
1255 | 1261 | ||
1256 | again: | 1262 | again: |
1257 | for ( ; ; ) { | 1263 | for ( ; ; ) { |
1258 | spin_lock_irqsave(&logbuf_lock, flags); | 1264 | raw_spin_lock_irqsave(&logbuf_lock, flags); |
1259 | wake_klogd |= log_start - log_end; | 1265 | wake_klogd |= log_start - log_end; |
1260 | if (con_start == log_end) | 1266 | if (con_start == log_end) |
1261 | break; /* Nothing to print */ | 1267 | break; /* Nothing to print */ |
1262 | _con_start = con_start; | 1268 | _con_start = con_start; |
1263 | _log_end = log_end; | 1269 | _log_end = log_end; |
1264 | con_start = log_end; /* Flush */ | 1270 | con_start = log_end; /* Flush */ |
1265 | spin_unlock(&logbuf_lock); | 1271 | raw_spin_unlock(&logbuf_lock); |
1266 | stop_critical_timings(); /* don't trace print latency */ | 1272 | stop_critical_timings(); /* don't trace print latency */ |
1267 | call_console_drivers(_con_start, _log_end); | 1273 | call_console_drivers(_con_start, _log_end); |
1268 | start_critical_timings(); | 1274 | start_critical_timings(); |
@@ -1274,7 +1280,7 @@ again: | |||
1274 | if (unlikely(exclusive_console)) | 1280 | if (unlikely(exclusive_console)) |
1275 | exclusive_console = NULL; | 1281 | exclusive_console = NULL; |
1276 | 1282 | ||
1277 | spin_unlock(&logbuf_lock); | 1283 | raw_spin_unlock(&logbuf_lock); |
1278 | 1284 | ||
1279 | up(&console_sem); | 1285 | up(&console_sem); |
1280 | 1286 | ||
@@ -1284,13 +1290,13 @@ again: | |||
1284 | * there's a new owner and the console_unlock() from them will do the | 1290 | * there's a new owner and the console_unlock() from them will do the |
1285 | * flush, no worries. | 1291 | * flush, no worries. |
1286 | */ | 1292 | */ |
1287 | spin_lock(&logbuf_lock); | 1293 | raw_spin_lock(&logbuf_lock); |
1288 | if (con_start != log_end) | 1294 | if (con_start != log_end) |
1289 | retry = 1; | 1295 | retry = 1; |
1290 | spin_unlock_irqrestore(&logbuf_lock, flags); | ||
1291 | if (retry && console_trylock()) | 1296 | if (retry && console_trylock()) |
1292 | goto again; | 1297 | goto again; |
1293 | 1298 | ||
1299 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | ||
1294 | if (wake_klogd) | 1300 | if (wake_klogd) |
1295 | wake_up_klogd(); | 1301 | wake_up_klogd(); |
1296 | } | 1302 | } |
@@ -1520,9 +1526,9 @@ void register_console(struct console *newcon) | |||
1520 | * console_unlock(); will print out the buffered messages | 1526 | * console_unlock(); will print out the buffered messages |
1521 | * for us. | 1527 | * for us. |
1522 | */ | 1528 | */ |
1523 | spin_lock_irqsave(&logbuf_lock, flags); | 1529 | raw_spin_lock_irqsave(&logbuf_lock, flags); |
1524 | con_start = log_start; | 1530 | con_start = log_start; |
1525 | spin_unlock_irqrestore(&logbuf_lock, flags); | 1531 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); |
1526 | /* | 1532 | /* |
1527 | * We're about to replay the log buffer. Only do this to the | 1533 | * We're about to replay the log buffer. Only do this to the |
1528 | * just-registered console to avoid excessive message spam to | 1534 | * just-registered console to avoid excessive message spam to |
@@ -1602,7 +1608,7 @@ static int __init printk_late_init(void) | |||
1602 | struct console *con; | 1608 | struct console *con; |
1603 | 1609 | ||
1604 | for_each_console(con) { | 1610 | for_each_console(con) { |
1605 | if (con->flags & CON_BOOT) { | 1611 | if (!keep_bootcon && con->flags & CON_BOOT) { |
1606 | printk(KERN_INFO "turn off boot console %s%d\n", | 1612 | printk(KERN_INFO "turn off boot console %s%d\n", |
1607 | con->name, con->index); | 1613 | con->name, con->index); |
1608 | unregister_console(con); | 1614 | unregister_console(con); |
@@ -1729,10 +1735,10 @@ void kmsg_dump(enum kmsg_dump_reason reason) | |||
1729 | /* Theoretically, the log could move on after we do this, but | 1735 | /* Theoretically, the log could move on after we do this, but |
1730 | there's not a lot we can do about that. The new messages | 1736 | there's not a lot we can do about that. The new messages |
1731 | will overwrite the start of what we dump. */ | 1737 | will overwrite the start of what we dump. */ |
1732 | spin_lock_irqsave(&logbuf_lock, flags); | 1738 | raw_spin_lock_irqsave(&logbuf_lock, flags); |
1733 | end = log_end & LOG_BUF_MASK; | 1739 | end = log_end & LOG_BUF_MASK; |
1734 | chars = logged_chars; | 1740 | chars = logged_chars; |
1735 | spin_unlock_irqrestore(&logbuf_lock, flags); | 1741 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); |
1736 | 1742 | ||
1737 | if (chars > end) { | 1743 | if (chars > end) { |
1738 | s1 = log_buf + log_buf_len - chars + end; | 1744 | s1 = log_buf + log_buf_len - chars + end; |
diff --git a/kernel/profile.c b/kernel/profile.c index 961b389fe52..76b8e77773e 100644 --- a/kernel/profile.c +++ b/kernel/profile.c | |||
@@ -13,7 +13,7 @@ | |||
13 | * to resolve timer interrupt livelocks, William Irwin, Oracle, 2004 | 13 | * to resolve timer interrupt livelocks, William Irwin, Oracle, 2004 |
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include <linux/module.h> | 16 | #include <linux/export.h> |
17 | #include <linux/profile.h> | 17 | #include <linux/profile.h> |
18 | #include <linux/bootmem.h> | 18 | #include <linux/bootmem.h> |
19 | #include <linux/notifier.h> | 19 | #include <linux/notifier.h> |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 9de3ecfd20f..24d04477b25 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -8,7 +8,7 @@ | |||
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/capability.h> | 10 | #include <linux/capability.h> |
11 | #include <linux/module.h> | 11 | #include <linux/export.h> |
12 | #include <linux/sched.h> | 12 | #include <linux/sched.h> |
13 | #include <linux/errno.h> | 13 | #include <linux/errno.h> |
14 | #include <linux/mm.h> | 14 | #include <linux/mm.h> |
@@ -744,20 +744,17 @@ int ptrace_request(struct task_struct *child, long request, | |||
744 | break; | 744 | break; |
745 | 745 | ||
746 | si = child->last_siginfo; | 746 | si = child->last_siginfo; |
747 | if (unlikely(!si || si->si_code >> 8 != PTRACE_EVENT_STOP)) | 747 | if (likely(si && (si->si_code >> 8) == PTRACE_EVENT_STOP)) { |
748 | break; | 748 | child->jobctl |= JOBCTL_LISTENING; |
749 | 749 | /* | |
750 | child->jobctl |= JOBCTL_LISTENING; | 750 | * If NOTIFY is set, it means event happened between |
751 | 751 | * start of this trap and now. Trigger re-trap. | |
752 | /* | 752 | */ |
753 | * If NOTIFY is set, it means event happened between start | 753 | if (child->jobctl & JOBCTL_TRAP_NOTIFY) |
754 | * of this trap and now. Trigger re-trap immediately. | 754 | signal_wake_up(child, true); |
755 | */ | 755 | ret = 0; |
756 | if (child->jobctl & JOBCTL_TRAP_NOTIFY) | 756 | } |
757 | signal_wake_up(child, true); | ||
758 | |||
759 | unlock_task_sighand(child, &flags); | 757 | unlock_task_sighand(child, &flags); |
760 | ret = 0; | ||
761 | break; | 758 | break; |
762 | 759 | ||
763 | case PTRACE_DETACH: /* detach a process that was attached. */ | 760 | case PTRACE_DETACH: /* detach a process that was attached. */ |
diff --git a/kernel/range.c b/kernel/range.c index 37fa9b99ad5..9b8ae2d6ed6 100644 --- a/kernel/range.c +++ b/kernel/range.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Range add and subtract | 2 | * Range add and subtract |
3 | */ | 3 | */ |
4 | #include <linux/module.h> | 4 | #include <linux/kernel.h> |
5 | #include <linux/init.h> | 5 | #include <linux/init.h> |
6 | #include <linux/sort.h> | 6 | #include <linux/sort.h> |
7 | 7 | ||
diff --git a/kernel/rcu.h b/kernel/rcu.h new file mode 100644 index 00000000000..f600868d550 --- /dev/null +++ b/kernel/rcu.h | |||
@@ -0,0 +1,85 @@ | |||
1 | /* | ||
2 | * Read-Copy Update definitions shared among RCU implementations. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright IBM Corporation, 2011 | ||
19 | * | ||
20 | * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> | ||
21 | */ | ||
22 | |||
23 | #ifndef __LINUX_RCU_H | ||
24 | #define __LINUX_RCU_H | ||
25 | |||
26 | #ifdef CONFIG_RCU_TRACE | ||
27 | #define RCU_TRACE(stmt) stmt | ||
28 | #else /* #ifdef CONFIG_RCU_TRACE */ | ||
29 | #define RCU_TRACE(stmt) | ||
30 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | ||
31 | |||
32 | /* | ||
33 | * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally | ||
34 | * by call_rcu() and rcu callback execution, and are therefore not part of the | ||
35 | * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors. | ||
36 | */ | ||
37 | |||
38 | #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD | ||
39 | # define STATE_RCU_HEAD_READY 0 | ||
40 | # define STATE_RCU_HEAD_QUEUED 1 | ||
41 | |||
42 | extern struct debug_obj_descr rcuhead_debug_descr; | ||
43 | |||
44 | static inline void debug_rcu_head_queue(struct rcu_head *head) | ||
45 | { | ||
46 | WARN_ON_ONCE((unsigned long)head & 0x3); | ||
47 | debug_object_activate(head, &rcuhead_debug_descr); | ||
48 | debug_object_active_state(head, &rcuhead_debug_descr, | ||
49 | STATE_RCU_HEAD_READY, | ||
50 | STATE_RCU_HEAD_QUEUED); | ||
51 | } | ||
52 | |||
53 | static inline void debug_rcu_head_unqueue(struct rcu_head *head) | ||
54 | { | ||
55 | debug_object_active_state(head, &rcuhead_debug_descr, | ||
56 | STATE_RCU_HEAD_QUEUED, | ||
57 | STATE_RCU_HEAD_READY); | ||
58 | debug_object_deactivate(head, &rcuhead_debug_descr); | ||
59 | } | ||
60 | #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ | ||
61 | static inline void debug_rcu_head_queue(struct rcu_head *head) | ||
62 | { | ||
63 | } | ||
64 | |||
65 | static inline void debug_rcu_head_unqueue(struct rcu_head *head) | ||
66 | { | ||
67 | } | ||
68 | #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ | ||
69 | |||
70 | extern void kfree(const void *); | ||
71 | |||
72 | static inline void __rcu_reclaim(char *rn, struct rcu_head *head) | ||
73 | { | ||
74 | unsigned long offset = (unsigned long)head->func; | ||
75 | |||
76 | if (__is_kfree_rcu_offset(offset)) { | ||
77 | RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset)); | ||
78 | kfree((void *)head - offset); | ||
79 | } else { | ||
80 | RCU_TRACE(trace_rcu_invoke_callback(rn, head)); | ||
81 | head->func(head); | ||
82 | } | ||
83 | } | ||
84 | |||
85 | #endif /* __LINUX_RCU_H */ | ||
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index ddddb320be6..c5b98e565ae 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -43,9 +43,14 @@ | |||
43 | #include <linux/notifier.h> | 43 | #include <linux/notifier.h> |
44 | #include <linux/cpu.h> | 44 | #include <linux/cpu.h> |
45 | #include <linux/mutex.h> | 45 | #include <linux/mutex.h> |
46 | #include <linux/module.h> | 46 | #include <linux/export.h> |
47 | #include <linux/hardirq.h> | 47 | #include <linux/hardirq.h> |
48 | 48 | ||
49 | #define CREATE_TRACE_POINTS | ||
50 | #include <trace/events/rcu.h> | ||
51 | |||
52 | #include "rcu.h" | ||
53 | |||
49 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 54 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
50 | static struct lock_class_key rcu_lock_key; | 55 | static struct lock_class_key rcu_lock_key; |
51 | struct lockdep_map rcu_lock_map = | 56 | struct lockdep_map rcu_lock_map = |
@@ -94,11 +99,16 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); | |||
94 | 99 | ||
95 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 100 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
96 | 101 | ||
102 | struct rcu_synchronize { | ||
103 | struct rcu_head head; | ||
104 | struct completion completion; | ||
105 | }; | ||
106 | |||
97 | /* | 107 | /* |
98 | * Awaken the corresponding synchronize_rcu() instance now that a | 108 | * Awaken the corresponding synchronize_rcu() instance now that a |
99 | * grace period has elapsed. | 109 | * grace period has elapsed. |
100 | */ | 110 | */ |
101 | void wakeme_after_rcu(struct rcu_head *head) | 111 | static void wakeme_after_rcu(struct rcu_head *head) |
102 | { | 112 | { |
103 | struct rcu_synchronize *rcu; | 113 | struct rcu_synchronize *rcu; |
104 | 114 | ||
@@ -106,6 +116,20 @@ void wakeme_after_rcu(struct rcu_head *head) | |||
106 | complete(&rcu->completion); | 116 | complete(&rcu->completion); |
107 | } | 117 | } |
108 | 118 | ||
119 | void wait_rcu_gp(call_rcu_func_t crf) | ||
120 | { | ||
121 | struct rcu_synchronize rcu; | ||
122 | |||
123 | init_rcu_head_on_stack(&rcu.head); | ||
124 | init_completion(&rcu.completion); | ||
125 | /* Will wake me after RCU finished. */ | ||
126 | crf(&rcu.head, wakeme_after_rcu); | ||
127 | /* Wait for it. */ | ||
128 | wait_for_completion(&rcu.completion); | ||
129 | destroy_rcu_head_on_stack(&rcu.head); | ||
130 | } | ||
131 | EXPORT_SYMBOL_GPL(wait_rcu_gp); | ||
132 | |||
109 | #ifdef CONFIG_PROVE_RCU | 133 | #ifdef CONFIG_PROVE_RCU |
110 | /* | 134 | /* |
111 | * wrapper function to avoid #include problems. | 135 | * wrapper function to avoid #include problems. |
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 7bbac7d0f5a..636af6d9c6e 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c | |||
@@ -22,13 +22,12 @@ | |||
22 | * For detailed explanation of Read-Copy Update mechanism see - | 22 | * For detailed explanation of Read-Copy Update mechanism see - |
23 | * Documentation/RCU | 23 | * Documentation/RCU |
24 | */ | 24 | */ |
25 | #include <linux/moduleparam.h> | ||
26 | #include <linux/completion.h> | 25 | #include <linux/completion.h> |
27 | #include <linux/interrupt.h> | 26 | #include <linux/interrupt.h> |
28 | #include <linux/notifier.h> | 27 | #include <linux/notifier.h> |
29 | #include <linux/rcupdate.h> | 28 | #include <linux/rcupdate.h> |
30 | #include <linux/kernel.h> | 29 | #include <linux/kernel.h> |
31 | #include <linux/module.h> | 30 | #include <linux/export.h> |
32 | #include <linux/mutex.h> | 31 | #include <linux/mutex.h> |
33 | #include <linux/sched.h> | 32 | #include <linux/sched.h> |
34 | #include <linux/types.h> | 33 | #include <linux/types.h> |
@@ -37,16 +36,17 @@ | |||
37 | #include <linux/cpu.h> | 36 | #include <linux/cpu.h> |
38 | #include <linux/prefetch.h> | 37 | #include <linux/prefetch.h> |
39 | 38 | ||
40 | /* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */ | 39 | #ifdef CONFIG_RCU_TRACE |
41 | static struct task_struct *rcu_kthread_task; | 40 | #include <trace/events/rcu.h> |
42 | static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); | 41 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ |
43 | static unsigned long have_rcu_kthread_work; | 42 | |
43 | #include "rcu.h" | ||
44 | 44 | ||
45 | /* Forward declarations for rcutiny_plugin.h. */ | 45 | /* Forward declarations for rcutiny_plugin.h. */ |
46 | struct rcu_ctrlblk; | 46 | struct rcu_ctrlblk; |
47 | static void invoke_rcu_kthread(void); | 47 | static void invoke_rcu_callbacks(void); |
48 | static void rcu_process_callbacks(struct rcu_ctrlblk *rcp); | 48 | static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); |
49 | static int rcu_kthread(void *arg); | 49 | static void rcu_process_callbacks(struct softirq_action *unused); |
50 | static void __call_rcu(struct rcu_head *head, | 50 | static void __call_rcu(struct rcu_head *head, |
51 | void (*func)(struct rcu_head *rcu), | 51 | void (*func)(struct rcu_head *rcu), |
52 | struct rcu_ctrlblk *rcp); | 52 | struct rcu_ctrlblk *rcp); |
@@ -96,16 +96,6 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) | |||
96 | } | 96 | } |
97 | 97 | ||
98 | /* | 98 | /* |
99 | * Wake up rcu_kthread() to process callbacks now eligible for invocation | ||
100 | * or to boost readers. | ||
101 | */ | ||
102 | static void invoke_rcu_kthread(void) | ||
103 | { | ||
104 | have_rcu_kthread_work = 1; | ||
105 | wake_up(&rcu_kthread_wq); | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * Record an rcu quiescent state. And an rcu_bh quiescent state while we | 99 | * Record an rcu quiescent state. And an rcu_bh quiescent state while we |
110 | * are at it, given that any rcu quiescent state is also an rcu_bh | 100 | * are at it, given that any rcu quiescent state is also an rcu_bh |
111 | * quiescent state. Use "+" instead of "||" to defeat short circuiting. | 101 | * quiescent state. Use "+" instead of "||" to defeat short circuiting. |
@@ -117,7 +107,7 @@ void rcu_sched_qs(int cpu) | |||
117 | local_irq_save(flags); | 107 | local_irq_save(flags); |
118 | if (rcu_qsctr_help(&rcu_sched_ctrlblk) + | 108 | if (rcu_qsctr_help(&rcu_sched_ctrlblk) + |
119 | rcu_qsctr_help(&rcu_bh_ctrlblk)) | 109 | rcu_qsctr_help(&rcu_bh_ctrlblk)) |
120 | invoke_rcu_kthread(); | 110 | invoke_rcu_callbacks(); |
121 | local_irq_restore(flags); | 111 | local_irq_restore(flags); |
122 | } | 112 | } |
123 | 113 | ||
@@ -130,7 +120,7 @@ void rcu_bh_qs(int cpu) | |||
130 | 120 | ||
131 | local_irq_save(flags); | 121 | local_irq_save(flags); |
132 | if (rcu_qsctr_help(&rcu_bh_ctrlblk)) | 122 | if (rcu_qsctr_help(&rcu_bh_ctrlblk)) |
133 | invoke_rcu_kthread(); | 123 | invoke_rcu_callbacks(); |
134 | local_irq_restore(flags); | 124 | local_irq_restore(flags); |
135 | } | 125 | } |
136 | 126 | ||
@@ -154,18 +144,23 @@ void rcu_check_callbacks(int cpu, int user) | |||
154 | * Invoke the RCU callbacks on the specified rcu_ctrlkblk structure | 144 | * Invoke the RCU callbacks on the specified rcu_ctrlkblk structure |
155 | * whose grace period has elapsed. | 145 | * whose grace period has elapsed. |
156 | */ | 146 | */ |
157 | static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) | 147 | static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) |
158 | { | 148 | { |
149 | char *rn = NULL; | ||
159 | struct rcu_head *next, *list; | 150 | struct rcu_head *next, *list; |
160 | unsigned long flags; | 151 | unsigned long flags; |
161 | RCU_TRACE(int cb_count = 0); | 152 | RCU_TRACE(int cb_count = 0); |
162 | 153 | ||
163 | /* If no RCU callbacks ready to invoke, just return. */ | 154 | /* If no RCU callbacks ready to invoke, just return. */ |
164 | if (&rcp->rcucblist == rcp->donetail) | 155 | if (&rcp->rcucblist == rcp->donetail) { |
156 | RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); | ||
157 | RCU_TRACE(trace_rcu_batch_end(rcp->name, 0)); | ||
165 | return; | 158 | return; |
159 | } | ||
166 | 160 | ||
167 | /* Move the ready-to-invoke callbacks to a local list. */ | 161 | /* Move the ready-to-invoke callbacks to a local list. */ |
168 | local_irq_save(flags); | 162 | local_irq_save(flags); |
163 | RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); | ||
169 | list = rcp->rcucblist; | 164 | list = rcp->rcucblist; |
170 | rcp->rcucblist = *rcp->donetail; | 165 | rcp->rcucblist = *rcp->donetail; |
171 | *rcp->donetail = NULL; | 166 | *rcp->donetail = NULL; |
@@ -176,49 +171,26 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) | |||
176 | local_irq_restore(flags); | 171 | local_irq_restore(flags); |
177 | 172 | ||
178 | /* Invoke the callbacks on the local list. */ | 173 | /* Invoke the callbacks on the local list. */ |
174 | RCU_TRACE(rn = rcp->name); | ||
179 | while (list) { | 175 | while (list) { |
180 | next = list->next; | 176 | next = list->next; |
181 | prefetch(next); | 177 | prefetch(next); |
182 | debug_rcu_head_unqueue(list); | 178 | debug_rcu_head_unqueue(list); |
183 | local_bh_disable(); | 179 | local_bh_disable(); |
184 | __rcu_reclaim(list); | 180 | __rcu_reclaim(rn, list); |
185 | local_bh_enable(); | 181 | local_bh_enable(); |
186 | list = next; | 182 | list = next; |
187 | RCU_TRACE(cb_count++); | 183 | RCU_TRACE(cb_count++); |
188 | } | 184 | } |
189 | RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); | 185 | RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); |
186 | RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count)); | ||
190 | } | 187 | } |
191 | 188 | ||
192 | /* | 189 | static void rcu_process_callbacks(struct softirq_action *unused) |
193 | * This kthread invokes RCU callbacks whose grace periods have | ||
194 | * elapsed. It is awakened as needed, and takes the place of the | ||
195 | * RCU_SOFTIRQ that was used previously for this purpose. | ||
196 | * This is a kthread, but it is never stopped, at least not until | ||
197 | * the system goes down. | ||
198 | */ | ||
199 | static int rcu_kthread(void *arg) | ||
200 | { | 190 | { |
201 | unsigned long work; | 191 | __rcu_process_callbacks(&rcu_sched_ctrlblk); |
202 | unsigned long morework; | 192 | __rcu_process_callbacks(&rcu_bh_ctrlblk); |
203 | unsigned long flags; | 193 | rcu_preempt_process_callbacks(); |
204 | |||
205 | for (;;) { | ||
206 | wait_event_interruptible(rcu_kthread_wq, | ||
207 | have_rcu_kthread_work != 0); | ||
208 | morework = rcu_boost(); | ||
209 | local_irq_save(flags); | ||
210 | work = have_rcu_kthread_work; | ||
211 | have_rcu_kthread_work = morework; | ||
212 | local_irq_restore(flags); | ||
213 | if (work) { | ||
214 | rcu_process_callbacks(&rcu_sched_ctrlblk); | ||
215 | rcu_process_callbacks(&rcu_bh_ctrlblk); | ||
216 | rcu_preempt_process_callbacks(); | ||
217 | } | ||
218 | schedule_timeout_interruptible(1); /* Leave CPU for others. */ | ||
219 | } | ||
220 | |||
221 | return 0; /* Not reached, but needed to shut gcc up. */ | ||
222 | } | 194 | } |
223 | 195 | ||
224 | /* | 196 | /* |
@@ -280,45 +252,3 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |||
280 | __call_rcu(head, func, &rcu_bh_ctrlblk); | 252 | __call_rcu(head, func, &rcu_bh_ctrlblk); |
281 | } | 253 | } |
282 | EXPORT_SYMBOL_GPL(call_rcu_bh); | 254 | EXPORT_SYMBOL_GPL(call_rcu_bh); |
283 | |||
284 | void rcu_barrier_bh(void) | ||
285 | { | ||
286 | struct rcu_synchronize rcu; | ||
287 | |||
288 | init_rcu_head_on_stack(&rcu.head); | ||
289 | init_completion(&rcu.completion); | ||
290 | /* Will wake me after RCU finished. */ | ||
291 | call_rcu_bh(&rcu.head, wakeme_after_rcu); | ||
292 | /* Wait for it. */ | ||
293 | wait_for_completion(&rcu.completion); | ||
294 | destroy_rcu_head_on_stack(&rcu.head); | ||
295 | } | ||
296 | EXPORT_SYMBOL_GPL(rcu_barrier_bh); | ||
297 | |||
298 | void rcu_barrier_sched(void) | ||
299 | { | ||
300 | struct rcu_synchronize rcu; | ||
301 | |||
302 | init_rcu_head_on_stack(&rcu.head); | ||
303 | init_completion(&rcu.completion); | ||
304 | /* Will wake me after RCU finished. */ | ||
305 | call_rcu_sched(&rcu.head, wakeme_after_rcu); | ||
306 | /* Wait for it. */ | ||
307 | wait_for_completion(&rcu.completion); | ||
308 | destroy_rcu_head_on_stack(&rcu.head); | ||
309 | } | ||
310 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); | ||
311 | |||
312 | /* | ||
313 | * Spawn the kthread that invokes RCU callbacks. | ||
314 | */ | ||
315 | static int __init rcu_spawn_kthreads(void) | ||
316 | { | ||
317 | struct sched_param sp; | ||
318 | |||
319 | rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread"); | ||
320 | sp.sched_priority = RCU_BOOST_PRIO; | ||
321 | sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp); | ||
322 | return 0; | ||
323 | } | ||
324 | early_initcall(rcu_spawn_kthreads); | ||
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index f259c676195..2b0484a5dc2 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h | |||
@@ -23,32 +23,30 @@ | |||
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/kthread.h> | 25 | #include <linux/kthread.h> |
26 | #include <linux/module.h> | ||
26 | #include <linux/debugfs.h> | 27 | #include <linux/debugfs.h> |
27 | #include <linux/seq_file.h> | 28 | #include <linux/seq_file.h> |
28 | 29 | ||
29 | #ifdef CONFIG_RCU_TRACE | ||
30 | #define RCU_TRACE(stmt) stmt | ||
31 | #else /* #ifdef CONFIG_RCU_TRACE */ | ||
32 | #define RCU_TRACE(stmt) | ||
33 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | ||
34 | |||
35 | /* Global control variables for rcupdate callback mechanism. */ | 30 | /* Global control variables for rcupdate callback mechanism. */ |
36 | struct rcu_ctrlblk { | 31 | struct rcu_ctrlblk { |
37 | struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ | 32 | struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ |
38 | struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ | 33 | struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ |
39 | struct rcu_head **curtail; /* ->next pointer of last CB. */ | 34 | struct rcu_head **curtail; /* ->next pointer of last CB. */ |
40 | RCU_TRACE(long qlen); /* Number of pending CBs. */ | 35 | RCU_TRACE(long qlen); /* Number of pending CBs. */ |
36 | RCU_TRACE(char *name); /* Name of RCU type. */ | ||
41 | }; | 37 | }; |
42 | 38 | ||
43 | /* Definition for rcupdate control block. */ | 39 | /* Definition for rcupdate control block. */ |
44 | static struct rcu_ctrlblk rcu_sched_ctrlblk = { | 40 | static struct rcu_ctrlblk rcu_sched_ctrlblk = { |
45 | .donetail = &rcu_sched_ctrlblk.rcucblist, | 41 | .donetail = &rcu_sched_ctrlblk.rcucblist, |
46 | .curtail = &rcu_sched_ctrlblk.rcucblist, | 42 | .curtail = &rcu_sched_ctrlblk.rcucblist, |
43 | RCU_TRACE(.name = "rcu_sched") | ||
47 | }; | 44 | }; |
48 | 45 | ||
49 | static struct rcu_ctrlblk rcu_bh_ctrlblk = { | 46 | static struct rcu_ctrlblk rcu_bh_ctrlblk = { |
50 | .donetail = &rcu_bh_ctrlblk.rcucblist, | 47 | .donetail = &rcu_bh_ctrlblk.rcucblist, |
51 | .curtail = &rcu_bh_ctrlblk.rcucblist, | 48 | .curtail = &rcu_bh_ctrlblk.rcucblist, |
49 | RCU_TRACE(.name = "rcu_bh") | ||
52 | }; | 50 | }; |
53 | 51 | ||
54 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 52 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
@@ -131,6 +129,7 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { | |||
131 | .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist, | 129 | .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist, |
132 | .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist, | 130 | .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist, |
133 | .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks), | 131 | .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks), |
132 | RCU_TRACE(.rcb.name = "rcu_preempt") | ||
134 | }; | 133 | }; |
135 | 134 | ||
136 | static int rcu_preempted_readers_exp(void); | 135 | static int rcu_preempted_readers_exp(void); |
@@ -247,6 +246,13 @@ static void show_tiny_preempt_stats(struct seq_file *m) | |||
247 | 246 | ||
248 | #include "rtmutex_common.h" | 247 | #include "rtmutex_common.h" |
249 | 248 | ||
249 | #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO | ||
250 | |||
251 | /* Controls for rcu_kthread() kthread. */ | ||
252 | static struct task_struct *rcu_kthread_task; | ||
253 | static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); | ||
254 | static unsigned long have_rcu_kthread_work; | ||
255 | |||
250 | /* | 256 | /* |
251 | * Carry out RCU priority boosting on the task indicated by ->boost_tasks, | 257 | * Carry out RCU priority boosting on the task indicated by ->boost_tasks, |
252 | * and advance ->boost_tasks to the next task in the ->blkd_tasks list. | 258 | * and advance ->boost_tasks to the next task in the ->blkd_tasks list. |
@@ -334,7 +340,7 @@ static int rcu_initiate_boost(void) | |||
334 | if (rcu_preempt_ctrlblk.exp_tasks == NULL) | 340 | if (rcu_preempt_ctrlblk.exp_tasks == NULL) |
335 | rcu_preempt_ctrlblk.boost_tasks = | 341 | rcu_preempt_ctrlblk.boost_tasks = |
336 | rcu_preempt_ctrlblk.gp_tasks; | 342 | rcu_preempt_ctrlblk.gp_tasks; |
337 | invoke_rcu_kthread(); | 343 | invoke_rcu_callbacks(); |
338 | } else | 344 | } else |
339 | RCU_TRACE(rcu_initiate_boost_trace()); | 345 | RCU_TRACE(rcu_initiate_boost_trace()); |
340 | return 1; | 346 | return 1; |
@@ -353,14 +359,6 @@ static void rcu_preempt_boost_start_gp(void) | |||
353 | #else /* #ifdef CONFIG_RCU_BOOST */ | 359 | #else /* #ifdef CONFIG_RCU_BOOST */ |
354 | 360 | ||
355 | /* | 361 | /* |
356 | * If there is no RCU priority boosting, we don't boost. | ||
357 | */ | ||
358 | static int rcu_boost(void) | ||
359 | { | ||
360 | return 0; | ||
361 | } | ||
362 | |||
363 | /* | ||
364 | * If there is no RCU priority boosting, we don't initiate boosting, | 362 | * If there is no RCU priority boosting, we don't initiate boosting, |
365 | * but we do indicate whether there are blocked readers blocking the | 363 | * but we do indicate whether there are blocked readers blocking the |
366 | * current grace period. | 364 | * current grace period. |
@@ -427,7 +425,7 @@ static void rcu_preempt_cpu_qs(void) | |||
427 | 425 | ||
428 | /* If there are done callbacks, cause them to be invoked. */ | 426 | /* If there are done callbacks, cause them to be invoked. */ |
429 | if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) | 427 | if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) |
430 | invoke_rcu_kthread(); | 428 | invoke_rcu_callbacks(); |
431 | } | 429 | } |
432 | 430 | ||
433 | /* | 431 | /* |
@@ -648,7 +646,7 @@ static void rcu_preempt_check_callbacks(void) | |||
648 | rcu_preempt_cpu_qs(); | 646 | rcu_preempt_cpu_qs(); |
649 | if (&rcu_preempt_ctrlblk.rcb.rcucblist != | 647 | if (&rcu_preempt_ctrlblk.rcb.rcucblist != |
650 | rcu_preempt_ctrlblk.rcb.donetail) | 648 | rcu_preempt_ctrlblk.rcb.donetail) |
651 | invoke_rcu_kthread(); | 649 | invoke_rcu_callbacks(); |
652 | if (rcu_preempt_gp_in_progress() && | 650 | if (rcu_preempt_gp_in_progress() && |
653 | rcu_cpu_blocking_cur_gp() && | 651 | rcu_cpu_blocking_cur_gp() && |
654 | rcu_preempt_running_reader()) | 652 | rcu_preempt_running_reader()) |
@@ -674,7 +672,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) | |||
674 | */ | 672 | */ |
675 | static void rcu_preempt_process_callbacks(void) | 673 | static void rcu_preempt_process_callbacks(void) |
676 | { | 674 | { |
677 | rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); | 675 | __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); |
678 | } | 676 | } |
679 | 677 | ||
680 | /* | 678 | /* |
@@ -697,20 +695,6 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |||
697 | } | 695 | } |
698 | EXPORT_SYMBOL_GPL(call_rcu); | 696 | EXPORT_SYMBOL_GPL(call_rcu); |
699 | 697 | ||
700 | void rcu_barrier(void) | ||
701 | { | ||
702 | struct rcu_synchronize rcu; | ||
703 | |||
704 | init_rcu_head_on_stack(&rcu.head); | ||
705 | init_completion(&rcu.completion); | ||
706 | /* Will wake me after RCU finished. */ | ||
707 | call_rcu(&rcu.head, wakeme_after_rcu); | ||
708 | /* Wait for it. */ | ||
709 | wait_for_completion(&rcu.completion); | ||
710 | destroy_rcu_head_on_stack(&rcu.head); | ||
711 | } | ||
712 | EXPORT_SYMBOL_GPL(rcu_barrier); | ||
713 | |||
714 | /* | 698 | /* |
715 | * synchronize_rcu - wait until a grace period has elapsed. | 699 | * synchronize_rcu - wait until a grace period has elapsed. |
716 | * | 700 | * |
@@ -864,15 +848,6 @@ static void show_tiny_preempt_stats(struct seq_file *m) | |||
864 | #endif /* #ifdef CONFIG_RCU_TRACE */ | 848 | #endif /* #ifdef CONFIG_RCU_TRACE */ |
865 | 849 | ||
866 | /* | 850 | /* |
867 | * Because preemptible RCU does not exist, it is never necessary to | ||
868 | * boost preempted RCU readers. | ||
869 | */ | ||
870 | static int rcu_boost(void) | ||
871 | { | ||
872 | return 0; | ||
873 | } | ||
874 | |||
875 | /* | ||
876 | * Because preemptible RCU does not exist, it never has any callbacks | 851 | * Because preemptible RCU does not exist, it never has any callbacks |
877 | * to check. | 852 | * to check. |
878 | */ | 853 | */ |
@@ -898,6 +873,78 @@ static void rcu_preempt_process_callbacks(void) | |||
898 | 873 | ||
899 | #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ | 874 | #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ |
900 | 875 | ||
876 | #ifdef CONFIG_RCU_BOOST | ||
877 | |||
878 | /* | ||
879 | * Wake up rcu_kthread() to process callbacks now eligible for invocation | ||
880 | * or to boost readers. | ||
881 | */ | ||
882 | static void invoke_rcu_callbacks(void) | ||
883 | { | ||
884 | have_rcu_kthread_work = 1; | ||
885 | wake_up(&rcu_kthread_wq); | ||
886 | } | ||
887 | |||
888 | /* | ||
889 | * This kthread invokes RCU callbacks whose grace periods have | ||
890 | * elapsed. It is awakened as needed, and takes the place of the | ||
891 | * RCU_SOFTIRQ that is used for this purpose when boosting is disabled. | ||
892 | * This is a kthread, but it is never stopped, at least not until | ||
893 | * the system goes down. | ||
894 | */ | ||
895 | static int rcu_kthread(void *arg) | ||
896 | { | ||
897 | unsigned long work; | ||
898 | unsigned long morework; | ||
899 | unsigned long flags; | ||
900 | |||
901 | for (;;) { | ||
902 | wait_event_interruptible(rcu_kthread_wq, | ||
903 | have_rcu_kthread_work != 0); | ||
904 | morework = rcu_boost(); | ||
905 | local_irq_save(flags); | ||
906 | work = have_rcu_kthread_work; | ||
907 | have_rcu_kthread_work = morework; | ||
908 | local_irq_restore(flags); | ||
909 | if (work) | ||
910 | rcu_process_callbacks(NULL); | ||
911 | schedule_timeout_interruptible(1); /* Leave CPU for others. */ | ||
912 | } | ||
913 | |||
914 | return 0; /* Not reached, but needed to shut gcc up. */ | ||
915 | } | ||
916 | |||
917 | /* | ||
918 | * Spawn the kthread that invokes RCU callbacks. | ||
919 | */ | ||
920 | static int __init rcu_spawn_kthreads(void) | ||
921 | { | ||
922 | struct sched_param sp; | ||
923 | |||
924 | rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread"); | ||
925 | sp.sched_priority = RCU_BOOST_PRIO; | ||
926 | sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp); | ||
927 | return 0; | ||
928 | } | ||
929 | early_initcall(rcu_spawn_kthreads); | ||
930 | |||
931 | #else /* #ifdef CONFIG_RCU_BOOST */ | ||
932 | |||
933 | /* | ||
934 | * Start up softirq processing of callbacks. | ||
935 | */ | ||
936 | void invoke_rcu_callbacks(void) | ||
937 | { | ||
938 | raise_softirq(RCU_SOFTIRQ); | ||
939 | } | ||
940 | |||
941 | void rcu_init(void) | ||
942 | { | ||
943 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | ||
944 | } | ||
945 | |||
946 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | ||
947 | |||
901 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 948 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
902 | #include <linux/kernel_stat.h> | 949 | #include <linux/kernel_stat.h> |
903 | 950 | ||
@@ -913,12 +960,6 @@ void __init rcu_scheduler_starting(void) | |||
913 | 960 | ||
914 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 961 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
915 | 962 | ||
916 | #ifdef CONFIG_RCU_BOOST | ||
917 | #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO | ||
918 | #else /* #ifdef CONFIG_RCU_BOOST */ | ||
919 | #define RCU_BOOST_PRIO 1 | ||
920 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | ||
921 | |||
922 | #ifdef CONFIG_RCU_TRACE | 963 | #ifdef CONFIG_RCU_TRACE |
923 | 964 | ||
924 | #ifdef CONFIG_RCU_BOOST | 965 | #ifdef CONFIG_RCU_BOOST |
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 98f51b13bb7..764825c2685 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -73,7 +73,7 @@ module_param(nreaders, int, 0444); | |||
73 | MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); | 73 | MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); |
74 | module_param(nfakewriters, int, 0444); | 74 | module_param(nfakewriters, int, 0444); |
75 | MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads"); | 75 | MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads"); |
76 | module_param(stat_interval, int, 0444); | 76 | module_param(stat_interval, int, 0644); |
77 | MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); | 77 | MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); |
78 | module_param(verbose, bool, 0444); | 78 | module_param(verbose, bool, 0444); |
79 | MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s"); | 79 | MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s"); |
@@ -480,30 +480,6 @@ static void rcu_bh_torture_deferred_free(struct rcu_torture *p) | |||
480 | call_rcu_bh(&p->rtort_rcu, rcu_torture_cb); | 480 | call_rcu_bh(&p->rtort_rcu, rcu_torture_cb); |
481 | } | 481 | } |
482 | 482 | ||
483 | struct rcu_bh_torture_synchronize { | ||
484 | struct rcu_head head; | ||
485 | struct completion completion; | ||
486 | }; | ||
487 | |||
488 | static void rcu_bh_torture_wakeme_after_cb(struct rcu_head *head) | ||
489 | { | ||
490 | struct rcu_bh_torture_synchronize *rcu; | ||
491 | |||
492 | rcu = container_of(head, struct rcu_bh_torture_synchronize, head); | ||
493 | complete(&rcu->completion); | ||
494 | } | ||
495 | |||
496 | static void rcu_bh_torture_synchronize(void) | ||
497 | { | ||
498 | struct rcu_bh_torture_synchronize rcu; | ||
499 | |||
500 | init_rcu_head_on_stack(&rcu.head); | ||
501 | init_completion(&rcu.completion); | ||
502 | call_rcu_bh(&rcu.head, rcu_bh_torture_wakeme_after_cb); | ||
503 | wait_for_completion(&rcu.completion); | ||
504 | destroy_rcu_head_on_stack(&rcu.head); | ||
505 | } | ||
506 | |||
507 | static struct rcu_torture_ops rcu_bh_ops = { | 483 | static struct rcu_torture_ops rcu_bh_ops = { |
508 | .init = NULL, | 484 | .init = NULL, |
509 | .cleanup = NULL, | 485 | .cleanup = NULL, |
@@ -512,7 +488,7 @@ static struct rcu_torture_ops rcu_bh_ops = { | |||
512 | .readunlock = rcu_bh_torture_read_unlock, | 488 | .readunlock = rcu_bh_torture_read_unlock, |
513 | .completed = rcu_bh_torture_completed, | 489 | .completed = rcu_bh_torture_completed, |
514 | .deferred_free = rcu_bh_torture_deferred_free, | 490 | .deferred_free = rcu_bh_torture_deferred_free, |
515 | .sync = rcu_bh_torture_synchronize, | 491 | .sync = synchronize_rcu_bh, |
516 | .cb_barrier = rcu_barrier_bh, | 492 | .cb_barrier = rcu_barrier_bh, |
517 | .fqs = rcu_bh_force_quiescent_state, | 493 | .fqs = rcu_bh_force_quiescent_state, |
518 | .stats = NULL, | 494 | .stats = NULL, |
@@ -528,7 +504,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = { | |||
528 | .readunlock = rcu_bh_torture_read_unlock, | 504 | .readunlock = rcu_bh_torture_read_unlock, |
529 | .completed = rcu_bh_torture_completed, | 505 | .completed = rcu_bh_torture_completed, |
530 | .deferred_free = rcu_sync_torture_deferred_free, | 506 | .deferred_free = rcu_sync_torture_deferred_free, |
531 | .sync = rcu_bh_torture_synchronize, | 507 | .sync = synchronize_rcu_bh, |
532 | .cb_barrier = NULL, | 508 | .cb_barrier = NULL, |
533 | .fqs = rcu_bh_force_quiescent_state, | 509 | .fqs = rcu_bh_force_quiescent_state, |
534 | .stats = NULL, | 510 | .stats = NULL, |
@@ -536,6 +512,22 @@ static struct rcu_torture_ops rcu_bh_sync_ops = { | |||
536 | .name = "rcu_bh_sync" | 512 | .name = "rcu_bh_sync" |
537 | }; | 513 | }; |
538 | 514 | ||
515 | static struct rcu_torture_ops rcu_bh_expedited_ops = { | ||
516 | .init = rcu_sync_torture_init, | ||
517 | .cleanup = NULL, | ||
518 | .readlock = rcu_bh_torture_read_lock, | ||
519 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ | ||
520 | .readunlock = rcu_bh_torture_read_unlock, | ||
521 | .completed = rcu_bh_torture_completed, | ||
522 | .deferred_free = rcu_sync_torture_deferred_free, | ||
523 | .sync = synchronize_rcu_bh_expedited, | ||
524 | .cb_barrier = NULL, | ||
525 | .fqs = rcu_bh_force_quiescent_state, | ||
526 | .stats = NULL, | ||
527 | .irq_capable = 1, | ||
528 | .name = "rcu_bh_expedited" | ||
529 | }; | ||
530 | |||
539 | /* | 531 | /* |
540 | * Definitions for srcu torture testing. | 532 | * Definitions for srcu torture testing. |
541 | */ | 533 | */ |
@@ -659,11 +651,6 @@ static void rcu_sched_torture_deferred_free(struct rcu_torture *p) | |||
659 | call_rcu_sched(&p->rtort_rcu, rcu_torture_cb); | 651 | call_rcu_sched(&p->rtort_rcu, rcu_torture_cb); |
660 | } | 652 | } |
661 | 653 | ||
662 | static void sched_torture_synchronize(void) | ||
663 | { | ||
664 | synchronize_sched(); | ||
665 | } | ||
666 | |||
667 | static struct rcu_torture_ops sched_ops = { | 654 | static struct rcu_torture_ops sched_ops = { |
668 | .init = rcu_sync_torture_init, | 655 | .init = rcu_sync_torture_init, |
669 | .cleanup = NULL, | 656 | .cleanup = NULL, |
@@ -672,7 +659,7 @@ static struct rcu_torture_ops sched_ops = { | |||
672 | .readunlock = sched_torture_read_unlock, | 659 | .readunlock = sched_torture_read_unlock, |
673 | .completed = rcu_no_completed, | 660 | .completed = rcu_no_completed, |
674 | .deferred_free = rcu_sched_torture_deferred_free, | 661 | .deferred_free = rcu_sched_torture_deferred_free, |
675 | .sync = sched_torture_synchronize, | 662 | .sync = synchronize_sched, |
676 | .cb_barrier = rcu_barrier_sched, | 663 | .cb_barrier = rcu_barrier_sched, |
677 | .fqs = rcu_sched_force_quiescent_state, | 664 | .fqs = rcu_sched_force_quiescent_state, |
678 | .stats = NULL, | 665 | .stats = NULL, |
@@ -688,7 +675,7 @@ static struct rcu_torture_ops sched_sync_ops = { | |||
688 | .readunlock = sched_torture_read_unlock, | 675 | .readunlock = sched_torture_read_unlock, |
689 | .completed = rcu_no_completed, | 676 | .completed = rcu_no_completed, |
690 | .deferred_free = rcu_sync_torture_deferred_free, | 677 | .deferred_free = rcu_sync_torture_deferred_free, |
691 | .sync = sched_torture_synchronize, | 678 | .sync = synchronize_sched, |
692 | .cb_barrier = NULL, | 679 | .cb_barrier = NULL, |
693 | .fqs = rcu_sched_force_quiescent_state, | 680 | .fqs = rcu_sched_force_quiescent_state, |
694 | .stats = NULL, | 681 | .stats = NULL, |
@@ -754,7 +741,7 @@ static int rcu_torture_boost(void *arg) | |||
754 | do { | 741 | do { |
755 | /* Wait for the next test interval. */ | 742 | /* Wait for the next test interval. */ |
756 | oldstarttime = boost_starttime; | 743 | oldstarttime = boost_starttime; |
757 | while (jiffies - oldstarttime > ULONG_MAX / 2) { | 744 | while (ULONG_CMP_LT(jiffies, oldstarttime)) { |
758 | schedule_timeout_uninterruptible(1); | 745 | schedule_timeout_uninterruptible(1); |
759 | rcu_stutter_wait("rcu_torture_boost"); | 746 | rcu_stutter_wait("rcu_torture_boost"); |
760 | if (kthread_should_stop() || | 747 | if (kthread_should_stop() || |
@@ -765,7 +752,7 @@ static int rcu_torture_boost(void *arg) | |||
765 | /* Do one boost-test interval. */ | 752 | /* Do one boost-test interval. */ |
766 | endtime = oldstarttime + test_boost_duration * HZ; | 753 | endtime = oldstarttime + test_boost_duration * HZ; |
767 | call_rcu_time = jiffies; | 754 | call_rcu_time = jiffies; |
768 | while (jiffies - endtime > ULONG_MAX / 2) { | 755 | while (ULONG_CMP_LT(jiffies, endtime)) { |
769 | /* If we don't have a callback in flight, post one. */ | 756 | /* If we don't have a callback in flight, post one. */ |
770 | if (!rbi.inflight) { | 757 | if (!rbi.inflight) { |
771 | smp_mb(); /* RCU core before ->inflight = 1. */ | 758 | smp_mb(); /* RCU core before ->inflight = 1. */ |
@@ -792,7 +779,8 @@ static int rcu_torture_boost(void *arg) | |||
792 | * interval. Besides, we are running at RT priority, | 779 | * interval. Besides, we are running at RT priority, |
793 | * so delays should be relatively rare. | 780 | * so delays should be relatively rare. |
794 | */ | 781 | */ |
795 | while (oldstarttime == boost_starttime) { | 782 | while (oldstarttime == boost_starttime && |
783 | !kthread_should_stop()) { | ||
796 | if (mutex_trylock(&boost_mutex)) { | 784 | if (mutex_trylock(&boost_mutex)) { |
797 | boost_starttime = jiffies + | 785 | boost_starttime = jiffies + |
798 | test_boost_interval * HZ; | 786 | test_boost_interval * HZ; |
@@ -809,11 +797,11 @@ checkwait: rcu_stutter_wait("rcu_torture_boost"); | |||
809 | 797 | ||
810 | /* Clean up and exit. */ | 798 | /* Clean up and exit. */ |
811 | VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); | 799 | VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); |
812 | destroy_rcu_head_on_stack(&rbi.rcu); | ||
813 | rcutorture_shutdown_absorb("rcu_torture_boost"); | 800 | rcutorture_shutdown_absorb("rcu_torture_boost"); |
814 | while (!kthread_should_stop() || rbi.inflight) | 801 | while (!kthread_should_stop() || rbi.inflight) |
815 | schedule_timeout_uninterruptible(1); | 802 | schedule_timeout_uninterruptible(1); |
816 | smp_mb(); /* order accesses to ->inflight before stack-frame death. */ | 803 | smp_mb(); /* order accesses to ->inflight before stack-frame death. */ |
804 | destroy_rcu_head_on_stack(&rbi.rcu); | ||
817 | return 0; | 805 | return 0; |
818 | } | 806 | } |
819 | 807 | ||
@@ -831,11 +819,13 @@ rcu_torture_fqs(void *arg) | |||
831 | VERBOSE_PRINTK_STRING("rcu_torture_fqs task started"); | 819 | VERBOSE_PRINTK_STRING("rcu_torture_fqs task started"); |
832 | do { | 820 | do { |
833 | fqs_resume_time = jiffies + fqs_stutter * HZ; | 821 | fqs_resume_time = jiffies + fqs_stutter * HZ; |
834 | while (jiffies - fqs_resume_time > LONG_MAX) { | 822 | while (ULONG_CMP_LT(jiffies, fqs_resume_time) && |
823 | !kthread_should_stop()) { | ||
835 | schedule_timeout_interruptible(1); | 824 | schedule_timeout_interruptible(1); |
836 | } | 825 | } |
837 | fqs_burst_remaining = fqs_duration; | 826 | fqs_burst_remaining = fqs_duration; |
838 | while (fqs_burst_remaining > 0) { | 827 | while (fqs_burst_remaining > 0 && |
828 | !kthread_should_stop()) { | ||
839 | cur_ops->fqs(); | 829 | cur_ops->fqs(); |
840 | udelay(fqs_holdoff); | 830 | udelay(fqs_holdoff); |
841 | fqs_burst_remaining -= fqs_holdoff; | 831 | fqs_burst_remaining -= fqs_holdoff; |
@@ -1280,8 +1270,9 @@ static int rcutorture_booster_init(int cpu) | |||
1280 | /* Don't allow time recalculation while creating a new task. */ | 1270 | /* Don't allow time recalculation while creating a new task. */ |
1281 | mutex_lock(&boost_mutex); | 1271 | mutex_lock(&boost_mutex); |
1282 | VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task"); | 1272 | VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task"); |
1283 | boost_tasks[cpu] = kthread_create(rcu_torture_boost, NULL, | 1273 | boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL, |
1284 | "rcu_torture_boost"); | 1274 | cpu_to_node(cpu), |
1275 | "rcu_torture_boost"); | ||
1285 | if (IS_ERR(boost_tasks[cpu])) { | 1276 | if (IS_ERR(boost_tasks[cpu])) { |
1286 | retval = PTR_ERR(boost_tasks[cpu]); | 1277 | retval = PTR_ERR(boost_tasks[cpu]); |
1287 | VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed"); | 1278 | VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed"); |
@@ -1424,7 +1415,7 @@ rcu_torture_init(void) | |||
1424 | int firsterr = 0; | 1415 | int firsterr = 0; |
1425 | static struct rcu_torture_ops *torture_ops[] = | 1416 | static struct rcu_torture_ops *torture_ops[] = |
1426 | { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, | 1417 | { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, |
1427 | &rcu_bh_ops, &rcu_bh_sync_ops, | 1418 | &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, |
1428 | &srcu_ops, &srcu_expedited_ops, | 1419 | &srcu_ops, &srcu_expedited_ops, |
1429 | &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; | 1420 | &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; |
1430 | 1421 | ||
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ba06207b1dd..6b76d812740 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -38,7 +38,7 @@ | |||
38 | #include <linux/nmi.h> | 38 | #include <linux/nmi.h> |
39 | #include <linux/atomic.h> | 39 | #include <linux/atomic.h> |
40 | #include <linux/bitops.h> | 40 | #include <linux/bitops.h> |
41 | #include <linux/module.h> | 41 | #include <linux/export.h> |
42 | #include <linux/completion.h> | 42 | #include <linux/completion.h> |
43 | #include <linux/moduleparam.h> | 43 | #include <linux/moduleparam.h> |
44 | #include <linux/percpu.h> | 44 | #include <linux/percpu.h> |
@@ -52,13 +52,16 @@ | |||
52 | #include <linux/prefetch.h> | 52 | #include <linux/prefetch.h> |
53 | 53 | ||
54 | #include "rcutree.h" | 54 | #include "rcutree.h" |
55 | #include <trace/events/rcu.h> | ||
56 | |||
57 | #include "rcu.h" | ||
55 | 58 | ||
56 | /* Data structures. */ | 59 | /* Data structures. */ |
57 | 60 | ||
58 | static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | 61 | static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; |
59 | 62 | ||
60 | #define RCU_STATE_INITIALIZER(structname) { \ | 63 | #define RCU_STATE_INITIALIZER(structname) { \ |
61 | .level = { &structname.node[0] }, \ | 64 | .level = { &structname##_state.node[0] }, \ |
62 | .levelcnt = { \ | 65 | .levelcnt = { \ |
63 | NUM_RCU_LVL_0, /* root of hierarchy. */ \ | 66 | NUM_RCU_LVL_0, /* root of hierarchy. */ \ |
64 | NUM_RCU_LVL_1, \ | 67 | NUM_RCU_LVL_1, \ |
@@ -69,17 +72,17 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | |||
69 | .signaled = RCU_GP_IDLE, \ | 72 | .signaled = RCU_GP_IDLE, \ |
70 | .gpnum = -300, \ | 73 | .gpnum = -300, \ |
71 | .completed = -300, \ | 74 | .completed = -300, \ |
72 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \ | 75 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ |
73 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \ | 76 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ |
74 | .n_force_qs = 0, \ | 77 | .n_force_qs = 0, \ |
75 | .n_force_qs_ngp = 0, \ | 78 | .n_force_qs_ngp = 0, \ |
76 | .name = #structname, \ | 79 | .name = #structname, \ |
77 | } | 80 | } |
78 | 81 | ||
79 | struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state); | 82 | struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched); |
80 | DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); | 83 | DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); |
81 | 84 | ||
82 | struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); | 85 | struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh); |
83 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); | 86 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); |
84 | 87 | ||
85 | static struct rcu_state *rcu_state; | 88 | static struct rcu_state *rcu_state; |
@@ -128,8 +131,6 @@ static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); | |||
128 | static void invoke_rcu_core(void); | 131 | static void invoke_rcu_core(void); |
129 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); | 132 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); |
130 | 133 | ||
131 | #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ | ||
132 | |||
133 | /* | 134 | /* |
134 | * Track the rcutorture test sequence number and the update version | 135 | * Track the rcutorture test sequence number and the update version |
135 | * number within a given test. The rcutorture_testseq is incremented | 136 | * number within a given test. The rcutorture_testseq is incremented |
@@ -156,33 +157,41 @@ static int rcu_gp_in_progress(struct rcu_state *rsp) | |||
156 | * Note a quiescent state. Because we do not need to know | 157 | * Note a quiescent state. Because we do not need to know |
157 | * how many quiescent states passed, just if there was at least | 158 | * how many quiescent states passed, just if there was at least |
158 | * one since the start of the grace period, this just sets a flag. | 159 | * one since the start of the grace period, this just sets a flag. |
160 | * The caller must have disabled preemption. | ||
159 | */ | 161 | */ |
160 | void rcu_sched_qs(int cpu) | 162 | void rcu_sched_qs(int cpu) |
161 | { | 163 | { |
162 | struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); | 164 | struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); |
163 | 165 | ||
164 | rdp->passed_quiesc_completed = rdp->gpnum - 1; | 166 | rdp->passed_quiesce_gpnum = rdp->gpnum; |
165 | barrier(); | 167 | barrier(); |
166 | rdp->passed_quiesc = 1; | 168 | if (rdp->passed_quiesce == 0) |
169 | trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs"); | ||
170 | rdp->passed_quiesce = 1; | ||
167 | } | 171 | } |
168 | 172 | ||
169 | void rcu_bh_qs(int cpu) | 173 | void rcu_bh_qs(int cpu) |
170 | { | 174 | { |
171 | struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); | 175 | struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); |
172 | 176 | ||
173 | rdp->passed_quiesc_completed = rdp->gpnum - 1; | 177 | rdp->passed_quiesce_gpnum = rdp->gpnum; |
174 | barrier(); | 178 | barrier(); |
175 | rdp->passed_quiesc = 1; | 179 | if (rdp->passed_quiesce == 0) |
180 | trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs"); | ||
181 | rdp->passed_quiesce = 1; | ||
176 | } | 182 | } |
177 | 183 | ||
178 | /* | 184 | /* |
179 | * Note a context switch. This is a quiescent state for RCU-sched, | 185 | * Note a context switch. This is a quiescent state for RCU-sched, |
180 | * and requires special handling for preemptible RCU. | 186 | * and requires special handling for preemptible RCU. |
187 | * The caller must have disabled preemption. | ||
181 | */ | 188 | */ |
182 | void rcu_note_context_switch(int cpu) | 189 | void rcu_note_context_switch(int cpu) |
183 | { | 190 | { |
191 | trace_rcu_utilization("Start context switch"); | ||
184 | rcu_sched_qs(cpu); | 192 | rcu_sched_qs(cpu); |
185 | rcu_preempt_note_context_switch(cpu); | 193 | rcu_preempt_note_context_switch(cpu); |
194 | trace_rcu_utilization("End context switch"); | ||
186 | } | 195 | } |
187 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); | 196 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); |
188 | 197 | ||
@@ -193,7 +202,7 @@ DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { | |||
193 | }; | 202 | }; |
194 | #endif /* #ifdef CONFIG_NO_HZ */ | 203 | #endif /* #ifdef CONFIG_NO_HZ */ |
195 | 204 | ||
196 | static int blimit = 10; /* Maximum callbacks per softirq. */ | 205 | static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */ |
197 | static int qhimark = 10000; /* If this many pending, ignore blimit. */ | 206 | static int qhimark = 10000; /* If this many pending, ignore blimit. */ |
198 | static int qlowmark = 100; /* Once only this many pending, use blimit. */ | 207 | static int qlowmark = 100; /* Once only this many pending, use blimit. */ |
199 | 208 | ||
@@ -314,6 +323,7 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp) | |||
314 | * trust its state not to change because interrupts are disabled. | 323 | * trust its state not to change because interrupts are disabled. |
315 | */ | 324 | */ |
316 | if (cpu_is_offline(rdp->cpu)) { | 325 | if (cpu_is_offline(rdp->cpu)) { |
326 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); | ||
317 | rdp->offline_fqs++; | 327 | rdp->offline_fqs++; |
318 | return 1; | 328 | return 1; |
319 | } | 329 | } |
@@ -354,19 +364,13 @@ void rcu_enter_nohz(void) | |||
354 | local_irq_restore(flags); | 364 | local_irq_restore(flags); |
355 | return; | 365 | return; |
356 | } | 366 | } |
367 | trace_rcu_dyntick("Start"); | ||
357 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ | 368 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ |
358 | smp_mb__before_atomic_inc(); /* See above. */ | 369 | smp_mb__before_atomic_inc(); /* See above. */ |
359 | atomic_inc(&rdtp->dynticks); | 370 | atomic_inc(&rdtp->dynticks); |
360 | smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ | 371 | smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ |
361 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | 372 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); |
362 | local_irq_restore(flags); | 373 | local_irq_restore(flags); |
363 | |||
364 | /* If the interrupt queued a callback, get out of dyntick mode. */ | ||
365 | if (in_irq() && | ||
366 | (__get_cpu_var(rcu_sched_data).nxtlist || | ||
367 | __get_cpu_var(rcu_bh_data).nxtlist || | ||
368 | rcu_preempt_needs_cpu(smp_processor_id()))) | ||
369 | set_need_resched(); | ||
370 | } | 374 | } |
371 | 375 | ||
372 | /* | 376 | /* |
@@ -391,6 +395,7 @@ void rcu_exit_nohz(void) | |||
391 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ | 395 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ |
392 | smp_mb__after_atomic_inc(); /* See above. */ | 396 | smp_mb__after_atomic_inc(); /* See above. */ |
393 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); | 397 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); |
398 | trace_rcu_dyntick("End"); | ||
394 | local_irq_restore(flags); | 399 | local_irq_restore(flags); |
395 | } | 400 | } |
396 | 401 | ||
@@ -481,11 +486,11 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) | |||
481 | */ | 486 | */ |
482 | static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | 487 | static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) |
483 | { | 488 | { |
484 | unsigned long curr; | 489 | unsigned int curr; |
485 | unsigned long snap; | 490 | unsigned int snap; |
486 | 491 | ||
487 | curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks); | 492 | curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks); |
488 | snap = (unsigned long)rdp->dynticks_snap; | 493 | snap = (unsigned int)rdp->dynticks_snap; |
489 | 494 | ||
490 | /* | 495 | /* |
491 | * If the CPU passed through or entered a dynticks idle phase with | 496 | * If the CPU passed through or entered a dynticks idle phase with |
@@ -495,7 +500,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | |||
495 | * read-side critical section that started before the beginning | 500 | * read-side critical section that started before the beginning |
496 | * of the current RCU grace period. | 501 | * of the current RCU grace period. |
497 | */ | 502 | */ |
498 | if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) { | 503 | if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) { |
504 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti"); | ||
499 | rdp->dynticks_fqs++; | 505 | rdp->dynticks_fqs++; |
500 | return 1; | 506 | return 1; |
501 | } | 507 | } |
@@ -537,6 +543,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
537 | int cpu; | 543 | int cpu; |
538 | long delta; | 544 | long delta; |
539 | unsigned long flags; | 545 | unsigned long flags; |
546 | int ndetected; | ||
540 | struct rcu_node *rnp = rcu_get_root(rsp); | 547 | struct rcu_node *rnp = rcu_get_root(rsp); |
541 | 548 | ||
542 | /* Only let one CPU complain about others per time interval. */ | 549 | /* Only let one CPU complain about others per time interval. */ |
@@ -553,7 +560,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
553 | * Now rat on any tasks that got kicked up to the root rcu_node | 560 | * Now rat on any tasks that got kicked up to the root rcu_node |
554 | * due to CPU offlining. | 561 | * due to CPU offlining. |
555 | */ | 562 | */ |
556 | rcu_print_task_stall(rnp); | 563 | ndetected = rcu_print_task_stall(rnp); |
557 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 564 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
558 | 565 | ||
559 | /* | 566 | /* |
@@ -565,17 +572,22 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
565 | rsp->name); | 572 | rsp->name); |
566 | rcu_for_each_leaf_node(rsp, rnp) { | 573 | rcu_for_each_leaf_node(rsp, rnp) { |
567 | raw_spin_lock_irqsave(&rnp->lock, flags); | 574 | raw_spin_lock_irqsave(&rnp->lock, flags); |
568 | rcu_print_task_stall(rnp); | 575 | ndetected += rcu_print_task_stall(rnp); |
569 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 576 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
570 | if (rnp->qsmask == 0) | 577 | if (rnp->qsmask == 0) |
571 | continue; | 578 | continue; |
572 | for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) | 579 | for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) |
573 | if (rnp->qsmask & (1UL << cpu)) | 580 | if (rnp->qsmask & (1UL << cpu)) { |
574 | printk(" %d", rnp->grplo + cpu); | 581 | printk(" %d", rnp->grplo + cpu); |
582 | ndetected++; | ||
583 | } | ||
575 | } | 584 | } |
576 | printk("} (detected by %d, t=%ld jiffies)\n", | 585 | printk("} (detected by %d, t=%ld jiffies)\n", |
577 | smp_processor_id(), (long)(jiffies - rsp->gp_start)); | 586 | smp_processor_id(), (long)(jiffies - rsp->gp_start)); |
578 | trigger_all_cpu_backtrace(); | 587 | if (ndetected == 0) |
588 | printk(KERN_ERR "INFO: Stall ended before state dump start\n"); | ||
589 | else if (!trigger_all_cpu_backtrace()) | ||
590 | dump_stack(); | ||
579 | 591 | ||
580 | /* If so configured, complain about tasks blocking the grace period. */ | 592 | /* If so configured, complain about tasks blocking the grace period. */ |
581 | 593 | ||
@@ -596,7 +608,8 @@ static void print_cpu_stall(struct rcu_state *rsp) | |||
596 | */ | 608 | */ |
597 | printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", | 609 | printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", |
598 | rsp->name, smp_processor_id(), jiffies - rsp->gp_start); | 610 | rsp->name, smp_processor_id(), jiffies - rsp->gp_start); |
599 | trigger_all_cpu_backtrace(); | 611 | if (!trigger_all_cpu_backtrace()) |
612 | dump_stack(); | ||
600 | 613 | ||
601 | raw_spin_lock_irqsave(&rnp->lock, flags); | 614 | raw_spin_lock_irqsave(&rnp->lock, flags); |
602 | if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) | 615 | if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) |
@@ -678,9 +691,10 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct | |||
678 | * go looking for one. | 691 | * go looking for one. |
679 | */ | 692 | */ |
680 | rdp->gpnum = rnp->gpnum; | 693 | rdp->gpnum = rnp->gpnum; |
694 | trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart"); | ||
681 | if (rnp->qsmask & rdp->grpmask) { | 695 | if (rnp->qsmask & rdp->grpmask) { |
682 | rdp->qs_pending = 1; | 696 | rdp->qs_pending = 1; |
683 | rdp->passed_quiesc = 0; | 697 | rdp->passed_quiesce = 0; |
684 | } else | 698 | } else |
685 | rdp->qs_pending = 0; | 699 | rdp->qs_pending = 0; |
686 | } | 700 | } |
@@ -741,6 +755,7 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat | |||
741 | 755 | ||
742 | /* Remember that we saw this grace-period completion. */ | 756 | /* Remember that we saw this grace-period completion. */ |
743 | rdp->completed = rnp->completed; | 757 | rdp->completed = rnp->completed; |
758 | trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend"); | ||
744 | 759 | ||
745 | /* | 760 | /* |
746 | * If we were in an extended quiescent state, we may have | 761 | * If we were in an extended quiescent state, we may have |
@@ -826,31 +841,31 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
826 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | 841 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); |
827 | struct rcu_node *rnp = rcu_get_root(rsp); | 842 | struct rcu_node *rnp = rcu_get_root(rsp); |
828 | 843 | ||
829 | if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { | 844 | if (!rcu_scheduler_fully_active || |
830 | if (cpu_needs_another_gp(rsp, rdp)) | 845 | !cpu_needs_another_gp(rsp, rdp)) { |
831 | rsp->fqs_need_gp = 1; | 846 | /* |
832 | if (rnp->completed == rsp->completed) { | 847 | * Either the scheduler hasn't yet spawned the first |
833 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 848 | * non-idle task or this CPU does not need another |
834 | return; | 849 | * grace period. Either way, don't start a new grace |
835 | } | 850 | * period. |
836 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 851 | */ |
852 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
853 | return; | ||
854 | } | ||
837 | 855 | ||
856 | if (rsp->fqs_active) { | ||
838 | /* | 857 | /* |
839 | * Propagate new ->completed value to rcu_node structures | 858 | * This CPU needs a grace period, but force_quiescent_state() |
840 | * so that other CPUs don't have to wait until the start | 859 | * is running. Tell it to start one on this CPU's behalf. |
841 | * of the next grace period to process their callbacks. | ||
842 | */ | 860 | */ |
843 | rcu_for_each_node_breadth_first(rsp, rnp) { | 861 | rsp->fqs_need_gp = 1; |
844 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 862 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
845 | rnp->completed = rsp->completed; | ||
846 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
847 | } | ||
848 | local_irq_restore(flags); | ||
849 | return; | 863 | return; |
850 | } | 864 | } |
851 | 865 | ||
852 | /* Advance to a new grace period and initialize state. */ | 866 | /* Advance to a new grace period and initialize state. */ |
853 | rsp->gpnum++; | 867 | rsp->gpnum++; |
868 | trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); | ||
854 | WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT); | 869 | WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT); |
855 | rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ | 870 | rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ |
856 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; | 871 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; |
@@ -865,6 +880,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
865 | rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ | 880 | rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ |
866 | rcu_start_gp_per_cpu(rsp, rnp, rdp); | 881 | rcu_start_gp_per_cpu(rsp, rnp, rdp); |
867 | rcu_preempt_boost_start_gp(rnp); | 882 | rcu_preempt_boost_start_gp(rnp); |
883 | trace_rcu_grace_period_init(rsp->name, rnp->gpnum, | ||
884 | rnp->level, rnp->grplo, | ||
885 | rnp->grphi, rnp->qsmask); | ||
868 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 886 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
869 | return; | 887 | return; |
870 | } | 888 | } |
@@ -901,6 +919,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
901 | if (rnp == rdp->mynode) | 919 | if (rnp == rdp->mynode) |
902 | rcu_start_gp_per_cpu(rsp, rnp, rdp); | 920 | rcu_start_gp_per_cpu(rsp, rnp, rdp); |
903 | rcu_preempt_boost_start_gp(rnp); | 921 | rcu_preempt_boost_start_gp(rnp); |
922 | trace_rcu_grace_period_init(rsp->name, rnp->gpnum, | ||
923 | rnp->level, rnp->grplo, | ||
924 | rnp->grphi, rnp->qsmask); | ||
904 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 925 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
905 | } | 926 | } |
906 | 927 | ||
@@ -922,6 +943,8 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) | |||
922 | __releases(rcu_get_root(rsp)->lock) | 943 | __releases(rcu_get_root(rsp)->lock) |
923 | { | 944 | { |
924 | unsigned long gp_duration; | 945 | unsigned long gp_duration; |
946 | struct rcu_node *rnp = rcu_get_root(rsp); | ||
947 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | ||
925 | 948 | ||
926 | WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); | 949 | WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); |
927 | 950 | ||
@@ -933,7 +956,41 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) | |||
933 | gp_duration = jiffies - rsp->gp_start; | 956 | gp_duration = jiffies - rsp->gp_start; |
934 | if (gp_duration > rsp->gp_max) | 957 | if (gp_duration > rsp->gp_max) |
935 | rsp->gp_max = gp_duration; | 958 | rsp->gp_max = gp_duration; |
936 | rsp->completed = rsp->gpnum; | 959 | |
960 | /* | ||
961 | * We know the grace period is complete, but to everyone else | ||
962 | * it appears to still be ongoing. But it is also the case | ||
963 | * that to everyone else it looks like there is nothing that | ||
964 | * they can do to advance the grace period. It is therefore | ||
965 | * safe for us to drop the lock in order to mark the grace | ||
966 | * period as completed in all of the rcu_node structures. | ||
967 | * | ||
968 | * But if this CPU needs another grace period, it will take | ||
969 | * care of this while initializing the next grace period. | ||
970 | * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL | ||
971 | * because the callbacks have not yet been advanced: Those | ||
972 | * callbacks are waiting on the grace period that just now | ||
973 | * completed. | ||
974 | */ | ||
975 | if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) { | ||
976 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
977 | |||
978 | /* | ||
979 | * Propagate new ->completed value to rcu_node structures | ||
980 | * so that other CPUs don't have to wait until the start | ||
981 | * of the next grace period to process their callbacks. | ||
982 | */ | ||
983 | rcu_for_each_node_breadth_first(rsp, rnp) { | ||
984 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | ||
985 | rnp->completed = rsp->gpnum; | ||
986 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
987 | } | ||
988 | rnp = rcu_get_root(rsp); | ||
989 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | ||
990 | } | ||
991 | |||
992 | rsp->completed = rsp->gpnum; /* Declare the grace period complete. */ | ||
993 | trace_rcu_grace_period(rsp->name, rsp->completed, "end"); | ||
937 | rsp->signaled = RCU_GP_IDLE; | 994 | rsp->signaled = RCU_GP_IDLE; |
938 | rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ | 995 | rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ |
939 | } | 996 | } |
@@ -962,6 +1019,10 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, | |||
962 | return; | 1019 | return; |
963 | } | 1020 | } |
964 | rnp->qsmask &= ~mask; | 1021 | rnp->qsmask &= ~mask; |
1022 | trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, | ||
1023 | mask, rnp->qsmask, rnp->level, | ||
1024 | rnp->grplo, rnp->grphi, | ||
1025 | !!rnp->gp_tasks); | ||
965 | if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { | 1026 | if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { |
966 | 1027 | ||
967 | /* Other bits still set at this level, so done. */ | 1028 | /* Other bits still set at this level, so done. */ |
@@ -1000,7 +1061,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, | |||
1000 | * based on quiescent states detected in an earlier grace period! | 1061 | * based on quiescent states detected in an earlier grace period! |
1001 | */ | 1062 | */ |
1002 | static void | 1063 | static void |
1003 | rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) | 1064 | rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastgp) |
1004 | { | 1065 | { |
1005 | unsigned long flags; | 1066 | unsigned long flags; |
1006 | unsigned long mask; | 1067 | unsigned long mask; |
@@ -1008,17 +1069,15 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long las | |||
1008 | 1069 | ||
1009 | rnp = rdp->mynode; | 1070 | rnp = rdp->mynode; |
1010 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1071 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1011 | if (lastcomp != rnp->completed) { | 1072 | if (lastgp != rnp->gpnum || rnp->completed == rnp->gpnum) { |
1012 | 1073 | ||
1013 | /* | 1074 | /* |
1014 | * Someone beat us to it for this grace period, so leave. | 1075 | * The grace period in which this quiescent state was |
1015 | * The race with GP start is resolved by the fact that we | 1076 | * recorded has ended, so don't report it upwards. |
1016 | * hold the leaf rcu_node lock, so that the per-CPU bits | 1077 | * We will instead need a new quiescent state that lies |
1017 | * cannot yet be initialized -- so we would simply find our | 1078 | * within the current grace period. |
1018 | * CPU's bit already cleared in rcu_report_qs_rnp() if this | ||
1019 | * race occurred. | ||
1020 | */ | 1079 | */ |
1021 | rdp->passed_quiesc = 0; /* try again later! */ | 1080 | rdp->passed_quiesce = 0; /* need qs for new gp. */ |
1022 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1081 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1023 | return; | 1082 | return; |
1024 | } | 1083 | } |
@@ -1062,14 +1121,14 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1062 | * Was there a quiescent state since the beginning of the grace | 1121 | * Was there a quiescent state since the beginning of the grace |
1063 | * period? If no, then exit and wait for the next call. | 1122 | * period? If no, then exit and wait for the next call. |
1064 | */ | 1123 | */ |
1065 | if (!rdp->passed_quiesc) | 1124 | if (!rdp->passed_quiesce) |
1066 | return; | 1125 | return; |
1067 | 1126 | ||
1068 | /* | 1127 | /* |
1069 | * Tell RCU we are done (but rcu_report_qs_rdp() will be the | 1128 | * Tell RCU we are done (but rcu_report_qs_rdp() will be the |
1070 | * judge of that). | 1129 | * judge of that). |
1071 | */ | 1130 | */ |
1072 | rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed); | 1131 | rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesce_gpnum); |
1073 | } | 1132 | } |
1074 | 1133 | ||
1075 | #ifdef CONFIG_HOTPLUG_CPU | 1134 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -1130,11 +1189,20 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
1130 | if (rnp->qsmaskinit != 0) { | 1189 | if (rnp->qsmaskinit != 0) { |
1131 | if (rnp != rdp->mynode) | 1190 | if (rnp != rdp->mynode) |
1132 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1191 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
1192 | else | ||
1193 | trace_rcu_grace_period(rsp->name, | ||
1194 | rnp->gpnum + 1 - | ||
1195 | !!(rnp->qsmask & mask), | ||
1196 | "cpuofl"); | ||
1133 | break; | 1197 | break; |
1134 | } | 1198 | } |
1135 | if (rnp == rdp->mynode) | 1199 | if (rnp == rdp->mynode) { |
1200 | trace_rcu_grace_period(rsp->name, | ||
1201 | rnp->gpnum + 1 - | ||
1202 | !!(rnp->qsmask & mask), | ||
1203 | "cpuofl"); | ||
1136 | need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); | 1204 | need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); |
1137 | else | 1205 | } else |
1138 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1206 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
1139 | mask = rnp->grpmask; | 1207 | mask = rnp->grpmask; |
1140 | rnp = rnp->parent; | 1208 | rnp = rnp->parent; |
@@ -1190,17 +1258,22 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1190 | { | 1258 | { |
1191 | unsigned long flags; | 1259 | unsigned long flags; |
1192 | struct rcu_head *next, *list, **tail; | 1260 | struct rcu_head *next, *list, **tail; |
1193 | int count; | 1261 | int bl, count; |
1194 | 1262 | ||
1195 | /* If no callbacks are ready, just return.*/ | 1263 | /* If no callbacks are ready, just return.*/ |
1196 | if (!cpu_has_callbacks_ready_to_invoke(rdp)) | 1264 | if (!cpu_has_callbacks_ready_to_invoke(rdp)) { |
1265 | trace_rcu_batch_start(rsp->name, 0, 0); | ||
1266 | trace_rcu_batch_end(rsp->name, 0); | ||
1197 | return; | 1267 | return; |
1268 | } | ||
1198 | 1269 | ||
1199 | /* | 1270 | /* |
1200 | * Extract the list of ready callbacks, disabling to prevent | 1271 | * Extract the list of ready callbacks, disabling to prevent |
1201 | * races with call_rcu() from interrupt handlers. | 1272 | * races with call_rcu() from interrupt handlers. |
1202 | */ | 1273 | */ |
1203 | local_irq_save(flags); | 1274 | local_irq_save(flags); |
1275 | bl = rdp->blimit; | ||
1276 | trace_rcu_batch_start(rsp->name, rdp->qlen, bl); | ||
1204 | list = rdp->nxtlist; | 1277 | list = rdp->nxtlist; |
1205 | rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; | 1278 | rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; |
1206 | *rdp->nxttail[RCU_DONE_TAIL] = NULL; | 1279 | *rdp->nxttail[RCU_DONE_TAIL] = NULL; |
@@ -1216,13 +1289,14 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1216 | next = list->next; | 1289 | next = list->next; |
1217 | prefetch(next); | 1290 | prefetch(next); |
1218 | debug_rcu_head_unqueue(list); | 1291 | debug_rcu_head_unqueue(list); |
1219 | __rcu_reclaim(list); | 1292 | __rcu_reclaim(rsp->name, list); |
1220 | list = next; | 1293 | list = next; |
1221 | if (++count >= rdp->blimit) | 1294 | if (++count >= bl) |
1222 | break; | 1295 | break; |
1223 | } | 1296 | } |
1224 | 1297 | ||
1225 | local_irq_save(flags); | 1298 | local_irq_save(flags); |
1299 | trace_rcu_batch_end(rsp->name, count); | ||
1226 | 1300 | ||
1227 | /* Update count, and requeue any remaining callbacks. */ | 1301 | /* Update count, and requeue any remaining callbacks. */ |
1228 | rdp->qlen -= count; | 1302 | rdp->qlen -= count; |
@@ -1250,7 +1324,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1250 | 1324 | ||
1251 | local_irq_restore(flags); | 1325 | local_irq_restore(flags); |
1252 | 1326 | ||
1253 | /* Re-raise the RCU softirq if there are callbacks remaining. */ | 1327 | /* Re-invoke RCU core processing if there are callbacks remaining. */ |
1254 | if (cpu_has_callbacks_ready_to_invoke(rdp)) | 1328 | if (cpu_has_callbacks_ready_to_invoke(rdp)) |
1255 | invoke_rcu_core(); | 1329 | invoke_rcu_core(); |
1256 | } | 1330 | } |
@@ -1258,7 +1332,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1258 | /* | 1332 | /* |
1259 | * Check to see if this CPU is in a non-context-switch quiescent state | 1333 | * Check to see if this CPU is in a non-context-switch quiescent state |
1260 | * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). | 1334 | * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). |
1261 | * Also schedule the RCU softirq handler. | 1335 | * Also schedule RCU core processing. |
1262 | * | 1336 | * |
1263 | * This function must be called with hardirqs disabled. It is normally | 1337 | * This function must be called with hardirqs disabled. It is normally |
1264 | * invoked from the scheduling-clock interrupt. If rcu_pending returns | 1338 | * invoked from the scheduling-clock interrupt. If rcu_pending returns |
@@ -1266,6 +1340,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1266 | */ | 1340 | */ |
1267 | void rcu_check_callbacks(int cpu, int user) | 1341 | void rcu_check_callbacks(int cpu, int user) |
1268 | { | 1342 | { |
1343 | trace_rcu_utilization("Start scheduler-tick"); | ||
1269 | if (user || | 1344 | if (user || |
1270 | (idle_cpu(cpu) && rcu_scheduler_active && | 1345 | (idle_cpu(cpu) && rcu_scheduler_active && |
1271 | !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { | 1346 | !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { |
@@ -1299,6 +1374,7 @@ void rcu_check_callbacks(int cpu, int user) | |||
1299 | rcu_preempt_check_callbacks(cpu); | 1374 | rcu_preempt_check_callbacks(cpu); |
1300 | if (rcu_pending(cpu)) | 1375 | if (rcu_pending(cpu)) |
1301 | invoke_rcu_core(); | 1376 | invoke_rcu_core(); |
1377 | trace_rcu_utilization("End scheduler-tick"); | ||
1302 | } | 1378 | } |
1303 | 1379 | ||
1304 | #ifdef CONFIG_SMP | 1380 | #ifdef CONFIG_SMP |
@@ -1360,10 +1436,14 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | |||
1360 | unsigned long flags; | 1436 | unsigned long flags; |
1361 | struct rcu_node *rnp = rcu_get_root(rsp); | 1437 | struct rcu_node *rnp = rcu_get_root(rsp); |
1362 | 1438 | ||
1363 | if (!rcu_gp_in_progress(rsp)) | 1439 | trace_rcu_utilization("Start fqs"); |
1440 | if (!rcu_gp_in_progress(rsp)) { | ||
1441 | trace_rcu_utilization("End fqs"); | ||
1364 | return; /* No grace period in progress, nothing to force. */ | 1442 | return; /* No grace period in progress, nothing to force. */ |
1443 | } | ||
1365 | if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { | 1444 | if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { |
1366 | rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ | 1445 | rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ |
1446 | trace_rcu_utilization("End fqs"); | ||
1367 | return; /* Someone else is already on the job. */ | 1447 | return; /* Someone else is already on the job. */ |
1368 | } | 1448 | } |
1369 | if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) | 1449 | if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) |
@@ -1412,11 +1492,13 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | |||
1412 | raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ | 1492 | raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ |
1413 | rsp->fqs_need_gp = 0; | 1493 | rsp->fqs_need_gp = 0; |
1414 | rcu_start_gp(rsp, flags); /* releases rnp->lock */ | 1494 | rcu_start_gp(rsp, flags); /* releases rnp->lock */ |
1495 | trace_rcu_utilization("End fqs"); | ||
1415 | return; | 1496 | return; |
1416 | } | 1497 | } |
1417 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ | 1498 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ |
1418 | unlock_fqs_ret: | 1499 | unlock_fqs_ret: |
1419 | raw_spin_unlock_irqrestore(&rsp->fqslock, flags); | 1500 | raw_spin_unlock_irqrestore(&rsp->fqslock, flags); |
1501 | trace_rcu_utilization("End fqs"); | ||
1420 | } | 1502 | } |
1421 | 1503 | ||
1422 | #else /* #ifdef CONFIG_SMP */ | 1504 | #else /* #ifdef CONFIG_SMP */ |
@@ -1429,9 +1511,9 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | |||
1429 | #endif /* #else #ifdef CONFIG_SMP */ | 1511 | #endif /* #else #ifdef CONFIG_SMP */ |
1430 | 1512 | ||
1431 | /* | 1513 | /* |
1432 | * This does the RCU processing work from softirq context for the | 1514 | * This does the RCU core processing work for the specified rcu_state |
1433 | * specified rcu_state and rcu_data structures. This may be called | 1515 | * and rcu_data structures. This may be called only from the CPU to |
1434 | * only from the CPU to whom the rdp belongs. | 1516 | * whom the rdp belongs. |
1435 | */ | 1517 | */ |
1436 | static void | 1518 | static void |
1437 | __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | 1519 | __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) |
@@ -1468,24 +1550,24 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1468 | } | 1550 | } |
1469 | 1551 | ||
1470 | /* | 1552 | /* |
1471 | * Do softirq processing for the current CPU. | 1553 | * Do RCU core processing for the current CPU. |
1472 | */ | 1554 | */ |
1473 | static void rcu_process_callbacks(struct softirq_action *unused) | 1555 | static void rcu_process_callbacks(struct softirq_action *unused) |
1474 | { | 1556 | { |
1557 | trace_rcu_utilization("Start RCU core"); | ||
1475 | __rcu_process_callbacks(&rcu_sched_state, | 1558 | __rcu_process_callbacks(&rcu_sched_state, |
1476 | &__get_cpu_var(rcu_sched_data)); | 1559 | &__get_cpu_var(rcu_sched_data)); |
1477 | __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); | 1560 | __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); |
1478 | rcu_preempt_process_callbacks(); | 1561 | rcu_preempt_process_callbacks(); |
1479 | 1562 | trace_rcu_utilization("End RCU core"); | |
1480 | /* If we are last CPU on way to dyntick-idle mode, accelerate it. */ | ||
1481 | rcu_needs_cpu_flush(); | ||
1482 | } | 1563 | } |
1483 | 1564 | ||
1484 | /* | 1565 | /* |
1485 | * Wake up the current CPU's kthread. This replaces raise_softirq() | 1566 | * Schedule RCU callback invocation. If the specified type of RCU |
1486 | * in earlier versions of RCU. Note that because we are running on | 1567 | * does not support RCU priority boosting, just do a direct call, |
1487 | * the current CPU with interrupts disabled, the rcu_cpu_kthread_task | 1568 | * otherwise wake up the per-CPU kernel kthread. Note that because we |
1488 | * cannot disappear out from under us. | 1569 | * are running on the current CPU with interrupts disabled, the |
1570 | * rcu_cpu_kthread_task cannot disappear out from under us. | ||
1489 | */ | 1571 | */ |
1490 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | 1572 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) |
1491 | { | 1573 | { |
@@ -1530,6 +1612,12 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
1530 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; | 1612 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; |
1531 | rdp->qlen++; | 1613 | rdp->qlen++; |
1532 | 1614 | ||
1615 | if (__is_kfree_rcu_offset((unsigned long)func)) | ||
1616 | trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, | ||
1617 | rdp->qlen); | ||
1618 | else | ||
1619 | trace_rcu_callback(rsp->name, head, rdp->qlen); | ||
1620 | |||
1533 | /* If interrupts were disabled, don't dive into RCU core. */ | 1621 | /* If interrupts were disabled, don't dive into RCU core. */ |
1534 | if (irqs_disabled_flags(flags)) { | 1622 | if (irqs_disabled_flags(flags)) { |
1535 | local_irq_restore(flags); | 1623 | local_irq_restore(flags); |
@@ -1613,18 +1701,9 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); | |||
1613 | */ | 1701 | */ |
1614 | void synchronize_sched(void) | 1702 | void synchronize_sched(void) |
1615 | { | 1703 | { |
1616 | struct rcu_synchronize rcu; | ||
1617 | |||
1618 | if (rcu_blocking_is_gp()) | 1704 | if (rcu_blocking_is_gp()) |
1619 | return; | 1705 | return; |
1620 | 1706 | wait_rcu_gp(call_rcu_sched); | |
1621 | init_rcu_head_on_stack(&rcu.head); | ||
1622 | init_completion(&rcu.completion); | ||
1623 | /* Will wake me after RCU finished. */ | ||
1624 | call_rcu_sched(&rcu.head, wakeme_after_rcu); | ||
1625 | /* Wait for it. */ | ||
1626 | wait_for_completion(&rcu.completion); | ||
1627 | destroy_rcu_head_on_stack(&rcu.head); | ||
1628 | } | 1707 | } |
1629 | EXPORT_SYMBOL_GPL(synchronize_sched); | 1708 | EXPORT_SYMBOL_GPL(synchronize_sched); |
1630 | 1709 | ||
@@ -1639,18 +1718,9 @@ EXPORT_SYMBOL_GPL(synchronize_sched); | |||
1639 | */ | 1718 | */ |
1640 | void synchronize_rcu_bh(void) | 1719 | void synchronize_rcu_bh(void) |
1641 | { | 1720 | { |
1642 | struct rcu_synchronize rcu; | ||
1643 | |||
1644 | if (rcu_blocking_is_gp()) | 1721 | if (rcu_blocking_is_gp()) |
1645 | return; | 1722 | return; |
1646 | 1723 | wait_rcu_gp(call_rcu_bh); | |
1647 | init_rcu_head_on_stack(&rcu.head); | ||
1648 | init_completion(&rcu.completion); | ||
1649 | /* Will wake me after RCU finished. */ | ||
1650 | call_rcu_bh(&rcu.head, wakeme_after_rcu); | ||
1651 | /* Wait for it. */ | ||
1652 | wait_for_completion(&rcu.completion); | ||
1653 | destroy_rcu_head_on_stack(&rcu.head); | ||
1654 | } | 1724 | } |
1655 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); | 1725 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); |
1656 | 1726 | ||
@@ -1671,7 +1741,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1671 | check_cpu_stall(rsp, rdp); | 1741 | check_cpu_stall(rsp, rdp); |
1672 | 1742 | ||
1673 | /* Is the RCU core waiting for a quiescent state from this CPU? */ | 1743 | /* Is the RCU core waiting for a quiescent state from this CPU? */ |
1674 | if (rdp->qs_pending && !rdp->passed_quiesc) { | 1744 | if (rcu_scheduler_fully_active && |
1745 | rdp->qs_pending && !rdp->passed_quiesce) { | ||
1675 | 1746 | ||
1676 | /* | 1747 | /* |
1677 | * If force_quiescent_state() coming soon and this CPU | 1748 | * If force_quiescent_state() coming soon and this CPU |
@@ -1683,7 +1754,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1683 | ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, | 1754 | ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, |
1684 | jiffies)) | 1755 | jiffies)) |
1685 | set_need_resched(); | 1756 | set_need_resched(); |
1686 | } else if (rdp->qs_pending && rdp->passed_quiesc) { | 1757 | } else if (rdp->qs_pending && rdp->passed_quiesce) { |
1687 | rdp->n_rp_report_qs++; | 1758 | rdp->n_rp_report_qs++; |
1688 | return 1; | 1759 | return 1; |
1689 | } | 1760 | } |
@@ -1846,6 +1917,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
1846 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); | 1917 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); |
1847 | #endif /* #ifdef CONFIG_NO_HZ */ | 1918 | #endif /* #ifdef CONFIG_NO_HZ */ |
1848 | rdp->cpu = cpu; | 1919 | rdp->cpu = cpu; |
1920 | rdp->rsp = rsp; | ||
1849 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1921 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1850 | } | 1922 | } |
1851 | 1923 | ||
@@ -1865,8 +1937,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
1865 | 1937 | ||
1866 | /* Set up local state, ensuring consistent view of global state. */ | 1938 | /* Set up local state, ensuring consistent view of global state. */ |
1867 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1939 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1868 | rdp->passed_quiesc = 0; /* We could be racing with new GP, */ | ||
1869 | rdp->qs_pending = 1; /* so set up to respond to current GP. */ | ||
1870 | rdp->beenonline = 1; /* We have now been online. */ | 1940 | rdp->beenonline = 1; /* We have now been online. */ |
1871 | rdp->preemptible = preemptible; | 1941 | rdp->preemptible = preemptible; |
1872 | rdp->qlen_last_fqs_check = 0; | 1942 | rdp->qlen_last_fqs_check = 0; |
@@ -1891,9 +1961,17 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
1891 | rnp->qsmaskinit |= mask; | 1961 | rnp->qsmaskinit |= mask; |
1892 | mask = rnp->grpmask; | 1962 | mask = rnp->grpmask; |
1893 | if (rnp == rdp->mynode) { | 1963 | if (rnp == rdp->mynode) { |
1894 | rdp->gpnum = rnp->completed; /* if GP in progress... */ | 1964 | /* |
1965 | * If there is a grace period in progress, we will | ||
1966 | * set up to wait for it next time we run the | ||
1967 | * RCU core code. | ||
1968 | */ | ||
1969 | rdp->gpnum = rnp->completed; | ||
1895 | rdp->completed = rnp->completed; | 1970 | rdp->completed = rnp->completed; |
1896 | rdp->passed_quiesc_completed = rnp->completed - 1; | 1971 | rdp->passed_quiesce = 0; |
1972 | rdp->qs_pending = 0; | ||
1973 | rdp->passed_quiesce_gpnum = rnp->gpnum - 1; | ||
1974 | trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl"); | ||
1897 | } | 1975 | } |
1898 | raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ | 1976 | raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ |
1899 | rnp = rnp->parent; | 1977 | rnp = rnp->parent; |
@@ -1919,6 +1997,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
1919 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | 1997 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); |
1920 | struct rcu_node *rnp = rdp->mynode; | 1998 | struct rcu_node *rnp = rdp->mynode; |
1921 | 1999 | ||
2000 | trace_rcu_utilization("Start CPU hotplug"); | ||
1922 | switch (action) { | 2001 | switch (action) { |
1923 | case CPU_UP_PREPARE: | 2002 | case CPU_UP_PREPARE: |
1924 | case CPU_UP_PREPARE_FROZEN: | 2003 | case CPU_UP_PREPARE_FROZEN: |
@@ -1954,6 +2033,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
1954 | default: | 2033 | default: |
1955 | break; | 2034 | break; |
1956 | } | 2035 | } |
2036 | trace_rcu_utilization("End CPU hotplug"); | ||
1957 | return NOTIFY_OK; | 2037 | return NOTIFY_OK; |
1958 | } | 2038 | } |
1959 | 2039 | ||
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 01b2ccda26f..849ce9ec51f 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -230,9 +230,9 @@ struct rcu_data { | |||
230 | /* in order to detect GP end. */ | 230 | /* in order to detect GP end. */ |
231 | unsigned long gpnum; /* Highest gp number that this CPU */ | 231 | unsigned long gpnum; /* Highest gp number that this CPU */ |
232 | /* is aware of having started. */ | 232 | /* is aware of having started. */ |
233 | unsigned long passed_quiesc_completed; | 233 | unsigned long passed_quiesce_gpnum; |
234 | /* Value of completed at time of qs. */ | 234 | /* gpnum at time of quiescent state. */ |
235 | bool passed_quiesc; /* User-mode/idle loop etc. */ | 235 | bool passed_quiesce; /* User-mode/idle loop etc. */ |
236 | bool qs_pending; /* Core waits for quiesc state. */ | 236 | bool qs_pending; /* Core waits for quiesc state. */ |
237 | bool beenonline; /* CPU online at least once. */ | 237 | bool beenonline; /* CPU online at least once. */ |
238 | bool preemptible; /* Preemptible RCU? */ | 238 | bool preemptible; /* Preemptible RCU? */ |
@@ -299,6 +299,7 @@ struct rcu_data { | |||
299 | unsigned long n_rp_need_nothing; | 299 | unsigned long n_rp_need_nothing; |
300 | 300 | ||
301 | int cpu; | 301 | int cpu; |
302 | struct rcu_state *rsp; | ||
302 | }; | 303 | }; |
303 | 304 | ||
304 | /* Values for signaled field in struct rcu_state. */ | 305 | /* Values for signaled field in struct rcu_state. */ |
@@ -417,6 +418,13 @@ extern struct rcu_state rcu_preempt_state; | |||
417 | DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); | 418 | DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); |
418 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 419 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
419 | 420 | ||
421 | #ifdef CONFIG_RCU_BOOST | ||
422 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | ||
423 | DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu); | ||
424 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | ||
425 | DECLARE_PER_CPU(char, rcu_cpu_has_work); | ||
426 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
427 | |||
420 | #ifndef RCU_TREE_NONCORE | 428 | #ifndef RCU_TREE_NONCORE |
421 | 429 | ||
422 | /* Forward declarations for rcutree_plugin.h */ | 430 | /* Forward declarations for rcutree_plugin.h */ |
@@ -430,7 +438,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, | |||
430 | static void rcu_stop_cpu_kthread(int cpu); | 438 | static void rcu_stop_cpu_kthread(int cpu); |
431 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 439 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
432 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); | 440 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); |
433 | static void rcu_print_task_stall(struct rcu_node *rnp); | 441 | static int rcu_print_task_stall(struct rcu_node *rnp); |
434 | static void rcu_preempt_stall_reset(void); | 442 | static void rcu_preempt_stall_reset(void); |
435 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); | 443 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); |
436 | #ifdef CONFIG_HOTPLUG_CPU | 444 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -450,7 +458,6 @@ static int rcu_preempt_needs_cpu(int cpu); | |||
450 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu); | 458 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu); |
451 | static void rcu_preempt_send_cbs_to_online(void); | 459 | static void rcu_preempt_send_cbs_to_online(void); |
452 | static void __init __rcu_init_preempt(void); | 460 | static void __init __rcu_init_preempt(void); |
453 | static void rcu_needs_cpu_flush(void); | ||
454 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); | 461 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); |
455 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | 462 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); |
456 | static void invoke_rcu_callbacks_kthread(void); | 463 | static void invoke_rcu_callbacks_kthread(void); |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 8aafbb80b8b..4b9b9f8a418 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -27,6 +27,14 @@ | |||
27 | #include <linux/delay.h> | 27 | #include <linux/delay.h> |
28 | #include <linux/stop_machine.h> | 28 | #include <linux/stop_machine.h> |
29 | 29 | ||
30 | #define RCU_KTHREAD_PRIO 1 | ||
31 | |||
32 | #ifdef CONFIG_RCU_BOOST | ||
33 | #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO | ||
34 | #else | ||
35 | #define RCU_BOOST_PRIO RCU_KTHREAD_PRIO | ||
36 | #endif | ||
37 | |||
30 | /* | 38 | /* |
31 | * Check the RCU kernel configuration parameters and print informative | 39 | * Check the RCU kernel configuration parameters and print informative |
32 | * messages about anything out of the ordinary. If you like #ifdef, you | 40 | * messages about anything out of the ordinary. If you like #ifdef, you |
@@ -64,7 +72,7 @@ static void __init rcu_bootup_announce_oddness(void) | |||
64 | 72 | ||
65 | #ifdef CONFIG_TREE_PREEMPT_RCU | 73 | #ifdef CONFIG_TREE_PREEMPT_RCU |
66 | 74 | ||
67 | struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); | 75 | struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt); |
68 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); | 76 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); |
69 | static struct rcu_state *rcu_state = &rcu_preempt_state; | 77 | static struct rcu_state *rcu_state = &rcu_preempt_state; |
70 | 78 | ||
@@ -122,9 +130,11 @@ static void rcu_preempt_qs(int cpu) | |||
122 | { | 130 | { |
123 | struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); | 131 | struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); |
124 | 132 | ||
125 | rdp->passed_quiesc_completed = rdp->gpnum - 1; | 133 | rdp->passed_quiesce_gpnum = rdp->gpnum; |
126 | barrier(); | 134 | barrier(); |
127 | rdp->passed_quiesc = 1; | 135 | if (rdp->passed_quiesce == 0) |
136 | trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs"); | ||
137 | rdp->passed_quiesce = 1; | ||
128 | current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; | 138 | current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; |
129 | } | 139 | } |
130 | 140 | ||
@@ -190,6 +200,11 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
190 | if (rnp->qsmask & rdp->grpmask) | 200 | if (rnp->qsmask & rdp->grpmask) |
191 | rnp->gp_tasks = &t->rcu_node_entry; | 201 | rnp->gp_tasks = &t->rcu_node_entry; |
192 | } | 202 | } |
203 | trace_rcu_preempt_task(rdp->rsp->name, | ||
204 | t->pid, | ||
205 | (rnp->qsmask & rdp->grpmask) | ||
206 | ? rnp->gpnum | ||
207 | : rnp->gpnum + 1); | ||
193 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 208 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
194 | } else if (t->rcu_read_lock_nesting < 0 && | 209 | } else if (t->rcu_read_lock_nesting < 0 && |
195 | t->rcu_read_unlock_special) { | 210 | t->rcu_read_unlock_special) { |
@@ -299,6 +314,9 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
299 | int empty_exp; | 314 | int empty_exp; |
300 | unsigned long flags; | 315 | unsigned long flags; |
301 | struct list_head *np; | 316 | struct list_head *np; |
317 | #ifdef CONFIG_RCU_BOOST | ||
318 | struct rt_mutex *rbmp = NULL; | ||
319 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
302 | struct rcu_node *rnp; | 320 | struct rcu_node *rnp; |
303 | int special; | 321 | int special; |
304 | 322 | ||
@@ -344,6 +362,9 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
344 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ | 362 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ |
345 | np = rcu_next_node_entry(t, rnp); | 363 | np = rcu_next_node_entry(t, rnp); |
346 | list_del_init(&t->rcu_node_entry); | 364 | list_del_init(&t->rcu_node_entry); |
365 | t->rcu_blocked_node = NULL; | ||
366 | trace_rcu_unlock_preempted_task("rcu_preempt", | ||
367 | rnp->gpnum, t->pid); | ||
347 | if (&t->rcu_node_entry == rnp->gp_tasks) | 368 | if (&t->rcu_node_entry == rnp->gp_tasks) |
348 | rnp->gp_tasks = np; | 369 | rnp->gp_tasks = np; |
349 | if (&t->rcu_node_entry == rnp->exp_tasks) | 370 | if (&t->rcu_node_entry == rnp->exp_tasks) |
@@ -351,30 +372,34 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
351 | #ifdef CONFIG_RCU_BOOST | 372 | #ifdef CONFIG_RCU_BOOST |
352 | if (&t->rcu_node_entry == rnp->boost_tasks) | 373 | if (&t->rcu_node_entry == rnp->boost_tasks) |
353 | rnp->boost_tasks = np; | 374 | rnp->boost_tasks = np; |
354 | /* Snapshot and clear ->rcu_boosted with rcu_node lock held. */ | 375 | /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */ |
355 | if (t->rcu_boosted) { | 376 | if (t->rcu_boost_mutex) { |
356 | special |= RCU_READ_UNLOCK_BOOSTED; | 377 | rbmp = t->rcu_boost_mutex; |
357 | t->rcu_boosted = 0; | 378 | t->rcu_boost_mutex = NULL; |
358 | } | 379 | } |
359 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 380 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
360 | t->rcu_blocked_node = NULL; | ||
361 | 381 | ||
362 | /* | 382 | /* |
363 | * If this was the last task on the current list, and if | 383 | * If this was the last task on the current list, and if |
364 | * we aren't waiting on any CPUs, report the quiescent state. | 384 | * we aren't waiting on any CPUs, report the quiescent state. |
365 | * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. | 385 | * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. |
366 | */ | 386 | */ |
367 | if (empty) | 387 | if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { |
368 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 388 | trace_rcu_quiescent_state_report("preempt_rcu", |
369 | else | 389 | rnp->gpnum, |
390 | 0, rnp->qsmask, | ||
391 | rnp->level, | ||
392 | rnp->grplo, | ||
393 | rnp->grphi, | ||
394 | !!rnp->gp_tasks); | ||
370 | rcu_report_unblock_qs_rnp(rnp, flags); | 395 | rcu_report_unblock_qs_rnp(rnp, flags); |
396 | } else | ||
397 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
371 | 398 | ||
372 | #ifdef CONFIG_RCU_BOOST | 399 | #ifdef CONFIG_RCU_BOOST |
373 | /* Unboost if we were boosted. */ | 400 | /* Unboost if we were boosted. */ |
374 | if (special & RCU_READ_UNLOCK_BOOSTED) { | 401 | if (rbmp) |
375 | rt_mutex_unlock(t->rcu_boost_mutex); | 402 | rt_mutex_unlock(rbmp); |
376 | t->rcu_boost_mutex = NULL; | ||
377 | } | ||
378 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 403 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
379 | 404 | ||
380 | /* | 405 | /* |
@@ -399,10 +424,10 @@ void __rcu_read_unlock(void) | |||
399 | { | 424 | { |
400 | struct task_struct *t = current; | 425 | struct task_struct *t = current; |
401 | 426 | ||
402 | barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */ | ||
403 | if (t->rcu_read_lock_nesting != 1) | 427 | if (t->rcu_read_lock_nesting != 1) |
404 | --t->rcu_read_lock_nesting; | 428 | --t->rcu_read_lock_nesting; |
405 | else { | 429 | else { |
430 | barrier(); /* critical section before exit code. */ | ||
406 | t->rcu_read_lock_nesting = INT_MIN; | 431 | t->rcu_read_lock_nesting = INT_MIN; |
407 | barrier(); /* assign before ->rcu_read_unlock_special load */ | 432 | barrier(); /* assign before ->rcu_read_unlock_special load */ |
408 | if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) | 433 | if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) |
@@ -466,16 +491,20 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp) | |||
466 | * Scan the current list of tasks blocked within RCU read-side critical | 491 | * Scan the current list of tasks blocked within RCU read-side critical |
467 | * sections, printing out the tid of each. | 492 | * sections, printing out the tid of each. |
468 | */ | 493 | */ |
469 | static void rcu_print_task_stall(struct rcu_node *rnp) | 494 | static int rcu_print_task_stall(struct rcu_node *rnp) |
470 | { | 495 | { |
471 | struct task_struct *t; | 496 | struct task_struct *t; |
497 | int ndetected = 0; | ||
472 | 498 | ||
473 | if (!rcu_preempt_blocked_readers_cgp(rnp)) | 499 | if (!rcu_preempt_blocked_readers_cgp(rnp)) |
474 | return; | 500 | return 0; |
475 | t = list_entry(rnp->gp_tasks, | 501 | t = list_entry(rnp->gp_tasks, |
476 | struct task_struct, rcu_node_entry); | 502 | struct task_struct, rcu_node_entry); |
477 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) | 503 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { |
478 | printk(" P%d", t->pid); | 504 | printk(" P%d", t->pid); |
505 | ndetected++; | ||
506 | } | ||
507 | return ndetected; | ||
479 | } | 508 | } |
480 | 509 | ||
481 | /* | 510 | /* |
@@ -656,18 +685,9 @@ EXPORT_SYMBOL_GPL(call_rcu); | |||
656 | */ | 685 | */ |
657 | void synchronize_rcu(void) | 686 | void synchronize_rcu(void) |
658 | { | 687 | { |
659 | struct rcu_synchronize rcu; | ||
660 | |||
661 | if (!rcu_scheduler_active) | 688 | if (!rcu_scheduler_active) |
662 | return; | 689 | return; |
663 | 690 | wait_rcu_gp(call_rcu); | |
664 | init_rcu_head_on_stack(&rcu.head); | ||
665 | init_completion(&rcu.completion); | ||
666 | /* Will wake me after RCU finished. */ | ||
667 | call_rcu(&rcu.head, wakeme_after_rcu); | ||
668 | /* Wait for it. */ | ||
669 | wait_for_completion(&rcu.completion); | ||
670 | destroy_rcu_head_on_stack(&rcu.head); | ||
671 | } | 691 | } |
672 | EXPORT_SYMBOL_GPL(synchronize_rcu); | 692 | EXPORT_SYMBOL_GPL(synchronize_rcu); |
673 | 693 | ||
@@ -968,8 +988,9 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp) | |||
968 | * Because preemptible RCU does not exist, we never have to check for | 988 | * Because preemptible RCU does not exist, we never have to check for |
969 | * tasks blocked within RCU read-side critical sections. | 989 | * tasks blocked within RCU read-side critical sections. |
970 | */ | 990 | */ |
971 | static void rcu_print_task_stall(struct rcu_node *rnp) | 991 | static int rcu_print_task_stall(struct rcu_node *rnp) |
972 | { | 992 | { |
993 | return 0; | ||
973 | } | 994 | } |
974 | 995 | ||
975 | /* | 996 | /* |
@@ -1136,6 +1157,8 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp) | |||
1136 | 1157 | ||
1137 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | 1158 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ |
1138 | 1159 | ||
1160 | static struct lock_class_key rcu_boost_class; | ||
1161 | |||
1139 | /* | 1162 | /* |
1140 | * Carry out RCU priority boosting on the task indicated by ->exp_tasks | 1163 | * Carry out RCU priority boosting on the task indicated by ->exp_tasks |
1141 | * or ->boost_tasks, advancing the pointer to the next task in the | 1164 | * or ->boost_tasks, advancing the pointer to the next task in the |
@@ -1198,8 +1221,10 @@ static int rcu_boost(struct rcu_node *rnp) | |||
1198 | */ | 1221 | */ |
1199 | t = container_of(tb, struct task_struct, rcu_node_entry); | 1222 | t = container_of(tb, struct task_struct, rcu_node_entry); |
1200 | rt_mutex_init_proxy_locked(&mtx, t); | 1223 | rt_mutex_init_proxy_locked(&mtx, t); |
1224 | /* Avoid lockdep false positives. This rt_mutex is its own thing. */ | ||
1225 | lockdep_set_class_and_name(&mtx.wait_lock, &rcu_boost_class, | ||
1226 | "rcu_boost_mutex"); | ||
1201 | t->rcu_boost_mutex = &mtx; | 1227 | t->rcu_boost_mutex = &mtx; |
1202 | t->rcu_boosted = 1; | ||
1203 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1228 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1204 | rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ | 1229 | rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ |
1205 | rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ | 1230 | rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ |
@@ -1228,9 +1253,12 @@ static int rcu_boost_kthread(void *arg) | |||
1228 | int spincnt = 0; | 1253 | int spincnt = 0; |
1229 | int more2boost; | 1254 | int more2boost; |
1230 | 1255 | ||
1256 | trace_rcu_utilization("Start boost kthread@init"); | ||
1231 | for (;;) { | 1257 | for (;;) { |
1232 | rnp->boost_kthread_status = RCU_KTHREAD_WAITING; | 1258 | rnp->boost_kthread_status = RCU_KTHREAD_WAITING; |
1259 | trace_rcu_utilization("End boost kthread@rcu_wait"); | ||
1233 | rcu_wait(rnp->boost_tasks || rnp->exp_tasks); | 1260 | rcu_wait(rnp->boost_tasks || rnp->exp_tasks); |
1261 | trace_rcu_utilization("Start boost kthread@rcu_wait"); | ||
1234 | rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; | 1262 | rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; |
1235 | more2boost = rcu_boost(rnp); | 1263 | more2boost = rcu_boost(rnp); |
1236 | if (more2boost) | 1264 | if (more2boost) |
@@ -1238,11 +1266,14 @@ static int rcu_boost_kthread(void *arg) | |||
1238 | else | 1266 | else |
1239 | spincnt = 0; | 1267 | spincnt = 0; |
1240 | if (spincnt > 10) { | 1268 | if (spincnt > 10) { |
1269 | trace_rcu_utilization("End boost kthread@rcu_yield"); | ||
1241 | rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp); | 1270 | rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp); |
1271 | trace_rcu_utilization("Start boost kthread@rcu_yield"); | ||
1242 | spincnt = 0; | 1272 | spincnt = 0; |
1243 | } | 1273 | } |
1244 | } | 1274 | } |
1245 | /* NOTREACHED */ | 1275 | /* NOTREACHED */ |
1276 | trace_rcu_utilization("End boost kthread@notreached"); | ||
1246 | return 0; | 1277 | return 0; |
1247 | } | 1278 | } |
1248 | 1279 | ||
@@ -1291,11 +1322,9 @@ static void invoke_rcu_callbacks_kthread(void) | |||
1291 | 1322 | ||
1292 | local_irq_save(flags); | 1323 | local_irq_save(flags); |
1293 | __this_cpu_write(rcu_cpu_has_work, 1); | 1324 | __this_cpu_write(rcu_cpu_has_work, 1); |
1294 | if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) { | 1325 | if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && |
1295 | local_irq_restore(flags); | 1326 | current != __this_cpu_read(rcu_cpu_kthread_task)) |
1296 | return; | 1327 | wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); |
1297 | } | ||
1298 | wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); | ||
1299 | local_irq_restore(flags); | 1328 | local_irq_restore(flags); |
1300 | } | 1329 | } |
1301 | 1330 | ||
@@ -1343,13 +1372,13 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |||
1343 | if (rnp->boost_kthread_task != NULL) | 1372 | if (rnp->boost_kthread_task != NULL) |
1344 | return 0; | 1373 | return 0; |
1345 | t = kthread_create(rcu_boost_kthread, (void *)rnp, | 1374 | t = kthread_create(rcu_boost_kthread, (void *)rnp, |
1346 | "rcub%d", rnp_index); | 1375 | "rcub/%d", rnp_index); |
1347 | if (IS_ERR(t)) | 1376 | if (IS_ERR(t)) |
1348 | return PTR_ERR(t); | 1377 | return PTR_ERR(t); |
1349 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1378 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1350 | rnp->boost_kthread_task = t; | 1379 | rnp->boost_kthread_task = t; |
1351 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1380 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1352 | sp.sched_priority = RCU_KTHREAD_PRIO; | 1381 | sp.sched_priority = RCU_BOOST_PRIO; |
1353 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | 1382 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
1354 | wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ | 1383 | wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ |
1355 | return 0; | 1384 | return 0; |
@@ -1444,6 +1473,7 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg) | |||
1444 | { | 1473 | { |
1445 | struct sched_param sp; | 1474 | struct sched_param sp; |
1446 | struct timer_list yield_timer; | 1475 | struct timer_list yield_timer; |
1476 | int prio = current->rt_priority; | ||
1447 | 1477 | ||
1448 | setup_timer_on_stack(&yield_timer, f, arg); | 1478 | setup_timer_on_stack(&yield_timer, f, arg); |
1449 | mod_timer(&yield_timer, jiffies + 2); | 1479 | mod_timer(&yield_timer, jiffies + 2); |
@@ -1451,7 +1481,8 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg) | |||
1451 | sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); | 1481 | sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); |
1452 | set_user_nice(current, 19); | 1482 | set_user_nice(current, 19); |
1453 | schedule(); | 1483 | schedule(); |
1454 | sp.sched_priority = RCU_KTHREAD_PRIO; | 1484 | set_user_nice(current, 0); |
1485 | sp.sched_priority = prio; | ||
1455 | sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | 1486 | sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); |
1456 | del_timer(&yield_timer); | 1487 | del_timer(&yield_timer); |
1457 | } | 1488 | } |
@@ -1489,7 +1520,8 @@ static int rcu_cpu_kthread_should_stop(int cpu) | |||
1489 | 1520 | ||
1490 | /* | 1521 | /* |
1491 | * Per-CPU kernel thread that invokes RCU callbacks. This replaces the | 1522 | * Per-CPU kernel thread that invokes RCU callbacks. This replaces the |
1492 | * earlier RCU softirq. | 1523 | * RCU softirq used in flavors and configurations of RCU that do not |
1524 | * support RCU priority boosting. | ||
1493 | */ | 1525 | */ |
1494 | static int rcu_cpu_kthread(void *arg) | 1526 | static int rcu_cpu_kthread(void *arg) |
1495 | { | 1527 | { |
@@ -1500,9 +1532,12 @@ static int rcu_cpu_kthread(void *arg) | |||
1500 | char work; | 1532 | char work; |
1501 | char *workp = &per_cpu(rcu_cpu_has_work, cpu); | 1533 | char *workp = &per_cpu(rcu_cpu_has_work, cpu); |
1502 | 1534 | ||
1535 | trace_rcu_utilization("Start CPU kthread@init"); | ||
1503 | for (;;) { | 1536 | for (;;) { |
1504 | *statusp = RCU_KTHREAD_WAITING; | 1537 | *statusp = RCU_KTHREAD_WAITING; |
1538 | trace_rcu_utilization("End CPU kthread@rcu_wait"); | ||
1505 | rcu_wait(*workp != 0 || kthread_should_stop()); | 1539 | rcu_wait(*workp != 0 || kthread_should_stop()); |
1540 | trace_rcu_utilization("Start CPU kthread@rcu_wait"); | ||
1506 | local_bh_disable(); | 1541 | local_bh_disable(); |
1507 | if (rcu_cpu_kthread_should_stop(cpu)) { | 1542 | if (rcu_cpu_kthread_should_stop(cpu)) { |
1508 | local_bh_enable(); | 1543 | local_bh_enable(); |
@@ -1523,11 +1558,14 @@ static int rcu_cpu_kthread(void *arg) | |||
1523 | spincnt = 0; | 1558 | spincnt = 0; |
1524 | if (spincnt > 10) { | 1559 | if (spincnt > 10) { |
1525 | *statusp = RCU_KTHREAD_YIELDING; | 1560 | *statusp = RCU_KTHREAD_YIELDING; |
1561 | trace_rcu_utilization("End CPU kthread@rcu_yield"); | ||
1526 | rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); | 1562 | rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); |
1563 | trace_rcu_utilization("Start CPU kthread@rcu_yield"); | ||
1527 | spincnt = 0; | 1564 | spincnt = 0; |
1528 | } | 1565 | } |
1529 | } | 1566 | } |
1530 | *statusp = RCU_KTHREAD_STOPPED; | 1567 | *statusp = RCU_KTHREAD_STOPPED; |
1568 | trace_rcu_utilization("End CPU kthread@term"); | ||
1531 | return 0; | 1569 | return 0; |
1532 | } | 1570 | } |
1533 | 1571 | ||
@@ -1560,7 +1598,10 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) | |||
1560 | if (!rcu_scheduler_fully_active || | 1598 | if (!rcu_scheduler_fully_active || |
1561 | per_cpu(rcu_cpu_kthread_task, cpu) != NULL) | 1599 | per_cpu(rcu_cpu_kthread_task, cpu) != NULL) |
1562 | return 0; | 1600 | return 0; |
1563 | t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); | 1601 | t = kthread_create_on_node(rcu_cpu_kthread, |
1602 | (void *)(long)cpu, | ||
1603 | cpu_to_node(cpu), | ||
1604 | "rcuc/%d", cpu); | ||
1564 | if (IS_ERR(t)) | 1605 | if (IS_ERR(t)) |
1565 | return PTR_ERR(t); | 1606 | return PTR_ERR(t); |
1566 | if (cpu_online(cpu)) | 1607 | if (cpu_online(cpu)) |
@@ -1669,7 +1710,7 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, | |||
1669 | return 0; | 1710 | return 0; |
1670 | if (rnp->node_kthread_task == NULL) { | 1711 | if (rnp->node_kthread_task == NULL) { |
1671 | t = kthread_create(rcu_node_kthread, (void *)rnp, | 1712 | t = kthread_create(rcu_node_kthread, (void *)rnp, |
1672 | "rcun%d", rnp_index); | 1713 | "rcun/%d", rnp_index); |
1673 | if (IS_ERR(t)) | 1714 | if (IS_ERR(t)) |
1674 | return PTR_ERR(t); | 1715 | return PTR_ERR(t); |
1675 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1716 | raw_spin_lock_irqsave(&rnp->lock, flags); |
@@ -1907,15 +1948,6 @@ int rcu_needs_cpu(int cpu) | |||
1907 | return rcu_needs_cpu_quick_check(cpu); | 1948 | return rcu_needs_cpu_quick_check(cpu); |
1908 | } | 1949 | } |
1909 | 1950 | ||
1910 | /* | ||
1911 | * Check to see if we need to continue a callback-flush operations to | ||
1912 | * allow the last CPU to enter dyntick-idle mode. But fast dyntick-idle | ||
1913 | * entry is not configured, so we never do need to. | ||
1914 | */ | ||
1915 | static void rcu_needs_cpu_flush(void) | ||
1916 | { | ||
1917 | } | ||
1918 | |||
1919 | #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | 1951 | #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ |
1920 | 1952 | ||
1921 | #define RCU_NEEDS_CPU_FLUSHES 5 | 1953 | #define RCU_NEEDS_CPU_FLUSHES 5 |
@@ -1991,20 +2023,4 @@ int rcu_needs_cpu(int cpu) | |||
1991 | return c; | 2023 | return c; |
1992 | } | 2024 | } |
1993 | 2025 | ||
1994 | /* | ||
1995 | * Check to see if we need to continue a callback-flush operations to | ||
1996 | * allow the last CPU to enter dyntick-idle mode. | ||
1997 | */ | ||
1998 | static void rcu_needs_cpu_flush(void) | ||
1999 | { | ||
2000 | int cpu = smp_processor_id(); | ||
2001 | unsigned long flags; | ||
2002 | |||
2003 | if (per_cpu(rcu_dyntick_drain, cpu) <= 0) | ||
2004 | return; | ||
2005 | local_irq_save(flags); | ||
2006 | (void)rcu_needs_cpu(cpu); | ||
2007 | local_irq_restore(flags); | ||
2008 | } | ||
2009 | |||
2010 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | 2026 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ |
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 3b0c0986afc..9feffa4c069 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
@@ -48,11 +48,6 @@ | |||
48 | 48 | ||
49 | #ifdef CONFIG_RCU_BOOST | 49 | #ifdef CONFIG_RCU_BOOST |
50 | 50 | ||
51 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | ||
52 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_cpu); | ||
53 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | ||
54 | DECLARE_PER_CPU(char, rcu_cpu_has_work); | ||
55 | |||
56 | static char convert_kthread_status(unsigned int kthread_status) | 51 | static char convert_kthread_status(unsigned int kthread_status) |
57 | { | 52 | { |
58 | if (kthread_status > RCU_KTHREAD_MAX) | 53 | if (kthread_status > RCU_KTHREAD_MAX) |
@@ -66,11 +61,11 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | |||
66 | { | 61 | { |
67 | if (!rdp->beenonline) | 62 | if (!rdp->beenonline) |
68 | return; | 63 | return; |
69 | seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pqc=%lu qp=%d", | 64 | seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pgp=%lu qp=%d", |
70 | rdp->cpu, | 65 | rdp->cpu, |
71 | cpu_is_offline(rdp->cpu) ? '!' : ' ', | 66 | cpu_is_offline(rdp->cpu) ? '!' : ' ', |
72 | rdp->completed, rdp->gpnum, | 67 | rdp->completed, rdp->gpnum, |
73 | rdp->passed_quiesc, rdp->passed_quiesc_completed, | 68 | rdp->passed_quiesce, rdp->passed_quiesce_gpnum, |
74 | rdp->qs_pending); | 69 | rdp->qs_pending); |
75 | #ifdef CONFIG_NO_HZ | 70 | #ifdef CONFIG_NO_HZ |
76 | seq_printf(m, " dt=%d/%d/%d df=%lu", | 71 | seq_printf(m, " dt=%d/%d/%d df=%lu", |
@@ -144,7 +139,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | |||
144 | rdp->cpu, | 139 | rdp->cpu, |
145 | cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"", | 140 | cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"", |
146 | rdp->completed, rdp->gpnum, | 141 | rdp->completed, rdp->gpnum, |
147 | rdp->passed_quiesc, rdp->passed_quiesc_completed, | 142 | rdp->passed_quiesce, rdp->passed_quiesce_gpnum, |
148 | rdp->qs_pending); | 143 | rdp->qs_pending); |
149 | #ifdef CONFIG_NO_HZ | 144 | #ifdef CONFIG_NO_HZ |
150 | seq_printf(m, ",%d,%d,%d,%lu", | 145 | seq_printf(m, ",%d,%d,%d,%lu", |
@@ -175,7 +170,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | |||
175 | 170 | ||
176 | static int show_rcudata_csv(struct seq_file *m, void *unused) | 171 | static int show_rcudata_csv(struct seq_file *m, void *unused) |
177 | { | 172 | { |
178 | seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\","); | 173 | seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); |
179 | #ifdef CONFIG_NO_HZ | 174 | #ifdef CONFIG_NO_HZ |
180 | seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); | 175 | seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); |
181 | #endif /* #ifdef CONFIG_NO_HZ */ | 176 | #endif /* #ifdef CONFIG_NO_HZ */ |
diff --git a/kernel/relay.c b/kernel/relay.c index 859ea5a9605..226fade4d72 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
@@ -15,7 +15,7 @@ | |||
15 | #include <linux/errno.h> | 15 | #include <linux/errno.h> |
16 | #include <linux/stddef.h> | 16 | #include <linux/stddef.h> |
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/module.h> | 18 | #include <linux/export.h> |
19 | #include <linux/string.h> | 19 | #include <linux/string.h> |
20 | #include <linux/relay.h> | 20 | #include <linux/relay.h> |
21 | #include <linux/vmalloc.h> | 21 | #include <linux/vmalloc.h> |
diff --git a/kernel/resource.c b/kernel/resource.c index 3b3cedc5259..7640b3a947d 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
@@ -7,7 +7,7 @@ | |||
7 | * Arbitrary resource management. | 7 | * Arbitrary resource management. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/module.h> | 10 | #include <linux/export.h> |
11 | #include <linux/errno.h> | 11 | #include <linux/errno.h> |
12 | #include <linux/ioport.h> | 12 | #include <linux/ioport.h> |
13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
@@ -419,6 +419,9 @@ static int __find_resource(struct resource *root, struct resource *old, | |||
419 | else | 419 | else |
420 | tmp.end = root->end; | 420 | tmp.end = root->end; |
421 | 421 | ||
422 | if (tmp.end < tmp.start) | ||
423 | goto next; | ||
424 | |||
422 | resource_clip(&tmp, constraint->min, constraint->max); | 425 | resource_clip(&tmp, constraint->min, constraint->max); |
423 | arch_remove_reservations(&tmp); | 426 | arch_remove_reservations(&tmp); |
424 | 427 | ||
@@ -436,8 +439,10 @@ static int __find_resource(struct resource *root, struct resource *old, | |||
436 | return 0; | 439 | return 0; |
437 | } | 440 | } |
438 | } | 441 | } |
439 | if (!this) | 442 | |
443 | next: if (!this || this->end == root->end) | ||
440 | break; | 444 | break; |
445 | |||
441 | if (this != old) | 446 | if (this != old) |
442 | tmp.start = this->end + 1; | 447 | tmp.start = this->end + 1; |
443 | this = this->sibling; | 448 | this = this->sibling; |
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c index 3c7cbc2c33b..8eafd1bd273 100644 --- a/kernel/rtmutex-debug.c +++ b/kernel/rtmutex-debug.c | |||
@@ -18,7 +18,7 @@ | |||
18 | */ | 18 | */ |
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/delay.h> | 20 | #include <linux/delay.h> |
21 | #include <linux/module.h> | 21 | #include <linux/export.h> |
22 | #include <linux/spinlock.h> | 22 | #include <linux/spinlock.h> |
23 | #include <linux/kallsyms.h> | 23 | #include <linux/kallsyms.h> |
24 | #include <linux/syscalls.h> | 24 | #include <linux/syscalls.h> |
@@ -29,61 +29,6 @@ | |||
29 | 29 | ||
30 | #include "rtmutex_common.h" | 30 | #include "rtmutex_common.h" |
31 | 31 | ||
32 | # define TRACE_WARN_ON(x) WARN_ON(x) | ||
33 | # define TRACE_BUG_ON(x) BUG_ON(x) | ||
34 | |||
35 | # define TRACE_OFF() \ | ||
36 | do { \ | ||
37 | if (rt_trace_on) { \ | ||
38 | rt_trace_on = 0; \ | ||
39 | console_verbose(); \ | ||
40 | if (raw_spin_is_locked(&current->pi_lock)) \ | |
41 | raw_spin_unlock(&current->pi_lock); \ | |
42 | } \ | ||
43 | } while (0) | ||
44 | |||
45 | # define TRACE_OFF_NOLOCK() \ | ||
46 | do { \ | ||
47 | if (rt_trace_on) { \ | ||
48 | rt_trace_on = 0; \ | ||
49 | console_verbose(); \ | ||
50 | } \ | ||
51 | } while (0) | ||
52 | |||
53 | # define TRACE_BUG_LOCKED() \ | ||
54 | do { \ | ||
55 | TRACE_OFF(); \ | ||
56 | BUG(); \ | ||
57 | } while (0) | ||
58 | |||
59 | # define TRACE_WARN_ON_LOCKED(c) \ | ||
60 | do { \ | ||
61 | if (unlikely(c)) { \ | ||
62 | TRACE_OFF(); \ | ||
63 | WARN_ON(1); \ | ||
64 | } \ | ||
65 | } while (0) | ||
66 | |||
67 | # define TRACE_BUG_ON_LOCKED(c) \ | ||
68 | do { \ | ||
69 | if (unlikely(c)) \ | ||
70 | TRACE_BUG_LOCKED(); \ | ||
71 | } while (0) | ||
72 | |||
73 | #ifdef CONFIG_SMP | ||
74 | # define SMP_TRACE_BUG_ON_LOCKED(c) TRACE_BUG_ON_LOCKED(c) | ||
75 | #else | ||
76 | # define SMP_TRACE_BUG_ON_LOCKED(c) do { } while (0) | ||
77 | #endif | ||
78 | |||
79 | /* | ||
80 | * deadlock detection flag. We turn it off when we detect | ||
81 | * the first problem because we dont want to recurse back | ||
82 | * into the tracing code when doing error printk or | ||
83 | * executing a BUG(): | ||
84 | */ | ||
85 | static int rt_trace_on = 1; | ||
86 | |||
87 | static void printk_task(struct task_struct *p) | 32 | static void printk_task(struct task_struct *p) |
88 | { | 33 | { |
89 | if (p) | 34 | if (p) |
@@ -111,8 +56,8 @@ static void printk_lock(struct rt_mutex *lock, int print_owner) | |||
111 | 56 | ||
112 | void rt_mutex_debug_task_free(struct task_struct *task) | 57 | void rt_mutex_debug_task_free(struct task_struct *task) |
113 | { | 58 | { |
114 | WARN_ON(!plist_head_empty(&task->pi_waiters)); | 59 | DEBUG_LOCKS_WARN_ON(!plist_head_empty(&task->pi_waiters)); |
115 | WARN_ON(task->pi_blocked_on); | 60 | DEBUG_LOCKS_WARN_ON(task->pi_blocked_on); |
116 | } | 61 | } |
117 | 62 | ||
118 | /* | 63 | /* |
@@ -125,7 +70,7 @@ void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *act_waiter, | |||
125 | { | 70 | { |
126 | struct task_struct *task; | 71 | struct task_struct *task; |
127 | 72 | ||
128 | if (!rt_trace_on || detect || !act_waiter) | 73 | if (!debug_locks || detect || !act_waiter) |
129 | return; | 74 | return; |
130 | 75 | ||
131 | task = rt_mutex_owner(act_waiter->lock); | 76 | task = rt_mutex_owner(act_waiter->lock); |
@@ -139,7 +84,7 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) | |||
139 | { | 84 | { |
140 | struct task_struct *task; | 85 | struct task_struct *task; |
141 | 86 | ||
142 | if (!waiter->deadlock_lock || !rt_trace_on) | 87 | if (!waiter->deadlock_lock || !debug_locks) |
143 | return; | 88 | return; |
144 | 89 | ||
145 | rcu_read_lock(); | 90 | rcu_read_lock(); |
@@ -149,7 +94,10 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) | |||
149 | return; | 94 | return; |
150 | } | 95 | } |
151 | 96 | ||
152 | TRACE_OFF_NOLOCK(); | 97 | if (!debug_locks_off()) { |
98 | rcu_read_unlock(); | ||
99 | return; | ||
100 | } | ||
153 | 101 | ||
154 | printk("\n============================================\n"); | 102 | printk("\n============================================\n"); |
155 | printk( "[ BUG: circular locking deadlock detected! ]\n"); | 103 | printk( "[ BUG: circular locking deadlock detected! ]\n"); |
@@ -180,7 +128,6 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) | |||
180 | 128 | ||
181 | printk("[ turning off deadlock detection." | 129 | printk("[ turning off deadlock detection." |
182 | "Please report this trace. ]\n\n"); | 130 | "Please report this trace. ]\n\n"); |
183 | local_irq_disable(); | ||
184 | } | 131 | } |
185 | 132 | ||
186 | void debug_rt_mutex_lock(struct rt_mutex *lock) | 133 | void debug_rt_mutex_lock(struct rt_mutex *lock) |
@@ -189,7 +136,7 @@ void debug_rt_mutex_lock(struct rt_mutex *lock) | |||
189 | 136 | ||
190 | void debug_rt_mutex_unlock(struct rt_mutex *lock) | 137 | void debug_rt_mutex_unlock(struct rt_mutex *lock) |
191 | { | 138 | { |
192 | TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current); | 139 | DEBUG_LOCKS_WARN_ON(rt_mutex_owner(lock) != current); |
193 | } | 140 | } |
194 | 141 | ||
195 | void | 142 | void |
@@ -199,7 +146,7 @@ debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner) | |||
199 | 146 | ||
200 | void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock) | 147 | void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock) |
201 | { | 148 | { |
202 | TRACE_WARN_ON_LOCKED(!rt_mutex_owner(lock)); | 149 | DEBUG_LOCKS_WARN_ON(!rt_mutex_owner(lock)); |
203 | } | 150 | } |
204 | 151 | ||
205 | void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) | 152 | void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) |
@@ -213,8 +160,8 @@ void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) | |||
213 | void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter) | 160 | void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter) |
214 | { | 161 | { |
215 | put_pid(waiter->deadlock_task_pid); | 162 | put_pid(waiter->deadlock_task_pid); |
216 | TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry)); | 163 | DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->list_entry)); |
217 | TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); | 164 | DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); |
218 | memset(waiter, 0x22, sizeof(*waiter)); | 165 | memset(waiter, 0x22, sizeof(*waiter)); |
219 | } | 166 | } |
220 | 167 | ||
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index 5c9ccd38096..3d9f31cd79e 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c | |||
@@ -7,7 +7,7 @@ | |||
7 | * | 7 | * |
8 | */ | 8 | */ |
9 | #include <linux/kthread.h> | 9 | #include <linux/kthread.h> |
10 | #include <linux/module.h> | 10 | #include <linux/export.h> |
11 | #include <linux/sched.h> | 11 | #include <linux/sched.h> |
12 | #include <linux/spinlock.h> | 12 | #include <linux/spinlock.h> |
13 | #include <linux/sysdev.h> | 13 | #include <linux/sysdev.h> |
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 255e1662acd..f9d8482dd48 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c | |||
@@ -11,7 +11,7 @@ | |||
11 | * See Documentation/rt-mutex-design.txt for details. | 11 | * See Documentation/rt-mutex-design.txt for details. |
12 | */ | 12 | */ |
13 | #include <linux/spinlock.h> | 13 | #include <linux/spinlock.h> |
14 | #include <linux/module.h> | 14 | #include <linux/export.h> |
15 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
16 | #include <linux/timer.h> | 16 | #include <linux/timer.h> |
17 | 17 | ||
@@ -579,6 +579,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
579 | struct rt_mutex_waiter *waiter) | 579 | struct rt_mutex_waiter *waiter) |
580 | { | 580 | { |
581 | int ret = 0; | 581 | int ret = 0; |
582 | int was_disabled; | ||
582 | 583 | ||
583 | for (;;) { | 584 | for (;;) { |
584 | /* Try to acquire the lock: */ | 585 | /* Try to acquire the lock: */ |
@@ -601,10 +602,17 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
601 | 602 | ||
602 | raw_spin_unlock(&lock->wait_lock); | 603 | raw_spin_unlock(&lock->wait_lock); |
603 | 604 | ||
605 | was_disabled = irqs_disabled(); | ||
606 | if (was_disabled) | ||
607 | local_irq_enable(); | ||
608 | |||
604 | debug_rt_mutex_print_deadlock(waiter); | 609 | debug_rt_mutex_print_deadlock(waiter); |
605 | 610 | ||
606 | schedule_rt_mutex(lock); | 611 | schedule_rt_mutex(lock); |
607 | 612 | ||
613 | if (was_disabled) | ||
614 | local_irq_disable(); | ||
615 | |||
608 | raw_spin_lock(&lock->wait_lock); | 616 | raw_spin_lock(&lock->wait_lock); |
609 | set_current_state(state); | 617 | set_current_state(state); |
610 | } | 618 | } |
diff --git a/kernel/rwsem.c b/kernel/rwsem.c index 9f48f3d82e9..b152f74f02d 100644 --- a/kernel/rwsem.c +++ b/kernel/rwsem.c | |||
@@ -7,7 +7,7 @@ | |||
7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
8 | #include <linux/kernel.h> | 8 | #include <linux/kernel.h> |
9 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
10 | #include <linux/module.h> | 10 | #include <linux/export.h> |
11 | #include <linux/rwsem.h> | 11 | #include <linux/rwsem.h> |
12 | 12 | ||
13 | #include <asm/system.h> | 13 | #include <asm/system.h> |
diff --git a/kernel/sched.c b/kernel/sched.c index e1290ecee3c..d6b149ccf92 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -71,6 +71,7 @@ | |||
71 | #include <linux/ctype.h> | 71 | #include <linux/ctype.h> |
72 | #include <linux/ftrace.h> | 72 | #include <linux/ftrace.h> |
73 | #include <linux/slab.h> | 73 | #include <linux/slab.h> |
74 | #include <linux/init_task.h> | ||
74 | 75 | ||
75 | #include <asm/tlb.h> | 76 | #include <asm/tlb.h> |
76 | #include <asm/irq_regs.h> | 77 | #include <asm/irq_regs.h> |
@@ -196,10 +197,28 @@ static inline int rt_bandwidth_enabled(void) | |||
196 | return sysctl_sched_rt_runtime >= 0; | 197 | return sysctl_sched_rt_runtime >= 0; |
197 | } | 198 | } |
198 | 199 | ||
199 | static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | 200 | static void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period) |
200 | { | 201 | { |
201 | ktime_t now; | 202 | unsigned long delta; |
203 | ktime_t soft, hard, now; | ||
204 | |||
205 | for (;;) { | ||
206 | if (hrtimer_active(period_timer)) | ||
207 | break; | ||
208 | |||
209 | now = hrtimer_cb_get_time(period_timer); | ||
210 | hrtimer_forward(period_timer, now, period); | ||
202 | 211 | ||
212 | soft = hrtimer_get_softexpires(period_timer); | ||
213 | hard = hrtimer_get_expires(period_timer); | ||
214 | delta = ktime_to_ns(ktime_sub(hard, soft)); | ||
215 | __hrtimer_start_range_ns(period_timer, soft, delta, | ||
216 | HRTIMER_MODE_ABS_PINNED, 0); | ||
217 | } | ||
218 | } | ||
219 | |||
220 | static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | ||
221 | { | ||
203 | if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) | 222 | if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) |
204 | return; | 223 | return; |
205 | 224 | ||
@@ -207,22 +226,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | |||
207 | return; | 226 | return; |
208 | 227 | ||
209 | raw_spin_lock(&rt_b->rt_runtime_lock); | 228 | raw_spin_lock(&rt_b->rt_runtime_lock); |
210 | for (;;) { | 229 | start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period); |
211 | unsigned long delta; | ||
212 | ktime_t soft, hard; | ||
213 | |||
214 | if (hrtimer_active(&rt_b->rt_period_timer)) | ||
215 | break; | ||
216 | |||
217 | now = hrtimer_cb_get_time(&rt_b->rt_period_timer); | ||
218 | hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period); | ||
219 | |||
220 | soft = hrtimer_get_softexpires(&rt_b->rt_period_timer); | ||
221 | hard = hrtimer_get_expires(&rt_b->rt_period_timer); | ||
222 | delta = ktime_to_ns(ktime_sub(hard, soft)); | ||
223 | __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, | ||
224 | HRTIMER_MODE_ABS_PINNED, 0); | ||
225 | } | ||
226 | raw_spin_unlock(&rt_b->rt_runtime_lock); | 230 | raw_spin_unlock(&rt_b->rt_runtime_lock); |
227 | } | 231 | } |
228 | 232 | ||
@@ -247,6 +251,24 @@ struct cfs_rq; | |||
247 | 251 | ||
248 | static LIST_HEAD(task_groups); | 252 | static LIST_HEAD(task_groups); |
249 | 253 | ||
254 | struct cfs_bandwidth { | ||
255 | #ifdef CONFIG_CFS_BANDWIDTH | ||
256 | raw_spinlock_t lock; | ||
257 | ktime_t period; | ||
258 | u64 quota, runtime; | ||
259 | s64 hierarchal_quota; | ||
260 | u64 runtime_expires; | ||
261 | |||
262 | int idle, timer_active; | ||
263 | struct hrtimer period_timer, slack_timer; | ||
264 | struct list_head throttled_cfs_rq; | ||
265 | |||
266 | /* statistics */ | ||
267 | int nr_periods, nr_throttled; | ||
268 | u64 throttled_time; | ||
269 | #endif | ||
270 | }; | ||
271 | |||
250 | /* task group related information */ | 272 | /* task group related information */ |
251 | struct task_group { | 273 | struct task_group { |
252 | struct cgroup_subsys_state css; | 274 | struct cgroup_subsys_state css; |
@@ -278,6 +300,8 @@ struct task_group { | |||
278 | #ifdef CONFIG_SCHED_AUTOGROUP | 300 | #ifdef CONFIG_SCHED_AUTOGROUP |
279 | struct autogroup *autogroup; | 301 | struct autogroup *autogroup; |
280 | #endif | 302 | #endif |
303 | |||
304 | struct cfs_bandwidth cfs_bandwidth; | ||
281 | }; | 305 | }; |
282 | 306 | ||
283 | /* task_group_lock serializes the addition/removal of task groups */ | 307 | /* task_group_lock serializes the addition/removal of task groups */ |
@@ -311,7 +335,7 @@ struct task_group root_task_group; | |||
311 | /* CFS-related fields in a runqueue */ | 335 | /* CFS-related fields in a runqueue */ |
312 | struct cfs_rq { | 336 | struct cfs_rq { |
313 | struct load_weight load; | 337 | struct load_weight load; |
314 | unsigned long nr_running; | 338 | unsigned long nr_running, h_nr_running; |
315 | 339 | ||
316 | u64 exec_clock; | 340 | u64 exec_clock; |
317 | u64 min_vruntime; | 341 | u64 min_vruntime; |
@@ -377,9 +401,120 @@ struct cfs_rq { | |||
377 | 401 | ||
378 | unsigned long load_contribution; | 402 | unsigned long load_contribution; |
379 | #endif | 403 | #endif |
404 | #ifdef CONFIG_CFS_BANDWIDTH | ||
405 | int runtime_enabled; | ||
406 | u64 runtime_expires; | ||
407 | s64 runtime_remaining; | ||
408 | |||
409 | u64 throttled_timestamp; | ||
410 | int throttled, throttle_count; | ||
411 | struct list_head throttled_list; | ||
412 | #endif | ||
380 | #endif | 413 | #endif |
381 | }; | 414 | }; |
382 | 415 | ||
416 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
417 | #ifdef CONFIG_CFS_BANDWIDTH | ||
418 | static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) | ||
419 | { | ||
420 | return &tg->cfs_bandwidth; | ||
421 | } | ||
422 | |||
423 | static inline u64 default_cfs_period(void); | ||
424 | static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun); | ||
425 | static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b); | ||
426 | |||
427 | static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer) | ||
428 | { | ||
429 | struct cfs_bandwidth *cfs_b = | ||
430 | container_of(timer, struct cfs_bandwidth, slack_timer); | ||
431 | do_sched_cfs_slack_timer(cfs_b); | ||
432 | |||
433 | return HRTIMER_NORESTART; | ||
434 | } | ||
435 | |||
436 | static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) | ||
437 | { | ||
438 | struct cfs_bandwidth *cfs_b = | ||
439 | container_of(timer, struct cfs_bandwidth, period_timer); | ||
440 | ktime_t now; | ||
441 | int overrun; | ||
442 | int idle = 0; | ||
443 | |||
444 | for (;;) { | ||
445 | now = hrtimer_cb_get_time(timer); | ||
446 | overrun = hrtimer_forward(timer, now, cfs_b->period); | ||
447 | |||
448 | if (!overrun) | ||
449 | break; | ||
450 | |||
451 | idle = do_sched_cfs_period_timer(cfs_b, overrun); | ||
452 | } | ||
453 | |||
454 | return idle ? HRTIMER_NORESTART : HRTIMER_RESTART; | ||
455 | } | ||
456 | |||
457 | static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) | ||
458 | { | ||
459 | raw_spin_lock_init(&cfs_b->lock); | ||
460 | cfs_b->runtime = 0; | ||
461 | cfs_b->quota = RUNTIME_INF; | ||
462 | cfs_b->period = ns_to_ktime(default_cfs_period()); | ||
463 | |||
464 | INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq); | ||
465 | hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
466 | cfs_b->period_timer.function = sched_cfs_period_timer; | ||
467 | hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
468 | cfs_b->slack_timer.function = sched_cfs_slack_timer; | ||
469 | } | ||
470 | |||
471 | static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) | ||
472 | { | ||
473 | cfs_rq->runtime_enabled = 0; | ||
474 | INIT_LIST_HEAD(&cfs_rq->throttled_list); | ||
475 | } | ||
476 | |||
477 | /* requires cfs_b->lock, may release to reprogram timer */ | ||
478 | static void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) | ||
479 | { | ||
480 | /* | ||
481 | * The timer may be active because we're trying to set a new bandwidth | ||
482 | * period or because we're racing with the tear-down path | ||
483 | * (timer_active==0 becomes visible before the hrtimer call-back | ||
484 | * terminates). In either case we ensure that it's re-programmed | ||
485 | */ | ||
486 | while (unlikely(hrtimer_active(&cfs_b->period_timer))) { | ||
487 | raw_spin_unlock(&cfs_b->lock); | ||
488 | /* ensure cfs_b->lock is available while we wait */ | ||
489 | hrtimer_cancel(&cfs_b->period_timer); | ||
490 | |||
491 | raw_spin_lock(&cfs_b->lock); | ||
492 | /* if someone else restarted the timer then we're done */ | ||
493 | if (cfs_b->timer_active) | ||
494 | return; | ||
495 | } | ||
496 | |||
497 | cfs_b->timer_active = 1; | ||
498 | start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period); | ||
499 | } | ||
500 | |||
501 | static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) | ||
502 | { | ||
503 | hrtimer_cancel(&cfs_b->period_timer); | ||
504 | hrtimer_cancel(&cfs_b->slack_timer); | ||
505 | } | ||
506 | #else | ||
507 | static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} | ||
508 | static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {} | ||
509 | static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {} | ||
510 | |||
511 | static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) | ||
512 | { | ||
513 | return NULL; | ||
514 | } | ||
515 | #endif /* CONFIG_CFS_BANDWIDTH */ | ||
516 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | ||
517 | |||
383 | /* Real-Time classes' related field in a runqueue: */ | 518 | /* Real-Time classes' related field in a runqueue: */ |
384 | struct rt_rq { | 519 | struct rt_rq { |
385 | struct rt_prio_array active; | 520 | struct rt_prio_array active; |
@@ -510,7 +645,7 @@ struct rq { | |||
510 | 645 | ||
511 | unsigned long cpu_power; | 646 | unsigned long cpu_power; |
512 | 647 | ||
513 | unsigned char idle_at_tick; | 648 | unsigned char idle_balance; |
514 | /* For active balancing */ | 649 | /* For active balancing */ |
515 | int post_schedule; | 650 | int post_schedule; |
516 | int active_balance; | 651 | int active_balance; |
@@ -520,8 +655,6 @@ struct rq { | |||
520 | int cpu; | 655 | int cpu; |
521 | int online; | 656 | int online; |
522 | 657 | ||
523 | unsigned long avg_load_per_task; | ||
524 | |||
525 | u64 rt_avg; | 658 | u64 rt_avg; |
526 | u64 age_stamp; | 659 | u64 age_stamp; |
527 | u64 idle_stamp; | 660 | u64 idle_stamp; |
@@ -570,7 +703,7 @@ struct rq { | |||
570 | #endif | 703 | #endif |
571 | 704 | ||
572 | #ifdef CONFIG_SMP | 705 | #ifdef CONFIG_SMP |
573 | struct task_struct *wake_list; | 706 | struct llist_head wake_list; |
574 | #endif | 707 | #endif |
575 | }; | 708 | }; |
576 | 709 | ||
@@ -1272,6 +1405,18 @@ void wake_up_idle_cpu(int cpu) | |||
1272 | smp_send_reschedule(cpu); | 1405 | smp_send_reschedule(cpu); |
1273 | } | 1406 | } |
1274 | 1407 | ||
1408 | static inline bool got_nohz_idle_kick(void) | ||
1409 | { | ||
1410 | return idle_cpu(smp_processor_id()) && this_rq()->nohz_balance_kick; | ||
1411 | } | ||
1412 | |||
1413 | #else /* CONFIG_NO_HZ */ | ||
1414 | |||
1415 | static inline bool got_nohz_idle_kick(void) | ||
1416 | { | ||
1417 | return false; | ||
1418 | } | ||
1419 | |||
1275 | #endif /* CONFIG_NO_HZ */ | 1420 | #endif /* CONFIG_NO_HZ */ |
1276 | 1421 | ||
1277 | static u64 sched_avg_period(void) | 1422 | static u64 sched_avg_period(void) |
@@ -1471,24 +1616,28 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load) | |||
1471 | update_load_sub(&rq->load, load); | 1616 | update_load_sub(&rq->load, load); |
1472 | } | 1617 | } |
1473 | 1618 | ||
1474 | #if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED) | 1619 | #if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \ |
1620 | (defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH))) | ||
1475 | typedef int (*tg_visitor)(struct task_group *, void *); | 1621 | typedef int (*tg_visitor)(struct task_group *, void *); |
1476 | 1622 | ||
1477 | /* | 1623 | /* |
1478 | * Iterate the full tree, calling @down when first entering a node and @up when | 1624 | * Iterate task_group tree rooted at *from, calling @down when first entering a |
1479 | * leaving it for the final time. | 1625 | * node and @up when leaving it for the final time. |
1626 | * | ||
1627 | * Caller must hold rcu_lock or sufficient equivalent. | ||
1480 | */ | 1628 | */ |
1481 | static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) | 1629 | static int walk_tg_tree_from(struct task_group *from, |
1630 | tg_visitor down, tg_visitor up, void *data) | ||
1482 | { | 1631 | { |
1483 | struct task_group *parent, *child; | 1632 | struct task_group *parent, *child; |
1484 | int ret; | 1633 | int ret; |
1485 | 1634 | ||
1486 | rcu_read_lock(); | 1635 | parent = from; |
1487 | parent = &root_task_group; | 1636 | |
1488 | down: | 1637 | down: |
1489 | ret = (*down)(parent, data); | 1638 | ret = (*down)(parent, data); |
1490 | if (ret) | 1639 | if (ret) |
1491 | goto out_unlock; | 1640 | goto out; |
1492 | list_for_each_entry_rcu(child, &parent->children, siblings) { | 1641 | list_for_each_entry_rcu(child, &parent->children, siblings) { |
1493 | parent = child; | 1642 | parent = child; |
1494 | goto down; | 1643 | goto down; |
@@ -1497,19 +1646,29 @@ up: | |||
1497 | continue; | 1646 | continue; |
1498 | } | 1647 | } |
1499 | ret = (*up)(parent, data); | 1648 | ret = (*up)(parent, data); |
1500 | if (ret) | 1649 | if (ret || parent == from) |
1501 | goto out_unlock; | 1650 | goto out; |
1502 | 1651 | ||
1503 | child = parent; | 1652 | child = parent; |
1504 | parent = parent->parent; | 1653 | parent = parent->parent; |
1505 | if (parent) | 1654 | if (parent) |
1506 | goto up; | 1655 | goto up; |
1507 | out_unlock: | 1656 | out: |
1508 | rcu_read_unlock(); | ||
1509 | |||
1510 | return ret; | 1657 | return ret; |
1511 | } | 1658 | } |
1512 | 1659 | ||
1660 | /* | ||
1661 | * Iterate the full tree, calling @down when first entering a node and @up when | ||
1662 | * leaving it for the final time. | ||
1663 | * | ||
1664 | * Caller must hold rcu_lock or sufficient equivalent. | ||
1665 | */ | ||
1666 | |||
1667 | static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) | ||
1668 | { | ||
1669 | return walk_tg_tree_from(&root_task_group, down, up, data); | ||
1670 | } | ||
1671 | |||
1513 | static int tg_nop(struct task_group *tg, void *data) | 1672 | static int tg_nop(struct task_group *tg, void *data) |
1514 | { | 1673 | { |
1515 | return 0; | 1674 | return 0; |
@@ -1569,11 +1728,9 @@ static unsigned long cpu_avg_load_per_task(int cpu) | |||
1569 | unsigned long nr_running = ACCESS_ONCE(rq->nr_running); | 1728 | unsigned long nr_running = ACCESS_ONCE(rq->nr_running); |
1570 | 1729 | ||
1571 | if (nr_running) | 1730 | if (nr_running) |
1572 | rq->avg_load_per_task = rq->load.weight / nr_running; | 1731 | return rq->load.weight / nr_running; |
1573 | else | ||
1574 | rq->avg_load_per_task = 0; | ||
1575 | 1732 | ||
1576 | return rq->avg_load_per_task; | 1733 | return 0; |
1577 | } | 1734 | } |
1578 | 1735 | ||
1579 | #ifdef CONFIG_PREEMPT | 1736 | #ifdef CONFIG_PREEMPT |
@@ -1739,7 +1896,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | |||
1739 | #ifdef CONFIG_SMP | 1896 | #ifdef CONFIG_SMP |
1740 | /* | 1897 | /* |
1741 | * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be | 1898 | * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be |
1742 | * successfuly executed on another CPU. We must ensure that updates of | 1899 | * successfully executed on another CPU. We must ensure that updates of |
1743 | * per-task data have been completed by this moment. | 1900 | * per-task data have been completed by this moment. |
1744 | */ | 1901 | */ |
1745 | smp_wmb(); | 1902 | smp_wmb(); |
@@ -1806,7 +1963,6 @@ static void activate_task(struct rq *rq, struct task_struct *p, int flags) | |||
1806 | rq->nr_uninterruptible--; | 1963 | rq->nr_uninterruptible--; |
1807 | 1964 | ||
1808 | enqueue_task(rq, p, flags); | 1965 | enqueue_task(rq, p, flags); |
1809 | inc_nr_running(rq); | ||
1810 | } | 1966 | } |
1811 | 1967 | ||
1812 | /* | 1968 | /* |
@@ -1818,7 +1974,6 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags) | |||
1818 | rq->nr_uninterruptible++; | 1974 | rq->nr_uninterruptible++; |
1819 | 1975 | ||
1820 | dequeue_task(rq, p, flags); | 1976 | dequeue_task(rq, p, flags); |
1821 | dec_nr_running(rq); | ||
1822 | } | 1977 | } |
1823 | 1978 | ||
1824 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | 1979 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING |
@@ -2390,11 +2545,11 @@ static int select_fallback_rq(int cpu, struct task_struct *p) | |||
2390 | 2545 | ||
2391 | /* Look for allowed, online CPU in same node. */ | 2546 | /* Look for allowed, online CPU in same node. */ |
2392 | for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) | 2547 | for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) |
2393 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | 2548 | if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) |
2394 | return dest_cpu; | 2549 | return dest_cpu; |
2395 | 2550 | ||
2396 | /* Any allowed, online CPU? */ | 2551 | /* Any allowed, online CPU? */ |
2397 | dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); | 2552 | dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask); |
2398 | if (dest_cpu < nr_cpu_ids) | 2553 | if (dest_cpu < nr_cpu_ids) |
2399 | return dest_cpu; | 2554 | return dest_cpu; |
2400 | 2555 | ||
@@ -2431,7 +2586,7 @@ int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) | |||
2431 | * [ this allows ->select_task() to simply return task_cpu(p) and | 2586 | * [ this allows ->select_task() to simply return task_cpu(p) and |
2432 | * not worry about this generic constraint ] | 2587 | * not worry about this generic constraint ] |
2433 | */ | 2588 | */ |
2434 | if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || | 2589 | if (unlikely(!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) || |
2435 | !cpu_online(cpu))) | 2590 | !cpu_online(cpu))) |
2436 | cpu = select_fallback_rq(task_cpu(p), p); | 2591 | cpu = select_fallback_rq(task_cpu(p), p); |
2437 | 2592 | ||
@@ -2556,42 +2711,26 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) | |||
2556 | } | 2711 | } |
2557 | 2712 | ||
2558 | #ifdef CONFIG_SMP | 2713 | #ifdef CONFIG_SMP |
2559 | static void sched_ttwu_do_pending(struct task_struct *list) | 2714 | static void sched_ttwu_pending(void) |
2560 | { | 2715 | { |
2561 | struct rq *rq = this_rq(); | 2716 | struct rq *rq = this_rq(); |
2717 | struct llist_node *llist = llist_del_all(&rq->wake_list); | ||
2718 | struct task_struct *p; | ||
2562 | 2719 | ||
2563 | raw_spin_lock(&rq->lock); | 2720 | raw_spin_lock(&rq->lock); |
2564 | 2721 | ||
2565 | while (list) { | 2722 | while (llist) { |
2566 | struct task_struct *p = list; | 2723 | p = llist_entry(llist, struct task_struct, wake_entry); |
2567 | list = list->wake_entry; | 2724 | llist = llist_next(llist); |
2568 | ttwu_do_activate(rq, p, 0); | 2725 | ttwu_do_activate(rq, p, 0); |
2569 | } | 2726 | } |
2570 | 2727 | ||
2571 | raw_spin_unlock(&rq->lock); | 2728 | raw_spin_unlock(&rq->lock); |
2572 | } | 2729 | } |
2573 | 2730 | ||
2574 | #ifdef CONFIG_HOTPLUG_CPU | ||
2575 | |||
2576 | static void sched_ttwu_pending(void) | ||
2577 | { | ||
2578 | struct rq *rq = this_rq(); | ||
2579 | struct task_struct *list = xchg(&rq->wake_list, NULL); | ||
2580 | |||
2581 | if (!list) | ||
2582 | return; | ||
2583 | |||
2584 | sched_ttwu_do_pending(list); | ||
2585 | } | ||
2586 | |||
2587 | #endif /* CONFIG_HOTPLUG_CPU */ | ||
2588 | |||
2589 | void scheduler_ipi(void) | 2731 | void scheduler_ipi(void) |
2590 | { | 2732 | { |
2591 | struct rq *rq = this_rq(); | 2733 | if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) |
2592 | struct task_struct *list = xchg(&rq->wake_list, NULL); | ||
2593 | |||
2594 | if (!list) | ||
2595 | return; | 2734 | return; |
2596 | 2735 | ||
2597 | /* | 2736 | /* |
@@ -2608,25 +2747,21 @@ void scheduler_ipi(void) | |||
2608 | * somewhat pessimize the simple resched case. | 2747 | * somewhat pessimize the simple resched case. |
2609 | */ | 2748 | */ |
2610 | irq_enter(); | 2749 | irq_enter(); |
2611 | sched_ttwu_do_pending(list); | 2750 | sched_ttwu_pending(); |
2751 | |||
2752 | /* | ||
2753 | * Check if someone kicked us for doing the nohz idle load balance. | ||
2754 | */ | ||
2755 | if (unlikely(got_nohz_idle_kick() && !need_resched())) { | ||
2756 | this_rq()->idle_balance = 1; | ||
2757 | raise_softirq_irqoff(SCHED_SOFTIRQ); | ||
2758 | } | ||
2612 | irq_exit(); | 2759 | irq_exit(); |
2613 | } | 2760 | } |
2614 | 2761 | ||
2615 | static void ttwu_queue_remote(struct task_struct *p, int cpu) | 2762 | static void ttwu_queue_remote(struct task_struct *p, int cpu) |
2616 | { | 2763 | { |
2617 | struct rq *rq = cpu_rq(cpu); | 2764 | if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) |
2618 | struct task_struct *next = rq->wake_list; | ||
2619 | |||
2620 | for (;;) { | ||
2621 | struct task_struct *old = next; | ||
2622 | |||
2623 | p->wake_entry = next; | ||
2624 | next = cmpxchg(&rq->wake_list, old, p); | ||
2625 | if (next == old) | ||
2626 | break; | ||
2627 | } | ||
2628 | |||
2629 | if (!next) | ||
2630 | smp_send_reschedule(cpu); | 2765 | smp_send_reschedule(cpu); |
2631 | } | 2766 | } |
2632 | 2767 | ||
@@ -2848,19 +2983,23 @@ void sched_fork(struct task_struct *p) | |||
2848 | p->state = TASK_RUNNING; | 2983 | p->state = TASK_RUNNING; |
2849 | 2984 | ||
2850 | /* | 2985 | /* |
2986 | * Make sure we do not leak PI boosting priority to the child. | ||
2987 | */ | ||
2988 | p->prio = current->normal_prio; | ||
2989 | |||
2990 | /* | ||
2851 | * Revert to default priority/policy on fork if requested. | 2991 | * Revert to default priority/policy on fork if requested. |
2852 | */ | 2992 | */ |
2853 | if (unlikely(p->sched_reset_on_fork)) { | 2993 | if (unlikely(p->sched_reset_on_fork)) { |
2854 | if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) { | 2994 | if (task_has_rt_policy(p)) { |
2855 | p->policy = SCHED_NORMAL; | 2995 | p->policy = SCHED_NORMAL; |
2856 | p->normal_prio = p->static_prio; | ||
2857 | } | ||
2858 | |||
2859 | if (PRIO_TO_NICE(p->static_prio) < 0) { | ||
2860 | p->static_prio = NICE_TO_PRIO(0); | 2996 | p->static_prio = NICE_TO_PRIO(0); |
2861 | p->normal_prio = p->static_prio; | 2997 | p->rt_priority = 0; |
2862 | set_load_weight(p); | 2998 | } else if (PRIO_TO_NICE(p->static_prio) < 0) |
2863 | } | 2999 | p->static_prio = NICE_TO_PRIO(0); |
3000 | |||
3001 | p->prio = p->normal_prio = __normal_prio(p); | ||
3002 | set_load_weight(p); | ||
2864 | 3003 | ||
2865 | /* | 3004 | /* |
2866 | * We don't need the reset flag anymore after the fork. It has | 3005 | * We don't need the reset flag anymore after the fork. It has |
@@ -2869,11 +3008,6 @@ void sched_fork(struct task_struct *p) | |||
2869 | p->sched_reset_on_fork = 0; | 3008 | p->sched_reset_on_fork = 0; |
2870 | } | 3009 | } |
2871 | 3010 | ||
2872 | /* | ||
2873 | * Make sure we do not leak PI boosting priority to the child. | ||
2874 | */ | ||
2875 | p->prio = current->normal_prio; | ||
2876 | |||
2877 | if (!rt_prio(p->prio)) | 3011 | if (!rt_prio(p->prio)) |
2878 | p->sched_class = &fair_sched_class; | 3012 | p->sched_class = &fair_sched_class; |
2879 | 3013 | ||
@@ -3065,7 +3199,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
3065 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | 3199 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
3066 | local_irq_disable(); | 3200 | local_irq_disable(); |
3067 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | 3201 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ |
3068 | perf_event_task_sched_in(current); | 3202 | perf_event_task_sched_in(prev, current); |
3069 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | 3203 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
3070 | local_irq_enable(); | 3204 | local_irq_enable(); |
3071 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | 3205 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ |
@@ -4116,7 +4250,7 @@ void scheduler_tick(void) | |||
4116 | perf_event_task_tick(); | 4250 | perf_event_task_tick(); |
4117 | 4251 | ||
4118 | #ifdef CONFIG_SMP | 4252 | #ifdef CONFIG_SMP |
4119 | rq->idle_at_tick = idle_cpu(cpu); | 4253 | rq->idle_balance = idle_cpu(cpu); |
4120 | trigger_load_balance(rq, cpu); | 4254 | trigger_load_balance(rq, cpu); |
4121 | #endif | 4255 | #endif |
4122 | } | 4256 | } |
@@ -4213,6 +4347,7 @@ static inline void schedule_debug(struct task_struct *prev) | |||
4213 | */ | 4347 | */ |
4214 | if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) | 4348 | if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) |
4215 | __schedule_bug(prev); | 4349 | __schedule_bug(prev); |
4350 | rcu_sleep_check(); | ||
4216 | 4351 | ||
4217 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); | 4352 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); |
4218 | 4353 | ||
@@ -4239,7 +4374,7 @@ pick_next_task(struct rq *rq) | |||
4239 | * Optimization: we know that if all tasks are in | 4374 | * Optimization: we know that if all tasks are in |
4240 | * the fair class we can call that function directly: | 4375 | * the fair class we can call that function directly: |
4241 | */ | 4376 | */ |
4242 | if (likely(rq->nr_running == rq->cfs.nr_running)) { | 4377 | if (likely(rq->nr_running == rq->cfs.h_nr_running)) { |
4243 | p = fair_sched_class.pick_next_task(rq); | 4378 | p = fair_sched_class.pick_next_task(rq); |
4244 | if (likely(p)) | 4379 | if (likely(p)) |
4245 | return p; | 4380 | return p; |
@@ -4255,9 +4390,9 @@ pick_next_task(struct rq *rq) | |||
4255 | } | 4390 | } |
4256 | 4391 | ||
4257 | /* | 4392 | /* |
4258 | * schedule() is the main scheduler function. | 4393 | * __schedule() is the main scheduler function. |
4259 | */ | 4394 | */ |
4260 | asmlinkage void __sched schedule(void) | 4395 | static void __sched __schedule(void) |
4261 | { | 4396 | { |
4262 | struct task_struct *prev, *next; | 4397 | struct task_struct *prev, *next; |
4263 | unsigned long *switch_count; | 4398 | unsigned long *switch_count; |
@@ -4298,16 +4433,6 @@ need_resched: | |||
4298 | if (to_wakeup) | 4433 | if (to_wakeup) |
4299 | try_to_wake_up_local(to_wakeup); | 4434 | try_to_wake_up_local(to_wakeup); |
4300 | } | 4435 | } |
4301 | |||
4302 | /* | ||
4303 | * If we are going to sleep and we have plugged IO | ||
4304 | * queued, make sure to submit it to avoid deadlocks. | ||
4305 | */ | ||
4306 | if (blk_needs_flush_plug(prev)) { | ||
4307 | raw_spin_unlock(&rq->lock); | ||
4308 | blk_schedule_flush_plug(prev); | ||
4309 | raw_spin_lock(&rq->lock); | ||
4310 | } | ||
4311 | } | 4436 | } |
4312 | switch_count = &prev->nvcsw; | 4437 | switch_count = &prev->nvcsw; |
4313 | } | 4438 | } |
@@ -4345,6 +4470,26 @@ need_resched: | |||
4345 | if (need_resched()) | 4470 | if (need_resched()) |
4346 | goto need_resched; | 4471 | goto need_resched; |
4347 | } | 4472 | } |
4473 | |||
4474 | static inline void sched_submit_work(struct task_struct *tsk) | ||
4475 | { | ||
4476 | if (!tsk->state) | ||
4477 | return; | ||
4478 | /* | ||
4479 | * If we are going to sleep and we have plugged IO queued, | ||
4480 | * make sure to submit it to avoid deadlocks. | ||
4481 | */ | ||
4482 | if (blk_needs_flush_plug(tsk)) | ||
4483 | blk_schedule_flush_plug(tsk); | ||
4484 | } | ||
4485 | |||
4486 | asmlinkage void __sched schedule(void) | ||
4487 | { | ||
4488 | struct task_struct *tsk = current; | ||
4489 | |||
4490 | sched_submit_work(tsk); | ||
4491 | __schedule(); | ||
4492 | } | ||
4348 | EXPORT_SYMBOL(schedule); | 4493 | EXPORT_SYMBOL(schedule); |
4349 | 4494 | ||
4350 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | 4495 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER |
@@ -4411,7 +4556,7 @@ asmlinkage void __sched notrace preempt_schedule(void) | |||
4411 | 4556 | ||
4412 | do { | 4557 | do { |
4413 | add_preempt_count_notrace(PREEMPT_ACTIVE); | 4558 | add_preempt_count_notrace(PREEMPT_ACTIVE); |
4414 | schedule(); | 4559 | __schedule(); |
4415 | sub_preempt_count_notrace(PREEMPT_ACTIVE); | 4560 | sub_preempt_count_notrace(PREEMPT_ACTIVE); |
4416 | 4561 | ||
4417 | /* | 4562 | /* |
@@ -4439,7 +4584,7 @@ asmlinkage void __sched preempt_schedule_irq(void) | |||
4439 | do { | 4584 | do { |
4440 | add_preempt_count(PREEMPT_ACTIVE); | 4585 | add_preempt_count(PREEMPT_ACTIVE); |
4441 | local_irq_enable(); | 4586 | local_irq_enable(); |
4442 | schedule(); | 4587 | __schedule(); |
4443 | local_irq_disable(); | 4588 | local_irq_disable(); |
4444 | sub_preempt_count(PREEMPT_ACTIVE); | 4589 | sub_preempt_count(PREEMPT_ACTIVE); |
4445 | 4590 | ||
@@ -4666,6 +4811,9 @@ EXPORT_SYMBOL(wait_for_completion); | |||
4666 | * This waits for either a completion of a specific task to be signaled or for a | 4811 | * This waits for either a completion of a specific task to be signaled or for a |
4667 | * specified timeout to expire. The timeout is in jiffies. It is not | 4812 | * specified timeout to expire. The timeout is in jiffies. It is not |
4668 | * interruptible. | 4813 | * interruptible. |
4814 | * | ||
4815 | * The return value is 0 if timed out, and positive (at least 1, or number of | ||
4816 | * jiffies left till timeout) if completed. | ||
4669 | */ | 4817 | */ |
4670 | unsigned long __sched | 4818 | unsigned long __sched |
4671 | wait_for_completion_timeout(struct completion *x, unsigned long timeout) | 4819 | wait_for_completion_timeout(struct completion *x, unsigned long timeout) |
@@ -4680,6 +4828,8 @@ EXPORT_SYMBOL(wait_for_completion_timeout); | |||
4680 | * | 4828 | * |
4681 | * This waits for completion of a specific task to be signaled. It is | 4829 | * This waits for completion of a specific task to be signaled. It is |
4682 | * interruptible. | 4830 | * interruptible. |
4831 | * | ||
4832 | * The return value is -ERESTARTSYS if interrupted, 0 if completed. | ||
4683 | */ | 4833 | */ |
4684 | int __sched wait_for_completion_interruptible(struct completion *x) | 4834 | int __sched wait_for_completion_interruptible(struct completion *x) |
4685 | { | 4835 | { |
@@ -4697,6 +4847,9 @@ EXPORT_SYMBOL(wait_for_completion_interruptible); | |||
4697 | * | 4847 | * |
4698 | * This waits for either a completion of a specific task to be signaled or for a | 4848 | * This waits for either a completion of a specific task to be signaled or for a |
4699 | * specified timeout to expire. It is interruptible. The timeout is in jiffies. | 4849 | * specified timeout to expire. It is interruptible. The timeout is in jiffies. |
4850 | * | ||
4851 | * The return value is -ERESTARTSYS if interrupted, 0 if timed out, | ||
4852 | * positive (at least 1, or number of jiffies left till timeout) if completed. | ||
4700 | */ | 4853 | */ |
4701 | long __sched | 4854 | long __sched |
4702 | wait_for_completion_interruptible_timeout(struct completion *x, | 4855 | wait_for_completion_interruptible_timeout(struct completion *x, |
@@ -4712,6 +4865,8 @@ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout); | |||
4712 | * | 4865 | * |
4713 | * This waits to be signaled for completion of a specific task. It can be | 4866 | * This waits to be signaled for completion of a specific task. It can be |
4714 | * interrupted by a kill signal. | 4867 | * interrupted by a kill signal. |
4868 | * | ||
4869 | * The return value is -ERESTARTSYS if interrupted, 0 if completed. | ||
4715 | */ | 4870 | */ |
4716 | int __sched wait_for_completion_killable(struct completion *x) | 4871 | int __sched wait_for_completion_killable(struct completion *x) |
4717 | { | 4872 | { |
@@ -4730,6 +4885,9 @@ EXPORT_SYMBOL(wait_for_completion_killable); | |||
4730 | * This waits for either a completion of a specific task to be | 4885 | * This waits for either a completion of a specific task to be |
4731 | * signaled or for a specified timeout to expire. It can be | 4886 | * signaled or for a specified timeout to expire. It can be |
4732 | * interrupted by a kill signal. The timeout is in jiffies. | 4887 | * interrupted by a kill signal. The timeout is in jiffies. |
4888 | * | ||
4889 | * The return value is -ERESTARTSYS if interrupted, 0 if timed out, | ||
4890 | * positive (at least 1, or number of jiffies left till timeout) if completed. | ||
4733 | */ | 4891 | */ |
4734 | long __sched | 4892 | long __sched |
4735 | wait_for_completion_killable_timeout(struct completion *x, | 4893 | wait_for_completion_killable_timeout(struct completion *x, |
@@ -5015,7 +5173,20 @@ EXPORT_SYMBOL(task_nice); | |||
5015 | */ | 5173 | */ |
5016 | int idle_cpu(int cpu) | 5174 | int idle_cpu(int cpu) |
5017 | { | 5175 | { |
5018 | return cpu_curr(cpu) == cpu_rq(cpu)->idle; | 5176 | struct rq *rq = cpu_rq(cpu); |
5177 | |||
5178 | if (rq->curr != rq->idle) | ||
5179 | return 0; | ||
5180 | |||
5181 | if (rq->nr_running) | ||
5182 | return 0; | ||
5183 | |||
5184 | #ifdef CONFIG_SMP | ||
5185 | if (!llist_empty(&rq->wake_list)) | ||
5186 | return 0; | ||
5187 | #endif | ||
5188 | |||
5189 | return 1; | ||
5019 | } | 5190 | } |
5020 | 5191 | ||
5021 | /** | 5192 | /** |
@@ -5564,7 +5735,7 @@ static inline int should_resched(void) | |||
5564 | static void __cond_resched(void) | 5735 | static void __cond_resched(void) |
5565 | { | 5736 | { |
5566 | add_preempt_count(PREEMPT_ACTIVE); | 5737 | add_preempt_count(PREEMPT_ACTIVE); |
5567 | schedule(); | 5738 | __schedule(); |
5568 | sub_preempt_count(PREEMPT_ACTIVE); | 5739 | sub_preempt_count(PREEMPT_ACTIVE); |
5569 | } | 5740 | } |
5570 | 5741 | ||
@@ -5865,7 +6036,7 @@ void show_state_filter(unsigned long state_filter) | |||
5865 | printk(KERN_INFO | 6036 | printk(KERN_INFO |
5866 | " task PC stack pid father\n"); | 6037 | " task PC stack pid father\n"); |
5867 | #endif | 6038 | #endif |
5868 | read_lock(&tasklist_lock); | 6039 | rcu_read_lock(); |
5869 | do_each_thread(g, p) { | 6040 | do_each_thread(g, p) { |
5870 | /* | 6041 | /* |
5871 | * reset the NMI-timeout, listing all files on a slow | 6042 | * reset the NMI-timeout, listing all files on a slow |
@@ -5881,7 +6052,7 @@ void show_state_filter(unsigned long state_filter) | |||
5881 | #ifdef CONFIG_SCHED_DEBUG | 6052 | #ifdef CONFIG_SCHED_DEBUG |
5882 | sysrq_sched_debug_show(); | 6053 | sysrq_sched_debug_show(); |
5883 | #endif | 6054 | #endif |
5884 | read_unlock(&tasklist_lock); | 6055 | rcu_read_unlock(); |
5885 | /* | 6056 | /* |
5886 | * Only show locks if all tasks are dumped: | 6057 | * Only show locks if all tasks are dumped: |
5887 | */ | 6058 | */ |
@@ -5942,18 +6113,12 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
5942 | */ | 6113 | */ |
5943 | idle->sched_class = &idle_sched_class; | 6114 | idle->sched_class = &idle_sched_class; |
5944 | ftrace_graph_init_idle_task(idle, cpu); | 6115 | ftrace_graph_init_idle_task(idle, cpu); |
6116 | #if defined(CONFIG_SMP) | ||
6117 | sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); | ||
6118 | #endif | ||
5945 | } | 6119 | } |
5946 | 6120 | ||
5947 | /* | 6121 | /* |
5948 | * In a system that switches off the HZ timer nohz_cpu_mask | ||
5949 | * indicates which cpus entered this state. This is used | ||
5950 | * in the rcu update to wait only for active cpus. For system | ||
5951 | * which do not switch off the HZ timer nohz_cpu_mask should | ||
5952 | * always be CPU_BITS_NONE. | ||
5953 | */ | ||
5954 | cpumask_var_t nohz_cpu_mask; | ||
5955 | |||
5956 | /* | ||
5957 | * Increase the granularity value when there are more CPUs, | 6122 | * Increase the granularity value when there are more CPUs, |
5958 | * because with more CPUs the 'effective latency' as visible | 6123 | * because with more CPUs the 'effective latency' as visible |
5959 | * to users decreases. But the relationship is not linear, | 6124 | * to users decreases. But the relationship is not linear, |
@@ -6005,10 +6170,9 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) | |||
6005 | { | 6170 | { |
6006 | if (p->sched_class && p->sched_class->set_cpus_allowed) | 6171 | if (p->sched_class && p->sched_class->set_cpus_allowed) |
6007 | p->sched_class->set_cpus_allowed(p, new_mask); | 6172 | p->sched_class->set_cpus_allowed(p, new_mask); |
6008 | else { | 6173 | |
6009 | cpumask_copy(&p->cpus_allowed, new_mask); | 6174 | cpumask_copy(&p->cpus_allowed, new_mask); |
6010 | p->rt.nr_cpus_allowed = cpumask_weight(new_mask); | 6175 | p->rt.nr_cpus_allowed = cpumask_weight(new_mask); |
6011 | } | ||
6012 | } | 6176 | } |
6013 | 6177 | ||
6014 | /* | 6178 | /* |
@@ -6106,7 +6270,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
6106 | if (task_cpu(p) != src_cpu) | 6270 | if (task_cpu(p) != src_cpu) |
6107 | goto done; | 6271 | goto done; |
6108 | /* Affinity changed (again). */ | 6272 | /* Affinity changed (again). */ |
6109 | if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | 6273 | if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) |
6110 | goto fail; | 6274 | goto fail; |
6111 | 6275 | ||
6112 | /* | 6276 | /* |
@@ -6187,6 +6351,30 @@ static void calc_global_load_remove(struct rq *rq) | |||
6187 | rq->calc_load_active = 0; | 6351 | rq->calc_load_active = 0; |
6188 | } | 6352 | } |
6189 | 6353 | ||
6354 | #ifdef CONFIG_CFS_BANDWIDTH | ||
6355 | static void unthrottle_offline_cfs_rqs(struct rq *rq) | ||
6356 | { | ||
6357 | struct cfs_rq *cfs_rq; | ||
6358 | |||
6359 | for_each_leaf_cfs_rq(rq, cfs_rq) { | ||
6360 | struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); | ||
6361 | |||
6362 | if (!cfs_rq->runtime_enabled) | ||
6363 | continue; | ||
6364 | |||
6365 | /* | ||
6366 | * clock_task is not advancing so we just need to make sure | ||
6367 | * there's some valid quota amount | ||
6368 | */ | ||
6369 | cfs_rq->runtime_remaining = cfs_b->quota; | ||
6370 | if (cfs_rq_throttled(cfs_rq)) | ||
6371 | unthrottle_cfs_rq(cfs_rq); | ||
6372 | } | ||
6373 | } | ||
6374 | #else | ||
6375 | static void unthrottle_offline_cfs_rqs(struct rq *rq) {} | ||
6376 | #endif | ||
6377 | |||
6190 | /* | 6378 | /* |
6191 | * Migrate all tasks from the rq, sleeping tasks will be migrated by | 6379 | * Migrate all tasks from the rq, sleeping tasks will be migrated by |
6192 | * try_to_wake_up()->select_task_rq(). | 6380 | * try_to_wake_up()->select_task_rq(). |
@@ -6212,6 +6400,9 @@ static void migrate_tasks(unsigned int dead_cpu) | |||
6212 | */ | 6400 | */ |
6213 | rq->stop = NULL; | 6401 | rq->stop = NULL; |
6214 | 6402 | ||
6403 | /* Ensure any throttled groups are reachable by pick_next_task */ | ||
6404 | unthrottle_offline_cfs_rqs(rq); | ||
6405 | |||
6215 | for ( ; ; ) { | 6406 | for ( ; ; ) { |
6216 | /* | 6407 | /* |
6217 | * There's this thread running, bail when that's the only | 6408 | * There's this thread running, bail when that's the only |
@@ -6913,8 +7104,6 @@ static int __init isolated_cpu_setup(char *str) | |||
6913 | 7104 | ||
6914 | __setup("isolcpus=", isolated_cpu_setup); | 7105 | __setup("isolcpus=", isolated_cpu_setup); |
6915 | 7106 | ||
6916 | #define SD_NODES_PER_DOMAIN 16 | ||
6917 | |||
6918 | #ifdef CONFIG_NUMA | 7107 | #ifdef CONFIG_NUMA |
6919 | 7108 | ||
6920 | /** | 7109 | /** |
@@ -7419,6 +7608,7 @@ static void __sdt_free(const struct cpumask *cpu_map) | |||
7419 | struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j); | 7608 | struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j); |
7420 | if (sd && (sd->flags & SD_OVERLAP)) | 7609 | if (sd && (sd->flags & SD_OVERLAP)) |
7421 | free_sched_groups(sd->groups, 0); | 7610 | free_sched_groups(sd->groups, 0); |
7611 | kfree(*per_cpu_ptr(sdd->sd, j)); | ||
7422 | kfree(*per_cpu_ptr(sdd->sg, j)); | 7612 | kfree(*per_cpu_ptr(sdd->sg, j)); |
7423 | kfree(*per_cpu_ptr(sdd->sgp, j)); | 7613 | kfree(*per_cpu_ptr(sdd->sgp, j)); |
7424 | } | 7614 | } |
@@ -7954,6 +8144,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, | |||
7954 | /* allow initial update_cfs_load() to truncate */ | 8144 | /* allow initial update_cfs_load() to truncate */ |
7955 | cfs_rq->load_stamp = 1; | 8145 | cfs_rq->load_stamp = 1; |
7956 | #endif | 8146 | #endif |
8147 | init_cfs_rq_runtime(cfs_rq); | ||
7957 | 8148 | ||
7958 | tg->cfs_rq[cpu] = cfs_rq; | 8149 | tg->cfs_rq[cpu] = cfs_rq; |
7959 | tg->se[cpu] = se; | 8150 | tg->se[cpu] = se; |
@@ -8093,6 +8284,7 @@ void __init sched_init(void) | |||
8093 | * We achieve this by letting root_task_group's tasks sit | 8284 | * We achieve this by letting root_task_group's tasks sit |
8094 | * directly in rq->cfs (i.e root_task_group->se[] = NULL). | 8285 | * directly in rq->cfs (i.e root_task_group->se[] = NULL). |
8095 | */ | 8286 | */ |
8287 | init_cfs_bandwidth(&root_task_group.cfs_bandwidth); | ||
8096 | init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL); | 8288 | init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL); |
8097 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 8289 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
8098 | 8290 | ||
@@ -8122,7 +8314,6 @@ void __init sched_init(void) | |||
8122 | rq_attach_root(rq, &def_root_domain); | 8314 | rq_attach_root(rq, &def_root_domain); |
8123 | #ifdef CONFIG_NO_HZ | 8315 | #ifdef CONFIG_NO_HZ |
8124 | rq->nohz_balance_kick = 0; | 8316 | rq->nohz_balance_kick = 0; |
8125 | init_sched_softirq_csd(&per_cpu(remote_sched_softirq_cb, i)); | ||
8126 | #endif | 8317 | #endif |
8127 | #endif | 8318 | #endif |
8128 | init_rq_hrtick(rq); | 8319 | init_rq_hrtick(rq); |
@@ -8164,8 +8355,6 @@ void __init sched_init(void) | |||
8164 | */ | 8355 | */ |
8165 | current->sched_class = &fair_sched_class; | 8356 | current->sched_class = &fair_sched_class; |
8166 | 8357 | ||
8167 | /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ | ||
8168 | zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); | ||
8169 | #ifdef CONFIG_SMP | 8358 | #ifdef CONFIG_SMP |
8170 | zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); | 8359 | zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); |
8171 | #ifdef CONFIG_NO_HZ | 8360 | #ifdef CONFIG_NO_HZ |
@@ -8195,6 +8384,7 @@ void __might_sleep(const char *file, int line, int preempt_offset) | |||
8195 | { | 8384 | { |
8196 | static unsigned long prev_jiffy; /* ratelimiting */ | 8385 | static unsigned long prev_jiffy; /* ratelimiting */ |
8197 | 8386 | ||
8387 | rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ | ||
8198 | if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || | 8388 | if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || |
8199 | system_state != SYSTEM_RUNNING || oops_in_progress) | 8389 | system_state != SYSTEM_RUNNING || oops_in_progress) |
8200 | return; | 8390 | return; |
@@ -8334,6 +8524,8 @@ static void free_fair_sched_group(struct task_group *tg) | |||
8334 | { | 8524 | { |
8335 | int i; | 8525 | int i; |
8336 | 8526 | ||
8527 | destroy_cfs_bandwidth(tg_cfs_bandwidth(tg)); | ||
8528 | |||
8337 | for_each_possible_cpu(i) { | 8529 | for_each_possible_cpu(i) { |
8338 | if (tg->cfs_rq) | 8530 | if (tg->cfs_rq) |
8339 | kfree(tg->cfs_rq[i]); | 8531 | kfree(tg->cfs_rq[i]); |
@@ -8361,6 +8553,8 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
8361 | 8553 | ||
8362 | tg->shares = NICE_0_LOAD; | 8554 | tg->shares = NICE_0_LOAD; |
8363 | 8555 | ||
8556 | init_cfs_bandwidth(tg_cfs_bandwidth(tg)); | ||
8557 | |||
8364 | for_each_possible_cpu(i) { | 8558 | for_each_possible_cpu(i) { |
8365 | cfs_rq = kzalloc_node(sizeof(struct cfs_rq), | 8559 | cfs_rq = kzalloc_node(sizeof(struct cfs_rq), |
8366 | GFP_KERNEL, cpu_to_node(i)); | 8560 | GFP_KERNEL, cpu_to_node(i)); |
@@ -8636,12 +8830,7 @@ unsigned long sched_group_shares(struct task_group *tg) | |||
8636 | } | 8830 | } |
8637 | #endif | 8831 | #endif |
8638 | 8832 | ||
8639 | #ifdef CONFIG_RT_GROUP_SCHED | 8833 | #if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_CFS_BANDWIDTH) |
8640 | /* | ||
8641 | * Ensure that the real time constraints are schedulable. | ||
8642 | */ | ||
8643 | static DEFINE_MUTEX(rt_constraints_mutex); | ||
8644 | |||
8645 | static unsigned long to_ratio(u64 period, u64 runtime) | 8834 | static unsigned long to_ratio(u64 period, u64 runtime) |
8646 | { | 8835 | { |
8647 | if (runtime == RUNTIME_INF) | 8836 | if (runtime == RUNTIME_INF) |
@@ -8649,6 +8838,13 @@ static unsigned long to_ratio(u64 period, u64 runtime) | |||
8649 | 8838 | ||
8650 | return div64_u64(runtime << 20, period); | 8839 | return div64_u64(runtime << 20, period); |
8651 | } | 8840 | } |
8841 | #endif | ||
8842 | |||
8843 | #ifdef CONFIG_RT_GROUP_SCHED | ||
8844 | /* | ||
8845 | * Ensure that the real time constraints are schedulable. | ||
8846 | */ | ||
8847 | static DEFINE_MUTEX(rt_constraints_mutex); | ||
8652 | 8848 | ||
8653 | /* Must be called with tasklist_lock held */ | 8849 | /* Must be called with tasklist_lock held */ |
8654 | static inline int tg_has_rt_tasks(struct task_group *tg) | 8850 | static inline int tg_has_rt_tasks(struct task_group *tg) |
@@ -8669,7 +8865,7 @@ struct rt_schedulable_data { | |||
8669 | u64 rt_runtime; | 8865 | u64 rt_runtime; |
8670 | }; | 8866 | }; |
8671 | 8867 | ||
8672 | static int tg_schedulable(struct task_group *tg, void *data) | 8868 | static int tg_rt_schedulable(struct task_group *tg, void *data) |
8673 | { | 8869 | { |
8674 | struct rt_schedulable_data *d = data; | 8870 | struct rt_schedulable_data *d = data; |
8675 | struct task_group *child; | 8871 | struct task_group *child; |
@@ -8727,16 +8923,22 @@ static int tg_schedulable(struct task_group *tg, void *data) | |||
8727 | 8923 | ||
8728 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) | 8924 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) |
8729 | { | 8925 | { |
8926 | int ret; | ||
8927 | |||
8730 | struct rt_schedulable_data data = { | 8928 | struct rt_schedulable_data data = { |
8731 | .tg = tg, | 8929 | .tg = tg, |
8732 | .rt_period = period, | 8930 | .rt_period = period, |
8733 | .rt_runtime = runtime, | 8931 | .rt_runtime = runtime, |
8734 | }; | 8932 | }; |
8735 | 8933 | ||
8736 | return walk_tg_tree(tg_schedulable, tg_nop, &data); | 8934 | rcu_read_lock(); |
8935 | ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data); | ||
8936 | rcu_read_unlock(); | ||
8937 | |||
8938 | return ret; | ||
8737 | } | 8939 | } |
8738 | 8940 | ||
8739 | static int tg_set_bandwidth(struct task_group *tg, | 8941 | static int tg_set_rt_bandwidth(struct task_group *tg, |
8740 | u64 rt_period, u64 rt_runtime) | 8942 | u64 rt_period, u64 rt_runtime) |
8741 | { | 8943 | { |
8742 | int i, err = 0; | 8944 | int i, err = 0; |
@@ -8775,7 +8977,7 @@ int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) | |||
8775 | if (rt_runtime_us < 0) | 8977 | if (rt_runtime_us < 0) |
8776 | rt_runtime = RUNTIME_INF; | 8978 | rt_runtime = RUNTIME_INF; |
8777 | 8979 | ||
8778 | return tg_set_bandwidth(tg, rt_period, rt_runtime); | 8980 | return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); |
8779 | } | 8981 | } |
8780 | 8982 | ||
8781 | long sched_group_rt_runtime(struct task_group *tg) | 8983 | long sched_group_rt_runtime(struct task_group *tg) |
@@ -8800,7 +9002,7 @@ int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) | |||
8800 | if (rt_period == 0) | 9002 | if (rt_period == 0) |
8801 | return -EINVAL; | 9003 | return -EINVAL; |
8802 | 9004 | ||
8803 | return tg_set_bandwidth(tg, rt_period, rt_runtime); | 9005 | return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); |
8804 | } | 9006 | } |
8805 | 9007 | ||
8806 | long sched_group_rt_period(struct task_group *tg) | 9008 | long sched_group_rt_period(struct task_group *tg) |
@@ -8990,6 +9192,238 @@ static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft) | |||
8990 | 9192 | ||
8991 | return (u64) scale_load_down(tg->shares); | 9193 | return (u64) scale_load_down(tg->shares); |
8992 | } | 9194 | } |
9195 | |||
9196 | #ifdef CONFIG_CFS_BANDWIDTH | ||
9197 | static DEFINE_MUTEX(cfs_constraints_mutex); | ||
9198 | |||
9199 | const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */ | ||
9200 | const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */ | ||
9201 | |||
9202 | static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime); | ||
9203 | |||
9204 | static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) | ||
9205 | { | ||
9206 | int i, ret = 0, runtime_enabled; | ||
9207 | struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); | ||
9208 | |||
9209 | if (tg == &root_task_group) | ||
9210 | return -EINVAL; | ||
9211 | |||
9212 | /* | ||
9213 | * Ensure we have at some amount of bandwidth every period. This is | ||
9214 | * to prevent reaching a state of large arrears when throttled via | ||
9215 | * entity_tick() resulting in prolonged exit starvation. | ||
9216 | */ | ||
9217 | if (quota < min_cfs_quota_period || period < min_cfs_quota_period) | ||
9218 | return -EINVAL; | ||
9219 | |||
9220 | /* | ||
9221 | * Likewise, bound things on the otherside by preventing insane quota | ||
9222 | * periods. This also allows us to normalize in computing quota | ||
9223 | * feasibility. | ||
9224 | */ | ||
9225 | if (period > max_cfs_quota_period) | ||
9226 | return -EINVAL; | ||
9227 | |||
9228 | mutex_lock(&cfs_constraints_mutex); | ||
9229 | ret = __cfs_schedulable(tg, period, quota); | ||
9230 | if (ret) | ||
9231 | goto out_unlock; | ||
9232 | |||
9233 | runtime_enabled = quota != RUNTIME_INF; | ||
9234 | raw_spin_lock_irq(&cfs_b->lock); | ||
9235 | cfs_b->period = ns_to_ktime(period); | ||
9236 | cfs_b->quota = quota; | ||
9237 | |||
9238 | __refill_cfs_bandwidth_runtime(cfs_b); | ||
9239 | /* restart the period timer (if active) to handle new period expiry */ | ||
9240 | if (runtime_enabled && cfs_b->timer_active) { | ||
9241 | /* force a reprogram */ | ||
9242 | cfs_b->timer_active = 0; | ||
9243 | __start_cfs_bandwidth(cfs_b); | ||
9244 | } | ||
9245 | raw_spin_unlock_irq(&cfs_b->lock); | ||
9246 | |||
9247 | for_each_possible_cpu(i) { | ||
9248 | struct cfs_rq *cfs_rq = tg->cfs_rq[i]; | ||
9249 | struct rq *rq = rq_of(cfs_rq); | ||
9250 | |||
9251 | raw_spin_lock_irq(&rq->lock); | ||
9252 | cfs_rq->runtime_enabled = runtime_enabled; | ||
9253 | cfs_rq->runtime_remaining = 0; | ||
9254 | |||
9255 | if (cfs_rq_throttled(cfs_rq)) | ||
9256 | unthrottle_cfs_rq(cfs_rq); | ||
9257 | raw_spin_unlock_irq(&rq->lock); | ||
9258 | } | ||
9259 | out_unlock: | ||
9260 | mutex_unlock(&cfs_constraints_mutex); | ||
9261 | |||
9262 | return ret; | ||
9263 | } | ||
9264 | |||
9265 | int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) | ||
9266 | { | ||
9267 | u64 quota, period; | ||
9268 | |||
9269 | period = ktime_to_ns(tg_cfs_bandwidth(tg)->period); | ||
9270 | if (cfs_quota_us < 0) | ||
9271 | quota = RUNTIME_INF; | ||
9272 | else | ||
9273 | quota = (u64)cfs_quota_us * NSEC_PER_USEC; | ||
9274 | |||
9275 | return tg_set_cfs_bandwidth(tg, period, quota); | ||
9276 | } | ||
9277 | |||
9278 | long tg_get_cfs_quota(struct task_group *tg) | ||
9279 | { | ||
9280 | u64 quota_us; | ||
9281 | |||
9282 | if (tg_cfs_bandwidth(tg)->quota == RUNTIME_INF) | ||
9283 | return -1; | ||
9284 | |||
9285 | quota_us = tg_cfs_bandwidth(tg)->quota; | ||
9286 | do_div(quota_us, NSEC_PER_USEC); | ||
9287 | |||
9288 | return quota_us; | ||
9289 | } | ||
9290 | |||
9291 | int tg_set_cfs_period(struct task_group *tg, long cfs_period_us) | ||
9292 | { | ||
9293 | u64 quota, period; | ||
9294 | |||
9295 | period = (u64)cfs_period_us * NSEC_PER_USEC; | ||
9296 | quota = tg_cfs_bandwidth(tg)->quota; | ||
9297 | |||
9298 | if (period <= 0) | ||
9299 | return -EINVAL; | ||
9300 | |||
9301 | return tg_set_cfs_bandwidth(tg, period, quota); | ||
9302 | } | ||
9303 | |||
9304 | long tg_get_cfs_period(struct task_group *tg) | ||
9305 | { | ||
9306 | u64 cfs_period_us; | ||
9307 | |||
9308 | cfs_period_us = ktime_to_ns(tg_cfs_bandwidth(tg)->period); | ||
9309 | do_div(cfs_period_us, NSEC_PER_USEC); | ||
9310 | |||
9311 | return cfs_period_us; | ||
9312 | } | ||
9313 | |||
9314 | static s64 cpu_cfs_quota_read_s64(struct cgroup *cgrp, struct cftype *cft) | ||
9315 | { | ||
9316 | return tg_get_cfs_quota(cgroup_tg(cgrp)); | ||
9317 | } | ||
9318 | |||
9319 | static int cpu_cfs_quota_write_s64(struct cgroup *cgrp, struct cftype *cftype, | ||
9320 | s64 cfs_quota_us) | ||
9321 | { | ||
9322 | return tg_set_cfs_quota(cgroup_tg(cgrp), cfs_quota_us); | ||
9323 | } | ||
9324 | |||
9325 | static u64 cpu_cfs_period_read_u64(struct cgroup *cgrp, struct cftype *cft) | ||
9326 | { | ||
9327 | return tg_get_cfs_period(cgroup_tg(cgrp)); | ||
9328 | } | ||
9329 | |||
9330 | static int cpu_cfs_period_write_u64(struct cgroup *cgrp, struct cftype *cftype, | ||
9331 | u64 cfs_period_us) | ||
9332 | { | ||
9333 | return tg_set_cfs_period(cgroup_tg(cgrp), cfs_period_us); | ||
9334 | } | ||
9335 | |||
9336 | struct cfs_schedulable_data { | ||
9337 | struct task_group *tg; | ||
9338 | u64 period, quota; | ||
9339 | }; | ||
9340 | |||
9341 | /* | ||
9342 | * normalize group quota/period to be quota/max_period | ||
9343 | * note: units are usecs | ||
9344 | */ | ||
9345 | static u64 normalize_cfs_quota(struct task_group *tg, | ||
9346 | struct cfs_schedulable_data *d) | ||
9347 | { | ||
9348 | u64 quota, period; | ||
9349 | |||
9350 | if (tg == d->tg) { | ||
9351 | period = d->period; | ||
9352 | quota = d->quota; | ||
9353 | } else { | ||
9354 | period = tg_get_cfs_period(tg); | ||
9355 | quota = tg_get_cfs_quota(tg); | ||
9356 | } | ||
9357 | |||
9358 | /* note: these should typically be equivalent */ | ||
9359 | if (quota == RUNTIME_INF || quota == -1) | ||
9360 | return RUNTIME_INF; | ||
9361 | |||
9362 | return to_ratio(period, quota); | ||
9363 | } | ||
9364 | |||
9365 | static int tg_cfs_schedulable_down(struct task_group *tg, void *data) | ||
9366 | { | ||
9367 | struct cfs_schedulable_data *d = data; | ||
9368 | struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); | ||
9369 | s64 quota = 0, parent_quota = -1; | ||
9370 | |||
9371 | if (!tg->parent) { | ||
9372 | quota = RUNTIME_INF; | ||
9373 | } else { | ||
9374 | struct cfs_bandwidth *parent_b = tg_cfs_bandwidth(tg->parent); | ||
9375 | |||
9376 | quota = normalize_cfs_quota(tg, d); | ||
9377 | parent_quota = parent_b->hierarchal_quota; | ||
9378 | |||
9379 | /* | ||
9380 | * ensure max(child_quota) <= parent_quota, inherit when no | ||
9381 | * limit is set | ||
9382 | */ | ||
9383 | if (quota == RUNTIME_INF) | ||
9384 | quota = parent_quota; | ||
9385 | else if (parent_quota != RUNTIME_INF && quota > parent_quota) | ||
9386 | return -EINVAL; | ||
9387 | } | ||
9388 | cfs_b->hierarchal_quota = quota; | ||
9389 | |||
9390 | return 0; | ||
9391 | } | ||
9392 | |||
9393 | static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota) | ||
9394 | { | ||
9395 | int ret; | ||
9396 | struct cfs_schedulable_data data = { | ||
9397 | .tg = tg, | ||
9398 | .period = period, | ||
9399 | .quota = quota, | ||
9400 | }; | ||
9401 | |||
9402 | if (quota != RUNTIME_INF) { | ||
9403 | do_div(data.period, NSEC_PER_USEC); | ||
9404 | do_div(data.quota, NSEC_PER_USEC); | ||
9405 | } | ||
9406 | |||
9407 | rcu_read_lock(); | ||
9408 | ret = walk_tg_tree(tg_cfs_schedulable_down, tg_nop, &data); | ||
9409 | rcu_read_unlock(); | ||
9410 | |||
9411 | return ret; | ||
9412 | } | ||
9413 | |||
9414 | static int cpu_stats_show(struct cgroup *cgrp, struct cftype *cft, | ||
9415 | struct cgroup_map_cb *cb) | ||
9416 | { | ||
9417 | struct task_group *tg = cgroup_tg(cgrp); | ||
9418 | struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); | ||
9419 | |||
9420 | cb->fill(cb, "nr_periods", cfs_b->nr_periods); | ||
9421 | cb->fill(cb, "nr_throttled", cfs_b->nr_throttled); | ||
9422 | cb->fill(cb, "throttled_time", cfs_b->throttled_time); | ||
9423 | |||
9424 | return 0; | ||
9425 | } | ||
9426 | #endif /* CONFIG_CFS_BANDWIDTH */ | ||
8993 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 9427 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
8994 | 9428 | ||
8995 | #ifdef CONFIG_RT_GROUP_SCHED | 9429 | #ifdef CONFIG_RT_GROUP_SCHED |
@@ -9024,6 +9458,22 @@ static struct cftype cpu_files[] = { | |||
9024 | .write_u64 = cpu_shares_write_u64, | 9458 | .write_u64 = cpu_shares_write_u64, |
9025 | }, | 9459 | }, |
9026 | #endif | 9460 | #endif |
9461 | #ifdef CONFIG_CFS_BANDWIDTH | ||
9462 | { | ||
9463 | .name = "cfs_quota_us", | ||
9464 | .read_s64 = cpu_cfs_quota_read_s64, | ||
9465 | .write_s64 = cpu_cfs_quota_write_s64, | ||
9466 | }, | ||
9467 | { | ||
9468 | .name = "cfs_period_us", | ||
9469 | .read_u64 = cpu_cfs_period_read_u64, | ||
9470 | .write_u64 = cpu_cfs_period_write_u64, | ||
9471 | }, | ||
9472 | { | ||
9473 | .name = "stat", | ||
9474 | .read_map = cpu_stats_show, | ||
9475 | }, | ||
9476 | #endif | ||
9027 | #ifdef CONFIG_RT_GROUP_SCHED | 9477 | #ifdef CONFIG_RT_GROUP_SCHED |
9028 | { | 9478 | { |
9029 | .name = "rt_runtime_us", | 9479 | .name = "rt_runtime_us", |
@@ -9333,4 +9783,3 @@ struct cgroup_subsys cpuacct_subsys = { | |||
9333 | .subsys_id = cpuacct_subsys_id, | 9783 | .subsys_id = cpuacct_subsys_id, |
9334 | }; | 9784 | }; |
9335 | #endif /* CONFIG_CGROUP_CPUACCT */ | 9785 | #endif /* CONFIG_CGROUP_CPUACCT */ |
9336 | |||
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 9d8af0b3fb6..c685e31492d 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c | |||
@@ -62,7 +62,7 @@ | |||
62 | */ | 62 | */ |
63 | #include <linux/spinlock.h> | 63 | #include <linux/spinlock.h> |
64 | #include <linux/hardirq.h> | 64 | #include <linux/hardirq.h> |
65 | #include <linux/module.h> | 65 | #include <linux/export.h> |
66 | #include <linux/percpu.h> | 66 | #include <linux/percpu.h> |
67 | #include <linux/ktime.h> | 67 | #include <linux/ktime.h> |
68 | #include <linux/sched.h> | 68 | #include <linux/sched.h> |
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c index 2722dc1b413..a86cf9d9eb1 100644 --- a/kernel/sched_cpupri.c +++ b/kernel/sched_cpupri.c | |||
@@ -47,9 +47,6 @@ static int convert_prio(int prio) | |||
47 | return cpupri; | 47 | return cpupri; |
48 | } | 48 | } |
49 | 49 | ||
50 | #define for_each_cpupri_active(array, idx) \ | ||
51 | for_each_set_bit(idx, array, CPUPRI_NR_PRIORITIES) | ||
52 | |||
53 | /** | 50 | /** |
54 | * cpupri_find - find the best (lowest-pri) CPU in the system | 51 | * cpupri_find - find the best (lowest-pri) CPU in the system |
55 | * @cp: The cpupri context | 52 | * @cp: The cpupri context |
@@ -71,11 +68,38 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, | |||
71 | int idx = 0; | 68 | int idx = 0; |
72 | int task_pri = convert_prio(p->prio); | 69 | int task_pri = convert_prio(p->prio); |
73 | 70 | ||
74 | for_each_cpupri_active(cp->pri_active, idx) { | 71 | if (task_pri >= MAX_RT_PRIO) |
75 | struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; | 72 | return 0; |
76 | 73 | ||
77 | if (idx >= task_pri) | 74 | for (idx = 0; idx < task_pri; idx++) { |
78 | break; | 75 | struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; |
76 | int skip = 0; | ||
77 | |||
78 | if (!atomic_read(&(vec)->count)) | ||
79 | skip = 1; | ||
80 | /* | ||
81 | * When looking at the vector, we need to read the counter, | ||
82 | * do a memory barrier, then read the mask. | ||
83 | * | ||
84 | * Note: This is still all racy, but we can deal with it. | ||
85 | * Ideally, we only want to look at masks that are set. | ||
86 | * | ||
87 | * If a mask is not set, then the only thing wrong is that we | ||
88 | * did a little more work than necessary. | ||
89 | * | ||
90 | * If we read a zero count but the mask is set, because of the | ||
91 | * memory barriers, that can only happen when the highest prio | ||
92 | * task for a run queue has left the run queue, in which case, | ||
93 | * it will be followed by a pull. If the task we are processing | ||
94 | * fails to find a proper place to go, that pull request will | ||
95 | * pull this task if the run queue is running at a lower | ||
96 | * priority. | ||
97 | */ | ||
98 | smp_rmb(); | ||
99 | |||
100 | /* Need to do the rmb for every iteration */ | ||
101 | if (skip) | ||
102 | continue; | ||
79 | 103 | ||
80 | if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) | 104 | if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) |
81 | continue; | 105 | continue; |
@@ -115,7 +139,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) | |||
115 | { | 139 | { |
116 | int *currpri = &cp->cpu_to_pri[cpu]; | 140 | int *currpri = &cp->cpu_to_pri[cpu]; |
117 | int oldpri = *currpri; | 141 | int oldpri = *currpri; |
118 | unsigned long flags; | 142 | int do_mb = 0; |
119 | 143 | ||
120 | newpri = convert_prio(newpri); | 144 | newpri = convert_prio(newpri); |
121 | 145 | ||
@@ -128,32 +152,46 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) | |||
128 | * If the cpu was currently mapped to a different value, we | 152 | * If the cpu was currently mapped to a different value, we |
129 | * need to map it to the new value then remove the old value. | 153 | * need to map it to the new value then remove the old value. |
130 | * Note, we must add the new value first, otherwise we risk the | 154 | * Note, we must add the new value first, otherwise we risk the |
131 | * cpu being cleared from pri_active, and this cpu could be | 155 | * cpu being missed by the priority loop in cpupri_find. |
132 | * missed for a push or pull. | ||
133 | */ | 156 | */ |
134 | if (likely(newpri != CPUPRI_INVALID)) { | 157 | if (likely(newpri != CPUPRI_INVALID)) { |
135 | struct cpupri_vec *vec = &cp->pri_to_cpu[newpri]; | 158 | struct cpupri_vec *vec = &cp->pri_to_cpu[newpri]; |
136 | 159 | ||
137 | raw_spin_lock_irqsave(&vec->lock, flags); | ||
138 | |||
139 | cpumask_set_cpu(cpu, vec->mask); | 160 | cpumask_set_cpu(cpu, vec->mask); |
140 | vec->count++; | 161 | /* |
141 | if (vec->count == 1) | 162 | * When adding a new vector, we update the mask first, |
142 | set_bit(newpri, cp->pri_active); | 163 | * do a write memory barrier, and then update the count, to |
143 | 164 | * make sure the vector is visible when count is set. | |
144 | raw_spin_unlock_irqrestore(&vec->lock, flags); | 165 | */ |
166 | smp_mb__before_atomic_inc(); | ||
167 | atomic_inc(&(vec)->count); | ||
168 | do_mb = 1; | ||
145 | } | 169 | } |
146 | if (likely(oldpri != CPUPRI_INVALID)) { | 170 | if (likely(oldpri != CPUPRI_INVALID)) { |
147 | struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri]; | 171 | struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri]; |
148 | 172 | ||
149 | raw_spin_lock_irqsave(&vec->lock, flags); | 173 | /* |
150 | 174 | * Because the order of modification of the vec->count | |
151 | vec->count--; | 175 | * is important, we must make sure that the update |
152 | if (!vec->count) | 176 | * of the new prio is seen before we decrement the |
153 | clear_bit(oldpri, cp->pri_active); | 177 | * old prio. This makes sure that the loop sees |
178 | * one or the other when we raise the priority of | ||
179 | * the run queue. We don't care about when we lower the | ||
180 | * priority, as that will trigger an rt pull anyway. | ||
181 | * | ||
182 | * We only need to do a memory barrier if we updated | ||
183 | * the new priority vec. | ||
184 | */ | ||
185 | if (do_mb) | ||
186 | smp_mb__after_atomic_inc(); | ||
187 | |||
188 | /* | ||
189 | * When removing from the vector, we decrement the counter first, | ||
190 | * do a memory barrier and then clear the mask. | ||
191 | */ | ||
192 | atomic_dec(&(vec)->count); | ||
193 | smp_mb__after_atomic_inc(); | ||
154 | cpumask_clear_cpu(cpu, vec->mask); | 194 | cpumask_clear_cpu(cpu, vec->mask); |
155 | |||
156 | raw_spin_unlock_irqrestore(&vec->lock, flags); | ||
157 | } | 195 | } |
158 | 196 | ||
159 | *currpri = newpri; | 197 | *currpri = newpri; |
@@ -175,8 +213,7 @@ int cpupri_init(struct cpupri *cp) | |||
175 | for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) { | 213 | for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) { |
176 | struct cpupri_vec *vec = &cp->pri_to_cpu[i]; | 214 | struct cpupri_vec *vec = &cp->pri_to_cpu[i]; |
177 | 215 | ||
178 | raw_spin_lock_init(&vec->lock); | 216 | atomic_set(&vec->count, 0); |
179 | vec->count = 0; | ||
180 | if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL)) | 217 | if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL)) |
181 | goto cleanup; | 218 | goto cleanup; |
182 | } | 219 | } |
diff --git a/kernel/sched_cpupri.h b/kernel/sched_cpupri.h index 9fc7d386fea..f6d75617349 100644 --- a/kernel/sched_cpupri.h +++ b/kernel/sched_cpupri.h | |||
@@ -4,7 +4,6 @@ | |||
4 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
5 | 5 | ||
6 | #define CPUPRI_NR_PRIORITIES (MAX_RT_PRIO + 2) | 6 | #define CPUPRI_NR_PRIORITIES (MAX_RT_PRIO + 2) |
7 | #define CPUPRI_NR_PRI_WORDS BITS_TO_LONGS(CPUPRI_NR_PRIORITIES) | ||
8 | 7 | ||
9 | #define CPUPRI_INVALID -1 | 8 | #define CPUPRI_INVALID -1 |
10 | #define CPUPRI_IDLE 0 | 9 | #define CPUPRI_IDLE 0 |
@@ -12,14 +11,12 @@ | |||
12 | /* values 2-101 are RT priorities 0-99 */ | 11 | /* values 2-101 are RT priorities 0-99 */ |
13 | 12 | ||
14 | struct cpupri_vec { | 13 | struct cpupri_vec { |
15 | raw_spinlock_t lock; | 14 | atomic_t count; |
16 | int count; | 15 | cpumask_var_t mask; |
17 | cpumask_var_t mask; | ||
18 | }; | 16 | }; |
19 | 17 | ||
20 | struct cpupri { | 18 | struct cpupri { |
21 | struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES]; | 19 | struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES]; |
22 | long pri_active[CPUPRI_NR_PRI_WORDS]; | ||
23 | int cpu_to_pri[NR_CPUS]; | 20 | int cpu_to_pri[NR_CPUS]; |
24 | }; | 21 | }; |
25 | 22 | ||
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index bc8ee999381..a78ed2736ba 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -89,6 +89,20 @@ const_debug unsigned int sysctl_sched_migration_cost = 500000UL; | |||
89 | */ | 89 | */ |
90 | unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL; | 90 | unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL; |
91 | 91 | ||
92 | #ifdef CONFIG_CFS_BANDWIDTH | ||
93 | /* | ||
94 | * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool | ||
95 | * each time a cfs_rq requests quota. | ||
96 | * | ||
97 | * Note: in the case that the slice exceeds the runtime remaining (either due | ||
98 | * to consumption or the quota being specified to be smaller than the slice) | ||
99 | * we will always only issue the remaining available time. | ||
100 | * | ||
101 | * default: 5 msec, units: microseconds | ||
102 | */ | ||
103 | unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; | ||
104 | #endif | ||
105 | |||
92 | static const struct sched_class fair_sched_class; | 106 | static const struct sched_class fair_sched_class; |
93 | 107 | ||
94 | /************************************************************** | 108 | /************************************************************** |
@@ -292,6 +306,8 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse) | |||
292 | 306 | ||
293 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 307 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
294 | 308 | ||
309 | static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, | ||
310 | unsigned long delta_exec); | ||
295 | 311 | ||
296 | /************************************************************** | 312 | /************************************************************** |
297 | * Scheduling class tree data structure manipulation methods: | 313 | * Scheduling class tree data structure manipulation methods: |
@@ -583,6 +599,8 @@ static void update_curr(struct cfs_rq *cfs_rq) | |||
583 | cpuacct_charge(curtask, delta_exec); | 599 | cpuacct_charge(curtask, delta_exec); |
584 | account_group_exec_runtime(curtask, delta_exec); | 600 | account_group_exec_runtime(curtask, delta_exec); |
585 | } | 601 | } |
602 | |||
603 | account_cfs_rq_runtime(cfs_rq, delta_exec); | ||
586 | } | 604 | } |
587 | 605 | ||
588 | static inline void | 606 | static inline void |
@@ -688,6 +706,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
688 | } | 706 | } |
689 | 707 | ||
690 | #ifdef CONFIG_FAIR_GROUP_SCHED | 708 | #ifdef CONFIG_FAIR_GROUP_SCHED |
709 | /* we need this in update_cfs_load and load-balance functions below */ | ||
710 | static inline int throttled_hierarchy(struct cfs_rq *cfs_rq); | ||
691 | # ifdef CONFIG_SMP | 711 | # ifdef CONFIG_SMP |
692 | static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq, | 712 | static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq, |
693 | int global_update) | 713 | int global_update) |
@@ -710,7 +730,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | |||
710 | u64 now, delta; | 730 | u64 now, delta; |
711 | unsigned long load = cfs_rq->load.weight; | 731 | unsigned long load = cfs_rq->load.weight; |
712 | 732 | ||
713 | if (cfs_rq->tg == &root_task_group) | 733 | if (cfs_rq->tg == &root_task_group || throttled_hierarchy(cfs_rq)) |
714 | return; | 734 | return; |
715 | 735 | ||
716 | now = rq_of(cfs_rq)->clock_task; | 736 | now = rq_of(cfs_rq)->clock_task; |
@@ -752,19 +772,32 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | |||
752 | list_del_leaf_cfs_rq(cfs_rq); | 772 | list_del_leaf_cfs_rq(cfs_rq); |
753 | } | 773 | } |
754 | 774 | ||
775 | static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq) | ||
776 | { | ||
777 | long tg_weight; | ||
778 | |||
779 | /* | ||
780 | * Use this CPU's actual weight instead of the last load_contribution | ||
781 | * to gain a more accurate current total weight. See | ||
782 | * update_cfs_rq_load_contribution(). | ||
783 | */ | ||
784 | tg_weight = atomic_read(&tg->load_weight); | ||
785 | tg_weight -= cfs_rq->load_contribution; | ||
786 | tg_weight += cfs_rq->load.weight; | ||
787 | |||
788 | return tg_weight; | ||
789 | } | ||
790 | |||
755 | static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) | 791 | static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) |
756 | { | 792 | { |
757 | long load_weight, load, shares; | 793 | long tg_weight, load, shares; |
758 | 794 | ||
795 | tg_weight = calc_tg_weight(tg, cfs_rq); | ||
759 | load = cfs_rq->load.weight; | 796 | load = cfs_rq->load.weight; |
760 | 797 | ||
761 | load_weight = atomic_read(&tg->load_weight); | ||
762 | load_weight += load; | ||
763 | load_weight -= cfs_rq->load_contribution; | ||
764 | |||
765 | shares = (tg->shares * load); | 798 | shares = (tg->shares * load); |
766 | if (load_weight) | 799 | if (tg_weight) |
767 | shares /= load_weight; | 800 | shares /= tg_weight; |
768 | 801 | ||
769 | if (shares < MIN_SHARES) | 802 | if (shares < MIN_SHARES) |
770 | shares = MIN_SHARES; | 803 | shares = MIN_SHARES; |
@@ -819,7 +852,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq) | |||
819 | 852 | ||
820 | tg = cfs_rq->tg; | 853 | tg = cfs_rq->tg; |
821 | se = tg->se[cpu_of(rq_of(cfs_rq))]; | 854 | se = tg->se[cpu_of(rq_of(cfs_rq))]; |
822 | if (!se) | 855 | if (!se || throttled_hierarchy(cfs_rq)) |
823 | return; | 856 | return; |
824 | #ifndef CONFIG_SMP | 857 | #ifndef CONFIG_SMP |
825 | if (likely(se->load.weight == tg->shares)) | 858 | if (likely(se->load.weight == tg->shares)) |
@@ -950,6 +983,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
950 | se->vruntime = vruntime; | 983 | se->vruntime = vruntime; |
951 | } | 984 | } |
952 | 985 | ||
986 | static void check_enqueue_throttle(struct cfs_rq *cfs_rq); | ||
987 | |||
953 | static void | 988 | static void |
954 | enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | 989 | enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) |
955 | { | 990 | { |
@@ -979,8 +1014,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
979 | __enqueue_entity(cfs_rq, se); | 1014 | __enqueue_entity(cfs_rq, se); |
980 | se->on_rq = 1; | 1015 | se->on_rq = 1; |
981 | 1016 | ||
982 | if (cfs_rq->nr_running == 1) | 1017 | if (cfs_rq->nr_running == 1) { |
983 | list_add_leaf_cfs_rq(cfs_rq); | 1018 | list_add_leaf_cfs_rq(cfs_rq); |
1019 | check_enqueue_throttle(cfs_rq); | ||
1020 | } | ||
984 | } | 1021 | } |
985 | 1022 | ||
986 | static void __clear_buddies_last(struct sched_entity *se) | 1023 | static void __clear_buddies_last(struct sched_entity *se) |
@@ -1028,6 +1065,8 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
1028 | __clear_buddies_skip(se); | 1065 | __clear_buddies_skip(se); |
1029 | } | 1066 | } |
1030 | 1067 | ||
1068 | static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); | ||
1069 | |||
1031 | static void | 1070 | static void |
1032 | dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | 1071 | dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) |
1033 | { | 1072 | { |
@@ -1066,6 +1105,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
1066 | if (!(flags & DEQUEUE_SLEEP)) | 1105 | if (!(flags & DEQUEUE_SLEEP)) |
1067 | se->vruntime -= cfs_rq->min_vruntime; | 1106 | se->vruntime -= cfs_rq->min_vruntime; |
1068 | 1107 | ||
1108 | /* return excess runtime on last dequeue */ | ||
1109 | return_cfs_rq_runtime(cfs_rq); | ||
1110 | |||
1069 | update_min_vruntime(cfs_rq); | 1111 | update_min_vruntime(cfs_rq); |
1070 | update_cfs_shares(cfs_rq); | 1112 | update_cfs_shares(cfs_rq); |
1071 | } | 1113 | } |
@@ -1077,6 +1119,8 @@ static void | |||
1077 | check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | 1119 | check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) |
1078 | { | 1120 | { |
1079 | unsigned long ideal_runtime, delta_exec; | 1121 | unsigned long ideal_runtime, delta_exec; |
1122 | struct sched_entity *se; | ||
1123 | s64 delta; | ||
1080 | 1124 | ||
1081 | ideal_runtime = sched_slice(cfs_rq, curr); | 1125 | ideal_runtime = sched_slice(cfs_rq, curr); |
1082 | delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; | 1126 | delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; |
@@ -1095,22 +1139,17 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | |||
1095 | * narrow margin doesn't have to wait for a full slice. | 1139 | * narrow margin doesn't have to wait for a full slice. |
1096 | * This also mitigates buddy induced latencies under load. | 1140 | * This also mitigates buddy induced latencies under load. |
1097 | */ | 1141 | */ |
1098 | if (!sched_feat(WAKEUP_PREEMPT)) | ||
1099 | return; | ||
1100 | |||
1101 | if (delta_exec < sysctl_sched_min_granularity) | 1142 | if (delta_exec < sysctl_sched_min_granularity) |
1102 | return; | 1143 | return; |
1103 | 1144 | ||
1104 | if (cfs_rq->nr_running > 1) { | 1145 | se = __pick_first_entity(cfs_rq); |
1105 | struct sched_entity *se = __pick_first_entity(cfs_rq); | 1146 | delta = curr->vruntime - se->vruntime; |
1106 | s64 delta = curr->vruntime - se->vruntime; | ||
1107 | 1147 | ||
1108 | if (delta < 0) | 1148 | if (delta < 0) |
1109 | return; | 1149 | return; |
1110 | 1150 | ||
1111 | if (delta > ideal_runtime) | 1151 | if (delta > ideal_runtime) |
1112 | resched_task(rq_of(cfs_rq)->curr); | 1152 | resched_task(rq_of(cfs_rq)->curr); |
1113 | } | ||
1114 | } | 1153 | } |
1115 | 1154 | ||
1116 | static void | 1155 | static void |
@@ -1185,6 +1224,8 @@ static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) | |||
1185 | return se; | 1224 | return se; |
1186 | } | 1225 | } |
1187 | 1226 | ||
1227 | static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq); | ||
1228 | |||
1188 | static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) | 1229 | static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) |
1189 | { | 1230 | { |
1190 | /* | 1231 | /* |
@@ -1194,6 +1235,9 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) | |||
1194 | if (prev->on_rq) | 1235 | if (prev->on_rq) |
1195 | update_curr(cfs_rq); | 1236 | update_curr(cfs_rq); |
1196 | 1237 | ||
1238 | /* throttle cfs_rqs exceeding runtime */ | ||
1239 | check_cfs_rq_runtime(cfs_rq); | ||
1240 | |||
1197 | check_spread(cfs_rq, prev); | 1241 | check_spread(cfs_rq, prev); |
1198 | if (prev->on_rq) { | 1242 | if (prev->on_rq) { |
1199 | update_stats_wait_start(cfs_rq, prev); | 1243 | update_stats_wait_start(cfs_rq, prev); |
@@ -1233,10 +1277,583 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) | |||
1233 | return; | 1277 | return; |
1234 | #endif | 1278 | #endif |
1235 | 1279 | ||
1236 | if (cfs_rq->nr_running > 1 || !sched_feat(WAKEUP_PREEMPT)) | 1280 | if (cfs_rq->nr_running > 1) |
1237 | check_preempt_tick(cfs_rq, curr); | 1281 | check_preempt_tick(cfs_rq, curr); |
1238 | } | 1282 | } |
1239 | 1283 | ||
1284 | |||
1285 | /************************************************** | ||
1286 | * CFS bandwidth control machinery | ||
1287 | */ | ||
1288 | |||
1289 | #ifdef CONFIG_CFS_BANDWIDTH | ||
1290 | /* | ||
1291 | * default period for cfs group bandwidth. | ||
1292 | * default: 0.1s, units: nanoseconds | ||
1293 | */ | ||
1294 | static inline u64 default_cfs_period(void) | ||
1295 | { | ||
1296 | return 100000000ULL; | ||
1297 | } | ||
1298 | |||
1299 | static inline u64 sched_cfs_bandwidth_slice(void) | ||
1300 | { | ||
1301 | return (u64)sysctl_sched_cfs_bandwidth_slice * NSEC_PER_USEC; | ||
1302 | } | ||
1303 | |||
1304 | /* | ||
1305 | * Replenish runtime according to assigned quota and update expiration time. | ||
1306 | * We use sched_clock_cpu directly instead of rq->clock to avoid adding | ||
1307 | * additional synchronization around rq->lock. | ||
1308 | * | ||
1309 | * requires cfs_b->lock | ||
1310 | */ | ||
1311 | static void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) | ||
1312 | { | ||
1313 | u64 now; | ||
1314 | |||
1315 | if (cfs_b->quota == RUNTIME_INF) | ||
1316 | return; | ||
1317 | |||
1318 | now = sched_clock_cpu(smp_processor_id()); | ||
1319 | cfs_b->runtime = cfs_b->quota; | ||
1320 | cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period); | ||
1321 | } | ||
1322 | |||
1323 | /* returns 0 on failure to allocate runtime */ | ||
1324 | static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) | ||
1325 | { | ||
1326 | struct task_group *tg = cfs_rq->tg; | ||
1327 | struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); | ||
1328 | u64 amount = 0, min_amount, expires; | ||
1329 | |||
1330 | /* note: this is a positive sum as runtime_remaining <= 0 */ | ||
1331 | min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining; | ||
1332 | |||
1333 | raw_spin_lock(&cfs_b->lock); | ||
1334 | if (cfs_b->quota == RUNTIME_INF) | ||
1335 | amount = min_amount; | ||
1336 | else { | ||
1337 | /* | ||
1338 | * If the bandwidth pool has become inactive, then at least one | ||
1339 | * period must have elapsed since the last consumption. | ||
1340 | * Refresh the global state and ensure bandwidth timer becomes | ||
1341 | * active. | ||
1342 | */ | ||
1343 | if (!cfs_b->timer_active) { | ||
1344 | __refill_cfs_bandwidth_runtime(cfs_b); | ||
1345 | __start_cfs_bandwidth(cfs_b); | ||
1346 | } | ||
1347 | |||
1348 | if (cfs_b->runtime > 0) { | ||
1349 | amount = min(cfs_b->runtime, min_amount); | ||
1350 | cfs_b->runtime -= amount; | ||
1351 | cfs_b->idle = 0; | ||
1352 | } | ||
1353 | } | ||
1354 | expires = cfs_b->runtime_expires; | ||
1355 | raw_spin_unlock(&cfs_b->lock); | ||
1356 | |||
1357 | cfs_rq->runtime_remaining += amount; | ||
1358 | /* | ||
1359 | * we may have advanced our local expiration to account for allowed | ||
1360 | * spread between our sched_clock and the one on which runtime was | ||
1361 | * issued. | ||
1362 | */ | ||
1363 | if ((s64)(expires - cfs_rq->runtime_expires) > 0) | ||
1364 | cfs_rq->runtime_expires = expires; | ||
1365 | |||
1366 | return cfs_rq->runtime_remaining > 0; | ||
1367 | } | ||
1368 | |||
1369 | /* | ||
1370 | * Note: This depends on the synchronization provided by sched_clock and the | ||
1371 | * fact that rq->clock snapshots this value. | ||
1372 | */ | ||
1373 | static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq) | ||
1374 | { | ||
1375 | struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); | ||
1376 | struct rq *rq = rq_of(cfs_rq); | ||
1377 | |||
1378 | /* if the deadline is ahead of our clock, nothing to do */ | ||
1379 | if (likely((s64)(rq->clock - cfs_rq->runtime_expires) < 0)) | ||
1380 | return; | ||
1381 | |||
1382 | if (cfs_rq->runtime_remaining < 0) | ||
1383 | return; | ||
1384 | |||
1385 | /* | ||
1386 | * If the local deadline has passed we have to consider the | ||
1387 | * possibility that our sched_clock is 'fast' and the global deadline | ||
1388 | * has not truly expired. | ||
1389 | * | ||
1390 | * Fortunately we can determine whether this is the case by checking | ||
1391 | * whether the global deadline has advanced. | ||
1392 | */ | ||
1393 | |||
1394 | if ((s64)(cfs_rq->runtime_expires - cfs_b->runtime_expires) >= 0) { | ||
1395 | /* extend local deadline, drift is bounded above by 2 ticks */ | ||
1396 | cfs_rq->runtime_expires += TICK_NSEC; | ||
1397 | } else { | ||
1398 | /* global deadline is ahead, expiration has passed */ | ||
1399 | cfs_rq->runtime_remaining = 0; | ||
1400 | } | ||
1401 | } | ||
1402 | |||
1403 | static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, | ||
1404 | unsigned long delta_exec) | ||
1405 | { | ||
1406 | /* dock delta_exec before expiring quota (as it could span periods) */ | ||
1407 | cfs_rq->runtime_remaining -= delta_exec; | ||
1408 | expire_cfs_rq_runtime(cfs_rq); | ||
1409 | |||
1410 | if (likely(cfs_rq->runtime_remaining > 0)) | ||
1411 | return; | ||
1412 | |||
1413 | /* | ||
1414 | * if we're unable to extend our runtime we resched so that the active | ||
1415 | * hierarchy can be throttled | ||
1416 | */ | ||
1417 | if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) | ||
1418 | resched_task(rq_of(cfs_rq)->curr); | ||
1419 | } | ||
1420 | |||
1421 | static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, | ||
1422 | unsigned long delta_exec) | ||
1423 | { | ||
1424 | if (!cfs_rq->runtime_enabled) | ||
1425 | return; | ||
1426 | |||
1427 | __account_cfs_rq_runtime(cfs_rq, delta_exec); | ||
1428 | } | ||
1429 | |||
1430 | static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq) | ||
1431 | { | ||
1432 | return cfs_rq->throttled; | ||
1433 | } | ||
1434 | |||
1435 | /* check whether cfs_rq, or any parent, is throttled */ | ||
1436 | static inline int throttled_hierarchy(struct cfs_rq *cfs_rq) | ||
1437 | { | ||
1438 | return cfs_rq->throttle_count; | ||
1439 | } | ||
1440 | |||
1441 | /* | ||
1442 | * Ensure that neither of the group entities corresponding to src_cpu or | ||
1443 | * dest_cpu are members of a throttled hierarchy when performing group | ||
1444 | * load-balance operations. | ||
1445 | */ | ||
1446 | static inline int throttled_lb_pair(struct task_group *tg, | ||
1447 | int src_cpu, int dest_cpu) | ||
1448 | { | ||
1449 | struct cfs_rq *src_cfs_rq, *dest_cfs_rq; | ||
1450 | |||
1451 | src_cfs_rq = tg->cfs_rq[src_cpu]; | ||
1452 | dest_cfs_rq = tg->cfs_rq[dest_cpu]; | ||
1453 | |||
1454 | return throttled_hierarchy(src_cfs_rq) || | ||
1455 | throttled_hierarchy(dest_cfs_rq); | ||
1456 | } | ||
1457 | |||
1458 | /* updated child weight may affect parent so we have to do this bottom up */ | ||
1459 | static int tg_unthrottle_up(struct task_group *tg, void *data) | ||
1460 | { | ||
1461 | struct rq *rq = data; | ||
1462 | struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)]; | ||
1463 | |||
1464 | cfs_rq->throttle_count--; | ||
1465 | #ifdef CONFIG_SMP | ||
1466 | if (!cfs_rq->throttle_count) { | ||
1467 | u64 delta = rq->clock_task - cfs_rq->load_stamp; | ||
1468 | |||
1469 | /* leaving throttled state, advance shares averaging windows */ | ||
1470 | cfs_rq->load_stamp += delta; | ||
1471 | cfs_rq->load_last += delta; | ||
1472 | |||
1473 | /* update entity weight now that we are on_rq again */ | ||
1474 | update_cfs_shares(cfs_rq); | ||
1475 | } | ||
1476 | #endif | ||
1477 | |||
1478 | return 0; | ||
1479 | } | ||
1480 | |||
1481 | static int tg_throttle_down(struct task_group *tg, void *data) | ||
1482 | { | ||
1483 | struct rq *rq = data; | ||
1484 | struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)]; | ||
1485 | |||
1486 | /* group is entering throttled state, record last load */ | ||
1487 | if (!cfs_rq->throttle_count) | ||
1488 | update_cfs_load(cfs_rq, 0); | ||
1489 | cfs_rq->throttle_count++; | ||
1490 | |||
1491 | return 0; | ||
1492 | } | ||
1493 | |||
1494 | static void throttle_cfs_rq(struct cfs_rq *cfs_rq) | ||
1495 | { | ||
1496 | struct rq *rq = rq_of(cfs_rq); | ||
1497 | struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); | ||
1498 | struct sched_entity *se; | ||
1499 | long task_delta, dequeue = 1; | ||
1500 | |||
1501 | se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))]; | ||
1502 | |||
1503 | /* account load preceding throttle */ | ||
1504 | rcu_read_lock(); | ||
1505 | walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq); | ||
1506 | rcu_read_unlock(); | ||
1507 | |||
1508 | task_delta = cfs_rq->h_nr_running; | ||
1509 | for_each_sched_entity(se) { | ||
1510 | struct cfs_rq *qcfs_rq = cfs_rq_of(se); | ||
1511 | /* throttled entity or throttle-on-deactivate */ | ||
1512 | if (!se->on_rq) | ||
1513 | break; | ||
1514 | |||
1515 | if (dequeue) | ||
1516 | dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP); | ||
1517 | qcfs_rq->h_nr_running -= task_delta; | ||
1518 | |||
1519 | if (qcfs_rq->load.weight) | ||
1520 | dequeue = 0; | ||
1521 | } | ||
1522 | |||
1523 | if (!se) | ||
1524 | rq->nr_running -= task_delta; | ||
1525 | |||
1526 | cfs_rq->throttled = 1; | ||
1527 | cfs_rq->throttled_timestamp = rq->clock; | ||
1528 | raw_spin_lock(&cfs_b->lock); | ||
1529 | list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); | ||
1530 | raw_spin_unlock(&cfs_b->lock); | ||
1531 | } | ||
1532 | |||
1533 | static void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) | ||
1534 | { | ||
1535 | struct rq *rq = rq_of(cfs_rq); | ||
1536 | struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); | ||
1537 | struct sched_entity *se; | ||
1538 | int enqueue = 1; | ||
1539 | long task_delta; | ||
1540 | |||
1541 | se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))]; | ||
1542 | |||
1543 | cfs_rq->throttled = 0; | ||
1544 | raw_spin_lock(&cfs_b->lock); | ||
1545 | cfs_b->throttled_time += rq->clock - cfs_rq->throttled_timestamp; | ||
1546 | list_del_rcu(&cfs_rq->throttled_list); | ||
1547 | raw_spin_unlock(&cfs_b->lock); | ||
1548 | cfs_rq->throttled_timestamp = 0; | ||
1549 | |||
1550 | update_rq_clock(rq); | ||
1551 | /* update hierarchical throttle state */ | ||
1552 | walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq); | ||
1553 | |||
1554 | if (!cfs_rq->load.weight) | ||
1555 | return; | ||
1556 | |||
1557 | task_delta = cfs_rq->h_nr_running; | ||
1558 | for_each_sched_entity(se) { | ||
1559 | if (se->on_rq) | ||
1560 | enqueue = 0; | ||
1561 | |||
1562 | cfs_rq = cfs_rq_of(se); | ||
1563 | if (enqueue) | ||
1564 | enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP); | ||
1565 | cfs_rq->h_nr_running += task_delta; | ||
1566 | |||
1567 | if (cfs_rq_throttled(cfs_rq)) | ||
1568 | break; | ||
1569 | } | ||
1570 | |||
1571 | if (!se) | ||
1572 | rq->nr_running += task_delta; | ||
1573 | |||
1574 | /* determine whether we need to wake up potentially idle cpu */ | ||
1575 | if (rq->curr == rq->idle && rq->cfs.nr_running) | ||
1576 | resched_task(rq->curr); | ||
1577 | } | ||
1578 | |||
1579 | static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, | ||
1580 | u64 remaining, u64 expires) | ||
1581 | { | ||
1582 | struct cfs_rq *cfs_rq; | ||
1583 | u64 runtime = remaining; | ||
1584 | |||
1585 | rcu_read_lock(); | ||
1586 | list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq, | ||
1587 | throttled_list) { | ||
1588 | struct rq *rq = rq_of(cfs_rq); | ||
1589 | |||
1590 | raw_spin_lock(&rq->lock); | ||
1591 | if (!cfs_rq_throttled(cfs_rq)) | ||
1592 | goto next; | ||
1593 | |||
1594 | runtime = -cfs_rq->runtime_remaining + 1; | ||
1595 | if (runtime > remaining) | ||
1596 | runtime = remaining; | ||
1597 | remaining -= runtime; | ||
1598 | |||
1599 | cfs_rq->runtime_remaining += runtime; | ||
1600 | cfs_rq->runtime_expires = expires; | ||
1601 | |||
1602 | /* we check whether we're throttled above */ | ||
1603 | if (cfs_rq->runtime_remaining > 0) | ||
1604 | unthrottle_cfs_rq(cfs_rq); | ||
1605 | |||
1606 | next: | ||
1607 | raw_spin_unlock(&rq->lock); | ||
1608 | |||
1609 | if (!remaining) | ||
1610 | break; | ||
1611 | } | ||
1612 | rcu_read_unlock(); | ||
1613 | |||
1614 | return remaining; | ||
1615 | } | ||
1616 | |||
1617 | /* | ||
1618 | * Responsible for refilling a task_group's bandwidth and unthrottling its | ||
1619 | * cfs_rqs as appropriate. If there has been no activity within the last | ||
1620 | * period the timer is deactivated until scheduling resumes; cfs_b->idle is | ||
1621 | * used to track this state. | ||
1622 | */ | ||
1623 | static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) | ||
1624 | { | ||
1625 | u64 runtime, runtime_expires; | ||
1626 | int idle = 1, throttled; | ||
1627 | |||
1628 | raw_spin_lock(&cfs_b->lock); | ||
1629 | /* no need to continue the timer with no bandwidth constraint */ | ||
1630 | if (cfs_b->quota == RUNTIME_INF) | ||
1631 | goto out_unlock; | ||
1632 | |||
1633 | throttled = !list_empty(&cfs_b->throttled_cfs_rq); | ||
1634 | /* idle depends on !throttled (for the case of a large deficit) */ | ||
1635 | idle = cfs_b->idle && !throttled; | ||
1636 | cfs_b->nr_periods += overrun; | ||
1637 | |||
1638 | /* if we're going inactive then everything else can be deferred */ | ||
1639 | if (idle) | ||
1640 | goto out_unlock; | ||
1641 | |||
1642 | __refill_cfs_bandwidth_runtime(cfs_b); | ||
1643 | |||
1644 | if (!throttled) { | ||
1645 | /* mark as potentially idle for the upcoming period */ | ||
1646 | cfs_b->idle = 1; | ||
1647 | goto out_unlock; | ||
1648 | } | ||
1649 | |||
1650 | /* account preceding periods in which throttling occurred */ | ||
1651 | cfs_b->nr_throttled += overrun; | ||
1652 | |||
1653 | /* | ||
1654 | * There are throttled entities so we must first use the new bandwidth | ||
1655 | * to unthrottle them before making it generally available. This | ||
1656 | * ensures that all existing debts will be paid before a new cfs_rq is | ||
1657 | * allowed to run. | ||
1658 | */ | ||
1659 | runtime = cfs_b->runtime; | ||
1660 | runtime_expires = cfs_b->runtime_expires; | ||
1661 | cfs_b->runtime = 0; | ||
1662 | |||
1663 | /* | ||
1664 | * This check is repeated as we are holding onto the new bandwidth | ||
1665 | * while we unthrottle. This can potentially race with an unthrottled | ||
1666 | * group trying to acquire new bandwidth from the global pool. | ||
1667 | */ | ||
1668 | while (throttled && runtime > 0) { | ||
1669 | raw_spin_unlock(&cfs_b->lock); | ||
1670 | /* we can't nest cfs_b->lock while distributing bandwidth */ | ||
1671 | runtime = distribute_cfs_runtime(cfs_b, runtime, | ||
1672 | runtime_expires); | ||
1673 | raw_spin_lock(&cfs_b->lock); | ||
1674 | |||
1675 | throttled = !list_empty(&cfs_b->throttled_cfs_rq); | ||
1676 | } | ||
1677 | |||
1678 | /* return (any) remaining runtime */ | ||
1679 | cfs_b->runtime = runtime; | ||
1680 | /* | ||
1681 | * While we are ensured activity in the period following an | ||
1682 | * unthrottle, this also covers the case in which the new bandwidth is | ||
1683 | * insufficient to cover the existing bandwidth deficit. (Forcing the | ||
1684 | * timer to remain active while there are any throttled entities.) | ||
1685 | */ | ||
1686 | cfs_b->idle = 0; | ||
1687 | out_unlock: | ||
1688 | if (idle) | ||
1689 | cfs_b->timer_active = 0; | ||
1690 | raw_spin_unlock(&cfs_b->lock); | ||
1691 | |||
1692 | return idle; | ||
1693 | } | ||
1694 | |||
1695 | /* a cfs_rq won't donate quota below this amount */ | ||
1696 | static const u64 min_cfs_rq_runtime = 1 * NSEC_PER_MSEC; | ||
1697 | /* minimum remaining period time to redistribute slack quota */ | ||
1698 | static const u64 min_bandwidth_expiration = 2 * NSEC_PER_MSEC; | ||
1699 | /* how long we wait to gather additional slack before distributing */ | ||
1700 | static const u64 cfs_bandwidth_slack_period = 5 * NSEC_PER_MSEC; | ||
1701 | |||
1702 | /* are we near the end of the current quota period? */ | ||
1703 | static int runtime_refresh_within(struct cfs_bandwidth *cfs_b, u64 min_expire) | ||
1704 | { | ||
1705 | struct hrtimer *refresh_timer = &cfs_b->period_timer; | ||
1706 | u64 remaining; | ||
1707 | |||
1708 | /* if the call-back is running a quota refresh is already occurring */ | ||
1709 | if (hrtimer_callback_running(refresh_timer)) | ||
1710 | return 1; | ||
1711 | |||
1712 | /* is a quota refresh about to occur? */ | ||
1713 | remaining = ktime_to_ns(hrtimer_expires_remaining(refresh_timer)); | ||
1714 | if (remaining < min_expire) | ||
1715 | return 1; | ||
1716 | |||
1717 | return 0; | ||
1718 | } | ||
1719 | |||
1720 | static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b) | ||
1721 | { | ||
1722 | u64 min_left = cfs_bandwidth_slack_period + min_bandwidth_expiration; | ||
1723 | |||
1724 | /* if there's a quota refresh soon don't bother with slack */ | ||
1725 | if (runtime_refresh_within(cfs_b, min_left)) | ||
1726 | return; | ||
1727 | |||
1728 | start_bandwidth_timer(&cfs_b->slack_timer, | ||
1729 | ns_to_ktime(cfs_bandwidth_slack_period)); | ||
1730 | } | ||
1731 | |||
1732 | /* we know any runtime found here is valid as update_curr() precedes return */ | ||
1733 | static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq) | ||
1734 | { | ||
1735 | struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); | ||
1736 | s64 slack_runtime = cfs_rq->runtime_remaining - min_cfs_rq_runtime; | ||
1737 | |||
1738 | if (slack_runtime <= 0) | ||
1739 | return; | ||
1740 | |||
1741 | raw_spin_lock(&cfs_b->lock); | ||
1742 | if (cfs_b->quota != RUNTIME_INF && | ||
1743 | cfs_rq->runtime_expires == cfs_b->runtime_expires) { | ||
1744 | cfs_b->runtime += slack_runtime; | ||
1745 | |||
1746 | /* we are under rq->lock, defer unthrottling using a timer */ | ||
1747 | if (cfs_b->runtime > sched_cfs_bandwidth_slice() && | ||
1748 | !list_empty(&cfs_b->throttled_cfs_rq)) | ||
1749 | start_cfs_slack_bandwidth(cfs_b); | ||
1750 | } | ||
1751 | raw_spin_unlock(&cfs_b->lock); | ||
1752 | |||
1753 | /* even if it's not valid for return we don't want to try again */ | ||
1754 | cfs_rq->runtime_remaining -= slack_runtime; | ||
1755 | } | ||
1756 | |||
1757 | static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) | ||
1758 | { | ||
1759 | if (!cfs_rq->runtime_enabled || cfs_rq->nr_running) | ||
1760 | return; | ||
1761 | |||
1762 | __return_cfs_rq_runtime(cfs_rq); | ||
1763 | } | ||
1764 | |||
1765 | /* | ||
1766 | * This is done with a timer (instead of inline with bandwidth return) since | ||
1767 | * it's necessary to juggle rq->locks to unthrottle their respective cfs_rqs. | ||
1768 | */ | ||
1769 | static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) | ||
1770 | { | ||
1771 | u64 runtime = 0, slice = sched_cfs_bandwidth_slice(); | ||
1772 | u64 expires; | ||
1773 | |||
1774 | /* confirm we're still not at a refresh boundary */ | ||
1775 | if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) | ||
1776 | return; | ||
1777 | |||
1778 | raw_spin_lock(&cfs_b->lock); | ||
1779 | if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice) { | ||
1780 | runtime = cfs_b->runtime; | ||
1781 | cfs_b->runtime = 0; | ||
1782 | } | ||
1783 | expires = cfs_b->runtime_expires; | ||
1784 | raw_spin_unlock(&cfs_b->lock); | ||
1785 | |||
1786 | if (!runtime) | ||
1787 | return; | ||
1788 | |||
1789 | runtime = distribute_cfs_runtime(cfs_b, runtime, expires); | ||
1790 | |||
1791 | raw_spin_lock(&cfs_b->lock); | ||
1792 | if (expires == cfs_b->runtime_expires) | ||
1793 | cfs_b->runtime = runtime; | ||
1794 | raw_spin_unlock(&cfs_b->lock); | ||
1795 | } | ||
1796 | |||
1797 | /* | ||
1798 | * When a group wakes up we want to make sure that its quota is not already | ||
1799 | * expired/exceeded, otherwise it may be allowed to steal additional ticks of | ||
1800 | * runtime as update_curr() throttling can not not trigger until it's on-rq. | ||
1801 | */ | ||
1802 | static void check_enqueue_throttle(struct cfs_rq *cfs_rq) | ||
1803 | { | ||
1804 | /* an active group must be handled by the update_curr()->put() path */ | ||
1805 | if (!cfs_rq->runtime_enabled || cfs_rq->curr) | ||
1806 | return; | ||
1807 | |||
1808 | /* ensure the group is not already throttled */ | ||
1809 | if (cfs_rq_throttled(cfs_rq)) | ||
1810 | return; | ||
1811 | |||
1812 | /* update runtime allocation */ | ||
1813 | account_cfs_rq_runtime(cfs_rq, 0); | ||
1814 | if (cfs_rq->runtime_remaining <= 0) | ||
1815 | throttle_cfs_rq(cfs_rq); | ||
1816 | } | ||
1817 | |||
1818 | /* conditionally throttle active cfs_rq's from put_prev_entity() */ | ||
1819 | static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) | ||
1820 | { | ||
1821 | if (likely(!cfs_rq->runtime_enabled || cfs_rq->runtime_remaining > 0)) | ||
1822 | return; | ||
1823 | |||
1824 | /* | ||
1825 | * it's possible for a throttled entity to be forced into a running | ||
1826 | * state (e.g. set_curr_task), in this case we're finished. | ||
1827 | */ | ||
1828 | if (cfs_rq_throttled(cfs_rq)) | ||
1829 | return; | ||
1830 | |||
1831 | throttle_cfs_rq(cfs_rq); | ||
1832 | } | ||
1833 | #else | ||
/*
 * Stand-ins used by the #else branch of the build: the accounting and
 * throttling hooks do nothing, and the throttle queries always report
 * "not throttled".
 */
static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
		unsigned long delta_exec)
{
}

static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq)
{
}

static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
{
}

static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
{
}

/* a cfs_rq is never throttled in this configuration */
static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
{
	return 0;
}

/* no hierarchy is ever throttled in this configuration */
static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
{
	return 0;
}

/* load balancing between two cpus is never blocked by throttling here */
static inline int throttled_lb_pair(struct task_group *tg,
		int src_cpu, int dest_cpu)
{
	return 0;
}
1855 | #endif | ||
1856 | |||
1240 | /************************************************** | 1857 | /************************************************** |
1241 | * CFS operations on tasks: | 1858 | * CFS operations on tasks: |
1242 | */ | 1859 | */ |
@@ -1313,16 +1930,33 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
1313 | break; | 1930 | break; |
1314 | cfs_rq = cfs_rq_of(se); | 1931 | cfs_rq = cfs_rq_of(se); |
1315 | enqueue_entity(cfs_rq, se, flags); | 1932 | enqueue_entity(cfs_rq, se, flags); |
1933 | |||
1934 | /* | ||
1935 | * end evaluation on encountering a throttled cfs_rq | ||
1936 | * | ||
1937 | * note: in the case of encountering a throttled cfs_rq we will | ||
1938 | * post the final h_nr_running increment below. | ||
1939 | */ | ||
1940 | if (cfs_rq_throttled(cfs_rq)) | ||
1941 | break; | ||
1942 | cfs_rq->h_nr_running++; | ||
1943 | |||
1316 | flags = ENQUEUE_WAKEUP; | 1944 | flags = ENQUEUE_WAKEUP; |
1317 | } | 1945 | } |
1318 | 1946 | ||
1319 | for_each_sched_entity(se) { | 1947 | for_each_sched_entity(se) { |
1320 | cfs_rq = cfs_rq_of(se); | 1948 | cfs_rq = cfs_rq_of(se); |
1949 | cfs_rq->h_nr_running++; | ||
1950 | |||
1951 | if (cfs_rq_throttled(cfs_rq)) | ||
1952 | break; | ||
1321 | 1953 | ||
1322 | update_cfs_load(cfs_rq, 0); | 1954 | update_cfs_load(cfs_rq, 0); |
1323 | update_cfs_shares(cfs_rq); | 1955 | update_cfs_shares(cfs_rq); |
1324 | } | 1956 | } |
1325 | 1957 | ||
1958 | if (!se) | ||
1959 | inc_nr_running(rq); | ||
1326 | hrtick_update(rq); | 1960 | hrtick_update(rq); |
1327 | } | 1961 | } |
1328 | 1962 | ||
@@ -1343,6 +1977,16 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
1343 | cfs_rq = cfs_rq_of(se); | 1977 | cfs_rq = cfs_rq_of(se); |
1344 | dequeue_entity(cfs_rq, se, flags); | 1978 | dequeue_entity(cfs_rq, se, flags); |
1345 | 1979 | ||
1980 | /* | ||
1981 | * end evaluation on encountering a throttled cfs_rq | ||
1982 | * | ||
1983 | * note: in the case of encountering a throttled cfs_rq we will | ||
1984 | * post the final h_nr_running decrement below. | ||
1985 | */ | ||
1986 | if (cfs_rq_throttled(cfs_rq)) | ||
1987 | break; | ||
1988 | cfs_rq->h_nr_running--; | ||
1989 | |||
1346 | /* Don't dequeue parent if it has other entities besides us */ | 1990 | /* Don't dequeue parent if it has other entities besides us */ |
1347 | if (cfs_rq->load.weight) { | 1991 | if (cfs_rq->load.weight) { |
1348 | /* | 1992 | /* |
@@ -1361,11 +2005,17 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
1361 | 2005 | ||
1362 | for_each_sched_entity(se) { | 2006 | for_each_sched_entity(se) { |
1363 | cfs_rq = cfs_rq_of(se); | 2007 | cfs_rq = cfs_rq_of(se); |
2008 | cfs_rq->h_nr_running--; | ||
2009 | |||
2010 | if (cfs_rq_throttled(cfs_rq)) | ||
2011 | break; | ||
1364 | 2012 | ||
1365 | update_cfs_load(cfs_rq, 0); | 2013 | update_cfs_load(cfs_rq, 0); |
1366 | update_cfs_shares(cfs_rq); | 2014 | update_cfs_shares(cfs_rq); |
1367 | } | 2015 | } |
1368 | 2016 | ||
2017 | if (!se) | ||
2018 | dec_nr_running(rq); | ||
1369 | hrtick_update(rq); | 2019 | hrtick_update(rq); |
1370 | } | 2020 | } |
1371 | 2021 | ||
@@ -1399,42 +2049,105 @@ static void task_waking_fair(struct task_struct *p) | |||
1399 | * Adding load to a group doesn't make a group heavier, but can cause movement | 2049 | * Adding load to a group doesn't make a group heavier, but can cause movement |
1400 | * of group shares between cpus. Assuming the shares were perfectly aligned one | 2050 | * of group shares between cpus. Assuming the shares were perfectly aligned one |
1401 | * can calculate the shift in shares. | 2051 | * can calculate the shift in shares. |
2052 | * | ||
2053 | * Calculate the effective load difference if @wl is added (subtracted) to @tg | ||
2054 | * on this @cpu and results in a total addition (subtraction) of @wg to the | ||
2055 | * total group weight. | ||
2056 | * | ||
2057 | * Given a runqueue weight distribution (rw_i) we can compute a shares | ||
2058 | * distribution (s_i) using: | ||
2059 | * | ||
2060 | * s_i = rw_i / \Sum rw_j (1) | ||
2061 | * | ||
2062 | * Suppose we have 4 CPUs and our @tg is a direct child of the root group and | ||
2063 | * has 7 equal weight tasks, distributed as below (rw_i), with the resulting | ||
2064 | * shares distribution (s_i): | ||
2065 | * | ||
2066 | * rw_i = { 2, 4, 1, 0 } | ||
2067 | * s_i = { 2/7, 4/7, 1/7, 0 } | ||
2068 | * | ||
2069 | * As per wake_affine() we're interested in the load of two CPUs (the CPU the | ||
2070 | * task used to run on and the CPU the waker is running on), we need to | ||
2071 | * compute the effect of waking a task on either CPU and, in case of a sync | ||
2072 | * wakeup, compute the effect of the current task going to sleep. | ||
2073 | * | ||
2074 | * So for a change of @wl to the local @cpu with an overall group weight change | ||
2076 | * of @wg we can compute the new shares distribution (s'_i) using: | ||
2076 | * | ||
2077 | * s'_i = (rw_i + @wl) / (@wg + \Sum rw_j) (2) | ||
2078 | * | ||
2079 | * Suppose we're interested in CPUs 0 and 1, and want to compute the load | ||
2080 | * differences in waking a task to CPU 0. The additional task changes the | ||
2081 | * weight and shares distributions like: | ||
2082 | * | ||
2083 | * rw'_i = { 3, 4, 1, 0 } | ||
2084 | * s'_i = { 3/8, 4/8, 1/8, 0 } | ||
2085 | * | ||
2086 | * We can then compute the difference in effective weight by using: | ||
2087 | * | ||
2088 | * dw_i = S * (s'_i - s_i) (3) | ||
2089 | * | ||
2090 | * Where 'S' is the group weight as seen by its parent. | ||
2091 | * | ||
2092 | * Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7) | ||
2093 | * times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 - | ||
2094 | * 4/7) times the weight of the group. | ||
1402 | */ | 2095 | */ |
1403 | static long effective_load(struct task_group *tg, int cpu, long wl, long wg) | 2096 | static long effective_load(struct task_group *tg, int cpu, long wl, long wg) |
1404 | { | 2097 | { |
1405 | struct sched_entity *se = tg->se[cpu]; | 2098 | struct sched_entity *se = tg->se[cpu]; |
1406 | 2099 | ||
1407 | if (!tg->parent) | 2100 | if (!tg->parent) /* the trivial, non-cgroup case */ |
1408 | return wl; | 2101 | return wl; |
1409 | 2102 | ||
1410 | for_each_sched_entity(se) { | 2103 | for_each_sched_entity(se) { |
1411 | long lw, w; | 2104 | long w, W; |
1412 | 2105 | ||
1413 | tg = se->my_q->tg; | 2106 | tg = se->my_q->tg; |
1414 | w = se->my_q->load.weight; | ||
1415 | 2107 | ||
1416 | /* use this cpu's instantaneous contribution */ | 2108 | /* |
1417 | lw = atomic_read(&tg->load_weight); | 2109 | * W = @wg + \Sum rw_j |
1418 | lw -= se->my_q->load_contribution; | 2110 | */ |
1419 | lw += w + wg; | 2111 | W = wg + calc_tg_weight(tg, se->my_q); |
1420 | 2112 | ||
1421 | wl += w; | 2113 | /* |
2114 | * w = rw_i + @wl | ||
2115 | */ | ||
2116 | w = se->my_q->load.weight + wl; | ||
1422 | 2117 | ||
1423 | if (lw > 0 && wl < lw) | 2118 | /* |
1424 | wl = (wl * tg->shares) / lw; | 2119 | * wl = S * s'_i; see (2) |
2120 | */ | ||
2121 | if (W > 0 && w < W) | ||
2122 | wl = (w * tg->shares) / W; | ||
1425 | else | 2123 | else |
1426 | wl = tg->shares; | 2124 | wl = tg->shares; |
1427 | 2125 | ||
1428 | /* zero point is MIN_SHARES */ | 2126 | /* |
2127 | * Per the above, wl is the new se->load.weight value; since | ||
2128 | * those are clipped to [MIN_SHARES, ...) do so now. See | ||
2129 | * calc_cfs_shares(). | ||
2130 | */ | ||
1429 | if (wl < MIN_SHARES) | 2131 | if (wl < MIN_SHARES) |
1430 | wl = MIN_SHARES; | 2132 | wl = MIN_SHARES; |
2133 | |||
2134 | /* | ||
2135 | * wl = dw_i = S * (s'_i - s_i); see (3) | ||
2136 | */ | ||
1431 | wl -= se->load.weight; | 2137 | wl -= se->load.weight; |
2138 | |||
2139 | /* | ||
2140 | * Recursively apply this logic to all parent groups to compute | ||
2141 | * the final effective load change on the root group. Since | ||
2142 | * only the @tg group gets extra weight, all parent groups can | ||
2143 | * only redistribute existing shares. @wl is the shift in shares | ||
2144 | * resulting from this level per the above. | ||
2145 | */ | ||
1432 | wg = 0; | 2146 | wg = 0; |
1433 | } | 2147 | } |
1434 | 2148 | ||
1435 | return wl; | 2149 | return wl; |
1436 | } | 2150 | } |
1437 | |||
1438 | #else | 2151 | #else |
1439 | 2152 | ||
1440 | static inline unsigned long effective_load(struct task_group *tg, int cpu, | 2153 | static inline unsigned long effective_load(struct task_group *tg, int cpu, |
@@ -1547,7 +2260,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, | |||
1547 | 2260 | ||
1548 | /* Skip over this group if it has no CPUs allowed */ | 2261 | /* Skip over this group if it has no CPUs allowed */ |
1549 | if (!cpumask_intersects(sched_group_cpus(group), | 2262 | if (!cpumask_intersects(sched_group_cpus(group), |
1550 | &p->cpus_allowed)) | 2263 | tsk_cpus_allowed(p))) |
1551 | continue; | 2264 | continue; |
1552 | 2265 | ||
1553 | local_group = cpumask_test_cpu(this_cpu, | 2266 | local_group = cpumask_test_cpu(this_cpu, |
@@ -1593,7 +2306,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) | |||
1593 | int i; | 2306 | int i; |
1594 | 2307 | ||
1595 | /* Traverse only the allowed CPUs */ | 2308 | /* Traverse only the allowed CPUs */ |
1596 | for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) { | 2309 | for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) { |
1597 | load = weighted_cpuload(i); | 2310 | load = weighted_cpuload(i); |
1598 | 2311 | ||
1599 | if (load < min_load || (load == min_load && i == this_cpu)) { | 2312 | if (load < min_load || (load == min_load && i == this_cpu)) { |
@@ -1613,7 +2326,8 @@ static int select_idle_sibling(struct task_struct *p, int target) | |||
1613 | int cpu = smp_processor_id(); | 2326 | int cpu = smp_processor_id(); |
1614 | int prev_cpu = task_cpu(p); | 2327 | int prev_cpu = task_cpu(p); |
1615 | struct sched_domain *sd; | 2328 | struct sched_domain *sd; |
1616 | int i; | 2329 | struct sched_group *sg; |
2330 | int i, smt = 0; | ||
1617 | 2331 | ||
1618 | /* | 2332 | /* |
1619 | * If the task is going to be woken-up on this cpu and if it is | 2333 | * If the task is going to be woken-up on this cpu and if it is |
@@ -1633,25 +2347,38 @@ static int select_idle_sibling(struct task_struct *p, int target) | |||
1634 | * Otherwise, iterate the domains and find an eligible idle cpu. | 2348 | * Otherwise, iterate the domains and find an eligible idle cpu. |
1634 | */ | 2348 | */ |
1635 | rcu_read_lock(); | 2349 | rcu_read_lock(); |
2350 | again: | ||
1636 | for_each_domain(target, sd) { | 2351 | for_each_domain(target, sd) { |
1637 | if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) | 2352 | if (!smt && (sd->flags & SD_SHARE_CPUPOWER)) |
1638 | break; | 2353 | continue; |
1639 | 2354 | ||
1640 | for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) { | 2355 | if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) { |
1641 | if (idle_cpu(i)) { | 2356 | if (!smt) { |
1642 | target = i; | 2357 | smt = 1; |
1643 | break; | 2358 | goto again; |
1644 | } | 2359 | } |
2360 | break; | ||
1645 | } | 2361 | } |
1646 | 2362 | ||
1647 | /* | 2363 | sg = sd->groups; |
1648 | * Lets stop looking for an idle sibling when we reached | 2364 | do { |
1649 | * the domain that spans the current cpu and prev_cpu. | 2365 | if (!cpumask_intersects(sched_group_cpus(sg), |
1650 | */ | 2366 | tsk_cpus_allowed(p))) |
1651 | if (cpumask_test_cpu(cpu, sched_domain_span(sd)) && | 2367 | goto next; |
1652 | cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) | 2368 | |
1653 | break; | 2369 | for_each_cpu(i, sched_group_cpus(sg)) { |
2370 | if (!idle_cpu(i)) | ||
2371 | goto next; | ||
2372 | } | ||
2373 | |||
2374 | target = cpumask_first_and(sched_group_cpus(sg), | ||
2375 | tsk_cpus_allowed(p)); | ||
2376 | goto done; | ||
2377 | next: | ||
2378 | sg = sg->next; | ||
2379 | } while (sg != sd->groups); | ||
1654 | } | 2380 | } |
2381 | done: | ||
1655 | rcu_read_unlock(); | 2382 | rcu_read_unlock(); |
1656 | 2383 | ||
1657 | return target; | 2384 | return target; |
@@ -1680,7 +2407,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) | |||
1680 | int sync = wake_flags & WF_SYNC; | 2407 | int sync = wake_flags & WF_SYNC; |
1681 | 2408 | ||
1682 | if (sd_flag & SD_BALANCE_WAKE) { | 2409 | if (sd_flag & SD_BALANCE_WAKE) { |
1683 | if (cpumask_test_cpu(cpu, &p->cpus_allowed)) | 2410 | if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) |
1684 | want_affine = 1; | 2411 | want_affine = 1; |
1685 | new_cpu = prev_cpu; | 2412 | new_cpu = prev_cpu; |
1686 | } | 2413 | } |
@@ -1875,6 +2602,15 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
1875 | if (unlikely(se == pse)) | 2602 | if (unlikely(se == pse)) |
1876 | return; | 2603 | return; |
1877 | 2604 | ||
2605 | /* | ||
2606 | * This is possible from callers such as pull_task(), in which we | ||
2607 | * unconditionally check_preempt_curr() after an enqueue (which may have | ||
2608 | * led to a throttle). This both saves work and prevents false | ||
2609 | * next-buddy nomination below. | ||
2610 | */ | ||
2611 | if (unlikely(throttled_hierarchy(cfs_rq_of(pse)))) | ||
2612 | return; | ||
2613 | |||
1878 | if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) { | 2614 | if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) { |
1879 | set_next_buddy(pse); | 2615 | set_next_buddy(pse); |
1880 | next_buddy_marked = 1; | 2616 | next_buddy_marked = 1; |
@@ -1883,6 +2619,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
1883 | /* | 2619 | /* |
1884 | * We can come here with TIF_NEED_RESCHED already set from new task | 2620 | * We can come here with TIF_NEED_RESCHED already set from new task |
1885 | * wake up path. | 2621 | * wake up path. |
2622 | * | ||
2623 | * Note: this also catches the edge-case of curr being in a throttled | ||
2624 | * group (e.g. via set_curr_task), since update_curr() (in the | ||
2625 | * enqueue of curr) will have resulted in resched being set. This | ||
2626 | * prevents us from potentially nominating it as a false LAST_BUDDY | ||
2627 | * below. | ||
1886 | */ | 2628 | */ |
1887 | if (test_tsk_need_resched(curr)) | 2629 | if (test_tsk_need_resched(curr)) |
1888 | return; | 2630 | return; |
@@ -1899,10 +2641,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
1899 | if (unlikely(p->policy != SCHED_NORMAL)) | 2641 | if (unlikely(p->policy != SCHED_NORMAL)) |
1900 | return; | 2642 | return; |
1901 | 2643 | ||
1902 | |||
1903 | if (!sched_feat(WAKEUP_PREEMPT)) | ||
1904 | return; | ||
1905 | |||
1906 | find_matching_se(&se, &pse); | 2644 | find_matching_se(&se, &pse); |
1907 | update_curr(cfs_rq_of(se)); | 2645 | update_curr(cfs_rq_of(se)); |
1908 | BUG_ON(!pse); | 2646 | BUG_ON(!pse); |
@@ -2005,7 +2743,8 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp | |||
2005 | { | 2743 | { |
2006 | struct sched_entity *se = &p->se; | 2744 | struct sched_entity *se = &p->se; |
2007 | 2745 | ||
2008 | if (!se->on_rq) | 2746 | /* throttled hierarchies are not runnable */ |
2747 | if (!se->on_rq || throttled_hierarchy(cfs_rq_of(se))) | ||
2009 | return false; | 2748 | return false; |
2010 | 2749 | ||
2011 | /* Tell the scheduler that we'd really like pse to run next. */ | 2750 | /* Tell the scheduler that we'd really like pse to run next. */ |
@@ -2049,7 +2788,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | |||
2049 | * 2) cannot be migrated to this CPU due to cpus_allowed, or | 2788 | * 2) cannot be migrated to this CPU due to cpus_allowed, or |
2050 | * 3) are cache-hot on their current CPU. | 2789 | * 3) are cache-hot on their current CPU. |
2051 | */ | 2790 | */ |
2052 | if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) { | 2791 | if (!cpumask_test_cpu(this_cpu, tsk_cpus_allowed(p))) { |
2053 | schedstat_inc(p, se.statistics.nr_failed_migrations_affine); | 2792 | schedstat_inc(p, se.statistics.nr_failed_migrations_affine); |
2054 | return 0; | 2793 | return 0; |
2055 | } | 2794 | } |
@@ -2102,6 +2841,9 @@ move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2102 | 2841 | ||
2103 | for_each_leaf_cfs_rq(busiest, cfs_rq) { | 2842 | for_each_leaf_cfs_rq(busiest, cfs_rq) { |
2104 | list_for_each_entry_safe(p, n, &cfs_rq->tasks, se.group_node) { | 2843 | list_for_each_entry_safe(p, n, &cfs_rq->tasks, se.group_node) { |
2844 | if (throttled_lb_pair(task_group(p), | ||
2845 | busiest->cpu, this_cpu)) | ||
2846 | break; | ||
2105 | 2847 | ||
2106 | if (!can_migrate_task(p, busiest, this_cpu, | 2848 | if (!can_migrate_task(p, busiest, this_cpu, |
2107 | sd, idle, &pinned)) | 2849 | sd, idle, &pinned)) |
@@ -2217,8 +2959,13 @@ static void update_shares(int cpu) | |||
2217 | * Iterates the task_group tree in a bottom up fashion, see | 2959 | * Iterates the task_group tree in a bottom up fashion, see |
2218 | * list_add_leaf_cfs_rq() for details. | 2960 | * list_add_leaf_cfs_rq() for details. |
2219 | */ | 2961 | */ |
2220 | for_each_leaf_cfs_rq(rq, cfs_rq) | 2962 | for_each_leaf_cfs_rq(rq, cfs_rq) { |
2963 | /* throttled entities do not contribute to load */ | ||
2964 | if (throttled_hierarchy(cfs_rq)) | ||
2965 | continue; | ||
2966 | |||
2221 | update_shares_cpu(cfs_rq->tg, cpu); | 2967 | update_shares_cpu(cfs_rq->tg, cpu); |
2968 | } | ||
2222 | rcu_read_unlock(); | 2969 | rcu_read_unlock(); |
2223 | } | 2970 | } |
2224 | 2971 | ||
@@ -2268,9 +3015,10 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2268 | u64 rem_load, moved_load; | 3015 | u64 rem_load, moved_load; |
2269 | 3016 | ||
2270 | /* | 3017 | /* |
2271 | * empty group | 3018 | * empty group or part of a throttled hierarchy |
2272 | */ | 3019 | */ |
2273 | if (!busiest_cfs_rq->task_weight) | 3020 | if (!busiest_cfs_rq->task_weight || |
3021 | throttled_lb_pair(busiest_cfs_rq->tg, cpu_of(busiest), this_cpu)) | ||
2274 | continue; | 3022 | continue; |
2275 | 3023 | ||
2276 | rem_load = (u64)rem_load_move * busiest_weight; | 3024 | rem_load = (u64)rem_load_move * busiest_weight; |
@@ -2854,7 +3602,7 @@ static bool update_sd_pick_busiest(struct sched_domain *sd, | |||
2854 | } | 3602 | } |
2855 | 3603 | ||
2856 | /** | 3604 | /** |
2857 | * update_sd_lb_stats - Update sched_group's statistics for load balancing. | 3605 | * update_sd_lb_stats - Update sched_domain's statistics for load balancing. |
2858 | * @sd: sched_domain whose statistics are to be updated. | 3606 | * @sd: sched_domain whose statistics are to be updated. |
2859 | * @this_cpu: Cpu for which load balance is currently performed. | 3607 | * @this_cpu: Cpu for which load balance is currently performed. |
2860 | * @idle: Idle status of this_cpu | 3608 | * @idle: Idle status of this_cpu |
@@ -3430,7 +4178,7 @@ redo: | |||
3430 | * moved to this_cpu | 4178 | * moved to this_cpu |
3431 | */ | 4179 | */ |
3432 | if (!cpumask_test_cpu(this_cpu, | 4180 | if (!cpumask_test_cpu(this_cpu, |
3433 | &busiest->curr->cpus_allowed)) { | 4181 | tsk_cpus_allowed(busiest->curr))) { |
3434 | raw_spin_unlock_irqrestore(&busiest->lock, | 4182 | raw_spin_unlock_irqrestore(&busiest->lock, |
3435 | flags); | 4183 | flags); |
3436 | all_pinned = 1; | 4184 | all_pinned = 1; |
@@ -3612,22 +4360,6 @@ out_unlock: | |||
3612 | } | 4360 | } |
3613 | 4361 | ||
3614 | #ifdef CONFIG_NO_HZ | 4362 | #ifdef CONFIG_NO_HZ |
3615 | |||
3616 | static DEFINE_PER_CPU(struct call_single_data, remote_sched_softirq_cb); | ||
3617 | |||
3618 | static void trigger_sched_softirq(void *data) | ||
3619 | { | ||
3620 | raise_softirq_irqoff(SCHED_SOFTIRQ); | ||
3621 | } | ||
3622 | |||
3623 | static inline void init_sched_softirq_csd(struct call_single_data *csd) | ||
3624 | { | ||
3625 | csd->func = trigger_sched_softirq; | ||
3626 | csd->info = NULL; | ||
3627 | csd->flags = 0; | ||
3628 | csd->priv = 0; | ||
3629 | } | ||
3630 | |||
3631 | /* | 4363 | /* |
3632 | * idle load balancing details | 4364 | * idle load balancing details |
3633 | * - One of the idle CPUs nominates itself as idle load_balancer, while | 4365 | * - One of the idle CPUs nominates itself as idle load_balancer, while |
@@ -3667,7 +4399,7 @@ static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) | |||
3667 | struct sched_domain *sd; | 4399 | struct sched_domain *sd; |
3668 | 4400 | ||
3669 | for_each_domain(cpu, sd) | 4401 | for_each_domain(cpu, sd) |
3670 | if (sd && (sd->flags & flag)) | 4402 | if (sd->flags & flag) |
3671 | break; | 4403 | break; |
3672 | 4404 | ||
3673 | return sd; | 4405 | return sd; |
@@ -3793,11 +4525,16 @@ static void nohz_balancer_kick(int cpu) | |||
3793 | } | 4525 | } |
3794 | 4526 | ||
3795 | if (!cpu_rq(ilb_cpu)->nohz_balance_kick) { | 4527 | if (!cpu_rq(ilb_cpu)->nohz_balance_kick) { |
3796 | struct call_single_data *cp; | ||
3797 | |||
3798 | cpu_rq(ilb_cpu)->nohz_balance_kick = 1; | 4528 | cpu_rq(ilb_cpu)->nohz_balance_kick = 1; |
3799 | cp = &per_cpu(remote_sched_softirq_cb, cpu); | 4529 | |
3800 | __smp_call_function_single(ilb_cpu, cp, 0); | 4530 | smp_mb(); |
4531 | /* | ||
4532 | * Use smp_send_reschedule() instead of resched_cpu(). | ||
4533 | * This way we generate a sched IPI on the target cpu which | ||
4534 | * is idle. And the softirq performing nohz idle load balance | ||
4535 | * will be run before returning from the IPI. | ||
4536 | */ | ||
4537 | smp_send_reschedule(ilb_cpu); | ||
3801 | } | 4538 | } |
3802 | return; | 4539 | return; |
3803 | } | 4540 | } |
@@ -4030,7 +4767,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu) | |||
4030 | if (time_before(now, nohz.next_balance)) | 4767 | if (time_before(now, nohz.next_balance)) |
4031 | return 0; | 4768 | return 0; |
4032 | 4769 | ||
4033 | if (rq->idle_at_tick) | 4770 | if (idle_cpu(cpu)) |
4034 | return 0; | 4771 | return 0; |
4035 | 4772 | ||
4036 | first_pick_cpu = atomic_read(&nohz.first_pick_cpu); | 4773 | first_pick_cpu = atomic_read(&nohz.first_pick_cpu); |
@@ -4066,7 +4803,7 @@ static void run_rebalance_domains(struct softirq_action *h) | |||
4066 | { | 4803 | { |
4067 | int this_cpu = smp_processor_id(); | 4804 | int this_cpu = smp_processor_id(); |
4068 | struct rq *this_rq = cpu_rq(this_cpu); | 4805 | struct rq *this_rq = cpu_rq(this_cpu); |
4069 | enum cpu_idle_type idle = this_rq->idle_at_tick ? | 4806 | enum cpu_idle_type idle = this_rq->idle_balance ? |
4070 | CPU_IDLE : CPU_NOT_IDLE; | 4807 | CPU_IDLE : CPU_NOT_IDLE; |
4071 | 4808 | ||
4072 | rebalance_domains(this_cpu, idle); | 4809 | rebalance_domains(this_cpu, idle); |
@@ -4251,8 +4988,13 @@ static void set_curr_task_fair(struct rq *rq) | |||
4251 | { | 4988 | { |
4252 | struct sched_entity *se = &rq->curr->se; | 4989 | struct sched_entity *se = &rq->curr->se; |
4253 | 4990 | ||
4254 | for_each_sched_entity(se) | 4991 | for_each_sched_entity(se) { |
4255 | set_next_entity(cfs_rq_of(se), se); | 4992 | struct cfs_rq *cfs_rq = cfs_rq_of(se); |
4993 | |||
4994 | set_next_entity(cfs_rq, se); | ||
4995 | /* ensure bandwidth has been allocated on our new cfs_rq */ | ||
4996 | account_cfs_rq_runtime(cfs_rq, 0); | ||
4997 | } | ||
4256 | } | 4998 | } |
4257 | 4999 | ||
4258 | #ifdef CONFIG_FAIR_GROUP_SCHED | 5000 | #ifdef CONFIG_FAIR_GROUP_SCHED |
diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 2e74677cb04..84802245abd 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h | |||
@@ -12,11 +12,6 @@ SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1) | |||
12 | SCHED_FEAT(START_DEBIT, 1) | 12 | SCHED_FEAT(START_DEBIT, 1) |
13 | 13 | ||
14 | /* | 14 | /* |
15 | * Should wakeups try to preempt running tasks. | ||
16 | */ | ||
17 | SCHED_FEAT(WAKEUP_PREEMPT, 1) | ||
18 | |||
19 | /* | ||
20 | * Based on load and program behaviour, see if it makes sense to place | 15 | * Based on load and program behaviour, see if it makes sense to place |
21 | * a newly woken task on the same cpu as the task that woke it -- | 16 | * a newly woken task on the same cpu as the task that woke it -- |
22 | * improve cache locality. Typically used with SYNC wakeups as | 17 | * improve cache locality. Typically used with SYNC wakeups as |
@@ -72,3 +67,4 @@ SCHED_FEAT(NONTASK_POWER, 1) | |||
72 | SCHED_FEAT(TTWU_QUEUE, 1) | 67 | SCHED_FEAT(TTWU_QUEUE, 1) |
73 | 68 | ||
74 | SCHED_FEAT(FORCE_SD_OVERLAP, 0) | 69 | SCHED_FEAT(FORCE_SD_OVERLAP, 0) |
70 | SCHED_FEAT(RT_RUNTIME_SHARE, 1) | ||
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 97540f0c9e4..583a1368afe 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -124,21 +124,33 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
124 | update_rt_migration(rt_rq); | 124 | update_rt_migration(rt_rq); |
125 | } | 125 | } |
126 | 126 | ||
127 | static inline int has_pushable_tasks(struct rq *rq) | ||
128 | { | ||
129 | return !plist_head_empty(&rq->rt.pushable_tasks); | ||
130 | } | ||
131 | |||
127 | static void enqueue_pushable_task(struct rq *rq, struct task_struct *p) | 132 | static void enqueue_pushable_task(struct rq *rq, struct task_struct *p) |
128 | { | 133 | { |
129 | plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks); | 134 | plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks); |
130 | plist_node_init(&p->pushable_tasks, p->prio); | 135 | plist_node_init(&p->pushable_tasks, p->prio); |
131 | plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks); | 136 | plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks); |
137 | |||
138 | /* Update the highest prio pushable task */ | ||
139 | if (p->prio < rq->rt.highest_prio.next) | ||
140 | rq->rt.highest_prio.next = p->prio; | ||
132 | } | 141 | } |
133 | 142 | ||
134 | static void dequeue_pushable_task(struct rq *rq, struct task_struct *p) | 143 | static void dequeue_pushable_task(struct rq *rq, struct task_struct *p) |
135 | { | 144 | { |
136 | plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks); | 145 | plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks); |
137 | } | ||
138 | 146 | ||
139 | static inline int has_pushable_tasks(struct rq *rq) | 147 | /* Update the new highest prio pushable task */ |
140 | { | 148 | if (has_pushable_tasks(rq)) { |
141 | return !plist_head_empty(&rq->rt.pushable_tasks); | 149 | p = plist_first_entry(&rq->rt.pushable_tasks, |
150 | struct task_struct, pushable_tasks); | ||
151 | rq->rt.highest_prio.next = p->prio; | ||
152 | } else | ||
153 | rq->rt.highest_prio.next = MAX_RT_PRIO; | ||
142 | } | 154 | } |
143 | 155 | ||
144 | #else | 156 | #else |
@@ -548,6 +560,9 @@ static int balance_runtime(struct rt_rq *rt_rq) | |||
548 | { | 560 | { |
549 | int more = 0; | 561 | int more = 0; |
550 | 562 | ||
563 | if (!sched_feat(RT_RUNTIME_SHARE)) | ||
564 | return more; | ||
565 | |||
551 | if (rt_rq->rt_time > rt_rq->rt_runtime) { | 566 | if (rt_rq->rt_time > rt_rq->rt_runtime) { |
552 | raw_spin_unlock(&rt_rq->rt_runtime_lock); | 567 | raw_spin_unlock(&rt_rq->rt_runtime_lock); |
553 | more = do_balance_runtime(rt_rq); | 568 | more = do_balance_runtime(rt_rq); |
@@ -643,6 +658,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) | |||
643 | 658 | ||
644 | if (rt_rq->rt_time > runtime) { | 659 | if (rt_rq->rt_time > runtime) { |
645 | rt_rq->rt_throttled = 1; | 660 | rt_rq->rt_throttled = 1; |
661 | printk_once(KERN_WARNING "sched: RT throttling activated\n"); | ||
646 | if (rt_rq_throttled(rt_rq)) { | 662 | if (rt_rq_throttled(rt_rq)) { |
647 | sched_rt_rq_dequeue(rt_rq); | 663 | sched_rt_rq_dequeue(rt_rq); |
648 | return 1; | 664 | return 1; |
@@ -698,47 +714,13 @@ static void update_curr_rt(struct rq *rq) | |||
698 | 714 | ||
699 | #if defined CONFIG_SMP | 715 | #if defined CONFIG_SMP |
700 | 716 | ||
701 | static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu); | ||
702 | |||
703 | static inline int next_prio(struct rq *rq) | ||
704 | { | ||
705 | struct task_struct *next = pick_next_highest_task_rt(rq, rq->cpu); | ||
706 | |||
707 | if (next && rt_prio(next->prio)) | ||
708 | return next->prio; | ||
709 | else | ||
710 | return MAX_RT_PRIO; | ||
711 | } | ||
712 | |||
713 | static void | 717 | static void |
714 | inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) | 718 | inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) |
715 | { | 719 | { |
716 | struct rq *rq = rq_of_rt_rq(rt_rq); | 720 | struct rq *rq = rq_of_rt_rq(rt_rq); |
717 | 721 | ||
718 | if (prio < prev_prio) { | 722 | if (rq->online && prio < prev_prio) |
719 | 723 | cpupri_set(&rq->rd->cpupri, rq->cpu, prio); | |
720 | /* | ||
721 | * If the new task is higher in priority than anything on the | ||
722 | * run-queue, we know that the previous high becomes our | ||
723 | * next-highest. | ||
724 | */ | ||
725 | rt_rq->highest_prio.next = prev_prio; | ||
726 | |||
727 | if (rq->online) | ||
728 | cpupri_set(&rq->rd->cpupri, rq->cpu, prio); | ||
729 | |||
730 | } else if (prio == rt_rq->highest_prio.curr) | ||
731 | /* | ||
732 | * If the next task is equal in priority to the highest on | ||
733 | * the run-queue, then we implicitly know that the next highest | ||
734 | * task cannot be any lower than current | ||
735 | */ | ||
736 | rt_rq->highest_prio.next = prio; | ||
737 | else if (prio < rt_rq->highest_prio.next) | ||
738 | /* | ||
739 | * Otherwise, we need to recompute next-highest | ||
740 | */ | ||
741 | rt_rq->highest_prio.next = next_prio(rq); | ||
742 | } | 724 | } |
743 | 725 | ||
744 | static void | 726 | static void |
@@ -746,9 +728,6 @@ dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) | |||
746 | { | 728 | { |
747 | struct rq *rq = rq_of_rt_rq(rt_rq); | 729 | struct rq *rq = rq_of_rt_rq(rt_rq); |
748 | 730 | ||
749 | if (rt_rq->rt_nr_running && (prio <= rt_rq->highest_prio.next)) | ||
750 | rt_rq->highest_prio.next = next_prio(rq); | ||
751 | |||
752 | if (rq->online && rt_rq->highest_prio.curr != prev_prio) | 731 | if (rq->online && rt_rq->highest_prio.curr != prev_prio) |
753 | cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr); | 732 | cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr); |
754 | } | 733 | } |
@@ -961,6 +940,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) | |||
961 | 940 | ||
962 | if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1) | 941 | if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1) |
963 | enqueue_pushable_task(rq, p); | 942 | enqueue_pushable_task(rq, p); |
943 | |||
944 | inc_nr_running(rq); | ||
964 | } | 945 | } |
965 | 946 | ||
966 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) | 947 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) |
@@ -971,6 +952,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) | |||
971 | dequeue_rt_entity(rt_se); | 952 | dequeue_rt_entity(rt_se); |
972 | 953 | ||
973 | dequeue_pushable_task(rq, p); | 954 | dequeue_pushable_task(rq, p); |
955 | |||
956 | dec_nr_running(rq); | ||
974 | } | 957 | } |
975 | 958 | ||
976 | /* | 959 | /* |
@@ -1017,10 +1000,12 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) | |||
1017 | struct rq *rq; | 1000 | struct rq *rq; |
1018 | int cpu; | 1001 | int cpu; |
1019 | 1002 | ||
1020 | if (sd_flag != SD_BALANCE_WAKE) | ||
1021 | return smp_processor_id(); | ||
1022 | |||
1023 | cpu = task_cpu(p); | 1003 | cpu = task_cpu(p); |
1004 | |||
1005 | /* For anything but wake ups, just return the task_cpu */ | ||
1006 | if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) | ||
1007 | goto out; | ||
1008 | |||
1024 | rq = cpu_rq(cpu); | 1009 | rq = cpu_rq(cpu); |
1025 | 1010 | ||
1026 | rcu_read_lock(); | 1011 | rcu_read_lock(); |
@@ -1050,7 +1035,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) | |||
1050 | */ | 1035 | */ |
1051 | if (curr && unlikely(rt_task(curr)) && | 1036 | if (curr && unlikely(rt_task(curr)) && |
1052 | (curr->rt.nr_cpus_allowed < 2 || | 1037 | (curr->rt.nr_cpus_allowed < 2 || |
1053 | curr->prio < p->prio) && | 1038 | curr->prio <= p->prio) && |
1054 | (p->rt.nr_cpus_allowed > 1)) { | 1039 | (p->rt.nr_cpus_allowed > 1)) { |
1055 | int target = find_lowest_rq(p); | 1040 | int target = find_lowest_rq(p); |
1056 | 1041 | ||
@@ -1059,6 +1044,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) | |||
1059 | } | 1044 | } |
1060 | rcu_read_unlock(); | 1045 | rcu_read_unlock(); |
1061 | 1046 | ||
1047 | out: | ||
1062 | return cpu; | 1048 | return cpu; |
1063 | } | 1049 | } |
1064 | 1050 | ||
@@ -1178,7 +1164,6 @@ static struct task_struct *pick_next_task_rt(struct rq *rq) | |||
1178 | static void put_prev_task_rt(struct rq *rq, struct task_struct *p) | 1164 | static void put_prev_task_rt(struct rq *rq, struct task_struct *p) |
1179 | { | 1165 | { |
1180 | update_curr_rt(rq); | 1166 | update_curr_rt(rq); |
1181 | p->se.exec_start = 0; | ||
1182 | 1167 | ||
1183 | /* | 1168 | /* |
1184 | * The previous task needs to be made eligible for pushing | 1169 | * The previous task needs to be made eligible for pushing |
@@ -1198,7 +1183,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); | |||
1198 | static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) | 1183 | static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) |
1199 | { | 1184 | { |
1200 | if (!task_running(rq, p) && | 1185 | if (!task_running(rq, p) && |
1201 | (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) && | 1186 | (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) && |
1202 | (p->rt.nr_cpus_allowed > 1)) | 1187 | (p->rt.nr_cpus_allowed > 1)) |
1203 | return 1; | 1188 | return 1; |
1204 | return 0; | 1189 | return 0; |
@@ -1343,7 +1328,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) | |||
1343 | */ | 1328 | */ |
1344 | if (unlikely(task_rq(task) != rq || | 1329 | if (unlikely(task_rq(task) != rq || |
1345 | !cpumask_test_cpu(lowest_rq->cpu, | 1330 | !cpumask_test_cpu(lowest_rq->cpu, |
1346 | &task->cpus_allowed) || | 1331 | tsk_cpus_allowed(task)) || |
1347 | task_running(rq, task) || | 1332 | task_running(rq, task) || |
1348 | !task->on_rq)) { | 1333 | !task->on_rq)) { |
1349 | 1334 | ||
@@ -1394,6 +1379,7 @@ static int push_rt_task(struct rq *rq) | |||
1394 | { | 1379 | { |
1395 | struct task_struct *next_task; | 1380 | struct task_struct *next_task; |
1396 | struct rq *lowest_rq; | 1381 | struct rq *lowest_rq; |
1382 | int ret = 0; | ||
1397 | 1383 | ||
1398 | if (!rq->rt.overloaded) | 1384 | if (!rq->rt.overloaded) |
1399 | return 0; | 1385 | return 0; |
@@ -1426,7 +1412,7 @@ retry: | |||
1426 | if (!lowest_rq) { | 1412 | if (!lowest_rq) { |
1427 | struct task_struct *task; | 1413 | struct task_struct *task; |
1428 | /* | 1414 | /* |
1429 | * find lock_lowest_rq releases rq->lock | 1415 | * find_lock_lowest_rq releases rq->lock |
1430 | * so it is possible that next_task has migrated. | 1416 | * so it is possible that next_task has migrated. |
1431 | * | 1417 | * |
1432 | * We need to make sure that the task is still on the same | 1418 | * We need to make sure that the task is still on the same |
@@ -1436,12 +1422,11 @@ retry: | |||
1436 | task = pick_next_pushable_task(rq); | 1422 | task = pick_next_pushable_task(rq); |
1437 | if (task_cpu(next_task) == rq->cpu && task == next_task) { | 1423 | if (task_cpu(next_task) == rq->cpu && task == next_task) { |
1438 | /* | 1424 | /* |
1439 | * If we get here, the task hasn't moved at all, but | 1425 | * The task hasn't migrated, and is still the next |
1440 | * it has failed to push. We will not try again, | 1426 | * eligible task, but we failed to find a run-queue |
1441 | * since the other cpus will pull from us when they | 1427 | * to push it to. Do not retry in this case, since |
1442 | * are ready. | 1428 | * other cpus will pull from us when ready. |
1443 | */ | 1429 | */ |
1444 | dequeue_pushable_task(rq, next_task); | ||
1445 | goto out; | 1430 | goto out; |
1446 | } | 1431 | } |
1447 | 1432 | ||
@@ -1460,6 +1445,7 @@ retry: | |||
1460 | deactivate_task(rq, next_task, 0); | 1445 | deactivate_task(rq, next_task, 0); |
1461 | set_task_cpu(next_task, lowest_rq->cpu); | 1446 | set_task_cpu(next_task, lowest_rq->cpu); |
1462 | activate_task(lowest_rq, next_task, 0); | 1447 | activate_task(lowest_rq, next_task, 0); |
1448 | ret = 1; | ||
1463 | 1449 | ||
1464 | resched_task(lowest_rq->curr); | 1450 | resched_task(lowest_rq->curr); |
1465 | 1451 | ||
@@ -1468,7 +1454,7 @@ retry: | |||
1468 | out: | 1454 | out: |
1469 | put_task_struct(next_task); | 1455 | put_task_struct(next_task); |
1470 | 1456 | ||
1471 | return 1; | 1457 | return ret; |
1472 | } | 1458 | } |
1473 | 1459 | ||
1474 | static void push_rt_tasks(struct rq *rq) | 1460 | static void push_rt_tasks(struct rq *rq) |
@@ -1581,7 +1567,7 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p) | |||
1581 | p->rt.nr_cpus_allowed > 1 && | 1567 | p->rt.nr_cpus_allowed > 1 && |
1582 | rt_task(rq->curr) && | 1568 | rt_task(rq->curr) && |
1583 | (rq->curr->rt.nr_cpus_allowed < 2 || | 1569 | (rq->curr->rt.nr_cpus_allowed < 2 || |
1584 | rq->curr->prio < p->prio)) | 1570 | rq->curr->prio <= p->prio)) |
1585 | push_rt_tasks(rq); | 1571 | push_rt_tasks(rq); |
1586 | } | 1572 | } |
1587 | 1573 | ||
@@ -1626,9 +1612,6 @@ static void set_cpus_allowed_rt(struct task_struct *p, | |||
1626 | 1612 | ||
1627 | update_rt_migration(&rq->rt); | 1613 | update_rt_migration(&rq->rt); |
1628 | } | 1614 | } |
1629 | |||
1630 | cpumask_copy(&p->cpus_allowed, new_mask); | ||
1631 | p->rt.nr_cpus_allowed = weight; | ||
1632 | } | 1615 | } |
1633 | 1616 | ||
1634 | /* Assumes rq->lock is held */ | 1617 | /* Assumes rq->lock is held */ |
@@ -1863,4 +1846,3 @@ static void print_rt_stats(struct seq_file *m, int cpu) | |||
1863 | rcu_read_unlock(); | 1846 | rcu_read_unlock(); |
1864 | } | 1847 | } |
1865 | #endif /* CONFIG_SCHED_DEBUG */ | 1848 | #endif /* CONFIG_SCHED_DEBUG */ |
1866 | |||
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 331e01bcd02..87f9e36ea56 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h | |||
@@ -282,10 +282,10 @@ static inline void account_group_user_time(struct task_struct *tsk, | |||
282 | if (!cputimer->running) | 282 | if (!cputimer->running) |
283 | return; | 283 | return; |
284 | 284 | ||
285 | spin_lock(&cputimer->lock); | 285 | raw_spin_lock(&cputimer->lock); |
286 | cputimer->cputime.utime = | 286 | cputimer->cputime.utime = |
287 | cputime_add(cputimer->cputime.utime, cputime); | 287 | cputime_add(cputimer->cputime.utime, cputime); |
288 | spin_unlock(&cputimer->lock); | 288 | raw_spin_unlock(&cputimer->lock); |
289 | } | 289 | } |
290 | 290 | ||
291 | /** | 291 | /** |
@@ -306,10 +306,10 @@ static inline void account_group_system_time(struct task_struct *tsk, | |||
306 | if (!cputimer->running) | 306 | if (!cputimer->running) |
307 | return; | 307 | return; |
308 | 308 | ||
309 | spin_lock(&cputimer->lock); | 309 | raw_spin_lock(&cputimer->lock); |
310 | cputimer->cputime.stime = | 310 | cputimer->cputime.stime = |
311 | cputime_add(cputimer->cputime.stime, cputime); | 311 | cputime_add(cputimer->cputime.stime, cputime); |
312 | spin_unlock(&cputimer->lock); | 312 | raw_spin_unlock(&cputimer->lock); |
313 | } | 313 | } |
314 | 314 | ||
315 | /** | 315 | /** |
@@ -330,7 +330,7 @@ static inline void account_group_exec_runtime(struct task_struct *tsk, | |||
330 | if (!cputimer->running) | 330 | if (!cputimer->running) |
331 | return; | 331 | return; |
332 | 332 | ||
333 | spin_lock(&cputimer->lock); | 333 | raw_spin_lock(&cputimer->lock); |
334 | cputimer->cputime.sum_exec_runtime += ns; | 334 | cputimer->cputime.sum_exec_runtime += ns; |
335 | spin_unlock(&cputimer->lock); | 335 | raw_spin_unlock(&cputimer->lock); |
336 | } | 336 | } |
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index 6f437632afa..8b44e7fa7fb 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c | |||
@@ -34,11 +34,13 @@ static struct task_struct *pick_next_task_stop(struct rq *rq) | |||
34 | static void | 34 | static void |
35 | enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags) | 35 | enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags) |
36 | { | 36 | { |
37 | inc_nr_running(rq); | ||
37 | } | 38 | } |
38 | 39 | ||
39 | static void | 40 | static void |
40 | dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags) | 41 | dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags) |
41 | { | 42 | { |
43 | dec_nr_running(rq); | ||
42 | } | 44 | } |
43 | 45 | ||
44 | static void yield_task_stop(struct rq *rq) | 46 | static void yield_task_stop(struct rq *rq) |
diff --git a/kernel/semaphore.c b/kernel/semaphore.c index 94a62c0d4ad..60636a4e25c 100644 --- a/kernel/semaphore.c +++ b/kernel/semaphore.c | |||
@@ -27,7 +27,7 @@ | |||
27 | 27 | ||
28 | #include <linux/compiler.h> | 28 | #include <linux/compiler.h> |
29 | #include <linux/kernel.h> | 29 | #include <linux/kernel.h> |
30 | #include <linux/module.h> | 30 | #include <linux/export.h> |
31 | #include <linux/sched.h> | 31 | #include <linux/sched.h> |
32 | #include <linux/semaphore.h> | 32 | #include <linux/semaphore.h> |
33 | #include <linux/spinlock.h> | 33 | #include <linux/spinlock.h> |
@@ -54,12 +54,12 @@ void down(struct semaphore *sem) | |||
54 | { | 54 | { |
55 | unsigned long flags; | 55 | unsigned long flags; |
56 | 56 | ||
57 | spin_lock_irqsave(&sem->lock, flags); | 57 | raw_spin_lock_irqsave(&sem->lock, flags); |
58 | if (likely(sem->count > 0)) | 58 | if (likely(sem->count > 0)) |
59 | sem->count--; | 59 | sem->count--; |
60 | else | 60 | else |
61 | __down(sem); | 61 | __down(sem); |
62 | spin_unlock_irqrestore(&sem->lock, flags); | 62 | raw_spin_unlock_irqrestore(&sem->lock, flags); |
63 | } | 63 | } |
64 | EXPORT_SYMBOL(down); | 64 | EXPORT_SYMBOL(down); |
65 | 65 | ||
@@ -77,12 +77,12 @@ int down_interruptible(struct semaphore *sem) | |||
77 | unsigned long flags; | 77 | unsigned long flags; |
78 | int result = 0; | 78 | int result = 0; |
79 | 79 | ||
80 | spin_lock_irqsave(&sem->lock, flags); | 80 | raw_spin_lock_irqsave(&sem->lock, flags); |
81 | if (likely(sem->count > 0)) | 81 | if (likely(sem->count > 0)) |
82 | sem->count--; | 82 | sem->count--; |
83 | else | 83 | else |
84 | result = __down_interruptible(sem); | 84 | result = __down_interruptible(sem); |
85 | spin_unlock_irqrestore(&sem->lock, flags); | 85 | raw_spin_unlock_irqrestore(&sem->lock, flags); |
86 | 86 | ||
87 | return result; | 87 | return result; |
88 | } | 88 | } |
@@ -103,12 +103,12 @@ int down_killable(struct semaphore *sem) | |||
103 | unsigned long flags; | 103 | unsigned long flags; |
104 | int result = 0; | 104 | int result = 0; |
105 | 105 | ||
106 | spin_lock_irqsave(&sem->lock, flags); | 106 | raw_spin_lock_irqsave(&sem->lock, flags); |
107 | if (likely(sem->count > 0)) | 107 | if (likely(sem->count > 0)) |
108 | sem->count--; | 108 | sem->count--; |
109 | else | 109 | else |
110 | result = __down_killable(sem); | 110 | result = __down_killable(sem); |
111 | spin_unlock_irqrestore(&sem->lock, flags); | 111 | raw_spin_unlock_irqrestore(&sem->lock, flags); |
112 | 112 | ||
113 | return result; | 113 | return result; |
114 | } | 114 | } |
@@ -132,11 +132,11 @@ int down_trylock(struct semaphore *sem) | |||
132 | unsigned long flags; | 132 | unsigned long flags; |
133 | int count; | 133 | int count; |
134 | 134 | ||
135 | spin_lock_irqsave(&sem->lock, flags); | 135 | raw_spin_lock_irqsave(&sem->lock, flags); |
136 | count = sem->count - 1; | 136 | count = sem->count - 1; |
137 | if (likely(count >= 0)) | 137 | if (likely(count >= 0)) |
138 | sem->count = count; | 138 | sem->count = count; |
139 | spin_unlock_irqrestore(&sem->lock, flags); | 139 | raw_spin_unlock_irqrestore(&sem->lock, flags); |
140 | 140 | ||
141 | return (count < 0); | 141 | return (count < 0); |
142 | } | 142 | } |
@@ -157,12 +157,12 @@ int down_timeout(struct semaphore *sem, long jiffies) | |||
157 | unsigned long flags; | 157 | unsigned long flags; |
158 | int result = 0; | 158 | int result = 0; |
159 | 159 | ||
160 | spin_lock_irqsave(&sem->lock, flags); | 160 | raw_spin_lock_irqsave(&sem->lock, flags); |
161 | if (likely(sem->count > 0)) | 161 | if (likely(sem->count > 0)) |
162 | sem->count--; | 162 | sem->count--; |
163 | else | 163 | else |
164 | result = __down_timeout(sem, jiffies); | 164 | result = __down_timeout(sem, jiffies); |
165 | spin_unlock_irqrestore(&sem->lock, flags); | 165 | raw_spin_unlock_irqrestore(&sem->lock, flags); |
166 | 166 | ||
167 | return result; | 167 | return result; |
168 | } | 168 | } |
@@ -179,12 +179,12 @@ void up(struct semaphore *sem) | |||
179 | { | 179 | { |
180 | unsigned long flags; | 180 | unsigned long flags; |
181 | 181 | ||
182 | spin_lock_irqsave(&sem->lock, flags); | 182 | raw_spin_lock_irqsave(&sem->lock, flags); |
183 | if (likely(list_empty(&sem->wait_list))) | 183 | if (likely(list_empty(&sem->wait_list))) |
184 | sem->count++; | 184 | sem->count++; |
185 | else | 185 | else |
186 | __up(sem); | 186 | __up(sem); |
187 | spin_unlock_irqrestore(&sem->lock, flags); | 187 | raw_spin_unlock_irqrestore(&sem->lock, flags); |
188 | } | 188 | } |
189 | EXPORT_SYMBOL(up); | 189 | EXPORT_SYMBOL(up); |
190 | 190 | ||
@@ -217,9 +217,9 @@ static inline int __sched __down_common(struct semaphore *sem, long state, | |||
217 | if (timeout <= 0) | 217 | if (timeout <= 0) |
218 | goto timed_out; | 218 | goto timed_out; |
219 | __set_task_state(task, state); | 219 | __set_task_state(task, state); |
220 | spin_unlock_irq(&sem->lock); | 220 | raw_spin_unlock_irq(&sem->lock); |
221 | timeout = schedule_timeout(timeout); | 221 | timeout = schedule_timeout(timeout); |
222 | spin_lock_irq(&sem->lock); | 222 | raw_spin_lock_irq(&sem->lock); |
223 | if (waiter.up) | 223 | if (waiter.up) |
224 | return 0; | 224 | return 0; |
225 | } | 225 | } |
diff --git a/kernel/signal.c b/kernel/signal.c index 291c9700be7..b3f78d09a10 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -11,7 +11,7 @@ | |||
11 | */ | 11 | */ |
12 | 12 | ||
13 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
14 | #include <linux/module.h> | 14 | #include <linux/export.h> |
15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
16 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
@@ -1344,13 +1344,24 @@ int kill_proc_info(int sig, struct siginfo *info, pid_t pid) | |||
1344 | return error; | 1344 | return error; |
1345 | } | 1345 | } |
1346 | 1346 | ||
1347 | static int kill_as_cred_perm(const struct cred *cred, | ||
1348 | struct task_struct *target) | ||
1349 | { | ||
1350 | const struct cred *pcred = __task_cred(target); | ||
1351 | if (cred->user_ns != pcred->user_ns) | ||
1352 | return 0; | ||
1353 | if (cred->euid != pcred->suid && cred->euid != pcred->uid && | ||
1354 | cred->uid != pcred->suid && cred->uid != pcred->uid) | ||
1355 | return 0; | ||
1356 | return 1; | ||
1357 | } | ||
1358 | |||
1347 | /* like kill_pid_info(), but doesn't use uid/euid of "current" */ | 1359 | /* like kill_pid_info(), but doesn't use uid/euid of "current" */ |
1348 | int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, | 1360 | int kill_pid_info_as_cred(int sig, struct siginfo *info, struct pid *pid, |
1349 | uid_t uid, uid_t euid, u32 secid) | 1361 | const struct cred *cred, u32 secid) |
1350 | { | 1362 | { |
1351 | int ret = -EINVAL; | 1363 | int ret = -EINVAL; |
1352 | struct task_struct *p; | 1364 | struct task_struct *p; |
1353 | const struct cred *pcred; | ||
1354 | unsigned long flags; | 1365 | unsigned long flags; |
1355 | 1366 | ||
1356 | if (!valid_signal(sig)) | 1367 | if (!valid_signal(sig)) |
@@ -1362,10 +1373,7 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, | |||
1362 | ret = -ESRCH; | 1373 | ret = -ESRCH; |
1363 | goto out_unlock; | 1374 | goto out_unlock; |
1364 | } | 1375 | } |
1365 | pcred = __task_cred(p); | 1376 | if (si_fromuser(info) && !kill_as_cred_perm(cred, p)) { |
1366 | if (si_fromuser(info) && | ||
1367 | euid != pcred->suid && euid != pcred->uid && | ||
1368 | uid != pcred->suid && uid != pcred->uid) { | ||
1369 | ret = -EPERM; | 1377 | ret = -EPERM; |
1370 | goto out_unlock; | 1378 | goto out_unlock; |
1371 | } | 1379 | } |
@@ -1384,7 +1392,7 @@ out_unlock: | |||
1384 | rcu_read_unlock(); | 1392 | rcu_read_unlock(); |
1385 | return ret; | 1393 | return ret; |
1386 | } | 1394 | } |
1387 | EXPORT_SYMBOL_GPL(kill_pid_info_as_uid); | 1395 | EXPORT_SYMBOL_GPL(kill_pid_info_as_cred); |
1388 | 1396 | ||
1389 | /* | 1397 | /* |
1390 | * kill_something_info() interprets pid in interesting ways just like kill(2). | 1398 | * kill_something_info() interprets pid in interesting ways just like kill(2). |
diff --git a/kernel/smp.c b/kernel/smp.c index fb67dfa8394..db197d60489 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -6,7 +6,7 @@ | |||
6 | #include <linux/rcupdate.h> | 6 | #include <linux/rcupdate.h> |
7 | #include <linux/rculist.h> | 7 | #include <linux/rculist.h> |
8 | #include <linux/kernel.h> | 8 | #include <linux/kernel.h> |
9 | #include <linux/module.h> | 9 | #include <linux/export.h> |
10 | #include <linux/percpu.h> | 10 | #include <linux/percpu.h> |
11 | #include <linux/init.h> | 11 | #include <linux/init.h> |
12 | #include <linux/gfp.h> | 12 | #include <linux/gfp.h> |
diff --git a/kernel/softirq.c b/kernel/softirq.c index fca82c32042..2c71d91efff 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -10,7 +10,7 @@ | |||
10 | * Remote softirq infrastructure is by Jens Axboe. | 10 | * Remote softirq infrastructure is by Jens Axboe. |
11 | */ | 11 | */ |
12 | 12 | ||
13 | #include <linux/module.h> | 13 | #include <linux/export.h> |
14 | #include <linux/kernel_stat.h> | 14 | #include <linux/kernel_stat.h> |
15 | #include <linux/interrupt.h> | 15 | #include <linux/interrupt.h> |
16 | #include <linux/init.h> | 16 | #include <linux/init.h> |
diff --git a/kernel/spinlock.c b/kernel/spinlock.c index be6517fb9c1..84c7d96918b 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c | |||
@@ -19,7 +19,7 @@ | |||
19 | #include <linux/spinlock.h> | 19 | #include <linux/spinlock.h> |
20 | #include <linux/interrupt.h> | 20 | #include <linux/interrupt.h> |
21 | #include <linux/debug_locks.h> | 21 | #include <linux/debug_locks.h> |
22 | #include <linux/module.h> | 22 | #include <linux/export.h> |
23 | 23 | ||
24 | /* | 24 | /* |
25 | * If lockdep is enabled then we use the non-preemption spin-ops | 25 | * If lockdep is enabled then we use the non-preemption spin-ops |
diff --git a/kernel/srcu.c b/kernel/srcu.c index 73ce23feaea..0febf61e1aa 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c | |||
@@ -24,7 +24,7 @@ | |||
24 | * | 24 | * |
25 | */ | 25 | */ |
26 | 26 | ||
27 | #include <linux/module.h> | 27 | #include <linux/export.h> |
28 | #include <linux/mutex.h> | 28 | #include <linux/mutex.h> |
29 | #include <linux/percpu.h> | 29 | #include <linux/percpu.h> |
30 | #include <linux/preempt.h> | 30 | #include <linux/preempt.h> |
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index d20c6983aad..00fe55cc5a8 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c | |||
@@ -7,7 +7,7 @@ | |||
7 | */ | 7 | */ |
8 | #include <linux/sched.h> | 8 | #include <linux/sched.h> |
9 | #include <linux/kernel.h> | 9 | #include <linux/kernel.h> |
10 | #include <linux/module.h> | 10 | #include <linux/export.h> |
11 | #include <linux/kallsyms.h> | 11 | #include <linux/kallsyms.h> |
12 | #include <linux/stacktrace.h> | 12 | #include <linux/stacktrace.h> |
13 | 13 | ||
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index ba5070ce576..2f194e96571 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
@@ -12,7 +12,7 @@ | |||
12 | #include <linux/cpu.h> | 12 | #include <linux/cpu.h> |
13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
14 | #include <linux/kthread.h> | 14 | #include <linux/kthread.h> |
15 | #include <linux/module.h> | 15 | #include <linux/export.h> |
16 | #include <linux/percpu.h> | 16 | #include <linux/percpu.h> |
17 | #include <linux/sched.h> | 17 | #include <linux/sched.h> |
18 | #include <linux/stop_machine.h> | 18 | #include <linux/stop_machine.h> |
@@ -41,6 +41,7 @@ struct cpu_stopper { | |||
41 | }; | 41 | }; |
42 | 42 | ||
43 | static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper); | 43 | static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper); |
44 | static bool stop_machine_initialized = false; | ||
44 | 45 | ||
45 | static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo) | 46 | static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo) |
46 | { | 47 | { |
@@ -386,6 +387,8 @@ static int __init cpu_stop_init(void) | |||
386 | cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu); | 387 | cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu); |
387 | register_cpu_notifier(&cpu_stop_cpu_notifier); | 388 | register_cpu_notifier(&cpu_stop_cpu_notifier); |
388 | 389 | ||
390 | stop_machine_initialized = true; | ||
391 | |||
389 | return 0; | 392 | return 0; |
390 | } | 393 | } |
391 | early_initcall(cpu_stop_init); | 394 | early_initcall(cpu_stop_init); |
@@ -485,6 +488,25 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) | |||
485 | .num_threads = num_online_cpus(), | 488 | .num_threads = num_online_cpus(), |
486 | .active_cpus = cpus }; | 489 | .active_cpus = cpus }; |
487 | 490 | ||
491 | if (!stop_machine_initialized) { | ||
492 | /* | ||
493 | * Handle the case where stop_machine() is called | ||
494 | * early in boot before stop_machine() has been | ||
495 | * initialized. | ||
496 | */ | ||
497 | unsigned long flags; | ||
498 | int ret; | ||
499 | |||
500 | WARN_ON_ONCE(smdata.num_threads != 1); | ||
501 | |||
502 | local_irq_save(flags); | ||
503 | hard_irq_disable(); | ||
504 | ret = (*fn)(data); | ||
505 | local_irq_restore(flags); | ||
506 | |||
507 | return ret; | ||
508 | } | ||
509 | |||
488 | /* Set the initial state and stop all online cpus. */ | 510 | /* Set the initial state and stop all online cpus. */ |
489 | set_state(&smdata, STOPMACHINE_PREPARE); | 511 | set_state(&smdata, STOPMACHINE_PREPARE); |
490 | return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata); | 512 | return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata); |
diff --git a/kernel/sys.c b/kernel/sys.c index a101ba36c44..481611fbd07 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -4,7 +4,7 @@ | |||
4 | * Copyright (C) 1991, 1992 Linus Torvalds | 4 | * Copyright (C) 1991, 1992 Linus Torvalds |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/module.h> | 7 | #include <linux/export.h> |
8 | #include <linux/mm.h> | 8 | #include <linux/mm.h> |
9 | #include <linux/utsname.h> | 9 | #include <linux/utsname.h> |
10 | #include <linux/mman.h> | 10 | #include <linux/mman.h> |
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/prctl.h> | 12 | #include <linux/prctl.h> |
13 | #include <linux/highuid.h> | 13 | #include <linux/highuid.h> |
14 | #include <linux/fs.h> | 14 | #include <linux/fs.h> |
15 | #include <linux/kmod.h> | ||
15 | #include <linux/perf_event.h> | 16 | #include <linux/perf_event.h> |
16 | #include <linux/resource.h> | 17 | #include <linux/resource.h> |
17 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
@@ -37,6 +38,8 @@ | |||
37 | #include <linux/fs_struct.h> | 38 | #include <linux/fs_struct.h> |
38 | #include <linux/gfp.h> | 39 | #include <linux/gfp.h> |
39 | #include <linux/syscore_ops.h> | 40 | #include <linux/syscore_ops.h> |
41 | #include <linux/version.h> | ||
42 | #include <linux/ctype.h> | ||
40 | 43 | ||
41 | #include <linux/compat.h> | 44 | #include <linux/compat.h> |
42 | #include <linux/syscalls.h> | 45 | #include <linux/syscalls.h> |
@@ -44,6 +47,8 @@ | |||
44 | #include <linux/user_namespace.h> | 47 | #include <linux/user_namespace.h> |
45 | 48 | ||
46 | #include <linux/kmsg_dump.h> | 49 | #include <linux/kmsg_dump.h> |
50 | /* Move somewhere else to avoid recompiling? */ | ||
51 | #include <generated/utsrelease.h> | ||
47 | 52 | ||
48 | #include <asm/uaccess.h> | 53 | #include <asm/uaccess.h> |
49 | #include <asm/io.h> | 54 | #include <asm/io.h> |
@@ -621,11 +626,18 @@ static int set_user(struct cred *new) | |||
621 | if (!new_user) | 626 | if (!new_user) |
622 | return -EAGAIN; | 627 | return -EAGAIN; |
623 | 628 | ||
629 | /* | ||
630 | * We don't fail in case of NPROC limit excess here because too many | ||
631 | * poorly written programs don't check set*uid() return code, assuming | ||
632 | * it never fails if called by root. We may still enforce NPROC limit | ||
633 | * for programs doing set*uid()+execve() by harmlessly deferring the | ||
634 | * failure to the execve() stage. | ||
635 | */ | ||
624 | if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) && | 636 | if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) && |
625 | new_user != INIT_USER) { | 637 | new_user != INIT_USER) |
626 | free_uid(new_user); | 638 | current->flags |= PF_NPROC_EXCEEDED; |
627 | return -EAGAIN; | 639 | else |
628 | } | 640 | current->flags &= ~PF_NPROC_EXCEEDED; |
629 | 641 | ||
630 | free_uid(new->user); | 642 | free_uid(new->user); |
631 | new->user = new_user; | 643 | new->user = new_user; |
@@ -1154,6 +1166,34 @@ DECLARE_RWSEM(uts_sem); | |||
1154 | #define override_architecture(name) 0 | 1166 | #define override_architecture(name) 0 |
1155 | #endif | 1167 | #endif |
1156 | 1168 | ||
1169 | /* | ||
1170 | * Work around broken programs that cannot handle "Linux 3.0". | ||
1171 | * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40 | ||
1172 | */ | ||
1173 | static int override_release(char __user *release, int len) | ||
1174 | { | ||
1175 | int ret = 0; | ||
1176 | char buf[65]; | ||
1177 | |||
1178 | if (current->personality & UNAME26) { | ||
1179 | char *rest = UTS_RELEASE; | ||
1180 | int ndots = 0; | ||
1181 | unsigned v; | ||
1182 | |||
1183 | while (*rest) { | ||
1184 | if (*rest == '.' && ++ndots >= 3) | ||
1185 | break; | ||
1186 | if (!isdigit(*rest) && *rest != '.') | ||
1187 | break; | ||
1188 | rest++; | ||
1189 | } | ||
1190 | v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40; | ||
1191 | snprintf(buf, len, "2.6.%u%s", v, rest); | ||
1192 | ret = copy_to_user(release, buf, len); | ||
1193 | } | ||
1194 | return ret; | ||
1195 | } | ||
1196 | |||
1157 | SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) | 1197 | SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) |
1158 | { | 1198 | { |
1159 | int errno = 0; | 1199 | int errno = 0; |
@@ -1163,6 +1203,8 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) | |||
1163 | errno = -EFAULT; | 1203 | errno = -EFAULT; |
1164 | up_read(&uts_sem); | 1204 | up_read(&uts_sem); |
1165 | 1205 | ||
1206 | if (!errno && override_release(name->release, sizeof(name->release))) | ||
1207 | errno = -EFAULT; | ||
1166 | if (!errno && override_architecture(name)) | 1208 | if (!errno && override_architecture(name)) |
1167 | errno = -EFAULT; | 1209 | errno = -EFAULT; |
1168 | return errno; | 1210 | return errno; |
@@ -1184,6 +1226,8 @@ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name) | |||
1184 | error = -EFAULT; | 1226 | error = -EFAULT; |
1185 | up_read(&uts_sem); | 1227 | up_read(&uts_sem); |
1186 | 1228 | ||
1229 | if (!error && override_release(name->release, sizeof(name->release))) | ||
1230 | error = -EFAULT; | ||
1187 | if (!error && override_architecture(name)) | 1231 | if (!error && override_architecture(name)) |
1188 | error = -EFAULT; | 1232 | error = -EFAULT; |
1189 | return error; | 1233 | return error; |
@@ -1218,6 +1262,8 @@ SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name) | |||
1218 | 1262 | ||
1219 | if (!error && override_architecture(name)) | 1263 | if (!error && override_architecture(name)) |
1220 | error = -EFAULT; | 1264 | error = -EFAULT; |
1265 | if (!error && override_release(name->release, sizeof(name->release))) | ||
1266 | error = -EFAULT; | ||
1221 | return error ? -EFAULT : 0; | 1267 | return error ? -EFAULT : 0; |
1222 | } | 1268 | } |
1223 | #endif | 1269 | #endif |
@@ -1241,6 +1287,7 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) | |||
1241 | memset(u->nodename + len, 0, sizeof(u->nodename) - len); | 1287 | memset(u->nodename + len, 0, sizeof(u->nodename) - len); |
1242 | errno = 0; | 1288 | errno = 0; |
1243 | } | 1289 | } |
1290 | uts_proc_notify(UTS_PROC_HOSTNAME); | ||
1244 | up_write(&uts_sem); | 1291 | up_write(&uts_sem); |
1245 | return errno; | 1292 | return errno; |
1246 | } | 1293 | } |
@@ -1291,6 +1338,7 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) | |||
1291 | memset(u->domainname + len, 0, sizeof(u->domainname) - len); | 1338 | memset(u->domainname + len, 0, sizeof(u->domainname) - len); |
1292 | errno = 0; | 1339 | errno = 0; |
1293 | } | 1340 | } |
1341 | uts_proc_notify(UTS_PROC_DOMAINNAME); | ||
1294 | up_write(&uts_sem); | 1342 | up_write(&uts_sem); |
1295 | return errno; | 1343 | return errno; |
1296 | } | 1344 | } |
@@ -1714,6 +1762,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
1714 | sizeof(me->comm) - 1) < 0) | 1762 | sizeof(me->comm) - 1) < 0) |
1715 | return -EFAULT; | 1763 | return -EFAULT; |
1716 | set_task_comm(me, comm); | 1764 | set_task_comm(me, comm); |
1765 | proc_comm_connector(me); | ||
1717 | return 0; | 1766 | return 0; |
1718 | case PR_GET_NAME: | 1767 | case PR_GET_NAME: |
1719 | get_task_comm(comm, me); | 1768 | get_task_comm(comm, me); |
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 62cbc8877fe..47bfa16430d 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
@@ -16,7 +16,6 @@ asmlinkage long sys_ni_syscall(void) | |||
16 | return -ENOSYS; | 16 | return -ENOSYS; |
17 | } | 17 | } |
18 | 18 | ||
19 | cond_syscall(sys_nfsservctl); | ||
20 | cond_syscall(sys_quotactl); | 19 | cond_syscall(sys_quotactl); |
21 | cond_syscall(sys32_quotactl); | 20 | cond_syscall(sys32_quotactl); |
22 | cond_syscall(sys_acct); | 21 | cond_syscall(sys_acct); |
@@ -146,6 +145,10 @@ cond_syscall(sys_io_submit); | |||
146 | cond_syscall(sys_io_cancel); | 145 | cond_syscall(sys_io_cancel); |
147 | cond_syscall(sys_io_getevents); | 146 | cond_syscall(sys_io_getevents); |
148 | cond_syscall(sys_syslog); | 147 | cond_syscall(sys_syslog); |
148 | cond_syscall(sys_process_vm_readv); | ||
149 | cond_syscall(sys_process_vm_writev); | ||
150 | cond_syscall(compat_sys_process_vm_readv); | ||
151 | cond_syscall(compat_sys_process_vm_writev); | ||
149 | 152 | ||
150 | /* arch-specific weak syscall entries */ | 153 | /* arch-specific weak syscall entries */ |
151 | cond_syscall(sys_pciconfig_read); | 154 | cond_syscall(sys_pciconfig_read); |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 11d65b531e5..ae271964385 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -57,6 +57,7 @@ | |||
57 | #include <linux/pipe_fs_i.h> | 57 | #include <linux/pipe_fs_i.h> |
58 | #include <linux/oom.h> | 58 | #include <linux/oom.h> |
59 | #include <linux/kmod.h> | 59 | #include <linux/kmod.h> |
60 | #include <linux/capability.h> | ||
60 | 61 | ||
61 | #include <asm/uaccess.h> | 62 | #include <asm/uaccess.h> |
62 | #include <asm/processor.h> | 63 | #include <asm/processor.h> |
@@ -134,6 +135,7 @@ static int minolduid; | |||
134 | static int min_percpu_pagelist_fract = 8; | 135 | static int min_percpu_pagelist_fract = 8; |
135 | 136 | ||
136 | static int ngroups_max = NGROUPS_MAX; | 137 | static int ngroups_max = NGROUPS_MAX; |
138 | static const int cap_last_cap = CAP_LAST_CAP; | ||
137 | 139 | ||
138 | #ifdef CONFIG_INOTIFY_USER | 140 | #ifdef CONFIG_INOTIFY_USER |
139 | #include <linux/inotify.h> | 141 | #include <linux/inotify.h> |
@@ -151,14 +153,6 @@ extern int pwrsw_enabled; | |||
151 | extern int unaligned_enabled; | 153 | extern int unaligned_enabled; |
152 | #endif | 154 | #endif |
153 | 155 | ||
154 | #ifdef CONFIG_S390 | ||
155 | #ifdef CONFIG_MATHEMU | ||
156 | extern int sysctl_ieee_emulation_warnings; | ||
157 | #endif | ||
158 | extern int sysctl_userprocess_debug; | ||
159 | extern int spin_retry; | ||
160 | #endif | ||
161 | |||
162 | #ifdef CONFIG_IA64 | 156 | #ifdef CONFIG_IA64 |
163 | extern int no_unaligned_warning; | 157 | extern int no_unaligned_warning; |
164 | extern int unaligned_dump_stack; | 158 | extern int unaligned_dump_stack; |
@@ -379,6 +373,16 @@ static struct ctl_table kern_table[] = { | |||
379 | .extra2 = &one, | 373 | .extra2 = &one, |
380 | }, | 374 | }, |
381 | #endif | 375 | #endif |
376 | #ifdef CONFIG_CFS_BANDWIDTH | ||
377 | { | ||
378 | .procname = "sched_cfs_bandwidth_slice_us", | ||
379 | .data = &sysctl_sched_cfs_bandwidth_slice, | ||
380 | .maxlen = sizeof(unsigned int), | ||
381 | .mode = 0644, | ||
382 | .proc_handler = proc_dointvec_minmax, | ||
383 | .extra1 = &one, | ||
384 | }, | ||
385 | #endif | ||
382 | #ifdef CONFIG_PROVE_LOCKING | 386 | #ifdef CONFIG_PROVE_LOCKING |
383 | { | 387 | { |
384 | .procname = "prove_locking", | 388 | .procname = "prove_locking", |
@@ -730,6 +734,13 @@ static struct ctl_table kern_table[] = { | |||
730 | .mode = 0444, | 734 | .mode = 0444, |
731 | .proc_handler = proc_dointvec, | 735 | .proc_handler = proc_dointvec, |
732 | }, | 736 | }, |
737 | { | ||
738 | .procname = "cap_last_cap", | ||
739 | .data = (void *)&cap_last_cap, | ||
740 | .maxlen = sizeof(int), | ||
741 | .mode = 0444, | ||
742 | .proc_handler = proc_dointvec, | ||
743 | }, | ||
733 | #if defined(CONFIG_LOCKUP_DETECTOR) | 744 | #if defined(CONFIG_LOCKUP_DETECTOR) |
734 | { | 745 | { |
735 | .procname = "watchdog", | 746 | .procname = "watchdog", |
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 3b8e028b960..6318b511afa 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c | |||
@@ -1,6 +1,6 @@ | |||
1 | #include <linux/stat.h> | 1 | #include <linux/stat.h> |
2 | #include <linux/sysctl.h> | 2 | #include <linux/sysctl.h> |
3 | #include "../fs/xfs/linux-2.6/xfs_sysctl.h" | 3 | #include "../fs/xfs/xfs_sysctl.h" |
4 | #include <linux/sunrpc/debug.h> | 4 | #include <linux/sunrpc/debug.h> |
5 | #include <linux/string.h> | 5 | #include <linux/string.h> |
6 | #include <net/ip_vs.h> | 6 | #include <net/ip_vs.h> |
@@ -214,7 +214,7 @@ static const struct bin_table bin_net_ipv4_route_table[] = { | |||
214 | { CTL_INT, NET_IPV4_ROUTE_GC_MIN_INTERVAL, "gc_min_interval" }, | 214 | { CTL_INT, NET_IPV4_ROUTE_GC_MIN_INTERVAL, "gc_min_interval" }, |
215 | { CTL_INT, NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, "gc_min_interval_ms" }, | 215 | { CTL_INT, NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, "gc_min_interval_ms" }, |
216 | { CTL_INT, NET_IPV4_ROUTE_GC_TIMEOUT, "gc_timeout" }, | 216 | { CTL_INT, NET_IPV4_ROUTE_GC_TIMEOUT, "gc_timeout" }, |
217 | { CTL_INT, NET_IPV4_ROUTE_GC_INTERVAL, "gc_interval" }, | 217 | /* NET_IPV4_ROUTE_GC_INTERVAL "gc_interval" no longer used */ |
218 | { CTL_INT, NET_IPV4_ROUTE_REDIRECT_LOAD, "redirect_load" }, | 218 | { CTL_INT, NET_IPV4_ROUTE_REDIRECT_LOAD, "redirect_load" }, |
219 | { CTL_INT, NET_IPV4_ROUTE_REDIRECT_NUMBER, "redirect_number" }, | 219 | { CTL_INT, NET_IPV4_ROUTE_REDIRECT_NUMBER, "redirect_number" }, |
220 | { CTL_INT, NET_IPV4_ROUTE_REDIRECT_SILENCE, "redirect_silence" }, | 220 | { CTL_INT, NET_IPV4_ROUTE_REDIRECT_SILENCE, "redirect_silence" }, |
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index 4e4932a7b36..362da653813 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c | |||
@@ -1,6 +1,6 @@ | |||
1 | #include <linux/stat.h> | 1 | #include <linux/stat.h> |
2 | #include <linux/sysctl.h> | 2 | #include <linux/sysctl.h> |
3 | #include "../fs/xfs/linux-2.6/xfs_sysctl.h" | 3 | #include "../fs/xfs/xfs_sysctl.h" |
4 | #include <linux/sunrpc/debug.h> | 4 | #include <linux/sunrpc/debug.h> |
5 | #include <linux/string.h> | 5 | #include <linux/string.h> |
6 | #include <net/ip_vs.h> | 6 | #include <net/ip_vs.h> |
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index e19ce1454ee..e66046456f4 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
@@ -655,6 +655,7 @@ static struct genl_ops taskstats_ops = { | |||
655 | .cmd = TASKSTATS_CMD_GET, | 655 | .cmd = TASKSTATS_CMD_GET, |
656 | .doit = taskstats_user_cmd, | 656 | .doit = taskstats_user_cmd, |
657 | .policy = taskstats_cmd_get_policy, | 657 | .policy = taskstats_cmd_get_policy, |
658 | .flags = GENL_ADMIN_PERM, | ||
658 | }; | 659 | }; |
659 | 660 | ||
660 | static struct genl_ops cgroupstats_ops = { | 661 | static struct genl_ops cgroupstats_ops = { |
diff --git a/kernel/time.c b/kernel/time.c index d7760621452..73e416db0a1 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
@@ -27,7 +27,7 @@ | |||
27 | * with nanosecond accuracy | 27 | * with nanosecond accuracy |
28 | */ | 28 | */ |
29 | 29 | ||
30 | #include <linux/module.h> | 30 | #include <linux/export.h> |
31 | #include <linux/timex.h> | 31 | #include <linux/timex.h> |
32 | #include <linux/capability.h> | 32 | #include <linux/capability.h> |
33 | #include <linux/clocksource.h> | 33 | #include <linux/clocksource.h> |
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index c340ca658f3..ce033c7aa2e 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c | |||
@@ -18,6 +18,7 @@ | |||
18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
19 | */ | 19 | */ |
20 | #include <linux/device.h> | 20 | #include <linux/device.h> |
21 | #include <linux/export.h> | ||
21 | #include <linux/file.h> | 22 | #include <linux/file.h> |
22 | #include <linux/posix-clock.h> | 23 | #include <linux/posix-clock.h> |
23 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 7e2e0817cbf..40420644d0b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -139,7 +139,6 @@ static void tick_nohz_update_jiffies(ktime_t now) | |||
139 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 139 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
140 | unsigned long flags; | 140 | unsigned long flags; |
141 | 141 | ||
142 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | ||
143 | ts->idle_waketime = now; | 142 | ts->idle_waketime = now; |
144 | 143 | ||
145 | local_irq_save(flags); | 144 | local_irq_save(flags); |
@@ -418,9 +417,6 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
418 | else | 417 | else |
419 | expires.tv64 = KTIME_MAX; | 418 | expires.tv64 = KTIME_MAX; |
420 | 419 | ||
421 | if (delta_jiffies > 1) | ||
422 | cpumask_set_cpu(cpu, nohz_cpu_mask); | ||
423 | |||
424 | /* Skip reprogram of event if its not changed */ | 420 | /* Skip reprogram of event if its not changed */ |
425 | if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) | 421 | if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) |
426 | goto out; | 422 | goto out; |
@@ -470,7 +466,6 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
470 | * softirq. | 466 | * softirq. |
471 | */ | 467 | */ |
472 | tick_do_update_jiffies64(ktime_get()); | 468 | tick_do_update_jiffies64(ktime_get()); |
473 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | ||
474 | } | 469 | } |
475 | raise_softirq_irqoff(TIMER_SOFTIRQ); | 470 | raise_softirq_irqoff(TIMER_SOFTIRQ); |
476 | out: | 471 | out: |
@@ -553,7 +548,6 @@ void tick_nohz_restart_sched_tick(void) | |||
553 | /* Update jiffies first */ | 548 | /* Update jiffies first */ |
554 | select_nohz_load_balancer(0); | 549 | select_nohz_load_balancer(0); |
555 | tick_do_update_jiffies64(now); | 550 | tick_do_update_jiffies64(now); |
556 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | ||
557 | 551 | ||
558 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | 552 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING |
559 | /* | 553 | /* |
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index a5d0a3a85dd..0b537f27b55 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c | |||
@@ -81,7 +81,7 @@ struct entry { | |||
81 | /* | 81 | /* |
82 | * Spinlock protecting the tables - not taken during lookup: | 82 | * Spinlock protecting the tables - not taken during lookup: |
83 | */ | 83 | */ |
84 | static DEFINE_SPINLOCK(table_lock); | 84 | static DEFINE_RAW_SPINLOCK(table_lock); |
85 | 85 | ||
86 | /* | 86 | /* |
87 | * Per-CPU lookup locks for fast hash lookup: | 87 | * Per-CPU lookup locks for fast hash lookup: |
@@ -188,7 +188,7 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm) | |||
188 | prev = NULL; | 188 | prev = NULL; |
189 | curr = *head; | 189 | curr = *head; |
190 | 190 | ||
191 | spin_lock(&table_lock); | 191 | raw_spin_lock(&table_lock); |
192 | /* | 192 | /* |
193 | * Make sure we have not raced with another CPU: | 193 | * Make sure we have not raced with another CPU: |
194 | */ | 194 | */ |
@@ -215,7 +215,7 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm) | |||
215 | *head = curr; | 215 | *head = curr; |
216 | } | 216 | } |
217 | out_unlock: | 217 | out_unlock: |
218 | spin_unlock(&table_lock); | 218 | raw_spin_unlock(&table_lock); |
219 | 219 | ||
220 | return curr; | 220 | return curr; |
221 | } | 221 | } |
diff --git a/kernel/timer.c b/kernel/timer.c index 8cff36119e4..dbaa62422b1 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -20,7 +20,7 @@ | |||
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/kernel_stat.h> | 22 | #include <linux/kernel_stat.h> |
23 | #include <linux/module.h> | 23 | #include <linux/export.h> |
24 | #include <linux/interrupt.h> | 24 | #include <linux/interrupt.h> |
25 | #include <linux/percpu.h> | 25 | #include <linux/percpu.h> |
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 2ad39e556cb..cd3134510f3 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -82,7 +82,7 @@ config EVENT_POWER_TRACING_DEPRECATED | |||
82 | power:power_frequency | 82 | power:power_frequency |
83 | This is for userspace compatibility | 83 | This is for userspace compatibility |
84 | and will vanish after 5 kernel iterations, | 84 | and will vanish after 5 kernel iterations, |
85 | namely 2.6.41. | 85 | namely 3.1. |
86 | 86 | ||
87 | config CONTEXT_SWITCH_TRACER | 87 | config CONTEXT_SWITCH_TRACER |
88 | bool | 88 | bool |
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 761c510a06c..5f39a07fe5e 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
@@ -15,6 +15,8 @@ ifdef CONFIG_TRACING_BRANCHES | |||
15 | KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING | 15 | KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING |
16 | endif | 16 | endif |
17 | 17 | ||
18 | CFLAGS_trace_events_filter.o := -I$(src) | ||
19 | |||
18 | # | 20 | # |
19 | # Make the trace clocks available generally: it's infrastructure | 21 | # Make the trace clocks available generally: it's infrastructure |
20 | # relied on by ptrace for example: | 22 | # relied on by ptrace for example: |
@@ -53,6 +55,9 @@ endif | |||
53 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o | 55 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o |
54 | obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o | 56 | obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o |
55 | obj-$(CONFIG_TRACEPOINTS) += power-traces.o | 57 | obj-$(CONFIG_TRACEPOINTS) += power-traces.o |
58 | ifeq ($(CONFIG_PM_RUNTIME),y) | ||
59 | obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o | ||
60 | endif | ||
56 | ifeq ($(CONFIG_TRACING),y) | 61 | ifeq ($(CONFIG_TRACING),y) |
57 | obj-$(CONFIG_KGDB_KDB) += trace_kdb.o | 62 | obj-$(CONFIG_KGDB_KDB) += trace_kdb.o |
58 | endif | 63 | endif |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 6957aa298df..16fc34a0806 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/debugfs.h> | 25 | #include <linux/debugfs.h> |
26 | #include <linux/export.h> | ||
26 | #include <linux/time.h> | 27 | #include <linux/time.h> |
27 | #include <linux/uaccess.h> | 28 | #include <linux/uaccess.h> |
28 | 29 | ||
@@ -206,6 +207,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, | |||
206 | what |= MASK_TC_BIT(rw, RAHEAD); | 207 | what |= MASK_TC_BIT(rw, RAHEAD); |
207 | what |= MASK_TC_BIT(rw, META); | 208 | what |= MASK_TC_BIT(rw, META); |
208 | what |= MASK_TC_BIT(rw, DISCARD); | 209 | what |= MASK_TC_BIT(rw, DISCARD); |
210 | what |= MASK_TC_BIT(rw, FLUSH); | ||
211 | what |= MASK_TC_BIT(rw, FUA); | ||
209 | 212 | ||
210 | pid = tsk->pid; | 213 | pid = tsk->pid; |
211 | if (act_log_check(bt, what, sector, pid)) | 214 | if (act_log_check(bt, what, sector, pid)) |
@@ -1054,6 +1057,9 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) | |||
1054 | goto out; | 1057 | goto out; |
1055 | } | 1058 | } |
1056 | 1059 | ||
1060 | if (tc & BLK_TC_FLUSH) | ||
1061 | rwbs[i++] = 'F'; | ||
1062 | |||
1057 | if (tc & BLK_TC_DISCARD) | 1063 | if (tc & BLK_TC_DISCARD) |
1058 | rwbs[i++] = 'D'; | 1064 | rwbs[i++] = 'D'; |
1059 | else if (tc & BLK_TC_WRITE) | 1065 | else if (tc & BLK_TC_WRITE) |
@@ -1063,10 +1069,10 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) | |||
1063 | else | 1069 | else |
1064 | rwbs[i++] = 'N'; | 1070 | rwbs[i++] = 'N'; |
1065 | 1071 | ||
1072 | if (tc & BLK_TC_FUA) | ||
1073 | rwbs[i++] = 'F'; | ||
1066 | if (tc & BLK_TC_AHEAD) | 1074 | if (tc & BLK_TC_AHEAD) |
1067 | rwbs[i++] = 'A'; | 1075 | rwbs[i++] = 'A'; |
1068 | if (tc & BLK_TC_BARRIER) | ||
1069 | rwbs[i++] = 'B'; | ||
1070 | if (tc & BLK_TC_SYNC) | 1076 | if (tc & BLK_TC_SYNC) |
1071 | rwbs[i++] = 'S'; | 1077 | rwbs[i++] = 'S'; |
1072 | if (tc & BLK_TC_META) | 1078 | if (tc & BLK_TC_META) |
@@ -1132,7 +1138,7 @@ typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); | |||
1132 | 1138 | ||
1133 | static int blk_log_action_classic(struct trace_iterator *iter, const char *act) | 1139 | static int blk_log_action_classic(struct trace_iterator *iter, const char *act) |
1134 | { | 1140 | { |
1135 | char rwbs[6]; | 1141 | char rwbs[RWBS_LEN]; |
1136 | unsigned long long ts = iter->ts; | 1142 | unsigned long long ts = iter->ts; |
1137 | unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); | 1143 | unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); |
1138 | unsigned secs = (unsigned long)ts; | 1144 | unsigned secs = (unsigned long)ts; |
@@ -1148,7 +1154,7 @@ static int blk_log_action_classic(struct trace_iterator *iter, const char *act) | |||
1148 | 1154 | ||
1149 | static int blk_log_action(struct trace_iterator *iter, const char *act) | 1155 | static int blk_log_action(struct trace_iterator *iter, const char *act) |
1150 | { | 1156 | { |
1151 | char rwbs[6]; | 1157 | char rwbs[RWBS_LEN]; |
1152 | const struct blk_io_trace *t = te_blk_io_trace(iter->ent); | 1158 | const struct blk_io_trace *t = te_blk_io_trace(iter->ent); |
1153 | 1159 | ||
1154 | fill_rwbs(rwbs, t); | 1160 | fill_rwbs(rwbs, t); |
@@ -1561,7 +1567,7 @@ static const struct { | |||
1561 | } mask_maps[] = { | 1567 | } mask_maps[] = { |
1562 | { BLK_TC_READ, "read" }, | 1568 | { BLK_TC_READ, "read" }, |
1563 | { BLK_TC_WRITE, "write" }, | 1569 | { BLK_TC_WRITE, "write" }, |
1564 | { BLK_TC_BARRIER, "barrier" }, | 1570 | { BLK_TC_FLUSH, "flush" }, |
1565 | { BLK_TC_SYNC, "sync" }, | 1571 | { BLK_TC_SYNC, "sync" }, |
1566 | { BLK_TC_QUEUE, "queue" }, | 1572 | { BLK_TC_QUEUE, "queue" }, |
1567 | { BLK_TC_REQUEUE, "requeue" }, | 1573 | { BLK_TC_REQUEUE, "requeue" }, |
@@ -1573,6 +1579,7 @@ static const struct { | |||
1573 | { BLK_TC_META, "meta" }, | 1579 | { BLK_TC_META, "meta" }, |
1574 | { BLK_TC_DISCARD, "discard" }, | 1580 | { BLK_TC_DISCARD, "discard" }, |
1575 | { BLK_TC_DRV_DATA, "drv_data" }, | 1581 | { BLK_TC_DRV_DATA, "drv_data" }, |
1582 | { BLK_TC_FUA, "fua" }, | ||
1576 | }; | 1583 | }; |
1577 | 1584 | ||
1578 | static int blk_trace_str2mask(const char *str) | 1585 | static int blk_trace_str2mask(const char *str) |
@@ -1788,6 +1795,9 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) | |||
1788 | { | 1795 | { |
1789 | int i = 0; | 1796 | int i = 0; |
1790 | 1797 | ||
1798 | if (rw & REQ_FLUSH) | ||
1799 | rwbs[i++] = 'F'; | ||
1800 | |||
1791 | if (rw & WRITE) | 1801 | if (rw & WRITE) |
1792 | rwbs[i++] = 'W'; | 1802 | rwbs[i++] = 'W'; |
1793 | else if (rw & REQ_DISCARD) | 1803 | else if (rw & REQ_DISCARD) |
@@ -1797,6 +1807,8 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) | |||
1797 | else | 1807 | else |
1798 | rwbs[i++] = 'N'; | 1808 | rwbs[i++] = 'N'; |
1799 | 1809 | ||
1810 | if (rw & REQ_FUA) | ||
1811 | rwbs[i++] = 'F'; | ||
1800 | if (rw & REQ_RAHEAD) | 1812 | if (rw & REQ_RAHEAD) |
1801 | rwbs[i++] = 'A'; | 1813 | rwbs[i++] = 'A'; |
1802 | if (rw & REQ_SYNC) | 1814 | if (rw & REQ_SYNC) |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index c3e4575e782..900b409543d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/hardirq.h> | 22 | #include <linux/hardirq.h> |
23 | #include <linux/kthread.h> | 23 | #include <linux/kthread.h> |
24 | #include <linux/uaccess.h> | 24 | #include <linux/uaccess.h> |
25 | #include <linux/module.h> | ||
25 | #include <linux/ftrace.h> | 26 | #include <linux/ftrace.h> |
26 | #include <linux/sysctl.h> | 27 | #include <linux/sysctl.h> |
27 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
@@ -3863,6 +3864,14 @@ void ftrace_kill(void) | |||
3863 | } | 3864 | } |
3864 | 3865 | ||
/**
 * ftrace_is_dead - Test if ftrace is dead or not.
 *
 * Returns the current value of ftrace_disabled unchanged; a non-zero
 * result means ftrace is considered dead.
 *
 * NOTE(review): ftrace_disabled is defined outside this hunk; presumably
 * it is set by ftrace_kill() and by internal failure paths - confirm
 * against the rest of ftrace.c.
 */
int ftrace_is_dead(void)
{
	return ftrace_disabled;
}
3873 | |||
3874 | /** | ||
3866 | * register_ftrace_function - register a function for profiling | 3875 | * register_ftrace_function - register a function for profiling |
3867 | * @ops - ops structure that holds the function for profiling. | 3876 | * @ops - ops structure that holds the function for profiling. |
3868 | * | 3877 | * |
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 731201bf4ac..f5b7b5c1195 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -478,7 +478,7 @@ struct ring_buffer_per_cpu { | |||
478 | int cpu; | 478 | int cpu; |
479 | atomic_t record_disabled; | 479 | atomic_t record_disabled; |
480 | struct ring_buffer *buffer; | 480 | struct ring_buffer *buffer; |
481 | spinlock_t reader_lock; /* serialize readers */ | 481 | raw_spinlock_t reader_lock; /* serialize readers */ |
482 | arch_spinlock_t lock; | 482 | arch_spinlock_t lock; |
483 | struct lock_class_key lock_key; | 483 | struct lock_class_key lock_key; |
484 | struct list_head *pages; | 484 | struct list_head *pages; |
@@ -488,12 +488,14 @@ struct ring_buffer_per_cpu { | |||
488 | struct buffer_page *reader_page; | 488 | struct buffer_page *reader_page; |
489 | unsigned long lost_events; | 489 | unsigned long lost_events; |
490 | unsigned long last_overrun; | 490 | unsigned long last_overrun; |
491 | local_t entries_bytes; | ||
491 | local_t commit_overrun; | 492 | local_t commit_overrun; |
492 | local_t overrun; | 493 | local_t overrun; |
493 | local_t entries; | 494 | local_t entries; |
494 | local_t committing; | 495 | local_t committing; |
495 | local_t commits; | 496 | local_t commits; |
496 | unsigned long read; | 497 | unsigned long read; |
498 | unsigned long read_bytes; | ||
497 | u64 write_stamp; | 499 | u64 write_stamp; |
498 | u64 read_stamp; | 500 | u64 read_stamp; |
499 | }; | 501 | }; |
@@ -1062,7 +1064,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) | |||
1062 | 1064 | ||
1063 | cpu_buffer->cpu = cpu; | 1065 | cpu_buffer->cpu = cpu; |
1064 | cpu_buffer->buffer = buffer; | 1066 | cpu_buffer->buffer = buffer; |
1065 | spin_lock_init(&cpu_buffer->reader_lock); | 1067 | raw_spin_lock_init(&cpu_buffer->reader_lock); |
1066 | lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); | 1068 | lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); |
1067 | cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; | 1069 | cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; |
1068 | 1070 | ||
@@ -1259,7 +1261,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) | |||
1259 | struct list_head *p; | 1261 | struct list_head *p; |
1260 | unsigned i; | 1262 | unsigned i; |
1261 | 1263 | ||
1262 | spin_lock_irq(&cpu_buffer->reader_lock); | 1264 | raw_spin_lock_irq(&cpu_buffer->reader_lock); |
1263 | rb_head_page_deactivate(cpu_buffer); | 1265 | rb_head_page_deactivate(cpu_buffer); |
1264 | 1266 | ||
1265 | for (i = 0; i < nr_pages; i++) { | 1267 | for (i = 0; i < nr_pages; i++) { |
@@ -1277,7 +1279,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) | |||
1277 | rb_check_pages(cpu_buffer); | 1279 | rb_check_pages(cpu_buffer); |
1278 | 1280 | ||
1279 | out: | 1281 | out: |
1280 | spin_unlock_irq(&cpu_buffer->reader_lock); | 1282 | raw_spin_unlock_irq(&cpu_buffer->reader_lock); |
1281 | } | 1283 | } |
1282 | 1284 | ||
1283 | static void | 1285 | static void |
@@ -1288,7 +1290,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, | |||
1288 | struct list_head *p; | 1290 | struct list_head *p; |
1289 | unsigned i; | 1291 | unsigned i; |
1290 | 1292 | ||
1291 | spin_lock_irq(&cpu_buffer->reader_lock); | 1293 | raw_spin_lock_irq(&cpu_buffer->reader_lock); |
1292 | rb_head_page_deactivate(cpu_buffer); | 1294 | rb_head_page_deactivate(cpu_buffer); |
1293 | 1295 | ||
1294 | for (i = 0; i < nr_pages; i++) { | 1296 | for (i = 0; i < nr_pages; i++) { |
@@ -1303,7 +1305,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, | |||
1303 | rb_check_pages(cpu_buffer); | 1305 | rb_check_pages(cpu_buffer); |
1304 | 1306 | ||
1305 | out: | 1307 | out: |
1306 | spin_unlock_irq(&cpu_buffer->reader_lock); | 1308 | raw_spin_unlock_irq(&cpu_buffer->reader_lock); |
1307 | } | 1309 | } |
1308 | 1310 | ||
1309 | /** | 1311 | /** |
@@ -1708,6 +1710,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, | |||
1708 | * the counters. | 1710 | * the counters. |
1709 | */ | 1711 | */ |
1710 | local_add(entries, &cpu_buffer->overrun); | 1712 | local_add(entries, &cpu_buffer->overrun); |
1713 | local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); | ||
1711 | 1714 | ||
1712 | /* | 1715 | /* |
1713 | * The entries will be zeroed out when we move the | 1716 | * The entries will be zeroed out when we move the |
@@ -1863,6 +1866,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1863 | event = __rb_page_index(tail_page, tail); | 1866 | event = __rb_page_index(tail_page, tail); |
1864 | kmemcheck_annotate_bitfield(event, bitfield); | 1867 | kmemcheck_annotate_bitfield(event, bitfield); |
1865 | 1868 | ||
1869 | /* account for padding bytes */ | ||
1870 | local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); | ||
1871 | |||
1866 | /* | 1872 | /* |
1867 | * Save the original length to the meta data. | 1873 | * Save the original length to the meta data. |
1868 | * This will be used by the reader to add lost event | 1874 | * This will be used by the reader to add lost event |
@@ -2054,6 +2060,9 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
2054 | if (!tail) | 2060 | if (!tail) |
2055 | tail_page->page->time_stamp = ts; | 2061 | tail_page->page->time_stamp = ts; |
2056 | 2062 | ||
2063 | /* account for these added bytes */ | ||
2064 | local_add(length, &cpu_buffer->entries_bytes); | ||
2065 | |||
2057 | return event; | 2066 | return event; |
2058 | } | 2067 | } |
2059 | 2068 | ||
@@ -2076,6 +2085,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, | |||
2076 | if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { | 2085 | if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { |
2077 | unsigned long write_mask = | 2086 | unsigned long write_mask = |
2078 | local_read(&bpage->write) & ~RB_WRITE_MASK; | 2087 | local_read(&bpage->write) & ~RB_WRITE_MASK; |
2088 | unsigned long event_length = rb_event_length(event); | ||
2079 | /* | 2089 | /* |
2080 | * This is on the tail page. It is possible that | 2090 | * This is on the tail page. It is possible that |
2081 | * a write could come in and move the tail page | 2091 | * a write could come in and move the tail page |
@@ -2085,8 +2095,11 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, | |||
2085 | old_index += write_mask; | 2095 | old_index += write_mask; |
2086 | new_index += write_mask; | 2096 | new_index += write_mask; |
2087 | index = local_cmpxchg(&bpage->write, old_index, new_index); | 2097 | index = local_cmpxchg(&bpage->write, old_index, new_index); |
2088 | if (index == old_index) | 2098 | if (index == old_index) { |
2099 | /* update counters */ | ||
2100 | local_sub(event_length, &cpu_buffer->entries_bytes); | ||
2089 | return 1; | 2101 | return 1; |
2102 | } | ||
2090 | } | 2103 | } |
2091 | 2104 | ||
2092 | /* could not discard */ | 2105 | /* could not discard */ |
@@ -2661,6 +2674,58 @@ rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) | |||
2661 | } | 2674 | } |
2662 | 2675 | ||
2663 | /** | 2676 | /** |
2677 | * ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer | ||
2678 | * @buffer: The ring buffer | ||
2679 | * @cpu: The per CPU buffer to read from. | ||
2680 | */ | ||
2681 | unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu) | ||
2682 | { | ||
2683 | unsigned long flags; | ||
2684 | struct ring_buffer_per_cpu *cpu_buffer; | ||
2685 | struct buffer_page *bpage; | ||
2686 | unsigned long ret; | ||
2687 | |||
2688 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | ||
2689 | return 0; | ||
2690 | |||
2691 | cpu_buffer = buffer->buffers[cpu]; | ||
2692 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | ||
2693 | /* | ||
2694 | * if the tail is on reader_page, oldest time stamp is on the reader | ||
2695 | * page | ||
2696 | */ | ||
2697 | if (cpu_buffer->tail_page == cpu_buffer->reader_page) | ||
2698 | bpage = cpu_buffer->reader_page; | ||
2699 | else | ||
2700 | bpage = rb_set_head_page(cpu_buffer); | ||
2701 | ret = bpage->page->time_stamp; | ||
2702 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | ||
2703 | |||
2704 | return ret; | ||
2705 | } | ||
2706 | EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts); | ||
2707 | |||
2708 | /** | ||
2709 | * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer | ||
2710 | * @buffer: The ring buffer | ||
2711 | * @cpu: The per CPU buffer to read from. | ||
2712 | */ | ||
2713 | unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu) | ||
2714 | { | ||
2715 | struct ring_buffer_per_cpu *cpu_buffer; | ||
2716 | unsigned long ret; | ||
2717 | |||
2718 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | ||
2719 | return 0; | ||
2720 | |||
2721 | cpu_buffer = buffer->buffers[cpu]; | ||
2722 | ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes; | ||
2723 | |||
2724 | return ret; | ||
2725 | } | ||
2726 | EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu); | ||
2727 | |||
2728 | /** | ||
2664 | * ring_buffer_entries_cpu - get the number of entries in a cpu buffer | 2729 | * ring_buffer_entries_cpu - get the number of entries in a cpu buffer |
2665 | * @buffer: The ring buffer | 2730 | * @buffer: The ring buffer |
2666 | * @cpu: The per CPU buffer to get the entries from. | 2731 | * @cpu: The per CPU buffer to get the entries from. |
@@ -2804,9 +2869,9 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter) | |||
2804 | 2869 | ||
2805 | cpu_buffer = iter->cpu_buffer; | 2870 | cpu_buffer = iter->cpu_buffer; |
2806 | 2871 | ||
2807 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 2872 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
2808 | rb_iter_reset(iter); | 2873 | rb_iter_reset(iter); |
2809 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 2874 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
2810 | } | 2875 | } |
2811 | EXPORT_SYMBOL_GPL(ring_buffer_iter_reset); | 2876 | EXPORT_SYMBOL_GPL(ring_buffer_iter_reset); |
2812 | 2877 | ||
@@ -3265,12 +3330,12 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, | |||
3265 | again: | 3330 | again: |
3266 | local_irq_save(flags); | 3331 | local_irq_save(flags); |
3267 | if (dolock) | 3332 | if (dolock) |
3268 | spin_lock(&cpu_buffer->reader_lock); | 3333 | raw_spin_lock(&cpu_buffer->reader_lock); |
3269 | event = rb_buffer_peek(cpu_buffer, ts, lost_events); | 3334 | event = rb_buffer_peek(cpu_buffer, ts, lost_events); |
3270 | if (event && event->type_len == RINGBUF_TYPE_PADDING) | 3335 | if (event && event->type_len == RINGBUF_TYPE_PADDING) |
3271 | rb_advance_reader(cpu_buffer); | 3336 | rb_advance_reader(cpu_buffer); |
3272 | if (dolock) | 3337 | if (dolock) |
3273 | spin_unlock(&cpu_buffer->reader_lock); | 3338 | raw_spin_unlock(&cpu_buffer->reader_lock); |
3274 | local_irq_restore(flags); | 3339 | local_irq_restore(flags); |
3275 | 3340 | ||
3276 | if (event && event->type_len == RINGBUF_TYPE_PADDING) | 3341 | if (event && event->type_len == RINGBUF_TYPE_PADDING) |
@@ -3295,9 +3360,9 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
3295 | unsigned long flags; | 3360 | unsigned long flags; |
3296 | 3361 | ||
3297 | again: | 3362 | again: |
3298 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 3363 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
3299 | event = rb_iter_peek(iter, ts); | 3364 | event = rb_iter_peek(iter, ts); |
3300 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3365 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
3301 | 3366 | ||
3302 | if (event && event->type_len == RINGBUF_TYPE_PADDING) | 3367 | if (event && event->type_len == RINGBUF_TYPE_PADDING) |
3303 | goto again; | 3368 | goto again; |
@@ -3337,7 +3402,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, | |||
3337 | cpu_buffer = buffer->buffers[cpu]; | 3402 | cpu_buffer = buffer->buffers[cpu]; |
3338 | local_irq_save(flags); | 3403 | local_irq_save(flags); |
3339 | if (dolock) | 3404 | if (dolock) |
3340 | spin_lock(&cpu_buffer->reader_lock); | 3405 | raw_spin_lock(&cpu_buffer->reader_lock); |
3341 | 3406 | ||
3342 | event = rb_buffer_peek(cpu_buffer, ts, lost_events); | 3407 | event = rb_buffer_peek(cpu_buffer, ts, lost_events); |
3343 | if (event) { | 3408 | if (event) { |
@@ -3346,7 +3411,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, | |||
3346 | } | 3411 | } |
3347 | 3412 | ||
3348 | if (dolock) | 3413 | if (dolock) |
3349 | spin_unlock(&cpu_buffer->reader_lock); | 3414 | raw_spin_unlock(&cpu_buffer->reader_lock); |
3350 | local_irq_restore(flags); | 3415 | local_irq_restore(flags); |
3351 | 3416 | ||
3352 | out: | 3417 | out: |
@@ -3438,11 +3503,11 @@ ring_buffer_read_start(struct ring_buffer_iter *iter) | |||
3438 | 3503 | ||
3439 | cpu_buffer = iter->cpu_buffer; | 3504 | cpu_buffer = iter->cpu_buffer; |
3440 | 3505 | ||
3441 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 3506 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
3442 | arch_spin_lock(&cpu_buffer->lock); | 3507 | arch_spin_lock(&cpu_buffer->lock); |
3443 | rb_iter_reset(iter); | 3508 | rb_iter_reset(iter); |
3444 | arch_spin_unlock(&cpu_buffer->lock); | 3509 | arch_spin_unlock(&cpu_buffer->lock); |
3445 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3510 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
3446 | } | 3511 | } |
3447 | EXPORT_SYMBOL_GPL(ring_buffer_read_start); | 3512 | EXPORT_SYMBOL_GPL(ring_buffer_read_start); |
3448 | 3513 | ||
@@ -3477,7 +3542,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) | |||
3477 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 3542 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; |
3478 | unsigned long flags; | 3543 | unsigned long flags; |
3479 | 3544 | ||
3480 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 3545 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
3481 | again: | 3546 | again: |
3482 | event = rb_iter_peek(iter, ts); | 3547 | event = rb_iter_peek(iter, ts); |
3483 | if (!event) | 3548 | if (!event) |
@@ -3488,7 +3553,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) | |||
3488 | 3553 | ||
3489 | rb_advance_iter(iter); | 3554 | rb_advance_iter(iter); |
3490 | out: | 3555 | out: |
3491 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3556 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
3492 | 3557 | ||
3493 | return event; | 3558 | return event; |
3494 | } | 3559 | } |
@@ -3527,11 +3592,13 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) | |||
3527 | cpu_buffer->reader_page->read = 0; | 3592 | cpu_buffer->reader_page->read = 0; |
3528 | 3593 | ||
3529 | local_set(&cpu_buffer->commit_overrun, 0); | 3594 | local_set(&cpu_buffer->commit_overrun, 0); |
3595 | local_set(&cpu_buffer->entries_bytes, 0); | ||
3530 | local_set(&cpu_buffer->overrun, 0); | 3596 | local_set(&cpu_buffer->overrun, 0); |
3531 | local_set(&cpu_buffer->entries, 0); | 3597 | local_set(&cpu_buffer->entries, 0); |
3532 | local_set(&cpu_buffer->committing, 0); | 3598 | local_set(&cpu_buffer->committing, 0); |
3533 | local_set(&cpu_buffer->commits, 0); | 3599 | local_set(&cpu_buffer->commits, 0); |
3534 | cpu_buffer->read = 0; | 3600 | cpu_buffer->read = 0; |
3601 | cpu_buffer->read_bytes = 0; | ||
3535 | 3602 | ||
3536 | cpu_buffer->write_stamp = 0; | 3603 | cpu_buffer->write_stamp = 0; |
3537 | cpu_buffer->read_stamp = 0; | 3604 | cpu_buffer->read_stamp = 0; |
@@ -3557,7 +3624,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) | |||
3557 | 3624 | ||
3558 | atomic_inc(&cpu_buffer->record_disabled); | 3625 | atomic_inc(&cpu_buffer->record_disabled); |
3559 | 3626 | ||
3560 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 3627 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
3561 | 3628 | ||
3562 | if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) | 3629 | if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) |
3563 | goto out; | 3630 | goto out; |
@@ -3569,7 +3636,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) | |||
3569 | arch_spin_unlock(&cpu_buffer->lock); | 3636 | arch_spin_unlock(&cpu_buffer->lock); |
3570 | 3637 | ||
3571 | out: | 3638 | out: |
3572 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 3639 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
3573 | 3640 | ||
3574 | atomic_dec(&cpu_buffer->record_disabled); | 3641 | atomic_dec(&cpu_buffer->record_disabled); |
3575 | } | 3642 | } |
@@ -3607,10 +3674,10 @@ int ring_buffer_empty(struct ring_buffer *buffer) | |||
3607 | cpu_buffer = buffer->buffers[cpu]; | 3674 | cpu_buffer = buffer->buffers[cpu]; |
3608 | local_irq_save(flags); | 3675 | local_irq_save(flags); |
3609 | if (dolock) | 3676 | if (dolock) |
3610 | spin_lock(&cpu_buffer->reader_lock); | 3677 | raw_spin_lock(&cpu_buffer->reader_lock); |
3611 | ret = rb_per_cpu_empty(cpu_buffer); | 3678 | ret = rb_per_cpu_empty(cpu_buffer); |
3612 | if (dolock) | 3679 | if (dolock) |
3613 | spin_unlock(&cpu_buffer->reader_lock); | 3680 | raw_spin_unlock(&cpu_buffer->reader_lock); |
3614 | local_irq_restore(flags); | 3681 | local_irq_restore(flags); |
3615 | 3682 | ||
3616 | if (!ret) | 3683 | if (!ret) |
@@ -3641,10 +3708,10 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) | |||
3641 | cpu_buffer = buffer->buffers[cpu]; | 3708 | cpu_buffer = buffer->buffers[cpu]; |
3642 | local_irq_save(flags); | 3709 | local_irq_save(flags); |
3643 | if (dolock) | 3710 | if (dolock) |
3644 | spin_lock(&cpu_buffer->reader_lock); | 3711 | raw_spin_lock(&cpu_buffer->reader_lock); |
3645 | ret = rb_per_cpu_empty(cpu_buffer); | 3712 | ret = rb_per_cpu_empty(cpu_buffer); |
3646 | if (dolock) | 3713 | if (dolock) |
3647 | spin_unlock(&cpu_buffer->reader_lock); | 3714 | raw_spin_unlock(&cpu_buffer->reader_lock); |
3648 | local_irq_restore(flags); | 3715 | local_irq_restore(flags); |
3649 | 3716 | ||
3650 | return ret; | 3717 | return ret; |
@@ -3841,7 +3908,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3841 | if (!bpage) | 3908 | if (!bpage) |
3842 | goto out; | 3909 | goto out; |
3843 | 3910 | ||
3844 | spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | 3911 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); |
3845 | 3912 | ||
3846 | reader = rb_get_reader_page(cpu_buffer); | 3913 | reader = rb_get_reader_page(cpu_buffer); |
3847 | if (!reader) | 3914 | if (!reader) |
@@ -3918,6 +3985,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3918 | } else { | 3985 | } else { |
3919 | /* update the entry counter */ | 3986 | /* update the entry counter */ |
3920 | cpu_buffer->read += rb_page_entries(reader); | 3987 | cpu_buffer->read += rb_page_entries(reader); |
3988 | cpu_buffer->read_bytes += BUF_PAGE_SIZE; | ||
3921 | 3989 | ||
3922 | /* swap the pages */ | 3990 | /* swap the pages */ |
3923 | rb_init_page(bpage); | 3991 | rb_init_page(bpage); |
@@ -3964,7 +4032,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3964 | memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit); | 4032 | memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit); |
3965 | 4033 | ||
3966 | out_unlock: | 4034 | out_unlock: |
3967 | spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | 4035 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); |
3968 | 4036 | ||
3969 | out: | 4037 | out: |
3970 | return ret; | 4038 | return ret; |
diff --git a/kernel/trace/rpm-traces.c b/kernel/trace/rpm-traces.c new file mode 100644 index 00000000000..4b3b5eaf94d --- /dev/null +++ b/kernel/trace/rpm-traces.c | |||
@@ -0,0 +1,20 @@ | |||
1 | /* | ||
2 | * Power trace points | ||
3 | * | ||
4 | * Copyright (C) 2009 Ming Lei <ming.lei@canonical.com> | ||
5 | */ | ||
6 | |||
7 | #include <linux/string.h> | ||
8 | #include <linux/types.h> | ||
9 | #include <linux/workqueue.h> | ||
10 | #include <linux/sched.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/usb.h> | ||
13 | |||
14 | #define CREATE_TRACE_POINTS | ||
15 | #include <trace/events/rpm.h> | ||
16 | |||
17 | EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_return_int); | ||
18 | EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_idle); | ||
19 | EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_suspend); | ||
20 | EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_resume); | ||
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e5df02c69b1..f2bd275bb60 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -341,7 +341,7 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | | |||
341 | TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE; | 341 | TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE; |
342 | 342 | ||
343 | static int trace_stop_count; | 343 | static int trace_stop_count; |
344 | static DEFINE_SPINLOCK(tracing_start_lock); | 344 | static DEFINE_RAW_SPINLOCK(tracing_start_lock); |
345 | 345 | ||
346 | static void wakeup_work_handler(struct work_struct *work) | 346 | static void wakeup_work_handler(struct work_struct *work) |
347 | { | 347 | { |
@@ -435,6 +435,7 @@ static struct { | |||
435 | } trace_clocks[] = { | 435 | } trace_clocks[] = { |
436 | { trace_clock_local, "local" }, | 436 | { trace_clock_local, "local" }, |
437 | { trace_clock_global, "global" }, | 437 | { trace_clock_global, "global" }, |
438 | { trace_clock_counter, "counter" }, | ||
438 | }; | 439 | }; |
439 | 440 | ||
440 | int trace_clock_id; | 441 | int trace_clock_id; |
@@ -960,7 +961,7 @@ void tracing_start(void) | |||
960 | if (tracing_disabled) | 961 | if (tracing_disabled) |
961 | return; | 962 | return; |
962 | 963 | ||
963 | spin_lock_irqsave(&tracing_start_lock, flags); | 964 | raw_spin_lock_irqsave(&tracing_start_lock, flags); |
964 | if (--trace_stop_count) { | 965 | if (--trace_stop_count) { |
965 | if (trace_stop_count < 0) { | 966 | if (trace_stop_count < 0) { |
966 | /* Someone screwed up their debugging */ | 967 | /* Someone screwed up their debugging */ |
@@ -985,7 +986,7 @@ void tracing_start(void) | |||
985 | 986 | ||
986 | ftrace_start(); | 987 | ftrace_start(); |
987 | out: | 988 | out: |
988 | spin_unlock_irqrestore(&tracing_start_lock, flags); | 989 | raw_spin_unlock_irqrestore(&tracing_start_lock, flags); |
989 | } | 990 | } |
990 | 991 | ||
991 | /** | 992 | /** |
@@ -1000,7 +1001,7 @@ void tracing_stop(void) | |||
1000 | unsigned long flags; | 1001 | unsigned long flags; |
1001 | 1002 | ||
1002 | ftrace_stop(); | 1003 | ftrace_stop(); |
1003 | spin_lock_irqsave(&tracing_start_lock, flags); | 1004 | raw_spin_lock_irqsave(&tracing_start_lock, flags); |
1004 | if (trace_stop_count++) | 1005 | if (trace_stop_count++) |
1005 | goto out; | 1006 | goto out; |
1006 | 1007 | ||
@@ -1018,7 +1019,7 @@ void tracing_stop(void) | |||
1018 | arch_spin_unlock(&ftrace_max_lock); | 1019 | arch_spin_unlock(&ftrace_max_lock); |
1019 | 1020 | ||
1020 | out: | 1021 | out: |
1021 | spin_unlock_irqrestore(&tracing_start_lock, flags); | 1022 | raw_spin_unlock_irqrestore(&tracing_start_lock, flags); |
1022 | } | 1023 | } |
1023 | 1024 | ||
1024 | void trace_stop_cmdline_recording(void); | 1025 | void trace_stop_cmdline_recording(void); |
@@ -2159,6 +2160,14 @@ void trace_default_header(struct seq_file *m) | |||
2159 | } | 2160 | } |
2160 | } | 2161 | } |
2161 | 2162 | ||
2163 | static void test_ftrace_alive(struct seq_file *m) | ||
2164 | { | ||
2165 | if (!ftrace_is_dead()) | ||
2166 | return; | ||
2167 | seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"); | ||
2168 | seq_printf(m, "# MAY BE MISSING FUNCTION EVENTS\n"); | ||
2169 | } | ||
2170 | |||
2162 | static int s_show(struct seq_file *m, void *v) | 2171 | static int s_show(struct seq_file *m, void *v) |
2163 | { | 2172 | { |
2164 | struct trace_iterator *iter = v; | 2173 | struct trace_iterator *iter = v; |
@@ -2168,6 +2177,7 @@ static int s_show(struct seq_file *m, void *v) | |||
2168 | if (iter->tr) { | 2177 | if (iter->tr) { |
2169 | seq_printf(m, "# tracer: %s\n", iter->trace->name); | 2178 | seq_printf(m, "# tracer: %s\n", iter->trace->name); |
2170 | seq_puts(m, "#\n"); | 2179 | seq_puts(m, "#\n"); |
2180 | test_ftrace_alive(m); | ||
2171 | } | 2181 | } |
2172 | if (iter->trace && iter->trace->print_header) | 2182 | if (iter->trace && iter->trace->print_header) |
2173 | iter->trace->print_header(m); | 2183 | iter->trace->print_header(m); |
@@ -2710,9 +2720,9 @@ static const char readme_msg[] = | |||
2710 | "# cat /sys/kernel/debug/tracing/trace_options\n" | 2720 | "# cat /sys/kernel/debug/tracing/trace_options\n" |
2711 | "noprint-parent nosym-offset nosym-addr noverbose\n" | 2721 | "noprint-parent nosym-offset nosym-addr noverbose\n" |
2712 | "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n" | 2722 | "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n" |
2713 | "# echo 1 > /sys/kernel/debug/tracing/tracing_enabled\n" | 2723 | "# echo 1 > /sys/kernel/debug/tracing/tracing_on\n" |
2714 | "# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n" | 2724 | "# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n" |
2715 | "# echo 0 > /sys/kernel/debug/tracing/tracing_enabled\n" | 2725 | "# echo 0 > /sys/kernel/debug/tracing/tracing_on\n" |
2716 | ; | 2726 | ; |
2717 | 2727 | ||
2718 | static ssize_t | 2728 | static ssize_t |
@@ -3569,6 +3579,30 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, | |||
3569 | } | 3579 | } |
3570 | 3580 | ||
3571 | static ssize_t | 3581 | static ssize_t |
3582 | tracing_total_entries_read(struct file *filp, char __user *ubuf, | ||
3583 | size_t cnt, loff_t *ppos) | ||
3584 | { | ||
3585 | struct trace_array *tr = filp->private_data; | ||
3586 | char buf[64]; | ||
3587 | int r, cpu; | ||
3588 | unsigned long size = 0, expanded_size = 0; | ||
3589 | |||
3590 | mutex_lock(&trace_types_lock); | ||
3591 | for_each_tracing_cpu(cpu) { | ||
3592 | size += tr->entries >> 10; | ||
3593 | if (!ring_buffer_expanded) | ||
3594 | expanded_size += trace_buf_size >> 10; | ||
3595 | } | ||
3596 | if (ring_buffer_expanded) | ||
3597 | r = sprintf(buf, "%lu\n", size); | ||
3598 | else | ||
3599 | r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size); | ||
3600 | mutex_unlock(&trace_types_lock); | ||
3601 | |||
3602 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | ||
3603 | } | ||
3604 | |||
3605 | static ssize_t | ||
3572 | tracing_free_buffer_write(struct file *filp, const char __user *ubuf, | 3606 | tracing_free_buffer_write(struct file *filp, const char __user *ubuf, |
3573 | size_t cnt, loff_t *ppos) | 3607 | size_t cnt, loff_t *ppos) |
3574 | { | 3608 | { |
@@ -3594,22 +3628,24 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp) | |||
3594 | return 0; | 3628 | return 0; |
3595 | } | 3629 | } |
3596 | 3630 | ||
3597 | static int mark_printk(const char *fmt, ...) | ||
3598 | { | ||
3599 | int ret; | ||
3600 | va_list args; | ||
3601 | va_start(args, fmt); | ||
3602 | ret = trace_vprintk(0, fmt, args); | ||
3603 | va_end(args); | ||
3604 | return ret; | ||
3605 | } | ||
3606 | |||
3607 | static ssize_t | 3631 | static ssize_t |
3608 | tracing_mark_write(struct file *filp, const char __user *ubuf, | 3632 | tracing_mark_write(struct file *filp, const char __user *ubuf, |
3609 | size_t cnt, loff_t *fpos) | 3633 | size_t cnt, loff_t *fpos) |
3610 | { | 3634 | { |
3611 | char *buf; | 3635 | unsigned long addr = (unsigned long)ubuf; |
3612 | size_t written; | 3636 | struct ring_buffer_event *event; |
3637 | struct ring_buffer *buffer; | ||
3638 | struct print_entry *entry; | ||
3639 | unsigned long irq_flags; | ||
3640 | struct page *pages[2]; | ||
3641 | int nr_pages = 1; | ||
3642 | ssize_t written; | ||
3643 | void *page1; | ||
3644 | void *page2; | ||
3645 | int offset; | ||
3646 | int size; | ||
3647 | int len; | ||
3648 | int ret; | ||
3613 | 3649 | ||
3614 | if (tracing_disabled) | 3650 | if (tracing_disabled) |
3615 | return -EINVAL; | 3651 | return -EINVAL; |
@@ -3617,28 +3653,81 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, | |||
3617 | if (cnt > TRACE_BUF_SIZE) | 3653 | if (cnt > TRACE_BUF_SIZE) |
3618 | cnt = TRACE_BUF_SIZE; | 3654 | cnt = TRACE_BUF_SIZE; |
3619 | 3655 | ||
3620 | buf = kmalloc(cnt + 2, GFP_KERNEL); | 3656 | /* |
3621 | if (buf == NULL) | 3657 | * Userspace is injecting traces into the kernel trace buffer. |
3622 | return -ENOMEM; | 3658 | * We want to be as non intrusive as possible. |
3659 | * To do so, we do not want to allocate any special buffers | ||
3660 | * or take any locks, but instead write the userspace data | ||
3661 | * straight into the ring buffer. | ||
3662 | * | ||
3663 | * First we need to pin the userspace buffer into memory, | ||
3664 | * which, most likely it is, because it just referenced it. | ||
3665 | * But there's no guarantee that it is. By using get_user_pages_fast() | ||
3666 | * and kmap_atomic/kunmap_atomic() we can get access to the | ||
3667 | * pages directly. We then write the data directly into the | ||
3668 | * ring buffer. | ||
3669 | */ | ||
3670 | BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); | ||
3623 | 3671 | ||
3624 | if (copy_from_user(buf, ubuf, cnt)) { | 3672 | /* check if we cross pages */ |
3625 | kfree(buf); | 3673 | if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK)) |
3626 | return -EFAULT; | 3674 | nr_pages = 2; |
3675 | |||
3676 | offset = addr & (PAGE_SIZE - 1); | ||
3677 | addr &= PAGE_MASK; | ||
3678 | |||
3679 | ret = get_user_pages_fast(addr, nr_pages, 0, pages); | ||
3680 | if (ret < nr_pages) { | ||
3681 | while (--ret >= 0) | ||
3682 | put_page(pages[ret]); | ||
3683 | written = -EFAULT; | ||
3684 | goto out; | ||
3685 | } | ||
3686 | |||
3687 | page1 = kmap_atomic(pages[0]); | ||
3688 | if (nr_pages == 2) | ||
3689 | page2 = kmap_atomic(pages[1]); | ||
3690 | |||
3691 | local_save_flags(irq_flags); | ||
3692 | size = sizeof(*entry) + cnt + 2; /* possible \n added */ | ||
3693 | buffer = global_trace.buffer; | ||
3694 | event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, | ||
3695 | irq_flags, preempt_count()); | ||
3696 | if (!event) { | ||
3697 | /* Ring buffer disabled, return as if not open for write */ | ||
3698 | written = -EBADF; | ||
3699 | goto out_unlock; | ||
3627 | } | 3700 | } |
3628 | if (buf[cnt-1] != '\n') { | 3701 | |
3629 | buf[cnt] = '\n'; | 3702 | entry = ring_buffer_event_data(event); |
3630 | buf[cnt+1] = '\0'; | 3703 | entry->ip = _THIS_IP_; |
3704 | |||
3705 | if (nr_pages == 2) { | ||
3706 | len = PAGE_SIZE - offset; | ||
3707 | memcpy(&entry->buf, page1 + offset, len); | ||
3708 | memcpy(&entry->buf[len], page2, cnt - len); | ||
3631 | } else | 3709 | } else |
3632 | buf[cnt] = '\0'; | 3710 | memcpy(&entry->buf, page1 + offset, cnt); |
3633 | 3711 | ||
3634 | written = mark_printk("%s", buf); | 3712 | if (entry->buf[cnt - 1] != '\n') { |
3635 | kfree(buf); | 3713 | entry->buf[cnt] = '\n'; |
3636 | *fpos += written; | 3714 | entry->buf[cnt + 1] = '\0'; |
3715 | } else | ||
3716 | entry->buf[cnt] = '\0'; | ||
3717 | |||
3718 | ring_buffer_unlock_commit(buffer, event); | ||
3637 | 3719 | ||
3638 | /* don't tell userspace we wrote more - it might confuse them */ | 3720 | written = cnt; |
3639 | if (written > cnt) | ||
3640 | written = cnt; | ||
3641 | 3721 | ||
3722 | *fpos += written; | ||
3723 | |||
3724 | out_unlock: | ||
3725 | if (nr_pages == 2) | ||
3726 | kunmap_atomic(page2); | ||
3727 | kunmap_atomic(page1); | ||
3728 | while (nr_pages > 0) | ||
3729 | put_page(pages[--nr_pages]); | ||
3730 | out: | ||
3642 | return written; | 3731 | return written; |
3643 | } | 3732 | } |
3644 | 3733 | ||
@@ -3739,6 +3828,12 @@ static const struct file_operations tracing_entries_fops = { | |||
3739 | .llseek = generic_file_llseek, | 3828 | .llseek = generic_file_llseek, |
3740 | }; | 3829 | }; |
3741 | 3830 | ||
3831 | static const struct file_operations tracing_total_entries_fops = { | ||
3832 | .open = tracing_open_generic, | ||
3833 | .read = tracing_total_entries_read, | ||
3834 | .llseek = generic_file_llseek, | ||
3835 | }; | ||
3836 | |||
3742 | static const struct file_operations tracing_free_buffer_fops = { | 3837 | static const struct file_operations tracing_free_buffer_fops = { |
3743 | .write = tracing_free_buffer_write, | 3838 | .write = tracing_free_buffer_write, |
3744 | .release = tracing_free_buffer_release, | 3839 | .release = tracing_free_buffer_release, |
@@ -3808,8 +3903,6 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, | |||
3808 | if (info->read < PAGE_SIZE) | 3903 | if (info->read < PAGE_SIZE) |
3809 | goto read; | 3904 | goto read; |
3810 | 3905 | ||
3811 | info->read = 0; | ||
3812 | |||
3813 | trace_access_lock(info->cpu); | 3906 | trace_access_lock(info->cpu); |
3814 | ret = ring_buffer_read_page(info->tr->buffer, | 3907 | ret = ring_buffer_read_page(info->tr->buffer, |
3815 | &info->spare, | 3908 | &info->spare, |
@@ -3819,6 +3912,8 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, | |||
3819 | if (ret < 0) | 3912 | if (ret < 0) |
3820 | return 0; | 3913 | return 0; |
3821 | 3914 | ||
3915 | info->read = 0; | ||
3916 | |||
3822 | read: | 3917 | read: |
3823 | size = PAGE_SIZE - info->read; | 3918 | size = PAGE_SIZE - info->read; |
3824 | if (size > count) | 3919 | if (size > count) |
@@ -4026,6 +4121,8 @@ tracing_stats_read(struct file *filp, char __user *ubuf, | |||
4026 | struct trace_array *tr = &global_trace; | 4121 | struct trace_array *tr = &global_trace; |
4027 | struct trace_seq *s; | 4122 | struct trace_seq *s; |
4028 | unsigned long cnt; | 4123 | unsigned long cnt; |
4124 | unsigned long long t; | ||
4125 | unsigned long usec_rem; | ||
4029 | 4126 | ||
4030 | s = kmalloc(sizeof(*s), GFP_KERNEL); | 4127 | s = kmalloc(sizeof(*s), GFP_KERNEL); |
4031 | if (!s) | 4128 | if (!s) |
@@ -4042,6 +4139,17 @@ tracing_stats_read(struct file *filp, char __user *ubuf, | |||
4042 | cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu); | 4139 | cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu); |
4043 | trace_seq_printf(s, "commit overrun: %ld\n", cnt); | 4140 | trace_seq_printf(s, "commit overrun: %ld\n", cnt); |
4044 | 4141 | ||
4142 | cnt = ring_buffer_bytes_cpu(tr->buffer, cpu); | ||
4143 | trace_seq_printf(s, "bytes: %ld\n", cnt); | ||
4144 | |||
4145 | t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu)); | ||
4146 | usec_rem = do_div(t, USEC_PER_SEC); | ||
4147 | trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", t, usec_rem); | ||
4148 | |||
4149 | t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu)); | ||
4150 | usec_rem = do_div(t, USEC_PER_SEC); | ||
4151 | trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); | ||
4152 | |||
4045 | count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); | 4153 | count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); |
4046 | 4154 | ||
4047 | kfree(s); | 4155 | kfree(s); |
@@ -4450,6 +4558,9 @@ static __init int tracer_init_debugfs(void) | |||
4450 | trace_create_file("buffer_size_kb", 0644, d_tracer, | 4558 | trace_create_file("buffer_size_kb", 0644, d_tracer, |
4451 | &global_trace, &tracing_entries_fops); | 4559 | &global_trace, &tracing_entries_fops); |
4452 | 4560 | ||
4561 | trace_create_file("buffer_total_size_kb", 0444, d_tracer, | ||
4562 | &global_trace, &tracing_total_entries_fops); | ||
4563 | |||
4453 | trace_create_file("free_buffer", 0644, d_tracer, | 4564 | trace_create_file("free_buffer", 0644, d_tracer, |
4454 | &global_trace, &tracing_free_buffer_fops); | 4565 | &global_trace, &tracing_free_buffer_fops); |
4455 | 4566 | ||
@@ -4566,6 +4677,12 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) | |||
4566 | 4677 | ||
4567 | tracing_off(); | 4678 | tracing_off(); |
4568 | 4679 | ||
4680 | /* Did function tracer already get disabled? */ | ||
4681 | if (ftrace_is_dead()) { | ||
4682 | printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n"); | ||
4683 | printk("# MAY BE MISSING FUNCTION EVENTS\n"); | ||
4684 | } | ||
4685 | |||
4569 | if (disable_tracing) | 4686 | if (disable_tracing) |
4570 | ftrace_kill(); | 4687 | ftrace_kill(); |
4571 | 4688 | ||
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 616846bcfee..092e1f8d18d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -579,11 +579,13 @@ static inline int ftrace_trace_task(struct task_struct *task) | |||
579 | 579 | ||
580 | return test_tsk_trace_trace(task); | 580 | return test_tsk_trace_trace(task); |
581 | } | 581 | } |
582 | extern int ftrace_is_dead(void); | ||
582 | #else | 583 | #else |
583 | static inline int ftrace_trace_task(struct task_struct *task) | 584 | static inline int ftrace_trace_task(struct task_struct *task) |
584 | { | 585 | { |
585 | return 1; | 586 | return 1; |
586 | } | 587 | } |
588 | static inline int ftrace_is_dead(void) { return 0; } | ||
587 | #endif | 589 | #endif |
588 | 590 | ||
589 | /* | 591 | /* |
@@ -761,16 +763,10 @@ struct filter_pred { | |||
761 | filter_pred_fn_t fn; | 763 | filter_pred_fn_t fn; |
762 | u64 val; | 764 | u64 val; |
763 | struct regex regex; | 765 | struct regex regex; |
764 | /* | 766 | unsigned short *ops; |
765 | * Leaf nodes use field_name, ops is used by AND and OR | 767 | #ifdef CONFIG_FTRACE_STARTUP_TEST |
766 | * nodes. The field_name is always freed when freeing a pred. | 768 | struct ftrace_event_field *field; |
767 | * We can overload field_name for ops and have it freed | 769 | #endif |
768 | * as well. | ||
769 | */ | ||
770 | union { | ||
771 | char *field_name; | ||
772 | unsigned short *ops; | ||
773 | }; | ||
774 | int offset; | 770 | int offset; |
775 | int not; | 771 | int not; |
776 | int op; | 772 | int op; |
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 6302747a139..394783531cb 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c | |||
@@ -113,3 +113,15 @@ u64 notrace trace_clock_global(void) | |||
113 | 113 | ||
114 | return now; | 114 | return now; |
115 | } | 115 | } |
116 | |||
117 | static atomic64_t trace_counter; | ||
118 | |||
119 | /* | ||
120 | * trace_clock_counter(): simply an atomic counter. | ||
121 | * Use the trace_counter "counter" for cases where you do not care | ||
122 | * about timings, but are interested in strict ordering. | ||
123 | */ | ||
124 | u64 notrace trace_clock_counter(void) | ||
125 | { | ||
126 | return atomic64_add_return(1, &trace_counter); | ||
127 | } | ||
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 256764ecccd..816d3d07497 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c | |||
@@ -381,6 +381,63 @@ get_pred_parent(struct filter_pred *pred, struct filter_pred *preds, | |||
381 | return pred; | 381 | return pred; |
382 | } | 382 | } |
383 | 383 | ||
384 | enum walk_return { | ||
385 | WALK_PRED_ABORT, | ||
386 | WALK_PRED_PARENT, | ||
387 | WALK_PRED_DEFAULT, | ||
388 | }; | ||
389 | |||
390 | typedef int (*filter_pred_walkcb_t) (enum move_type move, | ||
391 | struct filter_pred *pred, | ||
392 | int *err, void *data); | ||
393 | |||
394 | static int walk_pred_tree(struct filter_pred *preds, | ||
395 | struct filter_pred *root, | ||
396 | filter_pred_walkcb_t cb, void *data) | ||
397 | { | ||
398 | struct filter_pred *pred = root; | ||
399 | enum move_type move = MOVE_DOWN; | ||
400 | int done = 0; | ||
401 | |||
402 | if (!preds) | ||
403 | return -EINVAL; | ||
404 | |||
405 | do { | ||
406 | int err = 0, ret; | ||
407 | |||
408 | ret = cb(move, pred, &err, data); | ||
409 | if (ret == WALK_PRED_ABORT) | ||
410 | return err; | ||
411 | if (ret == WALK_PRED_PARENT) | ||
412 | goto get_parent; | ||
413 | |||
414 | switch (move) { | ||
415 | case MOVE_DOWN: | ||
416 | if (pred->left != FILTER_PRED_INVALID) { | ||
417 | pred = &preds[pred->left]; | ||
418 | continue; | ||
419 | } | ||
420 | goto get_parent; | ||
421 | case MOVE_UP_FROM_LEFT: | ||
422 | pred = &preds[pred->right]; | ||
423 | move = MOVE_DOWN; | ||
424 | continue; | ||
425 | case MOVE_UP_FROM_RIGHT: | ||
426 | get_parent: | ||
427 | if (pred == root) | ||
428 | break; | ||
429 | pred = get_pred_parent(pred, preds, | ||
430 | pred->parent, | ||
431 | &move); | ||
432 | continue; | ||
433 | } | ||
434 | done = 1; | ||
435 | } while (!done); | ||
436 | |||
437 | /* We are fine. */ | ||
438 | return 0; | ||
439 | } | ||
440 | |||
384 | /* | 441 | /* |
385 | * A series of AND or ORs where found together. Instead of | 442 | * A series of AND or ORs where found together. Instead of |
386 | * climbing up and down the tree branches, an array of the | 443 | * climbing up and down the tree branches, an array of the |
@@ -410,99 +467,91 @@ static int process_ops(struct filter_pred *preds, | |||
410 | 467 | ||
411 | for (i = 0; i < op->val; i++) { | 468 | for (i = 0; i < op->val; i++) { |
412 | pred = &preds[op->ops[i]]; | 469 | pred = &preds[op->ops[i]]; |
413 | match = pred->fn(pred, rec); | 470 | if (!WARN_ON_ONCE(!pred->fn)) |
471 | match = pred->fn(pred, rec); | ||
414 | if (!!match == type) | 472 | if (!!match == type) |
415 | return match; | 473 | return match; |
416 | } | 474 | } |
417 | return match; | 475 | return match; |
418 | } | 476 | } |
419 | 477 | ||
478 | struct filter_match_preds_data { | ||
479 | struct filter_pred *preds; | ||
480 | int match; | ||
481 | void *rec; | ||
482 | }; | ||
483 | |||
484 | static int filter_match_preds_cb(enum move_type move, struct filter_pred *pred, | ||
485 | int *err, void *data) | ||
486 | { | ||
487 | struct filter_match_preds_data *d = data; | ||
488 | |||
489 | *err = 0; | ||
490 | switch (move) { | ||
491 | case MOVE_DOWN: | ||
492 | /* only AND and OR have children */ | ||
493 | if (pred->left != FILTER_PRED_INVALID) { | ||
494 | /* If ops is set, then it was folded. */ | ||
495 | if (!pred->ops) | ||
496 | return WALK_PRED_DEFAULT; | ||
497 | /* We can treat folded ops as a leaf node */ | ||
498 | d->match = process_ops(d->preds, pred, d->rec); | ||
499 | } else { | ||
500 | if (!WARN_ON_ONCE(!pred->fn)) | ||
501 | d->match = pred->fn(pred, d->rec); | ||
502 | } | ||
503 | |||
504 | return WALK_PRED_PARENT; | ||
505 | case MOVE_UP_FROM_LEFT: | ||
506 | /* | ||
507 | * Check for short circuits. | ||
508 | * | ||
509 | * Optimization: !!match == (pred->op == OP_OR) | ||
510 | * is the same as: | ||
511 | * if ((match && pred->op == OP_OR) || | ||
512 | * (!match && pred->op == OP_AND)) | ||
513 | */ | ||
514 | if (!!d->match == (pred->op == OP_OR)) | ||
515 | return WALK_PRED_PARENT; | ||
516 | break; | ||
517 | case MOVE_UP_FROM_RIGHT: | ||
518 | break; | ||
519 | } | ||
520 | |||
521 | return WALK_PRED_DEFAULT; | ||
522 | } | ||
523 | |||
420 | /* return 1 if event matches, 0 otherwise (discard) */ | 524 | /* return 1 if event matches, 0 otherwise (discard) */ |
421 | int filter_match_preds(struct event_filter *filter, void *rec) | 525 | int filter_match_preds(struct event_filter *filter, void *rec) |
422 | { | 526 | { |
423 | int match = -1; | ||
424 | enum move_type move = MOVE_DOWN; | ||
425 | struct filter_pred *preds; | 527 | struct filter_pred *preds; |
426 | struct filter_pred *pred; | ||
427 | struct filter_pred *root; | 528 | struct filter_pred *root; |
428 | int n_preds; | 529 | struct filter_match_preds_data data = { |
429 | int done = 0; | 530 | /* match is currently meaningless */ |
531 | .match = -1, | ||
532 | .rec = rec, | ||
533 | }; | ||
534 | int n_preds, ret; | ||
430 | 535 | ||
431 | /* no filter is considered a match */ | 536 | /* no filter is considered a match */ |
432 | if (!filter) | 537 | if (!filter) |
433 | return 1; | 538 | return 1; |
434 | 539 | ||
435 | n_preds = filter->n_preds; | 540 | n_preds = filter->n_preds; |
436 | |||
437 | if (!n_preds) | 541 | if (!n_preds) |
438 | return 1; | 542 | return 1; |
439 | 543 | ||
440 | /* | 544 | /* |
441 | * n_preds, root and filter->preds are protect with preemption disabled. | 545 | * n_preds, root and filter->preds are protect with preemption disabled. |
442 | */ | 546 | */ |
443 | preds = rcu_dereference_sched(filter->preds); | ||
444 | root = rcu_dereference_sched(filter->root); | 547 | root = rcu_dereference_sched(filter->root); |
445 | if (!root) | 548 | if (!root) |
446 | return 1; | 549 | return 1; |
447 | 550 | ||
448 | pred = root; | 551 | data.preds = preds = rcu_dereference_sched(filter->preds); |
449 | 552 | ret = walk_pred_tree(preds, root, filter_match_preds_cb, &data); | |
450 | /* match is currently meaningless */ | 553 | WARN_ON(ret); |
451 | match = -1; | 554 | return data.match; |
452 | |||
453 | do { | ||
454 | switch (move) { | ||
455 | case MOVE_DOWN: | ||
456 | /* only AND and OR have children */ | ||
457 | if (pred->left != FILTER_PRED_INVALID) { | ||
458 | /* If ops is set, then it was folded. */ | ||
459 | if (!pred->ops) { | ||
460 | /* keep going to down the left side */ | ||
461 | pred = &preds[pred->left]; | ||
462 | continue; | ||
463 | } | ||
464 | /* We can treat folded ops as a leaf node */ | ||
465 | match = process_ops(preds, pred, rec); | ||
466 | } else | ||
467 | match = pred->fn(pred, rec); | ||
468 | /* If this pred is the only pred */ | ||
469 | if (pred == root) | ||
470 | break; | ||
471 | pred = get_pred_parent(pred, preds, | ||
472 | pred->parent, &move); | ||
473 | continue; | ||
474 | case MOVE_UP_FROM_LEFT: | ||
475 | /* | ||
476 | * Check for short circuits. | ||
477 | * | ||
478 | * Optimization: !!match == (pred->op == OP_OR) | ||
479 | * is the same as: | ||
480 | * if ((match && pred->op == OP_OR) || | ||
481 | * (!match && pred->op == OP_AND)) | ||
482 | */ | ||
483 | if (!!match == (pred->op == OP_OR)) { | ||
484 | if (pred == root) | ||
485 | break; | ||
486 | pred = get_pred_parent(pred, preds, | ||
487 | pred->parent, &move); | ||
488 | continue; | ||
489 | } | ||
490 | /* now go down the right side of the tree. */ | ||
491 | pred = &preds[pred->right]; | ||
492 | move = MOVE_DOWN; | ||
493 | continue; | ||
494 | case MOVE_UP_FROM_RIGHT: | ||
495 | /* We finished this equation. */ | ||
496 | if (pred == root) | ||
497 | break; | ||
498 | pred = get_pred_parent(pred, preds, | ||
499 | pred->parent, &move); | ||
500 | continue; | ||
501 | } | ||
502 | done = 1; | ||
503 | } while (!done); | ||
504 | |||
505 | return match; | ||
506 | } | 555 | } |
507 | EXPORT_SYMBOL_GPL(filter_match_preds); | 556 | EXPORT_SYMBOL_GPL(filter_match_preds); |
508 | 557 | ||
@@ -628,22 +677,6 @@ find_event_field(struct ftrace_event_call *call, char *name) | |||
628 | return __find_event_field(head, name); | 677 | return __find_event_field(head, name); |
629 | } | 678 | } |
630 | 679 | ||
631 | static void filter_free_pred(struct filter_pred *pred) | ||
632 | { | ||
633 | if (!pred) | ||
634 | return; | ||
635 | |||
636 | kfree(pred->field_name); | ||
637 | kfree(pred); | ||
638 | } | ||
639 | |||
640 | static void filter_clear_pred(struct filter_pred *pred) | ||
641 | { | ||
642 | kfree(pred->field_name); | ||
643 | pred->field_name = NULL; | ||
644 | pred->regex.len = 0; | ||
645 | } | ||
646 | |||
647 | static int __alloc_pred_stack(struct pred_stack *stack, int n_preds) | 680 | static int __alloc_pred_stack(struct pred_stack *stack, int n_preds) |
648 | { | 681 | { |
649 | stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL); | 682 | stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL); |
@@ -689,20 +722,13 @@ __pop_pred_stack(struct pred_stack *stack) | |||
689 | static int filter_set_pred(struct event_filter *filter, | 722 | static int filter_set_pred(struct event_filter *filter, |
690 | int idx, | 723 | int idx, |
691 | struct pred_stack *stack, | 724 | struct pred_stack *stack, |
692 | struct filter_pred *src, | 725 | struct filter_pred *src) |
693 | filter_pred_fn_t fn) | ||
694 | { | 726 | { |
695 | struct filter_pred *dest = &filter->preds[idx]; | 727 | struct filter_pred *dest = &filter->preds[idx]; |
696 | struct filter_pred *left; | 728 | struct filter_pred *left; |
697 | struct filter_pred *right; | 729 | struct filter_pred *right; |
698 | 730 | ||
699 | *dest = *src; | 731 | *dest = *src; |
700 | if (src->field_name) { | ||
701 | dest->field_name = kstrdup(src->field_name, GFP_KERNEL); | ||
702 | if (!dest->field_name) | ||
703 | return -ENOMEM; | ||
704 | } | ||
705 | dest->fn = fn; | ||
706 | dest->index = idx; | 732 | dest->index = idx; |
707 | 733 | ||
708 | if (dest->op == OP_OR || dest->op == OP_AND) { | 734 | if (dest->op == OP_OR || dest->op == OP_AND) { |
@@ -743,11 +769,7 @@ static int filter_set_pred(struct event_filter *filter, | |||
743 | 769 | ||
744 | static void __free_preds(struct event_filter *filter) | 770 | static void __free_preds(struct event_filter *filter) |
745 | { | 771 | { |
746 | int i; | ||
747 | |||
748 | if (filter->preds) { | 772 | if (filter->preds) { |
749 | for (i = 0; i < filter->a_preds; i++) | ||
750 | kfree(filter->preds[i].field_name); | ||
751 | kfree(filter->preds); | 773 | kfree(filter->preds); |
752 | filter->preds = NULL; | 774 | filter->preds = NULL; |
753 | } | 775 | } |
@@ -840,23 +862,19 @@ static void filter_free_subsystem_filters(struct event_subsystem *system) | |||
840 | } | 862 | } |
841 | } | 863 | } |
842 | 864 | ||
843 | static int filter_add_pred_fn(struct filter_parse_state *ps, | 865 | static int filter_add_pred(struct filter_parse_state *ps, |
844 | struct ftrace_event_call *call, | 866 | struct event_filter *filter, |
845 | struct event_filter *filter, | 867 | struct filter_pred *pred, |
846 | struct filter_pred *pred, | 868 | struct pred_stack *stack) |
847 | struct pred_stack *stack, | ||
848 | filter_pred_fn_t fn) | ||
849 | { | 869 | { |
850 | int idx, err; | 870 | int err; |
851 | 871 | ||
852 | if (WARN_ON(filter->n_preds == filter->a_preds)) { | 872 | if (WARN_ON(filter->n_preds == filter->a_preds)) { |
853 | parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); | 873 | parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); |
854 | return -ENOSPC; | 874 | return -ENOSPC; |
855 | } | 875 | } |
856 | 876 | ||
857 | idx = filter->n_preds; | 877 | err = filter_set_pred(filter, filter->n_preds, stack, pred); |
858 | filter_clear_pred(&filter->preds[idx]); | ||
859 | err = filter_set_pred(filter, idx, stack, pred, fn); | ||
860 | if (err) | 878 | if (err) |
861 | return err; | 879 | return err; |
862 | 880 | ||
@@ -937,31 +955,15 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size, | |||
937 | return fn; | 955 | return fn; |
938 | } | 956 | } |
939 | 957 | ||
940 | static int filter_add_pred(struct filter_parse_state *ps, | 958 | static int init_pred(struct filter_parse_state *ps, |
941 | struct ftrace_event_call *call, | 959 | struct ftrace_event_field *field, |
942 | struct event_filter *filter, | 960 | struct filter_pred *pred) |
943 | struct filter_pred *pred, | 961 | |
944 | struct pred_stack *stack, | ||
945 | bool dry_run) | ||
946 | { | 962 | { |
947 | struct ftrace_event_field *field; | 963 | filter_pred_fn_t fn = filter_pred_none; |
948 | filter_pred_fn_t fn; | ||
949 | unsigned long long val; | 964 | unsigned long long val; |
950 | int ret; | 965 | int ret; |
951 | 966 | ||
952 | fn = pred->fn = filter_pred_none; | ||
953 | |||
954 | if (pred->op == OP_AND) | ||
955 | goto add_pred_fn; | ||
956 | else if (pred->op == OP_OR) | ||
957 | goto add_pred_fn; | ||
958 | |||
959 | field = find_event_field(call, pred->field_name); | ||
960 | if (!field) { | ||
961 | parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0); | ||
962 | return -EINVAL; | ||
963 | } | ||
964 | |||
965 | pred->offset = field->offset; | 967 | pred->offset = field->offset; |
966 | 968 | ||
967 | if (!is_legal_op(field, pred->op)) { | 969 | if (!is_legal_op(field, pred->op)) { |
@@ -1001,9 +1003,7 @@ static int filter_add_pred(struct filter_parse_state *ps, | |||
1001 | if (pred->op == OP_NE) | 1003 | if (pred->op == OP_NE) |
1002 | pred->not = 1; | 1004 | pred->not = 1; |
1003 | 1005 | ||
1004 | add_pred_fn: | 1006 | pred->fn = fn; |
1005 | if (!dry_run) | ||
1006 | return filter_add_pred_fn(ps, call, filter, pred, stack, fn); | ||
1007 | return 0; | 1007 | return 0; |
1008 | } | 1008 | } |
1009 | 1009 | ||
@@ -1302,39 +1302,37 @@ parse_operand: | |||
1302 | return 0; | 1302 | return 0; |
1303 | } | 1303 | } |
1304 | 1304 | ||
1305 | static struct filter_pred *create_pred(int op, char *operand1, char *operand2) | 1305 | static struct filter_pred *create_pred(struct filter_parse_state *ps, |
1306 | struct ftrace_event_call *call, | ||
1307 | int op, char *operand1, char *operand2) | ||
1306 | { | 1308 | { |
1307 | struct filter_pred *pred; | 1309 | struct ftrace_event_field *field; |
1310 | static struct filter_pred pred; | ||
1308 | 1311 | ||
1309 | pred = kzalloc(sizeof(*pred), GFP_KERNEL); | 1312 | memset(&pred, 0, sizeof(pred)); |
1310 | if (!pred) | 1313 | pred.op = op; |
1311 | return NULL; | ||
1312 | 1314 | ||
1313 | pred->field_name = kstrdup(operand1, GFP_KERNEL); | 1315 | if (op == OP_AND || op == OP_OR) |
1314 | if (!pred->field_name) { | 1316 | return &pred; |
1315 | kfree(pred); | 1317 | |
1318 | if (!operand1 || !operand2) { | ||
1319 | parse_error(ps, FILT_ERR_MISSING_FIELD, 0); | ||
1316 | return NULL; | 1320 | return NULL; |
1317 | } | 1321 | } |
1318 | 1322 | ||
1319 | strcpy(pred->regex.pattern, operand2); | 1323 | field = find_event_field(call, operand1); |
1320 | pred->regex.len = strlen(pred->regex.pattern); | 1324 | if (!field) { |
1321 | 1325 | parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0); | |
1322 | pred->op = op; | ||
1323 | |||
1324 | return pred; | ||
1325 | } | ||
1326 | |||
1327 | static struct filter_pred *create_logical_pred(int op) | ||
1328 | { | ||
1329 | struct filter_pred *pred; | ||
1330 | |||
1331 | pred = kzalloc(sizeof(*pred), GFP_KERNEL); | ||
1332 | if (!pred) | ||
1333 | return NULL; | 1326 | return NULL; |
1327 | } | ||
1334 | 1328 | ||
1335 | pred->op = op; | 1329 | strcpy(pred.regex.pattern, operand2); |
1330 | pred.regex.len = strlen(pred.regex.pattern); | ||
1336 | 1331 | ||
1337 | return pred; | 1332 | #ifdef CONFIG_FTRACE_STARTUP_TEST |
1333 | pred.field = field; | ||
1334 | #endif | ||
1335 | return init_pred(ps, field, &pred) ? NULL : &pred; | ||
1338 | } | 1336 | } |
1339 | 1337 | ||
1340 | static int check_preds(struct filter_parse_state *ps) | 1338 | static int check_preds(struct filter_parse_state *ps) |
@@ -1375,6 +1373,23 @@ static int count_preds(struct filter_parse_state *ps) | |||
1375 | return n_preds; | 1373 | return n_preds; |
1376 | } | 1374 | } |
1377 | 1375 | ||
1376 | struct check_pred_data { | ||
1377 | int count; | ||
1378 | int max; | ||
1379 | }; | ||
1380 | |||
1381 | static int check_pred_tree_cb(enum move_type move, struct filter_pred *pred, | ||
1382 | int *err, void *data) | ||
1383 | { | ||
1384 | struct check_pred_data *d = data; | ||
1385 | |||
1386 | if (WARN_ON(d->count++ > d->max)) { | ||
1387 | *err = -EINVAL; | ||
1388 | return WALK_PRED_ABORT; | ||
1389 | } | ||
1390 | return WALK_PRED_DEFAULT; | ||
1391 | } | ||
1392 | |||
1378 | /* | 1393 | /* |
1379 | * The tree is walked at filtering of an event. If the tree is not correctly | 1394 | * The tree is walked at filtering of an event. If the tree is not correctly |
1380 | * built, it may cause an infinite loop. Check here that the tree does | 1395 | * built, it may cause an infinite loop. Check here that the tree does |
@@ -1383,107 +1398,76 @@ static int count_preds(struct filter_parse_state *ps) | |||
1383 | static int check_pred_tree(struct event_filter *filter, | 1398 | static int check_pred_tree(struct event_filter *filter, |
1384 | struct filter_pred *root) | 1399 | struct filter_pred *root) |
1385 | { | 1400 | { |
1386 | struct filter_pred *preds; | 1401 | struct check_pred_data data = { |
1387 | struct filter_pred *pred; | 1402 | /* |
1388 | enum move_type move = MOVE_DOWN; | 1403 | * The max that we can hit a node is three times. |
1389 | int count = 0; | 1404 | * Once going down, once coming up from left, and |
1390 | int done = 0; | 1405 | * once coming up from right. This is more than enough |
1391 | int max; | 1406 | * since leafs are only hit a single time. |
1392 | 1407 | */ | |
1393 | /* | 1408 | .max = 3 * filter->n_preds, |
1394 | * The max that we can hit a node is three times. | 1409 | .count = 0, |
1395 | * Once going down, once coming up from left, and | 1410 | }; |
1396 | * once coming up from right. This is more than enough | ||
1397 | * since leafs are only hit a single time. | ||
1398 | */ | ||
1399 | max = 3 * filter->n_preds; | ||
1400 | 1411 | ||
1401 | preds = filter->preds; | 1412 | return walk_pred_tree(filter->preds, root, |
1402 | if (!preds) | 1413 | check_pred_tree_cb, &data); |
1403 | return -EINVAL; | 1414 | } |
1404 | pred = root; | ||
1405 | 1415 | ||
1406 | do { | 1416 | static int count_leafs_cb(enum move_type move, struct filter_pred *pred, |
1407 | if (WARN_ON(count++ > max)) | 1417 | int *err, void *data) |
1408 | return -EINVAL; | 1418 | { |
1419 | int *count = data; | ||
1409 | 1420 | ||
1410 | switch (move) { | 1421 | if ((move == MOVE_DOWN) && |
1411 | case MOVE_DOWN: | 1422 | (pred->left == FILTER_PRED_INVALID)) |
1412 | if (pred->left != FILTER_PRED_INVALID) { | 1423 | (*count)++; |
1413 | pred = &preds[pred->left]; | ||
1414 | continue; | ||
1415 | } | ||
1416 | /* A leaf at the root is just a leaf in the tree */ | ||
1417 | if (pred == root) | ||
1418 | break; | ||
1419 | pred = get_pred_parent(pred, preds, | ||
1420 | pred->parent, &move); | ||
1421 | continue; | ||
1422 | case MOVE_UP_FROM_LEFT: | ||
1423 | pred = &preds[pred->right]; | ||
1424 | move = MOVE_DOWN; | ||
1425 | continue; | ||
1426 | case MOVE_UP_FROM_RIGHT: | ||
1427 | if (pred == root) | ||
1428 | break; | ||
1429 | pred = get_pred_parent(pred, preds, | ||
1430 | pred->parent, &move); | ||
1431 | continue; | ||
1432 | } | ||
1433 | done = 1; | ||
1434 | } while (!done); | ||
1435 | 1424 | ||
1436 | /* We are fine. */ | 1425 | return WALK_PRED_DEFAULT; |
1437 | return 0; | ||
1438 | } | 1426 | } |
1439 | 1427 | ||
1440 | static int count_leafs(struct filter_pred *preds, struct filter_pred *root) | 1428 | static int count_leafs(struct filter_pred *preds, struct filter_pred *root) |
1441 | { | 1429 | { |
1442 | struct filter_pred *pred; | 1430 | int count = 0, ret; |
1443 | enum move_type move = MOVE_DOWN; | ||
1444 | int count = 0; | ||
1445 | int done = 0; | ||
1446 | 1431 | ||
1447 | pred = root; | 1432 | ret = walk_pred_tree(preds, root, count_leafs_cb, &count); |
1433 | WARN_ON(ret); | ||
1434 | return count; | ||
1435 | } | ||
1448 | 1436 | ||
1449 | do { | 1437 | struct fold_pred_data { |
1450 | switch (move) { | 1438 | struct filter_pred *root; |
1451 | case MOVE_DOWN: | 1439 | int count; |
1452 | if (pred->left != FILTER_PRED_INVALID) { | 1440 | int children; |
1453 | pred = &preds[pred->left]; | 1441 | }; |
1454 | continue; | ||
1455 | } | ||
1456 | /* A leaf at the root is just a leaf in the tree */ | ||
1457 | if (pred == root) | ||
1458 | return 1; | ||
1459 | count++; | ||
1460 | pred = get_pred_parent(pred, preds, | ||
1461 | pred->parent, &move); | ||
1462 | continue; | ||
1463 | case MOVE_UP_FROM_LEFT: | ||
1464 | pred = &preds[pred->right]; | ||
1465 | move = MOVE_DOWN; | ||
1466 | continue; | ||
1467 | case MOVE_UP_FROM_RIGHT: | ||
1468 | if (pred == root) | ||
1469 | break; | ||
1470 | pred = get_pred_parent(pred, preds, | ||
1471 | pred->parent, &move); | ||
1472 | continue; | ||
1473 | } | ||
1474 | done = 1; | ||
1475 | } while (!done); | ||
1476 | 1442 | ||
1477 | return count; | 1443 | static int fold_pred_cb(enum move_type move, struct filter_pred *pred, |
1444 | int *err, void *data) | ||
1445 | { | ||
1446 | struct fold_pred_data *d = data; | ||
1447 | struct filter_pred *root = d->root; | ||
1448 | |||
1449 | if (move != MOVE_DOWN) | ||
1450 | return WALK_PRED_DEFAULT; | ||
1451 | if (pred->left != FILTER_PRED_INVALID) | ||
1452 | return WALK_PRED_DEFAULT; | ||
1453 | |||
1454 | if (WARN_ON(d->count == d->children)) { | ||
1455 | *err = -EINVAL; | ||
1456 | return WALK_PRED_ABORT; | ||
1457 | } | ||
1458 | |||
1459 | pred->index &= ~FILTER_PRED_FOLD; | ||
1460 | root->ops[d->count++] = pred->index; | ||
1461 | return WALK_PRED_DEFAULT; | ||
1478 | } | 1462 | } |
1479 | 1463 | ||
1480 | static int fold_pred(struct filter_pred *preds, struct filter_pred *root) | 1464 | static int fold_pred(struct filter_pred *preds, struct filter_pred *root) |
1481 | { | 1465 | { |
1482 | struct filter_pred *pred; | 1466 | struct fold_pred_data data = { |
1483 | enum move_type move = MOVE_DOWN; | 1467 | .root = root, |
1484 | int count = 0; | 1468 | .count = 0, |
1469 | }; | ||
1485 | int children; | 1470 | int children; |
1486 | int done = 0; | ||
1487 | 1471 | ||
1488 | /* No need to keep the fold flag */ | 1472 | /* No need to keep the fold flag */ |
1489 | root->index &= ~FILTER_PRED_FOLD; | 1473 | root->index &= ~FILTER_PRED_FOLD; |
@@ -1501,37 +1485,26 @@ static int fold_pred(struct filter_pred *preds, struct filter_pred *root) | |||
1501 | return -ENOMEM; | 1485 | return -ENOMEM; |
1502 | 1486 | ||
1503 | root->val = children; | 1487 | root->val = children; |
1488 | data.children = children; | ||
1489 | return walk_pred_tree(preds, root, fold_pred_cb, &data); | ||
1490 | } | ||
1504 | 1491 | ||
1505 | pred = root; | 1492 | static int fold_pred_tree_cb(enum move_type move, struct filter_pred *pred, |
1506 | do { | 1493 | int *err, void *data) |
1507 | switch (move) { | 1494 | { |
1508 | case MOVE_DOWN: | 1495 | struct filter_pred *preds = data; |
1509 | if (pred->left != FILTER_PRED_INVALID) { | ||
1510 | pred = &preds[pred->left]; | ||
1511 | continue; | ||
1512 | } | ||
1513 | if (WARN_ON(count == children)) | ||
1514 | return -EINVAL; | ||
1515 | pred->index &= ~FILTER_PRED_FOLD; | ||
1516 | root->ops[count++] = pred->index; | ||
1517 | pred = get_pred_parent(pred, preds, | ||
1518 | pred->parent, &move); | ||
1519 | continue; | ||
1520 | case MOVE_UP_FROM_LEFT: | ||
1521 | pred = &preds[pred->right]; | ||
1522 | move = MOVE_DOWN; | ||
1523 | continue; | ||
1524 | case MOVE_UP_FROM_RIGHT: | ||
1525 | if (pred == root) | ||
1526 | break; | ||
1527 | pred = get_pred_parent(pred, preds, | ||
1528 | pred->parent, &move); | ||
1529 | continue; | ||
1530 | } | ||
1531 | done = 1; | ||
1532 | } while (!done); | ||
1533 | 1496 | ||
1534 | return 0; | 1497 | if (move != MOVE_DOWN) |
1498 | return WALK_PRED_DEFAULT; | ||
1499 | if (!(pred->index & FILTER_PRED_FOLD)) | ||
1500 | return WALK_PRED_DEFAULT; | ||
1501 | |||
1502 | *err = fold_pred(preds, pred); | ||
1503 | if (*err) | ||
1504 | return WALK_PRED_ABORT; | ||
1505 | |||
1506 | /* eveyrhing below is folded, continue with parent */ | ||
1507 | return WALK_PRED_PARENT; | ||
1535 | } | 1508 | } |
1536 | 1509 | ||
1537 | /* | 1510 | /* |
@@ -1542,51 +1515,8 @@ static int fold_pred(struct filter_pred *preds, struct filter_pred *root) | |||
1542 | static int fold_pred_tree(struct event_filter *filter, | 1515 | static int fold_pred_tree(struct event_filter *filter, |
1543 | struct filter_pred *root) | 1516 | struct filter_pred *root) |
1544 | { | 1517 | { |
1545 | struct filter_pred *preds; | 1518 | return walk_pred_tree(filter->preds, root, fold_pred_tree_cb, |
1546 | struct filter_pred *pred; | 1519 | filter->preds); |
1547 | enum move_type move = MOVE_DOWN; | ||
1548 | int done = 0; | ||
1549 | int err; | ||
1550 | |||
1551 | preds = filter->preds; | ||
1552 | if (!preds) | ||
1553 | return -EINVAL; | ||
1554 | pred = root; | ||
1555 | |||
1556 | do { | ||
1557 | switch (move) { | ||
1558 | case MOVE_DOWN: | ||
1559 | if (pred->index & FILTER_PRED_FOLD) { | ||
1560 | err = fold_pred(preds, pred); | ||
1561 | if (err) | ||
1562 | return err; | ||
1563 | /* Folded nodes are like leafs */ | ||
1564 | } else if (pred->left != FILTER_PRED_INVALID) { | ||
1565 | pred = &preds[pred->left]; | ||
1566 | continue; | ||
1567 | } | ||
1568 | |||
1569 | /* A leaf at the root is just a leaf in the tree */ | ||
1570 | if (pred == root) | ||
1571 | break; | ||
1572 | pred = get_pred_parent(pred, preds, | ||
1573 | pred->parent, &move); | ||
1574 | continue; | ||
1575 | case MOVE_UP_FROM_LEFT: | ||
1576 | pred = &preds[pred->right]; | ||
1577 | move = MOVE_DOWN; | ||
1578 | continue; | ||
1579 | case MOVE_UP_FROM_RIGHT: | ||
1580 | if (pred == root) | ||
1581 | break; | ||
1582 | pred = get_pred_parent(pred, preds, | ||
1583 | pred->parent, &move); | ||
1584 | continue; | ||
1585 | } | ||
1586 | done = 1; | ||
1587 | } while (!done); | ||
1588 | |||
1589 | return 0; | ||
1590 | } | 1520 | } |
1591 | 1521 | ||
1592 | static int replace_preds(struct ftrace_event_call *call, | 1522 | static int replace_preds(struct ftrace_event_call *call, |
@@ -1643,27 +1573,17 @@ static int replace_preds(struct ftrace_event_call *call, | |||
1643 | goto fail; | 1573 | goto fail; |
1644 | } | 1574 | } |
1645 | 1575 | ||
1646 | if (elt->op == OP_AND || elt->op == OP_OR) { | 1576 | pred = create_pred(ps, call, elt->op, operand1, operand2); |
1647 | pred = create_logical_pred(elt->op); | 1577 | if (!pred) { |
1648 | goto add_pred; | ||
1649 | } | ||
1650 | |||
1651 | if (!operand1 || !operand2) { | ||
1652 | parse_error(ps, FILT_ERR_MISSING_FIELD, 0); | ||
1653 | err = -EINVAL; | 1578 | err = -EINVAL; |
1654 | goto fail; | 1579 | goto fail; |
1655 | } | 1580 | } |
1656 | 1581 | ||
1657 | pred = create_pred(elt->op, operand1, operand2); | 1582 | if (!dry_run) { |
1658 | add_pred: | 1583 | err = filter_add_pred(ps, filter, pred, &stack); |
1659 | if (!pred) { | 1584 | if (err) |
1660 | err = -ENOMEM; | 1585 | goto fail; |
1661 | goto fail; | ||
1662 | } | 1586 | } |
1663 | err = filter_add_pred(ps, call, filter, pred, &stack, dry_run); | ||
1664 | filter_free_pred(pred); | ||
1665 | if (err) | ||
1666 | goto fail; | ||
1667 | 1587 | ||
1668 | operand1 = operand2 = NULL; | 1588 | operand1 = operand2 = NULL; |
1669 | } | 1589 | } |
@@ -1958,17 +1878,14 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, | |||
1958 | int err; | 1878 | int err; |
1959 | struct event_filter *filter; | 1879 | struct event_filter *filter; |
1960 | struct filter_parse_state *ps; | 1880 | struct filter_parse_state *ps; |
1961 | struct ftrace_event_call *call = NULL; | 1881 | struct ftrace_event_call *call; |
1962 | 1882 | ||
1963 | mutex_lock(&event_mutex); | 1883 | mutex_lock(&event_mutex); |
1964 | 1884 | ||
1965 | list_for_each_entry(call, &ftrace_events, list) { | 1885 | call = event->tp_event; |
1966 | if (call->event.type == event_id) | ||
1967 | break; | ||
1968 | } | ||
1969 | 1886 | ||
1970 | err = -EINVAL; | 1887 | err = -EINVAL; |
1971 | if (&call->list == &ftrace_events) | 1888 | if (!call) |
1972 | goto out_unlock; | 1889 | goto out_unlock; |
1973 | 1890 | ||
1974 | err = -EEXIST; | 1891 | err = -EEXIST; |
@@ -2012,3 +1929,215 @@ out_unlock: | |||
2012 | 1929 | ||
2013 | #endif /* CONFIG_PERF_EVENTS */ | 1930 | #endif /* CONFIG_PERF_EVENTS */ |
2014 | 1931 | ||
1932 | #ifdef CONFIG_FTRACE_STARTUP_TEST | ||
1933 | |||
1934 | #include <linux/types.h> | ||
1935 | #include <linux/tracepoint.h> | ||
1936 | |||
1937 | #define CREATE_TRACE_POINTS | ||
1938 | #include "trace_events_filter_test.h" | ||
1939 | |||
1940 | static int test_get_filter(char *filter_str, struct ftrace_event_call *call, | ||
1941 | struct event_filter **pfilter) | ||
1942 | { | ||
1943 | struct event_filter *filter; | ||
1944 | struct filter_parse_state *ps; | ||
1945 | int err = -ENOMEM; | ||
1946 | |||
1947 | filter = __alloc_filter(); | ||
1948 | if (!filter) | ||
1949 | goto out; | ||
1950 | |||
1951 | ps = kzalloc(sizeof(*ps), GFP_KERNEL); | ||
1952 | if (!ps) | ||
1953 | goto free_filter; | ||
1954 | |||
1955 | parse_init(ps, filter_ops, filter_str); | ||
1956 | err = filter_parse(ps); | ||
1957 | if (err) | ||
1958 | goto free_ps; | ||
1959 | |||
1960 | err = replace_preds(call, filter, ps, filter_str, false); | ||
1961 | if (!err) | ||
1962 | *pfilter = filter; | ||
1963 | |||
1964 | free_ps: | ||
1965 | filter_opstack_clear(ps); | ||
1966 | postfix_clear(ps); | ||
1967 | kfree(ps); | ||
1968 | |||
1969 | free_filter: | ||
1970 | if (err) | ||
1971 | __free_filter(filter); | ||
1972 | |||
1973 | out: | ||
1974 | return err; | ||
1975 | } | ||
1976 | |||
1977 | #define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \ | ||
1978 | { \ | ||
1979 | .filter = FILTER, \ | ||
1980 | .rec = { .a = va, .b = vb, .c = vc, .d = vd, \ | ||
1981 | .e = ve, .f = vf, .g = vg, .h = vh }, \ | ||
1982 | .match = m, \ | ||
1983 | .not_visited = nvisit, \ | ||
1984 | } | ||
1985 | #define YES 1 | ||
1986 | #define NO 0 | ||
1987 | |||
1988 | static struct test_filter_data_t { | ||
1989 | char *filter; | ||
1990 | struct ftrace_raw_ftrace_test_filter rec; | ||
1991 | int match; | ||
1992 | char *not_visited; | ||
1993 | } test_filter_data[] = { | ||
1994 | #define FILTER "a == 1 && b == 1 && c == 1 && d == 1 && " \ | ||
1995 | "e == 1 && f == 1 && g == 1 && h == 1" | ||
1996 | DATA_REC(YES, 1, 1, 1, 1, 1, 1, 1, 1, ""), | ||
1997 | DATA_REC(NO, 0, 1, 1, 1, 1, 1, 1, 1, "bcdefgh"), | ||
1998 | DATA_REC(NO, 1, 1, 1, 1, 1, 1, 1, 0, ""), | ||
1999 | #undef FILTER | ||
2000 | #define FILTER "a == 1 || b == 1 || c == 1 || d == 1 || " \ | ||
2001 | "e == 1 || f == 1 || g == 1 || h == 1" | ||
2002 | DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 0, ""), | ||
2003 | DATA_REC(YES, 0, 0, 0, 0, 0, 0, 0, 1, ""), | ||
2004 | DATA_REC(YES, 1, 0, 0, 0, 0, 0, 0, 0, "bcdefgh"), | ||
2005 | #undef FILTER | ||
2006 | #define FILTER "(a == 1 || b == 1) && (c == 1 || d == 1) && " \ | ||
2007 | "(e == 1 || f == 1) && (g == 1 || h == 1)" | ||
2008 | DATA_REC(NO, 0, 0, 1, 1, 1, 1, 1, 1, "dfh"), | ||
2009 | DATA_REC(YES, 0, 1, 0, 1, 0, 1, 0, 1, ""), | ||
2010 | DATA_REC(YES, 1, 0, 1, 0, 0, 1, 0, 1, "bd"), | ||
2011 | DATA_REC(NO, 1, 0, 1, 0, 0, 1, 0, 0, "bd"), | ||
2012 | #undef FILTER | ||
2013 | #define FILTER "(a == 1 && b == 1) || (c == 1 && d == 1) || " \ | ||
2014 | "(e == 1 && f == 1) || (g == 1 && h == 1)" | ||
2015 | DATA_REC(YES, 1, 0, 1, 1, 1, 1, 1, 1, "efgh"), | ||
2016 | DATA_REC(YES, 0, 0, 0, 0, 0, 0, 1, 1, ""), | ||
2017 | DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 1, ""), | ||
2018 | #undef FILTER | ||
2019 | #define FILTER "(a == 1 && b == 1) && (c == 1 && d == 1) && " \ | ||
2020 | "(e == 1 && f == 1) || (g == 1 && h == 1)" | ||
2021 | DATA_REC(YES, 1, 1, 1, 1, 1, 1, 0, 0, "gh"), | ||
2022 | DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 1, ""), | ||
2023 | DATA_REC(YES, 1, 1, 1, 1, 1, 0, 1, 1, ""), | ||
2024 | #undef FILTER | ||
2025 | #define FILTER "((a == 1 || b == 1) || (c == 1 || d == 1) || " \ | ||
2026 | "(e == 1 || f == 1)) && (g == 1 || h == 1)" | ||
2027 | DATA_REC(YES, 1, 1, 1, 1, 1, 1, 0, 1, "bcdef"), | ||
2028 | DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 0, ""), | ||
2029 | DATA_REC(YES, 1, 1, 1, 1, 1, 0, 1, 1, "h"), | ||
2030 | #undef FILTER | ||
2031 | #define FILTER "((((((((a == 1) && (b == 1)) || (c == 1)) && (d == 1)) || " \ | ||
2032 | "(e == 1)) && (f == 1)) || (g == 1)) && (h == 1))" | ||
2033 | DATA_REC(YES, 1, 1, 1, 1, 1, 1, 1, 1, "ceg"), | ||
2034 | DATA_REC(NO, 0, 1, 0, 1, 0, 1, 0, 1, ""), | ||
2035 | DATA_REC(NO, 1, 0, 1, 0, 1, 0, 1, 0, ""), | ||
2036 | #undef FILTER | ||
2037 | #define FILTER "((((((((a == 1) || (b == 1)) && (c == 1)) || (d == 1)) && " \ | ||
2038 | "(e == 1)) || (f == 1)) && (g == 1)) || (h == 1))" | ||
2039 | DATA_REC(YES, 1, 1, 1, 1, 1, 1, 1, 1, "bdfh"), | ||
2040 | DATA_REC(YES, 0, 1, 0, 1, 0, 1, 0, 1, ""), | ||
2041 | DATA_REC(YES, 1, 0, 1, 0, 1, 0, 1, 0, "bdfh"), | ||
2042 | }; | ||
2043 | |||
2044 | #undef DATA_REC | ||
2045 | #undef FILTER | ||
2046 | #undef YES | ||
2047 | #undef NO | ||
2048 | |||
2049 | #define DATA_CNT (sizeof(test_filter_data)/sizeof(struct test_filter_data_t)) | ||
2050 | |||
2051 | static int test_pred_visited; | ||
2052 | |||
2053 | static int test_pred_visited_fn(struct filter_pred *pred, void *event) | ||
2054 | { | ||
2055 | struct ftrace_event_field *field = pred->field; | ||
2056 | |||
2057 | test_pred_visited = 1; | ||
2058 | printk(KERN_INFO "\npred visited %s\n", field->name); | ||
2059 | return 1; | ||
2060 | } | ||
2061 | |||
2062 | static int test_walk_pred_cb(enum move_type move, struct filter_pred *pred, | ||
2063 | int *err, void *data) | ||
2064 | { | ||
2065 | char *fields = data; | ||
2066 | |||
2067 | if ((move == MOVE_DOWN) && | ||
2068 | (pred->left == FILTER_PRED_INVALID)) { | ||
2069 | struct ftrace_event_field *field = pred->field; | ||
2070 | |||
2071 | if (!field) { | ||
2072 | WARN(1, "all leafs should have field defined"); | ||
2073 | return WALK_PRED_DEFAULT; | ||
2074 | } | ||
2075 | if (!strchr(fields, *field->name)) | ||
2076 | return WALK_PRED_DEFAULT; | ||
2077 | |||
2078 | WARN_ON(!pred->fn); | ||
2079 | pred->fn = test_pred_visited_fn; | ||
2080 | } | ||
2081 | return WALK_PRED_DEFAULT; | ||
2082 | } | ||
2083 | |||
2084 | static __init int ftrace_test_event_filter(void) | ||
2085 | { | ||
2086 | int i; | ||
2087 | |||
2088 | printk(KERN_INFO "Testing ftrace filter: "); | ||
2089 | |||
2090 | for (i = 0; i < DATA_CNT; i++) { | ||
2091 | struct event_filter *filter = NULL; | ||
2092 | struct test_filter_data_t *d = &test_filter_data[i]; | ||
2093 | int err; | ||
2094 | |||
2095 | err = test_get_filter(d->filter, &event_ftrace_test_filter, | ||
2096 | &filter); | ||
2097 | if (err) { | ||
2098 | printk(KERN_INFO | ||
2099 | "Failed to get filter for '%s', err %d\n", | ||
2100 | d->filter, err); | ||
2101 | break; | ||
2102 | } | ||
2103 | |||
2104 | /* | ||
2105 | * The preemption disabling is not really needed for self | ||
2106 | * tests, but the rcu dereference will complain without it. | ||
2107 | */ | ||
2108 | preempt_disable(); | ||
2109 | if (*d->not_visited) | ||
2110 | walk_pred_tree(filter->preds, filter->root, | ||
2111 | test_walk_pred_cb, | ||
2112 | d->not_visited); | ||
2113 | |||
2114 | test_pred_visited = 0; | ||
2115 | err = filter_match_preds(filter, &d->rec); | ||
2116 | preempt_enable(); | ||
2117 | |||
2118 | __free_filter(filter); | ||
2119 | |||
2120 | if (test_pred_visited) { | ||
2121 | printk(KERN_INFO | ||
2122 | "Failed, unwanted pred visited for filter %s\n", | ||
2123 | d->filter); | ||
2124 | break; | ||
2125 | } | ||
2126 | |||
2127 | if (err != d->match) { | ||
2128 | printk(KERN_INFO | ||
2129 | "Failed to match filter '%s', expected %d\n", | ||
2130 | d->filter, d->match); | ||
2131 | break; | ||
2132 | } | ||
2133 | } | ||
2134 | |||
2135 | if (i == DATA_CNT) | ||
2136 | printk(KERN_CONT "OK\n"); | ||
2137 | |||
2138 | return 0; | ||
2139 | } | ||
2140 | |||
2141 | late_initcall(ftrace_test_event_filter); | ||
2142 | |||
2143 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ | ||
diff --git a/kernel/trace/trace_events_filter_test.h b/kernel/trace/trace_events_filter_test.h new file mode 100644 index 00000000000..bfd4dba0d60 --- /dev/null +++ b/kernel/trace/trace_events_filter_test.h | |||
@@ -0,0 +1,50 @@ | |||
1 | #undef TRACE_SYSTEM | ||
2 | #define TRACE_SYSTEM test | ||
3 | |||
4 | #if !defined(_TRACE_TEST_H) || defined(TRACE_HEADER_MULTI_READ) | ||
5 | #define _TRACE_TEST_H | ||
6 | |||
7 | #include <linux/tracepoint.h> | ||
8 | |||
9 | TRACE_EVENT(ftrace_test_filter, | ||
10 | |||
11 | TP_PROTO(int a, int b, int c, int d, int e, int f, int g, int h), | ||
12 | |||
13 | TP_ARGS(a, b, c, d, e, f, g, h), | ||
14 | |||
15 | TP_STRUCT__entry( | ||
16 | __field(int, a) | ||
17 | __field(int, b) | ||
18 | __field(int, c) | ||
19 | __field(int, d) | ||
20 | __field(int, e) | ||
21 | __field(int, f) | ||
22 | __field(int, g) | ||
23 | __field(int, h) | ||
24 | ), | ||
25 | |||
26 | TP_fast_assign( | ||
27 | __entry->a = a; | ||
28 | __entry->b = b; | ||
29 | __entry->c = c; | ||
30 | __entry->d = d; | ||
31 | __entry->e = e; | ||
32 | __entry->f = f; | ||
33 | __entry->g = g; | ||
34 | __entry->h = h; | ||
35 | ), | ||
36 | |||
37 | TP_printk("a %d, b %d, c %d, d %d, e %d, f %d, g %d, h %d", | ||
38 | __entry->a, __entry->b, __entry->c, __entry->d, | ||
39 | __entry->e, __entry->f, __entry->g, __entry->h) | ||
40 | ); | ||
41 | |||
42 | #endif /* _TRACE_TEST_H || TRACE_HEADER_MULTI_READ */ | ||
43 | |||
44 | #undef TRACE_INCLUDE_PATH | ||
45 | #undef TRACE_INCLUDE_FILE | ||
46 | #define TRACE_INCLUDE_PATH . | ||
47 | #define TRACE_INCLUDE_FILE trace_events_filter_test | ||
48 | |||
49 | /* This part must be outside protection */ | ||
50 | #include <trace/define_trace.h> | ||
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 667aa8cc0cf..20dad0d7a16 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c | |||
@@ -23,7 +23,7 @@ static int tracer_enabled __read_mostly; | |||
23 | 23 | ||
24 | static DEFINE_PER_CPU(int, tracing_cpu); | 24 | static DEFINE_PER_CPU(int, tracing_cpu); |
25 | 25 | ||
26 | static DEFINE_SPINLOCK(max_trace_lock); | 26 | static DEFINE_RAW_SPINLOCK(max_trace_lock); |
27 | 27 | ||
28 | enum { | 28 | enum { |
29 | TRACER_IRQS_OFF = (1 << 1), | 29 | TRACER_IRQS_OFF = (1 << 1), |
@@ -321,7 +321,7 @@ check_critical_timing(struct trace_array *tr, | |||
321 | if (!report_latency(delta)) | 321 | if (!report_latency(delta)) |
322 | goto out; | 322 | goto out; |
323 | 323 | ||
324 | spin_lock_irqsave(&max_trace_lock, flags); | 324 | raw_spin_lock_irqsave(&max_trace_lock, flags); |
325 | 325 | ||
326 | /* check if we are still the max latency */ | 326 | /* check if we are still the max latency */ |
327 | if (!report_latency(delta)) | 327 | if (!report_latency(delta)) |
@@ -344,7 +344,7 @@ check_critical_timing(struct trace_array *tr, | |||
344 | max_sequence++; | 344 | max_sequence++; |
345 | 345 | ||
346 | out_unlock: | 346 | out_unlock: |
347 | spin_unlock_irqrestore(&max_trace_lock, flags); | 347 | raw_spin_unlock_irqrestore(&max_trace_lock, flags); |
348 | 348 | ||
349 | out: | 349 | out: |
350 | data->critical_sequence = max_sequence; | 350 | data->critical_sequence = max_sequence; |
@@ -505,13 +505,13 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller); | |||
505 | #ifdef CONFIG_PREEMPT_TRACER | 505 | #ifdef CONFIG_PREEMPT_TRACER |
506 | void trace_preempt_on(unsigned long a0, unsigned long a1) | 506 | void trace_preempt_on(unsigned long a0, unsigned long a1) |
507 | { | 507 | { |
508 | if (preempt_trace()) | 508 | if (preempt_trace() && !irq_trace()) |
509 | stop_critical_timing(a0, a1); | 509 | stop_critical_timing(a0, a1); |
510 | } | 510 | } |
511 | 511 | ||
512 | void trace_preempt_off(unsigned long a0, unsigned long a1) | 512 | void trace_preempt_off(unsigned long a0, unsigned long a1) |
513 | { | 513 | { |
514 | if (preempt_trace()) | 514 | if (preempt_trace() && !irq_trace()) |
515 | start_critical_timing(a0, a1); | 515 | start_critical_timing(a0, a1); |
516 | } | 516 | } |
517 | #endif /* CONFIG_PREEMPT_TRACER */ | 517 | #endif /* CONFIG_PREEMPT_TRACER */ |
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 5fb3697bf0e..00d527c945a 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -836,11 +836,17 @@ static void __unregister_trace_probe(struct trace_probe *tp) | |||
836 | } | 836 | } |
837 | 837 | ||
838 | /* Unregister a trace_probe and probe_event: call with locking probe_lock */ | 838 | /* Unregister a trace_probe and probe_event: call with locking probe_lock */ |
839 | static void unregister_trace_probe(struct trace_probe *tp) | 839 | static int unregister_trace_probe(struct trace_probe *tp) |
840 | { | 840 | { |
841 | /* Enabled event can not be unregistered */ | ||
842 | if (trace_probe_is_enabled(tp)) | ||
843 | return -EBUSY; | ||
844 | |||
841 | __unregister_trace_probe(tp); | 845 | __unregister_trace_probe(tp); |
842 | list_del(&tp->list); | 846 | list_del(&tp->list); |
843 | unregister_probe_event(tp); | 847 | unregister_probe_event(tp); |
848 | |||
849 | return 0; | ||
844 | } | 850 | } |
845 | 851 | ||
846 | /* Register a trace_probe and probe_event */ | 852 | /* Register a trace_probe and probe_event */ |
@@ -854,7 +860,9 @@ static int register_trace_probe(struct trace_probe *tp) | |||
854 | /* Delete old (same name) event if exist */ | 860 | /* Delete old (same name) event if exist */ |
855 | old_tp = find_trace_probe(tp->call.name, tp->call.class->system); | 861 | old_tp = find_trace_probe(tp->call.name, tp->call.class->system); |
856 | if (old_tp) { | 862 | if (old_tp) { |
857 | unregister_trace_probe(old_tp); | 863 | ret = unregister_trace_probe(old_tp); |
864 | if (ret < 0) | ||
865 | goto end; | ||
858 | free_trace_probe(old_tp); | 866 | free_trace_probe(old_tp); |
859 | } | 867 | } |
860 | 868 | ||
@@ -892,6 +900,7 @@ static int trace_probe_module_callback(struct notifier_block *nb, | |||
892 | mutex_lock(&probe_lock); | 900 | mutex_lock(&probe_lock); |
893 | list_for_each_entry(tp, &probe_list, list) { | 901 | list_for_each_entry(tp, &probe_list, list) { |
894 | if (trace_probe_within_module(tp, mod)) { | 902 | if (trace_probe_within_module(tp, mod)) { |
903 | /* Don't need to check busy - this should have gone. */ | ||
895 | __unregister_trace_probe(tp); | 904 | __unregister_trace_probe(tp); |
896 | ret = __register_trace_probe(tp); | 905 | ret = __register_trace_probe(tp); |
897 | if (ret) | 906 | if (ret) |
@@ -1205,10 +1214,11 @@ static int create_trace_probe(int argc, char **argv) | |||
1205 | return -ENOENT; | 1214 | return -ENOENT; |
1206 | } | 1215 | } |
1207 | /* delete an event */ | 1216 | /* delete an event */ |
1208 | unregister_trace_probe(tp); | 1217 | ret = unregister_trace_probe(tp); |
1209 | free_trace_probe(tp); | 1218 | if (ret == 0) |
1219 | free_trace_probe(tp); | ||
1210 | mutex_unlock(&probe_lock); | 1220 | mutex_unlock(&probe_lock); |
1211 | return 0; | 1221 | return ret; |
1212 | } | 1222 | } |
1213 | 1223 | ||
1214 | if (argc < 2) { | 1224 | if (argc < 2) { |
@@ -1317,18 +1327,29 @@ error: | |||
1317 | return ret; | 1327 | return ret; |
1318 | } | 1328 | } |
1319 | 1329 | ||
1320 | static void release_all_trace_probes(void) | 1330 | static int release_all_trace_probes(void) |
1321 | { | 1331 | { |
1322 | struct trace_probe *tp; | 1332 | struct trace_probe *tp; |
1333 | int ret = 0; | ||
1323 | 1334 | ||
1324 | mutex_lock(&probe_lock); | 1335 | mutex_lock(&probe_lock); |
1336 | /* Ensure no probe is in use. */ | ||
1337 | list_for_each_entry(tp, &probe_list, list) | ||
1338 | if (trace_probe_is_enabled(tp)) { | ||
1339 | ret = -EBUSY; | ||
1340 | goto end; | ||
1341 | } | ||
1325 | /* TODO: Use batch unregistration */ | 1342 | /* TODO: Use batch unregistration */ |
1326 | while (!list_empty(&probe_list)) { | 1343 | while (!list_empty(&probe_list)) { |
1327 | tp = list_entry(probe_list.next, struct trace_probe, list); | 1344 | tp = list_entry(probe_list.next, struct trace_probe, list); |
1328 | unregister_trace_probe(tp); | 1345 | unregister_trace_probe(tp); |
1329 | free_trace_probe(tp); | 1346 | free_trace_probe(tp); |
1330 | } | 1347 | } |
1348 | |||
1349 | end: | ||
1331 | mutex_unlock(&probe_lock); | 1350 | mutex_unlock(&probe_lock); |
1351 | |||
1352 | return ret; | ||
1332 | } | 1353 | } |
1333 | 1354 | ||
1334 | /* Probes listing interfaces */ | 1355 | /* Probes listing interfaces */ |
@@ -1380,9 +1401,13 @@ static const struct seq_operations probes_seq_op = { | |||
1380 | 1401 | ||
1381 | static int probes_open(struct inode *inode, struct file *file) | 1402 | static int probes_open(struct inode *inode, struct file *file) |
1382 | { | 1403 | { |
1383 | if ((file->f_mode & FMODE_WRITE) && | 1404 | int ret; |
1384 | (file->f_flags & O_TRUNC)) | 1405 | |
1385 | release_all_trace_probes(); | 1406 | if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { |
1407 | ret = release_all_trace_probes(); | ||
1408 | if (ret < 0) | ||
1409 | return ret; | ||
1410 | } | ||
1386 | 1411 | ||
1387 | return seq_open(file, &probes_seq_op); | 1412 | return seq_open(file, &probes_seq_op); |
1388 | } | 1413 | } |
@@ -2055,6 +2080,21 @@ static __init int kprobe_trace_self_tests_init(void) | |||
2055 | 2080 | ||
2056 | ret = target(1, 2, 3, 4, 5, 6); | 2081 | ret = target(1, 2, 3, 4, 5, 6); |
2057 | 2082 | ||
2083 | /* Disable trace points before removing them */ | ||
2084 | tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM); | ||
2085 | if (WARN_ON_ONCE(tp == NULL)) { | ||
2086 | pr_warning("error on getting test probe.\n"); | ||
2087 | warn++; | ||
2088 | } else | ||
2089 | disable_trace_probe(tp, TP_FLAG_TRACE); | ||
2090 | |||
2091 | tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM); | ||
2092 | if (WARN_ON_ONCE(tp == NULL)) { | ||
2093 | pr_warning("error on getting 2nd test probe.\n"); | ||
2094 | warn++; | ||
2095 | } else | ||
2096 | disable_trace_probe(tp, TP_FLAG_TRACE); | ||
2097 | |||
2058 | ret = command_trace_probe("-:testprobe"); | 2098 | ret = command_trace_probe("-:testprobe"); |
2059 | if (WARN_ON_ONCE(ret)) { | 2099 | if (WARN_ON_ONCE(ret)) { |
2060 | pr_warning("error on deleting a probe.\n"); | 2100 | pr_warning("error on deleting a probe.\n"); |
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 1f06468a10d..6fd4ffd042f 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c | |||
@@ -59,18 +59,19 @@ void hold_module_trace_bprintk_format(const char **start, const char **end) | |||
59 | continue; | 59 | continue; |
60 | } | 60 | } |
61 | 61 | ||
62 | fmt = NULL; | ||
62 | tb_fmt = kmalloc(sizeof(*tb_fmt), GFP_KERNEL); | 63 | tb_fmt = kmalloc(sizeof(*tb_fmt), GFP_KERNEL); |
63 | if (tb_fmt) | 64 | if (tb_fmt) { |
64 | fmt = kmalloc(strlen(*iter) + 1, GFP_KERNEL); | 65 | fmt = kmalloc(strlen(*iter) + 1, GFP_KERNEL); |
65 | if (tb_fmt && fmt) { | 66 | if (fmt) { |
66 | list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list); | 67 | list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list); |
67 | strcpy(fmt, *iter); | 68 | strcpy(fmt, *iter); |
68 | tb_fmt->fmt = fmt; | 69 | tb_fmt->fmt = fmt; |
69 | *iter = tb_fmt->fmt; | 70 | } else |
70 | } else { | 71 | kfree(tb_fmt); |
71 | kfree(tb_fmt); | ||
72 | *iter = NULL; | ||
73 | } | 72 | } |
73 | *iter = fmt; | ||
74 | |||
74 | } | 75 | } |
75 | mutex_unlock(&btrace_mutex); | 76 | mutex_unlock(&btrace_mutex); |
76 | } | 77 | } |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index ee7b5a0bb9f..cb654542c1a 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -2,6 +2,7 @@ | |||
2 | #include <trace/events/syscalls.h> | 2 | #include <trace/events/syscalls.h> |
3 | #include <linux/slab.h> | 3 | #include <linux/slab.h> |
4 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
5 | #include <linux/module.h> /* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */ | ||
5 | #include <linux/ftrace.h> | 6 | #include <linux/ftrace.h> |
6 | #include <linux/perf_event.h> | 7 | #include <linux/perf_event.h> |
7 | #include <asm/syscall.h> | 8 | #include <asm/syscall.h> |
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index b219f1449c5..db110b8ae03 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c | |||
@@ -34,11 +34,16 @@ extern struct tracepoint * const __stop___tracepoints_ptrs[]; | |||
34 | static const int tracepoint_debug; | 34 | static const int tracepoint_debug; |
35 | 35 | ||
36 | /* | 36 | /* |
37 | * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the | 37 | * Tracepoints mutex protects the builtin and module tracepoints and the hash |
38 | * builtin and module tracepoints and the hash table. | 38 | * table, as well as the local module list. |
39 | */ | 39 | */ |
40 | static DEFINE_MUTEX(tracepoints_mutex); | 40 | static DEFINE_MUTEX(tracepoints_mutex); |
41 | 41 | ||
42 | #ifdef CONFIG_MODULES | ||
43 | /* Local list of struct tp_module */ | ||
44 | static LIST_HEAD(tracepoint_module_list); | ||
45 | #endif /* CONFIG_MODULES */ | ||
46 | |||
42 | /* | 47 | /* |
43 | * Tracepoint hash table, containing the active tracepoints. | 48 | * Tracepoint hash table, containing the active tracepoints. |
44 | * Protected by tracepoints_mutex. | 49 | * Protected by tracepoints_mutex. |
@@ -292,9 +297,10 @@ static void disable_tracepoint(struct tracepoint *elem) | |||
292 | * @end: end of the range | 297 | * @end: end of the range |
293 | * | 298 | * |
294 | * Updates the probe callback corresponding to a range of tracepoints. | 299 | * Updates the probe callback corresponding to a range of tracepoints. |
300 | * Called with tracepoints_mutex held. | ||
295 | */ | 301 | */ |
296 | void tracepoint_update_probe_range(struct tracepoint * const *begin, | 302 | static void tracepoint_update_probe_range(struct tracepoint * const *begin, |
297 | struct tracepoint * const *end) | 303 | struct tracepoint * const *end) |
298 | { | 304 | { |
299 | struct tracepoint * const *iter; | 305 | struct tracepoint * const *iter; |
300 | struct tracepoint_entry *mark_entry; | 306 | struct tracepoint_entry *mark_entry; |
@@ -302,7 +308,6 @@ void tracepoint_update_probe_range(struct tracepoint * const *begin, | |||
302 | if (!begin) | 308 | if (!begin) |
303 | return; | 309 | return; |
304 | 310 | ||
305 | mutex_lock(&tracepoints_mutex); | ||
306 | for (iter = begin; iter < end; iter++) { | 311 | for (iter = begin; iter < end; iter++) { |
307 | mark_entry = get_tracepoint((*iter)->name); | 312 | mark_entry = get_tracepoint((*iter)->name); |
308 | if (mark_entry) { | 313 | if (mark_entry) { |
@@ -312,11 +317,27 @@ void tracepoint_update_probe_range(struct tracepoint * const *begin, | |||
312 | disable_tracepoint(*iter); | 317 | disable_tracepoint(*iter); |
313 | } | 318 | } |
314 | } | 319 | } |
315 | mutex_unlock(&tracepoints_mutex); | ||
316 | } | 320 | } |
317 | 321 | ||
322 | #ifdef CONFIG_MODULES | ||
323 | void module_update_tracepoints(void) | ||
324 | { | ||
325 | struct tp_module *tp_mod; | ||
326 | |||
327 | list_for_each_entry(tp_mod, &tracepoint_module_list, list) | ||
328 | tracepoint_update_probe_range(tp_mod->tracepoints_ptrs, | ||
329 | tp_mod->tracepoints_ptrs + tp_mod->num_tracepoints); | ||
330 | } | ||
331 | #else /* CONFIG_MODULES */ | ||
332 | void module_update_tracepoints(void) | ||
333 | { | ||
334 | } | ||
335 | #endif /* CONFIG_MODULES */ | ||
336 | |||
337 | |||
318 | /* | 338 | /* |
319 | * Update probes, removing the faulty probes. | 339 | * Update probes, removing the faulty probes. |
340 | * Called with tracepoints_mutex held. | ||
320 | */ | 341 | */ |
321 | static void tracepoint_update_probes(void) | 342 | static void tracepoint_update_probes(void) |
322 | { | 343 | { |
@@ -359,11 +380,12 @@ int tracepoint_probe_register(const char *name, void *probe, void *data) | |||
359 | 380 | ||
360 | mutex_lock(&tracepoints_mutex); | 381 | mutex_lock(&tracepoints_mutex); |
361 | old = tracepoint_add_probe(name, probe, data); | 382 | old = tracepoint_add_probe(name, probe, data); |
362 | mutex_unlock(&tracepoints_mutex); | 383 | if (IS_ERR(old)) { |
363 | if (IS_ERR(old)) | 384 | mutex_unlock(&tracepoints_mutex); |
364 | return PTR_ERR(old); | 385 | return PTR_ERR(old); |
365 | 386 | } | |
366 | tracepoint_update_probes(); /* may update entry */ | 387 | tracepoint_update_probes(); /* may update entry */ |
388 | mutex_unlock(&tracepoints_mutex); | ||
367 | release_probes(old); | 389 | release_probes(old); |
368 | return 0; | 390 | return 0; |
369 | } | 391 | } |
@@ -402,11 +424,12 @@ int tracepoint_probe_unregister(const char *name, void *probe, void *data) | |||
402 | 424 | ||
403 | mutex_lock(&tracepoints_mutex); | 425 | mutex_lock(&tracepoints_mutex); |
404 | old = tracepoint_remove_probe(name, probe, data); | 426 | old = tracepoint_remove_probe(name, probe, data); |
405 | mutex_unlock(&tracepoints_mutex); | 427 | if (IS_ERR(old)) { |
406 | if (IS_ERR(old)) | 428 | mutex_unlock(&tracepoints_mutex); |
407 | return PTR_ERR(old); | 429 | return PTR_ERR(old); |
408 | 430 | } | |
409 | tracepoint_update_probes(); /* may update entry */ | 431 | tracepoint_update_probes(); /* may update entry */ |
432 | mutex_unlock(&tracepoints_mutex); | ||
410 | release_probes(old); | 433 | release_probes(old); |
411 | return 0; | 434 | return 0; |
412 | } | 435 | } |
@@ -489,9 +512,8 @@ void tracepoint_probe_update_all(void) | |||
489 | if (!list_empty(&old_probes)) | 512 | if (!list_empty(&old_probes)) |
490 | list_replace_init(&old_probes, &release_probes); | 513 | list_replace_init(&old_probes, &release_probes); |
491 | need_update = 0; | 514 | need_update = 0; |
492 | mutex_unlock(&tracepoints_mutex); | ||
493 | |||
494 | tracepoint_update_probes(); | 515 | tracepoint_update_probes(); |
516 | mutex_unlock(&tracepoints_mutex); | ||
495 | list_for_each_entry_safe(pos, next, &release_probes, u.list) { | 517 | list_for_each_entry_safe(pos, next, &release_probes, u.list) { |
496 | list_del(&pos->u.list); | 518 | list_del(&pos->u.list); |
497 | call_rcu_sched(&pos->u.rcu, rcu_free_old_probes); | 519 | call_rcu_sched(&pos->u.rcu, rcu_free_old_probes); |
@@ -509,7 +531,7 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_update_all); | |||
509 | * Will return the first tracepoint in the range if the input tracepoint is | 531 | * Will return the first tracepoint in the range if the input tracepoint is |
510 | * NULL. | 532 | * NULL. |
511 | */ | 533 | */ |
512 | int tracepoint_get_iter_range(struct tracepoint * const **tracepoint, | 534 | static int tracepoint_get_iter_range(struct tracepoint * const **tracepoint, |
513 | struct tracepoint * const *begin, struct tracepoint * const *end) | 535 | struct tracepoint * const *begin, struct tracepoint * const *end) |
514 | { | 536 | { |
515 | if (!*tracepoint && begin != end) { | 537 | if (!*tracepoint && begin != end) { |
@@ -520,11 +542,12 @@ int tracepoint_get_iter_range(struct tracepoint * const **tracepoint, | |||
520 | return 1; | 542 | return 1; |
521 | return 0; | 543 | return 0; |
522 | } | 544 | } |
523 | EXPORT_SYMBOL_GPL(tracepoint_get_iter_range); | ||
524 | 545 | ||
546 | #ifdef CONFIG_MODULES | ||
525 | static void tracepoint_get_iter(struct tracepoint_iter *iter) | 547 | static void tracepoint_get_iter(struct tracepoint_iter *iter) |
526 | { | 548 | { |
527 | int found = 0; | 549 | int found = 0; |
550 | struct tp_module *iter_mod; | ||
528 | 551 | ||
529 | /* Core kernel tracepoints */ | 552 | /* Core kernel tracepoints */ |
530 | if (!iter->module) { | 553 | if (!iter->module) { |
@@ -534,12 +557,43 @@ static void tracepoint_get_iter(struct tracepoint_iter *iter) | |||
534 | if (found) | 557 | if (found) |
535 | goto end; | 558 | goto end; |
536 | } | 559 | } |
537 | /* tracepoints in modules. */ | 560 | /* Tracepoints in modules */ |
538 | found = module_get_iter_tracepoints(iter); | 561 | mutex_lock(&tracepoints_mutex); |
562 | list_for_each_entry(iter_mod, &tracepoint_module_list, list) { | ||
563 | /* | ||
564 | * Sorted module list | ||
565 | */ | ||
566 | if (iter_mod < iter->module) | ||
567 | continue; | ||
568 | else if (iter_mod > iter->module) | ||
569 | iter->tracepoint = NULL; | ||
570 | found = tracepoint_get_iter_range(&iter->tracepoint, | ||
571 | iter_mod->tracepoints_ptrs, | ||
572 | iter_mod->tracepoints_ptrs | ||
573 | + iter_mod->num_tracepoints); | ||
574 | if (found) { | ||
575 | iter->module = iter_mod; | ||
576 | break; | ||
577 | } | ||
578 | } | ||
579 | mutex_unlock(&tracepoints_mutex); | ||
539 | end: | 580 | end: |
540 | if (!found) | 581 | if (!found) |
541 | tracepoint_iter_reset(iter); | 582 | tracepoint_iter_reset(iter); |
542 | } | 583 | } |
584 | #else /* CONFIG_MODULES */ | ||
585 | static void tracepoint_get_iter(struct tracepoint_iter *iter) | ||
586 | { | ||
587 | int found = 0; | ||
588 | |||
589 | /* Core kernel tracepoints */ | ||
590 | found = tracepoint_get_iter_range(&iter->tracepoint, | ||
591 | __start___tracepoints_ptrs, | ||
592 | __stop___tracepoints_ptrs); | ||
593 | if (!found) | ||
594 | tracepoint_iter_reset(iter); | ||
595 | } | ||
596 | #endif /* CONFIG_MODULES */ | ||
543 | 597 | ||
544 | void tracepoint_iter_start(struct tracepoint_iter *iter) | 598 | void tracepoint_iter_start(struct tracepoint_iter *iter) |
545 | { | 599 | { |
@@ -566,26 +620,98 @@ EXPORT_SYMBOL_GPL(tracepoint_iter_stop); | |||
566 | 620 | ||
567 | void tracepoint_iter_reset(struct tracepoint_iter *iter) | 621 | void tracepoint_iter_reset(struct tracepoint_iter *iter) |
568 | { | 622 | { |
623 | #ifdef CONFIG_MODULES | ||
569 | iter->module = NULL; | 624 | iter->module = NULL; |
625 | #endif /* CONFIG_MODULES */ | ||
570 | iter->tracepoint = NULL; | 626 | iter->tracepoint = NULL; |
571 | } | 627 | } |
572 | EXPORT_SYMBOL_GPL(tracepoint_iter_reset); | 628 | EXPORT_SYMBOL_GPL(tracepoint_iter_reset); |
573 | 629 | ||
574 | #ifdef CONFIG_MODULES | 630 | #ifdef CONFIG_MODULES |
631 | static int tracepoint_module_coming(struct module *mod) | ||
632 | { | ||
633 | struct tp_module *tp_mod, *iter; | ||
634 | int ret = 0; | ||
635 | |||
636 | /* | ||
637 | * We skip modules that tain the kernel, especially those with different | ||
638 | * module header (for forced load), to make sure we don't cause a crash. | ||
639 | */ | ||
640 | if (mod->taints) | ||
641 | return 0; | ||
642 | mutex_lock(&tracepoints_mutex); | ||
643 | tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL); | ||
644 | if (!tp_mod) { | ||
645 | ret = -ENOMEM; | ||
646 | goto end; | ||
647 | } | ||
648 | tp_mod->num_tracepoints = mod->num_tracepoints; | ||
649 | tp_mod->tracepoints_ptrs = mod->tracepoints_ptrs; | ||
650 | |||
651 | /* | ||
652 | * tracepoint_module_list is kept sorted by struct tp_module pointer | ||
653 | * address for iteration on tracepoints from a seq_file that can release | ||
654 | * the mutex between calls. | ||
655 | */ | ||
656 | list_for_each_entry_reverse(iter, &tracepoint_module_list, list) { | ||
657 | BUG_ON(iter == tp_mod); /* Should never be in the list twice */ | ||
658 | if (iter < tp_mod) { | ||
659 | /* We belong to the location right after iter. */ | ||
660 | list_add(&tp_mod->list, &iter->list); | ||
661 | goto module_added; | ||
662 | } | ||
663 | } | ||
664 | /* We belong to the beginning of the list */ | ||
665 | list_add(&tp_mod->list, &tracepoint_module_list); | ||
666 | module_added: | ||
667 | tracepoint_update_probe_range(mod->tracepoints_ptrs, | ||
668 | mod->tracepoints_ptrs + mod->num_tracepoints); | ||
669 | end: | ||
670 | mutex_unlock(&tracepoints_mutex); | ||
671 | return ret; | ||
672 | } | ||
673 | |||
674 | static int tracepoint_module_going(struct module *mod) | ||
675 | { | ||
676 | struct tp_module *pos; | ||
677 | |||
678 | mutex_lock(&tracepoints_mutex); | ||
679 | tracepoint_update_probe_range(mod->tracepoints_ptrs, | ||
680 | mod->tracepoints_ptrs + mod->num_tracepoints); | ||
681 | list_for_each_entry(pos, &tracepoint_module_list, list) { | ||
682 | if (pos->tracepoints_ptrs == mod->tracepoints_ptrs) { | ||
683 | list_del(&pos->list); | ||
684 | kfree(pos); | ||
685 | break; | ||
686 | } | ||
687 | } | ||
688 | /* | ||
689 | * In the case of modules that were tainted at "coming", we'll simply | ||
690 | * walk through the list without finding it. We cannot use the "tainted" | ||
691 | * flag on "going", in case a module taints the kernel only after being | ||
692 | * loaded. | ||
693 | */ | ||
694 | mutex_unlock(&tracepoints_mutex); | ||
695 | return 0; | ||
696 | } | ||
575 | 697 | ||
576 | int tracepoint_module_notify(struct notifier_block *self, | 698 | int tracepoint_module_notify(struct notifier_block *self, |
577 | unsigned long val, void *data) | 699 | unsigned long val, void *data) |
578 | { | 700 | { |
579 | struct module *mod = data; | 701 | struct module *mod = data; |
702 | int ret = 0; | ||
580 | 703 | ||
581 | switch (val) { | 704 | switch (val) { |
582 | case MODULE_STATE_COMING: | 705 | case MODULE_STATE_COMING: |
706 | ret = tracepoint_module_coming(mod); | ||
707 | break; | ||
708 | case MODULE_STATE_LIVE: | ||
709 | break; | ||
583 | case MODULE_STATE_GOING: | 710 | case MODULE_STATE_GOING: |
584 | tracepoint_update_probe_range(mod->tracepoints_ptrs, | 711 | ret = tracepoint_module_going(mod); |
585 | mod->tracepoints_ptrs + mod->num_tracepoints); | ||
586 | break; | 712 | break; |
587 | } | 713 | } |
588 | return 0; | 714 | return ret; |
589 | } | 715 | } |
590 | 716 | ||
591 | struct notifier_block tracepoint_module_nb = { | 717 | struct notifier_block tracepoint_module_nb = { |
@@ -598,7 +724,6 @@ static int init_tracepoints(void) | |||
598 | return register_module_notifier(&tracepoint_module_nb); | 724 | return register_module_notifier(&tracepoint_module_nb); |
599 | } | 725 | } |
600 | __initcall(init_tracepoints); | 726 | __initcall(init_tracepoints); |
601 | |||
602 | #endif /* CONFIG_MODULES */ | 727 | #endif /* CONFIG_MODULES */ |
603 | 728 | ||
604 | #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS | 729 | #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS |
diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 24dc60d9fa1..5bbfac85866 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c | |||
@@ -78,6 +78,7 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) | |||
78 | 78 | ||
79 | #define KB 1024 | 79 | #define KB 1024 |
80 | #define MB (1024*KB) | 80 | #define MB (1024*KB) |
81 | #define KB_MASK (~(KB-1)) | ||
81 | /* | 82 | /* |
82 | * fill in extended accounting fields | 83 | * fill in extended accounting fields |
83 | */ | 84 | */ |
@@ -95,14 +96,14 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) | |||
95 | stats->hiwater_vm = get_mm_hiwater_vm(mm) * PAGE_SIZE / KB; | 96 | stats->hiwater_vm = get_mm_hiwater_vm(mm) * PAGE_SIZE / KB; |
96 | mmput(mm); | 97 | mmput(mm); |
97 | } | 98 | } |
98 | stats->read_char = p->ioac.rchar; | 99 | stats->read_char = p->ioac.rchar & KB_MASK; |
99 | stats->write_char = p->ioac.wchar; | 100 | stats->write_char = p->ioac.wchar & KB_MASK; |
100 | stats->read_syscalls = p->ioac.syscr; | 101 | stats->read_syscalls = p->ioac.syscr & KB_MASK; |
101 | stats->write_syscalls = p->ioac.syscw; | 102 | stats->write_syscalls = p->ioac.syscw & KB_MASK; |
102 | #ifdef CONFIG_TASK_IO_ACCOUNTING | 103 | #ifdef CONFIG_TASK_IO_ACCOUNTING |
103 | stats->read_bytes = p->ioac.read_bytes; | 104 | stats->read_bytes = p->ioac.read_bytes & KB_MASK; |
104 | stats->write_bytes = p->ioac.write_bytes; | 105 | stats->write_bytes = p->ioac.write_bytes & KB_MASK; |
105 | stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes; | 106 | stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes & KB_MASK; |
106 | #else | 107 | #else |
107 | stats->read_bytes = 0; | 108 | stats->read_bytes = 0; |
108 | stats->write_bytes = 0; | 109 | stats->write_bytes = 0; |
diff --git a/kernel/up.c b/kernel/up.c index 1ff27a28bb7..c54c75e9faf 100644 --- a/kernel/up.c +++ b/kernel/up.c | |||
@@ -4,7 +4,7 @@ | |||
4 | 4 | ||
5 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
6 | #include <linux/kernel.h> | 6 | #include <linux/kernel.h> |
7 | #include <linux/module.h> | 7 | #include <linux/export.h> |
8 | #include <linux/smp.h> | 8 | #include <linux/smp.h> |
9 | 9 | ||
10 | int smp_call_function_single(int cpu, void (*func) (void *info), void *info, | 10 | int smp_call_function_single(int cpu, void (*func) (void *info), void *info, |
diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c index 92cb706c7fc..1744bb80f1f 100644 --- a/kernel/user-return-notifier.c +++ b/kernel/user-return-notifier.c | |||
@@ -2,7 +2,7 @@ | |||
2 | #include <linux/user-return-notifier.h> | 2 | #include <linux/user-return-notifier.h> |
3 | #include <linux/percpu.h> | 3 | #include <linux/percpu.h> |
4 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
5 | #include <linux/module.h> | 5 | #include <linux/export.h> |
6 | 6 | ||
7 | static DEFINE_PER_CPU(struct hlist_head, return_notifier_list); | 7 | static DEFINE_PER_CPU(struct hlist_head, return_notifier_list); |
8 | 8 | ||
diff --git a/kernel/user.c b/kernel/user.c index 9e03e9c1df8..71dd2363ab0 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -14,7 +14,7 @@ | |||
14 | #include <linux/bitops.h> | 14 | #include <linux/bitops.h> |
15 | #include <linux/key.h> | 15 | #include <linux/key.h> |
16 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
17 | #include <linux/module.h> | 17 | #include <linux/export.h> |
18 | #include <linux/user_namespace.h> | 18 | #include <linux/user_namespace.h> |
19 | 19 | ||
20 | /* | 20 | /* |
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 9da289c34f2..3b906e98b1d 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * License. | 5 | * License. |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/module.h> | 8 | #include <linux/export.h> |
9 | #include <linux/nsproxy.h> | 9 | #include <linux/nsproxy.h> |
10 | #include <linux/slab.h> | 10 | #include <linux/slab.h> |
11 | #include <linux/user_namespace.h> | 11 | #include <linux/user_namespace.h> |
diff --git a/kernel/utsname.c b/kernel/utsname.c index bff131b9510..405caf91aad 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c | |||
@@ -9,7 +9,7 @@ | |||
9 | * License. | 9 | * License. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/module.h> | 12 | #include <linux/export.h> |
13 | #include <linux/uts.h> | 13 | #include <linux/uts.h> |
14 | #include <linux/utsname.h> | 14 | #include <linux/utsname.h> |
15 | #include <linux/err.h> | 15 | #include <linux/err.h> |
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index a2cd77e70d4..63da38c2d82 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c | |||
@@ -9,10 +9,11 @@ | |||
9 | * License. | 9 | * License. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/module.h> | 12 | #include <linux/export.h> |
13 | #include <linux/uts.h> | 13 | #include <linux/uts.h> |
14 | #include <linux/utsname.h> | 14 | #include <linux/utsname.h> |
15 | #include <linux/sysctl.h> | 15 | #include <linux/sysctl.h> |
16 | #include <linux/wait.h> | ||
16 | 17 | ||
17 | static void *get_uts(ctl_table *table, int write) | 18 | static void *get_uts(ctl_table *table, int write) |
18 | { | 19 | { |
@@ -51,12 +52,19 @@ static int proc_do_uts_string(ctl_table *table, int write, | |||
51 | uts_table.data = get_uts(table, write); | 52 | uts_table.data = get_uts(table, write); |
52 | r = proc_dostring(&uts_table,write,buffer,lenp, ppos); | 53 | r = proc_dostring(&uts_table,write,buffer,lenp, ppos); |
53 | put_uts(table, write, uts_table.data); | 54 | put_uts(table, write, uts_table.data); |
55 | |||
56 | if (write) | ||
57 | proc_sys_poll_notify(table->poll); | ||
58 | |||
54 | return r; | 59 | return r; |
55 | } | 60 | } |
56 | #else | 61 | #else |
57 | #define proc_do_uts_string NULL | 62 | #define proc_do_uts_string NULL |
58 | #endif | 63 | #endif |
59 | 64 | ||
65 | static DEFINE_CTL_TABLE_POLL(hostname_poll); | ||
66 | static DEFINE_CTL_TABLE_POLL(domainname_poll); | ||
67 | |||
60 | static struct ctl_table uts_kern_table[] = { | 68 | static struct ctl_table uts_kern_table[] = { |
61 | { | 69 | { |
62 | .procname = "ostype", | 70 | .procname = "ostype", |
@@ -85,6 +93,7 @@ static struct ctl_table uts_kern_table[] = { | |||
85 | .maxlen = sizeof(init_uts_ns.name.nodename), | 93 | .maxlen = sizeof(init_uts_ns.name.nodename), |
86 | .mode = 0644, | 94 | .mode = 0644, |
87 | .proc_handler = proc_do_uts_string, | 95 | .proc_handler = proc_do_uts_string, |
96 | .poll = &hostname_poll, | ||
88 | }, | 97 | }, |
89 | { | 98 | { |
90 | .procname = "domainname", | 99 | .procname = "domainname", |
@@ -92,6 +101,7 @@ static struct ctl_table uts_kern_table[] = { | |||
92 | .maxlen = sizeof(init_uts_ns.name.domainname), | 101 | .maxlen = sizeof(init_uts_ns.name.domainname), |
93 | .mode = 0644, | 102 | .mode = 0644, |
94 | .proc_handler = proc_do_uts_string, | 103 | .proc_handler = proc_do_uts_string, |
104 | .poll = &domainname_poll, | ||
95 | }, | 105 | }, |
96 | {} | 106 | {} |
97 | }; | 107 | }; |
@@ -105,6 +115,19 @@ static struct ctl_table uts_root_table[] = { | |||
105 | {} | 115 | {} |
106 | }; | 116 | }; |
107 | 117 | ||
118 | #ifdef CONFIG_PROC_SYSCTL | ||
119 | /* | ||
120 | * Notify userspace about a change in a certain entry of uts_kern_table, | ||
121 | * identified by the parameter proc. | ||
122 | */ | ||
123 | void uts_proc_notify(enum uts_proc proc) | ||
124 | { | ||
125 | struct ctl_table *table = &uts_kern_table[proc]; | ||
126 | |||
127 | proc_sys_poll_notify(table->poll); | ||
128 | } | ||
129 | #endif | ||
130 | |||
108 | static int __init utsname_sysctl_init(void) | 131 | static int __init utsname_sysctl_init(void) |
109 | { | 132 | { |
110 | register_sysctl_table(uts_root_table); | 133 | register_sysctl_table(uts_root_table); |
diff --git a/kernel/wait.c b/kernel/wait.c index f45ea8d2a1c..26fa7797f90 100644 --- a/kernel/wait.c +++ b/kernel/wait.c | |||
@@ -4,7 +4,7 @@ | |||
4 | * (C) 2004 William Irwin, Oracle | 4 | * (C) 2004 William Irwin, Oracle |
5 | */ | 5 | */ |
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/module.h> | 7 | #include <linux/export.h> |
8 | #include <linux/sched.h> | 8 | #include <linux/sched.h> |
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/wait.h> | 10 | #include <linux/wait.h> |
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 36491cd5b7d..1d7bca7f4f5 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -321,7 +321,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) | |||
321 | */ | 321 | */ |
322 | static int watchdog(void *unused) | 322 | static int watchdog(void *unused) |
323 | { | 323 | { |
324 | static struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | 324 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; |
325 | struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); | 325 | struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); |
326 | 326 | ||
327 | sched_setscheduler(current, SCHED_FIFO, &param); | 327 | sched_setscheduler(current, SCHED_FIFO, &param); |
@@ -350,7 +350,8 @@ static int watchdog(void *unused) | |||
350 | set_current_state(TASK_INTERRUPTIBLE); | 350 | set_current_state(TASK_INTERRUPTIBLE); |
351 | } | 351 | } |
352 | __set_current_state(TASK_RUNNING); | 352 | __set_current_state(TASK_RUNNING); |
353 | 353 | param.sched_priority = 0; | |
354 | sched_setscheduler(current, SCHED_NORMAL, &param); | ||
354 | return 0; | 355 | return 0; |
355 | } | 356 | } |
356 | 357 | ||
@@ -438,7 +439,7 @@ static int watchdog_enable(int cpu) | |||
438 | 439 | ||
439 | /* create the watchdog thread */ | 440 | /* create the watchdog thread */ |
440 | if (!p) { | 441 | if (!p) { |
441 | p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); | 442 | p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu); |
442 | if (IS_ERR(p)) { | 443 | if (IS_ERR(p)) { |
443 | printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); | 444 | printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); |
444 | if (!err) { | 445 | if (!err) { |
@@ -480,6 +481,8 @@ static void watchdog_disable(int cpu) | |||
480 | } | 481 | } |
481 | } | 482 | } |
482 | 483 | ||
484 | /* sysctl functions */ | ||
485 | #ifdef CONFIG_SYSCTL | ||
483 | static void watchdog_enable_all_cpus(void) | 486 | static void watchdog_enable_all_cpus(void) |
484 | { | 487 | { |
485 | int cpu; | 488 | int cpu; |
@@ -509,8 +512,6 @@ static void watchdog_disable_all_cpus(void) | |||
509 | } | 512 | } |
510 | 513 | ||
511 | 514 | ||
512 | /* sysctl functions */ | ||
513 | #ifdef CONFIG_SYSCTL | ||
514 | /* | 515 | /* |
515 | * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh | 516 | * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh |
516 | */ | 517 | */ |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 25fb1b0e53f..42fa9ad0a81 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -23,7 +23,7 @@ | |||
23 | * Please read Documentation/workqueue.txt for details. | 23 | * Please read Documentation/workqueue.txt for details. |
24 | */ | 24 | */ |
25 | 25 | ||
26 | #include <linux/module.h> | 26 | #include <linux/export.h> |
27 | #include <linux/kernel.h> | 27 | #include <linux/kernel.h> |
28 | #include <linux/sched.h> | 28 | #include <linux/sched.h> |
29 | #include <linux/init.h> | 29 | #include <linux/init.h> |
@@ -2412,8 +2412,13 @@ reflush: | |||
2412 | 2412 | ||
2413 | for_each_cwq_cpu(cpu, wq) { | 2413 | for_each_cwq_cpu(cpu, wq) { |
2414 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); | 2414 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); |
2415 | bool drained; | ||
2415 | 2416 | ||
2416 | if (!cwq->nr_active && list_empty(&cwq->delayed_works)) | 2417 | spin_lock_irq(&cwq->gcwq->lock); |
2418 | drained = !cwq->nr_active && list_empty(&cwq->delayed_works); | ||
2419 | spin_unlock_irq(&cwq->gcwq->lock); | ||
2420 | |||
2421 | if (drained) | ||
2417 | continue; | 2422 | continue; |
2418 | 2423 | ||
2419 | if (++flush_cnt == 10 || | 2424 | if (++flush_cnt == 10 || |