Diffstat (limited to 'kernel')
-rw-r--r--  kernel/irq/Kconfig            |   3
-rw-r--r--  kernel/irq/handle.c           | 111
-rw-r--r--  kernel/lockdep.c              |  18
-rw-r--r--  kernel/sched.c                |  26
-rw-r--r--  kernel/sched_autogroup.c      |  32
-rw-r--r--  kernel/sched_autogroup.h      |   4
-rw-r--r--  kernel/sched_debug.c          |  42
-rw-r--r--  kernel/sched_fair.c           |  35
-rw-r--r--  kernel/smp.c                  |  62
-rw-r--r--  kernel/trace/trace_irqsoff.c  |   8
-rw-r--r--  kernel/workqueue.c            |  20
11 files changed, 187 insertions(+), 174 deletions(-)
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 31d766bf5d2e..8e42fec7686d 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -9,9 +9,6 @@ menu "IRQ subsystem"
 config GENERIC_HARDIRQS
 	def_bool y
 
-config GENERIC_HARDIRQS_NO__DO_IRQ
-	def_bool y
-
 # Select this to disable the deprecated stuff
 config GENERIC_HARDIRQS_NO_DEPRECATED
 	def_bool n
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index e2347eb63306..3540a7190122 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -118,114 +118,3 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
 
 	return retval;
 }
-
-#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
-
-#ifdef CONFIG_ENABLE_WARN_DEPRECATED
-# warning __do_IRQ is deprecated. Please convert to proper flow handlers
-#endif
-
-/**
- * __do_IRQ - original all in one highlevel IRQ handler
- * @irq:	the interrupt number
- *
- * __do_IRQ handles all normal device IRQ's (the special
- * SMP cross-CPU interrupts have their own specific
- * handlers).
- *
- * This is the original x86 implementation which is used for every
- * interrupt type.
- */
-unsigned int __do_IRQ(unsigned int irq)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-	struct irqaction *action;
-	unsigned int status;
-
-	kstat_incr_irqs_this_cpu(irq, desc);
-
-	if (CHECK_IRQ_PER_CPU(desc->status)) {
-		irqreturn_t action_ret;
-
-		/*
-		 * No locking required for CPU-local interrupts:
-		 */
-		if (desc->irq_data.chip->ack)
-			desc->irq_data.chip->ack(irq);
-		if (likely(!(desc->status & IRQ_DISABLED))) {
-			action_ret = handle_IRQ_event(irq, desc->action);
-			if (!noirqdebug)
-				note_interrupt(irq, desc, action_ret);
-		}
-		desc->irq_data.chip->end(irq);
-		return 1;
-	}
-
-	raw_spin_lock(&desc->lock);
-	if (desc->irq_data.chip->ack)
-		desc->irq_data.chip->ack(irq);
-	/*
-	 * REPLAY is when Linux resends an IRQ that was dropped earlier
-	 * WAITING is used by probe to mark irqs that are being tested
-	 */
-	status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
-	status |= IRQ_PENDING; /* we _want_ to handle it */
-
-	/*
-	 * If the IRQ is disabled for whatever reason, we cannot
-	 * use the action we have.
-	 */
-	action = NULL;
-	if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
-		action = desc->action;
-		status &= ~IRQ_PENDING; /* we commit to handling */
-		status |= IRQ_INPROGRESS; /* we are handling it */
-	}
-	desc->status = status;
-
-	/*
-	 * If there is no IRQ handler or it was disabled, exit early.
-	 * Since we set PENDING, if another processor is handling
-	 * a different instance of this same irq, the other processor
-	 * will take care of it.
-	 */
-	if (unlikely(!action))
-		goto out;
-
-	/*
-	 * Edge triggered interrupts need to remember
-	 * pending events.
-	 * This applies to any hw interrupts that allow a second
-	 * instance of the same irq to arrive while we are in do_IRQ
-	 * or in the handler. But the code here only handles the _second_
-	 * instance of the irq, not the third or fourth. So it is mostly
-	 * useful for irq hardware that does not mask cleanly in an
-	 * SMP environment.
-	 */
-	for (;;) {
-		irqreturn_t action_ret;
-
-		raw_spin_unlock(&desc->lock);
-
-		action_ret = handle_IRQ_event(irq, action);
-		if (!noirqdebug)
-			note_interrupt(irq, desc, action_ret);
-
-		raw_spin_lock(&desc->lock);
-		if (likely(!(desc->status & IRQ_PENDING)))
-			break;
-		desc->status &= ~IRQ_PENDING;
-	}
-	desc->status &= ~IRQ_INPROGRESS;
-
-out:
-	/*
-	 * The ->end() handler has to deal with interrupts which got
-	 * disabled while the handler was running.
-	 */
-	desc->irq_data.chip->end(irq);
-	raw_spin_unlock(&desc->lock);
-
-	return 1;
-}
-#endif
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 42ba65dff7d9..0d2058da80f5 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2292,22 +2292,6 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark)
 }
 
 /*
- * Debugging helper: via this flag we know that we are in
- * 'early bootup code', and will warn about any invalid irqs-on event:
- */
-static int early_boot_irqs_enabled;
-
-void early_boot_irqs_off(void)
-{
-	early_boot_irqs_enabled = 0;
-}
-
-void early_boot_irqs_on(void)
-{
-	early_boot_irqs_enabled = 1;
-}
-
-/*
  * Hardirqs will be enabled:
  */
 void trace_hardirqs_on_caller(unsigned long ip)
@@ -2319,7 +2303,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
 	if (unlikely(!debug_locks || current->lockdep_recursion))
 		return;
 
-	if (DEBUG_LOCKS_WARN_ON(unlikely(!early_boot_irqs_enabled)))
+	if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled)))
 		return;
 
 	if (unlikely(curr->hardirqs_enabled)) {
diff --git a/kernel/sched.c b/kernel/sched.c
index ea3e5eff3878..18d38e4ec7ba 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -553,9 +553,6 @@ struct rq {
 	/* try_to_wake_up() stats */
 	unsigned int ttwu_count;
 	unsigned int ttwu_local;
-
-	/* BKL stats */
-	unsigned int bkl_count;
 #endif
 };
 
@@ -609,6 +606,9 @@ static inline struct task_group *task_group(struct task_struct *p)
 	struct task_group *tg;
 	struct cgroup_subsys_state *css;
 
+	if (p->flags & PF_EXITING)
+		return &root_task_group;
+
 	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
 			lockdep_is_held(&task_rq(p)->lock));
 	tg = container_of(css, struct task_group, css);
@@ -3887,7 +3887,7 @@ static inline void schedule_debug(struct task_struct *prev)
 	schedstat_inc(this_rq(), sched_count);
 #ifdef CONFIG_SCHEDSTATS
 	if (unlikely(prev->lock_depth >= 0)) {
-		schedstat_inc(this_rq(), bkl_count);
+		schedstat_inc(this_rq(), rq_sched_info.bkl_count);
 		schedstat_inc(prev, sched_info.bkl_count);
 	}
 #endif
@@ -4871,7 +4871,8 @@ recheck:
 	 * assigned.
 	 */
 	if (rt_bandwidth_enabled() && rt_policy(policy) &&
-			task_group(p)->rt_bandwidth.rt_runtime == 0) {
+			task_group(p)->rt_bandwidth.rt_runtime == 0 &&
+			!task_group_is_autogroup(task_group(p))) {
 		__task_rq_unlock(rq);
 		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 		return -EPERM;
@@ -8882,6 +8883,20 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 	}
 }
 
+static void
+cpu_cgroup_exit(struct cgroup_subsys *ss, struct task_struct *task)
+{
+	/*
+	 * cgroup_exit() is called in the copy_process() failure path.
+	 * Ignore this case since the task hasn't ran yet, this avoids
+	 * trying to poke a half freed task state from generic code.
+	 */
+	if (!(task->flags & PF_EXITING))
+		return;
+
+	sched_move_task(task);
+}
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype,
 				u64 shareval)
@@ -8954,6 +8969,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 	.destroy	= cpu_cgroup_destroy,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,
+	.exit		= cpu_cgroup_exit,
 	.populate	= cpu_cgroup_populate,
 	.subsys_id	= cpu_cgroup_subsys_id,
 	.early_init	= 1,
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
index 32a723b8f84c..9fb656283157 100644
--- a/kernel/sched_autogroup.c
+++ b/kernel/sched_autogroup.c
@@ -27,6 +27,11 @@ static inline void autogroup_destroy(struct kref *kref)
 {
 	struct autogroup *ag = container_of(kref, struct autogroup, kref);
 
+#ifdef CONFIG_RT_GROUP_SCHED
+	/* We've redirected RT tasks to the root task group... */
+	ag->tg->rt_se = NULL;
+	ag->tg->rt_rq = NULL;
+#endif
 	sched_destroy_group(ag->tg);
 }
 
@@ -55,6 +60,10 @@ static inline struct autogroup *autogroup_task_get(struct task_struct *p)
 	return ag;
 }
 
+#ifdef CONFIG_RT_GROUP_SCHED
+static void free_rt_sched_group(struct task_group *tg);
+#endif
+
 static inline struct autogroup *autogroup_create(void)
 {
 	struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL);
@@ -72,6 +81,19 @@ static inline struct autogroup *autogroup_create(void)
 	init_rwsem(&ag->lock);
 	ag->id = atomic_inc_return(&autogroup_seq_nr);
 	ag->tg = tg;
+#ifdef CONFIG_RT_GROUP_SCHED
+	/*
+	 * Autogroup RT tasks are redirected to the root task group
+	 * so we don't have to move tasks around upon policy change,
+	 * or flail around trying to allocate bandwidth on the fly.
+	 * A bandwidth exception in __sched_setscheduler() allows
+	 * the policy change to proceed.  Thereafter, task_group()
+	 * returns &root_task_group, so zero bandwidth is required.
+	 */
+	free_rt_sched_group(tg);
+	tg->rt_se = root_task_group.rt_se;
+	tg->rt_rq = root_task_group.rt_rq;
+#endif
 	tg->autogroup = ag;
 
 	return ag;
@@ -106,6 +128,11 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg)
 	return true;
 }
 
+static inline bool task_group_is_autogroup(struct task_group *tg)
+{
+	return tg != &root_task_group && tg->autogroup;
+}
+
 static inline struct task_group *
 autogroup_task_group(struct task_struct *p, struct task_group *tg)
 {
@@ -231,6 +258,11 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
 #ifdef CONFIG_SCHED_DEBUG
 static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
 {
+	int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
+
+	if (!enabled || !tg->autogroup)
+		return 0;
+
 	return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
 }
 #endif /* CONFIG_SCHED_DEBUG */
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
index 5358e241cb20..7b859ffe5dad 100644
--- a/kernel/sched_autogroup.h
+++ b/kernel/sched_autogroup.h
@@ -15,6 +15,10 @@ autogroup_task_group(struct task_struct *p, struct task_group *tg);
 
 static inline void autogroup_init(struct task_struct *init_task) { }
 static inline void autogroup_free(struct task_group *tg) { }
+static inline bool task_group_is_autogroup(struct task_group *tg)
+{
+	return 0;
+}
 
 static inline struct task_group *
 autogroup_task_group(struct task_struct *p, struct task_group *tg)
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 1dfae3d014b5..eb6cb8edd075 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -16,6 +16,8 @@
 #include <linux/kallsyms.h>
 #include <linux/utsname.h>
 
+static DEFINE_SPINLOCK(sched_debug_lock);
+
 /*
  * This allows printing both to /proc/sched_debug and
  * to the console
@@ -86,6 +88,26 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 }
 #endif
 
+#ifdef CONFIG_CGROUP_SCHED
+static char group_path[PATH_MAX];
+
+static char *task_group_path(struct task_group *tg)
+{
+	if (autogroup_path(tg, group_path, PATH_MAX))
+		return group_path;
+
+	/*
+	 * May be NULL if the underlying cgroup isn't fully-created yet
+	 */
+	if (!tg->css.cgroup) {
+		group_path[0] = '\0';
+		return group_path;
+	}
+	cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
+	return group_path;
+}
+#endif
+
 static void
 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 {
@@ -108,6 +130,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 	SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld",
 		0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
 #endif
+#ifdef CONFIG_CGROUP_SCHED
+	SEQ_printf(m, " %s", task_group_path(task_group(p)));
+#endif
 
 	SEQ_printf(m, "\n");
 }
@@ -144,7 +169,11 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	struct sched_entity *last;
 	unsigned long flags;
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg));
+#else
 	SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
+#endif
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
 			SPLIT_NS(cfs_rq->exec_clock));
 
@@ -191,7 +220,11 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 
 void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 {
+#ifdef CONFIG_RT_GROUP_SCHED
+	SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg));
+#else
 	SEQ_printf(m, "\nrt_rq[%d]:\n", cpu);
+#endif
 
 #define P(x) \
 	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
@@ -212,6 +245,7 @@ extern __read_mostly int sched_clock_running;
 static void print_cpu(struct seq_file *m, int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
 
 #ifdef CONFIG_X86
 	{
@@ -262,14 +296,20 @@ static void print_cpu(struct seq_file *m, int cpu)
 	P(ttwu_count);
 	P(ttwu_local);
 
-	P(bkl_count);
+	SEQ_printf(m, "  .%-30s: %d\n", "bkl_count",
+		   rq->rq_sched_info.bkl_count);
 
 #undef P
+#undef P64
 #endif
+	spin_lock_irqsave(&sched_debug_lock, flags);
 	print_cfs_stats(m, cpu);
 	print_rt_stats(m, cpu);
 
+	rcu_read_lock();
 	print_rq(m, rq, cpu);
+	rcu_read_unlock();
+	spin_unlock_irqrestore(&sched_debug_lock, flags);
 }
 
 static const char *sched_tunable_scaling_names[] = {
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c62ebae65cf0..77e9166d7bbf 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1062,6 +1062,9 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 		struct sched_entity *se = __pick_next_entity(cfs_rq);
 		s64 delta = curr->vruntime - se->vruntime;
 
+		if (delta < 0)
+			return;
+
 		if (delta > ideal_runtime)
 			resched_task(rq_of(cfs_rq)->curr);
 	}
@@ -1362,27 +1365,27 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
 		return wl;
 
 	for_each_sched_entity(se) {
-		long S, rw, s, a, b;
+		long lw, w;
 
-		S = se->my_q->tg->shares;
-		s = se->load.weight;
-		rw = se->my_q->load.weight;
+		tg = se->my_q->tg;
+		w = se->my_q->load.weight;
 
-		a = S*(rw + wl);
-		b = S*rw + s*wg;
+		/* use this cpu's instantaneous contribution */
+		lw = atomic_read(&tg->load_weight);
+		lw -= se->my_q->load_contribution;
+		lw += w + wg;
 
-		wl = s*(a-b);
+		wl += w;
 
-		if (likely(b))
-			wl /= b;
+		if (lw > 0 && wl < lw)
+			wl = (wl * tg->shares) / lw;
+		else
+			wl = tg->shares;
 
-		/*
-		 * Assume the group is already running and will
-		 * thus already be accounted for in the weight.
-		 *
-		 * That is, moving shares between CPUs, does not
-		 * alter the group weight.
-		 */
+		/* zero point is MIN_SHARES */
+		if (wl < MIN_SHARES)
+			wl = MIN_SHARES;
+		wl -= se->load.weight;
 		wg = 0;
 	}
 
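Note: the rewritten effective_load() loop above estimates the group entity's weight on this cpu as tg->shares scaled by this cpu's share of the group load. A worked pass with made-up numbers, as a sketch only (the values and MIN_SHARES = 2 are assumptions for illustration, not taken from the commit):

/*
 * Illustrative sketch: one pass of the new approximation with assumed
 * numbers.  tg->shares = 1024, group-wide load_weight = 2048, this
 * cpu's stale load_contribution = 512, instantaneous queue weight
 * w = 512, weight being added wl = 512, wg = 0, and the group entity
 * currently weighs 256 on this cpu.
 */
static long effective_load_example(void)
{
	long shares = 1024, load_weight = 2048, load_contribution = 512;
	long w = 512, wl = 512, wg = 0, se_weight = 256, lw;

	lw = load_weight - load_contribution + w + wg;	/* 2048 */
	wl += w;					/* 1024 */

	if (lw > 0 && wl < lw)
		wl = (wl * shares) / lw;	/* 1024 * 1024 / 2048 = 512 */
	else
		wl = shares;

	if (wl < 2)				/* clamp to MIN_SHARES */
		wl = 2;

	/* 512 - 256 = 256: estimated weight this cpu's entity would gain */
	return wl - se_weight;
}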
diff --git a/kernel/smp.c b/kernel/smp.c
index 4ec30e069987..9910744f0856 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -194,23 +194,52 @@ void generic_smp_call_function_interrupt(void)
 	 */
 	list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
 		int refs;
+		void (*func) (void *info);
 
-		if (!cpumask_test_and_clear_cpu(cpu, data->cpumask))
+		/*
+		 * Since we walk the list without any locks, we might
+		 * see an entry that was completed, removed from the
+		 * list and is in the process of being reused.
+		 *
+		 * We must check that the cpu is in the cpumask before
+		 * checking the refs, and both must be set before
+		 * executing the callback on this cpu.
+		 */
+
+		if (!cpumask_test_cpu(cpu, data->cpumask))
+			continue;
+
+		smp_rmb();
+
+		if (atomic_read(&data->refs) == 0)
 			continue;
 
+		func = data->csd.func;		/* for later warn */
 		data->csd.func(data->csd.info);
 
+		/*
+		 * If the cpu mask is not still set then it enabled interrupts,
+		 * we took another smp interrupt, and executed the function
+		 * twice on this cpu.  In theory that copy decremented refs.
+		 */
+		if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) {
+			WARN(1, "%pS enabled interrupts and double executed\n",
+			     func);
+			continue;
+		}
+
 		refs = atomic_dec_return(&data->refs);
 		WARN_ON(refs < 0);
-		if (!refs) {
-			raw_spin_lock(&call_function.lock);
-			list_del_rcu(&data->csd.list);
-			raw_spin_unlock(&call_function.lock);
-		}
 
 		if (refs)
 			continue;
 
+		WARN_ON(!cpumask_empty(data->cpumask));
+
+		raw_spin_lock(&call_function.lock);
+		list_del_rcu(&data->csd.list);
+		raw_spin_unlock(&call_function.lock);
+
 		csd_unlock(&data->csd);
 	}
 
@@ -430,7 +459,7 @@ void smp_call_function_many(const struct cpumask *mask,
 	 * can't happen.
 	 */
 	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
-		     && !oops_in_progress);
+		     && !oops_in_progress && !early_boot_irqs_disabled);
 
 	/* So, what's a CPU they want? Ignoring this one. */
 	cpu = cpumask_first_and(mask, cpu_online_mask);
@@ -454,11 +483,21 @@ void smp_call_function_many(const struct cpumask *mask,
 
 	data = &__get_cpu_var(cfd_data);
 	csd_lock(&data->csd);
+	BUG_ON(atomic_read(&data->refs) || !cpumask_empty(data->cpumask));
 
 	data->csd.func = func;
 	data->csd.info = info;
 	cpumask_and(data->cpumask, mask, cpu_online_mask);
 	cpumask_clear_cpu(this_cpu, data->cpumask);
+
+	/*
+	 * To ensure the interrupt handler gets an complete view
+	 * we order the cpumask and refs writes and order the read
+	 * of them in the interrupt handler. In addition we may
+	 * only clear our own cpu bit from the mask.
+	 */
+	smp_wmb();
+
 	atomic_set(&data->refs, cpumask_weight(data->cpumask));
 
 	raw_spin_lock_irqsave(&call_function.lock, flags);
@@ -533,17 +572,20 @@ void ipi_call_unlock_irq(void)
 #endif /* USE_GENERIC_SMP_HELPERS */
 
 /*
- * Call a function on all processors
+ * Call a function on all processors.  May be used during early boot while
+ * early_boot_irqs_disabled is set.  Use local_irq_save/restore() instead
+ * of local_irq_disable/enable().
  */
 int on_each_cpu(void (*func) (void *info), void *info, int wait)
 {
+	unsigned long flags;
 	int ret = 0;
 
 	preempt_disable();
 	ret = smp_call_function(func, info, wait);
-	local_irq_disable();
+	local_irq_save(flags);
 	func(info);
-	local_irq_enable();
+	local_irq_restore(flags);
 	preempt_enable();
 	return ret;
 }
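Note: the comments added to generic_smp_call_function_interrupt() and smp_call_function_many() above describe a write-then-publish / check-then-consume barrier pairing. A minimal stand-alone sketch of that pattern, with a hypothetical struct and helper names that are not part of this commit:

#include <linux/atomic.h>
#include <linux/cpumask.h>
#include <linux/smp.h>

/* Hypothetical, simplified stand-in for the per-cpu call data. */
struct cfd_sketch {
	cpumask_var_t	cpumask;	/* cpus that still have to run func */
	atomic_t	refs;		/* count of set bits in cpumask */
};

/* Sender: write the mask first, then publish a non-zero refs. */
static void cfd_publish(struct cfd_sketch *d, const struct cpumask *mask,
			int this_cpu)
{
	cpumask_and(d->cpumask, mask, cpu_online_mask);
	cpumask_clear_cpu(this_cpu, d->cpumask);
	smp_wmb();	/* order the cpumask writes before the refs write */
	atomic_set(&d->refs, cpumask_weight(d->cpumask));
}

/* Receiver: check the mask first, then refs; both must be observed set. */
static bool cfd_may_run(struct cfd_sketch *d, int cpu)
{
	if (!cpumask_test_cpu(cpu, d->cpumask))
		return false;
	smp_rmb();	/* pairs with the smp_wmb() in cfd_publish() */
	return atomic_read(&d->refs) != 0;
}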
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 5cf8c602b880..92b6e1e12d98 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -453,14 +453,6 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1)
  * Stubs:
  */
 
-void early_boot_irqs_off(void)
-{
-}
-
-void early_boot_irqs_on(void)
-{
-}
-
 void trace_softirqs_on(unsigned long ip)
 {
 }
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 8ee6ec82f88a..11869faa6819 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -768,7 +768,11 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
 
 	worker->flags &= ~flags;
 
-	/* if transitioning out of NOT_RUNNING, increment nr_running */
+	/*
+	 * If transitioning out of NOT_RUNNING, increment nr_running.  Note
+	 * that the nested NOT_RUNNING is not a noop.  NOT_RUNNING is mask
+	 * of multiple flags, not a single flag.
+	 */
 	if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
 		if (!(worker->flags & WORKER_NOT_RUNNING))
 			atomic_inc(get_gcwq_nr_running(gcwq->cpu));
@@ -1840,7 +1844,7 @@ __acquires(&gcwq->lock)
 	spin_unlock_irq(&gcwq->lock);
 
 	work_clear_pending(work);
-	lock_map_acquire(&cwq->wq->lockdep_map);
+	lock_map_acquire_read(&cwq->wq->lockdep_map);
 	lock_map_acquire(&lockdep_map);
 	trace_workqueue_execute_start(work);
 	f(work);
@@ -2384,8 +2388,18 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
 	insert_wq_barrier(cwq, barr, work, worker);
 	spin_unlock_irq(&gcwq->lock);
 
-	lock_map_acquire(&cwq->wq->lockdep_map);
+	/*
+	 * If @max_active is 1 or rescuer is in use, flushing another work
+	 * item on the same workqueue may lead to deadlock.  Make sure the
+	 * flusher is not running on the same workqueue by verifying write
+	 * access.
+	 */
+	if (cwq->wq->saved_max_active == 1 || cwq->wq->flags & WQ_RESCUER)
+		lock_map_acquire(&cwq->wq->lockdep_map);
+	else
+		lock_map_acquire_read(&cwq->wq->lockdep_map);
 	lock_map_release(&cwq->wq->lockdep_map);
+
 	return true;
 already_gone:
 	spin_unlock_irq(&gcwq->lock);
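Note: the start_flush_work() change above takes a write lockdep acquisition only when flushing can actually deadlock (max_active == 1 or WQ_RESCUER), and a read acquisition otherwise, matching the read acquisition now taken while a work item runs. A hypothetical sketch of the deadlock it catches, with invented names that are not part of this commit:

#include <linux/workqueue.h>

/*
 * Hypothetical example: example_wq is assumed to be created with
 * alloc_workqueue("example", 0, 1), i.e. max_active == 1, and
 * work_a/work_b to be set up with INIT_WORK().
 */
static struct workqueue_struct *example_wq;
static struct work_struct work_a, work_b;

static void work_b_fn(struct work_struct *work)
{
	/* does something */
}

static void work_a_fn(struct work_struct *work)
{
	queue_work(example_wq, &work_b);
	/*
	 * Deadlocks: with max_active == 1, work_b cannot start until
	 * work_a returns, and work_a is now waiting for work_b.  With
	 * the change above, lockdep sees this flush (a write acquire)
	 * against the read acquire held while work_a runs and reports it.
	 */
	flush_work(&work_b);
}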