author	Sascha Hauer <s.hauer@pengutronix.de>	2011-02-11 02:32:18 -0500
committer	Sascha Hauer <s.hauer@pengutronix.de>	2011-02-11 02:33:14 -0500
commit	f19693a17c6705e197eb24d4618060eaac1b535c (patch)
tree	fc39dc23297c0e6be730cb0dfd74a34d9c0b8bfd /kernel
parent	23b120cdfae4f5c29da69de750d545bad719ead4 (diff)
parent	100b33c8bd8a3235fd0b7948338d6cbb3db3c63d (diff)
Merge commit 'v2.6.38-rc4' into imx-for-2.6.39
Conflicts:
	arch/arm/mach-mxs/clock-mx28.c

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cred.c                 |  16
-rw-r--r--  kernel/irq/Kconfig            |   3
-rw-r--r--  kernel/irq/handle.c           | 111
-rw-r--r--  kernel/irq/migration.c        |  14
-rw-r--r--  kernel/lockdep.c              |  18
-rw-r--r--  kernel/module.c               |  16
-rw-r--r--  kernel/params.c               |  65
-rw-r--r--  kernel/perf_event.c           |  79
-rw-r--r--  kernel/printk.c               | 100
-rw-r--r--  kernel/sched.c                |  26
-rw-r--r--  kernel/sched_autogroup.c      |  32
-rw-r--r--  kernel/sched_autogroup.h      |   4
-rw-r--r--  kernel/sched_debug.c          |  42
-rw-r--r--  kernel/sched_fair.c           | 126
-rw-r--r--  kernel/sched_rt.c             |   2
-rw-r--r--  kernel/smp.c                  |  62
-rw-r--r--  kernel/sys.c                  |   3
-rw-r--r--  kernel/sysctl.c               |   3
-rw-r--r--  kernel/time/tick-sched.c      |   7
-rw-r--r--  kernel/timer.c                |   6
-rw-r--r--  kernel/trace/trace_events.c   |  12
-rw-r--r--  kernel/trace/trace_export.c   |   6
-rw-r--r--  kernel/trace/trace_irqsoff.c  |   8
-rw-r--r--  kernel/trace/trace_syscalls.c |  19
-rw-r--r--  kernel/tracepoint.c           |  31
-rw-r--r--  kernel/watchdog.c             |  43
-rw-r--r--  kernel/workqueue.c            |  20
27 files changed, 494 insertions, 380 deletions
diff --git a/kernel/cred.c b/kernel/cred.c
index 6a1aa004e37..3a9d6dd53a6 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -252,13 +252,13 @@ struct cred *cred_alloc_blank(void)
 #endif
 
 	atomic_set(&new->usage, 1);
+#ifdef CONFIG_DEBUG_CREDENTIALS
+	new->magic = CRED_MAGIC;
+#endif
 
 	if (security_cred_alloc_blank(new, GFP_KERNEL) < 0)
 		goto error;
 
-#ifdef CONFIG_DEBUG_CREDENTIALS
-	new->magic = CRED_MAGIC;
-#endif
 	return new;
 
 error:
@@ -657,6 +657,8 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
 	validate_creds(old);
 
 	*new = *old;
+	atomic_set(&new->usage, 1);
+	set_cred_subscribers(new, 0);
 	get_uid(new->user);
 	get_group_info(new->group_info);
 
@@ -674,8 +676,6 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
 	if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
 		goto error;
 
-	atomic_set(&new->usage, 1);
-	set_cred_subscribers(new, 0);
 	put_cred(old);
 	validate_creds(new);
 	return new;
@@ -748,7 +748,11 @@ bool creds_are_invalid(const struct cred *cred)
 	if (cred->magic != CRED_MAGIC)
 		return true;
 #ifdef CONFIG_SECURITY_SELINUX
-	if (selinux_is_enabled()) {
+	/*
+	 * cred->security == NULL if security_cred_alloc_blank() or
+	 * security_prepare_creds() returned an error.
+	 */
+	if (selinux_is_enabled() && cred->security) {
 		if ((unsigned long) cred->security < PAGE_SIZE)
 			return true;
 		if ((*(u32 *)cred->security & 0xffffff00) ==
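Taken together, the cred.c hunks set cred->magic before the security hook can fail and teach creds_are_invalid() to tolerate a NULL cred->security left behind by a failed hook. A minimal user-space model of that NULL guard (types and names here are illustrative, not the kernel API):

#include <stdbool.h>
#include <stddef.h>

#define CRED_MAGIC 0x43736564

struct cred_model {			/* stand-in for struct cred */
	unsigned magic;
	void *security;			/* NULL if the alloc hook failed */
};

/* Mirrors the patched check: never dereference security when it is NULL. */
static bool creds_look_invalid(const struct cred_model *cred)
{
	if (cred->magic != CRED_MAGIC)
		return true;
	if (cred->security == NULL)	/* hook failed; nothing to inspect */
		return false;
	return *(const unsigned *)cred->security == 0;	/* placeholder probe */
}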
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 31d766bf5d2..8e42fec7686 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -9,9 +9,6 @@ menu "IRQ subsystem"
 config GENERIC_HARDIRQS
 	def_bool y
 
-config GENERIC_HARDIRQS_NO__DO_IRQ
-	def_bool y
-
 # Select this to disable the deprecated stuff
 config GENERIC_HARDIRQS_NO_DEPRECATED
 	def_bool n
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index e2347eb6330..3540a719012 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -118,114 +118,3 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
 
 	return retval;
 }
-
-#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
-
-#ifdef CONFIG_ENABLE_WARN_DEPRECATED
-# warning __do_IRQ is deprecated. Please convert to proper flow handlers
-#endif
-
-/**
- * __do_IRQ - original all in one highlevel IRQ handler
- * @irq:	the interrupt number
- *
- * __do_IRQ handles all normal device IRQ's (the special
- * SMP cross-CPU interrupts have their own specific
- * handlers).
- *
- * This is the original x86 implementation which is used for every
- * interrupt type.
- */
-unsigned int __do_IRQ(unsigned int irq)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-	struct irqaction *action;
-	unsigned int status;
-
-	kstat_incr_irqs_this_cpu(irq, desc);
-
-	if (CHECK_IRQ_PER_CPU(desc->status)) {
-		irqreturn_t action_ret;
-
-		/*
-		 * No locking required for CPU-local interrupts:
-		 */
-		if (desc->irq_data.chip->ack)
-			desc->irq_data.chip->ack(irq);
-		if (likely(!(desc->status & IRQ_DISABLED))) {
-			action_ret = handle_IRQ_event(irq, desc->action);
-			if (!noirqdebug)
-				note_interrupt(irq, desc, action_ret);
-		}
-		desc->irq_data.chip->end(irq);
-		return 1;
-	}
-
-	raw_spin_lock(&desc->lock);
-	if (desc->irq_data.chip->ack)
-		desc->irq_data.chip->ack(irq);
-	/*
-	 * REPLAY is when Linux resends an IRQ that was dropped earlier
-	 * WAITING is used by probe to mark irqs that are being tested
-	 */
-	status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
-	status |= IRQ_PENDING; /* we _want_ to handle it */
-
-	/*
-	 * If the IRQ is disabled for whatever reason, we cannot
-	 * use the action we have.
-	 */
-	action = NULL;
-	if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
-		action = desc->action;
-		status &= ~IRQ_PENDING; /* we commit to handling */
-		status |= IRQ_INPROGRESS; /* we are handling it */
-	}
-	desc->status = status;
-
-	/*
-	 * If there is no IRQ handler or it was disabled, exit early.
-	 * Since we set PENDING, if another processor is handling
-	 * a different instance of this same irq, the other processor
-	 * will take care of it.
-	 */
-	if (unlikely(!action))
-		goto out;
-
-	/*
-	 * Edge triggered interrupts need to remember
-	 * pending events.
-	 * This applies to any hw interrupts that allow a second
-	 * instance of the same irq to arrive while we are in do_IRQ
-	 * or in the handler. But the code here only handles the _second_
-	 * instance of the irq, not the third or fourth. So it is mostly
-	 * useful for irq hardware that does not mask cleanly in an
-	 * SMP environment.
-	 */
-	for (;;) {
-		irqreturn_t action_ret;
-
-		raw_spin_unlock(&desc->lock);
-
-		action_ret = handle_IRQ_event(irq, action);
-		if (!noirqdebug)
-			note_interrupt(irq, desc, action_ret);
-
-		raw_spin_lock(&desc->lock);
-		if (likely(!(desc->status & IRQ_PENDING)))
-			break;
-		desc->status &= ~IRQ_PENDING;
-	}
-	desc->status &= ~IRQ_INPROGRESS;
-
-out:
-	/*
-	 * The ->end() handler has to deal with interrupts which got
-	 * disabled while the handler was running.
-	 */
-	desc->irq_data.chip->end(irq);
-	raw_spin_unlock(&desc->lock);
-
-	return 1;
-}
-#endif
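With __do_IRQ() removed, every platform must install a proper flow handler per interrupt line. A hedged sketch of the 2.6.38-era registration pattern (my_chip and the chosen handler are illustrative; set_irq_flags() is the ARM-specific helper of that era):

/* Sketch only: assumes a board-defined my_chip and a level-triggered line. */
static void my_board_init_irq(unsigned int irq)
{
	set_irq_chip_and_handler(irq, &my_chip, handle_level_irq);
	set_irq_flags(irq, IRQF_VALID);
}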
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 1d254194048..441fd629ff0 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -56,6 +56,7 @@ void move_masked_irq(int irq)
 void move_native_irq(int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
+	bool masked;
 
 	if (likely(!(desc->status & IRQ_MOVE_PENDING)))
 		return;
@@ -63,8 +64,15 @@ void move_native_irq(int irq)
 	if (unlikely(desc->status & IRQ_DISABLED))
 		return;
 
-	desc->irq_data.chip->irq_mask(&desc->irq_data);
+	/*
+	 * Be careful vs. already masked interrupts. If this is a
+	 * threaded interrupt with ONESHOT set, we can end up with an
+	 * interrupt storm.
+	 */
+	masked = desc->status & IRQ_MASKED;
+	if (!masked)
+		desc->irq_data.chip->irq_mask(&desc->irq_data);
 	move_masked_irq(irq);
-	desc->irq_data.chip->irq_unmask(&desc->irq_data);
+	if (!masked)
+		desc->irq_data.chip->irq_unmask(&desc->irq_data);
 }
-
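The fix only toggles mask state that move_native_irq() itself changed; unconditionally unmasking a line that a threaded ONESHOT handler still wants masked is what caused the storm. The save-and-restore idiom, reduced to stand-alone C (all names are illustrative):

#include <stdbool.h>

struct line_model {
	bool masked;		/* stand-in for IRQ_MASKED in desc->status */
};

static void mask(struct line_model *l)   { l->masked = true; }
static void unmask(struct line_model *l) { l->masked = false; }

/* Only undo what we did: a pre-masked line stays masked afterwards. */
static void migrate(struct line_model *l)
{
	bool was_masked = l->masked;

	if (!was_masked)
		mask(l);
	/* ... move the irq while it cannot fire ... */
	if (!was_masked)
		unmask(l);
}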
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 42ba65dff7d..0d2058da80f 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2292,22 +2292,6 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark)
 }
 
 /*
- * Debugging helper: via this flag we know that we are in
- * 'early bootup code', and will warn about any invalid irqs-on event:
- */
-static int early_boot_irqs_enabled;
-
-void early_boot_irqs_off(void)
-{
-	early_boot_irqs_enabled = 0;
-}
-
-void early_boot_irqs_on(void)
-{
-	early_boot_irqs_enabled = 1;
-}
-
-/*
  * Hardirqs will be enabled:
  */
 void trace_hardirqs_on_caller(unsigned long ip)
@@ -2319,7 +2303,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
 	if (unlikely(!debug_locks || current->lockdep_recursion))
 		return;
 
-	if (DEBUG_LOCKS_WARN_ON(unlikely(!early_boot_irqs_enabled)))
+	if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled)))
 		return;
 
 	if (unlikely(curr->hardirqs_enabled)) {
diff --git a/kernel/module.c b/kernel/module.c
index 34e00b708fa..efa290ea94b 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2460,9 +2460,9 @@ static void find_module_sections(struct module *mod, struct load_info *info)
 #endif
 
 #ifdef CONFIG_TRACEPOINTS
-	mod->tracepoints = section_objs(info, "__tracepoints",
-					sizeof(*mod->tracepoints),
-					&mod->num_tracepoints);
+	mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs",
+					     sizeof(*mod->tracepoints_ptrs),
+					     &mod->num_tracepoints);
 #endif
 #ifdef HAVE_JUMP_LABEL
 	mod->jump_entries = section_objs(info, "__jump_table",
@@ -3393,7 +3393,7 @@ void module_layout(struct module *mod,
 			  struct modversion_info *ver,
 			  struct kernel_param *kp,
 			  struct kernel_symbol *ks,
-			  struct tracepoint *tp)
+			  struct tracepoint * const *tp)
 {
 }
 EXPORT_SYMBOL(module_layout);
@@ -3407,8 +3407,8 @@ void module_update_tracepoints(void)
 	mutex_lock(&module_mutex);
 	list_for_each_entry(mod, &modules, list)
 		if (!mod->taints)
-			tracepoint_update_probe_range(mod->tracepoints,
-				mod->tracepoints + mod->num_tracepoints);
+			tracepoint_update_probe_range(mod->tracepoints_ptrs,
+				mod->tracepoints_ptrs + mod->num_tracepoints);
 	mutex_unlock(&module_mutex);
 }
 
@@ -3432,8 +3432,8 @@ int module_get_iter_tracepoints(struct tracepoint_iter *iter)
 		else if (iter_mod > iter->module)
 			iter->tracepoint = NULL;
 		found = tracepoint_get_iter_range(&iter->tracepoint,
-			iter_mod->tracepoints,
-			iter_mod->tracepoints
+			iter_mod->tracepoints_ptrs,
+			iter_mod->tracepoints_ptrs
 				+ iter_mod->num_tracepoints);
 		if (found) {
 			iter->module = iter_mod;
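The tracepoints_ptrs rename reflects that the section now holds pointers to tracepoints rather than the tracepoint structs themselves, so iteration gains one level of indirection. A small stand-alone C model of the difference (types and names are illustrative):

#include <stdio.h>

struct tracepoint_model { const char *name; };

static struct tracepoint_model tp_a = { "sched_switch" };
static struct tracepoint_model tp_b = { "irq_handler_entry" };

/* What a "__tracepoints_ptrs"-style section holds: an array of pointers. */
static struct tracepoint_model * const tp_ptrs[] = { &tp_a, &tp_b };

int main(void)
{
	struct tracepoint_model * const *p;

	/* One extra dereference compared to an array of structs. */
	for (p = tp_ptrs; p < tp_ptrs + 2; p++)
		printf("%s\n", (*p)->name);
	return 0;
}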
diff --git a/kernel/params.c b/kernel/params.c
index 08107d18175..0da1411222b 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -719,9 +719,7 @@ void destroy_params(const struct kernel_param *params, unsigned num)
 		params[i].ops->free(params[i].arg);
 }
 
-static void __init kernel_add_sysfs_param(const char *name,
-					  struct kernel_param *kparam,
-					  unsigned int name_skip)
+static struct module_kobject * __init locate_module_kobject(const char *name)
 {
 	struct module_kobject *mk;
 	struct kobject *kobj;
@@ -729,10 +727,7 @@ static void __init kernel_add_sysfs_param(const char *name,
 
 	kobj = kset_find_obj(module_kset, name);
 	if (kobj) {
-		/* We already have one.  Remove params so we can add more. */
 		mk = to_module_kobject(kobj);
-		/* We need to remove it before adding parameters. */
-		sysfs_remove_group(&mk->kobj, &mk->mp->grp);
 	} else {
 		mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL);
 		BUG_ON(!mk);
@@ -743,15 +738,36 @@ static void __init kernel_add_sysfs_param(const char *name,
743 "%s", name); 738 "%s", name);
744 if (err) { 739 if (err) {
745 kobject_put(&mk->kobj); 740 kobject_put(&mk->kobj);
746 printk(KERN_ERR "Module '%s' failed add to sysfs, " 741 printk(KERN_ERR
747 "error number %d\n", name, err); 742 "Module '%s' failed add to sysfs, error number %d\n",
748 printk(KERN_ERR "The system will be unstable now.\n"); 743 name, err);
749 return; 744 printk(KERN_ERR
745 "The system will be unstable now.\n");
746 return NULL;
750 } 747 }
751 /* So that exit path is even. */ 748
749 /* So that we hold reference in both cases. */
752 kobject_get(&mk->kobj); 750 kobject_get(&mk->kobj);
753 } 751 }
754 752
753 return mk;
754}
755
756static void __init kernel_add_sysfs_param(const char *name,
757 struct kernel_param *kparam,
758 unsigned int name_skip)
759{
760 struct module_kobject *mk;
761 int err;
762
763 mk = locate_module_kobject(name);
764 if (!mk)
765 return;
766
767 /* We need to remove old parameters before adding more. */
768 if (mk->mp)
769 sysfs_remove_group(&mk->kobj, &mk->mp->grp);
770
755 /* These should not fail at boot. */ 771 /* These should not fail at boot. */
756 err = add_sysfs_param(mk, kparam, kparam->name + name_skip); 772 err = add_sysfs_param(mk, kparam, kparam->name + name_skip);
757 BUG_ON(err); 773 BUG_ON(err);
@@ -796,6 +812,32 @@ static void __init param_sysfs_builtin(void)
 	}
 }
 
+ssize_t __modver_version_show(struct module_attribute *mattr,
+			      struct module *mod, char *buf)
+{
+	struct module_version_attribute *vattr =
+		container_of(mattr, struct module_version_attribute, mattr);
+
+	return sprintf(buf, "%s\n", vattr->version);
+}
+
+extern struct module_version_attribute __start___modver[], __stop___modver[];
+
+static void __init version_sysfs_builtin(void)
+{
+	const struct module_version_attribute *vattr;
+	struct module_kobject *mk;
+	int err;
+
+	for (vattr = __start___modver; vattr < __stop___modver; vattr++) {
+		mk = locate_module_kobject(vattr->module_name);
+		if (mk) {
+			err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr);
+			kobject_uevent(&mk->kobj, KOBJ_ADD);
+			kobject_put(&mk->kobj);
+		}
+	}
+}
 
 /* module-related sysfs stuff */
 
@@ -875,6 +917,7 @@ static int __init param_sysfs_init(void)
 	}
 	module_sysfs_initialized = 1;
 
+	version_sysfs_builtin();
 	param_sysfs_builtin();
 
 	return 0;
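__modver_version_show() is the classic container_of() pattern: sysfs hands back the embedded module_attribute and the callback recovers the enclosing module_version_attribute. A runnable user-space reduction of that pattern (the struct layout here is illustrative):

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct attr { const char *name; };

struct version_attr {
	struct attr attr;	/* embedded, like mattr in the kernel */
	const char *version;
};

static void show(struct attr *a)
{
	struct version_attr *va = container_of(a, struct version_attr, attr);
	printf("%s\n", va->version);
}

int main(void)
{
	struct version_attr v = { { "version" }, "1.0" };
	show(&v.attr);		/* prints 1.0 */
	return 0;
}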
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 05ebe841270..999835b6112 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1901,11 +1901,12 @@ static void __perf_event_read(void *info)
 		return;
 
 	raw_spin_lock(&ctx->lock);
-	update_context_time(ctx);
+	if (ctx->is_active)
+		update_context_time(ctx);
 	update_event_times(event);
+	if (event->state == PERF_EVENT_STATE_ACTIVE)
+		event->pmu->read(event);
 	raw_spin_unlock(&ctx->lock);
-
-	event->pmu->read(event);
 }
 
 static inline u64 perf_event_count(struct perf_event *event)
@@ -1999,8 +2000,7 @@ static int alloc_callchain_buffers(void)
 	 * accessed from NMI. Use a temporary manual per cpu allocation
 	 * until that gets sorted out.
 	 */
-	size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) *
-		num_possible_cpus();
+	size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]);
 
 	entries = kzalloc(size, GFP_KERNEL);
 	if (!entries)
@@ -2201,13 +2201,6 @@ find_lively_task_by_vpid(pid_t vpid)
 	if (!task)
 		return ERR_PTR(-ESRCH);
 
-	/*
-	 * Can't attach events to a dying task.
-	 */
-	err = -ESRCH;
-	if (task->flags & PF_EXITING)
-		goto errout;
-
 	/* Reuse ptrace permission checks for now. */
 	err = -EACCES;
 	if (!ptrace_may_access(task, PTRACE_MODE_READ))
@@ -2228,14 +2221,11 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
 	unsigned long flags;
 	int ctxn, err;
 
-	if (!task && cpu != -1) {
+	if (!task) {
 		/* Must be root to operate on a CPU event: */
 		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
 			return ERR_PTR(-EACCES);
 
-		if (cpu < 0 || cpu >= nr_cpumask_bits)
-			return ERR_PTR(-EINVAL);
-
 		/*
 		 * We could be clever and allow to attach a event to an
 		 * offline CPU and activate it when the CPU comes up, but
@@ -2271,14 +2261,27 @@ retry:
 
 		get_ctx(ctx);
 
-		if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) {
-			/*
-			 * We raced with some other task; use
-			 * the context they set.
-			 */
+		err = 0;
+		mutex_lock(&task->perf_event_mutex);
+		/*
+		 * If it has already passed perf_event_exit_task().
+		 * we must see PF_EXITING, it takes this mutex too.
+		 */
+		if (task->flags & PF_EXITING)
+			err = -ESRCH;
+		else if (task->perf_event_ctxp[ctxn])
+			err = -EAGAIN;
+		else
+			rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
+		mutex_unlock(&task->perf_event_mutex);
+
+		if (unlikely(err)) {
 			put_task_struct(task);
 			kfree(ctx);
-			goto retry;
+
+			if (err == -EAGAIN)
+				goto retry;
+			goto errout;
 		}
 	}
 
@@ -5377,6 +5380,8 @@ free_dev:
 	goto out;
 }
 
+static struct lock_class_key cpuctx_mutex;
+
 int perf_pmu_register(struct pmu *pmu, char *name, int type)
 {
 	int cpu, ret;
@@ -5425,6 +5430,7 @@ skip_type:
 
 		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
 		__perf_event_init_context(&cpuctx->ctx);
+		lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
 		cpuctx->ctx.type = cpu_context;
 		cpuctx->ctx.pmu = pmu;
 		cpuctx->jiffies_interval = 1;
@@ -5541,6 +5547,11 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	struct hw_perf_event *hwc;
 	long err;
 
+	if ((unsigned)cpu >= nr_cpu_ids) {
+		if (!task || cpu != -1)
+			return ERR_PTR(-EINVAL);
+	}
+
 	event = kzalloc(sizeof(*event), GFP_KERNEL);
 	if (!event)
 		return ERR_PTR(-ENOMEM);
@@ -5589,7 +5600,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
 	if (!overflow_handler && parent_event)
 		overflow_handler = parent_event->overflow_handler;
-	
+
 	event->overflow_handler = overflow_handler;
 
 	if (attr->disabled)
@@ -6125,7 +6136,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	 * scheduled, so we are now safe from rescheduling changing
 	 * our context.
 	 */
-	child_ctx = child->perf_event_ctxp[ctxn];
+	child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]);
 	task_ctx_sched_out(child_ctx, EVENT_ALL);
 
 	/*
@@ -6438,11 +6449,6 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
 	unsigned long flags;
 	int ret = 0;
 
-	child->perf_event_ctxp[ctxn] = NULL;
-
-	mutex_init(&child->perf_event_mutex);
-	INIT_LIST_HEAD(&child->perf_event_list);
-
 	if (likely(!parent->perf_event_ctxp[ctxn]))
 		return 0;
 
@@ -6494,7 +6500,6 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
 
 	raw_spin_lock_irqsave(&parent_ctx->lock, flags);
 	parent_ctx->rotate_disable = 0;
-	raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
 
 	child_ctx = child->perf_event_ctxp[ctxn];
 
@@ -6502,12 +6507,11 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
 	/*
 	 * Mark the child context as a clone of the parent
 	 * context, or of whatever the parent is a clone of.
-	 * Note that if the parent is a clone, it could get
-	 * uncloned at any point, but that doesn't matter
-	 * because the list of events and the generation
-	 * count can't have changed since we took the mutex.
+	 *
+	 * Note that if the parent is a clone, the holding of
+	 * parent_ctx->lock avoids it from being uncloned.
 	 */
-	cloned_ctx = rcu_dereference(parent_ctx->parent_ctx);
+	cloned_ctx = parent_ctx->parent_ctx;
 	if (cloned_ctx) {
 		child_ctx->parent_ctx = cloned_ctx;
 		child_ctx->parent_gen = parent_ctx->parent_gen;
@@ -6518,6 +6522,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
 		get_ctx(child_ctx->parent_ctx);
 	}
 
+	raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
 	mutex_unlock(&parent_ctx->mutex);
 
 	perf_unpin_context(parent_ctx);
@@ -6532,6 +6537,10 @@ int perf_event_init_task(struct task_struct *child)
 {
 	int ctxn, ret;
 
+	memset(child->perf_event_ctxp, 0, sizeof(child->perf_event_ctxp));
+	mutex_init(&child->perf_event_mutex);
+	INIT_LIST_HEAD(&child->perf_event_list);
+
 	for_each_task_context_nr(ctxn) {
 		ret = perf_event_init_context(child, ctxn);
 		if (ret)
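The find_get_context() hunk replaces a bare cmpxchg() with an install done under task->perf_event_mutex, so a task that has passed perf_event_exit_task() (which takes the same mutex and runs with PF_EXITING set) can never gain a context after its teardown ran. A pthread model of the same "install once, unless shutting down" rule (names are illustrative):

#include <pthread.h>
#include <stdbool.h>
#include <errno.h>

struct task_model {
	pthread_mutex_t lock;	/* plays the role of perf_event_mutex */
	bool exiting;		/* PF_EXITING */
	void *ctx;		/* perf_event_ctxp[ctxn] */
};

/* Returns 0, -ESRCH if the task is exiting, -EAGAIN if we raced. */
static int install_ctx(struct task_model *t, void *ctx)
{
	int err = 0;

	pthread_mutex_lock(&t->lock);
	if (t->exiting)
		err = -ESRCH;	/* teardown already ran, or will see us */
	else if (t->ctx)
		err = -EAGAIN;	/* somebody else installed first: retry */
	else
		t->ctx = ctx;
	pthread_mutex_unlock(&t->lock);
	return err;
}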
diff --git a/kernel/printk.c b/kernel/printk.c
index 53d9a9ec88e..2ddbdc73aad 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -97,7 +97,7 @@ static int console_locked, console_suspended;
 /*
  * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars
  * It is also used in interesting ways to provide interlocking in
- * release_console_sem().
+ * console_unlock();.
  */
 static DEFINE_SPINLOCK(logbuf_lock);
 
@@ -501,7 +501,7 @@ static void _call_console_drivers(unsigned start,
 /*
  * Call the console drivers, asking them to write out
  * log_buf[start] to log_buf[end - 1].
- * The console_sem must be held.
+ * The console_lock must be held.
  */
 static void call_console_drivers(unsigned start, unsigned end)
 {
@@ -604,11 +604,11 @@ static int have_callable_console(void)
  *
  * This is printk(). It can be called from any context. We want it to work.
  *
- * We try to grab the console_sem. If we succeed, it's easy - we log the output and
+ * We try to grab the console_lock. If we succeed, it's easy - we log the output and
  * call the console drivers.  If we fail to get the semaphore we place the output
  * into the log buffer and return. The current holder of the console_sem will
- * notice the new output in release_console_sem() and will send it to the
- * consoles before releasing the semaphore.
+ * notice the new output in console_unlock(); and will send it to the
+ * consoles before releasing the lock.
  *
  * One effect of this deferred printing is that code which calls printk() and
  * then changes console_loglevel may break. This is because console_loglevel
@@ -659,19 +659,19 @@ static inline int can_use_console(unsigned int cpu)
 /*
  * Try to get console ownership to actually show the kernel
  * messages from a 'printk'. Return true (and with the
- * console_semaphore held, and 'console_locked' set) if it
+ * console_lock held, and 'console_locked' set) if it
  * is successful, false otherwise.
  *
  * This gets called with the 'logbuf_lock' spinlock held and
  * interrupts disabled. It should return with 'lockbuf_lock'
  * released but interrupts still disabled.
  */
-static int acquire_console_semaphore_for_printk(unsigned int cpu)
+static int console_trylock_for_printk(unsigned int cpu)
 	__releases(&logbuf_lock)
 {
 	int retval = 0;
 
-	if (!try_acquire_console_sem()) {
+	if (console_trylock()) {
 		retval = 1;
 
 		/*
@@ -827,12 +827,12 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 	 * actual magic (print out buffers, wake up klogd,
 	 * etc).
 	 *
-	 * The acquire_console_semaphore_for_printk() function
+	 * The console_trylock_for_printk() function
 	 * will release 'logbuf_lock' regardless of whether it
 	 * actually gets the semaphore or not.
 	 */
-	if (acquire_console_semaphore_for_printk(this_cpu))
-		release_console_sem();
+	if (console_trylock_for_printk(this_cpu))
+		console_unlock();
 
 	lockdep_on();
 out_restore_irqs:
@@ -993,7 +993,7 @@ void suspend_console(void)
 	if (!console_suspend_enabled)
 		return;
 	printk("Suspending console(s) (use no_console_suspend to debug)\n");
-	acquire_console_sem();
+	console_lock();
 	console_suspended = 1;
 	up(&console_sem);
 }
@@ -1004,7 +1004,7 @@ void resume_console(void)
 		return;
 	down(&console_sem);
 	console_suspended = 0;
-	release_console_sem();
+	console_unlock();
 }
 
 /**
@@ -1027,21 +1027,21 @@ static int __cpuinit console_cpu_notify(struct notifier_block *self,
 	case CPU_DYING:
 	case CPU_DOWN_FAILED:
 	case CPU_UP_CANCELED:
-		acquire_console_sem();
-		release_console_sem();
+		console_lock();
+		console_unlock();
 	}
 	return NOTIFY_OK;
 }
 
 /**
- * acquire_console_sem - lock the console system for exclusive use.
+ * console_lock - lock the console system for exclusive use.
  *
- * Acquires a semaphore which guarantees that the caller has
+ * Acquires a lock which guarantees that the caller has
  * exclusive access to the console system and the console_drivers list.
  *
  * Can sleep, returns nothing.
  */
-void acquire_console_sem(void)
+void console_lock(void)
 {
 	BUG_ON(in_interrupt());
 	down(&console_sem);
@@ -1050,21 +1050,29 @@ void acquire_console_sem(void)
 	console_locked = 1;
 	console_may_schedule = 1;
 }
-EXPORT_SYMBOL(acquire_console_sem);
+EXPORT_SYMBOL(console_lock);
 
-int try_acquire_console_sem(void)
+/**
+ * console_trylock - try to lock the console system for exclusive use.
+ *
+ * Tried to acquire a lock which guarantees that the caller has
+ * exclusive access to the console system and the console_drivers list.
+ *
+ * returns 1 on success, and 0 on failure to acquire the lock.
+ */
+int console_trylock(void)
 {
 	if (down_trylock(&console_sem))
-		return -1;
+		return 0;
 	if (console_suspended) {
 		up(&console_sem);
-		return -1;
+		return 0;
 	}
 	console_locked = 1;
 	console_may_schedule = 0;
-	return 0;
+	return 1;
 }
-EXPORT_SYMBOL(try_acquire_console_sem);
+EXPORT_SYMBOL(console_trylock);
 
 int is_console_locked(void)
 {
@@ -1095,20 +1103,20 @@ void wake_up_klogd(void)
 }
 
 /**
- * release_console_sem - unlock the console system
+ * console_unlock - unlock the console system
  *
- * Releases the semaphore which the caller holds on the console system
+ * Releases the console_lock which the caller holds on the console system
  * and the console driver list.
  *
- * While the semaphore was held, console output may have been buffered
- * by printk(). If this is the case, release_console_sem() emits
- * the output prior to releasing the semaphore.
+ * While the console_lock was held, console output may have been buffered
+ * by printk(). If this is the case, console_unlock(); emits
+ * the output prior to releasing the lock.
  *
  * If there is output waiting for klogd, we wake it up.
  *
- * release_console_sem() may be called from any context.
+ * console_unlock(); may be called from any context.
  */
-void release_console_sem(void)
+void console_unlock(void)
 {
 	unsigned long flags;
 	unsigned _con_start, _log_end;
@@ -1141,7 +1149,7 @@ void release_console_sem(void)
 	if (wake_klogd)
 		wake_up_klogd();
 }
-EXPORT_SYMBOL(release_console_sem);
+EXPORT_SYMBOL(console_unlock);
 
 /**
  * console_conditional_schedule - yield the CPU if required
@@ -1150,7 +1158,7 @@ EXPORT_SYMBOL(release_console_sem);
  * if this CPU should yield the CPU to another task, do
  * so here.
  *
- * Must be called within acquire_console_sem().
+ * Must be called within console_lock();.
  */
 void __sched console_conditional_schedule(void)
 {
@@ -1171,14 +1179,14 @@ void console_unblank(void)
 		if (down_trylock(&console_sem) != 0)
 			return;
 	} else
-		acquire_console_sem();
+		console_lock();
 
 	console_locked = 1;
 	console_may_schedule = 0;
 	for_each_console(c)
 		if ((c->flags & CON_ENABLED) && c->unblank)
 			c->unblank();
-	release_console_sem();
+	console_unlock();
 }
 
 /*
@@ -1189,7 +1197,7 @@ struct tty_driver *console_device(int *index)
 	struct console *c;
 	struct tty_driver *driver = NULL;
 
-	acquire_console_sem();
+	console_lock();
 	for_each_console(c) {
 		if (!c->device)
 			continue;
@@ -1197,7 +1205,7 @@ struct tty_driver *console_device(int *index)
 		if (driver)
 			break;
 	}
-	release_console_sem();
+	console_unlock();
 	return driver;
 }
 
@@ -1208,17 +1216,17 @@ struct tty_driver *console_device(int *index)
  */
 void console_stop(struct console *console)
 {
-	acquire_console_sem();
+	console_lock();
 	console->flags &= ~CON_ENABLED;
-	release_console_sem();
+	console_unlock();
 }
 EXPORT_SYMBOL(console_stop);
 
 void console_start(struct console *console)
 {
-	acquire_console_sem();
+	console_lock();
 	console->flags |= CON_ENABLED;
-	release_console_sem();
+	console_unlock();
 }
 EXPORT_SYMBOL(console_start);
 
@@ -1340,7 +1348,7 @@ void register_console(struct console *newcon)
 	 * Put this console in the list - keep the
 	 * preferred driver at the head of the list.
 	 */
-	acquire_console_sem();
+	console_lock();
 	if ((newcon->flags & CON_CONSDEV) || console_drivers == NULL) {
 		newcon->next = console_drivers;
 		console_drivers = newcon;
@@ -1352,14 +1360,14 @@ void register_console(struct console *newcon)
 	}
 	if (newcon->flags & CON_PRINTBUFFER) {
 		/*
-		 * release_console_sem() will print out the buffered messages
+		 * console_unlock(); will print out the buffered messages
 		 * for us.
 		 */
 		spin_lock_irqsave(&logbuf_lock, flags);
 		con_start = log_start;
 		spin_unlock_irqrestore(&logbuf_lock, flags);
 	}
-	release_console_sem();
+	console_unlock();
 	console_sysfs_notify();
 
 	/*
@@ -1396,7 +1404,7 @@ int unregister_console(struct console *console)
 		return braille_unregister_console(console);
 #endif
 
-	acquire_console_sem();
+	console_lock();
 	if (console_drivers == console) {
 		console_drivers=console->next;
 		res = 0;
@@ -1418,7 +1426,7 @@ int unregister_console(struct console *console)
 	if (console_drivers != NULL && console->flags & CON_CONSDEV)
 		console_drivers->flags |= CON_CONSDEV;
 
-	release_console_sem();
+	console_unlock();
 	console_sysfs_notify();
 	return res;
 }
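Beyond the rename, note that console_trylock() inverts the old return convention: try_acquire_console_sem() returned 0 on success and -1 on failure, while console_trylock() returns 1 on success and 0 on failure, matching spin_trylock(). A user-space sketch of the new convention (wrapper names are illustrative):

#include <pthread.h>

static pthread_mutex_t console_mutex = PTHREAD_MUTEX_INITIALIZER;

/* New-style: nonzero means "you hold the lock", like spin_trylock(). */
static int console_trylock_model(void)
{
	return pthread_mutex_trylock(&console_mutex) == 0;
}

static void console_unlock_model(void)
{
	pthread_mutex_unlock(&console_mutex);
}

/* Callers now read naturally: */
static void flush_if_possible(void)
{
	if (console_trylock_model())
		console_unlock_model();	/* unlock path flushes buffered output */
}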
diff --git a/kernel/sched.c b/kernel/sched.c
index ea3e5eff387..18d38e4ec7b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -553,9 +553,6 @@ struct rq {
 	/* try_to_wake_up() stats */
 	unsigned int ttwu_count;
 	unsigned int ttwu_local;
-
-	/* BKL stats */
-	unsigned int bkl_count;
 #endif
 };
 
@@ -609,6 +606,9 @@ static inline struct task_group *task_group(struct task_struct *p)
 	struct task_group *tg;
 	struct cgroup_subsys_state *css;
 
+	if (p->flags & PF_EXITING)
+		return &root_task_group;
+
 	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
 			lockdep_is_held(&task_rq(p)->lock));
 	tg = container_of(css, struct task_group, css);
@@ -3887,7 +3887,7 @@ static inline void schedule_debug(struct task_struct *prev)
 	schedstat_inc(this_rq(), sched_count);
 #ifdef CONFIG_SCHEDSTATS
 	if (unlikely(prev->lock_depth >= 0)) {
-		schedstat_inc(this_rq(), bkl_count);
+		schedstat_inc(this_rq(), rq_sched_info.bkl_count);
 		schedstat_inc(prev, sched_info.bkl_count);
 	}
 #endif
@@ -4871,7 +4871,8 @@ recheck:
 	 * assigned.
 	 */
 	if (rt_bandwidth_enabled() && rt_policy(policy) &&
-			task_group(p)->rt_bandwidth.rt_runtime == 0) {
+			task_group(p)->rt_bandwidth.rt_runtime == 0 &&
+			!task_group_is_autogroup(task_group(p))) {
 		__task_rq_unlock(rq);
 		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 		return -EPERM;
@@ -8882,6 +8883,20 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 	}
 }
 
+static void
+cpu_cgroup_exit(struct cgroup_subsys *ss, struct task_struct *task)
+{
+	/*
+	 * cgroup_exit() is called in the copy_process() failure path.
+	 * Ignore this case since the task hasn't ran yet, this avoids
+	 * trying to poke a half freed task state from generic code.
+	 */
+	if (!(task->flags & PF_EXITING))
+		return;
+
+	sched_move_task(task);
+}
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype,
 				u64 shareval)
@@ -8954,6 +8969,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 	.destroy	= cpu_cgroup_destroy,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,
+	.exit		= cpu_cgroup_exit,
 	.populate	= cpu_cgroup_populate,
 	.subsys_id	= cpu_cgroup_subsys_id,
 	.early_init	= 1,
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
index 32a723b8f84..9fb65628315 100644
--- a/kernel/sched_autogroup.c
+++ b/kernel/sched_autogroup.c
@@ -27,6 +27,11 @@ static inline void autogroup_destroy(struct kref *kref)
 {
 	struct autogroup *ag = container_of(kref, struct autogroup, kref);
 
+#ifdef CONFIG_RT_GROUP_SCHED
+	/* We've redirected RT tasks to the root task group... */
+	ag->tg->rt_se = NULL;
+	ag->tg->rt_rq = NULL;
+#endif
 	sched_destroy_group(ag->tg);
 }
 
@@ -55,6 +60,10 @@ static inline struct autogroup *autogroup_task_get(struct task_struct *p)
 	return ag;
 }
 
+#ifdef CONFIG_RT_GROUP_SCHED
+static void free_rt_sched_group(struct task_group *tg);
+#endif
+
 static inline struct autogroup *autogroup_create(void)
 {
 	struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL);
@@ -72,6 +81,19 @@ static inline struct autogroup *autogroup_create(void)
 	init_rwsem(&ag->lock);
 	ag->id = atomic_inc_return(&autogroup_seq_nr);
 	ag->tg = tg;
+#ifdef CONFIG_RT_GROUP_SCHED
+	/*
+	 * Autogroup RT tasks are redirected to the root task group
+	 * so we don't have to move tasks around upon policy change,
+	 * or flail around trying to allocate bandwidth on the fly.
+	 * A bandwidth exception in __sched_setscheduler() allows
+	 * the policy change to proceed.  Thereafter, task_group()
+	 * returns &root_task_group, so zero bandwidth is required.
+	 */
+	free_rt_sched_group(tg);
+	tg->rt_se = root_task_group.rt_se;
+	tg->rt_rq = root_task_group.rt_rq;
+#endif
 	tg->autogroup = ag;
 
 	return ag;
@@ -106,6 +128,11 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg)
 	return true;
 }
 
+static inline bool task_group_is_autogroup(struct task_group *tg)
+{
+	return tg != &root_task_group && tg->autogroup;
+}
+
 static inline struct task_group *
 autogroup_task_group(struct task_struct *p, struct task_group *tg)
 {
@@ -231,6 +258,11 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
 #ifdef CONFIG_SCHED_DEBUG
 static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
 {
+	int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
+
+	if (!enabled || !tg->autogroup)
+		return 0;
+
 	return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
 }
 #endif /* CONFIG_SCHED_DEBUG */
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
index 5358e241cb2..7b859ffe5da 100644
--- a/kernel/sched_autogroup.h
+++ b/kernel/sched_autogroup.h
@@ -15,6 +15,10 @@ autogroup_task_group(struct task_struct *p, struct task_group *tg);
 
 static inline void autogroup_init(struct task_struct *init_task) {  }
 static inline void autogroup_free(struct task_group *tg) { }
+static inline bool task_group_is_autogroup(struct task_group *tg)
+{
+	return 0;
+}
 
 static inline struct task_group *
 autogroup_task_group(struct task_struct *p, struct task_group *tg)
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 1dfae3d014b..eb6cb8edd07 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -16,6 +16,8 @@
 #include <linux/kallsyms.h>
 #include <linux/utsname.h>
 
+static DEFINE_SPINLOCK(sched_debug_lock);
+
 /*
  * This allows printing both to /proc/sched_debug and
  * to the console
@@ -86,6 +88,26 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 }
 #endif
 
+#ifdef CONFIG_CGROUP_SCHED
+static char group_path[PATH_MAX];
+
+static char *task_group_path(struct task_group *tg)
+{
+	if (autogroup_path(tg, group_path, PATH_MAX))
+		return group_path;
+
+	/*
+	 * May be NULL if the underlying cgroup isn't fully-created yet
+	 */
+	if (!tg->css.cgroup) {
+		group_path[0] = '\0';
+		return group_path;
+	}
+	cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
+	return group_path;
+}
+#endif
+
 static void
 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 {
@@ -108,6 +130,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 	SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld",
 		0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
 #endif
+#ifdef CONFIG_CGROUP_SCHED
+	SEQ_printf(m, " %s", task_group_path(task_group(p)));
+#endif
 
 	SEQ_printf(m, "\n");
 }
@@ -144,7 +169,11 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	struct sched_entity *last;
 	unsigned long flags;
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg));
+#else
 	SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
+#endif
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
 			SPLIT_NS(cfs_rq->exec_clock));
 
@@ -191,7 +220,11 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 
 void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 {
+#ifdef CONFIG_RT_GROUP_SCHED
+	SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg));
+#else
 	SEQ_printf(m, "\nrt_rq[%d]:\n", cpu);
+#endif
 
 #define P(x) \
 	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
@@ -212,6 +245,7 @@ extern __read_mostly int sched_clock_running;
 static void print_cpu(struct seq_file *m, int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
 
 #ifdef CONFIG_X86
 	{
@@ -262,14 +296,20 @@ static void print_cpu(struct seq_file *m, int cpu)
 	P(ttwu_count);
 	P(ttwu_local);
 
-	P(bkl_count);
+	SEQ_printf(m, "  .%-30s: %d\n", "bkl_count",
+		   rq->rq_sched_info.bkl_count);
 
 #undef P
+#undef P64
 #endif
+	spin_lock_irqsave(&sched_debug_lock, flags);
 	print_cfs_stats(m, cpu);
 	print_rt_stats(m, cpu);
 
+	rcu_read_lock();
 	print_rq(m, rq, cpu);
+	rcu_read_unlock();
+	spin_unlock_irqrestore(&sched_debug_lock, flags);
 }
 
 static const char *sched_tunable_scaling_names[] = {
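task_group_path() formats into a single static group_path[] buffer, which is why the patch also adds sched_debug_lock and holds it around print_cfs_stats()/print_rt_stats()/print_rq(): two CPUs dumping debug state concurrently would otherwise scribble over the shared buffer. The idiom, reduced to user-space C (names are illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t debug_lock = PTHREAD_MUTEX_INITIALIZER;
static char shared_path[256];		/* one buffer for all callers */

/* Safe only while debug_lock is held, like group_path[] in the patch. */
static const char *format_path(long id)
{
	snprintf(shared_path, sizeof(shared_path), "/autogroup-%ld", id);
	return shared_path;
}

static void dump_one(long id)
{
	pthread_mutex_lock(&debug_lock);
	printf("%s\n", format_path(id));	/* use before anyone overwrites */
	pthread_mutex_unlock(&debug_lock);
}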
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c62ebae65cf..0c26e2df450 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -699,7 +699,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	cfs_rq->nr_running--;
 }
 
-#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_FAIR_GROUP_SCHED
+# ifdef CONFIG_SMP
 static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq,
 					    int global_update)
 {
@@ -721,10 +722,10 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
 	u64 now, delta;
 	unsigned long load = cfs_rq->load.weight;
 
-	if (!cfs_rq)
+	if (cfs_rq->tg == &root_task_group)
 		return;
 
-	now = rq_of(cfs_rq)->clock;
+	now = rq_of(cfs_rq)->clock_task;
 	delta = now - cfs_rq->load_stamp;
 
 	/* truncate load history at 4 idle periods */
@@ -762,6 +763,51 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
 		list_del_leaf_cfs_rq(cfs_rq);
 }
 
+static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg,
+				long weight_delta)
+{
+	long load_weight, load, shares;
+
+	load = cfs_rq->load.weight + weight_delta;
+
+	load_weight = atomic_read(&tg->load_weight);
+	load_weight -= cfs_rq->load_contribution;
+	load_weight += load;
+
+	shares = (tg->shares * load);
+	if (load_weight)
+		shares /= load_weight;
+
+	if (shares < MIN_SHARES)
+		shares = MIN_SHARES;
+	if (shares > tg->shares)
+		shares = tg->shares;
+
+	return shares;
+}
+
+static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
+{
+	if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
+		update_cfs_load(cfs_rq, 0);
+		update_cfs_shares(cfs_rq, 0);
+	}
+}
+# else /* CONFIG_SMP */
+static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
+{
+}
+
+static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg,
+				long weight_delta)
+{
+	return tg->shares;
+}
+
+static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq)
+{
+}
+# endif /* CONFIG_SMP */
 static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 			    unsigned long weight)
 {
@@ -782,41 +828,20 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
 {
 	struct task_group *tg;
 	struct sched_entity *se;
-	long load_weight, load, shares;
-
-	if (!cfs_rq)
-		return;
+	long shares;
 
 	tg = cfs_rq->tg;
 	se = tg->se[cpu_of(rq_of(cfs_rq))];
 	if (!se)
 		return;
-
-	load = cfs_rq->load.weight + weight_delta;
-
-	load_weight = atomic_read(&tg->load_weight);
-	load_weight -= cfs_rq->load_contribution;
-	load_weight += load;
-
-	shares = (tg->shares * load);
-	if (load_weight)
-		shares /= load_weight;
-
-	if (shares < MIN_SHARES)
-		shares = MIN_SHARES;
-	if (shares > tg->shares)
-		shares = tg->shares;
+#ifndef CONFIG_SMP
+	if (likely(se->load.weight == tg->shares))
+		return;
+#endif
+	shares = calc_cfs_shares(cfs_rq, tg, weight_delta);
 
 	reweight_entity(cfs_rq_of(se), se, shares);
 }
-
-static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
-{
-	if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
-		update_cfs_load(cfs_rq, 0);
-		update_cfs_shares(cfs_rq, 0);
-	}
-}
 #else /* CONFIG_FAIR_GROUP_SCHED */
 static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
 {
@@ -1062,6 +1087,9 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 		struct sched_entity *se = __pick_next_entity(cfs_rq);
 		s64 delta = curr->vruntime - se->vruntime;
 
+		if (delta < 0)
+			return;
+
 		if (delta > ideal_runtime)
 			resched_task(rq_of(cfs_rq)->curr);
 	}
@@ -1362,27 +1390,27 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
 		return wl;
 
 	for_each_sched_entity(se) {
-		long S, rw, s, a, b;
+		long lw, w;
 
-		S = se->my_q->tg->shares;
-		s = se->load.weight;
-		rw = se->my_q->load.weight;
+		tg = se->my_q->tg;
+		w = se->my_q->load.weight;
 
-		a = S*(rw + wl);
-		b = S*rw + s*wg;
+		/* use this cpu's instantaneous contribution */
+		lw = atomic_read(&tg->load_weight);
+		lw -= se->my_q->load_contribution;
+		lw += w + wg;
 
-		wl = s*(a-b);
+		wl += w;
 
-		if (likely(b))
-			wl /= b;
+		if (lw > 0 && wl < lw)
+			wl = (wl * tg->shares) / lw;
+		else
+			wl = tg->shares;
 
-		/*
-		 * Assume the group is already running and will
-		 * thus already be accounted for in the weight.
-		 *
-		 * That is, moving shares between CPUs, does not
-		 * alter the group weight.
-		 */
+		/* zero point is MIN_SHARES */
+		if (wl < MIN_SHARES)
+			wl = MIN_SHARES;
+		wl -= se->load.weight;
 		wg = 0;
 	}
 
@@ -1401,7 +1429,7 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
1401 1429
1402static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) 1430static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
1403{ 1431{
1404 unsigned long this_load, load; 1432 s64 this_load, load;
1405 int idx, this_cpu, prev_cpu; 1433 int idx, this_cpu, prev_cpu;
1406 unsigned long tl_per_task; 1434 unsigned long tl_per_task;
1407 struct task_group *tg; 1435 struct task_group *tg;
@@ -1440,8 +1468,8 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
1440 * Otherwise check if either cpus are near enough in load to allow this 1468 * Otherwise check if either cpus are near enough in load to allow this
1441 * task to be woken on this_cpu. 1469 * task to be woken on this_cpu.
1442 */ 1470 */
1443 if (this_load) { 1471 if (this_load > 0) {
1444 unsigned long this_eff_load, prev_eff_load; 1472 s64 this_eff_load, prev_eff_load;
1445 1473
1446 this_eff_load = 100; 1474 this_eff_load = 100;
1447 this_eff_load *= power_of(prev_cpu); 1475 this_eff_load *= power_of(prev_cpu);
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index c914ec747ca..ad6267714c8 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -625,7 +625,7 @@ static void update_curr_rt(struct rq *rq)
625 struct rt_rq *rt_rq = rt_rq_of_se(rt_se); 625 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
626 u64 delta_exec; 626 u64 delta_exec;
627 627
628 if (!task_has_rt_policy(curr)) 628 if (curr->sched_class != &rt_sched_class)
629 return; 629 return;
630 630
631 delta_exec = rq->clock_task - curr->se.exec_start; 631 delta_exec = rq->clock_task - curr->se.exec_start;
diff --git a/kernel/smp.c b/kernel/smp.c
index 4ec30e06998..9910744f085 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -194,23 +194,52 @@ void generic_smp_call_function_interrupt(void)
194 */ 194 */
195 list_for_each_entry_rcu(data, &call_function.queue, csd.list) { 195 list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
196 int refs; 196 int refs;
197 void (*func) (void *info);
197 198
198 if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) 199 /*
200 * Since we walk the list without any locks, we might
201 * see an entry that was completed, removed from the
202 * list and is in the process of being reused.
203 *
204 * We must check that the cpu is in the cpumask before
205 * checking the refs, and both must be set before
206 * executing the callback on this cpu.
207 */
208
209 if (!cpumask_test_cpu(cpu, data->cpumask))
210 continue;
211
212 smp_rmb();
213
214 if (atomic_read(&data->refs) == 0)
199 continue; 215 continue;
200 216
217 func = data->csd.func; /* for later warn */
201 data->csd.func(data->csd.info); 218 data->csd.func(data->csd.info);
202 219
220 /*
 221 * If the cpu mask is no longer set then the function enabled interrupts,
222 * we took another smp interrupt, and executed the function
223 * twice on this cpu. In theory that copy decremented refs.
224 */
225 if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) {
226 WARN(1, "%pS enabled interrupts and double executed\n",
227 func);
228 continue;
229 }
230
203 refs = atomic_dec_return(&data->refs); 231 refs = atomic_dec_return(&data->refs);
204 WARN_ON(refs < 0); 232 WARN_ON(refs < 0);
205 if (!refs) {
206 raw_spin_lock(&call_function.lock);
207 list_del_rcu(&data->csd.list);
208 raw_spin_unlock(&call_function.lock);
209 }
210 233
211 if (refs) 234 if (refs)
212 continue; 235 continue;
213 236
237 WARN_ON(!cpumask_empty(data->cpumask));
238
239 raw_spin_lock(&call_function.lock);
240 list_del_rcu(&data->csd.list);
241 raw_spin_unlock(&call_function.lock);
242
214 csd_unlock(&data->csd); 243 csd_unlock(&data->csd);
215 } 244 }
216 245
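
The handler above pairs with the smp_wmb() added to smp_call_function_many() further down: the sender publishes the cpumask before refs, and the receiver reads them in the opposite order with smp_rmb() in between, so a nonzero refs guarantees a valid mask. A stand-alone C11 model of that publish/consume protocol, using fences as stand-ins for the kernel barriers and a plain unsigned as the cpumask:

/* C11 sketch of the cpumask/refs ordering, not kernel code. */
#include <stdatomic.h>
#include <stdbool.h>

struct call_data {
        atomic_uint cpumask;    /* stands in for data->cpumask */
        atomic_int  refs;       /* stands in for data->refs    */
};

/* sender side, as in smp_call_function_many() */
static void publish(struct call_data *d, unsigned mask)
{
        unsigned m = mask;
        int refs = 0;

        while (m) {             /* popcount == cpumask_weight() */
                refs += m & 1;
                m >>= 1;
        }

        atomic_store_explicit(&d->cpumask, mask, memory_order_relaxed);
        atomic_thread_fence(memory_order_release);      /* smp_wmb() */
        atomic_store_explicit(&d->refs, refs, memory_order_relaxed);
}

/* receiver side, as in generic_smp_call_function_interrupt() */
static bool entry_is_for_us(struct call_data *d, unsigned cpu_bit)
{
        if (!(atomic_load_explicit(&d->cpumask, memory_order_relaxed)
              & cpu_bit))
                return false;

        atomic_thread_fence(memory_order_acquire);      /* smp_rmb() */

        /* refs == 0: the entry was completed and is being reused */
        return atomic_load_explicit(&d->refs, memory_order_relaxed) != 0;
}

int main(void)
{
        struct call_data d = { 0 };

        publish(&d, 0x6);                       /* cpus 1 and 2 */
        return entry_is_for_us(&d, 1U << 1) ? 0 : 1;
}
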
@@ -430,7 +459,7 @@ void smp_call_function_many(const struct cpumask *mask,
430 * can't happen. 459 * can't happen.
431 */ 460 */
432 WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() 461 WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
433 && !oops_in_progress); 462 && !oops_in_progress && !early_boot_irqs_disabled);
434 463
435 /* So, what's a CPU they want? Ignoring this one. */ 464 /* So, what's a CPU they want? Ignoring this one. */
436 cpu = cpumask_first_and(mask, cpu_online_mask); 465 cpu = cpumask_first_and(mask, cpu_online_mask);
@@ -454,11 +483,21 @@ void smp_call_function_many(const struct cpumask *mask,
454 483
455 data = &__get_cpu_var(cfd_data); 484 data = &__get_cpu_var(cfd_data);
456 csd_lock(&data->csd); 485 csd_lock(&data->csd);
486 BUG_ON(atomic_read(&data->refs) || !cpumask_empty(data->cpumask));
457 487
458 data->csd.func = func; 488 data->csd.func = func;
459 data->csd.info = info; 489 data->csd.info = info;
460 cpumask_and(data->cpumask, mask, cpu_online_mask); 490 cpumask_and(data->cpumask, mask, cpu_online_mask);
461 cpumask_clear_cpu(this_cpu, data->cpumask); 491 cpumask_clear_cpu(this_cpu, data->cpumask);
492
493 /*
 494 * To ensure the interrupt handler gets a complete view,
495 * we order the cpumask and refs writes and order the read
496 * of them in the interrupt handler. In addition we may
497 * only clear our own cpu bit from the mask.
498 */
499 smp_wmb();
500
462 atomic_set(&data->refs, cpumask_weight(data->cpumask)); 501 atomic_set(&data->refs, cpumask_weight(data->cpumask));
463 502
464 raw_spin_lock_irqsave(&call_function.lock, flags); 503 raw_spin_lock_irqsave(&call_function.lock, flags);
@@ -533,17 +572,20 @@ void ipi_call_unlock_irq(void)
533#endif /* USE_GENERIC_SMP_HELPERS */ 572#endif /* USE_GENERIC_SMP_HELPERS */
534 573
535/* 574/*
536 * Call a function on all processors 575 * Call a function on all processors. May be used during early boot while
576 * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead
577 * of local_irq_disable/enable().
537 */ 578 */
538int on_each_cpu(void (*func) (void *info), void *info, int wait) 579int on_each_cpu(void (*func) (void *info), void *info, int wait)
539{ 580{
581 unsigned long flags;
540 int ret = 0; 582 int ret = 0;
541 583
542 preempt_disable(); 584 preempt_disable();
543 ret = smp_call_function(func, info, wait); 585 ret = smp_call_function(func, info, wait);
544 local_irq_disable(); 586 local_irq_save(flags);
545 func(info); 587 func(info);
546 local_irq_enable(); 588 local_irq_restore(flags);
547 preempt_enable(); 589 preempt_enable();
548 return ret; 590 return ret;
549} 591}
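
local_irq_save()/local_irq_restore() preserve the caller's interrupt state instead of unconditionally enabling interrupts on exit, which is what makes on_each_cpu() safe while early_boot_irqs_disabled is set. A toy model of the difference, with a bool standing in for the cpu's interrupt flag; all names here are userspace stand-ins:

/* Why save/restore instead of disable/enable, as a runnable sketch. */
#include <stdio.h>
#include <stdbool.h>

static bool irqs_on;    /* models the cpu's interrupt-enable flag */

static void local_irq_save(bool *flags) { *flags = irqs_on; irqs_on = false; }
static void local_irq_restore(bool flags) { irqs_on = flags; }

static void on_each_cpu_local_part(void (*func)(void))
{
        bool flags;

        local_irq_save(&flags);
        func();
        local_irq_restore(flags);   /* early boot: irqs stay disabled */
}

static void noop(void) { }

int main(void)
{
        irqs_on = false;            /* the early_boot_irqs_disabled case */
        on_each_cpu_local_part(noop);
        printf("irqs %s\n", irqs_on ? "on (bug)" : "still off (correct)");
        return 0;
}
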
diff --git a/kernel/sys.c b/kernel/sys.c
index 31b71a276b4..18da702ec81 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1385,7 +1385,8 @@ static int check_prlimit_permission(struct task_struct *task)
1385 const struct cred *cred = current_cred(), *tcred; 1385 const struct cred *cred = current_cred(), *tcred;
1386 1386
1387 tcred = __task_cred(task); 1387 tcred = __task_cred(task);
1388 if ((cred->uid != tcred->euid || 1388 if (current != task &&
1389 (cred->uid != tcred->euid ||
1389 cred->uid != tcred->suid || 1390 cred->uid != tcred->suid ||
1390 cred->uid != tcred->uid || 1391 cred->uid != tcred->uid ||
1391 cred->gid != tcred->egid || 1392 cred->gid != tcred->egid ||
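
The added current != task test lets a task always operate on its own limits; only cross-task access falls through to the credential comparison. A userspace sketch of the fixed predicate, where the trailing sgid/gid legs and the capability override are assumptions about the surrounding kernel code that this hunk does not show:

/* Shape of the fixed permission check (struct fields assumed). */
#include <stdio.h>
#include <stdbool.h>

struct cred { unsigned uid, euid, suid, gid, egid, sgid; };

static bool prlimit_denied(const struct cred *cur, const struct cred *tgt,
                           bool same_task, bool capable)
{
        /* current != task && creds differ && !CAP_SYS_RESOURCE -> EPERM */
        if (!same_task &&
            (cur->uid != tgt->euid || cur->uid != tgt->suid ||
             cur->uid != tgt->uid  ||
             cur->gid != tgt->egid || cur->gid != tgt->sgid ||
             cur->gid != tgt->gid) && !capable)
                return true;
        return false;
}

int main(void)
{
        struct cred me  = { 1000, 1000, 1000, 100, 100, 100 };
        struct cred tgt = {    0,    0,    0,   0,   0,   0 };

        /* self access passes even though the creds differ */
        printf("self:  %s\n",
               prlimit_denied(&me, &tgt, true, false) ? "EPERM" : "ok");
        printf("other: %s\n",
               prlimit_denied(&me, &tgt, false, false) ? "EPERM" : "ok");
        return 0;
}
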
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index bc86bb32e12..0f1bd83db98 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -170,7 +170,8 @@ static int proc_taint(struct ctl_table *table, int write,
170#endif 170#endif
171 171
172#ifdef CONFIG_MAGIC_SYSRQ 172#ifdef CONFIG_MAGIC_SYSRQ
 173static int __sysrq_enabled; /* Note: sysrq code ises it's own private copy */ 173/* Note: sysrq code uses its own private copy */
174static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
174 175
175static int sysrq_sysctl_handler(ctl_table *table, int write, 176static int sysrq_sysctl_handler(ctl_table *table, int write,
176 void __user *buffer, size_t *lenp, 177 void __user *buffer, size_t *lenp,
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3e216e01bbd..c55ea243347 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -642,8 +642,7 @@ static void tick_nohz_switch_to_nohz(void)
642 } 642 }
643 local_irq_enable(); 643 local_irq_enable();
644 644
645 printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", 645 printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id());
646 smp_processor_id());
647} 646}
648 647
649/* 648/*
@@ -795,8 +794,10 @@ void tick_setup_sched_timer(void)
795 } 794 }
796 795
797#ifdef CONFIG_NO_HZ 796#ifdef CONFIG_NO_HZ
798 if (tick_nohz_enabled) 797 if (tick_nohz_enabled) {
799 ts->nohz_mode = NOHZ_MODE_HIGHRES; 798 ts->nohz_mode = NOHZ_MODE_HIGHRES;
799 printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id());
800 }
800#endif 801#endif
801} 802}
802#endif /* HIGH_RES_TIMERS */ 803#endif /* HIGH_RES_TIMERS */
diff --git a/kernel/timer.c b/kernel/timer.c
index 43ca9936f2d..d53ce66daea 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -969,10 +969,14 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
969int del_timer_sync(struct timer_list *timer) 969int del_timer_sync(struct timer_list *timer)
970{ 970{
971#ifdef CONFIG_LOCKDEP 971#ifdef CONFIG_LOCKDEP
972 unsigned long flags;
973
974 raw_local_irq_save(flags);
972 local_bh_disable(); 975 local_bh_disable();
973 lock_map_acquire(&timer->lockdep_map); 976 lock_map_acquire(&timer->lockdep_map);
974 lock_map_release(&timer->lockdep_map); 977 lock_map_release(&timer->lockdep_map);
975 local_bh_enable(); 978 _local_bh_enable();
979 raw_local_irq_restore(flags);
976#endif 980#endif
977 /* 981 /*
978 * don't use it in hardirq context, because it 982 * don't use it in hardirq context, because it
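
local_bh_enable() warns when called with interrupts disabled and may run softirqs, so the lockdep-only annotation now brackets the fake acquire/release with raw_local_irq_save()/restore() and finishes with _local_bh_enable(), which only drops the softirq count. A userspace sketch of the resulting shape, with bools standing in for the irq and bh state; nothing here is the real kernel API:

/* Shape of the lockdep-only del_timer_sync() annotation after the fix. */
#include <stdbool.h>

static bool irqs_off, bh_off;

static void raw_local_irq_save(bool *f) { *f = irqs_off; irqs_off = true; }
static void raw_local_irq_restore(bool f) { irqs_off = f; }
static void local_bh_disable(void) { bh_off = true; }

/* the variant that neither warns nor runs softirqs */
static void _local_bh_enable(void) { bh_off = false; }

static void del_timer_sync_annotation(void)
{
        bool flags;

        raw_local_irq_save(&flags);
        local_bh_disable();
        /* lock_map_acquire()/release() of timer->lockdep_map goes here */
        _local_bh_enable();
        raw_local_irq_restore(flags);
}

int main(void)
{
        irqs_off = true;                /* caller runs with irqs disabled */
        del_timer_sync_annotation();
        return irqs_off ? 0 : 1;        /* still disabled afterwards */
}
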
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 35fde09b81d..5f499e0438a 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1284,7 +1284,7 @@ trace_create_file_ops(struct module *mod)
1284static void trace_module_add_events(struct module *mod) 1284static void trace_module_add_events(struct module *mod)
1285{ 1285{
1286 struct ftrace_module_file_ops *file_ops = NULL; 1286 struct ftrace_module_file_ops *file_ops = NULL;
1287 struct ftrace_event_call *call, *start, *end; 1287 struct ftrace_event_call **call, **start, **end;
1288 1288
1289 start = mod->trace_events; 1289 start = mod->trace_events;
1290 end = mod->trace_events + mod->num_trace_events; 1290 end = mod->trace_events + mod->num_trace_events;
@@ -1297,7 +1297,7 @@ static void trace_module_add_events(struct module *mod)
1297 return; 1297 return;
1298 1298
1299 for_each_event(call, start, end) { 1299 for_each_event(call, start, end) {
1300 __trace_add_event_call(call, mod, 1300 __trace_add_event_call(*call, mod,
1301 &file_ops->id, &file_ops->enable, 1301 &file_ops->id, &file_ops->enable,
1302 &file_ops->filter, &file_ops->format); 1302 &file_ops->filter, &file_ops->format);
1303 } 1303 }
@@ -1367,8 +1367,8 @@ static struct notifier_block trace_module_nb = {
1367 .priority = 0, 1367 .priority = 0,
1368}; 1368};
1369 1369
1370extern struct ftrace_event_call __start_ftrace_events[]; 1370extern struct ftrace_event_call *__start_ftrace_events[];
1371extern struct ftrace_event_call __stop_ftrace_events[]; 1371extern struct ftrace_event_call *__stop_ftrace_events[];
1372 1372
1373static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; 1373static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
1374 1374
@@ -1384,7 +1384,7 @@ __setup("trace_event=", setup_trace_event);
1384 1384
1385static __init int event_trace_init(void) 1385static __init int event_trace_init(void)
1386{ 1386{
1387 struct ftrace_event_call *call; 1387 struct ftrace_event_call **call;
1388 struct dentry *d_tracer; 1388 struct dentry *d_tracer;
1389 struct dentry *entry; 1389 struct dentry *entry;
1390 struct dentry *d_events; 1390 struct dentry *d_events;
@@ -1430,7 +1430,7 @@ static __init int event_trace_init(void)
1430 pr_warning("tracing: Failed to allocate common fields"); 1430 pr_warning("tracing: Failed to allocate common fields");
1431 1431
1432 for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { 1432 for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
1433 __trace_add_event_call(call, NULL, &ftrace_event_id_fops, 1433 __trace_add_event_call(*call, NULL, &ftrace_event_id_fops,
1434 &ftrace_enable_fops, 1434 &ftrace_enable_fops,
1435 &ftrace_event_filter_fops, 1435 &ftrace_event_filter_fops,
1436 &ftrace_event_format_fops); 1436 &ftrace_event_format_fops);
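
These hunks, together with the trace_export, trace_syscalls and tracepoint changes below, all switch the linker sections from holding the structures themselves to holding pointers to them: a pointer's size and alignment are fixed, so compiler-imposed structure alignment can no longer pad the section and break array iteration, at the cost of a double dereference when walking it. A userspace sketch of the pattern, assuming ELF with GNU ld, which auto-generates __start_/__stop_ symbols for sections whose names are valid C identifiers:

/* Pointer-array-in-section pattern; "my_events" is an invented name. */
#include <stdio.h>

struct event { const char *name; };

static struct event ev_a = { "alpha" };
static struct event ev_b = { "beta"  };

/* each definition contributes one pointer to the section */
static struct event * __attribute__((used, section("my_events")))
        ev_a_ptr = &ev_a;
static struct event * __attribute__((used, section("my_events")))
        ev_b_ptr = &ev_b;

/* the linker provides the bounds, like __start/__stop_ftrace_events */
extern struct event *__start_my_events[];
extern struct event *__stop_my_events[];

int main(void)
{
        struct event **call;

        for (call = __start_my_events; call < __stop_my_events; call++)
                printf("%s\n", (*call)->name);   /* double dereference */
        return 0;
}
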
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 4b74d71705c..bbeec31e0ae 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -161,13 +161,13 @@ struct ftrace_event_class event_class_ftrace_##call = { \
161 .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ 161 .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
162}; \ 162}; \
163 \ 163 \
164struct ftrace_event_call __used \ 164struct ftrace_event_call __used event_##call = { \
165__attribute__((__aligned__(4))) \
166__attribute__((section("_ftrace_events"))) event_##call = { \
167 .name = #call, \ 165 .name = #call, \
168 .event.type = etype, \ 166 .event.type = etype, \
169 .class = &event_class_ftrace_##call, \ 167 .class = &event_class_ftrace_##call, \
170 .print_fmt = print, \ 168 .print_fmt = print, \
171}; \ 169}; \
170struct ftrace_event_call __used \
171__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
172 172
173#include "trace_entries.h" 173#include "trace_entries.h"
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 5cf8c602b88..92b6e1e12d9 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -453,14 +453,6 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1)
453 * Stubs: 453 * Stubs:
454 */ 454 */
455 455
456void early_boot_irqs_off(void)
457{
458}
459
460void early_boot_irqs_on(void)
461{
462}
463
464void trace_softirqs_on(unsigned long ip) 456void trace_softirqs_on(unsigned long ip)
465{ 457{
466} 458}
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index b706529b4fc..5c9fe08d209 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -55,20 +55,21 @@ struct ftrace_event_class event_class_syscall_exit = {
55 .raw_init = init_syscall_trace, 55 .raw_init = init_syscall_trace,
56}; 56};
57 57
58extern unsigned long __start_syscalls_metadata[]; 58extern struct syscall_metadata *__start_syscalls_metadata[];
59extern unsigned long __stop_syscalls_metadata[]; 59extern struct syscall_metadata *__stop_syscalls_metadata[];
60 60
61static struct syscall_metadata **syscalls_metadata; 61static struct syscall_metadata **syscalls_metadata;
62 62
63static struct syscall_metadata *find_syscall_meta(unsigned long syscall) 63static __init struct syscall_metadata *
64find_syscall_meta(unsigned long syscall)
64{ 65{
65 struct syscall_metadata *start; 66 struct syscall_metadata **start;
66 struct syscall_metadata *stop; 67 struct syscall_metadata **stop;
67 char str[KSYM_SYMBOL_LEN]; 68 char str[KSYM_SYMBOL_LEN];
68 69
69 70
70 start = (struct syscall_metadata *)__start_syscalls_metadata; 71 start = __start_syscalls_metadata;
71 stop = (struct syscall_metadata *)__stop_syscalls_metadata; 72 stop = __stop_syscalls_metadata;
72 kallsyms_lookup(syscall, NULL, NULL, NULL, str); 73 kallsyms_lookup(syscall, NULL, NULL, NULL, str);
73 74
74 for ( ; start < stop; start++) { 75 for ( ; start < stop; start++) {
@@ -78,8 +79,8 @@ static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
78 * with "SyS" instead of "sys", leading to an unwanted 79 * with "SyS" instead of "sys", leading to an unwanted
79 * mismatch. 80 * mismatch.
80 */ 81 */
81 if (start->name && !strcmp(start->name + 3, str + 3)) 82 if ((*start)->name && !strcmp((*start)->name + 3, str + 3))
82 return start; 83 return *start;
83 } 84 }
84 return NULL; 85 return NULL;
85} 86}
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index e95ee7f31d4..68187af4889 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -27,8 +27,8 @@
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/jump_label.h> 28#include <linux/jump_label.h>
29 29
30extern struct tracepoint __start___tracepoints[]; 30extern struct tracepoint * const __start___tracepoints_ptrs[];
31extern struct tracepoint __stop___tracepoints[]; 31extern struct tracepoint * const __stop___tracepoints_ptrs[];
32 32
33/* Set to 1 to enable tracepoint debug output */ 33/* Set to 1 to enable tracepoint debug output */
34static const int tracepoint_debug; 34static const int tracepoint_debug;
@@ -298,10 +298,10 @@ static void disable_tracepoint(struct tracepoint *elem)
298 * 298 *
299 * Updates the probe callback corresponding to a range of tracepoints. 299 * Updates the probe callback corresponding to a range of tracepoints.
300 */ 300 */
301void 301void tracepoint_update_probe_range(struct tracepoint * const *begin,
302tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) 302 struct tracepoint * const *end)
303{ 303{
304 struct tracepoint *iter; 304 struct tracepoint * const *iter;
305 struct tracepoint_entry *mark_entry; 305 struct tracepoint_entry *mark_entry;
306 306
307 if (!begin) 307 if (!begin)
@@ -309,12 +309,12 @@ tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end)
309 309
310 mutex_lock(&tracepoints_mutex); 310 mutex_lock(&tracepoints_mutex);
311 for (iter = begin; iter < end; iter++) { 311 for (iter = begin; iter < end; iter++) {
312 mark_entry = get_tracepoint(iter->name); 312 mark_entry = get_tracepoint((*iter)->name);
313 if (mark_entry) { 313 if (mark_entry) {
314 set_tracepoint(&mark_entry, iter, 314 set_tracepoint(&mark_entry, *iter,
315 !!mark_entry->refcount); 315 !!mark_entry->refcount);
316 } else { 316 } else {
317 disable_tracepoint(iter); 317 disable_tracepoint(*iter);
318 } 318 }
319 } 319 }
320 mutex_unlock(&tracepoints_mutex); 320 mutex_unlock(&tracepoints_mutex);
@@ -326,8 +326,8 @@ tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end)
326static void tracepoint_update_probes(void) 326static void tracepoint_update_probes(void)
327{ 327{
328 /* Core kernel tracepoints */ 328 /* Core kernel tracepoints */
329 tracepoint_update_probe_range(__start___tracepoints, 329 tracepoint_update_probe_range(__start___tracepoints_ptrs,
330 __stop___tracepoints); 330 __stop___tracepoints_ptrs);
331 /* tracepoints in modules. */ 331 /* tracepoints in modules. */
332 module_update_tracepoints(); 332 module_update_tracepoints();
333} 333}
@@ -514,8 +514,8 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_update_all);
514 * Will return the first tracepoint in the range if the input tracepoint is 514 * Will return the first tracepoint in the range if the input tracepoint is
515 * NULL. 515 * NULL.
516 */ 516 */
517int tracepoint_get_iter_range(struct tracepoint **tracepoint, 517int tracepoint_get_iter_range(struct tracepoint * const **tracepoint,
518 struct tracepoint *begin, struct tracepoint *end) 518 struct tracepoint * const *begin, struct tracepoint * const *end)
519{ 519{
520 if (!*tracepoint && begin != end) { 520 if (!*tracepoint && begin != end) {
521 *tracepoint = begin; 521 *tracepoint = begin;
@@ -534,7 +534,8 @@ static void tracepoint_get_iter(struct tracepoint_iter *iter)
534 /* Core kernel tracepoints */ 534 /* Core kernel tracepoints */
535 if (!iter->module) { 535 if (!iter->module) {
536 found = tracepoint_get_iter_range(&iter->tracepoint, 536 found = tracepoint_get_iter_range(&iter->tracepoint,
537 __start___tracepoints, __stop___tracepoints); 537 __start___tracepoints_ptrs,
538 __stop___tracepoints_ptrs);
538 if (found) 539 if (found)
539 goto end; 540 goto end;
540 } 541 }
@@ -585,8 +586,8 @@ int tracepoint_module_notify(struct notifier_block *self,
585 switch (val) { 586 switch (val) {
586 case MODULE_STATE_COMING: 587 case MODULE_STATE_COMING:
587 case MODULE_STATE_GOING: 588 case MODULE_STATE_GOING:
588 tracepoint_update_probe_range(mod->tracepoints, 589 tracepoint_update_probe_range(mod->tracepoints_ptrs,
589 mod->tracepoints + mod->num_tracepoints); 590 mod->tracepoints_ptrs + mod->num_tracepoints);
590 break; 591 break;
591 } 592 }
592 return 0; 593 return 0;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index d7ebdf4cea9..f37f974aa81 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -27,7 +27,7 @@
27#include <asm/irq_regs.h> 27#include <asm/irq_regs.h>
28#include <linux/perf_event.h> 28#include <linux/perf_event.h>
29 29
30int watchdog_enabled; 30int watchdog_enabled = 1;
31int __read_mostly softlockup_thresh = 60; 31int __read_mostly softlockup_thresh = 60;
32 32
33static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); 33static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
@@ -43,9 +43,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
43static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); 43static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
44#endif 44#endif
45 45
46static int no_watchdog;
47
48
49/* boot commands */ 46/* boot commands */
50/* 47/*
51 * Should we panic when a soft-lockup or hard-lockup occurs: 48 * Should we panic when a soft-lockup or hard-lockup occurs:
@@ -58,7 +55,7 @@ static int __init hardlockup_panic_setup(char *str)
58 if (!strncmp(str, "panic", 5)) 55 if (!strncmp(str, "panic", 5))
59 hardlockup_panic = 1; 56 hardlockup_panic = 1;
60 else if (!strncmp(str, "0", 1)) 57 else if (!strncmp(str, "0", 1))
61 no_watchdog = 1; 58 watchdog_enabled = 0;
62 return 1; 59 return 1;
63} 60}
64__setup("nmi_watchdog=", hardlockup_panic_setup); 61__setup("nmi_watchdog=", hardlockup_panic_setup);
@@ -77,7 +74,7 @@ __setup("softlockup_panic=", softlockup_panic_setup);
77 74
78static int __init nowatchdog_setup(char *str) 75static int __init nowatchdog_setup(char *str)
79{ 76{
80 no_watchdog = 1; 77 watchdog_enabled = 0;
81 return 1; 78 return 1;
82} 79}
83__setup("nowatchdog", nowatchdog_setup); 80__setup("nowatchdog", nowatchdog_setup);
@@ -85,7 +82,7 @@ __setup("nowatchdog", nowatchdog_setup);
85/* deprecated */ 82/* deprecated */
86static int __init nosoftlockup_setup(char *str) 83static int __init nosoftlockup_setup(char *str)
87{ 84{
88 no_watchdog = 1; 85 watchdog_enabled = 0;
89 return 1; 86 return 1;
90} 87}
91__setup("nosoftlockup", nosoftlockup_setup); 88__setup("nosoftlockup", nosoftlockup_setup);
@@ -432,9 +429,6 @@ static int watchdog_enable(int cpu)
432 wake_up_process(p); 429 wake_up_process(p);
433 } 430 }
434 431
435 /* if any cpu succeeds, watchdog is considered enabled for the system */
436 watchdog_enabled = 1;
437
438 return 0; 432 return 0;
439} 433}
440 434
@@ -462,12 +456,16 @@ static void watchdog_disable(int cpu)
462static void watchdog_enable_all_cpus(void) 456static void watchdog_enable_all_cpus(void)
463{ 457{
464 int cpu; 458 int cpu;
465 int result = 0; 459
460 watchdog_enabled = 0;
466 461
467 for_each_online_cpu(cpu) 462 for_each_online_cpu(cpu)
468 result += watchdog_enable(cpu); 463 if (!watchdog_enable(cpu))
464 /* if any cpu succeeds, watchdog is considered
465 enabled for the system */
466 watchdog_enabled = 1;
469 467
470 if (result) 468 if (!watchdog_enabled)
471 printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n"); 469 printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n");
472 470
473} 471}
@@ -476,9 +474,6 @@ static void watchdog_disable_all_cpus(void)
476{ 474{
477 int cpu; 475 int cpu;
478 476
479 if (no_watchdog)
480 return;
481
482 for_each_online_cpu(cpu) 477 for_each_online_cpu(cpu)
483 watchdog_disable(cpu); 478 watchdog_disable(cpu);
484 479
@@ -498,10 +493,12 @@ int proc_dowatchdog_enabled(struct ctl_table *table, int write,
498{ 493{
499 proc_dointvec(table, write, buffer, length, ppos); 494 proc_dointvec(table, write, buffer, length, ppos);
500 495
501 if (watchdog_enabled) 496 if (write) {
502 watchdog_enable_all_cpus(); 497 if (watchdog_enabled)
503 else 498 watchdog_enable_all_cpus();
504 watchdog_disable_all_cpus(); 499 else
500 watchdog_disable_all_cpus();
501 }
505 return 0; 502 return 0;
506} 503}
507 504
@@ -530,7 +527,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
530 break; 527 break;
531 case CPU_ONLINE: 528 case CPU_ONLINE:
532 case CPU_ONLINE_FROZEN: 529 case CPU_ONLINE_FROZEN:
533 err = watchdog_enable(hotcpu); 530 if (watchdog_enabled)
531 err = watchdog_enable(hotcpu);
534 break; 532 break;
535#ifdef CONFIG_HOTPLUG_CPU 533#ifdef CONFIG_HOTPLUG_CPU
536 case CPU_UP_CANCELED: 534 case CPU_UP_CANCELED:
@@ -555,9 +553,6 @@ void __init lockup_detector_init(void)
555 void *cpu = (void *)(long)smp_processor_id(); 553 void *cpu = (void *)(long)smp_processor_id();
556 int err; 554 int err;
557 555
558 if (no_watchdog)
559 return;
560
561 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); 556 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
562 WARN_ON(notifier_to_errno(err)); 557 WARN_ON(notifier_to_errno(err));
563 558
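
With no_watchdog gone, watchdog_enabled is the single source of truth: watchdog_enable_all_cpus() clears it first and sets it again as soon as any cpu's watchdog comes up. A compact userspace model of that logic; the per-cpu enable function, its failing cpu, and the cpu count are all invented for the sketch:

/* One flag, set only when at least one cpu's watchdog came up. */
#include <stdio.h>

static int watchdog_enabled;

static int watchdog_enable(int cpu)
{
        return cpu == 3 ? -1 : 0;       /* pretend cpu 3 fails */
}

static void watchdog_enable_all_cpus(int ncpus)
{
        watchdog_enabled = 0;

        for (int cpu = 0; cpu < ncpus; cpu++)
                if (!watchdog_enable(cpu))
                        /* any success => enabled for the system */
                        watchdog_enabled = 1;

        if (!watchdog_enabled)
                printf("watchdog: failed to be enabled on some cpus\n");
}

int main(void)
{
        watchdog_enable_all_cpus(4);
        printf("watchdog_enabled=%d\n", watchdog_enabled);  /* -> 1 */
        return 0;
}
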
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 8ee6ec82f88..11869faa681 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -768,7 +768,11 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
768 768
769 worker->flags &= ~flags; 769 worker->flags &= ~flags;
770 770
771 /* if transitioning out of NOT_RUNNING, increment nr_running */ 771 /*
772 * If transitioning out of NOT_RUNNING, increment nr_running. Note
 773 * that the nested NOT_RUNNING is not a noop. NOT_RUNNING is a mask
774 * of multiple flags, not a single flag.
775 */
772 if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) 776 if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
773 if (!(worker->flags & WORKER_NOT_RUNNING)) 777 if (!(worker->flags & WORKER_NOT_RUNNING))
774 atomic_inc(get_gcwq_nr_running(gcwq->cpu)); 778 atomic_inc(get_gcwq_nr_running(gcwq->cpu));
@@ -1840,7 +1844,7 @@ __acquires(&gcwq->lock)
1840 spin_unlock_irq(&gcwq->lock); 1844 spin_unlock_irq(&gcwq->lock);
1841 1845
1842 work_clear_pending(work); 1846 work_clear_pending(work);
1843 lock_map_acquire(&cwq->wq->lockdep_map); 1847 lock_map_acquire_read(&cwq->wq->lockdep_map);
1844 lock_map_acquire(&lockdep_map); 1848 lock_map_acquire(&lockdep_map);
1845 trace_workqueue_execute_start(work); 1849 trace_workqueue_execute_start(work);
1846 f(work); 1850 f(work);
@@ -2384,8 +2388,18 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
2384 insert_wq_barrier(cwq, barr, work, worker); 2388 insert_wq_barrier(cwq, barr, work, worker);
2385 spin_unlock_irq(&gcwq->lock); 2389 spin_unlock_irq(&gcwq->lock);
2386 2390
2387 lock_map_acquire(&cwq->wq->lockdep_map); 2391 /*
2392 * If @max_active is 1 or rescuer is in use, flushing another work
2393 * item on the same workqueue may lead to deadlock. Make sure the
2394 * flusher is not running on the same workqueue by verifying write
2395 * access.
2396 */
2397 if (cwq->wq->saved_max_active == 1 || cwq->wq->flags & WQ_RESCUER)
2398 lock_map_acquire(&cwq->wq->lockdep_map);
2399 else
2400 lock_map_acquire_read(&cwq->wq->lockdep_map);
2388 lock_map_release(&cwq->wq->lockdep_map); 2401 lock_map_release(&cwq->wq->lockdep_map);
2402
2389 return true; 2403 return true;
2390already_gone: 2404already_gone:
2391 spin_unlock_irq(&gcwq->lock); 2405 spin_unlock_irq(&gcwq->lock);
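
start_flush_work() now takes the workqueue's lockdep map for write only when the queue is strictly serialized (max_active == 1) or drained by a rescuer, since only then can a work item flushing another item on the same queue deadlock; every other flusher takes it for read, which avoids false positives. A sketch of just that predicate, with an assumed flag value and a reduced stand-in for the workqueue struct:

/* The decision this hunk encodes; WQ_RESCUER's value is invented. */
#include <stdio.h>
#include <stdbool.h>

#define WQ_RESCUER 0x1

struct wq { int saved_max_active; unsigned flags; };

/* max_active == 1 serializes the queue and a rescuer drains it one
 * item at a time, so model the flusher as an exclusive (write) owner
 * of the lockdep map; otherwise a read acquire suffices. */
static bool flush_needs_write_acquire(const struct wq *wq)
{
        return wq->saved_max_active == 1 || (wq->flags & WQ_RESCUER);
}

int main(void)
{
        struct wq serialized = { 1, 0 }, normal = { 16, 0 };

        printf("serialized: %s\n",
               flush_needs_write_acquire(&serialized) ? "write" : "read");
        printf("normal:     %s\n",
               flush_needs_write_acquire(&normal) ? "write" : "read");
        return 0;
}
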