Diffstat (limited to 'kernel')
 kernel/cgroup.c           | 175
 kernel/cpuset.c           | 162
 kernel/irq_work.c         |   4
 kernel/kprobes.c          |  18
 kernel/sys_ni.c           |   2
 kernel/time/clocksource.c |   2
 kernel/time/tick-sched.c  |   2
 kernel/trace/trace.c      |   2
 kernel/workqueue.c        |  30
 9 files changed, 220 insertions(+), 177 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 136eceadeed1..bb263d0caab3 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -277,6 +277,10 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
         if (!(cgrp->root->subsys_mask & (1 << ss->id)))
                 return NULL;
 
+        /*
+         * This function is used while updating css associations and thus
+         * can't test the csses directly. Use ->child_subsys_mask.
+         */
         while (cgroup_parent(cgrp) &&
                !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ss->id)))
                 cgrp = cgroup_parent(cgrp);
@@ -284,6 +288,39 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
         return cgroup_css(cgrp, ss);
 }
 
+/**
+ * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
+ * @cgrp: the cgroup of interest
+ * @ss: the subsystem of interest
+ *
+ * Find and get the effective css of @cgrp for @ss. The effective css is
+ * defined as the matching css of the nearest ancestor including self which
+ * has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on,
+ * the root css is returned, so this function always returns a valid css.
+ * The returned css must be put using css_put().
+ */
+struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
+                                             struct cgroup_subsys *ss)
+{
+        struct cgroup_subsys_state *css;
+
+        rcu_read_lock();
+
+        do {
+                css = cgroup_css(cgrp, ss);
+
+                if (css && css_tryget_online(css))
+                        goto out_unlock;
+                cgrp = cgroup_parent(cgrp);
+        } while (cgrp);
+
+        css = init_css_set.subsys[ss->id];
+        css_get(css);
+out_unlock:
+        rcu_read_unlock();
+        return css;
+}
+
 /* convenient tests for these bits */
 static inline bool cgroup_is_dead(const struct cgroup *cgrp)
 {
@@ -1019,31 +1056,30 @@ static void cgroup_put(struct cgroup *cgrp)
 }
 
 /**
- * cgroup_refresh_child_subsys_mask - update child_subsys_mask
+ * cgroup_calc_child_subsys_mask - calculate child_subsys_mask
  * @cgrp: the target cgroup
+ * @subtree_control: the new subtree_control mask to consider
  *
  * On the default hierarchy, a subsystem may request other subsystems to be
  * enabled together through its ->depends_on mask. In such cases, more
  * subsystems than specified in "cgroup.subtree_control" may be enabled.
  *
- * This function determines which subsystems need to be enabled given the
- * current @cgrp->subtree_control and records it in
- * @cgrp->child_subsys_mask. The resulting mask is always a superset of
- * @cgrp->subtree_control and follows the usual hierarchy rules.
+ * This function calculates which subsystems need to be enabled if
+ * @subtree_control is to be applied to @cgrp. The returned mask is always
+ * a superset of @subtree_control and follows the usual hierarchy rules.
  */
-static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp)
+static unsigned int cgroup_calc_child_subsys_mask(struct cgroup *cgrp,
+                                                  unsigned int subtree_control)
 {
         struct cgroup *parent = cgroup_parent(cgrp);
-        unsigned int cur_ss_mask = cgrp->subtree_control;
+        unsigned int cur_ss_mask = subtree_control;
         struct cgroup_subsys *ss;
         int ssid;
 
         lockdep_assert_held(&cgroup_mutex);
 
-        if (!cgroup_on_dfl(cgrp)) {
-                cgrp->child_subsys_mask = cur_ss_mask;
-                return;
-        }
+        if (!cgroup_on_dfl(cgrp))
+                return cur_ss_mask;
 
         while (true) {
                 unsigned int new_ss_mask = cur_ss_mask;
@@ -1067,7 +1103,20 @@ static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp)
                 cur_ss_mask = new_ss_mask;
         }
 
-        cgrp->child_subsys_mask = cur_ss_mask;
+        return cur_ss_mask;
+}
+
+/**
+ * cgroup_refresh_child_subsys_mask - update child_subsys_mask
+ * @cgrp: the target cgroup
+ *
+ * Update @cgrp->child_subsys_mask according to the current
+ * @cgrp->subtree_control using cgroup_calc_child_subsys_mask().
+ */
+static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp)
+{
+        cgrp->child_subsys_mask =
+                cgroup_calc_child_subsys_mask(cgrp, cgrp->subtree_control);
 }
 
 /**
@@ -2641,7 +2690,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
                                             loff_t off)
 {
         unsigned int enable = 0, disable = 0;
-        unsigned int css_enable, css_disable, old_ctrl, new_ctrl;
+        unsigned int css_enable, css_disable, old_sc, new_sc, old_ss, new_ss;
         struct cgroup *cgrp, *child;
         struct cgroup_subsys *ss;
         char *tok;
@@ -2693,36 +2742,6 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
                                 ret = -ENOENT;
                                 goto out_unlock;
                         }
-
-                        /*
-                         * @ss is already enabled through dependency and
-                         * we'll just make it visible. Skip draining.
-                         */
-                        if (cgrp->child_subsys_mask & (1 << ssid))
-                                continue;
-
-                        /*
-                         * Because css offlining is asynchronous, userland
-                         * might try to re-enable the same controller while
-                         * the previous instance is still around. In such
-                         * cases, wait till it's gone using offline_waitq.
-                         */
-                        cgroup_for_each_live_child(child, cgrp) {
-                                DEFINE_WAIT(wait);
-
-                                if (!cgroup_css(child, ss))
-                                        continue;
-
-                                cgroup_get(child);
-                                prepare_to_wait(&child->offline_waitq, &wait,
-                                                TASK_UNINTERRUPTIBLE);
-                                cgroup_kn_unlock(of->kn);
-                                schedule();
-                                finish_wait(&child->offline_waitq, &wait);
-                                cgroup_put(child);
-
-                                return restart_syscall();
-                        }
                 } else if (disable & (1 << ssid)) {
                         if (!(cgrp->subtree_control & (1 << ssid))) {
                                 disable &= ~(1 << ssid);
@@ -2758,19 +2777,48 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
          * subsystems than specified may need to be enabled or disabled
          * depending on subsystem dependencies.
          */
-        cgrp->subtree_control |= enable;
-        cgrp->subtree_control &= ~disable;
+        old_sc = cgrp->subtree_control;
+        old_ss = cgrp->child_subsys_mask;
+        new_sc = (old_sc | enable) & ~disable;
+        new_ss = cgroup_calc_child_subsys_mask(cgrp, new_sc);
 
-        old_ctrl = cgrp->child_subsys_mask;
-        cgroup_refresh_child_subsys_mask(cgrp);
-        new_ctrl = cgrp->child_subsys_mask;
-
-        css_enable = ~old_ctrl & new_ctrl;
-        css_disable = old_ctrl & ~new_ctrl;
+        css_enable = ~old_ss & new_ss;
+        css_disable = old_ss & ~new_ss;
         enable |= css_enable;
         disable |= css_disable;
 
         /*
+         * Because css offlining is asynchronous, userland might try to
+         * re-enable the same controller while the previous instance is
+         * still around. In such cases, wait till it's gone using
+         * offline_waitq.
+         */
+        for_each_subsys(ss, ssid) {
+                if (!(css_enable & (1 << ssid)))
+                        continue;
+
+                cgroup_for_each_live_child(child, cgrp) {
+                        DEFINE_WAIT(wait);
+
+                        if (!cgroup_css(child, ss))
+                                continue;
+
+                        cgroup_get(child);
+                        prepare_to_wait(&child->offline_waitq, &wait,
+                                        TASK_UNINTERRUPTIBLE);
+                        cgroup_kn_unlock(of->kn);
+                        schedule();
+                        finish_wait(&child->offline_waitq, &wait);
+                        cgroup_put(child);
+
+                        return restart_syscall();
+                }
+        }
+
+        cgrp->subtree_control = new_sc;
+        cgrp->child_subsys_mask = new_ss;
+
+        /*
          * Create new csses or make the existing ones visible. A css is
          * created invisible if it's being implicitly enabled through
          * dependency. An invisible css is made visible when the userland
@@ -2825,6 +2873,24 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
                 }
         }
 
+        /*
+         * The effective csses of all the descendants (excluding @cgrp) may
+         * have changed. Subsystems can optionally subscribe to this event
+         * by implementing ->css_e_css_changed() which is invoked if any of
+         * the effective csses seen from the css's cgroup may have changed.
+         */
+        for_each_subsys(ss, ssid) {
+                struct cgroup_subsys_state *this_css = cgroup_css(cgrp, ss);
+                struct cgroup_subsys_state *css;
+
+                if (!ss->css_e_css_changed || !this_css)
+                        continue;
+
+                css_for_each_descendant_pre(css, this_css)
+                        if (css != this_css)
+                                ss->css_e_css_changed(css);
+        }
+
         kernfs_activate(cgrp->kn);
         ret = 0;
 out_unlock:
@@ -2832,9 +2898,8 @@ out_unlock:
         return ret ?: nbytes;
 
 err_undo_css:
-        cgrp->subtree_control &= ~enable;
-        cgrp->subtree_control |= disable;
-        cgroup_refresh_child_subsys_mask(cgrp);
+        cgrp->subtree_control = old_sc;
+        cgrp->child_subsys_mask = old_ss;
 
         for_each_subsys(ss, ssid) {
                 if (!(enable & (1 << ssid)))
@@ -4370,6 +4435,8 @@ static void css_release_work_fn(struct work_struct *work)
         if (ss) {
                 /* css release path */
                 cgroup_idr_remove(&ss->css_idr, css->id);
+                if (ss->css_released)
+                        ss->css_released(css);
         } else {
                 /* cgroup release path */
                 cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
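
For reference, here is a minimal sketch of how a controller might use the new cgroup_get_e_css() helper together with the ->css_released() and ->css_e_css_changed() callbacks wired up above. It assumes the matching declarations from the cgroup headers in this series; the my_ss_* names are invented for illustration and are not part of the patch.

/* Illustrative only; my_ss_* is an invented controller, not part of this patch. */
#include <linux/cgroup.h>
#include <linux/printk.h>

static void my_ss_css_released(struct cgroup_subsys_state *css)
{
        /* Invoked from css_release_work_fn() after the css id is removed. */
        pr_debug("my_ss: css released\n");
}

static void my_ss_css_e_css_changed(struct cgroup_subsys_state *css)
{
        /*
         * An effective css visible from @css's cgroup may have changed,
         * e.g. after a "cgroup.subtree_control" write in an ancestor.
         * Re-resolve it and drop the reference when done.
         */
        struct cgroup_subsys_state *e_css;

        e_css = cgroup_get_e_css(css->cgroup, css->ss);
        /* ... refresh any cached pointer to the effective css here ... */
        css_put(e_css);
}

struct cgroup_subsys my_ss_cgrp_subsys = {
        .css_released           = my_ss_css_released,
        .css_e_css_changed      = my_ss_css_e_css_changed,
        /* .css_alloc, .css_free, etc. omitted for brevity */
};

Because cgroup_get_e_css() falls back to the root css when @ss is not mounted on @cgrp's hierarchy, the caller always gets a valid, referenced css and only has to remember the css_put().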
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 723cfc9d0ad7..64b257f6bca2 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -248,34 +248,34 @@ static struct cpuset top_cpuset = {
                 if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
 
 /*
- * There are two global mutexes guarding cpuset structures - cpuset_mutex
- * and callback_mutex. The latter may nest inside the former. We also
- * require taking task_lock() when dereferencing a task's cpuset pointer.
- * See "The task_lock() exception", at the end of this comment.
+ * There are two global locks guarding cpuset structures - cpuset_mutex and
+ * callback_lock. We also require taking task_lock() when dereferencing a
+ * task's cpuset pointer. See "The task_lock() exception", at the end of this
+ * comment.
  *
- * A task must hold both mutexes to modify cpusets. If a task holds
+ * A task must hold both locks to modify cpusets. If a task holds
  * cpuset_mutex, then it blocks others wanting that mutex, ensuring that it
- * is the only task able to also acquire callback_mutex and be able to
+ * is the only task able to also acquire callback_lock and be able to
  * modify cpusets. It can perform various checks on the cpuset structure
  * first, knowing nothing will change. It can also allocate memory while
  * just holding cpuset_mutex. While it is performing these checks, various
- * callback routines can briefly acquire callback_mutex to query cpusets.
- * Once it is ready to make the changes, it takes callback_mutex, blocking
+ * callback routines can briefly acquire callback_lock to query cpusets.
+ * Once it is ready to make the changes, it takes callback_lock, blocking
  * everyone else.
  *
  * Calls to the kernel memory allocator can not be made while holding
- * callback_mutex, as that would risk double tripping on callback_mutex
+ * callback_lock, as that would risk double tripping on callback_lock
  * from one of the callbacks into the cpuset code from within
  * __alloc_pages().
  *
- * If a task is only holding callback_mutex, then it has read-only
+ * If a task is only holding callback_lock, then it has read-only
  * access to cpusets.
  *
  * Now, the task_struct fields mems_allowed and mempolicy may be changed
  * by other task, we use alloc_lock in the task_struct fields to protect
  * them.
  *
- * The cpuset_common_file_read() handlers only hold callback_mutex across
+ * The cpuset_common_file_read() handlers only hold callback_lock across
  * small pieces of code, such as when reading out possibly multi-word
  * cpumasks and nodemasks.
  *
@@ -284,7 +284,7 @@ static struct cpuset top_cpuset = {
  */
 
 static DEFINE_MUTEX(cpuset_mutex);
-static DEFINE_MUTEX(callback_mutex);
+static DEFINE_SPINLOCK(callback_lock);
 
 /*
  * CPU / memory hotplug is handled asynchronously.
@@ -329,7 +329,7 @@ static struct file_system_type cpuset_fs_type = {
  * One way or another, we guarantee to return some non-empty subset
  * of cpu_online_mask.
  *
- * Call with callback_mutex held.
+ * Call with callback_lock or cpuset_mutex held.
  */
 static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
 {
@@ -347,7 +347,7 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
  * One way or another, we guarantee to return some non-empty subset
  * of node_states[N_MEMORY].
  *
- * Call with callback_mutex held.
+ * Call with callback_lock or cpuset_mutex held.
  */
 static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
 {
@@ -359,7 +359,7 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
 /*
  * update task's spread flag if cpuset's page/slab spread flag is set
  *
- * Called with callback_mutex/cpuset_mutex held
+ * Call with callback_lock or cpuset_mutex held.
  */
 static void cpuset_update_task_spread_flag(struct cpuset *cs,
                                            struct task_struct *tsk)
@@ -886,9 +886,9 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
                         continue;
                 rcu_read_unlock();
 
-                mutex_lock(&callback_mutex);
+                spin_lock_irq(&callback_lock);
                 cpumask_copy(cp->effective_cpus, new_cpus);
-                mutex_unlock(&callback_mutex);
+                spin_unlock_irq(&callback_lock);
 
                 WARN_ON(!cgroup_on_dfl(cp->css.cgroup) &&
                         !cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
@@ -953,9 +953,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
         if (retval < 0)
                 return retval;
 
-        mutex_lock(&callback_mutex);
+        spin_lock_irq(&callback_lock);
         cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
-        mutex_unlock(&callback_mutex);
+        spin_unlock_irq(&callback_lock);
 
         /* use trialcs->cpus_allowed as a temp variable */
         update_cpumasks_hier(cs, trialcs->cpus_allowed);
@@ -1142,9 +1142,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
                         continue;
                 rcu_read_unlock();
 
-                mutex_lock(&callback_mutex);
+                spin_lock_irq(&callback_lock);
                 cp->effective_mems = *new_mems;
-                mutex_unlock(&callback_mutex);
+                spin_unlock_irq(&callback_lock);
 
                 WARN_ON(!cgroup_on_dfl(cp->css.cgroup) &&
                         !nodes_equal(cp->mems_allowed, cp->effective_mems));
@@ -1165,7 +1165,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
  * mempolicies and if the cpuset is marked 'memory_migrate',
  * migrate the tasks pages to the new memory.
  *
- * Call with cpuset_mutex held. May take callback_mutex during call.
+ * Call with cpuset_mutex held. May take callback_lock during call.
  * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
  * lock each such tasks mm->mmap_sem, scan its vma's and rebind
  * their mempolicies to the cpusets new mems_allowed.
@@ -1212,9 +1212,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
         if (retval < 0)
                 goto done;
 
-        mutex_lock(&callback_mutex);
+        spin_lock_irq(&callback_lock);
         cs->mems_allowed = trialcs->mems_allowed;
-        mutex_unlock(&callback_mutex);
+        spin_unlock_irq(&callback_lock);
 
         /* use trialcs->mems_allowed as a temp variable */
         update_nodemasks_hier(cs, &cs->mems_allowed);
@@ -1305,9 +1305,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
         spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
                         || (is_spread_page(cs) != is_spread_page(trialcs)));
 
-        mutex_lock(&callback_mutex);
+        spin_lock_irq(&callback_lock);
         cs->flags = trialcs->flags;
-        mutex_unlock(&callback_mutex);
+        spin_unlock_irq(&callback_lock);
 
         if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
                 rebuild_sched_domains_locked();
@@ -1714,7 +1714,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
         count = seq_get_buf(sf, &buf);
         s = buf;
 
-        mutex_lock(&callback_mutex);
+        spin_lock_irq(&callback_lock);
 
         switch (type) {
         case FILE_CPULIST:
@@ -1741,7 +1741,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
                 seq_commit(sf, -1);
         }
 out_unlock:
-        mutex_unlock(&callback_mutex);
+        spin_unlock_irq(&callback_lock);
         return ret;
 }
 
@@ -1958,12 +1958,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 
         cpuset_inc();
 
-        mutex_lock(&callback_mutex);
+        spin_lock_irq(&callback_lock);
         if (cgroup_on_dfl(cs->css.cgroup)) {
                 cpumask_copy(cs->effective_cpus, parent->effective_cpus);
                 cs->effective_mems = parent->effective_mems;
         }
-        mutex_unlock(&callback_mutex);
+        spin_unlock_irq(&callback_lock);
 
         if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
                 goto out_unlock;
@@ -1990,10 +1990,10 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
         }
         rcu_read_unlock();
 
-        mutex_lock(&callback_mutex);
+        spin_lock_irq(&callback_lock);
         cs->mems_allowed = parent->mems_allowed;
         cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
-        mutex_unlock(&callback_mutex);
+        spin_unlock_irq(&callback_lock);
 out_unlock:
         mutex_unlock(&cpuset_mutex);
         return 0;
@@ -2032,7 +2032,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css)
 static void cpuset_bind(struct cgroup_subsys_state *root_css)
 {
         mutex_lock(&cpuset_mutex);
-        mutex_lock(&callback_mutex);
+        spin_lock_irq(&callback_lock);
 
         if (cgroup_on_dfl(root_css->cgroup)) {
                 cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
@@ -2043,7 +2043,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
                 top_cpuset.mems_allowed = top_cpuset.effective_mems;
         }
 
-        mutex_unlock(&callback_mutex);
+        spin_unlock_irq(&callback_lock);
         mutex_unlock(&cpuset_mutex);
 }
 
@@ -2128,12 +2128,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
 {
         bool is_empty;
 
-        mutex_lock(&callback_mutex);
+        spin_lock_irq(&callback_lock);
         cpumask_copy(cs->cpus_allowed, new_cpus);
         cpumask_copy(cs->effective_cpus, new_cpus);
         cs->mems_allowed = *new_mems;
         cs->effective_mems = *new_mems;
-        mutex_unlock(&callback_mutex);
+        spin_unlock_irq(&callback_lock);
 
         /*
          * Don't call update_tasks_cpumask() if the cpuset becomes empty,
@@ -2170,10 +2170,10 @@ hotplug_update_tasks(struct cpuset *cs,
         if (nodes_empty(*new_mems))
                 *new_mems = parent_cs(cs)->effective_mems;
 
-        mutex_lock(&callback_mutex);
+        spin_lock_irq(&callback_lock);
         cpumask_copy(cs->effective_cpus, new_cpus);
         cs->effective_mems = *new_mems;
-        mutex_unlock(&callback_mutex);
+        spin_unlock_irq(&callback_lock);
 
         if (cpus_updated)
                 update_tasks_cpumask(cs);
@@ -2259,21 +2259,21 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 
         /* synchronize cpus_allowed to cpu_active_mask */
         if (cpus_updated) {
-                mutex_lock(&callback_mutex);
+                spin_lock_irq(&callback_lock);
                 if (!on_dfl)
                         cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
                 cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
-                mutex_unlock(&callback_mutex);
+                spin_unlock_irq(&callback_lock);
                 /* we don't mess with cpumasks of tasks in top_cpuset */
         }
 
         /* synchronize mems_allowed to N_MEMORY */
         if (mems_updated) {
-                mutex_lock(&callback_mutex);
+                spin_lock_irq(&callback_lock);
                 if (!on_dfl)
                         top_cpuset.mems_allowed = new_mems;
                 top_cpuset.effective_mems = new_mems;
-                mutex_unlock(&callback_mutex);
+                spin_unlock_irq(&callback_lock);
                 update_tasks_nodemask(&top_cpuset);
         }
 
@@ -2366,11 +2366,13 @@ void __init cpuset_init_smp(void)
 
 void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 {
-        mutex_lock(&callback_mutex);
+        unsigned long flags;
+
+        spin_lock_irqsave(&callback_lock, flags);
         rcu_read_lock();
         guarantee_online_cpus(task_cs(tsk), pmask);
         rcu_read_unlock();
-        mutex_unlock(&callback_mutex);
+        spin_unlock_irqrestore(&callback_lock, flags);
 }
 
 void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
2376void cpuset_cpus_allowed_fallback(struct task_struct *tsk) 2378void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
@@ -2416,12 +2418,13 @@ void cpuset_init_current_mems_allowed(void)
 nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
 {
         nodemask_t mask;
+        unsigned long flags;
 
-        mutex_lock(&callback_mutex);
+        spin_lock_irqsave(&callback_lock, flags);
         rcu_read_lock();
         guarantee_online_mems(task_cs(tsk), &mask);
         rcu_read_unlock();
-        mutex_unlock(&callback_mutex);
+        spin_unlock_irqrestore(&callback_lock, flags);
 
         return mask;
 }
@@ -2440,7 +2443,7 @@ int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
 /*
  * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
  * mem_hardwall ancestor to the specified cpuset. Call holding
- * callback_mutex. If no ancestor is mem_exclusive or mem_hardwall
+ * callback_lock. If no ancestor is mem_exclusive or mem_hardwall
  * (an unusual configuration), then returns the root cpuset.
  */
 static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
@@ -2451,7 +2454,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
 }
 
 /**
- * cpuset_node_allowed_softwall - Can we allocate on a memory node?
+ * cpuset_node_allowed - Can we allocate on a memory node?
  * @node: is this an allowed node?
  * @gfp_mask: memory allocation flags
  *
@@ -2463,13 +2466,6 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
  * flag, yes.
  * Otherwise, no.
  *
- * If __GFP_HARDWALL is set, cpuset_node_allowed_softwall() reduces to
- * cpuset_node_allowed_hardwall(). Otherwise, cpuset_node_allowed_softwall()
- * might sleep, and might allow a node from an enclosing cpuset.
- *
- * cpuset_node_allowed_hardwall() only handles the simpler case of hardwall
- * cpusets, and never sleeps.
- *
  * The __GFP_THISNODE placement logic is really handled elsewhere,
  * by forcibly using a zonelist starting at a specified node, and by
  * (in get_page_from_freelist()) refusing to consider the zones for
@@ -2482,13 +2478,12 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
  * GFP_KERNEL allocations are not so marked, so can escape to the
  * nearest enclosing hardwalled ancestor cpuset.
  *
- * Scanning up parent cpusets requires callback_mutex. The
+ * Scanning up parent cpusets requires callback_lock. The
  * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
  * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
  * current tasks mems_allowed came up empty on the first pass over
  * the zonelist. So only GFP_KERNEL allocations, if all nodes in the
- * cpuset are short of memory, might require taking the callback_mutex
- * mutex.
+ * cpuset are short of memory, might require taking the callback_lock.
  *
  * The first call here from mm/page_alloc:get_page_from_freelist()
  * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
@@ -2505,20 +2500,15 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
  * TIF_MEMDIE   - any node ok
  * GFP_KERNEL   - any node in enclosing hardwalled cpuset ok
  * GFP_USER     - only nodes in current tasks mems allowed ok.
- *
- * Rule:
- *    Don't call cpuset_node_allowed_softwall if you can't sleep, unless you
- *    pass in the __GFP_HARDWALL flag set in gfp_flag, which disables
- *    the code that might scan up ancestor cpusets and sleep.
  */
-int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
+int __cpuset_node_allowed(int node, gfp_t gfp_mask)
 {
         struct cpuset *cs;              /* current cpuset ancestors */
         int allowed;                    /* is allocation in zone z allowed? */
+        unsigned long flags;
 
         if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
                 return 1;
-        might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
         if (node_isset(node, current->mems_allowed))
                 return 1;
         /*
@@ -2534,55 +2524,17 @@ int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
                 return 1;
 
         /* Not hardwall and node outside mems_allowed: scan up cpusets */
-        mutex_lock(&callback_mutex);
+        spin_lock_irqsave(&callback_lock, flags);
 
         rcu_read_lock();
         cs = nearest_hardwall_ancestor(task_cs(current));
         allowed = node_isset(node, cs->mems_allowed);
         rcu_read_unlock();
 
-        mutex_unlock(&callback_mutex);
+        spin_unlock_irqrestore(&callback_lock, flags);
         return allowed;
 }
 
-/*
- * cpuset_node_allowed_hardwall - Can we allocate on a memory node?
- * @node: is this an allowed node?
- * @gfp_mask: memory allocation flags
- *
- * If we're in interrupt, yes, we can always allocate. If __GFP_THISNODE is
- * set, yes, we can always allocate. If node is in our task's mems_allowed,
- * yes. If the task has been OOM killed and has access to memory reserves as
- * specified by the TIF_MEMDIE flag, yes.
- * Otherwise, no.
- *
- * The __GFP_THISNODE placement logic is really handled elsewhere,
- * by forcibly using a zonelist starting at a specified node, and by
- * (in get_page_from_freelist()) refusing to consider the zones for
- * any node on the zonelist except the first. By the time any such
- * calls get to this routine, we should just shut up and say 'yes'.
- *
- * Unlike the cpuset_node_allowed_softwall() variant, above,
- * this variant requires that the node be in the current task's
- * mems_allowed or that we're in interrupt. It does not scan up the
- * cpuset hierarchy for the nearest enclosing mem_exclusive cpuset.
- * It never sleeps.
- */
-int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
-{
-        if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
-                return 1;
-        if (node_isset(node, current->mems_allowed))
-                return 1;
-        /*
-         * Allow tasks that have access to memory reserves because they have
-         * been OOM killed to get memory anywhere.
-         */
-        if (unlikely(test_thread_flag(TIF_MEMDIE)))
-                return 1;
-        return 0;
-}
-
 /**
  * cpuset_mem_spread_node() - On which node to begin search for a file page
  * cpuset_slab_spread_node() - On which node to begin search for a slab page
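
The cpuset changes above replace callback_mutex with callback_lock, a spinlock, so query paths such as __cpuset_node_allowed() no longer need to sleep; callers whose interrupt state is unknown use the irqsave variants. Below is a stripped-down sketch of that locking pattern, using invented names (example_lock, example_mask, example_update, example_node_allowed) rather than the real cpuset internals.

/* Illustrative pattern only; all names here are invented for this sketch. */
#include <linux/spinlock.h>
#include <linux/nodemask.h>
#include <linux/types.h>

static DEFINE_SPINLOCK(example_lock);
static nodemask_t example_mask;

/* Updater: process context with interrupts enabled, like update_nodemask(). */
static void example_update(const nodemask_t *new_mask)
{
        spin_lock_irq(&example_lock);
        example_mask = *new_mask;
        spin_unlock_irq(&example_lock);
}

/*
 * Query helper: may be reached from contexts where the irq state is not
 * known (e.g. deep in the page allocator), so save and restore flags,
 * the same way __cpuset_node_allowed() does above.
 */
static bool example_node_allowed(int node)
{
        unsigned long flags;
        bool allowed;

        spin_lock_irqsave(&example_lock, flags);
        allowed = node_isset(node, example_mask);
        spin_unlock_irqrestore(&example_lock, flags);

        return allowed;
}

The design point is that once the lock never sleeps, the might_sleep_if() annotation and the separate hardwall-only helper removed above become unnecessary.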
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 3ab9048483fa..cbf9fb899d92 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -175,11 +175,11 @@ EXPORT_SYMBOL_GPL(irq_work_run);
 
 void irq_work_tick(void)
 {
-        struct llist_head *raised = &__get_cpu_var(raised_list);
+        struct llist_head *raised = this_cpu_ptr(&raised_list);
 
         if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
                 irq_work_run_list(raised);
-        irq_work_run_list(&__get_cpu_var(lazy_list));
+        irq_work_run_list(this_cpu_ptr(&lazy_list));
 }
 
 /*
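
__get_cpu_var() was being phased out across the tree at this point; this_cpu_ptr(&var) is the drop-in replacement when a pointer to the local CPU's instance is wanted, as in irq_work_tick() above. A small sketch of the conversion, using an invented per-cpu list:

/* Sketch of the accessor conversion; my_list and my_local_drain() are made up. */
#include <linux/percpu.h>
#include <linux/llist.h>

static DEFINE_PER_CPU(struct llist_head, my_list);

static void my_local_drain(void)
{
        /* Old style: struct llist_head *head = &__get_cpu_var(my_list); */
        struct llist_head *head = this_cpu_ptr(&my_list);
        struct llist_node *pending;

        if (llist_empty(head))
                return;

        /* Atomically detach everything queued on this CPU and walk it. */
        pending = llist_del_all(head);
        /* ... process the detached nodes in @pending ... */
}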
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 831978cebf1d..06f58309fed2 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1410,16 +1410,10 @@ static inline int check_kprobe_rereg(struct kprobe *p)
         return ret;
 }
 
-static int check_kprobe_address_safe(struct kprobe *p,
-                                     struct module **probed_mod)
+int __weak arch_check_ftrace_location(struct kprobe *p)
 {
-        int ret = 0;
         unsigned long ftrace_addr;
 
-        /*
-         * If the address is located on a ftrace nop, set the
-         * breakpoint to the following instruction.
-         */
         ftrace_addr = ftrace_location((unsigned long)p->addr);
         if (ftrace_addr) {
 #ifdef CONFIG_KPROBES_ON_FTRACE
@@ -1431,7 +1425,17 @@ static int check_kprobe_address_safe(struct kprobe *p,
                 return -EINVAL;
 #endif
         }
+        return 0;
+}
 
+static int check_kprobe_address_safe(struct kprobe *p,
+                                     struct module **probed_mod)
+{
+        int ret;
+
+        ret = arch_check_ftrace_location(p);
+        if (ret)
+                return ret;
         jump_label_lock();
         preempt_disable();
 
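
The kprobes hunks above turn the ftrace-location check into a __weak arch_check_ftrace_location() hook that an architecture can override. A sketch of what such an override might look like; this is illustrative only, and note that an override replaces the generic check entirely, so it has to make its own decision about probes that land on ftrace locations.

/* Hypothetical architecture-side override; not taken from any real arch. */
#include <linux/kprobes.h>
#include <linux/ftrace.h>

int arch_check_ftrace_location(struct kprobe *p)
{
        /*
         * This variant simply tags probes sitting on ftrace locations and
         * lets the ftrace-based kprobe machinery handle them, instead of
         * applying the generic CONFIG_KPROBES_ON_FTRACE gating above.
         */
        if (ftrace_location((unsigned long)p->addr))
                p->flags |= KPROBE_FLAG_FTRACE;
        return 0;
}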
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 02aa4185b17e..61eea02b53f5 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -169,6 +169,8 @@ cond_syscall(ppc_rtas);
 cond_syscall(sys_spu_run);
 cond_syscall(sys_spu_create);
 cond_syscall(sys_subpage_prot);
+cond_syscall(sys_s390_pci_mmio_read);
+cond_syscall(sys_s390_pci_mmio_write);
 
 /* mmu depending weak syscall entries */
 cond_syscall(sys_mprotect);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 2e949cc9c9f1..b79f39bda7e1 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -792,7 +792,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
         /* Initialize mult/shift and max_idle_ns */
         __clocksource_updatefreq_scale(cs, scale, freq);
 
-        /* Add clocksource to the clcoksource list */
+        /* Add clocksource to the clocksource list */
         mutex_lock(&clocksource_mutex);
         clocksource_enqueue(cs);
         clocksource_enqueue_watchdog(cs);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1f4356037a7d..4d54b7540585 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -235,7 +235,7 @@ void tick_nohz_full_kick(void)
         if (!tick_nohz_full_cpu(smp_processor_id()))
                 return;
 
-        irq_work_queue(&__get_cpu_var(nohz_full_kick_work));
+        irq_work_queue(this_cpu_ptr(&nohz_full_kick_work));
 }
 
 /*
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1af4f8f2ab5d..ab76b7bcb36a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2031,7 +2031,7 @@ void trace_printk_init_buffers(void)
         pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
         pr_warning("**                                                      **\n");
         pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
-        pr_warning("** unsafe for produciton use.                           **\n");
+        pr_warning("** unsafe for production use.                           **\n");
         pr_warning("**                                                      **\n");
         pr_warning("** If you see this message and you are not debugging    **\n");
         pr_warning("** the kernel, report this immediately to your vendor!  **\n");
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 09b685daee3d..6202b08f1933 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1804,8 +1804,8 @@ static void pool_mayday_timeout(unsigned long __pool)
         struct worker_pool *pool = (void *)__pool;
         struct work_struct *work;
 
-        spin_lock_irq(&wq_mayday_lock);         /* for wq->maydays */
-        spin_lock(&pool->lock);
+        spin_lock_irq(&pool->lock);
+        spin_lock(&wq_mayday_lock);             /* for wq->maydays */
 
         if (need_to_create_worker(pool)) {
                 /*
@@ -1818,8 +1818,8 @@ static void pool_mayday_timeout(unsigned long __pool)
                         send_mayday(work);
         }
 
-        spin_unlock(&pool->lock);
-        spin_unlock_irq(&wq_mayday_lock);
+        spin_unlock(&wq_mayday_lock);
+        spin_unlock_irq(&pool->lock);
 
         mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
 }
@@ -2248,12 +2248,30 @@ repeat:
                  * Slurp in all works issued via this workqueue and
                  * process'em.
                  */
-                WARN_ON_ONCE(!list_empty(&rescuer->scheduled));
+                WARN_ON_ONCE(!list_empty(scheduled));
                 list_for_each_entry_safe(work, n, &pool->worklist, entry)
                         if (get_work_pwq(work) == pwq)
                                 move_linked_works(work, scheduled, &n);
 
-                process_scheduled_works(rescuer);
+                if (!list_empty(scheduled)) {
+                        process_scheduled_works(rescuer);
+
+                        /*
+                         * The above execution of rescued work items could
+                         * have created more to rescue through
+                         * pwq_activate_first_delayed() or chained
+                         * queueing. Let's put @pwq back on mayday list so
+                         * that such back-to-back work items, which may be
+                         * being used to relieve memory pressure, don't
+                         * incur MAYDAY_INTERVAL delay inbetween.
+                         */
+                        if (need_to_create_worker(pool)) {
+                                spin_lock(&wq_mayday_lock);
+                                get_pwq(pwq);
+                                list_move_tail(&pwq->mayday_node, &wq->maydays);
+                                spin_unlock(&wq_mayday_lock);
+                        }
+                }
 
                 /*
                  * Put the reference grabbed by send_mayday(). @pool won't