-rw-r--r--  include/linux/cgroup.h |   4
-rw-r--r--  include/linux/cpuset.h |  37
-rw-r--r--  kernel/cgroup.c        | 175
-rw-r--r--  kernel/cpuset.c        | 162
-rw-r--r--  mm/hugetlb.c           |   2
-rw-r--r--  mm/oom_kill.c          |   2
-rw-r--r--  mm/page_alloc.c        |   6
-rw-r--r--  mm/slab.c              |   2
-rw-r--r--  mm/slub.c              |   3
-rw-r--r--  mm/vmscan.c            |   5
10 files changed, 200 insertions(+), 198 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 641e56494a92..da0dae0600e6 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -638,8 +638,10 @@ struct cgroup_subsys {
 	struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
 	int (*css_online)(struct cgroup_subsys_state *css);
 	void (*css_offline)(struct cgroup_subsys_state *css);
+	void (*css_released)(struct cgroup_subsys_state *css);
 	void (*css_free)(struct cgroup_subsys_state *css);
 	void (*css_reset)(struct cgroup_subsys_state *css);
+	void (*css_e_css_changed)(struct cgroup_subsys_state *css);
 
 	int (*can_attach)(struct cgroup_subsys_state *css,
 			  struct cgroup_taskset *tset);
@@ -934,6 +936,8 @@ void css_task_iter_end(struct css_task_iter *it);
 int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
 
+struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
+					     struct cgroup_subsys *ss);
 struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
 						       struct cgroup_subsys *ss);
 
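
For orientation, a minimal sketch (not part of this patch) of how a caller might pair the new cgroup_get_e_css() with css_put(); the "foo" controller is hypothetical, and only the declaration above is assumed:

/* Illustrative sketch only -- "foo_cgrp_subsys" is a hypothetical controller. */
static void foo_inspect_effective_css(struct cgroup *cgrp)
{
	struct cgroup_subsys_state *css;

	/* Always returns a valid css (falls back to the root css) with a reference held. */
	css = cgroup_get_e_css(cgrp, &foo_cgrp_subsys);

	/* ... read controller state reachable from css ... */

	css_put(css);	/* drop the reference taken above */
}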
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 2f073db7392e..1b357997cac5 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -48,29 +48,16 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
 void cpuset_init_current_mems_allowed(void);
 int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask);
 
-extern int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask);
-extern int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask);
+extern int __cpuset_node_allowed(int node, gfp_t gfp_mask);
 
-static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
+static inline int cpuset_node_allowed(int node, gfp_t gfp_mask)
 {
-	return nr_cpusets() <= 1 ||
-		__cpuset_node_allowed_softwall(node, gfp_mask);
+	return nr_cpusets() <= 1 || __cpuset_node_allowed(node, gfp_mask);
 }
 
-static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
-{
-	return nr_cpusets() <= 1 ||
-		__cpuset_node_allowed_hardwall(node, gfp_mask);
-}
-
-static inline int cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
-{
-	return cpuset_node_allowed_softwall(zone_to_nid(z), gfp_mask);
-}
-
-static inline int cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
+static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 {
-	return cpuset_node_allowed_hardwall(zone_to_nid(z), gfp_mask);
+	return cpuset_node_allowed(zone_to_nid(z), gfp_mask);
 }
 
 extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
@@ -179,22 +166,12 @@ static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
 	return 1;
 }
 
-static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
-{
-	return 1;
-}
-
-static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
-{
-	return 1;
-}
-
-static inline int cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
+static inline int cpuset_node_allowed(int node, gfp_t gfp_mask)
 {
 	return 1;
 }
 
-static inline int cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
+static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 {
 	return 1;
 }
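
As a rough illustration of the new calling convention (not part of the patch): the softwall/hardwall distinction now travels in gfp_mask rather than in the function name, so a caller selects the hardwall behaviour by ORing in __GFP_HARDWALL. The helper below is hypothetical:

#include <linux/cpuset.h>
#include <linux/gfp.h>

/* Illustrative sketch only; this helper does not exist in the tree. */
static int may_alloc_on_node(int nid, gfp_t gfp_mask, bool hardwall_only)
{
	if (hardwall_only)
		gfp_mask |= __GFP_HARDWALL;	/* old *_hardwall() behaviour */

	/* without __GFP_HARDWALL this behaves like the old *_softwall() helpers */
	return cpuset_node_allowed(nid, gfp_mask);
}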
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 136eceadeed1..bb263d0caab3 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -277,6 +277,10 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
 	if (!(cgrp->root->subsys_mask & (1 << ss->id)))
 		return NULL;
 
+	/*
+	 * This function is used while updating css associations and thus
+	 * can't test the csses directly.  Use ->child_subsys_mask.
+	 */
 	while (cgroup_parent(cgrp) &&
 	       !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ss->id)))
 		cgrp = cgroup_parent(cgrp);
@@ -284,6 +288,39 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
 	return cgroup_css(cgrp, ss);
 }
 
+/**
+ * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
+ * @cgrp: the cgroup of interest
+ * @ss: the subsystem of interest
+ *
+ * Find and get the effective css of @cgrp for @ss.  The effective css is
+ * defined as the matching css of the nearest ancestor including self which
+ * has @ss enabled.  If @ss is not mounted on the hierarchy @cgrp is on,
+ * the root css is returned, so this function always returns a valid css.
+ * The returned css must be put using css_put().
+ */
+struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
+					     struct cgroup_subsys *ss)
+{
+	struct cgroup_subsys_state *css;
+
+	rcu_read_lock();
+
+	do {
+		css = cgroup_css(cgrp, ss);
+
+		if (css && css_tryget_online(css))
+			goto out_unlock;
+		cgrp = cgroup_parent(cgrp);
+	} while (cgrp);
+
+	css = init_css_set.subsys[ss->id];
+	css_get(css);
+out_unlock:
+	rcu_read_unlock();
+	return css;
+}
+
 /* convenient tests for these bits */
 static inline bool cgroup_is_dead(const struct cgroup *cgrp)
 {
@@ -1019,31 +1056,30 @@ static void cgroup_put(struct cgroup *cgrp)
 }
 
 /**
- * cgroup_refresh_child_subsys_mask - update child_subsys_mask
+ * cgroup_calc_child_subsys_mask - calculate child_subsys_mask
  * @cgrp: the target cgroup
+ * @subtree_control: the new subtree_control mask to consider
  *
  * On the default hierarchy, a subsystem may request other subsystems to be
  * enabled together through its ->depends_on mask.  In such cases, more
  * subsystems than specified in "cgroup.subtree_control" may be enabled.
  *
- * This function determines which subsystems need to be enabled given the
- * current @cgrp->subtree_control and records it in
- * @cgrp->child_subsys_mask.  The resulting mask is always a superset of
- * @cgrp->subtree_control and follows the usual hierarchy rules.
+ * This function calculates which subsystems need to be enabled if
+ * @subtree_control is to be applied to @cgrp.  The returned mask is always
+ * a superset of @subtree_control and follows the usual hierarchy rules.
  */
-static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp)
+static unsigned int cgroup_calc_child_subsys_mask(struct cgroup *cgrp,
+						  unsigned int subtree_control)
 {
 	struct cgroup *parent = cgroup_parent(cgrp);
-	unsigned int cur_ss_mask = cgrp->subtree_control;
+	unsigned int cur_ss_mask = subtree_control;
 	struct cgroup_subsys *ss;
 	int ssid;
 
 	lockdep_assert_held(&cgroup_mutex);
 
-	if (!cgroup_on_dfl(cgrp)) {
-		cgrp->child_subsys_mask = cur_ss_mask;
-		return;
-	}
+	if (!cgroup_on_dfl(cgrp))
+		return cur_ss_mask;
 
 	while (true) {
 		unsigned int new_ss_mask = cur_ss_mask;
@@ -1067,7 +1103,20 @@ static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp)
 		cur_ss_mask = new_ss_mask;
 	}
 
-	cgrp->child_subsys_mask = cur_ss_mask;
+	return cur_ss_mask;
+}
+
+/**
+ * cgroup_refresh_child_subsys_mask - update child_subsys_mask
+ * @cgrp: the target cgroup
+ *
+ * Update @cgrp->child_subsys_mask according to the current
+ * @cgrp->subtree_control using cgroup_calc_child_subsys_mask().
+ */
+static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp)
+{
+	cgrp->child_subsys_mask =
+		cgroup_calc_child_subsys_mask(cgrp, cgrp->subtree_control);
 }
 
 /**
1072 1121
1073/** 1122/**
@@ -2641,7 +2690,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 					    loff_t off)
 {
 	unsigned int enable = 0, disable = 0;
-	unsigned int css_enable, css_disable, old_ctrl, new_ctrl;
+	unsigned int css_enable, css_disable, old_sc, new_sc, old_ss, new_ss;
 	struct cgroup *cgrp, *child;
 	struct cgroup_subsys *ss;
 	char *tok;
@@ -2693,36 +2742,6 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 				ret = -ENOENT;
 				goto out_unlock;
 			}
-
-			/*
-			 * @ss is already enabled through dependency and
-			 * we'll just make it visible.  Skip draining.
-			 */
-			if (cgrp->child_subsys_mask & (1 << ssid))
-				continue;
-
-			/*
-			 * Because css offlining is asynchronous, userland
-			 * might try to re-enable the same controller while
-			 * the previous instance is still around.  In such
-			 * cases, wait till it's gone using offline_waitq.
-			 */
-			cgroup_for_each_live_child(child, cgrp) {
-				DEFINE_WAIT(wait);
-
-				if (!cgroup_css(child, ss))
-					continue;
-
-				cgroup_get(child);
-				prepare_to_wait(&child->offline_waitq, &wait,
-						TASK_UNINTERRUPTIBLE);
-				cgroup_kn_unlock(of->kn);
-				schedule();
-				finish_wait(&child->offline_waitq, &wait);
-				cgroup_put(child);
-
-				return restart_syscall();
-			}
 		} else if (disable & (1 << ssid)) {
 			if (!(cgrp->subtree_control & (1 << ssid))) {
 				disable &= ~(1 << ssid);
@@ -2758,19 +2777,48 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 	 * subsystems than specified may need to be enabled or disabled
 	 * depending on subsystem dependencies.
 	 */
-	cgrp->subtree_control |= enable;
-	cgrp->subtree_control &= ~disable;
+	old_sc = cgrp->subtree_control;
+	old_ss = cgrp->child_subsys_mask;
+	new_sc = (old_sc | enable) & ~disable;
+	new_ss = cgroup_calc_child_subsys_mask(cgrp, new_sc);
 
-	old_ctrl = cgrp->child_subsys_mask;
-	cgroup_refresh_child_subsys_mask(cgrp);
-	new_ctrl = cgrp->child_subsys_mask;
-
-	css_enable = ~old_ctrl & new_ctrl;
-	css_disable = old_ctrl & ~new_ctrl;
+	css_enable = ~old_ss & new_ss;
+	css_disable = old_ss & ~new_ss;
 	enable |= css_enable;
 	disable |= css_disable;
 
 	/*
+	 * Because css offlining is asynchronous, userland might try to
+	 * re-enable the same controller while the previous instance is
+	 * still around.  In such cases, wait till it's gone using
+	 * offline_waitq.
+	 */
+	for_each_subsys(ss, ssid) {
+		if (!(css_enable & (1 << ssid)))
+			continue;
+
+		cgroup_for_each_live_child(child, cgrp) {
+			DEFINE_WAIT(wait);
+
+			if (!cgroup_css(child, ss))
+				continue;
+
+			cgroup_get(child);
+			prepare_to_wait(&child->offline_waitq, &wait,
+					TASK_UNINTERRUPTIBLE);
+			cgroup_kn_unlock(of->kn);
+			schedule();
+			finish_wait(&child->offline_waitq, &wait);
+			cgroup_put(child);
+
+			return restart_syscall();
+		}
+	}
+
+	cgrp->subtree_control = new_sc;
+	cgrp->child_subsys_mask = new_ss;
+
+	/*
 	 * Create new csses or make the existing ones visible.  A css is
 	 * created invisible if it's being implicitly enabled through
 	 * dependency.  An invisible css is made visible when the userland
@@ -2825,6 +2873,24 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 		}
 	}
 
+	/*
+	 * The effective csses of all the descendants (excluding @cgrp) may
+	 * have changed.  Subsystems can optionally subscribe to this event
+	 * by implementing ->css_e_css_changed() which is invoked if any of
+	 * the effective csses seen from the css's cgroup may have changed.
+	 */
+	for_each_subsys(ss, ssid) {
+		struct cgroup_subsys_state *this_css = cgroup_css(cgrp, ss);
+		struct cgroup_subsys_state *css;
+
+		if (!ss->css_e_css_changed || !this_css)
+			continue;
+
+		css_for_each_descendant_pre(css, this_css)
+			if (css != this_css)
+				ss->css_e_css_changed(css);
+	}
+
 	kernfs_activate(cgrp->kn);
 	ret = 0;
 out_unlock:
@@ -2832,9 +2898,8 @@ out_unlock:
 	return ret ?: nbytes;
 
 err_undo_css:
-	cgrp->subtree_control &= ~enable;
-	cgrp->subtree_control |= disable;
-	cgroup_refresh_child_subsys_mask(cgrp);
+	cgrp->subtree_control = old_sc;
+	cgrp->child_subsys_mask = old_ss;
 
 	for_each_subsys(ss, ssid) {
 		if (!(enable & (1 << ssid)))
@@ -4370,6 +4435,8 @@ static void css_release_work_fn(struct work_struct *work)
 	if (ss) {
 		/* css release path */
 		cgroup_idr_remove(&ss->css_idr, css->id);
+		if (ss->css_released)
+			ss->css_released(css);
 	} else {
 		/* cgroup release path */
 		cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
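
For context, a hypothetical controller wiring up the two callbacks introduced above (illustrative only; nothing beyond the struct cgroup_subsys fields added in this diff is assumed):

/* Illustrative sketch only -- "foo" is a hypothetical controller. */
static void foo_css_released(struct cgroup_subsys_state *css)
{
	/* runs from css_release_work_fn() after the css id is removed */
}

static void foo_css_e_css_changed(struct cgroup_subsys_state *css)
{
	/* an effective css visible from this css's cgroup may have changed */
}

struct cgroup_subsys foo_cgrp_subsys = {
	/* .css_alloc, .css_free, etc. omitted from this sketch */
	.css_released		= foo_css_released,
	.css_e_css_changed	= foo_css_e_css_changed,
};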
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 723cfc9d0ad7..64b257f6bca2 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -248,34 +248,34 @@ static struct cpuset top_cpuset = {
 	if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
 
 /*
- * There are two global mutexes guarding cpuset structures - cpuset_mutex
- * and callback_mutex.  The latter may nest inside the former.  We also
- * require taking task_lock() when dereferencing a task's cpuset pointer.
- * See "The task_lock() exception", at the end of this comment.
+ * There are two global locks guarding cpuset structures - cpuset_mutex and
+ * callback_lock.  We also require taking task_lock() when dereferencing a
+ * task's cpuset pointer.  See "The task_lock() exception", at the end of this
+ * comment.
  *
- * A task must hold both mutexes to modify cpusets.  If a task holds
+ * A task must hold both locks to modify cpusets.  If a task holds
  * cpuset_mutex, then it blocks others wanting that mutex, ensuring that it
- * is the only task able to also acquire callback_mutex and be able to
+ * is the only task able to also acquire callback_lock and be able to
  * modify cpusets.  It can perform various checks on the cpuset structure
  * first, knowing nothing will change.  It can also allocate memory while
  * just holding cpuset_mutex.  While it is performing these checks, various
- * callback routines can briefly acquire callback_mutex to query cpusets.
- * Once it is ready to make the changes, it takes callback_mutex, blocking
+ * callback routines can briefly acquire callback_lock to query cpusets.
+ * Once it is ready to make the changes, it takes callback_lock, blocking
  * everyone else.
  *
  * Calls to the kernel memory allocator can not be made while holding
- * callback_mutex, as that would risk double tripping on callback_mutex
+ * callback_lock, as that would risk double tripping on callback_lock
  * from one of the callbacks into the cpuset code from within
  * __alloc_pages().
  *
- * If a task is only holding callback_mutex, then it has read-only
+ * If a task is only holding callback_lock, then it has read-only
  * access to cpusets.
  *
  * Now, the task_struct fields mems_allowed and mempolicy may be changed
 * by other task, we use alloc_lock in the task_struct fields to protect
 * them.
 *
- * The cpuset_common_file_read() handlers only hold callback_mutex across
+ * The cpuset_common_file_read() handlers only hold callback_lock across
 * small pieces of code, such as when reading out possibly multi-word
 * cpumasks and nodemasks.
 *
@@ -284,7 +284,7 @@ static struct cpuset top_cpuset = {
  */
 
 static DEFINE_MUTEX(cpuset_mutex);
-static DEFINE_MUTEX(callback_mutex);
+static DEFINE_SPINLOCK(callback_lock);
 
 /*
  * CPU / memory hotplug is handled asynchronously.
@@ -329,7 +329,7 @@ static struct file_system_type cpuset_fs_type = {
  * One way or another, we guarantee to return some non-empty subset
  * of cpu_online_mask.
  *
- * Call with callback_mutex held.
+ * Call with callback_lock or cpuset_mutex held.
  */
 static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
 {
@@ -347,7 +347,7 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
  * One way or another, we guarantee to return some non-empty subset
  * of node_states[N_MEMORY].
  *
- * Call with callback_mutex held.
+ * Call with callback_lock or cpuset_mutex held.
  */
 static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
 {
@@ -359,7 +359,7 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
 /*
  * update task's spread flag if cpuset's page/slab spread flag is set
  *
- * Called with callback_mutex/cpuset_mutex held
+ * Call with callback_lock or cpuset_mutex held.
  */
 static void cpuset_update_task_spread_flag(struct cpuset *cs,
 					struct task_struct *tsk)
@@ -886,9 +886,9 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
 			continue;
 		rcu_read_unlock();
 
-		mutex_lock(&callback_mutex);
+		spin_lock_irq(&callback_lock);
 		cpumask_copy(cp->effective_cpus, new_cpus);
-		mutex_unlock(&callback_mutex);
+		spin_unlock_irq(&callback_lock);
 
 		WARN_ON(!cgroup_on_dfl(cp->css.cgroup) &&
 			!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
@@ -953,9 +953,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 	if (retval < 0)
 		return retval;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	/* use trialcs->cpus_allowed as a temp variable */
 	update_cpumasks_hier(cs, trialcs->cpus_allowed);
@@ -1142,9 +1142,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
 			continue;
 		rcu_read_unlock();
 
-		mutex_lock(&callback_mutex);
+		spin_lock_irq(&callback_lock);
 		cp->effective_mems = *new_mems;
-		mutex_unlock(&callback_mutex);
+		spin_unlock_irq(&callback_lock);
 
 		WARN_ON(!cgroup_on_dfl(cp->css.cgroup) &&
 			!nodes_equal(cp->mems_allowed, cp->effective_mems));
@@ -1165,7 +1165,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
  * mempolicies and if the cpuset is marked 'memory_migrate',
  * migrate the tasks pages to the new memory.
  *
- * Call with cpuset_mutex held.  May take callback_mutex during call.
+ * Call with cpuset_mutex held.  May take callback_lock during call.
  * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
  * lock each such tasks mm->mmap_sem, scan its vma's and rebind
  * their mempolicies to the cpusets new mems_allowed.
@@ -1212,9 +1212,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
 	if (retval < 0)
 		goto done;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cs->mems_allowed = trialcs->mems_allowed;
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	/* use trialcs->mems_allowed as a temp variable */
 	update_nodemasks_hier(cs, &cs->mems_allowed);
@@ -1305,9 +1305,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 	spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
 			|| (is_spread_page(cs) != is_spread_page(trialcs)));
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cs->flags = trialcs->flags;
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
 		rebuild_sched_domains_locked();
@@ -1714,7 +1714,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
 	count = seq_get_buf(sf, &buf);
 	s = buf;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 
 	switch (type) {
 	case FILE_CPULIST:
@@ -1741,7 +1741,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
 		seq_commit(sf, -1);
 	}
 out_unlock:
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 	return ret;
 }
 
@@ -1958,12 +1958,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 
 	cpuset_inc();
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	if (cgroup_on_dfl(cs->css.cgroup)) {
 		cpumask_copy(cs->effective_cpus, parent->effective_cpus);
 		cs->effective_mems = parent->effective_mems;
 	}
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
 		goto out_unlock;
@@ -1990,10 +1990,10 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 	}
 	rcu_read_unlock();
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cs->mems_allowed = parent->mems_allowed;
 	cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 out_unlock:
 	mutex_unlock(&cpuset_mutex);
 	return 0;
@@ -2032,7 +2032,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css)
 static void cpuset_bind(struct cgroup_subsys_state *root_css)
 {
 	mutex_lock(&cpuset_mutex);
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 
 	if (cgroup_on_dfl(root_css->cgroup)) {
 		cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
@@ -2043,7 +2043,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
 		top_cpuset.mems_allowed = top_cpuset.effective_mems;
 	}
 
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 	mutex_unlock(&cpuset_mutex);
 }
 
@@ -2128,12 +2128,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
 {
 	bool is_empty;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->cpus_allowed, new_cpus);
 	cpumask_copy(cs->effective_cpus, new_cpus);
 	cs->mems_allowed = *new_mems;
 	cs->effective_mems = *new_mems;
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	/*
 	 * Don't call update_tasks_cpumask() if the cpuset becomes empty,
@@ -2170,10 +2170,10 @@ hotplug_update_tasks(struct cpuset *cs,
 	if (nodes_empty(*new_mems))
 		*new_mems = parent_cs(cs)->effective_mems;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->effective_cpus, new_cpus);
 	cs->effective_mems = *new_mems;
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	if (cpus_updated)
 		update_tasks_cpumask(cs);
@@ -2259,21 +2259,21 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 
 	/* synchronize cpus_allowed to cpu_active_mask */
 	if (cpus_updated) {
-		mutex_lock(&callback_mutex);
+		spin_lock_irq(&callback_lock);
 		if (!on_dfl)
 			cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
 		cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
-		mutex_unlock(&callback_mutex);
+		spin_unlock_irq(&callback_lock);
 		/* we don't mess with cpumasks of tasks in top_cpuset */
 	}
 
 	/* synchronize mems_allowed to N_MEMORY */
 	if (mems_updated) {
-		mutex_lock(&callback_mutex);
+		spin_lock_irq(&callback_lock);
 		if (!on_dfl)
 			top_cpuset.mems_allowed = new_mems;
 		top_cpuset.effective_mems = new_mems;
-		mutex_unlock(&callback_mutex);
+		spin_unlock_irq(&callback_lock);
 		update_tasks_nodemask(&top_cpuset);
 	}
 
@@ -2366,11 +2366,13 @@ void __init cpuset_init_smp(void)
 
 void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 {
-	mutex_lock(&callback_mutex);
+	unsigned long flags;
+
+	spin_lock_irqsave(&callback_lock, flags);
 	rcu_read_lock();
 	guarantee_online_cpus(task_cs(tsk), pmask);
 	rcu_read_unlock();
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irqrestore(&callback_lock, flags);
 }
 
 void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
@@ -2416,12 +2418,13 @@ void cpuset_init_current_mems_allowed(void)
 nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
 {
 	nodemask_t mask;
+	unsigned long flags;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irqsave(&callback_lock, flags);
 	rcu_read_lock();
 	guarantee_online_mems(task_cs(tsk), &mask);
 	rcu_read_unlock();
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irqrestore(&callback_lock, flags);
 
 	return mask;
 }
@@ -2440,7 +2443,7 @@ int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
 /*
  * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
  * mem_hardwall ancestor to the specified cpuset.  Call holding
- * callback_mutex.  If no ancestor is mem_exclusive or mem_hardwall
+ * callback_lock.  If no ancestor is mem_exclusive or mem_hardwall
  * (an unusual configuration), then returns the root cpuset.
  */
 static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
@@ -2451,7 +2454,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
 }
 
 /**
- * cpuset_node_allowed_softwall - Can we allocate on a memory node?
+ * cpuset_node_allowed - Can we allocate on a memory node?
  * @node: is this an allowed node?
  * @gfp_mask: memory allocation flags
  *
@@ -2463,13 +2466,6 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
  * flag, yes.
  * Otherwise, no.
  *
- * If __GFP_HARDWALL is set, cpuset_node_allowed_softwall() reduces to
- * cpuset_node_allowed_hardwall().  Otherwise, cpuset_node_allowed_softwall()
- * might sleep, and might allow a node from an enclosing cpuset.
- *
- * cpuset_node_allowed_hardwall() only handles the simpler case of hardwall
- * cpusets, and never sleeps.
- *
  * The __GFP_THISNODE placement logic is really handled elsewhere,
  * by forcibly using a zonelist starting at a specified node, and by
  * (in get_page_from_freelist()) refusing to consider the zones for
@@ -2482,13 +2478,12 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
  * GFP_KERNEL allocations are not so marked, so can escape to the
  * nearest enclosing hardwalled ancestor cpuset.
  *
- * Scanning up parent cpusets requires callback_mutex.  The
+ * Scanning up parent cpusets requires callback_lock.  The
  * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
  * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
  * current tasks mems_allowed came up empty on the first pass over
  * the zonelist.  So only GFP_KERNEL allocations, if all nodes in the
- * cpuset are short of memory, might require taking the callback_mutex
- * mutex.
+ * cpuset are short of memory, might require taking the callback_lock.
  *
  * The first call here from mm/page_alloc:get_page_from_freelist()
  * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
@@ -2505,20 +2500,15 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
  *	TIF_MEMDIE   - any node ok
  *	GFP_KERNEL   - any node in enclosing hardwalled cpuset ok
  *	GFP_USER     - only nodes in current tasks mems allowed ok.
- *
- * Rule:
- *    Don't call cpuset_node_allowed_softwall if you can't sleep, unless you
- *    pass in the __GFP_HARDWALL flag set in gfp_flag, which disables
- *    the code that might scan up ancestor cpusets and sleep.
  */
-int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
+int __cpuset_node_allowed(int node, gfp_t gfp_mask)
 {
 	struct cpuset *cs;		/* current cpuset ancestors */
 	int allowed;			/* is allocation in zone z allowed? */
+	unsigned long flags;
 
 	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
 		return 1;
-	might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
 	if (node_isset(node, current->mems_allowed))
 		return 1;
 	/*
@@ -2534,55 +2524,17 @@ int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
 		return 1;
 
 	/* Not hardwall and node outside mems_allowed: scan up cpusets */
-	mutex_lock(&callback_mutex);
+	spin_lock_irqsave(&callback_lock, flags);
 
 	rcu_read_lock();
 	cs = nearest_hardwall_ancestor(task_cs(current));
 	allowed = node_isset(node, cs->mems_allowed);
 	rcu_read_unlock();
 
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irqrestore(&callback_lock, flags);
 	return allowed;
 }
 
-/*
- * cpuset_node_allowed_hardwall - Can we allocate on a memory node?
- * @node: is this an allowed node?
- * @gfp_mask: memory allocation flags
- *
- * If we're in interrupt, yes, we can always allocate.  If __GFP_THISNODE is
- * set, yes, we can always allocate.  If node is in our task's mems_allowed,
- * yes.  If the task has been OOM killed and has access to memory reserves as
- * specified by the TIF_MEMDIE flag, yes.
- * Otherwise, no.
- *
- * The __GFP_THISNODE placement logic is really handled elsewhere,
- * by forcibly using a zonelist starting at a specified node, and by
- * (in get_page_from_freelist()) refusing to consider the zones for
- * any node on the zonelist except the first.  By the time any such
- * calls get to this routine, we should just shut up and say 'yes'.
- *
- * Unlike the cpuset_node_allowed_softwall() variant, above,
- * this variant requires that the node be in the current task's
- * mems_allowed or that we're in interrupt.  It does not scan up the
- * cpuset hierarchy for the nearest enclosing mem_exclusive cpuset.
- * It never sleeps.
- */
-int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
-{
-	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
-		return 1;
-	if (node_isset(node, current->mems_allowed))
-		return 1;
-	/*
-	 * Allow tasks that have access to memory reserves because they have
-	 * been OOM killed to get memory anywhere.
-	 */
-	if (unlikely(test_thread_flag(TIF_MEMDIE)))
-		return 1;
-	return 0;
-}
-
 /**
  * cpuset_mem_spread_node() - On which node to begin search for a file page
  * cpuset_slab_spread_node() - On which node to begin search for a slab page
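
A short sketch (not from this patch) of the locking pattern the conversion follows: paths known to run in process context with interrupts enabled use the _irq variants, while paths that may be entered with interrupts already disabled save and restore the flags. The lock and helpers below are hypothetical stand-ins:

#include <linux/spinlock.h>

/* Illustrative sketch only; example_lock stands in for callback_lock. */
static DEFINE_SPINLOCK(example_lock);

static void update_in_process_context(void)
{
	spin_lock_irq(&example_lock);
	/* ... short, non-sleeping update of protected state ... */
	spin_unlock_irq(&example_lock);
}

static void read_from_any_context(void)
{
	unsigned long flags;

	spin_lock_irqsave(&example_lock, flags);
	/* ... short, non-sleeping read of protected state ... */
	spin_unlock_irqrestore(&example_lock, flags);
}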
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 30cd96879152..919b86a2164d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -582,7 +582,7 @@ retry_cpuset:
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 						MAX_NR_ZONES - 1, nodemask) {
-		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask(h))) {
+		if (cpuset_zone_allowed(zone, htlb_alloc_mask(h))) {
 			page = dequeue_huge_page_node(h, zone_to_nid(zone));
 			if (page) {
 				if (avoid_reserve)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 3b014d326151..864bba992735 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -233,7 +233,7 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
 	/* Check this allocation failure is caused by cpuset's wall function */
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 			high_zoneidx, nodemask)
-		if (!cpuset_zone_allowed_softwall(zone, gfp_mask))
+		if (!cpuset_zone_allowed(zone, gfp_mask))
 			cpuset_limited = true;
 
 	if (cpuset_limited) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a7198c065999..df542feaac3b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1990,7 +1990,7 @@ zonelist_scan:
 
 	/*
 	 * Scan zonelist, looking for a zone with enough free.
-	 * See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c.
+	 * See also __cpuset_node_allowed() comment in kernel/cpuset.c.
 	 */
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 						high_zoneidx, nodemask) {
@@ -2001,7 +2001,7 @@ zonelist_scan:
 			continue;
 		if (cpusets_enabled() &&
 			(alloc_flags & ALLOC_CPUSET) &&
-			!cpuset_zone_allowed_softwall(zone, gfp_mask))
+			!cpuset_zone_allowed(zone, gfp_mask))
 				continue;
 		/*
 		 * Distribute pages in proportion to the individual
@@ -2529,7 +2529,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 			alloc_flags |= ALLOC_HARDER;
 		/*
 		 * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the
-		 * comment for __cpuset_node_allowed_softwall().
+		 * comment for __cpuset_node_allowed().
 		 */
 		alloc_flags &= ~ALLOC_CPUSET;
 	} else if (unlikely(rt_task(current)) && !in_interrupt())
diff --git a/mm/slab.c b/mm/slab.c
index 79e15f0a2a6e..fee275b5b6b7 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3015,7 +3015,7 @@ retry:
 	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 		nid = zone_to_nid(zone);
 
-		if (cpuset_zone_allowed_hardwall(zone, flags) &&
+		if (cpuset_zone_allowed(zone, flags | __GFP_HARDWALL) &&
 			get_node(cache, nid) &&
 			get_node(cache, nid)->free_objects) {
 				obj = ____cache_alloc_node(cache,
diff --git a/mm/slub.c b/mm/slub.c
index 386bbed76e94..765c5884d03d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1665,7 +1665,8 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
 
 			n = get_node(s, zone_to_nid(zone));
 
-			if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
+			if (n && cpuset_zone_allowed(zone,
+						     flags | __GFP_HARDWALL) &&
 					n->nr_partial > s->min_partial) {
 				object = get_partial_node(s, n, c, flags);
 				if (object) {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4636d9e822c1..a384339bf718 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2405,7 +2405,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 			 * to global LRU.
 			 */
 			if (global_reclaim(sc)) {
-				if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
+				if (!cpuset_zone_allowed(zone,
+							 GFP_KERNEL | __GFP_HARDWALL))
 					continue;
 
 				lru_pages += zone_reclaimable_pages(zone);
@@ -3388,7 +3389,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
 	if (!populated_zone(zone))
 		return;
 
-	if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
+	if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL))
 		return;
 	pgdat = zone->zone_pgdat;
 	if (pgdat->kswapd_max_order < order) {
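
Summing up the caller-side rule applied across the mm/ hunks above, a hypothetical zonelist walk (illustrative only; this helper does not exist in the tree) that wants the old hardwall semantics ORs __GFP_HARDWALL into the mask it passes:

/* Illustrative sketch only. */
static struct zone *first_allowed_zone(struct zonelist *zonelist,
				       enum zone_type high_zoneidx,
				       gfp_t gfp_mask)
{
	struct zoneref *z;
	struct zone *zone;

	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
		/* hardwall: don't escape to an ancestor cpuset's nodes */
		if (!cpuset_zone_allowed(zone, gfp_mask | __GFP_HARDWALL))
			continue;
		return zone;
	}
	return NULL;
}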