From aa6ec29bee8692ce232132f1a1ea2a1f9196610e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Jul 2014 10:08:08 -0400 Subject: cgroup: remove sane_behavior support on non-default hierarchies sane_behavior has been used as a development vehicle for the default unified hierarchy. Now that the default hierarchy is in place, the flag became redundant and confusing as its usage is allowed on all hierarchies. There are gonna be either the default hierarchy or legacy ones. Let's make that clear by removing sane_behavior support on non-default hierarchies. This patch replaces cgroup_sane_behavior() with cgroup_on_dfl(). The comment on top of CGRP_ROOT_SANE_BEHAVIOR is moved to on top of cgroup_on_dfl() with sane_behavior specific part dropped. On the default and legacy hierarchies w/o sane_behavior, this shouldn't cause any behavior differences. Signed-off-by: Tejun Heo Acked-by: Vivek Goyal Acked-by: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko --- kernel/cpuset.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f6b33c696224..f9d4807c869f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1383,12 +1383,9 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css, mutex_lock(&cpuset_mutex); - /* - * We allow to move tasks into an empty cpuset if sane_behavior - * flag is set. - */ + /* allow moving tasks into an empty cpuset if on default hierarchy */ ret = -ENOSPC; - if (!cgroup_sane_behavior(css->cgroup) && + if (!cgroup_on_dfl(css->cgroup) && (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) goto out_unlock; @@ -2030,7 +2027,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs) static cpumask_t off_cpus; static nodemask_t off_mems; bool is_empty; - bool sane = cgroup_sane_behavior(cs->css.cgroup); + bool on_dfl = cgroup_on_dfl(cs->css.cgroup); retry: wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); @@ -2054,12 +2051,12 @@ retry: mutex_unlock(&callback_mutex); /* - * If sane_behavior flag is set, we need to update tasks' cpumask - * for empty cpuset to take on ancestor's cpumask. Otherwise, don't - * call update_tasks_cpumask() if the cpuset becomes empty, as - * the tasks in it will be migrated to an ancestor. + * If on_dfl, we need to update tasks' cpumask for empty cpuset to + * take on ancestor's cpumask. Otherwise, don't call + * update_tasks_cpumask() if the cpuset becomes empty, as the tasks + * in it will be migrated to an ancestor. */ - if ((sane && cpumask_empty(cs->cpus_allowed)) || + if ((on_dfl && cpumask_empty(cs->cpus_allowed)) || (!cpumask_empty(&off_cpus) && !cpumask_empty(cs->cpus_allowed))) update_tasks_cpumask(cs); @@ -2068,12 +2065,12 @@ retry: mutex_unlock(&callback_mutex); /* - * If sane_behavior flag is set, we need to update tasks' nodemask - * for empty cpuset to take on ancestor's nodemask. Otherwise, don't - * call update_tasks_nodemask() if the cpuset becomes empty, as - * the tasks in it will be migratd to an ancestor. + * If on_dfl, we need to update tasks' nodemask for empty cpuset to + * take on ancestor's nodemask. Otherwise, don't call + * update_tasks_nodemask() if the cpuset becomes empty, as the + * tasks in it will be migratd to an ancestor. */ - if ((sane && nodes_empty(cs->mems_allowed)) || + if ((on_dfl && nodes_empty(cs->mems_allowed)) || (!nodes_empty(off_mems) && !nodes_empty(cs->mems_allowed))) update_tasks_nodemask(cs); @@ -2083,13 +2080,13 @@ retry: mutex_unlock(&cpuset_mutex); /* - * If sane_behavior flag is set, we'll keep tasks in empty cpusets. + * If on_dfl, we'll keep tasks in empty cpusets. * * Otherwise move tasks to the nearest ancestor with execution * resources. This is full cgroup operation which will * also call back into cpuset. Should be done outside any lock. */ - if (!sane && is_empty) + if (!on_dfl && is_empty) remove_tasks_in_empty_cpuset(cs); } -- cgit v1.2.2 From e2b9a3d7d8f4ab2f3491b8ed2ac6af692a2269b2 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:47:03 +0800 Subject: cpuset: add cs->effective_cpus and cs->effective_mems We're going to have separate user-configured masks and effective ones. Eventually configured masks can only be changed by writing cpuset.cpus and cpuset.mems, and they won't be restricted by parent cpuset. While effective masks reflect cpu/memory hotplug and hierachical restriction, and these are the real masks that apply to the tasks in the cpuset. We calculate effective mask this way: - top cpuset's effective_mask == online_mask, otherwise - cpuset's effective_mask == configured_mask & parent effective_mask, if the result is empty, it inherits parent effective mask. Those behavior changes are for default hierarchy only. For legacy hierachy, effective_mask and configured_mask are the same, so we won't break old interfaces. This patch adds the effective masks to struct cpuset and initializes them. The effective masks of the top cpuset is the same with configured masks, and a child cpuset inherits its parent's effective masks. This won't introduce behavior change. v2: - s/real_{mems,cpus}_allowed/effective_{mems,cpus}, suggested by Tejun. - don't init effective masks in cpuset_css_online() if !cgroup_on_dfl. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 11 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f9d4807c869f..ef0974c73b4b 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -76,8 +76,14 @@ struct cpuset { struct cgroup_subsys_state css; unsigned long flags; /* "unsigned long" so bitops work */ - cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ - nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ + + /* user-configured CPUs and Memory Nodes allow to tasks */ + cpumask_var_t cpus_allowed; + nodemask_t mems_allowed; + + /* effective CPUs and Memory Nodes allow to tasks */ + cpumask_var_t effective_cpus; + nodemask_t effective_mems; /* * This is old Memory Nodes tasks took on. @@ -376,13 +382,20 @@ static struct cpuset *alloc_trial_cpuset(struct cpuset *cs) if (!trial) return NULL; - if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) { - kfree(trial); - return NULL; - } - cpumask_copy(trial->cpus_allowed, cs->cpus_allowed); + if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) + goto free_cs; + if (!alloc_cpumask_var(&trial->effective_cpus, GFP_KERNEL)) + goto free_cpus; + cpumask_copy(trial->cpus_allowed, cs->cpus_allowed); + cpumask_copy(trial->effective_cpus, cs->effective_cpus); return trial; + +free_cpus: + free_cpumask_var(trial->cpus_allowed); +free_cs: + kfree(trial); + return NULL; } /** @@ -391,6 +404,7 @@ static struct cpuset *alloc_trial_cpuset(struct cpuset *cs) */ static void free_trial_cpuset(struct cpuset *trial) { + free_cpumask_var(trial->effective_cpus); free_cpumask_var(trial->cpus_allowed); kfree(trial); } @@ -1848,18 +1862,26 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css) cs = kzalloc(sizeof(*cs), GFP_KERNEL); if (!cs) return ERR_PTR(-ENOMEM); - if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) { - kfree(cs); - return ERR_PTR(-ENOMEM); - } + if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) + goto free_cs; + if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL)) + goto free_cpus; set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); cpumask_clear(cs->cpus_allowed); nodes_clear(cs->mems_allowed); + cpumask_clear(cs->effective_cpus); + nodes_clear(cs->effective_mems); fmeter_init(&cs->fmeter); cs->relax_domain_level = -1; return &cs->css; + +free_cpus: + free_cpumask_var(cs->cpus_allowed); +free_cs: + kfree(cs); + return ERR_PTR(-ENOMEM); } static int cpuset_css_online(struct cgroup_subsys_state *css) @@ -1882,6 +1904,13 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) cpuset_inc(); + mutex_lock(&callback_mutex); + if (cgroup_on_dfl(cs->css.cgroup)) { + cpumask_copy(cs->effective_cpus, parent->effective_cpus); + cs->effective_mems = parent->effective_mems; + } + mutex_unlock(&callback_mutex); + if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) goto out_unlock; @@ -1941,6 +1970,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) { struct cpuset *cs = css_cs(css); + free_cpumask_var(cs->effective_cpus); free_cpumask_var(cs->cpus_allowed); kfree(cs); } @@ -1969,9 +1999,13 @@ int __init cpuset_init(void) if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL)) BUG(); + if (!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL)) + BUG(); cpumask_setall(top_cpuset.cpus_allowed); nodes_setall(top_cpuset.mems_allowed); + cpumask_setall(top_cpuset.effective_cpus); + nodes_setall(top_cpuset.effective_mems); fmeter_init(&top_cpuset.fmeter); set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags); @@ -2207,6 +2241,9 @@ void __init cpuset_init_smp(void) top_cpuset.mems_allowed = node_states[N_MEMORY]; top_cpuset.old_mems_allowed = top_cpuset.mems_allowed; + cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask); + top_cpuset.effective_mems = node_states[N_MEMORY]; + register_hotmemory_notifier(&cpuset_track_online_nodes_nb); } -- cgit v1.2.2 From 1344ab9c2991b45bacfd2e26a8800a62663ae427 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:47:16 +0800 Subject: cpuset: update cpuset->effective_{cpus,mems} at hotplug We're going to have separate user-configured masks and effective ones. Eventually configured masks can only be changed by writing cpuset.cpus and cpuset.mems, and they won't be restricted by parent cpuset. While effective masks reflect cpu/memory hotplug and hierachical restriction, and these are the real masks that apply to the tasks in the cpuset. We calculate effective mask this way: - top cpuset's effective_mask == online_mask, otherwise - cpuset's effective_mask == configured_mask & parent effective_mask, if the result is empty, it inherits parent effective mask. Those behavior changes are for default hierarchy only. For legacy hierarchy, effective_mask and configured_mask are the same, so we won't break old interfaces. To make cs->effective_{cpus,mems} to be effective masks, we need to - update the effective masks at hotplug - update the effective masks at config change - take on ancestor's mask when the effective mask is empty The first item is done here. This won't introduce behavior change. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index ef0974c73b4b..94f651d2eee5 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2082,6 +2082,7 @@ retry: mutex_lock(&callback_mutex); cpumask_andnot(cs->cpus_allowed, cs->cpus_allowed, &off_cpus); + cpumask_andnot(cs->effective_cpus, cs->effective_cpus, &off_cpus); mutex_unlock(&callback_mutex); /* @@ -2096,6 +2097,7 @@ retry: mutex_lock(&callback_mutex); nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems); + nodes_andnot(cs->effective_mems, cs->effective_mems, off_mems); mutex_unlock(&callback_mutex); /* @@ -2159,6 +2161,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) if (cpus_updated) { mutex_lock(&callback_mutex); cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); + cpumask_copy(top_cpuset.effective_cpus, &new_cpus); mutex_unlock(&callback_mutex); /* we don't mess with cpumasks of tasks in top_cpuset */ } @@ -2167,6 +2170,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) if (mems_updated) { mutex_lock(&callback_mutex); top_cpuset.mems_allowed = new_mems; + top_cpuset.effective_mems = new_mems; mutex_unlock(&callback_mutex); update_tasks_nodemask(&top_cpuset); } -- cgit v1.2.2 From 734d45130cb4f668fb33d182f6943523628582ef Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:47:29 +0800 Subject: cpuset: update cs->effective_{cpus, mems} when config changes We're going to have separate user-configured masks and effective ones. Eventually configured masks can only be changed by writing cpuset.cpus and cpuset.mems, and they won't be restricted by parent cpuset. While effective masks reflect cpu/memory hotplug and hierachical restriction, and these are the real masks that apply to the tasks in the cpuset. We calculate effective mask this way: - top cpuset's effective_mask == online_mask, otherwise - cpuset's effective_mask == configured_mask & parent effective_mask, if the result is empty, it inherits parent effective mask. Those behavior changes are for default hierarchy only. For legacy hierarchy, effective_mask and configured_mask are the same, so we won't break old interfaces. To make cs->effective_{cpus,mems} to be effective masks, we need to - update the effective masks at hotplug - update the effective masks at config change - take on ancestor's mask when the effective mask is empty The second item is done here. We don't need to treat root_cs specially in update_cpumasks_hier(). This won't introduce behavior change. v3: - add a WARN_ON() to check if effective masks are the same with configured masks on legacy hierarchy. - pass trialcs->cpus_allowed to update_cpumasks_hier() and add a comment for it. Similar change for update_nodemasks_hier(). Suggested by Tejun. v2: - revise the comment in update_{cpu,node}masks_hier(), suggested by Tejun. - fix to use @cp instead of @cs in these two functions. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 88 +++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 54 insertions(+), 34 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 94f651d2eee5..da766c3736c4 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -855,36 +855,45 @@ static void update_tasks_cpumask(struct cpuset *cs) } /* - * update_tasks_cpumask_hier - Update the cpumasks of tasks in the hierarchy. - * @root_cs: the root cpuset of the hierarchy - * @update_root: update root cpuset or not? + * update_cpumasks_hier - Update effective cpumasks and tasks in the subtree + * @cs: the cpuset to consider + * @new_cpus: temp variable for calculating new effective_cpus + * + * When congifured cpumask is changed, the effective cpumasks of this cpuset + * and all its descendants need to be updated. * - * This will update cpumasks of tasks in @root_cs and all other empty cpusets - * which take on cpumask of @root_cs. + * On legacy hierachy, effective_cpus will be the same with cpu_allowed. * * Called with cpuset_mutex held */ -static void update_tasks_cpumask_hier(struct cpuset *root_cs, bool update_root) +static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) { struct cpuset *cp; struct cgroup_subsys_state *pos_css; rcu_read_lock(); - cpuset_for_each_descendant_pre(cp, pos_css, root_cs) { - if (cp == root_cs) { - if (!update_root) - continue; - } else { - /* skip the whole subtree if @cp have some CPU */ - if (!cpumask_empty(cp->cpus_allowed)) { - pos_css = css_rightmost_descendant(pos_css); - continue; - } + cpuset_for_each_descendant_pre(cp, pos_css, cs) { + struct cpuset *parent = parent_cs(cp); + + cpumask_and(new_cpus, cp->cpus_allowed, parent->effective_cpus); + + /* Skip the whole subtree if the cpumask remains the same. */ + if (cpumask_equal(new_cpus, cp->effective_cpus)) { + pos_css = css_rightmost_descendant(pos_css); + continue; } + if (!css_tryget_online(&cp->css)) continue; rcu_read_unlock(); + mutex_lock(&callback_mutex); + cpumask_copy(cp->effective_cpus, new_cpus); + mutex_unlock(&callback_mutex); + + WARN_ON(!cgroup_on_dfl(cp->css.cgroup) && + !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); + update_tasks_cpumask(cp); rcu_read_lock(); @@ -940,7 +949,8 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); mutex_unlock(&callback_mutex); - update_tasks_cpumask_hier(cs, true); + /* use trialcs->cpus_allowed as a temp variable */ + update_cpumasks_hier(cs, trialcs->cpus_allowed); if (is_load_balanced) rebuild_sched_domains_locked(); @@ -1091,36 +1101,45 @@ static void update_tasks_nodemask(struct cpuset *cs) } /* - * update_tasks_nodemask_hier - Update the nodemasks of tasks in the hierarchy. - * @cs: the root cpuset of the hierarchy - * @update_root: update the root cpuset or not? + * update_nodemasks_hier - Update effective nodemasks and tasks in the subtree + * @cs: the cpuset to consider + * @new_mems: a temp variable for calculating new effective_mems + * + * When configured nodemask is changed, the effective nodemasks of this cpuset + * and all its descendants need to be updated. * - * This will update nodemasks of tasks in @root_cs and all other empty cpusets - * which take on nodemask of @root_cs. + * On legacy hiearchy, effective_mems will be the same with mems_allowed. * * Called with cpuset_mutex held */ -static void update_tasks_nodemask_hier(struct cpuset *root_cs, bool update_root) +static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) { struct cpuset *cp; struct cgroup_subsys_state *pos_css; rcu_read_lock(); - cpuset_for_each_descendant_pre(cp, pos_css, root_cs) { - if (cp == root_cs) { - if (!update_root) - continue; - } else { - /* skip the whole subtree if @cp have some CPU */ - if (!nodes_empty(cp->mems_allowed)) { - pos_css = css_rightmost_descendant(pos_css); - continue; - } + cpuset_for_each_descendant_pre(cp, pos_css, cs) { + struct cpuset *parent = parent_cs(cp); + + nodes_and(*new_mems, cp->mems_allowed, parent->effective_mems); + + /* Skip the whole subtree if the nodemask remains the same. */ + if (nodes_equal(*new_mems, cp->effective_mems)) { + pos_css = css_rightmost_descendant(pos_css); + continue; } + if (!css_tryget_online(&cp->css)) continue; rcu_read_unlock(); + mutex_lock(&callback_mutex); + cp->effective_mems = *new_mems; + mutex_unlock(&callback_mutex); + + WARN_ON(!cgroup_on_dfl(cp->css.cgroup) && + nodes_equal(cp->mems_allowed, cp->effective_mems)); + update_tasks_nodemask(cp); rcu_read_lock(); @@ -1188,7 +1207,8 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, cs->mems_allowed = trialcs->mems_allowed; mutex_unlock(&callback_mutex); - update_tasks_nodemask_hier(cs, true); + /* use trialcs->mems_allowed as a temp variable */ + update_nodemasks_hier(cs, &cs->mems_allowed); done: return retval; } -- cgit v1.2.2 From 554b0d1c845e42ef01d7f6f5f24b3e4c6129ce8f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:47:41 +0800 Subject: cpuset: inherit ancestor's masks if effective_{cpus, mems} becomes empty We're going to have separate user-configured masks and effective ones. Eventually configured masks can only be changed by writing cpuset.cpus and cpuset.mems, and they won't be restricted by parent cpuset. While effective masks reflect cpu/memory hotplug and hierachical restriction, and these are the real masks that apply to the tasks in the cpuset. We calculate effective mask this way: - top cpuset's effective_mask == online_mask, otherwise - cpuset's effective_mask == configured_mask & parent effective_mask, if the result is empty, it inherits parent effective mask. Those behavior changes are for default hierarchy only. For legacy hierarchy, effective_mask and configured_mask are the same, so we won't break old interfaces. To make cs->effective_{cpus,mems} to be effective masks, we need to - update the effective masks at hotplug - update the effective masks at config change - take on ancestor's mask when the effective mask is empty The last item is done here. This won't introduce behavior change. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index da766c3736c4..f8340026d01c 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -877,6 +877,13 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) cpumask_and(new_cpus, cp->cpus_allowed, parent->effective_cpus); + /* + * If it becomes empty, inherit the effective mask of the + * parent, which is guaranteed to have some CPUs. + */ + if (cpumask_empty(new_cpus)) + cpumask_copy(new_cpus, parent->effective_cpus); + /* Skip the whole subtree if the cpumask remains the same. */ if (cpumask_equal(new_cpus, cp->effective_cpus)) { pos_css = css_rightmost_descendant(pos_css); @@ -1123,6 +1130,13 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) nodes_and(*new_mems, cp->mems_allowed, parent->effective_mems); + /* + * If it becomes empty, inherit the effective mask of the + * parent, which is guaranteed to have some MEMs. + */ + if (nodes_empty(*new_mems)) + *new_mems = parent->effective_mems; + /* Skip the whole subtree if the nodemask remains the same. */ if (nodes_equal(*new_mems, cp->effective_mems)) { pos_css = css_rightmost_descendant(pos_css); @@ -2102,7 +2116,11 @@ retry: mutex_lock(&callback_mutex); cpumask_andnot(cs->cpus_allowed, cs->cpus_allowed, &off_cpus); + + /* Inherit the effective mask of the parent, if it becomes empty. */ cpumask_andnot(cs->effective_cpus, cs->effective_cpus, &off_cpus); + if (on_dfl && cpumask_empty(cs->effective_cpus)) + cpumask_copy(cs->effective_cpus, parent_cs(cs)->effective_cpus); mutex_unlock(&callback_mutex); /* @@ -2117,7 +2135,11 @@ retry: mutex_lock(&callback_mutex); nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems); + + /* Inherit the effective mask of the parent, if it becomes empty */ nodes_andnot(cs->effective_mems, cs->effective_mems, off_mems); + if (on_dfl && nodes_empty(cs->effective_mems)) + cs->effective_mems = parent_cs(cs)->effective_mems; mutex_unlock(&callback_mutex); /* -- cgit v1.2.2 From 8b5f1c52dcd1accd3a940cfcb148bef6de589524 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:47:50 +0800 Subject: cpuset: use effective cpumask to build sched domains We're going to have separate user-configured masks and effective ones. Eventually configured masks can only be changed by writing cpuset.cpus and cpuset.mems, and they won't be restricted by parent cpuset. While effective masks reflect cpu/memory hotplug and hierachical restriction, and these are the real masks that apply to the tasks in the cpuset. We calculate effective mask this way: - top cpuset's effective_mask == online_mask, otherwise - cpuset's effective_mask == configured_mask & parent effective_mask, if the result is empty, it inherits parent effective mask. Those behavior changes are for default hierarchy only. For legacy hierarchy, effective_mask and configured_mask are the same, so we won't break old interfaces. We should partition sched domains according to effective_cpus, which is the real cpulist that takes effects on tasks in the cpuset. This won't introduce behavior change. v2: - Add a comment for the call of rebuild_sched_domains(), suggested by Tejun. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f8340026d01c..60577ccdbfc7 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -494,11 +494,11 @@ out: #ifdef CONFIG_SMP /* * Helper routine for generate_sched_domains(). - * Do cpusets a, b have overlapping cpus_allowed masks? + * Do cpusets a, b have overlapping effective cpus_allowed masks? */ static int cpusets_overlap(struct cpuset *a, struct cpuset *b) { - return cpumask_intersects(a->cpus_allowed, b->cpus_allowed); + return cpumask_intersects(a->effective_cpus, b->effective_cpus); } static void @@ -615,7 +615,7 @@ static int generate_sched_domains(cpumask_var_t **domains, *dattr = SD_ATTR_INIT; update_domain_attr_tree(dattr, &top_cpuset); } - cpumask_copy(doms[0], top_cpuset.cpus_allowed); + cpumask_copy(doms[0], top_cpuset.effective_cpus); goto done; } @@ -719,7 +719,7 @@ restart: struct cpuset *b = csa[j]; if (apn == b->pn) { - cpumask_or(dp, dp, b->cpus_allowed); + cpumask_or(dp, dp, b->effective_cpus); if (dattr) update_domain_attr_tree(dattr + nslot, b); @@ -771,7 +771,7 @@ static void rebuild_sched_domains_locked(void) * passing doms with offlined cpu to partition_sched_domains(). * Anyways, hotplug work item will rebuild sched domains. */ - if (!cpumask_equal(top_cpuset.cpus_allowed, cpu_active_mask)) + if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask)) goto out; /* Generate domain masks and attrs */ @@ -870,6 +870,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) { struct cpuset *cp; struct cgroup_subsys_state *pos_css; + bool need_rebuild_sched_domains = false; rcu_read_lock(); cpuset_for_each_descendant_pre(cp, pos_css, cs) { @@ -903,10 +904,21 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) update_tasks_cpumask(cp); + /* + * If the effective cpumask of any non-empty cpuset is changed, + * we need to rebuild sched domains. + */ + if (!cpumask_empty(cp->cpus_allowed) && + is_sched_load_balance(cp)) + need_rebuild_sched_domains = true; + rcu_read_lock(); css_put(&cp->css); } rcu_read_unlock(); + + if (need_rebuild_sched_domains) + rebuild_sched_domains_locked(); } /** @@ -919,7 +931,6 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, const char *buf) { int retval; - int is_load_balanced; /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */ if (cs == &top_cpuset) @@ -950,17 +961,12 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (retval < 0) return retval; - is_load_balanced = is_sched_load_balance(trialcs); - mutex_lock(&callback_mutex); cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); mutex_unlock(&callback_mutex); /* use trialcs->cpus_allowed as a temp variable */ update_cpumasks_hier(cs, trialcs->cpus_allowed); - - if (is_load_balanced) - rebuild_sched_domains_locked(); return 0; } -- cgit v1.2.2 From 39bd0d15eca5af15ee1492964f317ecdb024a9d6 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:48:01 +0800 Subject: cpuset: initialize top_cpuset's configured masks at mount We now have to support different behaviors for default hierachy and legacy hiearchy, top_cpuset's configured masks need to be initialized accordingly. Suppose we've offlined cpu1. On default hierarchy: # mount -t cgroup -o __DEVEL__sane_behavior xxx /cpuset # cat /cpuset/cpuset.cpus 0-15 On legacy hierarchy: # mount -t cgroup xxx /cpuset # cat /cpuset/cpuset.cpus 0,2-15 Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 60577ccdbfc7..e4c31e6b8716 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2015,16 +2015,35 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) kfree(cs); } +static void cpuset_bind(struct cgroup_subsys_state *root_css) +{ + mutex_lock(&cpuset_mutex); + mutex_lock(&callback_mutex); + + if (cgroup_on_dfl(root_css->cgroup)) { + cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); + top_cpuset.mems_allowed = node_possible_map; + } else { + cpumask_copy(top_cpuset.cpus_allowed, + top_cpuset.effective_cpus); + top_cpuset.mems_allowed = top_cpuset.effective_mems; + } + + mutex_unlock(&callback_mutex); + mutex_unlock(&cpuset_mutex); +} + struct cgroup_subsys cpuset_cgrp_subsys = { - .css_alloc = cpuset_css_alloc, - .css_online = cpuset_css_online, - .css_offline = cpuset_css_offline, - .css_free = cpuset_css_free, - .can_attach = cpuset_can_attach, - .cancel_attach = cpuset_cancel_attach, - .attach = cpuset_attach, - .base_cftypes = files, - .early_init = 1, + .css_alloc = cpuset_css_alloc, + .css_online = cpuset_css_online, + .css_offline = cpuset_css_offline, + .css_free = cpuset_css_free, + .can_attach = cpuset_can_attach, + .cancel_attach = cpuset_cancel_attach, + .attach = cpuset_attach, + .bind = cpuset_bind, + .base_cftypes = files, + .early_init = 1, }; /** -- cgit v1.2.2 From ae1c802382f7af60aa54879fb4f5920a9df1ff48 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:48:32 +0800 Subject: cpuset: apply cs->effective_{cpus,mems} Now we can use cs->effective_{cpus,mems} as effective masks. It's used whenever: - we update tasks' cpus_allowed/mems_allowed, - we want to retrieve tasks_cs(tsk)'s cpus_allowed/mems_allowed. They actually replace effective_{cpu,node}mask_cpuset(). effective_mask == configured_mask & parent effective_mask except when the reault is empty, in which case it inherits parent effective_mask. The result equals the mask computed from effective_{cpu,node}mask_cpuset(). This won't affect the original legacy hierarchy, because in this case we make sure the effective masks are always the same with user-configured masks. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 83 ++++++++++----------------------------------------------- 1 file changed, 14 insertions(+), 69 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index e4c31e6b8716..820870a715f8 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -313,9 +313,9 @@ static struct file_system_type cpuset_fs_type = { */ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) { - while (!cpumask_intersects(cs->cpus_allowed, cpu_online_mask)) + while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) cs = parent_cs(cs); - cpumask_and(pmask, cs->cpus_allowed, cpu_online_mask); + cpumask_and(pmask, cs->effective_cpus, cpu_online_mask); } /* @@ -331,9 +331,9 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) */ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) { - while (!nodes_intersects(cs->mems_allowed, node_states[N_MEMORY])) + while (!nodes_intersects(cs->effective_mems, node_states[N_MEMORY])) cs = parent_cs(cs); - nodes_and(*pmask, cs->mems_allowed, node_states[N_MEMORY]); + nodes_and(*pmask, cs->effective_mems, node_states[N_MEMORY]); } /* @@ -795,45 +795,6 @@ void rebuild_sched_domains(void) mutex_unlock(&cpuset_mutex); } -/* - * effective_cpumask_cpuset - return nearest ancestor with non-empty cpus - * @cs: the cpuset in interest - * - * A cpuset's effective cpumask is the cpumask of the nearest ancestor - * with non-empty cpus. We use effective cpumask whenever: - * - we update tasks' cpus_allowed. (they take on the ancestor's cpumask - * if the cpuset they reside in has no cpus) - * - we want to retrieve task_cs(tsk)'s cpus_allowed. - * - * Called with cpuset_mutex held. cpuset_cpus_allowed_fallback() is an - * exception. See comments there. - */ -static struct cpuset *effective_cpumask_cpuset(struct cpuset *cs) -{ - while (cpumask_empty(cs->cpus_allowed)) - cs = parent_cs(cs); - return cs; -} - -/* - * effective_nodemask_cpuset - return nearest ancestor with non-empty mems - * @cs: the cpuset in interest - * - * A cpuset's effective nodemask is the nodemask of the nearest ancestor - * with non-empty memss. We use effective nodemask whenever: - * - we update tasks' mems_allowed. (they take on the ancestor's nodemask - * if the cpuset they reside in has no mems) - * - we want to retrieve task_cs(tsk)'s mems_allowed. - * - * Called with cpuset_mutex held. - */ -static struct cpuset *effective_nodemask_cpuset(struct cpuset *cs) -{ - while (nodes_empty(cs->mems_allowed)) - cs = parent_cs(cs); - return cs; -} - /** * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset. * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed @@ -844,13 +805,12 @@ static struct cpuset *effective_nodemask_cpuset(struct cpuset *cs) */ static void update_tasks_cpumask(struct cpuset *cs) { - struct cpuset *cpus_cs = effective_cpumask_cpuset(cs); struct css_task_iter it; struct task_struct *task; css_task_iter_start(&cs->css, &it); while ((task = css_task_iter_next(&it))) - set_cpus_allowed_ptr(task, cpus_cs->cpus_allowed); + set_cpus_allowed_ptr(task, cs->effective_cpus); css_task_iter_end(&it); } @@ -988,15 +948,13 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to) { struct task_struct *tsk = current; - struct cpuset *mems_cs; tsk->mems_allowed = *to; do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); rcu_read_lock(); - mems_cs = effective_nodemask_cpuset(task_cs(tsk)); - guarantee_online_mems(mems_cs, &tsk->mems_allowed); + guarantee_online_mems(task_cs(tsk), &tsk->mems_allowed); rcu_read_unlock(); } @@ -1065,13 +1023,12 @@ static void *cpuset_being_rebound; static void update_tasks_nodemask(struct cpuset *cs) { static nodemask_t newmems; /* protected by cpuset_mutex */ - struct cpuset *mems_cs = effective_nodemask_cpuset(cs); struct css_task_iter it; struct task_struct *task; cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ - guarantee_online_mems(mems_cs, &newmems); + guarantee_online_mems(cs, &newmems); /* * The mpol_rebind_mm() call takes mmap_sem, which we couldn't @@ -1497,8 +1454,6 @@ static void cpuset_attach(struct cgroup_subsys_state *css, struct task_struct *leader = cgroup_taskset_first(tset); struct cpuset *cs = css_cs(css); struct cpuset *oldcs = cpuset_attach_old_cs; - struct cpuset *cpus_cs = effective_cpumask_cpuset(cs); - struct cpuset *mems_cs = effective_nodemask_cpuset(cs); mutex_lock(&cpuset_mutex); @@ -1506,9 +1461,9 @@ static void cpuset_attach(struct cgroup_subsys_state *css, if (cs == &top_cpuset) cpumask_copy(cpus_attach, cpu_possible_mask); else - guarantee_online_cpus(cpus_cs, cpus_attach); + guarantee_online_cpus(cs, cpus_attach); - guarantee_online_mems(mems_cs, &cpuset_attach_nodemask_to); + guarantee_online_mems(cs, &cpuset_attach_nodemask_to); cgroup_taskset_for_each(task, tset) { /* @@ -1525,11 +1480,9 @@ static void cpuset_attach(struct cgroup_subsys_state *css, * Change mm, possibly for multiple threads in a threadgroup. This is * expensive and may sleep. */ - cpuset_attach_nodemask_to = cs->mems_allowed; + cpuset_attach_nodemask_to = cs->effective_mems; mm = get_task_mm(leader); if (mm) { - struct cpuset *mems_oldcs = effective_nodemask_cpuset(oldcs); - mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); /* @@ -1540,7 +1493,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css, * mm from. */ if (is_memory_migrate(cs)) { - cpuset_migrate_mm(mm, &mems_oldcs->old_mems_allowed, + cpuset_migrate_mm(mm, &oldcs->old_mems_allowed, &cpuset_attach_nodemask_to); } mmput(mm); @@ -2331,23 +2284,17 @@ void __init cpuset_init_smp(void) void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) { - struct cpuset *cpus_cs; - mutex_lock(&callback_mutex); rcu_read_lock(); - cpus_cs = effective_cpumask_cpuset(task_cs(tsk)); - guarantee_online_cpus(cpus_cs, pmask); + guarantee_online_cpus(task_cs(tsk), pmask); rcu_read_unlock(); mutex_unlock(&callback_mutex); } void cpuset_cpus_allowed_fallback(struct task_struct *tsk) { - struct cpuset *cpus_cs; - rcu_read_lock(); - cpus_cs = effective_cpumask_cpuset(task_cs(tsk)); - do_set_cpus_allowed(tsk, cpus_cs->cpus_allowed); + do_set_cpus_allowed(tsk, task_cs(tsk)->effective_cpus); rcu_read_unlock(); /* @@ -2386,13 +2333,11 @@ void cpuset_init_current_mems_allowed(void) nodemask_t cpuset_mems_allowed(struct task_struct *tsk) { - struct cpuset *mems_cs; nodemask_t mask; mutex_lock(&callback_mutex); rcu_read_lock(); - mems_cs = effective_nodemask_cpuset(task_cs(tsk)); - guarantee_online_mems(mems_cs, &mask); + guarantee_online_mems(task_cs(tsk), &mask); rcu_read_unlock(); mutex_unlock(&callback_mutex); -- cgit v1.2.2 From 7e88291beefbb758fa3b27e500ee2e0c888d6e44 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:48:42 +0800 Subject: cpuset: make cs->{cpus, mems}_allowed as user-configured masks Now we've used effective cpumasks to enforce hierarchical manner, we can use cs->{cpus,mems}_allowed as configured masks. Configured masks can be changed by writing cpuset.cpus and cpuset.mems only. The new behaviors are: - They won't be changed by hotplug anymore. - They won't be limited by its parent's masks. This ia a behavior change, but won't take effect unless mount with sane_behavior. v2: - Add comments to explain the differences between configured masks and effective masks. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 820870a715f8..4b409d2ecbb9 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -77,6 +77,26 @@ struct cpuset { unsigned long flags; /* "unsigned long" so bitops work */ + /* + * On default hierarchy: + * + * The user-configured masks can only be changed by writing to + * cpuset.cpus and cpuset.mems, and won't be limited by the + * parent masks. + * + * The effective masks is the real masks that apply to the tasks + * in the cpuset. They may be changed if the configured masks are + * changed or hotplug happens. + * + * effective_mask == configured_mask & parent's effective_mask, + * and if it ends up empty, it will inherit the parent's mask. + * + * + * On legacy hierachy: + * + * The user-configured masks are always the same with effective masks. + */ + /* user-configured CPUs and Memory Nodes allow to tasks */ cpumask_var_t cpus_allowed; nodemask_t mems_allowed; @@ -450,9 +470,9 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) par = parent_cs(cur); - /* We must be a subset of our parent cpuset */ + /* On legacy hiearchy, we must be a subset of our parent cpuset. */ ret = -EACCES; - if (!is_cpuset_subset(trial, par)) + if (!cgroup_on_dfl(cur->css.cgroup) && !is_cpuset_subset(trial, par)) goto out; /* @@ -2167,6 +2187,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) static cpumask_t new_cpus; static nodemask_t new_mems; bool cpus_updated, mems_updated; + bool on_dfl = cgroup_on_dfl(top_cpuset.css.cgroup); mutex_lock(&cpuset_mutex); @@ -2174,13 +2195,14 @@ static void cpuset_hotplug_workfn(struct work_struct *work) cpumask_copy(&new_cpus, cpu_active_mask); new_mems = node_states[N_MEMORY]; - cpus_updated = !cpumask_equal(top_cpuset.cpus_allowed, &new_cpus); - mems_updated = !nodes_equal(top_cpuset.mems_allowed, new_mems); + cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus); + mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems); /* synchronize cpus_allowed to cpu_active_mask */ if (cpus_updated) { mutex_lock(&callback_mutex); - cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); + if (!on_dfl) + cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); cpumask_copy(top_cpuset.effective_cpus, &new_cpus); mutex_unlock(&callback_mutex); /* we don't mess with cpumasks of tasks in top_cpuset */ @@ -2189,7 +2211,8 @@ static void cpuset_hotplug_workfn(struct work_struct *work) /* synchronize mems_allowed to N_MEMORY */ if (mems_updated) { mutex_lock(&callback_mutex); - top_cpuset.mems_allowed = new_mems; + if (!on_dfl) + top_cpuset.mems_allowed = new_mems; top_cpuset.effective_mems = new_mems; mutex_unlock(&callback_mutex); update_tasks_nodemask(&top_cpuset); -- cgit v1.2.2 From 390a36aadf39e241c83035469aae48ed1a144088 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:48:54 +0800 Subject: cpuset: refactor cpuset_hotplug_update_tasks() We mix the handling for both default hierarchy and legacy hierarchy in the same function, and it's quite messy, so split into two functions. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 121 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 66 insertions(+), 55 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 4b409d2ecbb9..41822e2027c1 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2080,6 +2080,65 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) } } +static void hotplug_update_tasks_legacy(struct cpuset *cs, + struct cpumask *off_cpus, + nodemask_t *off_mems) +{ + bool is_empty; + + mutex_lock(&callback_mutex); + cpumask_andnot(cs->cpus_allowed, cs->cpus_allowed, off_cpus); + cpumask_andnot(cs->effective_cpus, cs->effective_cpus, off_cpus); + nodes_andnot(cs->mems_allowed, cs->mems_allowed, *off_mems); + nodes_andnot(cs->effective_mems, cs->effective_mems, *off_mems); + mutex_unlock(&callback_mutex); + + /* + * Don't call update_tasks_cpumask() if the cpuset becomes empty, + * as the tasks will be migratecd to an ancestor. + */ + if (!cpumask_empty(off_cpus) && !cpumask_empty(cs->cpus_allowed)) + update_tasks_cpumask(cs); + if (!nodes_empty(*off_mems) && !nodes_empty(cs->mems_allowed)) + update_tasks_nodemask(cs); + + is_empty = cpumask_empty(cs->cpus_allowed) || + nodes_empty(cs->mems_allowed); + + mutex_unlock(&cpuset_mutex); + + /* + * Move tasks to the nearest ancestor with execution resources, + * This is full cgroup operation which will also call back into + * cpuset. Should be done outside any lock. + */ + if (is_empty) + remove_tasks_in_empty_cpuset(cs); + + mutex_lock(&cpuset_mutex); +} + +static void hotplug_update_tasks(struct cpuset *cs, + struct cpumask *off_cpus, + nodemask_t *off_mems) +{ + mutex_lock(&callback_mutex); + cpumask_andnot(cs->effective_cpus, cs->effective_cpus, off_cpus); + if (cpumask_empty(cs->effective_cpus)) + cpumask_copy(cs->effective_cpus, + parent_cs(cs)->effective_cpus); + + nodes_andnot(cs->effective_mems, cs->effective_mems, *off_mems); + if (nodes_empty(cs->effective_mems)) + cs->effective_mems = parent_cs(cs)->effective_mems; + mutex_unlock(&callback_mutex); + + if (!cpumask_empty(off_cpus)) + update_tasks_cpumask(cs); + if (!nodes_empty(*off_mems)) + update_tasks_nodemask(cs); +} + /** * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug * @cs: cpuset in interest @@ -2092,9 +2151,6 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs) { static cpumask_t off_cpus; static nodemask_t off_mems; - bool is_empty; - bool on_dfl = cgroup_on_dfl(cs->css.cgroup); - retry: wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); @@ -2109,61 +2165,16 @@ retry: goto retry; } - cpumask_andnot(&off_cpus, cs->cpus_allowed, top_cpuset.cpus_allowed); - nodes_andnot(off_mems, cs->mems_allowed, top_cpuset.mems_allowed); - - mutex_lock(&callback_mutex); - cpumask_andnot(cs->cpus_allowed, cs->cpus_allowed, &off_cpus); - - /* Inherit the effective mask of the parent, if it becomes empty. */ - cpumask_andnot(cs->effective_cpus, cs->effective_cpus, &off_cpus); - if (on_dfl && cpumask_empty(cs->effective_cpus)) - cpumask_copy(cs->effective_cpus, parent_cs(cs)->effective_cpus); - mutex_unlock(&callback_mutex); - - /* - * If on_dfl, we need to update tasks' cpumask for empty cpuset to - * take on ancestor's cpumask. Otherwise, don't call - * update_tasks_cpumask() if the cpuset becomes empty, as the tasks - * in it will be migrated to an ancestor. - */ - if ((on_dfl && cpumask_empty(cs->cpus_allowed)) || - (!cpumask_empty(&off_cpus) && !cpumask_empty(cs->cpus_allowed))) - update_tasks_cpumask(cs); - - mutex_lock(&callback_mutex); - nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems); + cpumask_andnot(&off_cpus, cs->effective_cpus, + top_cpuset.effective_cpus); + nodes_andnot(off_mems, cs->effective_mems, top_cpuset.effective_mems); - /* Inherit the effective mask of the parent, if it becomes empty */ - nodes_andnot(cs->effective_mems, cs->effective_mems, off_mems); - if (on_dfl && nodes_empty(cs->effective_mems)) - cs->effective_mems = parent_cs(cs)->effective_mems; - mutex_unlock(&callback_mutex); - - /* - * If on_dfl, we need to update tasks' nodemask for empty cpuset to - * take on ancestor's nodemask. Otherwise, don't call - * update_tasks_nodemask() if the cpuset becomes empty, as the - * tasks in it will be migratd to an ancestor. - */ - if ((on_dfl && nodes_empty(cs->mems_allowed)) || - (!nodes_empty(off_mems) && !nodes_empty(cs->mems_allowed))) - update_tasks_nodemask(cs); - - is_empty = cpumask_empty(cs->cpus_allowed) || - nodes_empty(cs->mems_allowed); + if (cgroup_on_dfl(cs->css.cgroup)) + hotplug_update_tasks(cs, &off_cpus, &off_mems); + else + hotplug_update_tasks_legacy(cs, &off_cpus, &off_mems); mutex_unlock(&cpuset_mutex); - - /* - * If on_dfl, we'll keep tasks in empty cpusets. - * - * Otherwise move tasks to the nearest ancestor with execution - * resources. This is full cgroup operation which will - * also call back into cpuset. Should be done outside any lock. - */ - if (!on_dfl && is_empty) - remove_tasks_in_empty_cpuset(cs); } /** -- cgit v1.2.2 From be4c9dd7aee5ecf3e748da68c27b38bdca70d444 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:49:04 +0800 Subject: cpuset: enable onlined cpu/node in effective masks Firstly offline cpu1: # echo 0-1 > cpuset.cpus # echo 0 > /sys/devices/system/cpu/cpu1/online # cat cpuset.cpus 0-1 # cat cpuset.effective_cpus 0 Then online it: # echo 1 > /sys/devices/system/cpu/cpu1/online # cat cpuset.cpus 0-1 # cat cpuset.effective_cpus 0-1 And cpuset will bring it back to the effective mask. The implementation is quite straightforward. Instead of calculating the offlined cpus/mems and do updates, we just set the new effective_mask to online_mask & congifured_mask. This is a behavior change for default hierarchy, so legacy hierarchy won't be affected. v2: - make refactoring of cpuset_hotplug_update_tasks() as seperate patch, suggested by Tejun. - make hotplug_update_tasks_insane() use @new_cpus and @new_mems as hotplug_update_tasks_sane() does. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 65 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 29 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 41822e2027c1..c47cb940712e 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2080,26 +2080,27 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) } } -static void hotplug_update_tasks_legacy(struct cpuset *cs, - struct cpumask *off_cpus, - nodemask_t *off_mems) +static void +hotplug_update_tasks_legacy(struct cpuset *cs, + struct cpumask *new_cpus, nodemask_t *new_mems, + bool cpus_updated, bool mems_updated) { bool is_empty; mutex_lock(&callback_mutex); - cpumask_andnot(cs->cpus_allowed, cs->cpus_allowed, off_cpus); - cpumask_andnot(cs->effective_cpus, cs->effective_cpus, off_cpus); - nodes_andnot(cs->mems_allowed, cs->mems_allowed, *off_mems); - nodes_andnot(cs->effective_mems, cs->effective_mems, *off_mems); + cpumask_copy(cs->cpus_allowed, new_cpus); + cpumask_copy(cs->effective_cpus, new_cpus); + cs->mems_allowed = *new_mems; + cs->effective_mems = *new_mems; mutex_unlock(&callback_mutex); /* * Don't call update_tasks_cpumask() if the cpuset becomes empty, * as the tasks will be migratecd to an ancestor. */ - if (!cpumask_empty(off_cpus) && !cpumask_empty(cs->cpus_allowed)) + if (cpus_updated && !cpumask_empty(cs->cpus_allowed)) update_tasks_cpumask(cs); - if (!nodes_empty(*off_mems) && !nodes_empty(cs->mems_allowed)) + if (mems_updated && !nodes_empty(cs->mems_allowed)) update_tasks_nodemask(cs); is_empty = cpumask_empty(cs->cpus_allowed) || @@ -2118,24 +2119,24 @@ static void hotplug_update_tasks_legacy(struct cpuset *cs, mutex_lock(&cpuset_mutex); } -static void hotplug_update_tasks(struct cpuset *cs, - struct cpumask *off_cpus, - nodemask_t *off_mems) +static void +hotplug_update_tasks(struct cpuset *cs, + struct cpumask *new_cpus, nodemask_t *new_mems, + bool cpus_updated, bool mems_updated) { + if (cpumask_empty(new_cpus)) + cpumask_copy(new_cpus, parent_cs(cs)->effective_cpus); + if (nodes_empty(*new_mems)) + *new_mems = parent_cs(cs)->effective_mems; + mutex_lock(&callback_mutex); - cpumask_andnot(cs->effective_cpus, cs->effective_cpus, off_cpus); - if (cpumask_empty(cs->effective_cpus)) - cpumask_copy(cs->effective_cpus, - parent_cs(cs)->effective_cpus); - - nodes_andnot(cs->effective_mems, cs->effective_mems, *off_mems); - if (nodes_empty(cs->effective_mems)) - cs->effective_mems = parent_cs(cs)->effective_mems; + cpumask_copy(cs->effective_cpus, new_cpus); + cs->effective_mems = *new_mems; mutex_unlock(&callback_mutex); - if (!cpumask_empty(off_cpus)) + if (cpus_updated) update_tasks_cpumask(cs); - if (!nodes_empty(*off_mems)) + if (mems_updated) update_tasks_nodemask(cs); } @@ -2149,8 +2150,10 @@ static void hotplug_update_tasks(struct cpuset *cs, */ static void cpuset_hotplug_update_tasks(struct cpuset *cs) { - static cpumask_t off_cpus; - static nodemask_t off_mems; + static cpumask_t new_cpus; + static nodemask_t new_mems; + bool cpus_updated; + bool mems_updated; retry: wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); @@ -2165,14 +2168,18 @@ retry: goto retry; } - cpumask_andnot(&off_cpus, cs->effective_cpus, - top_cpuset.effective_cpus); - nodes_andnot(off_mems, cs->effective_mems, top_cpuset.effective_mems); + cpumask_and(&new_cpus, cs->cpus_allowed, parent_cs(cs)->effective_cpus); + nodes_and(new_mems, cs->mems_allowed, parent_cs(cs)->effective_mems); + + cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); + mems_updated = !nodes_equal(new_mems, cs->effective_mems); if (cgroup_on_dfl(cs->css.cgroup)) - hotplug_update_tasks(cs, &off_cpus, &off_mems); + hotplug_update_tasks(cs, &new_cpus, &new_mems, + cpus_updated, mems_updated); else - hotplug_update_tasks_legacy(cs, &off_cpus, &off_mems); + hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems, + cpus_updated, mems_updated); mutex_unlock(&cpuset_mutex); } -- cgit v1.2.2 From 5d8ba82c3a1f10b77bf39ee3e7670d6789a8d149 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:49:12 +0800 Subject: cpuset: allow writing offlined masks to cpuset.cpus/mems As the configured masks won't be limited by its parent, and the top cpuset's masks won't change when hotplug happens, it's natural to allow writing offlined masks to the configured masks. If on default hierarchy: # echo 0 > /sys/devices/system/cpu/cpu1/online # mkdir /cpuset/sub # echo 1 > /cpuset/sub/cpuset.cpus # cat /cpuset/sub/cpuset.cpus 1 If on legacy hierarchy: # echo 0 > /sys/devices/system/cpu/cpu1/online # mkdir /cpuset/sub # echo 1 > /cpuset/sub/cpuset.cpus -bash: echo: write error: Invalid argument Note the checks don't need to be gated by cgroup_on_dfl, because we've initialized top_cpuset.{cpus,mems}_allowed accordingly in cpuset_bind(). Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index c47cb940712e..65878a74a66b 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -929,7 +929,8 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (retval < 0) return retval; - if (!cpumask_subset(trialcs->cpus_allowed, cpu_active_mask)) + if (!cpumask_subset(trialcs->cpus_allowed, + top_cpuset.cpus_allowed)) return -EINVAL; } @@ -1186,8 +1187,8 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, goto done; if (!nodes_subset(trialcs->mems_allowed, - node_states[N_MEMORY])) { - retval = -EINVAL; + top_cpuset.mems_allowed)) { + retval = -EINVAL; goto done; } } -- cgit v1.2.2 From afd1a8b3e0bc4d045d762dfdbc4d0cee189893a4 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:49:25 +0800 Subject: cpuset: export effective masks to userspace cpuset.cpus and cpuset.mems are the configured masks, and we need to export effective masks to userspace, so users know the real cpus_allowed and mems_allowed that apply to the tasks in a cpuset. v2: - export those masks unconditionally, suggested by Tejun. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 65878a74a66b..53a9bbf16391 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1535,6 +1535,8 @@ typedef enum { FILE_MEMORY_MIGRATE, FILE_CPULIST, FILE_MEMLIST, + FILE_EFFECTIVE_CPULIST, + FILE_EFFECTIVE_MEMLIST, FILE_CPU_EXCLUSIVE, FILE_MEM_EXCLUSIVE, FILE_MEM_HARDWALL, @@ -1701,6 +1703,12 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) case FILE_MEMLIST: s += nodelist_scnprintf(s, count, cs->mems_allowed); break; + case FILE_EFFECTIVE_CPULIST: + s += cpulist_scnprintf(s, count, cs->effective_cpus); + break; + case FILE_EFFECTIVE_MEMLIST: + s += nodelist_scnprintf(s, count, cs->effective_mems); + break; default: ret = -EINVAL; goto out_unlock; @@ -1785,6 +1793,18 @@ static struct cftype files[] = { .private = FILE_MEMLIST, }, + { + .name = "effective_cpus", + .seq_show = cpuset_common_seq_show, + .private = FILE_EFFECTIVE_CPULIST, + }, + + { + .name = "effective_mems", + .seq_show = cpuset_common_seq_show, + .private = FILE_EFFECTIVE_MEMLIST, + }, + { .name = "cpu_exclusive", .read_u64 = cpuset_read_u64, -- cgit v1.2.2 From 5577964e64692e17cc498854b7e0833e6532cd64 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Jul 2014 11:05:09 -0400 Subject: cgroup: rename cgroup_subsys->base_cftypes to ->legacy_cftypes Currently, cgroup_subsys->base_cftypes is used for both the unified default hierarchy and legacy ones and subsystems can mark each file with either CFTYPE_ONLY_ON_DFL or CFTYPE_INSANE if it has to appear only on one of them. This is quite hairy and error-prone. Also, we may end up exposing interface files to the default hierarchy without thinking it through. cgroup_subsys will grow two separate cftype arrays and apply each only on the hierarchies of the matching type. This will allow organizing cftypes in a lot clearer way and encourage subsystems to scrutinize the interface which is being exposed in the new default hierarchy. In preparation, this patch renames cgroup_subsys->base_cftypes to cgroup_subsys->legacy_cftypes. This patch is pure rename. Signed-off-by: Tejun Heo Acked-by: Neil Horman Acked-by: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko Cc: Vivek Goyal Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Aristeu Rozanski Cc: Aneesh Kumar K.V --- kernel/cpuset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 53a9bbf16391..f337f42a07ac 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2036,7 +2036,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = { .cancel_attach = cpuset_cancel_attach, .attach = cpuset_attach, .bind = cpuset_bind, - .base_cftypes = files, + .legacy_cftypes = files, .early_init = 1, }; -- cgit v1.2.2 From a13812683f1118ee4deed88d8d9bc2c268358b2e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 30 Jul 2014 15:07:13 +0800 Subject: cpuset: fix the WARN_ON() in update_nodemasks_hier() The WARN_ON() is used to check if we break the legal hierarchy, on which the effective mems should be equal to configured mems. Reported-by: Mike Qiu Tested-by: Mike Qiu Signed-off-by: Li Zefan --- kernel/cpuset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f337f42a07ac..9d7264beb74f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1136,7 +1136,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) mutex_unlock(&callback_mutex); WARN_ON(!cgroup_on_dfl(cp->css.cgroup) && - nodes_equal(cp->mems_allowed, cp->effective_mems)); + !nodes_equal(cp->mems_allowed, cp->effective_mems)); update_tasks_nodemask(cp); -- cgit v1.2.2