aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tejun Heo <tj@kernel.org> 2017-07-08 07:17:02 -0400
committer Tejun Heo <tj@kernel.org> 2017-07-08 07:37:50 -0400
commit 610467270fb368584b74567edd21c8cc5104490f (patch)
tree b4c15728b60f010782d1a965ec26395827e1dd05
parent cee37d83e6d9ada1c2254c73bac7955f9e048d22 (diff)
cgroup: don't call migration methods if there are no tasks to migrate
Subsystem migration methods shouldn't be called for empty migrations. cgroup_migrate_execute() implements this guarantee by bailing early if there are no source css_sets. This used to be correct before a79a908fd2b0 ("cgroup: introduce cgroup namespaces"), but it no longer is since that commit, because css_sets can stay pinned without tasks in them. This caused cgroup_migrate_execute() to call into cpuset migration methods with an empty cgroup_taskset. cpuset migration methods correctly assume that cgroup_taskset_first() never returns NULL; however, due to the bug, it can, leading to the following oops. Unable to handle kernel paging request for data at address 0x00000960 Faulting instruction address: 0xc0000000001d6868 Oops: Kernel access of bad area, sig: 11 [#1] ... CPU: 14 PID: 16947 Comm: kworker/14:0 Tainted: G W 4.12.0-rc4-next-20170609 #2 Workqueue: events cpuset_hotplug_workfn task: c00000000ca60580 task.stack: c00000000c728000 NIP: c0000000001d6868 LR: c0000000001d6858 CTR: c0000000001d6810 REGS: c00000000c72b720 TRAP: 0300 Tainted: GW (4.12.0-rc4-next-20170609) MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 44722422 XER: 20000000 CFAR: c000000000008710 DAR: 0000000000000960 DSISR: 40000000 SOFTE: 1 GPR00: c0000000001d6858 c00000000c72b9a0 c000000001536e00 0000000000000000 GPR04: c00000000c72b9c0 0000000000000000 c00000000c72bad0 c000000766367678 GPR08: c000000766366d10 c00000000c72b958 c000000001736e00 0000000000000000 GPR12: c0000000001d6810 c00000000e749300 c000000000123ef8 c000000775af4180 GPR16: 0000000000000000 0000000000000000 c00000075480e9c0 c00000075480e9e0 GPR20: c00000075480e8c0 0000000000000001 0000000000000000 c00000000c72ba20 GPR24: c00000000c72baa0 c00000000c72bac0 c000000001407248 c00000000c72ba20 GPR28: c00000000141fc80 c00000000c72bac0 c00000000c6bc790 0000000000000000 NIP [c0000000001d6868] cpuset_can_attach+0x58/0x1b0 LR [c0000000001d6858] cpuset_can_attach+0x48/0x1b0 Call Trace: [c00000000c72b9a0] [c0000000001d6858] 
cpuset_can_attach+0x48/0x1b0 (unreliable) [c00000000c72ba00] [c0000000001cbe80] cgroup_migrate_execute+0xb0/0x450 [c00000000c72ba80] [c0000000001d3754] cgroup_transfer_tasks+0x1c4/0x360 [c00000000c72bba0] [c0000000001d923c] cpuset_hotplug_workfn+0x86c/0xa20 [c00000000c72bca0] [c00000000011aa44] process_one_work+0x1e4/0x580 [c00000000c72bd30] [c00000000011ae78] worker_thread+0x98/0x5c0 [c00000000c72bdc0] [c000000000124058] kthread+0x168/0x1b0 [c00000000c72be30] [c00000000000b2e8] ret_from_kernel_thread+0x5c/0x74 Instruction dump: f821ffa1 7c7d1b78 60000000 60000000 38810020 7fa3eb78 3f42ffed 4bff4c25 60000000 3b5a0448 3d420020 eb610020 <e9230960> 7f43d378 e9290000 f92af200 ---[ end trace dcaaf98fb36d9e64 ]--- This patch fixes the bug by adding an explicit nr_tasks counter to cgroup_taskset and skipping calling the migration methods if the counter is zero. While at it, remove the now spurious check on no source css_sets. Signed-off-by: Tejun Heo <tj@kernel.org> Reported-and-tested-by: Abdul Haleem <abdhalee@linux.vnet.ibm.com> Cc: Roman Gushchin <guro@fb.com> Cc: stable@vger.kernel.org # v4.6+ Fixes: a79a908fd2b0 ("cgroup: introduce cgroup namespaces") Link: http://lkml.kernel.org/r/1497266622.15415.39.camel@abdul.in.ibm.com
-rw-r--r-- kernel/cgroup/cgroup-internal.h 3
-rw-r--r-- kernel/cgroup/cgroup.c 58
2 files changed, 34 insertions, 27 deletions
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 793565c05742..8b4c3c2f2509 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -33,6 +33,9 @@ struct cgroup_taskset {
33 struct list_head src_csets; 33 struct list_head src_csets;
34 struct list_head dst_csets; 34 struct list_head dst_csets;
35 35
36 /* the number of tasks in the set */
37 int nr_tasks;
38
36 /* the subsys currently being processed */ 39 /* the subsys currently being processed */
37 int ssid; 40 int ssid;
38 41
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 620794a20a33..cc53111072d8 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2006,6 +2006,8 @@ static void cgroup_migrate_add_task(struct task_struct *task,
2006 if (!cset->mg_src_cgrp) 2006 if (!cset->mg_src_cgrp)
2007 return; 2007 return;
2008 2008
2009 mgctx->tset.nr_tasks++;
2010
2009 list_move_tail(&task->cg_list, &cset->mg_tasks); 2011 list_move_tail(&task->cg_list, &cset->mg_tasks);
2010 if (list_empty(&cset->mg_node)) 2012 if (list_empty(&cset->mg_node))
2011 list_add_tail(&cset->mg_node, 2013 list_add_tail(&cset->mg_node,
@@ -2094,21 +2096,19 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
2094 struct css_set *cset, *tmp_cset; 2096 struct css_set *cset, *tmp_cset;
2095 int ssid, failed_ssid, ret; 2097 int ssid, failed_ssid, ret;
2096 2098
2097 /* methods shouldn't be called if no task is actually migrating */
2098 if (list_empty(&tset->src_csets))
2099 return 0;
2100
2101 /* check that we can legitimately attach to the cgroup */ 2099 /* check that we can legitimately attach to the cgroup */
2102 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { 2100 if (tset->nr_tasks) {
2103 if (ss->can_attach) { 2101 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
2104 tset->ssid = ssid; 2102 if (ss->can_attach) {
2105 ret = ss->can_attach(tset); 2103 tset->ssid = ssid;
2106 if (ret) { 2104 ret = ss->can_attach(tset);
2107 failed_ssid = ssid; 2105 if (ret) {
2108 goto out_cancel_attach; 2106 failed_ssid = ssid;
2107 goto out_cancel_attach;
2108 }
2109 } 2109 }
2110 } 2110 } while_each_subsys_mask();
2111 } while_each_subsys_mask(); 2111 }
2112 2112
2113 /* 2113 /*
2114 * Now that we're guaranteed success, proceed to move all tasks to 2114 * Now that we're guaranteed success, proceed to move all tasks to
@@ -2137,25 +2137,29 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
2137 */ 2137 */
2138 tset->csets = &tset->dst_csets; 2138 tset->csets = &tset->dst_csets;
2139 2139
2140 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { 2140 if (tset->nr_tasks) {
2141 if (ss->attach) { 2141 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
2142 tset->ssid = ssid; 2142 if (ss->attach) {
2143 ss->attach(tset); 2143 tset->ssid = ssid;
2144 } 2144 ss->attach(tset);
2145 } while_each_subsys_mask(); 2145 }
2146 } while_each_subsys_mask();
2147 }
2146 2148
2147 ret = 0; 2149 ret = 0;
2148 goto out_release_tset; 2150 goto out_release_tset;
2149 2151
2150out_cancel_attach: 2152out_cancel_attach:
2151 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { 2153 if (tset->nr_tasks) {
2152 if (ssid == failed_ssid) 2154 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
2153 break; 2155 if (ssid == failed_ssid)
2154 if (ss->cancel_attach) { 2156 break;
2155 tset->ssid = ssid; 2157 if (ss->cancel_attach) {
2156 ss->cancel_attach(tset); 2158 tset->ssid = ssid;
2157 } 2159 ss->cancel_attach(tset);
2158 } while_each_subsys_mask(); 2160 }
2161 } while_each_subsys_mask();
2162 }
2159out_release_tset: 2163out_release_tset:
2160 spin_lock_irq(&css_set_lock); 2164 spin_lock_irq(&css_set_lock);
2161 list_splice_init(&tset->dst_csets, &tset->src_csets); 2165 list_splice_init(&tset->dst_csets, &tset->src_csets);