author     Tejun Heo <tj@kernel.org>        2019-10-15 12:03:47 -0400
committer  Jens Axboe <axboe@kernel.dk>     2019-10-15 12:13:00 -0400
commit     9d179b865449b351ad5cb76dbea480c9170d4a27 (patch)
tree       ebce8fe05f478889be94075edf4e75e34763e349 /block/blk-cgroup.c
parent     5da0fb1ab34ccfe6d49210b4f5a739c59fcbf25e (diff)
blkcg: Fix multiple bugs in blkcg_activate_policy()
blkcg_activate_policy() has the following bugs.

* cf09a8ee19ad ("blkcg: pass @q and @blkcg into blkcg_pol_alloc_pd_fn()")
  added @blkcg to ->pd_alloc_fn(); however, blkcg_activate_policy() ends up
  using pd's allocated for the root blkcg for all preallocations, so
  ->pd_init_fn() for non-root blkcgs can be passed in pd's which are
  allocated for the root blkcg.  For blk-iocost, this means that
  ->pd_init_fn() can write beyond the end of the allocated object as it
  determines the length of the flex array at the end based on the blkcg's
  nesting level.

* Each pd is initialized as they get allocated.  If alloc fails, the
  policy will get freed with pd's initialized on it.

* After the above partial failure, the partial pds are not freed.

This patch fixes all the above issues by

* Restructuring blkcg_activate_policy() so that alloc and init passes are
  separate.  Init takes place only after all allocs succeeded and on
  failure all allocated pds are freed.

* Unifying and fixing the cleanup of the remaining pd_prealloc.

Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: cf09a8ee19ad ("blkcg: pass @q and @blkcg into blkcg_pol_alloc_pd_fn()")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
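To make the first bug concrete, here is a minimal user-space sketch, not kernel code: the struct names (fake_cgroup, fake_pd) and helpers (pd_alloc, pd_init) are illustrative assumptions modelled loosely on blk-iocost's per-cgroup object, which ends in an ancestors[] flexible array sized by the blkcg's nesting level. It shows how allocating the object for the root (level 0) but initializing it for a nested cgroup requires more flex-array slots than were allocated.

/*
 * Minimal user-space sketch (not kernel code): a per-cgroup object whose
 * flexible "ancestors" array is sized by the cgroup's nesting level at
 * allocation time.  If the object is allocated for the root (level 0) but
 * then initialized for a nested cgroup, init needs more slots than were
 * allocated -- the shape of the overflow the old blkcg_activate_policy()
 * allowed.
 */
#include <stdio.h>
#include <stdlib.h>

struct fake_cgroup {
	struct fake_cgroup *parent;
	int level;			/* 0 for root, parent->level + 1 otherwise */
};

struct fake_pd {
	int nr_slots;			/* how many ancestor slots were allocated */
	struct fake_pd *ancestors[];	/* flexible array, one slot per level */
};

/* allocation sizes the flexible array for @cg's nesting level */
static struct fake_pd *pd_alloc(const struct fake_cgroup *cg)
{
	struct fake_pd *pd = calloc(1, sizeof(*pd) +
				    (cg->level + 1) * sizeof(pd->ancestors[0]));
	if (pd)
		pd->nr_slots = cg->level + 1;
	return pd;
}

/* initialization assumes the array has @cg->level + 1 slots */
static void pd_init(struct fake_pd *pd, const struct fake_cgroup *cg)
{
	int needed = cg->level + 1;

	if (needed > pd->nr_slots) {
		/* the old kernel code had no such check and simply wrote past the end */
		printf("init for level %d needs %d slots, only %d allocated\n",
		       cg->level, needed, pd->nr_slots);
		return;
	}
	for (int l = 0; l < needed; l++)
		pd->ancestors[l] = pd;	/* placeholder ancestor pointers */
}

int main(void)
{
	struct fake_cgroup root  = { .parent = NULL,  .level = 0 };
	struct fake_cgroup child = { .parent = &root, .level = 1 };

	/* buggy pattern: preallocate for the root, initialize for a nested cgroup */
	struct fake_pd *pd = pd_alloc(&root);	/* room for one ancestor slot */
	if (pd) {
		pd_init(pd, &child);		/* needs two slots: would overflow */
		free(pd);
	}
	return 0;
}

In the patched function below, each pd is allocated against its own blkg->blkcg (and the GFP_KERNEL preallocation is pinned to the specific blkg via pinned_blkg), so the object is always sized for the blkcg it is later initialized for.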
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r--   block/blk-cgroup.c   69
1 file changed, 51 insertions(+), 18 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b6f20be0fc78..5d21027b1faf 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1362,7 +1362,7 @@ int blkcg_activate_policy(struct request_queue *q,
 			  const struct blkcg_policy *pol)
 {
 	struct blkg_policy_data *pd_prealloc = NULL;
-	struct blkcg_gq *blkg;
+	struct blkcg_gq *blkg, *pinned_blkg = NULL;
 	int ret;
 
 	if (blkcg_policy_enabled(q, pol))
@@ -1370,49 +1370,82 @@ int blkcg_activate_policy(struct request_queue *q,
 
 	if (queue_is_mq(q))
 		blk_mq_freeze_queue(q);
-pd_prealloc:
-	if (!pd_prealloc) {
-		pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q, &blkcg_root);
-		if (!pd_prealloc) {
-			ret = -ENOMEM;
-			goto out_bypass_end;
-		}
-	}
-
+retry:
 	spin_lock_irq(&q->queue_lock);
 
-	/* blkg_list is pushed at the head, reverse walk to init parents first */
+	/* blkg_list is pushed at the head, reverse walk to allocate parents first */
 	list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
 		struct blkg_policy_data *pd;
 
 		if (blkg->pd[pol->plid])
 			continue;
 
-		pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q, &blkcg_root);
-		if (!pd)
-			swap(pd, pd_prealloc);
+		/* If prealloc matches, use it; otherwise try GFP_NOWAIT */
+		if (blkg == pinned_blkg) {
+			pd = pd_prealloc;
+			pd_prealloc = NULL;
+		} else {
+			pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q,
+					      blkg->blkcg);
+		}
+
 		if (!pd) {
+			/*
+			 * GFP_NOWAIT failed.  Free the existing one and
+			 * prealloc for @blkg w/ GFP_KERNEL.
+			 */
+			if (pinned_blkg)
+				blkg_put(pinned_blkg);
+			blkg_get(blkg);
+			pinned_blkg = blkg;
+
 			spin_unlock_irq(&q->queue_lock);
-			goto pd_prealloc;
+
+			if (pd_prealloc)
+				pol->pd_free_fn(pd_prealloc);
+			pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q,
+						       blkg->blkcg);
+			if (pd_prealloc)
+				goto retry;
+			else
+				goto enomem;
 		}
 
 		blkg->pd[pol->plid] = pd;
 		pd->blkg = blkg;
 		pd->plid = pol->plid;
-		if (pol->pd_init_fn)
-			pol->pd_init_fn(pd);
 	}
 
+	/* all allocated, init in the same order */
+	if (pol->pd_init_fn)
+		list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
+			pol->pd_init_fn(blkg->pd[pol->plid]);
+
 	__set_bit(pol->plid, q->blkcg_pols);
 	ret = 0;
 
 	spin_unlock_irq(&q->queue_lock);
-out_bypass_end:
+out:
 	if (queue_is_mq(q))
 		blk_mq_unfreeze_queue(q);
+	if (pinned_blkg)
+		blkg_put(pinned_blkg);
 	if (pd_prealloc)
 		pol->pd_free_fn(pd_prealloc);
 	return ret;
+
+enomem:
+	/* alloc failed, nothing's initialized yet, free everything */
+	spin_lock_irq(&q->queue_lock);
+	list_for_each_entry(blkg, &q->blkg_list, q_node) {
+		if (blkg->pd[pol->plid]) {
+			pol->pd_free_fn(blkg->pd[pol->plid]);
+			blkg->pd[pol->plid] = NULL;
+		}
+	}
+	spin_unlock_irq(&q->queue_lock);
+	ret = -ENOMEM;
+	goto out;
 }
 EXPORT_SYMBOL_GPL(blkcg_activate_policy);
 