author     Tejun Heo <tj@kernel.org>     2019-10-15 12:03:47 -0400
committer  Jens Axboe <axboe@kernel.dk>  2019-10-15 12:13:00 -0400
commit     9d179b865449b351ad5cb76dbea480c9170d4a27 (patch)
tree       ebce8fe05f478889be94075edf4e75e34763e349 /block/blk-cgroup.c
parent     5da0fb1ab34ccfe6d49210b4f5a739c59fcbf25e (diff)
blkcg: Fix multiple bugs in blkcg_activate_policy()
blkcg_activate_policy() has the following bugs.
* cf09a8ee19ad ("blkcg: pass @q and @blkcg into
blkcg_pol_alloc_pd_fn()") added @blkcg to ->pd_alloc_fn(); however,
blkcg_activate_policy() ends up using pd's allocated for the root
blkcg for all preallocations, so ->pd_init_fn() for non-root blkcgs
can be passed in pd's which are allocated for the root blkcg.
For blk-iocost, this means that ->pd_init_fn() can write beyond the
end of the allocated object, as it determines the length of the flex
array at the end based on the blkcg's nesting level (see the sketch
after this list).
* Each pd is initialized as it is allocated, so if a later allocation
fails, the policy gets freed with pd's already initialized on it.
* After the above partial failure, the partial pds are not freed.
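
To make the first bug concrete: a pd whose trailing flex array is sized by
the blkcg's nesting level overflows as soon as an object sized for the root
(level 0) is initialized for a nested blkcg. Below is a minimal userspace
sketch of that shape; example_pd, example_pd_alloc() and example_pd_init()
are illustrative stand-ins, not kernel APIs, and the sketch deliberately
reproduces the out-of-bounds writes:

#include <stdlib.h>

/* Illustrative stand-in for a policy pd with a per-level flex array. */
struct example_pd {
        int level;              /* nesting level this pd was sized for */
        void *per_level[];      /* one slot per level, 0..level */
};

/* Mirrors ->pd_alloc_fn(gfp, q, blkcg): the size depends on @level. */
static struct example_pd *example_pd_alloc(int level)
{
        struct example_pd *pd;

        pd = calloc(1, sizeof(*pd) + (level + 1) * sizeof(pd->per_level[0]));
        if (pd)
                pd->level = level;
        return pd;
}

/* Mirrors ->pd_init_fn(): touches one slot per ancestor level. */
static void example_pd_init(struct example_pd *pd, int blkcg_level)
{
        for (int lvl = 0; lvl <= blkcg_level; lvl++)
                pd->per_level[lvl] = NULL;      /* blkcg_level + 1 writes */
}

int main(void)
{
        /* The buggy flow: prealloc sized for the root blkcg (level 0)... */
        struct example_pd *pd = example_pd_alloc(0);

        if (!pd)
                return 1;
        /*
         * ...but initialized for a blkcg nested two levels deep: the
         * writes to slots 1 and 2 land past the end of the allocation.
         */
        example_pd_init(pd, 2);
        free(pd);
        return 0;
}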
This patch fixes all the above issues by
* Restructuring blkcg_activate_policy() so that the alloc and init passes
are separate. Init takes place only after all allocs have succeeded, and
on failure all allocated pds are freed (a distilled sketch of this
two-pass shape follows the list).
* Unifying and fixing the cleanup of the remaining pd_prealloc.
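
Distilled to its shape, the restructuring means no pd is initialized until
every allocation has succeeded, so the failure path only ever has to free
uninitialized objects. A minimal userspace sketch of that two-pass pattern;
item, alloc_one(), init_one() and activate() are hypothetical names, not
the kernel code:

#include <stdlib.h>

#define NR_ITEMS 8

/* Hypothetical payload; stands in for struct blkg_policy_data. */
struct item {
        int data;
};

static struct item *alloc_one(void)
{
        return calloc(1, sizeof(struct item));
}

static void init_one(struct item *it)
{
        it->data = 1;
}

static int activate(struct item *pds[NR_ITEMS])
{
        int i;

        /* Pass 1: allocate everything; nothing is initialized yet. */
        for (i = 0; i < NR_ITEMS; i++) {
                pds[i] = alloc_one();
                if (!pds[i])
                        goto enomem;
        }

        /* Pass 2: init only after every allocation has succeeded. */
        for (i = 0; i < NR_ITEMS; i++)
                init_one(pds[i]);
        return 0;

enomem:
        /* Nothing was initialized; freeing the partial set suffices. */
        while (i-- > 0) {
                free(pds[i]);
                pds[i] = NULL;
        }
        return -1;
}

int main(void)
{
        struct item *pds[NR_ITEMS] = { NULL };
        int ret = activate(pds);

        for (int i = 0; i < NR_ITEMS; i++)
                free(pds[i]);
        return ret ? 1 : 0;
}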
Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: cf09a8ee19ad ("blkcg: pass @q and @blkcg into blkcg_pol_alloc_pd_fn()")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r--  block/blk-cgroup.c | 69
1 file changed, 51 insertions(+), 18 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b6f20be0fc78..5d21027b1faf 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1362,7 +1362,7 @@ int blkcg_activate_policy(struct request_queue *q,
                          const struct blkcg_policy *pol)
 {
        struct blkg_policy_data *pd_prealloc = NULL;
-       struct blkcg_gq *blkg;
+       struct blkcg_gq *blkg, *pinned_blkg = NULL;
        int ret;
 
        if (blkcg_policy_enabled(q, pol))
@@ -1370,49 +1370,82 @@ int blkcg_activate_policy(struct request_queue *q,
 
        if (queue_is_mq(q))
                blk_mq_freeze_queue(q);
-pd_prealloc:
-       if (!pd_prealloc) {
-               pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q, &blkcg_root);
-               if (!pd_prealloc) {
-                       ret = -ENOMEM;
-                       goto out_bypass_end;
-               }
-       }
-
+retry:
        spin_lock_irq(&q->queue_lock);
 
-       /* blkg_list is pushed at the head, reverse walk to init parents first */
+       /* blkg_list is pushed at the head, reverse walk to allocate parents first */
        list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
                struct blkg_policy_data *pd;
 
                if (blkg->pd[pol->plid])
                        continue;
 
-               pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q, &blkcg_root);
-               if (!pd)
-                       swap(pd, pd_prealloc);
+               /* If prealloc matches, use it; otherwise try GFP_NOWAIT */
+               if (blkg == pinned_blkg) {
+                       pd = pd_prealloc;
+                       pd_prealloc = NULL;
+               } else {
+                       pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q,
+                                             blkg->blkcg);
+               }
+
                if (!pd) {
+                       /*
+                        * GFP_NOWAIT failed.  Free the existing one and
+                        * prealloc for @blkg w/ GFP_KERNEL.
+                        */
+                       if (pinned_blkg)
+                               blkg_put(pinned_blkg);
+                       blkg_get(blkg);
+                       pinned_blkg = blkg;
+
                        spin_unlock_irq(&q->queue_lock);
-                       goto pd_prealloc;
+
+                       if (pd_prealloc)
+                               pol->pd_free_fn(pd_prealloc);
+                       pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q,
+                                                      blkg->blkcg);
+                       if (pd_prealloc)
+                               goto retry;
+                       else
+                               goto enomem;
                }
 
                blkg->pd[pol->plid] = pd;
                pd->blkg = blkg;
                pd->plid = pol->plid;
-               if (pol->pd_init_fn)
-                       pol->pd_init_fn(pd);
        }
 
+       /* all allocated, init in the same order */
+       if (pol->pd_init_fn)
+               list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
+                       pol->pd_init_fn(blkg->pd[pol->plid]);
+
        __set_bit(pol->plid, q->blkcg_pols);
        ret = 0;
 
        spin_unlock_irq(&q->queue_lock);
-out_bypass_end:
+out:
        if (queue_is_mq(q))
                blk_mq_unfreeze_queue(q);
+       if (pinned_blkg)
+               blkg_put(pinned_blkg);
        if (pd_prealloc)
                pol->pd_free_fn(pd_prealloc);
        return ret;
+
+enomem:
+       /* alloc failed, nothing's initialized yet, free everything */
+       spin_lock_irq(&q->queue_lock);
+       list_for_each_entry(blkg, &q->blkg_list, q_node) {
+               if (blkg->pd[pol->plid]) {
+                       pol->pd_free_fn(blkg->pd[pol->plid]);
+                       blkg->pd[pol->plid] = NULL;
+               }
+       }
+       spin_unlock_irq(&q->queue_lock);
+       ret = -ENOMEM;
+       goto out;
 }
 EXPORT_SYMBOL_GPL(blkcg_activate_policy);
 
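
One design note the diff implies: ->pd_alloc_fn() is now called with each
blkg's own blkcg rather than blkcg_root, so the GFP_KERNEL fallback
allocation has to survive dropping queue_lock. The retry path therefore
pins the blkg with blkg_get() before unlocking, preallocates with
GFP_KERNEL, and hands the prealloc back only when the walk reaches the
pinned blkg again (blkg == pinned_blkg); the pin is released at out via
blkg_put().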