author		Paul Turner <pjt@google.com>		2011-07-21 12:43:34 -0400
committer	Ingo Molnar <mingo@elte.hu>		2011-08-14 06:03:36 -0400
commit		671fd9dabe5239ad218c7eb48b2b9edee50250e6 (patch)
tree		351f59453eb699661bd811210f24d8b7fd554ca4 /kernel/sched_fair.c
parent		85dac906bec3bb41bfaa7ccaa65c4706de5cfdf8 (diff)
sched: Add support for unthrottling group entities
At the start of each period we refresh the global bandwidth pool. At this time
we must also unthrottle any cfs_rq entities that are now within bandwidth once
more (as quota permits).

Unthrottled cfs_rqs have their throttled flag cleared and their sched_entities
re-enqueued.
Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110721184757.574628950@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
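
For context on when this path fires, the sketch below shows a group hitting its quota and then being unthrottled at the next period refresh. It is illustrative only, not part of the patch: it assumes a cgroup-v1 cpu controller mounted at /sys/fs/cgroup/cpu and a pre-created group directory named "demo"; the mount point, group name and the write_str() helper are assumptions made for the example.

```c
/*
 * Illustrative sketch: limit the current task to 20ms of CPU per 100ms
 * period and spin.  Once the quota is consumed the group's cfs_rq is
 * throttled; the period timer path below unthrottles it on refresh.
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Write a string to a (hypothetical) cgroup control file, exiting on error. */
static void write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		exit(1);
	}
	fputs(val, f);
	fclose(f);
}

int main(void)
{
	/* Assumed cgroup-v1 layout; adjust to the local mount and group. */
	const char *grp = "/sys/fs/cgroup/cpu/demo";
	char path[128], pid[32];

	snprintf(path, sizeof(path), "%s/cpu.cfs_period_us", grp);
	write_str(path, "100000");		/* 100ms period */

	snprintf(path, sizeof(path), "%s/cpu.cfs_quota_us", grp);
	write_str(path, "20000");		/* 20ms of quota per period */

	snprintf(path, sizeof(path), "%s/tasks", grp);
	snprintf(pid, sizeof(pid), "%d", (int)getpid());
	write_str(path, pid);

	/*
	 * Spin: after roughly 20ms of each 100ms period the group is
	 * throttled, then re-enqueued when the pool is refilled.
	 */
	for (;;)
		;
}
```

With these (assumed) values the spinning task runs for about 20ms per period, throttle_cfs_rq() dequeues its group, and the next do_sched_cfs_period_timer() run hands the refreshed pool back via distribute_cfs_runtime()/unthrottle_cfs_rq() in the diff below.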
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r-- | kernel/sched_fair.c | 127
1 file changed, 123 insertions(+), 4 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 72c9d4ed5991..76411950ff3b 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1439,6 +1439,84 @@ static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	raw_spin_unlock(&cfs_b->lock);
 }
 
+static void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
+{
+	struct rq *rq = rq_of(cfs_rq);
+	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+	struct sched_entity *se;
+	int enqueue = 1;
+	long task_delta;
+
+	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
+
+	cfs_rq->throttled = 0;
+	raw_spin_lock(&cfs_b->lock);
+	list_del_rcu(&cfs_rq->throttled_list);
+	raw_spin_unlock(&cfs_b->lock);
+
+	if (!cfs_rq->load.weight)
+		return;
+
+	task_delta = cfs_rq->h_nr_running;
+	for_each_sched_entity(se) {
+		if (se->on_rq)
+			enqueue = 0;
+
+		cfs_rq = cfs_rq_of(se);
+		if (enqueue)
+			enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
+		cfs_rq->h_nr_running += task_delta;
+
+		if (cfs_rq_throttled(cfs_rq))
+			break;
+	}
+
+	if (!se)
+		rq->nr_running += task_delta;
+
+	/* determine whether we need to wake up potentially idle cpu */
+	if (rq->curr == rq->idle && rq->cfs.nr_running)
+		resched_task(rq->curr);
+}
+
+static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
+		u64 remaining, u64 expires)
+{
+	struct cfs_rq *cfs_rq;
+	u64 runtime = remaining;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq,
+				throttled_list) {
+		struct rq *rq = rq_of(cfs_rq);
+
+		raw_spin_lock(&rq->lock);
+		if (!cfs_rq_throttled(cfs_rq))
+			goto next;
+
+		runtime = -cfs_rq->runtime_remaining + 1;
+		if (runtime > remaining)
+			runtime = remaining;
+		remaining -= runtime;
+
+		cfs_rq->runtime_remaining += runtime;
+		cfs_rq->runtime_expires = expires;
+
+		/* we check whether we're throttled above */
+		if (cfs_rq->runtime_remaining > 0)
+			unthrottle_cfs_rq(cfs_rq);
+
+next:
+		raw_spin_unlock(&rq->lock);
+
+		if (!remaining)
+			break;
+	}
+	rcu_read_unlock();
+
+	return remaining;
+}
+
 /*
  * Responsible for refilling a task_group's bandwidth and unthrottling its
  * cfs_rqs as appropriate. If there has been no activity within the last
@@ -1447,23 +1525,64 @@ static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq)
  */
 static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
 {
-	int idle = 1;
+	u64 runtime, runtime_expires;
+	int idle = 1, throttled;
 
 	raw_spin_lock(&cfs_b->lock);
 	/* no need to continue the timer with no bandwidth constraint */
 	if (cfs_b->quota == RUNTIME_INF)
 		goto out_unlock;
 
-	idle = cfs_b->idle;
+	throttled = !list_empty(&cfs_b->throttled_cfs_rq);
+	/* idle depends on !throttled (for the case of a large deficit) */
+	idle = cfs_b->idle && !throttled;
+
 	/* if we're going inactive then everything else can be deferred */
 	if (idle)
 		goto out_unlock;
 
 	__refill_cfs_bandwidth_runtime(cfs_b);
 
+	if (!throttled) {
+		/* mark as potentially idle for the upcoming period */
+		cfs_b->idle = 1;
+		goto out_unlock;
+	}
+
+	/*
+	 * There are throttled entities so we must first use the new bandwidth
+	 * to unthrottle them before making it generally available. This
+	 * ensures that all existing debts will be paid before a new cfs_rq is
+	 * allowed to run.
+	 */
+	runtime = cfs_b->runtime;
+	runtime_expires = cfs_b->runtime_expires;
+	cfs_b->runtime = 0;
+
+	/*
+	 * This check is repeated as we are holding onto the new bandwidth
+	 * while we unthrottle. This can potentially race with an unthrottled
+	 * group trying to acquire new bandwidth from the global pool.
+	 */
+	while (throttled && runtime > 0) {
+		raw_spin_unlock(&cfs_b->lock);
+		/* we can't nest cfs_b->lock while distributing bandwidth */
+		runtime = distribute_cfs_runtime(cfs_b, runtime,
+						 runtime_expires);
+		raw_spin_lock(&cfs_b->lock);
+
+		throttled = !list_empty(&cfs_b->throttled_cfs_rq);
+	}
 
-	/* mark as potentially idle for the upcoming period */
-	cfs_b->idle = 1;
+	/* return (any) remaining runtime */
+	cfs_b->runtime = runtime;
+	/*
+	 * While we are ensured activity in the period following an
+	 * unthrottle, this also covers the case in which the new bandwidth is
+	 * insufficient to cover the existing bandwidth deficit. (Forcing the
+	 * timer to remain active while there are any throttled entities.)
+	 */
+	cfs_b->idle = 0;
 out_unlock:
 	if (idle)
 		cfs_b->timer_active = 0;