Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--   kernel/sched_fair.c   90
1 file changed, 81 insertions(+), 9 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index af73a8a85eef..9d1adbd0b615 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1272,11 +1272,30 @@ static inline u64 sched_cfs_bandwidth_slice(void)
 	return (u64)sysctl_sched_cfs_bandwidth_slice * NSEC_PER_USEC;
 }
 
+/*
+ * Replenish runtime according to assigned quota and update expiration time.
+ * We use sched_clock_cpu directly instead of rq->clock to avoid adding
+ * additional synchronization around rq->lock.
+ *
+ * requires cfs_b->lock
+ */
+static void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
+{
+	u64 now;
+
+	if (cfs_b->quota == RUNTIME_INF)
+		return;
+
+	now = sched_clock_cpu(smp_processor_id());
+	cfs_b->runtime = cfs_b->quota;
+	cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period);
+}
+
 static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
 	struct task_group *tg = cfs_rq->tg;
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
-	u64 amount = 0, min_amount;
+	u64 amount = 0, min_amount, expires;
 
 	/* note: this is a positive sum as runtime_remaining <= 0 */
 	min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining;
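The refill helper introduced above resets the global pool to its full quota and pushes the pool's deadline one period past the current clock. A minimal userspace sketch of that bookkeeping, with hypothetical names (struct pool, refill) and illustrative values, 50ms of quota per 100ms period, rather than the kernel's own structures:

#include <stdint.h>
#include <stdio.h>

#define RUNTIME_INF ((uint64_t)~0ULL)
#define NSEC_PER_MSEC 1000000ULL

/* hypothetical stand-in for the refill-relevant cfs_bandwidth fields */
struct pool {
	uint64_t quota;            /* runtime granted per period, or RUNTIME_INF */
	uint64_t period;           /* period length in ns */
	uint64_t runtime;          /* runtime remaining in the global pool */
	uint64_t runtime_expires;  /* deadline after which runtime is stale */
};

/* mirrors __refill_cfs_bandwidth_runtime(): reset the pool and push the
 * deadline one full period past 'now' */
static void refill(struct pool *p, uint64_t now)
{
	if (p->quota == RUNTIME_INF)
		return;
	p->runtime = p->quota;
	p->runtime_expires = now + p->period;
}

int main(void)
{
	struct pool p = { 50 * NSEC_PER_MSEC, 100 * NSEC_PER_MSEC, 0, 0 };

	refill(&p, 1000 * NSEC_PER_MSEC);
	printf("runtime=%llums, expires at t=%llums\n",
	       (unsigned long long)(p.runtime / NSEC_PER_MSEC),
	       (unsigned long long)(p.runtime_expires / NSEC_PER_MSEC));
	return 0;
}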
@@ -1285,9 +1304,16 @@ static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 	if (cfs_b->quota == RUNTIME_INF)
 		amount = min_amount;
 	else {
-		/* ensure bandwidth timer remains active under consumption */
-		if (!cfs_b->timer_active)
+		/*
+		 * If the bandwidth pool has become inactive, then at least one
+		 * period must have elapsed since the last consumption.
+		 * Refresh the global state and ensure the bandwidth timer
+		 * becomes active.
+		 */
+		if (!cfs_b->timer_active) {
+			__refill_cfs_bandwidth_runtime(cfs_b);
 			__start_cfs_bandwidth(cfs_b);
+		}
 
 		if (cfs_b->runtime > 0) {
 			amount = min(cfs_b->runtime, min_amount);
@@ -1295,19 +1321,61 @@ static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 			cfs_b->idle = 0;
 		}
 	}
+	expires = cfs_b->runtime_expires;
 	raw_spin_unlock(&cfs_b->lock);
 
 	cfs_rq->runtime_remaining += amount;
+	/*
+	 * we may have advanced our local expiration to account for allowed
+	 * spread between our sched_clock and the one on which runtime was
+	 * issued.
+	 */
+	if ((s64)(expires - cfs_rq->runtime_expires) > 0)
+		cfs_rq->runtime_expires = expires;
 }
 
-static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
-		unsigned long delta_exec)
+/*
+ * Note: This depends on the synchronization provided by sched_clock and the
+ * fact that rq->clock snapshots this value.
+ */
+static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
-	if (!cfs_rq->runtime_enabled)
+	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+	struct rq *rq = rq_of(cfs_rq);
+
+	/* if the deadline is ahead of our clock, nothing to do */
+	if (likely((s64)(rq->clock - cfs_rq->runtime_expires) < 0))
+		return;
+
+	if (cfs_rq->runtime_remaining < 0)
 		return;
 
+	/*
+	 * If the local deadline has passed we have to consider the
+	 * possibility that our sched_clock is 'fast' and the global deadline
+	 * has not truly expired.
+	 *
+	 * Fortunately we can determine whether this is the case by checking
+	 * whether the global deadline has advanced.
+	 */
+
+	if ((s64)(cfs_rq->runtime_expires - cfs_b->runtime_expires) >= 0) {
+		/* extend local deadline, drift is bounded above by 2 ticks */
+		cfs_rq->runtime_expires += TICK_NSEC;
+	} else {
+		/* global deadline is ahead, expiration has passed */
+		cfs_rq->runtime_remaining = 0;
+	}
+}
+
+static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
+				     unsigned long delta_exec)
+{
+	/* dock delta_exec before expiring quota (as it could span periods) */
 	cfs_rq->runtime_remaining -= delta_exec;
-	if (cfs_rq->runtime_remaining > 0)
+	expire_cfs_rq_runtime(cfs_rq);
+
+	if (likely(cfs_rq->runtime_remaining > 0))
 		return;
 
 	assign_cfs_rq_runtime(cfs_rq);
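The new expiration path reconciles a locally-expired deadline with the global one: if the local deadline has caught up to the global deadline, the global pool was not refreshed in the meantime, so only sched_clock drift (bounded above by two ticks, per the comment) can explain the expiry and the local deadline is extended; otherwise the runtime truly belongs to an earlier period and is discarded. A self-contained sketch of that decision, using the same wraparound-safe (s64)(a - b) comparison idiom; the names and the 1ms tick are assumptions for illustration:

#include <stdint.h>
#include <stdio.h>

#define TICK_NS 1000000LL	/* assumed 1ms tick, for illustration */

/* hypothetical per-cpu state mirroring the cfs_rq fields used here */
struct local_rq {
	int64_t runtime_remaining;
	uint64_t runtime_expires;
};

/*
 * Models expire_cfs_rq_runtime(): the signed difference of two u64
 * timestamps orders them correctly even across wraparound.
 */
static void expire(struct local_rq *rq, uint64_t now, uint64_t global_expires)
{
	/* local deadline still ahead of the clock: nothing to do */
	if ((int64_t)(now - rq->runtime_expires) < 0)
		return;
	if (rq->runtime_remaining < 0)
		return;

	if ((int64_t)(rq->runtime_expires - global_expires) >= 0) {
		/* global deadline has not advanced: our clock is 'fast',
		 * so extend the local deadline by one tick of slack */
		rq->runtime_expires += TICK_NS;
	} else {
		/* global deadline moved on: the runtime really is stale */
		rq->runtime_remaining = 0;
	}
}

int main(void)
{
	struct local_rq rq = { 5 * TICK_NS, 100 * TICK_NS };

	expire(&rq, 100 * TICK_NS, 100 * TICK_NS);	/* drift case */
	printf("after drift: remaining=%lld expires=%llu\n",
	       (long long)rq.runtime_remaining,
	       (unsigned long long)rq.runtime_expires);

	expire(&rq, 101 * TICK_NS, 200 * TICK_NS);	/* true expiry */
	printf("after expiry: remaining=%lld\n",
	       (long long)rq.runtime_remaining);
	return 0;
}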
@@ -1338,7 +1406,12 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
 		goto out_unlock;
 
 	idle = cfs_b->idle;
-	cfs_b->runtime = cfs_b->quota;
+	/* if we're going inactive then everything else can be deferred */
+	if (idle)
+		goto out_unlock;
+
+	__refill_cfs_bandwidth_runtime(cfs_b);
+
 
 	/* mark as potentially idle for the upcoming period */
 	cfs_b->idle = 1;
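The period-timer change defers all work when the pool went idle over the last period: the timer is allowed to lapse, and the next consumer restores the state through the assign path above. A compact model of that handoff, again with hypothetical names:

#include <stdint.h>
#include <stdio.h>

/* hypothetical pool state; only the fields the timer path touches */
struct pool_state {
	uint64_t quota, period;
	uint64_t runtime, runtime_expires;
	int idle;
};

/*
 * Models do_sched_cfs_period_timer(): an idle pool skips the refill and
 * lets the timer lapse; otherwise refresh quota and deadline (as
 * __refill_cfs_bandwidth_runtime does) and mark the pool tentatively
 * idle for the upcoming period.
 */
static int period_tick(struct pool_state *p, uint64_t now)
{
	if (p->idle)
		return 1;	/* going inactive: everything else deferred */

	p->runtime = p->quota;
	p->runtime_expires = now + p->period;

	p->idle = 1;
	return 0;		/* keep the period timer armed */
}

int main(void)
{
	struct pool_state p = { 50, 100, 0, 0, 0 };

	printf("tick1 -> idle? %d\n", period_tick(&p, 100)); /* refills */
	printf("tick2 -> idle? %d\n", period_tick(&p, 200)); /* lapses */
	return 0;
}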
@@ -1557,7 +1630,6 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
 
 	return wl;
 }
-
 #else
 
 static inline unsigned long effective_load(struct task_group *tg, int cpu,