aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJohn Stultz <john.stultz@linaro.org>2013-12-06 20:25:21 -0500
committerJohn Stultz <john.stultz@linaro.org>2014-07-23 18:01:56 -0400
commitdc491596f6394382fbc74ad331156207d619fa0a (patch)
treec5496cc0145eaeea40e42aac5e2baecc0a7923d0
parente2dff1ec0cc81fcf3e0696604bacc3e1c816538c (diff)
timekeeping: Rework frequency adjustments to work better w/ nohz
The existing timekeeping_adjust logic has always been complicated to understand. Further, since it was developed prior to NOHZ becoming common, it's not surprising that it performs poorly when NOHZ is enabled. Since Miroslav pointed out the problematic nature of the existing code in the NOHZ case, I've tried to refactor the code to perform better. The problem with the previous approach was that it tried to adjust for the total cumulative error using a scaled dampening factor. This resulted in large errors being corrected slowly, while small errors were corrected quickly. With NOHZ the timekeeping code doesn't know how far out the next tick will be, so this results in bad over-correction to small errors, and insufficient correction to large errors. Inspired by Miroslav's patch, I've refactored the code to try to address the correction in two steps. 1) Check the future freq error for the next tick, and if the frequency error is large, try to make sure we correct it so it doesn't cause much accumulated error. 2) Then make a small single unit adjustment to correct any cumulative error that has collected over time. This method performs fairly well in the simulator Miroslav created. Major credit to Miroslav for pointing out the issue, providing the original patch to resolve this, a simulator for testing, as well as helping debug and resolve issues in my implementation so that it performed closer to his original implementation. Cc: Miroslav Lichvar <mlichvar@redhat.com> Cc: Richard Cochran <richardcochran@gmail.com> Cc: Prarit Bhargava <prarit@redhat.com> Reported-by: Miroslav Lichvar <mlichvar@redhat.com> Signed-off-by: John Stultz <john.stultz@linaro.org>
-rw-r--r--include/linux/timekeeper_internal.h1
-rw-r--r--kernel/time/timekeeping.c193
2 files changed, 84 insertions, 110 deletions
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 97381997625b..f7ac48d2edf5 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -92,6 +92,7 @@ struct timekeeper {
92 u32 raw_interval; 92 u32 raw_interval;
93 s64 ntp_error; 93 s64 ntp_error;
94 u32 ntp_error_shift; 94 u32 ntp_error_shift;
95 u32 ntp_err_mult;
95}; 96};
96 97
97#ifdef CONFIG_GENERIC_TIME_VSYSCALL 98#ifdef CONFIG_GENERIC_TIME_VSYSCALL
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 2b56b959615b..43c706a7a728 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -178,6 +178,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
178 * to counteract clock drifting. 178 * to counteract clock drifting.
179 */ 179 */
180 tk->tkr.mult = clock->mult; 180 tk->tkr.mult = clock->mult;
181 tk->ntp_err_mult = 0;
181} 182}
182 183
183/* Timekeeper helper functions. */ 184/* Timekeeper helper functions. */
@@ -1257,125 +1258,34 @@ static int __init timekeeping_init_ops(void)
1257 register_syscore_ops(&timekeeping_syscore_ops); 1258 register_syscore_ops(&timekeeping_syscore_ops);
1258 return 0; 1259 return 0;
1259} 1260}
1260
1261device_initcall(timekeeping_init_ops); 1261device_initcall(timekeeping_init_ops);
1262 1262
1263/* 1263/*
1264 * If the error is already larger, we look ahead even further 1264 * Apply a multiplier adjustment to the timekeeper
1265 * to compensate for late or lost adjustments.
1266 */
1267static __always_inline int timekeeping_bigadjust(struct timekeeper *tk,
1268 s64 error, s64 *interval,
1269 s64 *offset)
1270{
1271 s64 tick_error, i;
1272 u32 look_ahead, adj;
1273 s32 error2, mult;
1274
1275 /*
1276 * Use the current error value to determine how much to look ahead.
1277 * The larger the error the slower we adjust for it to avoid problems
1278 * with losing too many ticks, otherwise we would overadjust and
1279 * produce an even larger error. The smaller the adjustment the
1280 * faster we try to adjust for it, as lost ticks can do less harm
1281 * here. This is tuned so that an error of about 1 msec is adjusted
1282 * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks).
1283 */
1284 error2 = tk->ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ);
1285 error2 = abs(error2);
1286 for (look_ahead = 0; error2 > 0; look_ahead++)
1287 error2 >>= 2;
1288
1289 /*
1290 * Now calculate the error in (1 << look_ahead) ticks, but first
1291 * remove the single look ahead already included in the error.
1292 */
1293 tick_error = ntp_tick_length() >> (tk->ntp_error_shift + 1);
1294 tick_error -= tk->xtime_interval >> 1;
1295 error = ((error - tick_error) >> look_ahead) + tick_error;
1296
1297 /* Finally calculate the adjustment shift value. */
1298 i = *interval;
1299 mult = 1;
1300 if (error < 0) {
1301 error = -error;
1302 *interval = -*interval;
1303 *offset = -*offset;
1304 mult = -1;
1305 }
1306 for (adj = 0; error > i; adj++)
1307 error >>= 1;
1308
1309 *interval <<= adj;
1310 *offset <<= adj;
1311 return mult << adj;
1312}
1313
1314/*
1315 * Adjust the multiplier to reduce the error value,
1316 * this is optimized for the most common adjustments of -1,0,1,
1317 * for other values we can do a bit more work.
1318 */ 1265 */
1319static void timekeeping_adjust(struct timekeeper *tk, s64 offset) 1266static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
1267 s64 offset,
1268 bool negative,
1269 int adj_scale)
1320{ 1270{
1321 s64 error, interval = tk->cycle_interval; 1271 s64 interval = tk->cycle_interval;
1322 int adj; 1272 s32 mult_adj = 1;
1323 1273
1324 /* 1274 if (negative) {
1325 * The point of this is to check if the error is greater than half 1275 mult_adj = -mult_adj;
1326 * an interval. 1276 interval = -interval;
1327 * 1277 offset = -offset;
1328 * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs.
1329 *
1330 * Note we subtract one in the shift, so that error is really error*2.
1331 * This "saves" dividing(shifting) interval twice, but keeps the
1332 * (error > interval) comparison as still measuring if error is
1333 * larger than half an interval.
1334 *
1335 * Note: It does not "save" on aggravation when reading the code.
1336 */
1337 error = tk->ntp_error >> (tk->ntp_error_shift - 1);
1338 if (error > interval) {
1339 /*
1340 * We now divide error by 4(via shift), which checks if
1341 * the error is greater than twice the interval.
1342 * If it is greater, we need a bigadjust, if its smaller,
1343 * we can adjust by 1.
1344 */
1345 error >>= 2;
1346 if (likely(error <= interval))
1347 adj = 1;
1348 else
1349 adj = timekeeping_bigadjust(tk, error, &interval, &offset);
1350 } else {
1351 if (error < -interval) {
1352 /* See comment above, this is just switched for the negative */
1353 error >>= 2;
1354 if (likely(error >= -interval)) {
1355 adj = -1;
1356 interval = -interval;
1357 offset = -offset;
1358 } else {
1359 adj = timekeeping_bigadjust(tk, error, &interval, &offset);
1360 }
1361 } else {
1362 goto out_adjust;
1363 }
1364 } 1278 }
1279 mult_adj <<= adj_scale;
1280 interval <<= adj_scale;
1281 offset <<= adj_scale;
1365 1282
1366 if (unlikely(tk->tkr.clock->maxadj &&
1367 (tk->tkr.mult + adj > tk->tkr.clock->mult + tk->tkr.clock->maxadj))) {
1368 printk_deferred_once(KERN_WARNING
1369 "Adjusting %s more than 11%% (%ld vs %ld)\n",
1370 tk->tkr.clock->name, (long)tk->tkr.mult + adj,
1371 (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj);
1372 }
1373 /* 1283 /*
1374 * So the following can be confusing. 1284 * So the following can be confusing.
1375 * 1285 *
1376 * To keep things simple, lets assume adj == 1 for now. 1286 * To keep things simple, lets assume mult_adj == 1 for now.
1377 * 1287 *
1378 * When adj != 1, remember that the interval and offset values 1288 * When mult_adj != 1, remember that the interval and offset values
1379 * have been appropriately scaled so the math is the same. 1289 * have been appropriately scaled so the math is the same.
1380 * 1290 *
1381 * The basic idea here is that we're increasing the multiplier 1291 * The basic idea here is that we're increasing the multiplier
@@ -1419,12 +1329,76 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
1419 * 1329 *
1420 * XXX - TODO: Doc ntp_error calculation. 1330 * XXX - TODO: Doc ntp_error calculation.
1421 */ 1331 */
1422 tk->tkr.mult += adj; 1332 tk->tkr.mult += mult_adj;
1423 tk->xtime_interval += interval; 1333 tk->xtime_interval += interval;
1424 tk->tkr.xtime_nsec -= offset; 1334 tk->tkr.xtime_nsec -= offset;
1425 tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; 1335 tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
1336}
1337
1338/*
1339 * Calculate the multiplier adjustment needed to match the frequency
1340 * specified by NTP
1341 */
1342static __always_inline void timekeeping_freqadjust(struct timekeeper *tk,
1343 s64 offset)
1344{
1345 s64 interval = tk->cycle_interval;
1346 s64 xinterval = tk->xtime_interval;
1347 s64 tick_error;
1348 bool negative;
1349 u32 adj;
1350
1351 /* Remove any current error adj from freq calculation */
1352 if (tk->ntp_err_mult)
1353 xinterval -= tk->cycle_interval;
1354
1355 /* Calculate current error per tick */
1356 tick_error = ntp_tick_length() >> tk->ntp_error_shift;
1357 tick_error -= (xinterval + tk->xtime_remainder);
1358
1359 /* Don't worry about correcting it if its small */
1360 if (likely((tick_error >= 0) && (tick_error <= interval)))
1361 return;
1362
1363 /* preserve the direction of correction */
1364 negative = (tick_error < 0);
1365
1366 /* Sort out the magnitude of the correction */
1367 tick_error = abs(tick_error);
1368 for (adj = 0; tick_error > interval; adj++)
1369 tick_error >>= 1;
1370
1371 /* scale the corrections */
1372 timekeeping_apply_adjustment(tk, offset, negative, adj);
1373}
1374
1375/*
1376 * Adjust the timekeeper's multiplier to the correct frequency
1377 * and also to reduce the accumulated error value.
1378 */
1379static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
1380{
1381 /* Correct for the current frequency error */
1382 timekeeping_freqadjust(tk, offset);
1383
1384 /* Next make a small adjustment to fix any cumulative error */
1385 if (!tk->ntp_err_mult && (tk->ntp_error > 0)) {
1386 tk->ntp_err_mult = 1;
1387 timekeeping_apply_adjustment(tk, offset, 0, 0);
1388 } else if (tk->ntp_err_mult && (tk->ntp_error <= 0)) {
1389 /* Undo any existing error adjustment */
1390 timekeeping_apply_adjustment(tk, offset, 1, 0);
1391 tk->ntp_err_mult = 0;
1392 }
1393
1394 if (unlikely(tk->tkr.clock->maxadj &&
1395 (tk->tkr.mult > tk->tkr.clock->mult + tk->tkr.clock->maxadj))) {
1396 printk_once(KERN_WARNING
1397 "Adjusting %s more than 11%% (%ld vs %ld)\n",
1398 tk->tkr.clock->name, (long)tk->tkr.mult,
1399 (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj);
1400 }
1426 1401
1427out_adjust:
1428 /* 1402 /*
1429 * It may be possible that when we entered this function, xtime_nsec 1403 * It may be possible that when we entered this function, xtime_nsec
1430 * was very small. Further, if we're slightly speeding the clocksource 1404 * was very small. Further, if we're slightly speeding the clocksource
@@ -1444,7 +1418,6 @@ out_adjust:
1444 tk->tkr.xtime_nsec = 0; 1418 tk->tkr.xtime_nsec = 0;
1445 tk->ntp_error += neg << tk->ntp_error_shift; 1419 tk->ntp_error += neg << tk->ntp_error_shift;
1446 } 1420 }
1447
1448} 1421}
1449 1422
1450/** 1423/**