Diffstat (limited to 'kernel/timer.c')
-rw-r--r--	kernel/timer.c	154
1 file changed, 77 insertions(+), 77 deletions(-)
diff --git a/kernel/timer.c b/kernel/timer.c
index 396a3c024c2c..1d7dd6267c2d 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -84,7 +84,7 @@ typedef struct tvec_t_base_s tvec_base_t;
 
 tvec_base_t boot_tvec_bases;
 EXPORT_SYMBOL(boot_tvec_bases);
-static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = { &boot_tvec_bases };
+static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
 
 static inline void set_running_timer(tvec_base_t *base,
 				     struct timer_list *timer)
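Note: the braces are dropped because DEFINE_PER_CPU() declares a plain variable of the given type, so a pointer takes a scalar initializer; the braced form was aggregate-style initialization of a scalar, which tools such as sparse complain about. A simplified sketch of the macro as it looked in this era's <linux/percpu.h> (from memory; section details trimmed and the !CONFIG_SMP variant omitted), so the patched line declares an ordinary pointer per_cpu__tvec_bases initialized to &boot_tvec_bases:

#define DEFINE_PER_CPU(type, name) \
	__attribute__((__section__(".data.percpu"))) \
	__typeof__(type) per_cpu__##name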
@@ -374,6 +374,7 @@ int del_timer_sync(struct timer_list *timer)
 		int ret = try_to_del_timer_sync(timer);
 		if (ret >= 0)
 			return ret;
+		cpu_relax();
 	}
 }
 
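Note: try_to_del_timer_sync() returns a negative value when the timer's callback is currently executing on another CPU, so del_timer_sync() must spin and retry. The added cpu_relax() turns that retry loop into a polite busy-wait: it emits an architecture-specific pause hint (the PAUSE instruction on x86), letting a sibling hardware thread, possibly the one running the callback, make progress. The whole function, reconstructed approximately from the surrounding context:

int del_timer_sync(struct timer_list *timer)
{
	for (;;) {
		int ret = try_to_del_timer_sync(timer);
		if (ret >= 0)
			return ret;	/* 1: deleted, 0: was not pending */
		cpu_relax();		/* handler running elsewhere; spin politely */
	}
}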
@@ -407,7 +408,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index)
  * This function cascades all vectors and executes all expired timer
  * vectors.
  */
-#define INDEX(N) (base->timer_jiffies >> (TVR_BITS + N * TVN_BITS)) & TVN_MASK
+#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
 
 static inline void __run_timers(tvec_base_t *base)
 {
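Note: the added parentheses are standard macro hygiene. Without them, an argument that is itself an expression expands with the wrong precedence, and so does a use of INDEX() inside a larger expression. A standalone illustration with hypothetical constants (not the kernel's values):

#include <stdio.h>

#define BAD(N)	(x >> (2 + N * 3)) & 7		/* shaped like the old INDEX() */
#define GOOD(N)	((x >> (2 + (N) * 3)) & 7)

int main(void)
{
	unsigned long x = 0x1234;

	/*
	 * BAD(1 + 1) expands to (x >> (2 + 1 + 1 * 3)) & 7: a shift by 6
	 * instead of 8. The missing outer parentheses bite too, e.g.
	 * BAD(0) == 1 parses as (x >> 2) & (7 == 1), since == binds
	 * tighter than &.
	 */
	printf("%lu vs %lu\n", BAD(1 + 1), GOOD(1 + 1));	/* prints: 0 vs 2 */
	return 0;
}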
@@ -891,6 +892,7 @@ int do_settimeofday(struct timespec *tv)
 	set_normalized_timespec(&xtime, sec, nsec);
 	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
 
+	clock->error = 0;
 	ntp_clear();
 
 	write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -967,6 +969,7 @@ void __init timekeeping_init(void)
 }
 
 
+static int timekeeping_suspended;
 /*
  * timekeeping_resume - Resumes the generic timekeeping subsystem.
  * @dev:	unused
@@ -982,6 +985,18 @@ static int timekeeping_resume(struct sys_device *dev)
 	write_seqlock_irqsave(&xtime_lock, flags);
 	/* restart the last cycle value */
 	clock->cycle_last = clocksource_read(clock);
+	clock->error = 0;
+	timekeeping_suspended = 0;
+	write_sequnlock_irqrestore(&xtime_lock, flags);
+	return 0;
+}
+
+static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
+{
+	unsigned long flags;
+
+	write_seqlock_irqsave(&xtime_lock, flags);
+	timekeeping_suspended = 1;
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 	return 0;
 }
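Note: timekeeping_suspended is only ever touched under the xtime_lock seqlock writer side, so readers observe it changing atomically with the rest of the time state. For context, the matching read side of a seqlock looks like this generic sketch (not code from this patch; read_seqbegin()/read_seqretry() are the stock <linux/seqlock.h> primitives):

	unsigned seq;
	struct timespec ts;

	do {
		seq = read_seqbegin(&xtime_lock);
		ts = xtime;			/* snapshot the protected state */
	} while (read_seqretry(&xtime_lock, seq));	/* retry if a writer ran */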
@@ -989,6 +1004,7 @@ static int timekeeping_resume(struct sys_device *dev)
 /* sysfs resume/suspend bits for timekeeping */
 static struct sysdev_class timekeeping_sysclass = {
 	.resume		= timekeeping_resume,
+	.suspend	= timekeeping_suspend,
 	set_kset_name("timekeeping"),
 };
 
@@ -1008,52 +1024,52 @@ static int __init timekeeping_init_device(void)
 device_initcall(timekeeping_init_device);
 
 /*
- * If the error is already larger, we look ahead another tick,
+ * If the error is already larger, we look ahead even further
  * to compensate for late or lost adjustments.
  */
-static __always_inline int clocksource_bigadjust(int sign, s64 error, s64 *interval, s64 *offset)
+static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, s64 *offset)
 {
-	int adj;
+	s64 tick_error, i;
+	u32 look_ahead, adj;
+	s32 error2, mult;
 
 	/*
-	 * As soon as the machine is synchronized to the external time
-	 * source this should be the common case.
+	 * Use the current error value to determine how much to look ahead.
+	 * The larger the error the slower we adjust for it to avoid problems
+	 * with losing too many ticks, otherwise we would overadjust and
+	 * produce an even larger error. The smaller the adjustment the
+	 * faster we try to adjust for it, as lost ticks can do less harm
+	 * here. This is tuned so that an error of about 1 msec is adjusted
+	 * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks).
 	 */
-	error >>= 2;
-	if (likely(sign > 0 ? error <= *interval : error >= *interval))
-		return sign;
+	error2 = clock->error >> (TICK_LENGTH_SHIFT + 22 - 2 * SHIFT_HZ);
+	error2 = abs(error2);
+	for (look_ahead = 0; error2 > 0; look_ahead++)
+		error2 >>= 2;
 
 	/*
-	 * An extra look ahead dampens the effect of the current error,
-	 * which can grow quite large with continuously late updates, as
-	 * it would dominate the adjustment value and can lead to
-	 * oscillation.
+	 * Now calculate the error in (1 << look_ahead) ticks, but first
+	 * remove the single look ahead already included in the error.
 	 */
-	error += current_tick_length() >> (TICK_LENGTH_SHIFT - clock->shift + 1);
-	error -= clock->xtime_interval >> 1;
-
-	adj = 0;
-	while (1) {
-		error >>= 1;
-		if (sign > 0 ? error <= *interval : error >= *interval)
-			break;
-		adj++;
+	tick_error = current_tick_length() >> (TICK_LENGTH_SHIFT - clock->shift + 1);
+	tick_error -= clock->xtime_interval >> 1;
+	error = ((error - tick_error) >> look_ahead) + tick_error;
+
+	/* Finally calculate the adjustment shift value. */
+	i = *interval;
+	mult = 1;
+	if (error < 0) {
+		error = -error;
+		*interval = -*interval;
+		*offset = -*offset;
+		mult = -1;
 	}
-
-	/*
-	 * Add the current adjustments to the error and take the offset
-	 * into account, the latter can cause the error to be hardly
-	 * reduced at the next tick. Check the error again if there's
-	 * room for another adjustment, thus further reducing the error
-	 * which otherwise had to be corrected at the next update.
-	 */
-	error = (error << 1) - *interval + *offset;
-	if (sign > 0 ? error > *interval : error < *interval)
-		adj++;
+	for (adj = 0; error > i; adj++)
+		error >>= 1;
 
 	*interval <<= adj;
 	*offset <<= adj;
-	return sign << adj;
+	return mult << adj;
 }
 
 /*
@@ -1068,11 +1084,19 @@ static void clocksource_adjust(struct clocksource *clock, s64 offset)
 
 	error = clock->error >> (TICK_LENGTH_SHIFT - clock->shift - 1);
 	if (error > interval) {
-		adj = clocksource_bigadjust(1, error, &interval, &offset);
+		error >>= 2;
+		if (likely(error <= interval))
+			adj = 1;
+		else
+			adj = clocksource_bigadjust(error, &interval, &offset);
 	} else if (error < -interval) {
-		interval = -interval;
-		offset = -offset;
-		adj = clocksource_bigadjust(-1, error, &interval, &offset);
+		error >>= 2;
+		if (likely(error >= -interval)) {
+			adj = -1;
+			interval = -interval;
+			offset = -offset;
+		} else
+			adj = clocksource_bigadjust(error, &interval, &offset);
 	} else
 		return;
 
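Note on the two hunks above: clocksource_adjust() now handles the common small-error case inline; after error >>= 2, a single-step adjustment (adj = ±1) suffices whenever the remaining error fits within one interval, and clocksource_bigadjust() is only called for large errors. Inside it, look_ahead grows roughly with log4 of the scaled error, so a large error is spread over exponentially many ticks instead of being corrected in one oversized step that could overshoot. A standalone sketch of just that look-ahead arithmetic; TICK_LENGTH_SHIFT = 32 and SHIFT_HZ = 10 (i.e. HZ = 1000) are assumed, era-typical values:

#include <stdio.h>
#include <stdlib.h>

#define TICK_LENGTH_SHIFT	32	/* assumption: 2.6.18-era value */
#define SHIFT_HZ		10	/* assumption: log2(HZ) for HZ=1000 */

int main(void)
{
	long long clock_error = 1LL << 40;	/* hypothetical accumulated error */
	int error2 = abs((int)(clock_error >> (TICK_LENGTH_SHIFT + 22 - 2 * SHIFT_HZ)));
	unsigned int look_ahead;

	/* Each step quarters the remainder, so look_ahead ~ log4(error2) + 1. */
	for (look_ahead = 0; error2 > 0; look_ahead++)
		error2 >>= 2;

	printf("look_ahead = %u\n", look_ahead);	/* prints 4 for this input */
	return 0;
}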
@@ -1091,13 +1115,16 @@ static void update_wall_time(void)
 {
 	cycle_t offset;
 
-	clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift;
+	/* Make sure we're fully resumed: */
+	if (unlikely(timekeeping_suspended))
+		return;
 
 #ifdef CONFIG_GENERIC_TIME
 	offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask;
 #else
 	offset = clock->cycle_interval;
 #endif
+	clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift;
 
 	/* normally this loop will run just once, however in the
 	 * case of lost or late ticks, it will accumulate correctly.
@@ -1129,7 +1156,7 @@ static void update_wall_time(void)
 	clocksource_adjust(clock, offset);
 
 	/* store full nanoseconds into xtime */
-	xtime.tv_nsec = clock->xtime_nsec >> clock->shift;
+	xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift;
 	clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
 
 	/* check to see if there is a new clocksource to use */
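Note: the (s64) cast matters because clock->xtime_nsec is an unsigned 64-bit accumulator that the adjustment code can legitimately drive through a two's-complement "negative" value. An unsigned right shift would then produce an enormous bogus nanosecond count, while an arithmetic shift of the signed value yields the intended small negative result (the kernel relies on gcc's arithmetic >> for signed types; strictly, C leaves this implementation-defined). Standalone demo:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t xtime_nsec = (uint64_t)-(1LL << 22);	/* "negative" accumulator */
	unsigned int shift = 22;

	printf("unsigned: %llu\n",
	       (unsigned long long)(xtime_nsec >> shift));	/* 2^42 - 1: garbage */
	printf("signed:   %lld\n",
	       (long long)((int64_t)xtime_nsec >> shift));	/* -1: intended */
	return 0;
}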
@@ -1297,46 +1324,19 @@ asmlinkage long sys_getpid(void)
 }
 
 /*
- * Accessing ->group_leader->real_parent is not SMP-safe, it could
- * change from under us. However, rather than getting any lock
- * we can use an optimistic algorithm: get the parent
- * pid, and go back and check that the parent is still
- * the same. If it has changed (which is extremely unlikely
- * indeed), we just try again..
- *
- * NOTE! This depends on the fact that even if we _do_
- * get an old value of "parent", we can happily dereference
- * the pointer (it was and remains a dereferenceable kernel pointer
- * no matter what): we just can't necessarily trust the result
- * until we know that the parent pointer is valid.
- *
- * NOTE2: ->group_leader never changes from under us.
+ * Accessing ->real_parent is not SMP-safe, it could
+ * change from under us. However, we can use a stale
+ * value of ->real_parent under rcu_read_lock(), see
+ * release_task()->call_rcu(delayed_put_task_struct).
  */
 asmlinkage long sys_getppid(void)
 {
 	int pid;
-	struct task_struct *me = current;
-	struct task_struct *parent;
 
-	parent = me->group_leader->real_parent;
-	for (;;) {
-		pid = parent->tgid;
-#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
-{
-		struct task_struct *old = parent;
+	rcu_read_lock();
+	pid = rcu_dereference(current->real_parent)->tgid;
+	rcu_read_unlock();
 
-		/*
-		 * Make sure we read the pid before re-reading the
-		 * parent pointer:
-		 */
-		smp_rmb();
-		parent = me->group_leader->real_parent;
-		if (old != parent)
-			continue;
-}
-#endif
-		break;
-	}
 	return pid;
 }
 
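Note: the open-coded retry loop can go because RCU already provides the lifetime guarantee the old NOTE described: release_task() frees the task_struct through call_rcu(delayed_put_task_struct), so within rcu_read_lock()/rcu_read_unlock() even a stale ->real_parent still points at memory that has not been freed, and a possibly stale tgid is an acceptable answer for getppid(). The general pattern being relied on, as a kernel-style sketch with illustrative names (struct foo is not kernel code):

struct foo {
	int data;
	struct rcu_head rcu;
};

static void foo_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct foo, rcu));	/* runs after a grace period */
}

static void foo_release(struct foo *f)
{
	/* unlink f so new readers cannot find it, then defer the free: */
	call_rcu(&f->rcu, foo_free_rcu);
	/* readers already inside rcu_read_lock() may keep using f
	 * until they all leave their read-side critical sections. */
}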
@@ -1661,7 +1661,7 @@ static void __devinit migrate_timers(int cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static int __devinit timer_cpu_notify(struct notifier_block *self,
+static int __cpuinit timer_cpu_notify(struct notifier_block *self,
 					unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
@@ -1681,7 +1681,7 @@ static int __devinit timer_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __devinitdata timers_nb = {
+static struct notifier_block __cpuinitdata timers_nb = {
 	.notifier_call = timer_cpu_notify,
 };
 
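Note: __devinit/__devinitdata code is discarded after boot unless CONFIG_HOTPLUG is enabled, but a CPU notifier must stay resident whenever CONFIG_HOTPLUG_CPU is enabled, which is a different condition; hence the switch to __cpuinit/__cpuinitdata. A simplified sketch of the annotation logic from this era's <linux/init.h> (section-placement details omitted):

#ifdef CONFIG_HOTPLUG_CPU
#define __cpuinit			/* must survive: CPUs can come up later */
#define __cpuinitdata
#else
#define __cpuinit	__init		/* safe to discard after boot */
#define __cpuinitdata	__initdata
#endif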