diff options
| author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-17 16:15:55 -0500 |
|---|---|---|
| committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-17 16:15:55 -0500 |
| commit | 8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch) | |
| tree | a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /kernel/time | |
| parent | 406089d01562f1e2bf9f089fd7637009ebaad589 (diff) | |
Patched in Tegra support.
Diffstat (limited to 'kernel/time')
| -rw-r--r-- | kernel/time/Kconfig | 64 | ||||
| -rw-r--r-- | kernel/time/Makefile | 4 | ||||
| -rw-r--r-- | kernel/time/alarmtimer.c | 342 | ||||
| -rw-r--r-- | kernel/time/clockevents.c | 157 | ||||
| -rw-r--r-- | kernel/time/clocksource.c | 89 | ||||
| -rw-r--r-- | kernel/time/jiffies.c | 40 | ||||
| -rw-r--r-- | kernel/time/ntp.c | 201 | ||||
| -rw-r--r-- | kernel/time/posix-clock.c | 1 | ||||
| -rw-r--r-- | kernel/time/tick-broadcast.c | 13 | ||||
| -rw-r--r-- | kernel/time/tick-common.c | 12 | ||||
| -rw-r--r-- | kernel/time/tick-internal.h | 3 | ||||
| -rw-r--r-- | kernel/time/tick-oneshot.c | 77 | ||||
| -rw-r--r-- | kernel/time/tick-sched.c | 519 | ||||
| -rw-r--r-- | kernel/time/timekeeping.c | 963 | ||||
| -rw-r--r-- | kernel/time/timer_list.c | 4 | ||||
| -rw-r--r-- | kernel/time/timer_stats.c | 6 |
16 files changed, 961 insertions, 1534 deletions
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 8601f0db126..f06a8a36564 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
| @@ -1,63 +1,6 @@ | |||
| 1 | # | 1 | # |
| 2 | # Timer subsystem related configuration options | 2 | # Timer subsystem related configuration options |
| 3 | # | 3 | # |
| 4 | |||
| 5 | # Options selectable by arch Kconfig | ||
| 6 | |||
| 7 | # Watchdog function for clocksources to detect instabilities | ||
| 8 | config CLOCKSOURCE_WATCHDOG | ||
| 9 | bool | ||
| 10 | |||
| 11 | # Architecture has extra clocksource data | ||
| 12 | config ARCH_CLOCKSOURCE_DATA | ||
| 13 | bool | ||
| 14 | |||
| 15 | # Timekeeping vsyscall support | ||
| 16 | config GENERIC_TIME_VSYSCALL | ||
| 17 | bool | ||
| 18 | |||
| 19 | # Timekeeping vsyscall support | ||
| 20 | config GENERIC_TIME_VSYSCALL_OLD | ||
| 21 | bool | ||
| 22 | |||
| 23 | # ktime_t scalar 64bit nsec representation | ||
| 24 | config KTIME_SCALAR | ||
| 25 | bool | ||
| 26 | |||
| 27 | # Old style timekeeping | ||
| 28 | config ARCH_USES_GETTIMEOFFSET | ||
| 29 | bool | ||
| 30 | |||
| 31 | # The generic clock events infrastructure | ||
| 32 | config GENERIC_CLOCKEVENTS | ||
| 33 | bool | ||
| 34 | |||
| 35 | # Migration helper. Builds, but does not invoke | ||
| 36 | config GENERIC_CLOCKEVENTS_BUILD | ||
| 37 | bool | ||
| 38 | default y | ||
| 39 | depends on GENERIC_CLOCKEVENTS | ||
| 40 | |||
| 41 | # Clockevents broadcasting infrastructure | ||
| 42 | config GENERIC_CLOCKEVENTS_BROADCAST | ||
| 43 | bool | ||
| 44 | depends on GENERIC_CLOCKEVENTS | ||
| 45 | |||
| 46 | # Automatically adjust the min. reprogramming time for | ||
| 47 | # clock event device | ||
| 48 | config GENERIC_CLOCKEVENTS_MIN_ADJUST | ||
| 49 | bool | ||
| 50 | |||
| 51 | # Generic update of CMOS clock | ||
| 52 | config GENERIC_CMOS_UPDATE | ||
| 53 | bool | ||
| 54 | |||
| 55 | if GENERIC_CLOCKEVENTS | ||
| 56 | menu "Timers subsystem" | ||
| 57 | |||
| 58 | # Core internal switch. Selected by NO_HZ / HIGH_RES_TIMERS. This is | ||
| 59 | # only related to the tick functionality. Oneshot clockevent devices | ||
| 60 | # are supported independ of this. | ||
| 61 | config TICK_ONESHOT | 4 | config TICK_ONESHOT |
| 62 | bool | 5 | bool |
| 63 | 6 | ||
| @@ -79,5 +22,8 @@ config HIGH_RES_TIMERS | |||
| 79 | hardware is not capable then this option only increases | 22 | hardware is not capable then this option only increases |
| 80 | the size of the kernel image. | 23 | the size of the kernel image. |
| 81 | 24 | ||
| 82 | endmenu | 25 | config GENERIC_CLOCKEVENTS_BUILD |
| 83 | endif | 26 | bool |
| 27 | default y | ||
| 28 | depends on GENERIC_CLOCKEVENTS || GENERIC_CLOCKEVENTS_MIGR | ||
| 29 | |||
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index ff7d9d2ab50..cae2ad7491b 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
| @@ -1,5 +1,5 @@ | |||
| 1 | obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o | 1 | obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o |
| 2 | obj-y += timeconv.o posix-clock.o alarmtimer.o | 2 | obj-y += timeconv.o posix-clock.o #alarmtimer.o |
| 3 | 3 | ||
| 4 | obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o | 4 | obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o |
| 5 | obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o | 5 | obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o |
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index f11d83b1294..8b70c76910a 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
| @@ -37,6 +37,7 @@ | |||
| 37 | static struct alarm_base { | 37 | static struct alarm_base { |
| 38 | spinlock_t lock; | 38 | spinlock_t lock; |
| 39 | struct timerqueue_head timerqueue; | 39 | struct timerqueue_head timerqueue; |
| 40 | struct hrtimer timer; | ||
| 40 | ktime_t (*gettime)(void); | 41 | ktime_t (*gettime)(void); |
| 41 | clockid_t base_clockid; | 42 | clockid_t base_clockid; |
| 42 | } alarm_bases[ALARM_NUMTYPE]; | 43 | } alarm_bases[ALARM_NUMTYPE]; |
| @@ -45,8 +46,6 @@ static struct alarm_base { | |||
| 45 | static ktime_t freezer_delta; | 46 | static ktime_t freezer_delta; |
| 46 | static DEFINE_SPINLOCK(freezer_delta_lock); | 47 | static DEFINE_SPINLOCK(freezer_delta_lock); |
| 47 | 48 | ||
| 48 | static struct wakeup_source *ws; | ||
| 49 | |||
| 50 | #ifdef CONFIG_RTC_CLASS | 49 | #ifdef CONFIG_RTC_CLASS |
| 51 | /* rtc timer and device for setting alarm wakeups at suspend */ | 50 | /* rtc timer and device for setting alarm wakeups at suspend */ |
| 52 | static struct rtc_timer rtctimer; | 51 | static struct rtc_timer rtctimer; |
| @@ -54,112 +53,108 @@ static struct rtc_device *rtcdev; | |||
| 54 | static DEFINE_SPINLOCK(rtcdev_lock); | 53 | static DEFINE_SPINLOCK(rtcdev_lock); |
| 55 | 54 | ||
| 56 | /** | 55 | /** |
| 57 | * alarmtimer_get_rtcdev - Return selected rtcdevice | 56 | * has_wakealarm - check rtc device has wakealarm ability |
| 57 | * @dev: current device | ||
| 58 | * @name_ptr: name to be returned | ||
| 58 | * | 59 | * |
| 59 | * This function returns the rtc device to use for wakealarms. | 60 | * This helper function checks to see if the rtc device can wake |
| 60 | * If one has not already been chosen, it checks to see if a | 61 | * from suspend. |
| 61 | * functional rtc device is available. | ||
| 62 | */ | 62 | */ |
| 63 | struct rtc_device *alarmtimer_get_rtcdev(void) | 63 | static int has_wakealarm(struct device *dev, void *name_ptr) |
| 64 | { | 64 | { |
| 65 | unsigned long flags; | 65 | struct rtc_device *candidate = to_rtc_device(dev); |
| 66 | struct rtc_device *ret; | ||
| 67 | 66 | ||
| 68 | spin_lock_irqsave(&rtcdev_lock, flags); | 67 | if (!candidate->ops->set_alarm) |
| 69 | ret = rtcdev; | 68 | return 0; |
| 70 | spin_unlock_irqrestore(&rtcdev_lock, flags); | 69 | if (!device_may_wakeup(candidate->dev.parent)) |
| 70 | return 0; | ||
| 71 | 71 | ||
| 72 | return ret; | 72 | *(const char **)name_ptr = dev_name(dev); |
| 73 | return 1; | ||
| 73 | } | 74 | } |
| 74 | 75 | ||
| 75 | 76 | /** | |
| 76 | static int alarmtimer_rtc_add_device(struct device *dev, | 77 | * alarmtimer_get_rtcdev - Return selected rtcdevice |
| 77 | struct class_interface *class_intf) | 78 | * |
| 79 | * This function returns the rtc device to use for wakealarms. | ||
| 80 | * If one has not already been chosen, it checks to see if a | ||
| 81 | * functional rtc device is available. | ||
| 82 | */ | ||
| 83 | static struct rtc_device *alarmtimer_get_rtcdev(void) | ||
| 78 | { | 84 | { |
| 85 | struct device *dev; | ||
| 86 | char *str; | ||
| 79 | unsigned long flags; | 87 | unsigned long flags; |
| 80 | struct rtc_device *rtc = to_rtc_device(dev); | 88 | struct rtc_device *ret; |
| 81 | |||
| 82 | if (rtcdev) | ||
| 83 | return -EBUSY; | ||
| 84 | |||
| 85 | if (!rtc->ops->set_alarm) | ||
| 86 | return -1; | ||
| 87 | if (!device_may_wakeup(rtc->dev.parent)) | ||
| 88 | return -1; | ||
| 89 | 89 | ||
| 90 | spin_lock_irqsave(&rtcdev_lock, flags); | 90 | spin_lock_irqsave(&rtcdev_lock, flags); |
| 91 | if (!rtcdev) { | 91 | if (!rtcdev) { |
| 92 | rtcdev = rtc; | 92 | /* Find an rtc device and init the rtc_timer */ |
| 93 | /* hold a reference so it doesn't go away */ | 93 | dev = class_find_device(rtc_class, NULL, &str, has_wakealarm); |
| 94 | get_device(dev); | 94 | /* If we have a device then str is valid. See has_wakealarm() */ |
| 95 | if (dev) { | ||
| 96 | rtcdev = rtc_class_open(str); | ||
| 97 | /* | ||
| 98 | * Drop the reference we got in class_find_device, | ||
| 99 | * rtc_open takes its own. | ||
| 100 | */ | ||
| 101 | put_device(dev); | ||
| 102 | rtc_timer_init(&rtctimer, NULL, NULL); | ||
| 103 | } | ||
| 95 | } | 104 | } |
| 105 | ret = rtcdev; | ||
| 96 | spin_unlock_irqrestore(&rtcdev_lock, flags); | 106 | spin_unlock_irqrestore(&rtcdev_lock, flags); |
| 97 | return 0; | ||
| 98 | } | ||
| 99 | |||
| 100 | static inline void alarmtimer_rtc_timer_init(void) | ||
| 101 | { | ||
| 102 | rtc_timer_init(&rtctimer, NULL, NULL); | ||
| 103 | } | ||
| 104 | |||
| 105 | static struct class_interface alarmtimer_rtc_interface = { | ||
| 106 | .add_dev = &alarmtimer_rtc_add_device, | ||
| 107 | }; | ||
| 108 | 107 | ||
| 109 | static int alarmtimer_rtc_interface_setup(void) | 108 | return ret; |
| 110 | { | ||
| 111 | alarmtimer_rtc_interface.class = rtc_class; | ||
| 112 | return class_interface_register(&alarmtimer_rtc_interface); | ||
| 113 | } | ||
| 114 | static void alarmtimer_rtc_interface_remove(void) | ||
| 115 | { | ||
| 116 | class_interface_unregister(&alarmtimer_rtc_interface); | ||
| 117 | } | 109 | } |
| 118 | #else | 110 | #else |
| 119 | struct rtc_device *alarmtimer_get_rtcdev(void) | 111 | #define alarmtimer_get_rtcdev() (0) |
| 120 | { | 112 | #define rtcdev (0) |
| 121 | return NULL; | ||
| 122 | } | ||
| 123 | #define rtcdev (NULL) | ||
| 124 | static inline int alarmtimer_rtc_interface_setup(void) { return 0; } | ||
| 125 | static inline void alarmtimer_rtc_interface_remove(void) { } | ||
| 126 | static inline void alarmtimer_rtc_timer_init(void) { } | ||
| 127 | #endif | 113 | #endif |
| 128 | 114 | ||
| 115 | |||
| 129 | /** | 116 | /** |
| 130 | * alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue | 117 | * alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue |
| 131 | * @base: pointer to the base where the timer is being run | 118 | * @base: pointer to the base where the timer is being run |
| 132 | * @alarm: pointer to alarm being enqueued. | 119 | * @alarm: pointer to alarm being enqueued. |
| 133 | * | 120 | * |
| 134 | * Adds alarm to a alarm_base timerqueue | 121 | * Adds alarm to a alarm_base timerqueue and if necessary sets |
| 122 | * an hrtimer to run. | ||
| 135 | * | 123 | * |
| 136 | * Must hold base->lock when calling. | 124 | * Must hold base->lock when calling. |
| 137 | */ | 125 | */ |
| 138 | static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) | 126 | static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) |
| 139 | { | 127 | { |
| 140 | if (alarm->state & ALARMTIMER_STATE_ENQUEUED) | ||
| 141 | timerqueue_del(&base->timerqueue, &alarm->node); | ||
| 142 | |||
| 143 | timerqueue_add(&base->timerqueue, &alarm->node); | 128 | timerqueue_add(&base->timerqueue, &alarm->node); |
| 144 | alarm->state |= ALARMTIMER_STATE_ENQUEUED; | 129 | if (&alarm->node == timerqueue_getnext(&base->timerqueue)) { |
| 130 | hrtimer_try_to_cancel(&base->timer); | ||
| 131 | hrtimer_start(&base->timer, alarm->node.expires, | ||
| 132 | HRTIMER_MODE_ABS); | ||
| 133 | } | ||
| 145 | } | 134 | } |
| 146 | 135 | ||
| 147 | /** | 136 | /** |
| 148 | * alarmtimer_dequeue - Removes an alarm timer from an alarm_base timerqueue | 137 | * alarmtimer_remove - Removes an alarm timer from an alarm_base timerqueue |
| 149 | * @base: pointer to the base where the timer is running | 138 | * @base: pointer to the base where the timer is running |
| 150 | * @alarm: pointer to alarm being removed | 139 | * @alarm: pointer to alarm being removed |
| 151 | * | 140 | * |
| 152 | * Removes alarm to a alarm_base timerqueue | 141 | * Removes alarm to a alarm_base timerqueue and if necessary sets |
| 142 | * a new timer to run. | ||
| 153 | * | 143 | * |
| 154 | * Must hold base->lock when calling. | 144 | * Must hold base->lock when calling. |
| 155 | */ | 145 | */ |
| 156 | static void alarmtimer_dequeue(struct alarm_base *base, struct alarm *alarm) | 146 | static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) |
| 157 | { | 147 | { |
| 158 | if (!(alarm->state & ALARMTIMER_STATE_ENQUEUED)) | 148 | struct timerqueue_node *next = timerqueue_getnext(&base->timerqueue); |
| 159 | return; | ||
| 160 | 149 | ||
| 161 | timerqueue_del(&base->timerqueue, &alarm->node); | 150 | timerqueue_del(&base->timerqueue, &alarm->node); |
| 162 | alarm->state &= ~ALARMTIMER_STATE_ENQUEUED; | 151 | if (next == &alarm->node) { |
| 152 | hrtimer_try_to_cancel(&base->timer); | ||
| 153 | next = timerqueue_getnext(&base->timerqueue); | ||
| 154 | if (!next) | ||
| 155 | return; | ||
| 156 | hrtimer_start(&base->timer, next->expires, HRTIMER_MODE_ABS); | ||
| 157 | } | ||
| 163 | } | 158 | } |
| 164 | 159 | ||
| 165 | 160 | ||
| @@ -174,23 +169,39 @@ static void alarmtimer_dequeue(struct alarm_base *base, struct alarm *alarm) | |||
| 174 | */ | 169 | */ |
| 175 | static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) | 170 | static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) |
| 176 | { | 171 | { |
| 177 | struct alarm *alarm = container_of(timer, struct alarm, timer); | 172 | struct alarm_base *base = container_of(timer, struct alarm_base, timer); |
| 178 | struct alarm_base *base = &alarm_bases[alarm->type]; | 173 | struct timerqueue_node *next; |
| 179 | unsigned long flags; | 174 | unsigned long flags; |
| 175 | ktime_t now; | ||
| 180 | int ret = HRTIMER_NORESTART; | 176 | int ret = HRTIMER_NORESTART; |
| 181 | int restart = ALARMTIMER_NORESTART; | ||
| 182 | 177 | ||
| 183 | spin_lock_irqsave(&base->lock, flags); | 178 | spin_lock_irqsave(&base->lock, flags); |
| 184 | alarmtimer_dequeue(base, alarm); | 179 | now = base->gettime(); |
| 185 | spin_unlock_irqrestore(&base->lock, flags); | 180 | while ((next = timerqueue_getnext(&base->timerqueue))) { |
| 181 | struct alarm *alarm; | ||
| 182 | ktime_t expired = next->expires; | ||
| 186 | 183 | ||
| 187 | if (alarm->function) | 184 | if (expired.tv64 > now.tv64) |
| 188 | restart = alarm->function(alarm, base->gettime()); | 185 | break; |
| 189 | 186 | ||
| 190 | spin_lock_irqsave(&base->lock, flags); | 187 | alarm = container_of(next, struct alarm, node); |
| 191 | if (restart != ALARMTIMER_NORESTART) { | 188 | |
| 192 | hrtimer_set_expires(&alarm->timer, alarm->node.expires); | 189 | timerqueue_del(&base->timerqueue, &alarm->node); |
| 193 | alarmtimer_enqueue(base, alarm); | 190 | alarm->enabled = 0; |
| 191 | /* Re-add periodic timers */ | ||
| 192 | if (alarm->period.tv64) { | ||
| 193 | alarm->node.expires = ktime_add(expired, alarm->period); | ||
| 194 | timerqueue_add(&base->timerqueue, &alarm->node); | ||
| 195 | alarm->enabled = 1; | ||
| 196 | } | ||
| 197 | spin_unlock_irqrestore(&base->lock, flags); | ||
| 198 | if (alarm->function) | ||
| 199 | alarm->function(alarm); | ||
| 200 | spin_lock_irqsave(&base->lock, flags); | ||
| 201 | } | ||
| 202 | |||
| 203 | if (next) { | ||
| 204 | hrtimer_set_expires(&base->timer, next->expires); | ||
| 194 | ret = HRTIMER_RESTART; | 205 | ret = HRTIMER_RESTART; |
| 195 | } | 206 | } |
| 196 | spin_unlock_irqrestore(&base->lock, flags); | 207 | spin_unlock_irqrestore(&base->lock, flags); |
| @@ -217,14 +228,13 @@ static int alarmtimer_suspend(struct device *dev) | |||
| 217 | unsigned long flags; | 228 | unsigned long flags; |
| 218 | struct rtc_device *rtc; | 229 | struct rtc_device *rtc; |
| 219 | int i; | 230 | int i; |
| 220 | int ret; | ||
| 221 | 231 | ||
| 222 | spin_lock_irqsave(&freezer_delta_lock, flags); | 232 | spin_lock_irqsave(&freezer_delta_lock, flags); |
| 223 | min = freezer_delta; | 233 | min = freezer_delta; |
| 224 | freezer_delta = ktime_set(0, 0); | 234 | freezer_delta = ktime_set(0, 0); |
| 225 | spin_unlock_irqrestore(&freezer_delta_lock, flags); | 235 | spin_unlock_irqrestore(&freezer_delta_lock, flags); |
| 226 | 236 | ||
| 227 | rtc = alarmtimer_get_rtcdev(); | 237 | rtc = rtcdev; |
| 228 | /* If we have no rtcdev, just return */ | 238 | /* If we have no rtcdev, just return */ |
| 229 | if (!rtc) | 239 | if (!rtc) |
| 230 | return 0; | 240 | return 0; |
| @@ -247,10 +257,8 @@ static int alarmtimer_suspend(struct device *dev) | |||
| 247 | if (min.tv64 == 0) | 257 | if (min.tv64 == 0) |
| 248 | return 0; | 258 | return 0; |
| 249 | 259 | ||
| 250 | if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) { | 260 | /* XXX - Should we enforce a minimum sleep time? */ |
| 251 | __pm_wakeup_event(ws, 2 * MSEC_PER_SEC); | 261 | WARN_ON(min.tv64 < NSEC_PER_SEC); |
| 252 | return -EBUSY; | ||
| 253 | } | ||
| 254 | 262 | ||
| 255 | /* Setup an rtc timer to fire that far in the future */ | 263 | /* Setup an rtc timer to fire that far in the future */ |
| 256 | rtc_timer_cancel(rtc, &rtctimer); | 264 | rtc_timer_cancel(rtc, &rtctimer); |
| @@ -258,11 +266,9 @@ static int alarmtimer_suspend(struct device *dev) | |||
| 258 | now = rtc_tm_to_ktime(tm); | 266 | now = rtc_tm_to_ktime(tm); |
| 259 | now = ktime_add(now, min); | 267 | now = ktime_add(now, min); |
| 260 | 268 | ||
| 261 | /* Set alarm, if in the past reject suspend briefly to handle */ | 269 | rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0)); |
| 262 | ret = rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0)); | 270 | |
| 263 | if (ret < 0) | 271 | return 0; |
| 264 | __pm_wakeup_event(ws, MSEC_PER_SEC); | ||
| 265 | return ret; | ||
| 266 | } | 272 | } |
| 267 | #else | 273 | #else |
| 268 | static int alarmtimer_suspend(struct device *dev) | 274 | static int alarmtimer_suspend(struct device *dev) |
| @@ -293,110 +299,53 @@ static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type) | |||
| 293 | * @function: callback that is run when the alarm fires | 299 | * @function: callback that is run when the alarm fires |
| 294 | */ | 300 | */ |
| 295 | void alarm_init(struct alarm *alarm, enum alarmtimer_type type, | 301 | void alarm_init(struct alarm *alarm, enum alarmtimer_type type, |
| 296 | enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) | 302 | void (*function)(struct alarm *)) |
| 297 | { | 303 | { |
| 298 | timerqueue_init(&alarm->node); | 304 | timerqueue_init(&alarm->node); |
| 299 | hrtimer_init(&alarm->timer, alarm_bases[type].base_clockid, | 305 | alarm->period = ktime_set(0, 0); |
| 300 | HRTIMER_MODE_ABS); | ||
| 301 | alarm->timer.function = alarmtimer_fired; | ||
| 302 | alarm->function = function; | 306 | alarm->function = function; |
| 303 | alarm->type = type; | 307 | alarm->type = type; |
| 304 | alarm->state = ALARMTIMER_STATE_INACTIVE; | 308 | alarm->enabled = 0; |
| 305 | } | 309 | } |
| 306 | 310 | ||
| 307 | /** | 311 | /** |
| 308 | * alarm_start - Sets an alarm to fire | 312 | * alarm_start - Sets an alarm to fire |
| 309 | * @alarm: ptr to alarm to set | 313 | * @alarm: ptr to alarm to set |
| 310 | * @start: time to run the alarm | 314 | * @start: time to run the alarm |
| 315 | * @period: period at which the alarm will recur | ||
| 311 | */ | 316 | */ |
| 312 | int alarm_start(struct alarm *alarm, ktime_t start) | 317 | void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period) |
| 313 | { | 318 | { |
| 314 | struct alarm_base *base = &alarm_bases[alarm->type]; | 319 | struct alarm_base *base = &alarm_bases[alarm->type]; |
| 315 | unsigned long flags; | 320 | unsigned long flags; |
| 316 | int ret; | ||
| 317 | 321 | ||
| 318 | spin_lock_irqsave(&base->lock, flags); | 322 | spin_lock_irqsave(&base->lock, flags); |
| 323 | if (alarm->enabled) | ||
| 324 | alarmtimer_remove(base, alarm); | ||
| 319 | alarm->node.expires = start; | 325 | alarm->node.expires = start; |
| 326 | alarm->period = period; | ||
| 320 | alarmtimer_enqueue(base, alarm); | 327 | alarmtimer_enqueue(base, alarm); |
| 321 | ret = hrtimer_start(&alarm->timer, alarm->node.expires, | 328 | alarm->enabled = 1; |
| 322 | HRTIMER_MODE_ABS); | ||
| 323 | spin_unlock_irqrestore(&base->lock, flags); | 329 | spin_unlock_irqrestore(&base->lock, flags); |
| 324 | return ret; | ||
| 325 | } | 330 | } |
| 326 | 331 | ||
| 327 | /** | 332 | /** |
| 328 | * alarm_try_to_cancel - Tries to cancel an alarm timer | 333 | * alarm_cancel - Tries to cancel an alarm timer |
| 329 | * @alarm: ptr to alarm to be canceled | 334 | * @alarm: ptr to alarm to be canceled |
| 330 | * | ||
| 331 | * Returns 1 if the timer was canceled, 0 if it was not running, | ||
| 332 | * and -1 if the callback was running | ||
| 333 | */ | 335 | */ |
| 334 | int alarm_try_to_cancel(struct alarm *alarm) | 336 | void alarm_cancel(struct alarm *alarm) |
| 335 | { | 337 | { |
| 336 | struct alarm_base *base = &alarm_bases[alarm->type]; | 338 | struct alarm_base *base = &alarm_bases[alarm->type]; |
| 337 | unsigned long flags; | 339 | unsigned long flags; |
| 338 | int ret; | ||
| 339 | 340 | ||
| 340 | spin_lock_irqsave(&base->lock, flags); | 341 | spin_lock_irqsave(&base->lock, flags); |
| 341 | ret = hrtimer_try_to_cancel(&alarm->timer); | 342 | if (alarm->enabled) |
| 342 | if (ret >= 0) | 343 | alarmtimer_remove(base, alarm); |
| 343 | alarmtimer_dequeue(base, alarm); | 344 | alarm->enabled = 0; |
| 344 | spin_unlock_irqrestore(&base->lock, flags); | 345 | spin_unlock_irqrestore(&base->lock, flags); |
| 345 | return ret; | ||
| 346 | } | ||
| 347 | |||
| 348 | |||
| 349 | /** | ||
| 350 | * alarm_cancel - Spins trying to cancel an alarm timer until it is done | ||
| 351 | * @alarm: ptr to alarm to be canceled | ||
| 352 | * | ||
| 353 | * Returns 1 if the timer was canceled, 0 if it was not active. | ||
| 354 | */ | ||
| 355 | int alarm_cancel(struct alarm *alarm) | ||
| 356 | { | ||
| 357 | for (;;) { | ||
| 358 | int ret = alarm_try_to_cancel(alarm); | ||
| 359 | if (ret >= 0) | ||
| 360 | return ret; | ||
| 361 | cpu_relax(); | ||
| 362 | } | ||
| 363 | } | ||
| 364 | |||
| 365 | |||
| 366 | u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) | ||
| 367 | { | ||
| 368 | u64 overrun = 1; | ||
| 369 | ktime_t delta; | ||
| 370 | |||
| 371 | delta = ktime_sub(now, alarm->node.expires); | ||
| 372 | |||
| 373 | if (delta.tv64 < 0) | ||
| 374 | return 0; | ||
| 375 | |||
| 376 | if (unlikely(delta.tv64 >= interval.tv64)) { | ||
| 377 | s64 incr = ktime_to_ns(interval); | ||
| 378 | |||
| 379 | overrun = ktime_divns(delta, incr); | ||
| 380 | |||
| 381 | alarm->node.expires = ktime_add_ns(alarm->node.expires, | ||
| 382 | incr*overrun); | ||
| 383 | |||
| 384 | if (alarm->node.expires.tv64 > now.tv64) | ||
| 385 | return overrun; | ||
| 386 | /* | ||
| 387 | * This (and the ktime_add() below) is the | ||
| 388 | * correction for exact: | ||
| 389 | */ | ||
| 390 | overrun++; | ||
| 391 | } | ||
| 392 | |||
| 393 | alarm->node.expires = ktime_add(alarm->node.expires, interval); | ||
| 394 | return overrun; | ||
| 395 | } | 346 | } |
| 396 | 347 | ||
| 397 | 348 | ||
| 398 | |||
| 399 | |||
| 400 | /** | 349 | /** |
| 401 | * clock2alarm - helper that converts from clockid to alarmtypes | 350 | * clock2alarm - helper that converts from clockid to alarmtypes |
| 402 | * @clockid: clockid. | 351 | * @clockid: clockid. |
| @@ -416,21 +365,12 @@ static enum alarmtimer_type clock2alarm(clockid_t clockid) | |||
| 416 | * | 365 | * |
| 417 | * Posix timer callback for expired alarm timers. | 366 | * Posix timer callback for expired alarm timers. |
| 418 | */ | 367 | */ |
| 419 | static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm, | 368 | static void alarm_handle_timer(struct alarm *alarm) |
| 420 | ktime_t now) | ||
| 421 | { | 369 | { |
| 422 | struct k_itimer *ptr = container_of(alarm, struct k_itimer, | 370 | struct k_itimer *ptr = container_of(alarm, struct k_itimer, |
| 423 | it.alarm.alarmtimer); | 371 | it.alarmtimer); |
| 424 | if (posix_timer_event(ptr, 0) != 0) | 372 | if (posix_timer_event(ptr, 0) != 0) |
| 425 | ptr->it_overrun++; | 373 | ptr->it_overrun++; |
| 426 | |||
| 427 | /* Re-add periodic timers */ | ||
| 428 | if (ptr->it.alarm.interval.tv64) { | ||
| 429 | ptr->it_overrun += alarm_forward(alarm, now, | ||
| 430 | ptr->it.alarm.interval); | ||
| 431 | return ALARMTIMER_RESTART; | ||
| 432 | } | ||
| 433 | return ALARMTIMER_NORESTART; | ||
| 434 | } | 374 | } |
| 435 | 375 | ||
| 436 | /** | 376 | /** |
| @@ -487,7 +427,7 @@ static int alarm_timer_create(struct k_itimer *new_timer) | |||
| 487 | 427 | ||
| 488 | type = clock2alarm(new_timer->it_clock); | 428 | type = clock2alarm(new_timer->it_clock); |
| 489 | base = &alarm_bases[type]; | 429 | base = &alarm_bases[type]; |
| 490 | alarm_init(&new_timer->it.alarm.alarmtimer, type, alarm_handle_timer); | 430 | alarm_init(&new_timer->it.alarmtimer, type, alarm_handle_timer); |
| 491 | return 0; | 431 | return 0; |
| 492 | } | 432 | } |
| 493 | 433 | ||
| @@ -504,9 +444,9 @@ static void alarm_timer_get(struct k_itimer *timr, | |||
| 504 | memset(cur_setting, 0, sizeof(struct itimerspec)); | 444 | memset(cur_setting, 0, sizeof(struct itimerspec)); |
| 505 | 445 | ||
| 506 | cur_setting->it_interval = | 446 | cur_setting->it_interval = |
| 507 | ktime_to_timespec(timr->it.alarm.interval); | 447 | ktime_to_timespec(timr->it.alarmtimer.period); |
| 508 | cur_setting->it_value = | 448 | cur_setting->it_value = |
| 509 | ktime_to_timespec(timr->it.alarm.alarmtimer.node.expires); | 449 | ktime_to_timespec(timr->it.alarmtimer.node.expires); |
| 510 | return; | 450 | return; |
| 511 | } | 451 | } |
| 512 | 452 | ||
| @@ -521,9 +461,7 @@ static int alarm_timer_del(struct k_itimer *timr) | |||
| 521 | if (!rtcdev) | 461 | if (!rtcdev) |
| 522 | return -ENOTSUPP; | 462 | return -ENOTSUPP; |
| 523 | 463 | ||
| 524 | if (alarm_try_to_cancel(&timr->it.alarm.alarmtimer) < 0) | 464 | alarm_cancel(&timr->it.alarmtimer); |
| 525 | return TIMER_RETRY; | ||
| 526 | |||
| 527 | return 0; | 465 | return 0; |
| 528 | } | 466 | } |
| 529 | 467 | ||
| @@ -543,17 +481,25 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, | |||
| 543 | if (!rtcdev) | 481 | if (!rtcdev) |
| 544 | return -ENOTSUPP; | 482 | return -ENOTSUPP; |
| 545 | 483 | ||
| 484 | /* | ||
| 485 | * XXX HACK! Currently we can DOS a system if the interval | ||
| 486 | * period on alarmtimers is too small. Cap the interval here | ||
| 487 | * to 100us and solve this properly in a future patch! -jstultz | ||
| 488 | */ | ||
| 489 | if ((new_setting->it_interval.tv_sec == 0) && | ||
| 490 | (new_setting->it_interval.tv_nsec < 100000)) | ||
| 491 | new_setting->it_interval.tv_nsec = 100000; | ||
| 492 | |||
| 546 | if (old_setting) | 493 | if (old_setting) |
| 547 | alarm_timer_get(timr, old_setting); | 494 | alarm_timer_get(timr, old_setting); |
| 548 | 495 | ||
| 549 | /* If the timer was already set, cancel it */ | 496 | /* If the timer was already set, cancel it */ |
| 550 | if (alarm_try_to_cancel(&timr->it.alarm.alarmtimer) < 0) | 497 | alarm_cancel(&timr->it.alarmtimer); |
| 551 | return TIMER_RETRY; | ||
| 552 | 498 | ||
| 553 | /* start the timer */ | 499 | /* start the timer */ |
| 554 | timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval); | 500 | alarm_start(&timr->it.alarmtimer, |
| 555 | alarm_start(&timr->it.alarm.alarmtimer, | 501 | timespec_to_ktime(new_setting->it_value), |
| 556 | timespec_to_ktime(new_setting->it_value)); | 502 | timespec_to_ktime(new_setting->it_interval)); |
| 557 | return 0; | 503 | return 0; |
| 558 | } | 504 | } |
| 559 | 505 | ||
| @@ -563,15 +509,13 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, | |||
| 563 | * | 509 | * |
| 564 | * Wakes up the task that set the alarmtimer | 510 | * Wakes up the task that set the alarmtimer |
| 565 | */ | 511 | */ |
| 566 | static enum alarmtimer_restart alarmtimer_nsleep_wakeup(struct alarm *alarm, | 512 | static void alarmtimer_nsleep_wakeup(struct alarm *alarm) |
| 567 | ktime_t now) | ||
| 568 | { | 513 | { |
| 569 | struct task_struct *task = (struct task_struct *)alarm->data; | 514 | struct task_struct *task = (struct task_struct *)alarm->data; |
| 570 | 515 | ||
| 571 | alarm->data = NULL; | 516 | alarm->data = NULL; |
| 572 | if (task) | 517 | if (task) |
| 573 | wake_up_process(task); | 518 | wake_up_process(task); |
| 574 | return ALARMTIMER_NORESTART; | ||
| 575 | } | 519 | } |
| 576 | 520 | ||
| 577 | /** | 521 | /** |
| @@ -586,7 +530,7 @@ static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp) | |||
| 586 | alarm->data = (void *)current; | 530 | alarm->data = (void *)current; |
| 587 | do { | 531 | do { |
| 588 | set_current_state(TASK_INTERRUPTIBLE); | 532 | set_current_state(TASK_INTERRUPTIBLE); |
| 589 | alarm_start(alarm, absexp); | 533 | alarm_start(alarm, absexp, ktime_set(0, 0)); |
| 590 | if (likely(alarm->data)) | 534 | if (likely(alarm->data)) |
| 591 | schedule(); | 535 | schedule(); |
| 592 | 536 | ||
| @@ -747,7 +691,6 @@ static struct platform_driver alarmtimer_driver = { | |||
| 747 | */ | 691 | */ |
| 748 | static int __init alarmtimer_init(void) | 692 | static int __init alarmtimer_init(void) |
| 749 | { | 693 | { |
| 750 | struct platform_device *pdev; | ||
| 751 | int error = 0; | 694 | int error = 0; |
| 752 | int i; | 695 | int i; |
| 753 | struct k_clock alarm_clock = { | 696 | struct k_clock alarm_clock = { |
| @@ -760,8 +703,6 @@ static int __init alarmtimer_init(void) | |||
| 760 | .nsleep = alarm_timer_nsleep, | 703 | .nsleep = alarm_timer_nsleep, |
| 761 | }; | 704 | }; |
| 762 | 705 | ||
| 763 | alarmtimer_rtc_timer_init(); | ||
| 764 | |||
| 765 | posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock); | 706 | posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock); |
| 766 | posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock); | 707 | posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock); |
| 767 | 708 | ||
| @@ -773,28 +714,15 @@ static int __init alarmtimer_init(void) | |||
| 773 | for (i = 0; i < ALARM_NUMTYPE; i++) { | 714 | for (i = 0; i < ALARM_NUMTYPE; i++) { |
| 774 | timerqueue_init_head(&alarm_bases[i].timerqueue); | 715 | timerqueue_init_head(&alarm_bases[i].timerqueue); |
| 775 | spin_lock_init(&alarm_bases[i].lock); | 716 | spin_lock_init(&alarm_bases[i].lock); |
| 717 | hrtimer_init(&alarm_bases[i].timer, | ||
| 718 | alarm_bases[i].base_clockid, | ||
| 719 | HRTIMER_MODE_ABS); | ||
| 720 | alarm_bases[i].timer.function = alarmtimer_fired; | ||
| 776 | } | 721 | } |
| 777 | |||
| 778 | error = alarmtimer_rtc_interface_setup(); | ||
| 779 | if (error) | ||
| 780 | return error; | ||
| 781 | |||
| 782 | error = platform_driver_register(&alarmtimer_driver); | 722 | error = platform_driver_register(&alarmtimer_driver); |
| 783 | if (error) | 723 | platform_device_register_simple("alarmtimer", -1, NULL, 0); |
| 784 | goto out_if; | ||
| 785 | |||
| 786 | pdev = platform_device_register_simple("alarmtimer", -1, NULL, 0); | ||
| 787 | if (IS_ERR(pdev)) { | ||
| 788 | error = PTR_ERR(pdev); | ||
| 789 | goto out_drv; | ||
| 790 | } | ||
| 791 | ws = wakeup_source_register("alarmtimer"); | ||
| 792 | return 0; | ||
| 793 | 724 | ||
| 794 | out_drv: | ||
| 795 | platform_driver_unregister(&alarmtimer_driver); | ||
| 796 | out_if: | ||
| 797 | alarmtimer_rtc_interface_remove(); | ||
| 798 | return error; | 725 | return error; |
| 799 | } | 726 | } |
| 800 | device_initcall(alarmtimer_init); | 727 | device_initcall(alarmtimer_init); |
| 728 | |||
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 30b6de0d977..e4c699dfa4e 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
| 18 | #include <linux/notifier.h> | 18 | #include <linux/notifier.h> |
| 19 | #include <linux/smp.h> | 19 | #include <linux/smp.h> |
| 20 | #include <linux/sysdev.h> | ||
| 20 | 21 | ||
| 21 | #include "tick-internal.h" | 22 | #include "tick-internal.h" |
| 22 | 23 | ||
| @@ -93,143 +94,42 @@ void clockevents_shutdown(struct clock_event_device *dev) | |||
| 93 | dev->next_event.tv64 = KTIME_MAX; | 94 | dev->next_event.tv64 = KTIME_MAX; |
| 94 | } | 95 | } |
| 95 | 96 | ||
| 96 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST | ||
| 97 | |||
| 98 | /* Limit min_delta to a jiffie */ | ||
| 99 | #define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ) | ||
| 100 | |||
| 101 | /** | ||
| 102 | * clockevents_increase_min_delta - raise minimum delta of a clock event device | ||
| 103 | * @dev: device to increase the minimum delta | ||
| 104 | * | ||
| 105 | * Returns 0 on success, -ETIME when the minimum delta reached the limit. | ||
| 106 | */ | ||
| 107 | static int clockevents_increase_min_delta(struct clock_event_device *dev) | ||
| 108 | { | ||
| 109 | /* Nothing to do if we already reached the limit */ | ||
| 110 | if (dev->min_delta_ns >= MIN_DELTA_LIMIT) { | ||
| 111 | printk(KERN_WARNING "CE: Reprogramming failure. Giving up\n"); | ||
| 112 | dev->next_event.tv64 = KTIME_MAX; | ||
| 113 | return -ETIME; | ||
| 114 | } | ||
| 115 | |||
| 116 | if (dev->min_delta_ns < 5000) | ||
| 117 | dev->min_delta_ns = 5000; | ||
| 118 | else | ||
| 119 | dev->min_delta_ns += dev->min_delta_ns >> 1; | ||
| 120 | |||
| 121 | if (dev->min_delta_ns > MIN_DELTA_LIMIT) | ||
| 122 | dev->min_delta_ns = MIN_DELTA_LIMIT; | ||
| 123 | |||
| 124 | printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n", | ||
| 125 | dev->name ? dev->name : "?", | ||
| 126 | (unsigned long long) dev->min_delta_ns); | ||
| 127 | return 0; | ||
| 128 | } | ||
| 129 | |||
| 130 | /** | ||
| 131 | * clockevents_program_min_delta - Set clock event device to the minimum delay. | ||
| 132 | * @dev: device to program | ||
| 133 | * | ||
| 134 | * Returns 0 on success, -ETIME when the retry loop failed. | ||
| 135 | */ | ||
| 136 | static int clockevents_program_min_delta(struct clock_event_device *dev) | ||
| 137 | { | ||
| 138 | unsigned long long clc; | ||
| 139 | int64_t delta; | ||
| 140 | int i; | ||
| 141 | |||
| 142 | for (i = 0;;) { | ||
| 143 | delta = dev->min_delta_ns; | ||
| 144 | dev->next_event = ktime_add_ns(ktime_get(), delta); | ||
| 145 | |||
| 146 | if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) | ||
| 147 | return 0; | ||
| 148 | |||
| 149 | dev->retries++; | ||
| 150 | clc = ((unsigned long long) delta * dev->mult) >> dev->shift; | ||
| 151 | if (dev->set_next_event((unsigned long) clc, dev) == 0) | ||
| 152 | return 0; | ||
| 153 | |||
| 154 | if (++i > 2) { | ||
| 155 | /* | ||
| 156 | * We tried 3 times to program the device with the | ||
| 157 | * given min_delta_ns. Try to increase the minimum | ||
| 158 | * delta, if that fails as well get out of here. | ||
| 159 | */ | ||
| 160 | if (clockevents_increase_min_delta(dev)) | ||
| 161 | return -ETIME; | ||
| 162 | i = 0; | ||
| 163 | } | ||
| 164 | } | ||
| 165 | } | ||
| 166 | |||
| 167 | #else /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */ | ||
| 168 | |||
| 169 | /** | ||
| 170 | * clockevents_program_min_delta - Set clock event device to the minimum delay. | ||
| 171 | * @dev: device to program | ||
| 172 | * | ||
| 173 | * Returns 0 on success, -ETIME when the retry loop failed. | ||
| 174 | */ | ||
| 175 | static int clockevents_program_min_delta(struct clock_event_device *dev) | ||
| 176 | { | ||
| 177 | unsigned long long clc; | ||
| 178 | int64_t delta; | ||
| 179 | |||
| 180 | delta = dev->min_delta_ns; | ||
| 181 | dev->next_event = ktime_add_ns(ktime_get(), delta); | ||
| 182 | |||
| 183 | if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) | ||
| 184 | return 0; | ||
| 185 | |||
| 186 | dev->retries++; | ||
| 187 | clc = ((unsigned long long) delta * dev->mult) >> dev->shift; | ||
| 188 | return dev->set_next_event((unsigned long) clc, dev); | ||
| 189 | } | ||
| 190 | |||
| 191 | #endif /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */ | ||
| 192 | |||
| 193 | /** | 97 | /** |
| 194 | * clockevents_program_event - Reprogram the clock event device. | 98 | * clockevents_program_event - Reprogram the clock event device. |
| 195 | * @dev: device to program | ||
| 196 | * @expires: absolute expiry time (monotonic clock) | 99 | * @expires: absolute expiry time (monotonic clock) |
| 197 | * @force: program minimum delay if expires can not be set | ||
| 198 | * | 100 | * |
| 199 | * Returns 0 on success, -ETIME when the event is in the past. | 101 | * Returns 0 on success, -ETIME when the event is in the past. |
| 200 | */ | 102 | */ |
| 201 | int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, | 103 | int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, |
| 202 | bool force) | 104 | ktime_t now) |
| 203 | { | 105 | { |
| 204 | unsigned long long clc; | 106 | unsigned long long clc; |
| 205 | int64_t delta; | 107 | int64_t delta; |
| 206 | int rc; | ||
| 207 | 108 | ||
| 208 | if (unlikely(expires.tv64 < 0)) { | 109 | if (unlikely(expires.tv64 < 0)) { |
| 209 | WARN_ON_ONCE(1); | 110 | WARN_ON_ONCE(1); |
| 210 | return -ETIME; | 111 | return -ETIME; |
| 211 | } | 112 | } |
| 212 | 113 | ||
| 114 | delta = ktime_to_ns(ktime_sub(expires, now)); | ||
| 115 | |||
| 116 | if (delta <= 0) | ||
| 117 | return -ETIME; | ||
| 118 | |||
| 213 | dev->next_event = expires; | 119 | dev->next_event = expires; |
| 214 | 120 | ||
| 215 | if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) | 121 | if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) |
| 216 | return 0; | 122 | return 0; |
| 217 | 123 | ||
| 218 | /* Shortcut for clockevent devices that can deal with ktime. */ | 124 | if (delta > dev->max_delta_ns) |
| 219 | if (dev->features & CLOCK_EVT_FEAT_KTIME) | 125 | delta = dev->max_delta_ns; |
| 220 | return dev->set_next_ktime(expires, dev); | 126 | if (delta < dev->min_delta_ns) |
| 221 | 127 | delta = dev->min_delta_ns; | |
| 222 | delta = ktime_to_ns(ktime_sub(expires, ktime_get())); | ||
| 223 | if (delta <= 0) | ||
| 224 | return force ? clockevents_program_min_delta(dev) : -ETIME; | ||
| 225 | |||
| 226 | delta = min(delta, (int64_t) dev->max_delta_ns); | ||
| 227 | delta = max(delta, (int64_t) dev->min_delta_ns); | ||
| 228 | 128 | ||
| 229 | clc = ((unsigned long long) delta * dev->mult) >> dev->shift; | 129 | clc = delta * dev->mult; |
| 230 | rc = dev->set_next_event((unsigned long) clc, dev); | 130 | clc >>= dev->shift; |
| 231 | 131 | ||
| 232 | return (rc && force) ? clockevents_program_min_delta(dev) : rc; | 132 | return dev->set_next_event((unsigned long) clc, dev); |
| 233 | } | 133 | } |
| 234 | 134 | ||
| 235 | /** | 135 | /** |
| @@ -297,7 +197,8 @@ void clockevents_register_device(struct clock_event_device *dev) | |||
| 297 | } | 197 | } |
| 298 | EXPORT_SYMBOL_GPL(clockevents_register_device); | 198 | EXPORT_SYMBOL_GPL(clockevents_register_device); |
| 299 | 199 | ||
| 300 | void clockevents_config(struct clock_event_device *dev, u32 freq) | 200 | static void clockevents_config(struct clock_event_device *dev, |
| 201 | u32 freq) | ||
| 301 | { | 202 | { |
| 302 | u64 sec; | 203 | u64 sec; |
| 303 | 204 | ||
| @@ -357,7 +258,7 @@ int clockevents_update_freq(struct clock_event_device *dev, u32 freq) | |||
| 357 | if (dev->mode != CLOCK_EVT_MODE_ONESHOT) | 258 | if (dev->mode != CLOCK_EVT_MODE_ONESHOT) |
| 358 | return 0; | 259 | return 0; |
| 359 | 260 | ||
| 360 | return clockevents_program_event(dev, dev->next_event, false); | 261 | return clockevents_program_event(dev, dev->next_event, ktime_get()); |
| 361 | } | 262 | } |
| 362 | 263 | ||
| 363 | /* | 264 | /* |
| @@ -397,30 +298,6 @@ void clockevents_exchange_device(struct clock_event_device *old, | |||
| 397 | local_irq_restore(flags); | 298 | local_irq_restore(flags); |
| 398 | } | 299 | } |
| 399 | 300 | ||
| 400 | /** | ||
| 401 | * clockevents_suspend - suspend clock devices | ||
| 402 | */ | ||
| 403 | void clockevents_suspend(void) | ||
| 404 | { | ||
| 405 | struct clock_event_device *dev; | ||
| 406 | |||
| 407 | list_for_each_entry_reverse(dev, &clockevent_devices, list) | ||
| 408 | if (dev->suspend) | ||
| 409 | dev->suspend(dev); | ||
| 410 | } | ||
| 411 | |||
| 412 | /** | ||
| 413 | * clockevents_resume - resume clock devices | ||
| 414 | */ | ||
| 415 | void clockevents_resume(void) | ||
| 416 | { | ||
| 417 | struct clock_event_device *dev; | ||
| 418 | |||
| 419 | list_for_each_entry(dev, &clockevent_devices, list) | ||
| 420 | if (dev->resume) | ||
| 421 | dev->resume(dev); | ||
| 422 | } | ||
| 423 | |||
| 424 | #ifdef CONFIG_GENERIC_CLOCKEVENTS | 301 | #ifdef CONFIG_GENERIC_CLOCKEVENTS |
| 425 | /** | 302 | /** |
| 426 | * clockevents_notify - notification about relevant events | 303 | * clockevents_notify - notification about relevant events |
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c9583382141..8f77da18fef 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
| @@ -23,8 +23,8 @@ | |||
| 23 | * o Allow clocksource drivers to be unregistered | 23 | * o Allow clocksource drivers to be unregistered |
| 24 | */ | 24 | */ |
| 25 | 25 | ||
| 26 | #include <linux/device.h> | ||
| 27 | #include <linux/clocksource.h> | 26 | #include <linux/clocksource.h> |
| 27 | #include <linux/sysdev.h> | ||
| 28 | #include <linux/init.h> | 28 | #include <linux/init.h> |
| 29 | #include <linux/module.h> | 29 | #include <linux/module.h> |
| 30 | #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */ | 30 | #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */ |
| @@ -186,7 +186,6 @@ static struct timer_list watchdog_timer; | |||
| 186 | static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); | 186 | static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); |
| 187 | static DEFINE_SPINLOCK(watchdog_lock); | 187 | static DEFINE_SPINLOCK(watchdog_lock); |
| 188 | static int watchdog_running; | 188 | static int watchdog_running; |
| 189 | static atomic_t watchdog_reset_pending; | ||
| 190 | 189 | ||
| 191 | static int clocksource_watchdog_kthread(void *data); | 190 | static int clocksource_watchdog_kthread(void *data); |
| 192 | static void __clocksource_change_rating(struct clocksource *cs, int rating); | 191 | static void __clocksource_change_rating(struct clocksource *cs, int rating); |
| @@ -248,14 +247,12 @@ static void clocksource_watchdog(unsigned long data) | |||
| 248 | struct clocksource *cs; | 247 | struct clocksource *cs; |
| 249 | cycle_t csnow, wdnow; | 248 | cycle_t csnow, wdnow; |
| 250 | int64_t wd_nsec, cs_nsec; | 249 | int64_t wd_nsec, cs_nsec; |
| 251 | int next_cpu, reset_pending; | 250 | int next_cpu; |
| 252 | 251 | ||
| 253 | spin_lock(&watchdog_lock); | 252 | spin_lock(&watchdog_lock); |
| 254 | if (!watchdog_running) | 253 | if (!watchdog_running) |
| 255 | goto out; | 254 | goto out; |
| 256 | 255 | ||
| 257 | reset_pending = atomic_read(&watchdog_reset_pending); | ||
| 258 | |||
| 259 | list_for_each_entry(cs, &watchdog_list, wd_list) { | 256 | list_for_each_entry(cs, &watchdog_list, wd_list) { |
| 260 | 257 | ||
| 261 | /* Clocksource already marked unstable? */ | 258 | /* Clocksource already marked unstable? */ |
| @@ -271,8 +268,7 @@ static void clocksource_watchdog(unsigned long data) | |||
| 271 | local_irq_enable(); | 268 | local_irq_enable(); |
| 272 | 269 | ||
| 273 | /* Clocksource initialized ? */ | 270 | /* Clocksource initialized ? */ |
| 274 | if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) || | 271 | if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { |
| 275 | atomic_read(&watchdog_reset_pending)) { | ||
| 276 | cs->flags |= CLOCK_SOURCE_WATCHDOG; | 272 | cs->flags |= CLOCK_SOURCE_WATCHDOG; |
| 277 | cs->wd_last = wdnow; | 273 | cs->wd_last = wdnow; |
| 278 | cs->cs_last = csnow; | 274 | cs->cs_last = csnow; |
| @@ -287,11 +283,8 @@ static void clocksource_watchdog(unsigned long data) | |||
| 287 | cs->cs_last = csnow; | 283 | cs->cs_last = csnow; |
| 288 | cs->wd_last = wdnow; | 284 | cs->wd_last = wdnow; |
| 289 | 285 | ||
| 290 | if (atomic_read(&watchdog_reset_pending)) | ||
| 291 | continue; | ||
| 292 | |||
| 293 | /* Check the deviation from the watchdog clocksource. */ | 286 | /* Check the deviation from the watchdog clocksource. */ |
| 294 | if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) { | 287 | if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { |
| 295 | clocksource_unstable(cs, cs_nsec - wd_nsec); | 288 | clocksource_unstable(cs, cs_nsec - wd_nsec); |
| 296 | continue; | 289 | continue; |
| 297 | } | 290 | } |
| @@ -310,13 +303,6 @@ static void clocksource_watchdog(unsigned long data) | |||
| 310 | } | 303 | } |
| 311 | 304 | ||
| 312 | /* | 305 | /* |
| 313 | * We only clear the watchdog_reset_pending, when we did a | ||
| 314 | * full cycle through all clocksources. | ||
| 315 | */ | ||
| 316 | if (reset_pending) | ||
| 317 | atomic_dec(&watchdog_reset_pending); | ||
| 318 | |||
| 319 | /* | ||
| 320 | * Cycle through CPUs to check if the CPUs stay synchronized | 306 | * Cycle through CPUs to check if the CPUs stay synchronized |
| 321 | * to each other. | 307 | * to each other. |
| 322 | */ | 308 | */ |
| @@ -358,7 +344,23 @@ static inline void clocksource_reset_watchdog(void) | |||
| 358 | 344 | ||
| 359 | static void clocksource_resume_watchdog(void) | 345 | static void clocksource_resume_watchdog(void) |
| 360 | { | 346 | { |
| 361 | atomic_inc(&watchdog_reset_pending); | 347 | unsigned long flags; |
| 348 | |||
| 349 | /* | ||
| 350 | * We use trylock here to avoid a potential dead lock when | ||
| 351 | * kgdb calls this code after the kernel has been stopped with | ||
| 352 | * watchdog_lock held. When watchdog_lock is held we just | ||
| 353 | * return and accept, that the watchdog might trigger and mark | ||
| 354 | * the monitored clock source (usually TSC) unstable. | ||
| 355 | * | ||
| 356 | * This does not affect the other caller clocksource_resume() | ||
| 357 | * because at this point the kernel is UP, interrupts are | ||
| 358 | * disabled and nothing can hold watchdog_lock. | ||
| 359 | */ | ||
| 360 | if (!spin_trylock_irqsave(&watchdog_lock, flags)) | ||
| 361 | return; | ||
| 362 | clocksource_reset_watchdog(); | ||
| 363 | spin_unlock_irqrestore(&watchdog_lock, flags); | ||
| 362 | } | 364 | } |
| 363 | 365 | ||
| 364 | static void clocksource_enqueue_watchdog(struct clocksource *cs) | 366 | static void clocksource_enqueue_watchdog(struct clocksource *cs) |
| @@ -500,7 +502,7 @@ static u32 clocksource_max_adjustment(struct clocksource *cs) | |||
| 500 | { | 502 | { |
| 501 | u64 ret; | 503 | u64 ret; |
| 502 | /* | 504 | /* |
| 503 | * We won't try to correct for more than 11% adjustments (110,000 ppm), | 505 | * We won't try to correct for more then 11% adjustments (110,000 ppm), |
| 504 | */ | 506 | */ |
| 505 | ret = (u64)cs->mult * 11; | 507 | ret = (u64)cs->mult * 11; |
| 506 | do_div(ret,100); | 508 | do_div(ret,100); |
| @@ -647,7 +649,7 @@ static void clocksource_enqueue(struct clocksource *cs) | |||
| 647 | 649 | ||
| 648 | /** | 650 | /** |
| 649 | * __clocksource_updatefreq_scale - Used update clocksource with new freq | 651 | * __clocksource_updatefreq_scale - Used update clocksource with new freq |
| 650 | * @cs: clocksource to be registered | 652 | * @t: clocksource to be registered |
| 651 | * @scale: Scale factor multiplied against freq to get clocksource hz | 653 | * @scale: Scale factor multiplied against freq to get clocksource hz |
| 652 | * @freq: clocksource frequency (cycles per second) divided by scale | 654 | * @freq: clocksource frequency (cycles per second) divided by scale |
| 653 | * | 655 | * |
| @@ -699,7 +701,7 @@ EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); | |||
| 699 | 701 | ||
| 700 | /** | 702 | /** |
| 701 | * __clocksource_register_scale - Used to install new clocksources | 703 | * __clocksource_register_scale - Used to install new clocksources |
| 702 | * @cs: clocksource to be registered | 704 | * @t: clocksource to be registered |
| 703 | * @scale: Scale factor multiplied against freq to get clocksource hz | 705 | * @scale: Scale factor multiplied against freq to get clocksource hz |
| 704 | * @freq: clocksource frequency (cycles per second) divided by scale | 706 | * @freq: clocksource frequency (cycles per second) divided by scale |
| 705 | * | 707 | * |
| @@ -727,7 +729,7 @@ EXPORT_SYMBOL_GPL(__clocksource_register_scale); | |||
| 727 | 729 | ||
| 728 | /** | 730 | /** |
| 729 | * clocksource_register - Used to install new clocksources | 731 | * clocksource_register - Used to install new clocksources |
| 730 | * @cs: clocksource to be registered | 732 | * @t: clocksource to be registered |
| 731 | * | 733 | * |
| 732 | * Returns -EBUSY if registration fails, zero otherwise. | 734 | * Returns -EBUSY if registration fails, zero otherwise. |
| 733 | */ | 735 | */ |
| @@ -761,8 +763,6 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating) | |||
| 761 | 763 | ||
| 762 | /** | 764 | /** |
| 763 | * clocksource_change_rating - Change the rating of a registered clocksource | 765 | * clocksource_change_rating - Change the rating of a registered clocksource |
| 764 | * @cs: clocksource to be changed | ||
| 765 | * @rating: new rating | ||
| 766 | */ | 766 | */ |
| 767 | void clocksource_change_rating(struct clocksource *cs, int rating) | 767 | void clocksource_change_rating(struct clocksource *cs, int rating) |
| 768 | { | 768 | { |
| @@ -774,7 +774,6 @@ EXPORT_SYMBOL(clocksource_change_rating); | |||
| 774 | 774 | ||
| 775 | /** | 775 | /** |
| 776 | * clocksource_unregister - remove a registered clocksource | 776 | * clocksource_unregister - remove a registered clocksource |
| 777 | * @cs: clocksource to be unregistered | ||
| 778 | */ | 777 | */ |
| 779 | void clocksource_unregister(struct clocksource *cs) | 778 | void clocksource_unregister(struct clocksource *cs) |
| 780 | { | 779 | { |
| @@ -790,14 +789,13 @@ EXPORT_SYMBOL(clocksource_unregister); | |||
| 790 | /** | 789 | /** |
| 791 | * sysfs_show_current_clocksources - sysfs interface for current clocksource | 790 | * sysfs_show_current_clocksources - sysfs interface for current clocksource |
| 792 | * @dev: unused | 791 | * @dev: unused |
| 793 | * @attr: unused | ||
| 794 | * @buf: char buffer to be filled with clocksource list | 792 | * @buf: char buffer to be filled with clocksource list |
| 795 | * | 793 | * |
| 796 | * Provides sysfs interface for listing current clocksource. | 794 | * Provides sysfs interface for listing current clocksource. |
| 797 | */ | 795 | */ |
| 798 | static ssize_t | 796 | static ssize_t |
| 799 | sysfs_show_current_clocksources(struct device *dev, | 797 | sysfs_show_current_clocksources(struct sys_device *dev, |
| 800 | struct device_attribute *attr, char *buf) | 798 | struct sysdev_attribute *attr, char *buf) |
| 801 | { | 799 | { |
| 802 | ssize_t count = 0; | 800 | ssize_t count = 0; |
| 803 | 801 | ||
| @@ -811,15 +809,14 @@ sysfs_show_current_clocksources(struct device *dev, | |||
| 811 | /** | 809 | /** |
| 812 | * sysfs_override_clocksource - interface for manually overriding clocksource | 810 | * sysfs_override_clocksource - interface for manually overriding clocksource |
| 813 | * @dev: unused | 811 | * @dev: unused |
| 814 | * @attr: unused | ||
| 815 | * @buf: name of override clocksource | 812 | * @buf: name of override clocksource |
| 816 | * @count: length of buffer | 813 | * @count: length of buffer |
| 817 | * | 814 | * |
| 818 | * Takes input from sysfs interface for manually overriding the default | 815 | * Takes input from sysfs interface for manually overriding the default |
| 819 | * clocksource selection. | 816 | * clocksource selection. |
| 820 | */ | 817 | */ |
| 821 | static ssize_t sysfs_override_clocksource(struct device *dev, | 818 | static ssize_t sysfs_override_clocksource(struct sys_device *dev, |
| 822 | struct device_attribute *attr, | 819 | struct sysdev_attribute *attr, |
| 823 | const char *buf, size_t count) | 820 | const char *buf, size_t count) |
| 824 | { | 821 | { |
| 825 | size_t ret = count; | 822 | size_t ret = count; |
| @@ -847,14 +844,13 @@ static ssize_t sysfs_override_clocksource(struct device *dev, | |||
| 847 | /** | 844 | /** |
| 848 | * sysfs_show_available_clocksources - sysfs interface for listing clocksource | 845 | * sysfs_show_available_clocksources - sysfs interface for listing clocksource |
| 849 | * @dev: unused | 846 | * @dev: unused |
| 850 | * @attr: unused | ||
| 851 | * @buf: char buffer to be filled with clocksource list | 847 | * @buf: char buffer to be filled with clocksource list |
| 852 | * | 848 | * |
| 853 | * Provides sysfs interface for listing registered clocksources | 849 | * Provides sysfs interface for listing registered clocksources |
| 854 | */ | 850 | */ |
| 855 | static ssize_t | 851 | static ssize_t |
| 856 | sysfs_show_available_clocksources(struct device *dev, | 852 | sysfs_show_available_clocksources(struct sys_device *dev, |
| 857 | struct device_attribute *attr, | 853 | struct sysdev_attribute *attr, |
| 858 | char *buf) | 854 | char *buf) |
| 859 | { | 855 | { |
| 860 | struct clocksource *src; | 856 | struct clocksource *src; |
| @@ -883,36 +879,35 @@ sysfs_show_available_clocksources(struct device *dev, | |||
| 883 | /* | 879 | /* |
| 884 | * Sysfs setup bits: | 880 | * Sysfs setup bits: |
| 885 | */ | 881 | */ |
| 886 | static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources, | 882 | static SYSDEV_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources, |
| 887 | sysfs_override_clocksource); | 883 | sysfs_override_clocksource); |
| 888 | 884 | ||
| 889 | static DEVICE_ATTR(available_clocksource, 0444, | 885 | static SYSDEV_ATTR(available_clocksource, 0444, |
| 890 | sysfs_show_available_clocksources, NULL); | 886 | sysfs_show_available_clocksources, NULL); |
| 891 | 887 | ||
| 892 | static struct bus_type clocksource_subsys = { | 888 | static struct sysdev_class clocksource_sysclass = { |
| 893 | .name = "clocksource", | 889 | .name = "clocksource", |
| 894 | .dev_name = "clocksource", | ||
| 895 | }; | 890 | }; |
| 896 | 891 | ||
| 897 | static struct device device_clocksource = { | 892 | static struct sys_device device_clocksource = { |
| 898 | .id = 0, | 893 | .id = 0, |
| 899 | .bus = &clocksource_subsys, | 894 | .cls = &clocksource_sysclass, |
| 900 | }; | 895 | }; |
| 901 | 896 | ||
| 902 | static int __init init_clocksource_sysfs(void) | 897 | static int __init init_clocksource_sysfs(void) |
| 903 | { | 898 | { |
| 904 | int error = subsys_system_register(&clocksource_subsys, NULL); | 899 | int error = sysdev_class_register(&clocksource_sysclass); |
| 905 | 900 | ||
| 906 | if (!error) | 901 | if (!error) |
| 907 | error = device_register(&device_clocksource); | 902 | error = sysdev_register(&device_clocksource); |
| 908 | if (!error) | 903 | if (!error) |
| 909 | error = device_create_file( | 904 | error = sysdev_create_file( |
| 910 | &device_clocksource, | 905 | &device_clocksource, |
| 911 | &dev_attr_current_clocksource); | 906 | &attr_current_clocksource); |
| 912 | if (!error) | 907 | if (!error) |
| 913 | error = device_create_file( | 908 | error = sysdev_create_file( |
| 914 | &device_clocksource, | 909 | &device_clocksource, |
| 915 | &dev_attr_available_clocksource); | 910 | &attr_available_clocksource); |
| 916 | return error; | 911 | return error; |
| 917 | } | 912 | } |
| 918 | 913 | ||
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 7a925ba456f..a470154e040 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c | |||
| @@ -37,7 +37,7 @@ | |||
| 37 | * requested HZ value. It is also not recommended | 37 | * requested HZ value. It is also not recommended |
| 38 | * for "tick-less" systems. | 38 | * for "tick-less" systems. |
| 39 | */ | 39 | */ |
| 40 | #define NSEC_PER_JIFFY ((NSEC_PER_SEC+HZ/2)/HZ) | 40 | #define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ)) |
| 41 | 41 | ||
| 42 | /* Since jiffies uses a simple NSEC_PER_JIFFY multiplier | 42 | /* Since jiffies uses a simple NSEC_PER_JIFFY multiplier |
| 43 | * conversion, the .shift value could be zero. However | 43 | * conversion, the .shift value could be zero. However |
| @@ -58,7 +58,7 @@ static cycle_t jiffies_read(struct clocksource *cs) | |||
| 58 | return (cycle_t) jiffies; | 58 | return (cycle_t) jiffies; |
| 59 | } | 59 | } |
| 60 | 60 | ||
| 61 | static struct clocksource clocksource_jiffies = { | 61 | struct clocksource clocksource_jiffies = { |
| 62 | .name = "jiffies", | 62 | .name = "jiffies", |
| 63 | .rating = 1, /* lowest valid rating*/ | 63 | .rating = 1, /* lowest valid rating*/ |
| 64 | .read = jiffies_read, | 64 | .read = jiffies_read, |
| @@ -67,8 +67,6 @@ static struct clocksource clocksource_jiffies = { | |||
| 67 | .shift = JIFFIES_SHIFT, | 67 | .shift = JIFFIES_SHIFT, |
| 68 | }; | 68 | }; |
| 69 | 69 | ||
| 70 | __cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock); | ||
| 71 | |||
| 72 | #if (BITS_PER_LONG < 64) | 70 | #if (BITS_PER_LONG < 64) |
| 73 | u64 get_jiffies_64(void) | 71 | u64 get_jiffies_64(void) |
| 74 | { | 72 | { |
| @@ -76,9 +74,9 @@ u64 get_jiffies_64(void) | |||
| 76 | u64 ret; | 74 | u64 ret; |
| 77 | 75 | ||
| 78 | do { | 76 | do { |
| 79 | seq = read_seqbegin(&jiffies_lock); | 77 | seq = read_seqbegin(&xtime_lock); |
| 80 | ret = jiffies_64; | 78 | ret = jiffies_64; |
| 81 | } while (read_seqretry(&jiffies_lock, seq)); | 79 | } while (read_seqretry(&xtime_lock, seq)); |
| 82 | return ret; | 80 | return ret; |
| 83 | } | 81 | } |
| 84 | EXPORT_SYMBOL(get_jiffies_64); | 82 | EXPORT_SYMBOL(get_jiffies_64); |
| @@ -97,33 +95,3 @@ struct clocksource * __init __weak clocksource_default_clock(void) | |||
| 97 | { | 95 | { |
| 98 | return &clocksource_jiffies; | 96 | return &clocksource_jiffies; |
| 99 | } | 97 | } |
| 100 | |||
| 101 | struct clocksource refined_jiffies; | ||
| 102 | |||
| 103 | int register_refined_jiffies(long cycles_per_second) | ||
| 104 | { | ||
| 105 | u64 nsec_per_tick, shift_hz; | ||
| 106 | long cycles_per_tick; | ||
| 107 | |||
| 108 | |||
| 109 | |||
| 110 | refined_jiffies = clocksource_jiffies; | ||
| 111 | refined_jiffies.name = "refined-jiffies"; | ||
| 112 | refined_jiffies.rating++; | ||
| 113 | |||
| 114 | /* Calc cycles per tick */ | ||
| 115 | cycles_per_tick = (cycles_per_second + HZ/2)/HZ; | ||
| 116 | /* shift_hz stores hz<<8 for extra accuracy */ | ||
| 117 | shift_hz = (u64)cycles_per_second << 8; | ||
| 118 | shift_hz += cycles_per_tick/2; | ||
| 119 | do_div(shift_hz, cycles_per_tick); | ||
| 120 | /* Calculate nsec_per_tick using shift_hz */ | ||
| 121 | nsec_per_tick = (u64)NSEC_PER_SEC << 8; | ||
| 122 | nsec_per_tick += (u32)shift_hz/2; | ||
| 123 | do_div(nsec_per_tick, (u32)shift_hz); | ||
| 124 | |||
| 125 | refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT; | ||
| 126 | |||
| 127 | clocksource_register(&refined_jiffies); | ||
| 128 | return 0; | ||
| 129 | } | ||
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 24174b4d669..f6117a4c7cb 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
| @@ -22,18 +22,17 @@ | |||
| 22 | * NTP timekeeping variables: | 22 | * NTP timekeeping variables: |
| 23 | */ | 23 | */ |
| 24 | 24 | ||
| 25 | DEFINE_SPINLOCK(ntp_lock); | ||
| 26 | |||
| 27 | |||
| 28 | /* USER_HZ period (usecs): */ | 25 | /* USER_HZ period (usecs): */ |
| 29 | unsigned long tick_usec = TICK_USEC; | 26 | unsigned long tick_usec = TICK_USEC; |
| 30 | 27 | ||
| 31 | /* SHIFTED_HZ period (nsecs): */ | 28 | /* ACTHZ period (nsecs): */ |
| 32 | unsigned long tick_nsec; | 29 | unsigned long tick_nsec; |
| 33 | 30 | ||
| 34 | static u64 tick_length; | 31 | u64 tick_length; |
| 35 | static u64 tick_length_base; | 32 | static u64 tick_length_base; |
| 36 | 33 | ||
| 34 | static struct hrtimer leap_timer; | ||
| 35 | |||
| 37 | #define MAX_TICKADJ 500LL /* usecs */ | 36 | #define MAX_TICKADJ 500LL /* usecs */ |
| 38 | #define MAX_TICKADJ_SCALED \ | 37 | #define MAX_TICKADJ_SCALED \ |
| 39 | (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) | 38 | (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) |
| @@ -50,7 +49,7 @@ static u64 tick_length_base; | |||
| 50 | static int time_state = TIME_OK; | 49 | static int time_state = TIME_OK; |
| 51 | 50 | ||
| 52 | /* clock status bits: */ | 51 | /* clock status bits: */ |
| 53 | static int time_status = STA_UNSYNC; | 52 | int time_status = STA_UNSYNC; |
| 54 | 53 | ||
| 55 | /* TAI offset (secs): */ | 54 | /* TAI offset (secs): */ |
| 56 | static long time_tai; | 55 | static long time_tai; |
| @@ -134,7 +133,7 @@ static inline void pps_reset_freq_interval(void) | |||
| 134 | /** | 133 | /** |
| 135 | * pps_clear - Clears the PPS state variables | 134 | * pps_clear - Clears the PPS state variables |
| 136 | * | 135 | * |
| 137 | * Must be called while holding a write on the ntp_lock | 136 | * Must be called while holding a write on the xtime_lock |
| 138 | */ | 137 | */ |
| 139 | static inline void pps_clear(void) | 138 | static inline void pps_clear(void) |
| 140 | { | 139 | { |
| @@ -150,7 +149,7 @@ static inline void pps_clear(void) | |||
| 150 | * the last PPS signal. When it reaches 0, indicate that PPS signal is | 149 | * the last PPS signal. When it reaches 0, indicate that PPS signal is |
| 151 | * missing. | 150 | * missing. |
| 152 | * | 151 | * |
| 153 | * Must be called while holding a write on the ntp_lock | 152 | * Must be called while holding a write on the xtime_lock |
| 154 | */ | 153 | */ |
| 155 | static inline void pps_dec_valid(void) | 154 | static inline void pps_dec_valid(void) |
| 156 | { | 155 | { |
| @@ -234,17 +233,6 @@ static inline void pps_fill_timex(struct timex *txc) | |||
| 234 | 233 | ||
| 235 | #endif /* CONFIG_NTP_PPS */ | 234 | #endif /* CONFIG_NTP_PPS */ |
| 236 | 235 | ||
| 237 | |||
| 238 | /** | ||
| 239 | * ntp_synced - Returns 1 if the NTP status is not UNSYNC | ||
| 240 | * | ||
| 241 | */ | ||
| 242 | static inline int ntp_synced(void) | ||
| 243 | { | ||
| 244 | return !(time_status & STA_UNSYNC); | ||
| 245 | } | ||
| 246 | |||
| 247 | |||
| 248 | /* | 236 | /* |
| 249 | * NTP methods: | 237 | * NTP methods: |
| 250 | */ | 238 | */ |
| @@ -287,7 +275,7 @@ static inline s64 ntp_update_offset_fll(s64 offset64, long secs) | |||
| 287 | 275 | ||
| 288 | time_status |= STA_MODE; | 276 | time_status |= STA_MODE; |
| 289 | 277 | ||
| 290 | return div64_long(offset64 << (NTP_SCALE_SHIFT - SHIFT_FLL), secs); | 278 | return div_s64(offset64 << (NTP_SCALE_SHIFT - SHIFT_FLL), secs); |
| 291 | } | 279 | } |
| 292 | 280 | ||
| 293 | static void ntp_update_offset(long offset) | 281 | static void ntp_update_offset(long offset) |
| @@ -342,13 +330,11 @@ static void ntp_update_offset(long offset) | |||
| 342 | 330 | ||
| 343 | /** | 331 | /** |
| 344 | * ntp_clear - Clears the NTP state variables | 332 | * ntp_clear - Clears the NTP state variables |
| 333 | * | ||
| 334 | * Must be called while holding a write on the xtime_lock | ||
| 345 | */ | 335 | */ |
| 346 | void ntp_clear(void) | 336 | void ntp_clear(void) |
| 347 | { | 337 | { |
| 348 | unsigned long flags; | ||
| 349 | |||
| 350 | spin_lock_irqsave(&ntp_lock, flags); | ||
| 351 | |||
| 352 | time_adjust = 0; /* stop active adjtime() */ | 338 | time_adjust = 0; /* stop active adjtime() */ |
| 353 | time_status |= STA_UNSYNC; | 339 | time_status |= STA_UNSYNC; |
| 354 | time_maxerror = NTP_PHASE_LIMIT; | 340 | time_maxerror = NTP_PHASE_LIMIT; |
| @@ -361,85 +347,63 @@ void ntp_clear(void) | |||
| 361 | 347 | ||
| 362 | /* Clear PPS state variables */ | 348 | /* Clear PPS state variables */ |
| 363 | pps_clear(); | 349 | pps_clear(); |
| 364 | spin_unlock_irqrestore(&ntp_lock, flags); | ||
| 365 | |||
| 366 | } | ||
| 367 | |||
| 368 | |||
| 369 | u64 ntp_tick_length(void) | ||
| 370 | { | ||
| 371 | unsigned long flags; | ||
| 372 | s64 ret; | ||
| 373 | |||
| 374 | spin_lock_irqsave(&ntp_lock, flags); | ||
| 375 | ret = tick_length; | ||
| 376 | spin_unlock_irqrestore(&ntp_lock, flags); | ||
| 377 | return ret; | ||
| 378 | } | 350 | } |
| 379 | 351 | ||
| 380 | |||
| 381 | /* | 352 | /* |
| 382 | * this routine handles the overflow of the microsecond field | 353 | * Leap second processing. If in leap-insert state at the end of the |
| 383 | * | 354 | * day, the system clock is set back one second; if in leap-delete |
| 384 | * The tricky bits of code to handle the accurate clock support | 355 | * state, the system clock is set ahead one second. |
| 385 | * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. | ||
| 386 | * They were originally developed for SUN and DEC kernels. | ||
| 387 | * All the kudos should go to Dave for this stuff. | ||
| 388 | * | ||
| 389 | * Also handles leap second processing, and returns leap offset | ||
| 390 | */ | 356 | */ |
| 391 | int second_overflow(unsigned long secs) | 357 | static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) |
| 392 | { | 358 | { |
| 393 | s64 delta; | 359 | enum hrtimer_restart res = HRTIMER_NORESTART; |
| 394 | int leap = 0; | ||
| 395 | unsigned long flags; | ||
| 396 | 360 | ||
| 397 | spin_lock_irqsave(&ntp_lock, flags); | 361 | write_seqlock(&xtime_lock); |
| 398 | 362 | ||
| 399 | /* | ||
| 400 | * Leap second processing. If in leap-insert state at the end of the | ||
| 401 | * day, the system clock is set back one second; if in leap-delete | ||
| 402 | * state, the system clock is set ahead one second. | ||
| 403 | */ | ||
| 404 | switch (time_state) { | 363 | switch (time_state) { |
| 405 | case TIME_OK: | 364 | case TIME_OK: |
| 406 | if (time_status & STA_INS) | ||
| 407 | time_state = TIME_INS; | ||
| 408 | else if (time_status & STA_DEL) | ||
| 409 | time_state = TIME_DEL; | ||
| 410 | break; | 365 | break; |
| 411 | case TIME_INS: | 366 | case TIME_INS: |
| 412 | if (!(time_status & STA_INS)) | 367 | timekeeping_leap_insert(-1); |
| 413 | time_state = TIME_OK; | 368 | time_state = TIME_OOP; |
| 414 | else if (secs % 86400 == 0) { | 369 | printk(KERN_NOTICE |
| 415 | leap = -1; | 370 | "Clock: inserting leap second 23:59:60 UTC\n"); |
| 416 | time_state = TIME_OOP; | 371 | hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC); |
| 417 | time_tai++; | 372 | res = HRTIMER_RESTART; |
| 418 | printk(KERN_NOTICE | ||
| 419 | "Clock: inserting leap second 23:59:60 UTC\n"); | ||
| 420 | } | ||
| 421 | break; | 373 | break; |
| 422 | case TIME_DEL: | 374 | case TIME_DEL: |
| 423 | if (!(time_status & STA_DEL)) | 375 | timekeeping_leap_insert(1); |
| 424 | time_state = TIME_OK; | 376 | time_tai--; |
| 425 | else if ((secs + 1) % 86400 == 0) { | 377 | time_state = TIME_WAIT; |
| 426 | leap = 1; | 378 | printk(KERN_NOTICE |
| 427 | time_tai--; | 379 | "Clock: deleting leap second 23:59:59 UTC\n"); |
| 428 | time_state = TIME_WAIT; | ||
| 429 | printk(KERN_NOTICE | ||
| 430 | "Clock: deleting leap second 23:59:59 UTC\n"); | ||
| 431 | } | ||
| 432 | break; | 380 | break; |
| 433 | case TIME_OOP: | 381 | case TIME_OOP: |
| 382 | time_tai++; | ||
| 434 | time_state = TIME_WAIT; | 383 | time_state = TIME_WAIT; |
| 435 | break; | 384 | /* fall through */ |
| 436 | |||
| 437 | case TIME_WAIT: | 385 | case TIME_WAIT: |
| 438 | if (!(time_status & (STA_INS | STA_DEL))) | 386 | if (!(time_status & (STA_INS | STA_DEL))) |
| 439 | time_state = TIME_OK; | 387 | time_state = TIME_OK; |
| 440 | break; | 388 | break; |
| 441 | } | 389 | } |
| 442 | 390 | ||
| 391 | write_sequnlock(&xtime_lock); | ||
| 392 | |||
| 393 | return res; | ||
| 394 | } | ||
| 395 | |||
| 396 | /* | ||
| 397 | * this routine handles the overflow of the microsecond field | ||
| 398 | * | ||
| 399 | * The tricky bits of code to handle the accurate clock support | ||
| 400 | * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. | ||
| 401 | * They were originally developed for SUN and DEC kernels. | ||
| 402 | * All the kudos should go to Dave for this stuff. | ||
| 403 | */ | ||
| 404 | void second_overflow(void) | ||
| 405 | { | ||
| 406 | s64 delta; | ||
| 443 | 407 | ||
| 444 | /* Bump the maxerror field */ | 408 | /* Bump the maxerror field */ |
| 445 | time_maxerror += MAXFREQ / NSEC_PER_USEC; | 409 | time_maxerror += MAXFREQ / NSEC_PER_USEC; |
| @@ -459,32 +423,30 @@ int second_overflow(unsigned long secs) | |||
| 459 | pps_dec_valid(); | 423 | pps_dec_valid(); |
| 460 | 424 | ||
| 461 | if (!time_adjust) | 425 | if (!time_adjust) |
| 462 | goto out; | 426 | return; |
| 463 | 427 | ||
| 464 | if (time_adjust > MAX_TICKADJ) { | 428 | if (time_adjust > MAX_TICKADJ) { |
| 465 | time_adjust -= MAX_TICKADJ; | 429 | time_adjust -= MAX_TICKADJ; |
| 466 | tick_length += MAX_TICKADJ_SCALED; | 430 | tick_length += MAX_TICKADJ_SCALED; |
| 467 | goto out; | 431 | return; |
| 468 | } | 432 | } |
| 469 | 433 | ||
| 470 | if (time_adjust < -MAX_TICKADJ) { | 434 | if (time_adjust < -MAX_TICKADJ) { |
| 471 | time_adjust += MAX_TICKADJ; | 435 | time_adjust += MAX_TICKADJ; |
| 472 | tick_length -= MAX_TICKADJ_SCALED; | 436 | tick_length -= MAX_TICKADJ_SCALED; |
| 473 | goto out; | 437 | return; |
| 474 | } | 438 | } |
| 475 | 439 | ||
| 476 | tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) | 440 | tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) |
| 477 | << NTP_SCALE_SHIFT; | 441 | << NTP_SCALE_SHIFT; |
| 478 | time_adjust = 0; | 442 | time_adjust = 0; |
| 479 | |||
| 480 | out: | ||
| 481 | spin_unlock_irqrestore(&ntp_lock, flags); | ||
| 482 | |||
| 483 | return leap; | ||
| 484 | } | 443 | } |
| 485 | 444 | ||
| 486 | #ifdef CONFIG_GENERIC_CMOS_UPDATE | 445 | #ifdef CONFIG_GENERIC_CMOS_UPDATE |
| 487 | 446 | ||
| 447 | /* Disable the cmos update - used by virtualization and embedded */ | ||
| 448 | int no_sync_cmos_clock __read_mostly; | ||
| 449 | |||
| 488 | static void sync_cmos_clock(struct work_struct *work); | 450 | static void sync_cmos_clock(struct work_struct *work); |
| 489 | 451 | ||
| 490 | static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock); | 452 | static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock); |
| @@ -531,13 +493,35 @@ static void sync_cmos_clock(struct work_struct *work) | |||
| 531 | 493 | ||
| 532 | static void notify_cmos_timer(void) | 494 | static void notify_cmos_timer(void) |
| 533 | { | 495 | { |
| 534 | schedule_delayed_work(&sync_cmos_work, 0); | 496 | if (!no_sync_cmos_clock) |
| 497 | schedule_delayed_work(&sync_cmos_work, 0); | ||
| 535 | } | 498 | } |
| 536 | 499 | ||
| 537 | #else | 500 | #else |
| 538 | static inline void notify_cmos_timer(void) { } | 501 | static inline void notify_cmos_timer(void) { } |
| 539 | #endif | 502 | #endif |
| 540 | 503 | ||
| 504 | /* | ||
| 505 | * Start the leap seconds timer: | ||
| 506 | */ | ||
| 507 | static inline void ntp_start_leap_timer(struct timespec *ts) | ||
| 508 | { | ||
| 509 | long now = ts->tv_sec; | ||
| 510 | |||
| 511 | if (time_status & STA_INS) { | ||
| 512 | time_state = TIME_INS; | ||
| 513 | now += 86400 - now % 86400; | ||
| 514 | hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); | ||
| 515 | |||
| 516 | return; | ||
| 517 | } | ||
| 518 | |||
| 519 | if (time_status & STA_DEL) { | ||
| 520 | time_state = TIME_DEL; | ||
| 521 | now += 86400 - (now + 1) % 86400; | ||
| 522 | hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); | ||
| 523 | } | ||
| 524 | } | ||
| 541 | 525 | ||
| 542 | /* | 526 | /* |
| 543 | * Propagate a new txc->status value into the NTP state: | 527 | * Propagate a new txc->status value into the NTP state: |
| @@ -561,10 +545,26 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts) | |||
| 561 | /* only set allowed bits */ | 545 | /* only set allowed bits */ |
| 562 | time_status &= STA_RONLY; | 546 | time_status &= STA_RONLY; |
| 563 | time_status |= txc->status & ~STA_RONLY; | 547 | time_status |= txc->status & ~STA_RONLY; |
| 564 | } | ||
| 565 | 548 | ||
| 549 | switch (time_state) { | ||
| 550 | case TIME_OK: | ||
| 551 | ntp_start_leap_timer(ts); | ||
| 552 | break; | ||
| 553 | case TIME_INS: | ||
| 554 | case TIME_DEL: | ||
| 555 | time_state = TIME_OK; | ||
| 556 | ntp_start_leap_timer(ts); | ||
| 557 | case TIME_WAIT: | ||
| 558 | if (!(time_status & (STA_INS | STA_DEL))) | ||
| 559 | time_state = TIME_OK; | ||
| 560 | break; | ||
| 561 | case TIME_OOP: | ||
| 562 | hrtimer_restart(&leap_timer); | ||
| 563 | break; | ||
| 564 | } | ||
| 565 | } | ||
| 566 | /* | 566 | /* |
| 567 | * Called with ntp_lock held, so we can access and modify | 567 | * Called with the xtime lock held, so we can access and modify |
| 568 | * all the global NTP state: | 568 | * all the global NTP state: |
| 569 | */ | 569 | */ |
| 570 | static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts) | 570 | static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts) |
| @@ -643,6 +643,9 @@ int do_adjtimex(struct timex *txc) | |||
| 643 | (txc->tick < 900000/USER_HZ || | 643 | (txc->tick < 900000/USER_HZ || |
| 644 | txc->tick > 1100000/USER_HZ)) | 644 | txc->tick > 1100000/USER_HZ)) |
| 645 | return -EINVAL; | 645 | return -EINVAL; |
| 646 | |||
| 647 | if (txc->modes & ADJ_STATUS && time_state != TIME_OK) | ||
| 648 | hrtimer_cancel(&leap_timer); | ||
| 646 | } | 649 | } |
| 647 | 650 | ||
| 648 | if (txc->modes & ADJ_SETOFFSET) { | 651 | if (txc->modes & ADJ_SETOFFSET) { |
| @@ -660,7 +663,7 @@ int do_adjtimex(struct timex *txc) | |||
| 660 | 663 | ||
| 661 | getnstimeofday(&ts); | 664 | getnstimeofday(&ts); |
| 662 | 665 | ||
| 663 | spin_lock_irq(&ntp_lock); | 666 | write_seqlock_irq(&xtime_lock); |
| 664 | 667 | ||
| 665 | if (txc->modes & ADJ_ADJTIME) { | 668 | if (txc->modes & ADJ_ADJTIME) { |
| 666 | long save_adjust = time_adjust; | 669 | long save_adjust = time_adjust; |
| @@ -702,7 +705,7 @@ int do_adjtimex(struct timex *txc) | |||
| 702 | /* fill PPS status fields */ | 705 | /* fill PPS status fields */ |
| 703 | pps_fill_timex(txc); | 706 | pps_fill_timex(txc); |
| 704 | 707 | ||
| 705 | spin_unlock_irq(&ntp_lock); | 708 | write_sequnlock_irq(&xtime_lock); |
| 706 | 709 | ||
| 707 | txc->time.tv_sec = ts.tv_sec; | 710 | txc->time.tv_sec = ts.tv_sec; |
| 708 | txc->time.tv_usec = ts.tv_nsec; | 711 | txc->time.tv_usec = ts.tv_nsec; |
| @@ -900,7 +903,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) | |||
| 900 | 903 | ||
| 901 | pts_norm = pps_normalize_ts(*phase_ts); | 904 | pts_norm = pps_normalize_ts(*phase_ts); |
| 902 | 905 | ||
| 903 | spin_lock_irqsave(&ntp_lock, flags); | 906 | write_seqlock_irqsave(&xtime_lock, flags); |
| 904 | 907 | ||
| 905 | /* clear the error bits, they will be set again if needed */ | 908 | /* clear the error bits, they will be set again if needed */ |
| 906 | time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR); | 909 | time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR); |
| @@ -913,7 +916,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) | |||
| 913 | * just start the frequency interval */ | 916 | * just start the frequency interval */ |
| 914 | if (unlikely(pps_fbase.tv_sec == 0)) { | 917 | if (unlikely(pps_fbase.tv_sec == 0)) { |
| 915 | pps_fbase = *raw_ts; | 918 | pps_fbase = *raw_ts; |
| 916 | spin_unlock_irqrestore(&ntp_lock, flags); | 919 | write_sequnlock_irqrestore(&xtime_lock, flags); |
| 917 | return; | 920 | return; |
| 918 | } | 921 | } |
| 919 | 922 | ||
| @@ -928,7 +931,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) | |||
| 928 | time_status |= STA_PPSJITTER; | 931 | time_status |= STA_PPSJITTER; |
| 929 | /* restart the frequency calibration interval */ | 932 | /* restart the frequency calibration interval */ |
| 930 | pps_fbase = *raw_ts; | 933 | pps_fbase = *raw_ts; |
| 931 | spin_unlock_irqrestore(&ntp_lock, flags); | 934 | write_sequnlock_irqrestore(&xtime_lock, flags); |
| 932 | pr_err("hardpps: PPSJITTER: bad pulse\n"); | 935 | pr_err("hardpps: PPSJITTER: bad pulse\n"); |
| 933 | return; | 936 | return; |
| 934 | } | 937 | } |
| @@ -945,7 +948,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) | |||
| 945 | 948 | ||
| 946 | hardpps_update_phase(pts_norm.nsec); | 949 | hardpps_update_phase(pts_norm.nsec); |
| 947 | 950 | ||
| 948 | spin_unlock_irqrestore(&ntp_lock, flags); | 951 | write_sequnlock_irqrestore(&xtime_lock, flags); |
| 949 | } | 952 | } |
| 950 | EXPORT_SYMBOL(hardpps); | 953 | EXPORT_SYMBOL(hardpps); |
| 951 | 954 | ||
| @@ -964,4 +967,6 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup); | |||
| 964 | void __init ntp_init(void) | 967 | void __init ntp_init(void) |
| 965 | { | 968 | { |
| 966 | ntp_clear(); | 969 | ntp_clear(); |
| 970 | hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); | ||
| 971 | leap_timer.function = ntp_leap_second; | ||
| 967 | } | 972 | } |
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index ce033c7aa2e..c340ca658f3 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c | |||
| @@ -18,7 +18,6 @@ | |||
| 18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
| 19 | */ | 19 | */ |
| 20 | #include <linux/device.h> | 20 | #include <linux/device.h> |
| 21 | #include <linux/export.h> | ||
| 22 | #include <linux/file.h> | 21 | #include <linux/file.h> |
| 23 | #include <linux/posix-clock.h> | 22 | #include <linux/posix-clock.h> |
| 24 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index f113755695e..7a90d021b79 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c | |||
| @@ -194,7 +194,7 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) | |||
| 194 | for (next = dev->next_event; ;) { | 194 | for (next = dev->next_event; ;) { |
| 195 | next = ktime_add(next, tick_period); | 195 | next = ktime_add(next, tick_period); |
| 196 | 196 | ||
| 197 | if (!clockevents_program_event(dev, next, false)) | 197 | if (!clockevents_program_event(dev, next, ktime_get())) |
| 198 | return; | 198 | return; |
| 199 | tick_do_periodic_broadcast(); | 199 | tick_do_periodic_broadcast(); |
| 200 | } | 200 | } |
| @@ -346,8 +346,7 @@ int tick_resume_broadcast(void) | |||
| 346 | tick_get_broadcast_mask()); | 346 | tick_get_broadcast_mask()); |
| 347 | break; | 347 | break; |
| 348 | case TICKDEV_MODE_ONESHOT: | 348 | case TICKDEV_MODE_ONESHOT: |
| 349 | if (!cpumask_empty(tick_get_broadcast_mask())) | 349 | broadcast = tick_resume_broadcast_oneshot(bc); |
| 350 | broadcast = tick_resume_broadcast_oneshot(bc); | ||
| 351 | break; | 350 | break; |
| 352 | } | 351 | } |
| 353 | } | 352 | } |
| @@ -374,10 +373,7 @@ static int tick_broadcast_set_event(ktime_t expires, int force) | |||
| 374 | { | 373 | { |
| 375 | struct clock_event_device *bc = tick_broadcast_device.evtdev; | 374 | struct clock_event_device *bc = tick_broadcast_device.evtdev; |
| 376 | 375 | ||
| 377 | if (bc->mode != CLOCK_EVT_MODE_ONESHOT) | 376 | return tick_dev_program_event(bc, expires, force); |
| 378 | clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); | ||
| 379 | |||
| 380 | return clockevents_program_event(bc, expires, force); | ||
| 381 | } | 377 | } |
| 382 | 378 | ||
| 383 | int tick_resume_broadcast_oneshot(struct clock_event_device *bc) | 379 | int tick_resume_broadcast_oneshot(struct clock_event_device *bc) |
| @@ -535,6 +531,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | |||
| 535 | int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC; | 531 | int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC; |
| 536 | 532 | ||
| 537 | bc->event_handler = tick_handle_oneshot_broadcast; | 533 | bc->event_handler = tick_handle_oneshot_broadcast; |
| 534 | clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); | ||
| 538 | 535 | ||
| 539 | /* Take the do_timer update */ | 536 | /* Take the do_timer update */ |
| 540 | tick_do_timer_cpu = cpu; | 537 | tick_do_timer_cpu = cpu; |
| @@ -552,7 +549,6 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | |||
| 552 | to_cpumask(tmpmask)); | 549 | to_cpumask(tmpmask)); |
| 553 | 550 | ||
| 554 | if (was_periodic && !cpumask_empty(to_cpumask(tmpmask))) { | 551 | if (was_periodic && !cpumask_empty(to_cpumask(tmpmask))) { |
| 555 | clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); | ||
| 556 | tick_broadcast_init_next_event(to_cpumask(tmpmask), | 552 | tick_broadcast_init_next_event(to_cpumask(tmpmask), |
| 557 | tick_next_period); | 553 | tick_next_period); |
| 558 | tick_broadcast_set_event(tick_next_period, 1); | 554 | tick_broadcast_set_event(tick_next_period, 1); |
| @@ -584,7 +580,6 @@ void tick_broadcast_switch_to_oneshot(void) | |||
| 584 | bc = tick_broadcast_device.evtdev; | 580 | bc = tick_broadcast_device.evtdev; |
| 585 | if (bc) | 581 | if (bc) |
| 586 | tick_broadcast_setup_oneshot(bc); | 582 | tick_broadcast_setup_oneshot(bc); |
| 587 | |||
| 588 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); | 583 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
| 589 | } | 584 | } |
| 590 | 585 | ||
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index b1600a6973f..119528de823 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c | |||
| @@ -63,13 +63,13 @@ int tick_is_oneshot_available(void) | |||
| 63 | static void tick_periodic(int cpu) | 63 | static void tick_periodic(int cpu) |
| 64 | { | 64 | { |
| 65 | if (tick_do_timer_cpu == cpu) { | 65 | if (tick_do_timer_cpu == cpu) { |
| 66 | write_seqlock(&jiffies_lock); | 66 | write_seqlock(&xtime_lock); |
| 67 | 67 | ||
| 68 | /* Keep track of the next tick event */ | 68 | /* Keep track of the next tick event */ |
| 69 | tick_next_period = ktime_add(tick_next_period, tick_period); | 69 | tick_next_period = ktime_add(tick_next_period, tick_period); |
| 70 | 70 | ||
| 71 | do_timer(1); | 71 | do_timer(1); |
| 72 | write_sequnlock(&jiffies_lock); | 72 | write_sequnlock(&xtime_lock); |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | update_process_times(user_mode(get_irq_regs())); | 75 | update_process_times(user_mode(get_irq_regs())); |
| @@ -94,7 +94,7 @@ void tick_handle_periodic(struct clock_event_device *dev) | |||
| 94 | */ | 94 | */ |
| 95 | next = ktime_add(dev->next_event, tick_period); | 95 | next = ktime_add(dev->next_event, tick_period); |
| 96 | for (;;) { | 96 | for (;;) { |
| 97 | if (!clockevents_program_event(dev, next, false)) | 97 | if (!clockevents_program_event(dev, next, ktime_get())) |
| 98 | return; | 98 | return; |
| 99 | /* | 99 | /* |
| 100 | * Have to be careful here. If we're in oneshot mode, | 100 | * Have to be careful here. If we're in oneshot mode, |
| @@ -130,14 +130,14 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) | |||
| 130 | ktime_t next; | 130 | ktime_t next; |
| 131 | 131 | ||
| 132 | do { | 132 | do { |
| 133 | seq = read_seqbegin(&jiffies_lock); | 133 | seq = read_seqbegin(&xtime_lock); |
| 134 | next = tick_next_period; | 134 | next = tick_next_period; |
| 135 | } while (read_seqretry(&jiffies_lock, seq)); | 135 | } while (read_seqretry(&xtime_lock, seq)); |
| 136 | 136 | ||
| 137 | clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); | 137 | clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); |
| 138 | 138 | ||
| 139 | for (;;) { | 139 | for (;;) { |
| 140 | if (!clockevents_program_event(dev, next, false)) | 140 | if (!clockevents_program_event(dev, next, ktime_get())) |
| 141 | return; | 141 | return; |
| 142 | next = ktime_add(next, tick_period); | 142 | next = ktime_add(next, tick_period); |
| 143 | } | 143 | } |
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index cf3e59ed6dc..1009b06d6f8 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h | |||
| @@ -26,6 +26,8 @@ extern void clockevents_shutdown(struct clock_event_device *dev); | |||
| 26 | extern void tick_setup_oneshot(struct clock_event_device *newdev, | 26 | extern void tick_setup_oneshot(struct clock_event_device *newdev, |
| 27 | void (*handler)(struct clock_event_device *), | 27 | void (*handler)(struct clock_event_device *), |
| 28 | ktime_t nextevt); | 28 | ktime_t nextevt); |
| 29 | extern int tick_dev_program_event(struct clock_event_device *dev, | ||
| 30 | ktime_t expires, int force); | ||
| 29 | extern int tick_program_event(ktime_t expires, int force); | 31 | extern int tick_program_event(ktime_t expires, int force); |
| 30 | extern void tick_oneshot_notify(void); | 32 | extern void tick_oneshot_notify(void); |
| 31 | extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); | 33 | extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); |
| @@ -141,3 +143,4 @@ static inline int tick_device_is_functional(struct clock_event_device *dev) | |||
| 141 | #endif | 143 | #endif |
| 142 | 144 | ||
| 143 | extern void do_timer(unsigned long ticks); | 145 | extern void do_timer(unsigned long ticks); |
| 146 | extern seqlock_t xtime_lock; | ||
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 824109060a3..2d04411a5f0 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c | |||
| @@ -21,6 +21,74 @@ | |||
| 21 | 21 | ||
| 22 | #include "tick-internal.h" | 22 | #include "tick-internal.h" |
| 23 | 23 | ||
| | | 24 | /* Limit min_delta to a jiffy */ |
| 25 | #define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ) | ||
| 26 | |||
| 27 | static int tick_increase_min_delta(struct clock_event_device *dev) | ||
| 28 | { | ||
| 29 | /* Nothing to do if we already reached the limit */ | ||
| 30 | if (dev->min_delta_ns >= MIN_DELTA_LIMIT) | ||
| 31 | return -ETIME; | ||
| 32 | |||
| 33 | if (dev->min_delta_ns < 5000) | ||
| 34 | dev->min_delta_ns = 5000; | ||
| 35 | else | ||
| 36 | dev->min_delta_ns += dev->min_delta_ns >> 1; | ||
| 37 | |||
| 38 | if (dev->min_delta_ns > MIN_DELTA_LIMIT) | ||
| 39 | dev->min_delta_ns = MIN_DELTA_LIMIT; | ||
| 40 | |||
| 41 | printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n", | ||
| 42 | dev->name ? dev->name : "?", | ||
| 43 | (unsigned long long) dev->min_delta_ns); | ||
| 44 | return 0; | ||
| 45 | } | ||
| 46 | |||
| 47 | /** | ||
| 48 | * tick_program_event internal worker function | ||
| 49 | */ | ||
| 50 | int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires, | ||
| 51 | int force) | ||
| 52 | { | ||
| 53 | ktime_t now = ktime_get(); | ||
| 54 | int i; | ||
| 55 | |||
| 56 | for (i = 0;;) { | ||
| 57 | int ret = clockevents_program_event(dev, expires, now); | ||
| 58 | |||
| 59 | if (!ret || !force) | ||
| 60 | return ret; | ||
| 61 | |||
| 62 | dev->retries++; | ||
| 63 | /* | ||
| 64 | * We tried 3 times to program the device with the given | ||
| 65 | * min_delta_ns. If that's not working then we increase it | ||
| 66 | * and emit a warning. | ||
| 67 | */ | ||
| 68 | if (++i > 2) { | ||
| 69 | /* Increase the min. delta and try again */ | ||
| 70 | if (tick_increase_min_delta(dev)) { | ||
| 71 | /* | ||
| 72 | * Get out of the loop if min_delta_ns | ||
| 73 | * hit the limit already. That's | ||
| 74 | * better than staying here forever. | ||
| 75 | * | ||
| 76 | * We clear next_event so we have a | ||
| 77 | * chance that the box survives. | ||
| 78 | */ | ||
| 79 | printk(KERN_WARNING | ||
| 80 | "CE: Reprogramming failure. Giving up\n"); | ||
| 81 | dev->next_event.tv64 = KTIME_MAX; | ||
| 82 | return -ETIME; | ||
| 83 | } | ||
| 84 | i = 0; | ||
| 85 | } | ||
| 86 | |||
| 87 | now = ktime_get(); | ||
| 88 | expires = ktime_add_ns(now, dev->min_delta_ns); | ||
| 89 | } | ||
| 90 | } | ||
| 91 | |||
| 24 | /** | 92 | /** |
| 25 | * tick_program_event | 93 | * tick_program_event |
| 26 | */ | 94 | */ |
| @@ -28,7 +96,7 @@ int tick_program_event(ktime_t expires, int force) | |||
| 28 | { | 96 | { |
| 29 | struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); | 97 | struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); |
| 30 | 98 | ||
| 31 | return clockevents_program_event(dev, expires, force); | 99 | return tick_dev_program_event(dev, expires, force); |
| 32 | } | 100 | } |
| 33 | 101 | ||
| 34 | /** | 102 | /** |
| @@ -36,10 +104,11 @@ int tick_program_event(ktime_t expires, int force) | |||
| 36 | */ | 104 | */ |
| 37 | void tick_resume_oneshot(void) | 105 | void tick_resume_oneshot(void) |
| 38 | { | 106 | { |
| 39 | struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); | 107 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); |
| 108 | struct clock_event_device *dev = td->evtdev; | ||
| 40 | 109 | ||
| 41 | clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); | 110 | clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); |
| 42 | clockevents_program_event(dev, ktime_get(), true); | 111 | tick_program_event(ktime_get(), 1); |
| 43 | } | 112 | } |
| 44 | 113 | ||
| 45 | /** | 114 | /** |
| @@ -51,7 +120,7 @@ void tick_setup_oneshot(struct clock_event_device *newdev, | |||
| 51 | { | 120 | { |
| 52 | newdev->event_handler = handler; | 121 | newdev->event_handler = handler; |
| 53 | clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT); | 122 | clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT); |
| 54 | clockevents_program_event(newdev, next_event, true); | 123 | tick_dev_program_event(newdev, next_event, 1); |
| 55 | } | 124 | } |
| 56 | 125 | ||
| 57 | /** | 126 | /** |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d58e552d9fd..d5097c44b40 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -31,7 +31,7 @@ | |||
| 31 | static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); | 31 | static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); |
| 32 | 32 | ||
| 33 | /* | 33 | /* |
| 34 | * The time, when the last jiffy update happened. Protected by jiffies_lock. | 34 | * The time, when the last jiffy update happened. Protected by xtime_lock. |
| 35 | */ | 35 | */ |
| 36 | static ktime_t last_jiffies_update; | 36 | static ktime_t last_jiffies_update; |
| 37 | 37 | ||
| @@ -49,14 +49,14 @@ static void tick_do_update_jiffies64(ktime_t now) | |||
| 49 | ktime_t delta; | 49 | ktime_t delta; |
| 50 | 50 | ||
| 51 | /* | 51 | /* |
| 52 | * Do a quick check without holding jiffies_lock: | 52 | * Do a quick check without holding xtime_lock: |
| 53 | */ | 53 | */ |
| 54 | delta = ktime_sub(now, last_jiffies_update); | 54 | delta = ktime_sub(now, last_jiffies_update); |
| 55 | if (delta.tv64 < tick_period.tv64) | 55 | if (delta.tv64 < tick_period.tv64) |
| 56 | return; | 56 | return; |
| 57 | 57 | ||
| 58 | /* Reevaluate with jiffies_lock held */ | 58 | /* Reevaluate with xtime_lock held */ |
| 59 | write_seqlock(&jiffies_lock); | 59 | write_seqlock(&xtime_lock); |
| 60 | 60 | ||
| 61 | delta = ktime_sub(now, last_jiffies_update); | 61 | delta = ktime_sub(now, last_jiffies_update); |
| 62 | if (delta.tv64 >= tick_period.tv64) { | 62 | if (delta.tv64 >= tick_period.tv64) { |
| @@ -79,7 +79,7 @@ static void tick_do_update_jiffies64(ktime_t now) | |||
| 79 | /* Keep the tick_next_period variable up to date */ | 79 | /* Keep the tick_next_period variable up to date */ |
| 80 | tick_next_period = ktime_add(last_jiffies_update, tick_period); | 80 | tick_next_period = ktime_add(last_jiffies_update, tick_period); |
| 81 | } | 81 | } |
| 82 | write_sequnlock(&jiffies_lock); | 82 | write_sequnlock(&xtime_lock); |
| 83 | } | 83 | } |
| 84 | 84 | ||
| 85 | /* | 85 | /* |
| @@ -89,58 +89,15 @@ static ktime_t tick_init_jiffy_update(void) | |||
| 89 | { | 89 | { |
| 90 | ktime_t period; | 90 | ktime_t period; |
| 91 | 91 | ||
| 92 | write_seqlock(&jiffies_lock); | 92 | write_seqlock(&xtime_lock); |
| 93 | /* Did we start the jiffies update yet ? */ | 93 | /* Did we start the jiffies update yet ? */ |
| 94 | if (last_jiffies_update.tv64 == 0) | 94 | if (last_jiffies_update.tv64 == 0) |
| 95 | last_jiffies_update = tick_next_period; | 95 | last_jiffies_update = tick_next_period; |
| 96 | period = last_jiffies_update; | 96 | period = last_jiffies_update; |
| 97 | write_sequnlock(&jiffies_lock); | 97 | write_sequnlock(&xtime_lock); |
| 98 | return period; | 98 | return period; |
| 99 | } | 99 | } |
| 100 | 100 | ||
| 101 | |||
| 102 | static void tick_sched_do_timer(ktime_t now) | ||
| 103 | { | ||
| 104 | int cpu = smp_processor_id(); | ||
| 105 | |||
| 106 | #ifdef CONFIG_NO_HZ | ||
| 107 | /* | ||
| 108 | * Check if the do_timer duty was dropped. We don't care about | ||
| 109 | * concurrency: This happens only when the cpu in charge went | ||
| 110 | * into a long sleep. If two cpus happen to assign themself to | ||
| 111 | * this duty, then the jiffies update is still serialized by | ||
| 112 | * jiffies_lock. | ||
| 113 | */ | ||
| 114 | if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) | ||
| 115 | tick_do_timer_cpu = cpu; | ||
| 116 | #endif | ||
| 117 | |||
| 118 | /* Check, if the jiffies need an update */ | ||
| 119 | if (tick_do_timer_cpu == cpu) | ||
| 120 | tick_do_update_jiffies64(now); | ||
| 121 | } | ||
| 122 | |||
| 123 | static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) | ||
| 124 | { | ||
| 125 | #ifdef CONFIG_NO_HZ | ||
| 126 | /* | ||
| 127 | * When we are idle and the tick is stopped, we have to touch | ||
| 128 | * the watchdog as we might not schedule for a really long | ||
| 129 | * time. This happens on complete idle SMP systems while | ||
| 130 | * waiting on the login prompt. We also increment the "start of | ||
| 131 | * idle" jiffy stamp so the idle accounting adjustment we do | ||
| 132 | * when we go busy again does not account too much ticks. | ||
| 133 | */ | ||
| 134 | if (ts->tick_stopped) { | ||
| 135 | touch_softlockup_watchdog(); | ||
| 136 | if (is_idle_task(current)) | ||
| 137 | ts->idle_jiffies++; | ||
| 138 | } | ||
| 139 | #endif | ||
| 140 | update_process_times(user_mode(regs)); | ||
| 141 | profile_tick(CPU_PROFILING); | ||
| 142 | } | ||
| 143 | |||
| 144 | /* | 101 | /* |
| 145 | * NOHZ - aka dynamic tick functionality | 102 | * NOHZ - aka dynamic tick functionality |
| 146 | */ | 103 | */ |
| @@ -148,7 +105,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) | |||
| 148 | /* | 105 | /* |
| 149 | * NO HZ enabled ? | 106 | * NO HZ enabled ? |
| 150 | */ | 107 | */ |
| 151 | int tick_nohz_enabled __read_mostly = 1; | 108 | static int tick_nohz_enabled __read_mostly = 1; |
| 152 | 109 | ||
| 153 | /* | 110 | /* |
| 154 | * Enable / Disable tickless mode | 111 | * Enable / Disable tickless mode |
| @@ -182,6 +139,7 @@ static void tick_nohz_update_jiffies(ktime_t now) | |||
| 182 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 139 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
| 183 | unsigned long flags; | 140 | unsigned long flags; |
| 184 | 141 | ||
| 142 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | ||
| 185 | ts->idle_waketime = now; | 143 | ts->idle_waketime = now; |
| 186 | 144 | ||
| 187 | local_irq_save(flags); | 145 | local_irq_save(flags); |
| @@ -201,10 +159,9 @@ update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda | |||
| 201 | 159 | ||
| 202 | if (ts->idle_active) { | 160 | if (ts->idle_active) { |
| 203 | delta = ktime_sub(now, ts->idle_entrytime); | 161 | delta = ktime_sub(now, ts->idle_entrytime); |
| 162 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); | ||
| 204 | if (nr_iowait_cpu(cpu) > 0) | 163 | if (nr_iowait_cpu(cpu) > 0) |
| 205 | ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); | 164 | ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); |
| 206 | else | ||
| 207 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); | ||
| 208 | ts->idle_entrytime = now; | 165 | ts->idle_entrytime = now; |
| 209 | } | 166 | } |
| 210 | 167 | ||
| @@ -225,7 +182,11 @@ static void tick_nohz_stop_idle(int cpu, ktime_t now) | |||
| 225 | 182 | ||
| 226 | static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) | 183 | static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) |
| 227 | { | 184 | { |
| 228 | ktime_t now = ktime_get(); | 185 | ktime_t now; |
| 186 | |||
| 187 | now = ktime_get(); | ||
| 188 | |||
| 189 | update_ts_time_stats(cpu, ts, now, NULL); | ||
| 229 | 190 | ||
| 230 | ts->idle_entrytime = now; | 191 | ts->idle_entrytime = now; |
| 231 | ts->idle_active = 1; | 192 | ts->idle_active = 1; |
| @@ -236,11 +197,11 @@ static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) | |||
| 236 | /** | 197 | /** |
| 237 | * get_cpu_idle_time_us - get the total idle time of a cpu | 198 | * get_cpu_idle_time_us - get the total idle time of a cpu |
| 238 | * @cpu: CPU number to query | 199 | * @cpu: CPU number to query |
| 239 | * @last_update_time: variable to store update time in. Do not update | 200 | * @last_update_time: variable to store update time in |
| 240 | * counters if NULL. | ||
| 241 | * | 201 | * |
| 242 | * Return the cumulative idle time (since boot) for a given | 202 | * Return the cumulative idle time (since boot) for a given |
| 243 | * CPU, in microseconds. | 203 | * CPU, in microseconds. The idle time returned includes |
| 204 | * the iowait time (unlike what "top" and co report). | ||
| 244 | * | 205 | * |
| 245 | * This time is measured via accounting rather than sampling, | 206 | * This time is measured via accounting rather than sampling, |
| 246 | * and is as accurate as ktime_get() is. | 207 | * and is as accurate as ktime_get() is. |
| @@ -250,35 +211,20 @@ static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) | |||
| 250 | u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) | 211 | u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) |
| 251 | { | 212 | { |
| 252 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 213 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
| 253 | ktime_t now, idle; | ||
| 254 | 214 | ||
| 255 | if (!tick_nohz_enabled) | 215 | if (!tick_nohz_enabled) |
| 256 | return -1; | 216 | return -1; |
| 257 | 217 | ||
| 258 | now = ktime_get(); | 218 | update_ts_time_stats(cpu, ts, ktime_get(), last_update_time); |
| 259 | if (last_update_time) { | ||
| 260 | update_ts_time_stats(cpu, ts, now, last_update_time); | ||
| 261 | idle = ts->idle_sleeptime; | ||
| 262 | } else { | ||
| 263 | if (ts->idle_active && !nr_iowait_cpu(cpu)) { | ||
| 264 | ktime_t delta = ktime_sub(now, ts->idle_entrytime); | ||
| 265 | |||
| 266 | idle = ktime_add(ts->idle_sleeptime, delta); | ||
| 267 | } else { | ||
| 268 | idle = ts->idle_sleeptime; | ||
| 269 | } | ||
| 270 | } | ||
| 271 | |||
| 272 | return ktime_to_us(idle); | ||
| 273 | 219 | ||
| 220 | return ktime_to_us(ts->idle_sleeptime); | ||
| 274 | } | 221 | } |
| 275 | EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); | 222 | EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); |
| 276 | 223 | ||
| 277 | /** | 224 | /* |
| 278 | * get_cpu_iowait_time_us - get the total iowait time of a cpu | 225 | * get_cpu_iowait_time_us - get the total iowait time of a cpu |
| 279 | * @cpu: CPU number to query | 226 | * @cpu: CPU number to query |
| 280 | * @last_update_time: variable to store update time in. Do not update | 227 | * @last_update_time: variable to store update time in |
| 281 | * counters if NULL. | ||
| 282 | * | 228 | * |
| 283 | * Return the cumulative iowait time (since boot) for a given | 229 | * Return the cumulative iowait time (since boot) for a given |
| 284 | * CPU, in microseconds. | 230 | * CPU, in microseconds. |
| @@ -291,47 +237,93 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); | |||
| 291 | u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) | 237 | u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) |
| 292 | { | 238 | { |
| 293 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 239 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
| 294 | ktime_t now, iowait; | ||
| 295 | 240 | ||
| 296 | if (!tick_nohz_enabled) | 241 | if (!tick_nohz_enabled) |
| 297 | return -1; | 242 | return -1; |
| 298 | 243 | ||
| 299 | now = ktime_get(); | 244 | update_ts_time_stats(cpu, ts, ktime_get(), last_update_time); |
| 300 | if (last_update_time) { | ||
| 301 | update_ts_time_stats(cpu, ts, now, last_update_time); | ||
| 302 | iowait = ts->iowait_sleeptime; | ||
| 303 | } else { | ||
| 304 | if (ts->idle_active && nr_iowait_cpu(cpu) > 0) { | ||
| 305 | ktime_t delta = ktime_sub(now, ts->idle_entrytime); | ||
| 306 | |||
| 307 | iowait = ktime_add(ts->iowait_sleeptime, delta); | ||
| 308 | } else { | ||
| 309 | iowait = ts->iowait_sleeptime; | ||
| 310 | } | ||
| 311 | } | ||
| 312 | 245 | ||
| 313 | return ktime_to_us(iowait); | 246 | return ktime_to_us(ts->iowait_sleeptime); |
| 314 | } | 247 | } |
| 315 | EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); | 248 | EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); |
| 316 | 249 | ||
| 317 | static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | 250 | /** |
| 318 | ktime_t now, int cpu) | 251 | * tick_nohz_stop_sched_tick - stop the idle tick from the idle task |
| 252 | * | ||
| 253 | * When the next event is more than a tick into the future, stop the idle tick | ||
| 254 | * Called either from the idle loop or from irq_exit() when an idle period was | ||
| 255 | * just interrupted by an interrupt which did not cause a reschedule. | ||
| 256 | */ | ||
| 257 | void tick_nohz_stop_sched_tick(int inidle) | ||
| 319 | { | 258 | { |
| 320 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; | 259 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; |
| 321 | ktime_t last_update, expires, ret = { .tv64 = 0 }; | 260 | struct tick_sched *ts; |
| 322 | unsigned long rcu_delta_jiffies; | 261 | ktime_t last_update, expires, now; |
| 323 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | 262 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; |
| 324 | u64 time_delta; | 263 | u64 time_delta; |
| 264 | int cpu; | ||
| 265 | |||
| 266 | local_irq_save(flags); | ||
| 267 | |||
| 268 | cpu = smp_processor_id(); | ||
| 269 | ts = &per_cpu(tick_cpu_sched, cpu); | ||
| 270 | |||
| 271 | /* | ||
| 272 | * Call to tick_nohz_start_idle stops the last_update_time from being | ||
| 273 | * updated. Thus, it must not be called in the event we are called from | ||
| 274 | * irq_exit() with the prior state different than idle. | ||
| 275 | */ | ||
| 276 | if (!inidle && !ts->inidle) | ||
| 277 | goto end; | ||
| 278 | |||
| 279 | /* | ||
| 280 | * Set ts->inidle unconditionally. Even if the system did not | ||
| 281 | * switch to NOHZ mode the cpu frequency governors rely on the | ||
| 282 | * update of the idle time accounting in tick_nohz_start_idle(). | ||
| 283 | */ | ||
| 284 | ts->inidle = 1; | ||
| 325 | 285 | ||
| 286 | now = tick_nohz_start_idle(cpu, ts); | ||
| 287 | |||
| 288 | /* | ||
| 289 | * If this cpu is offline and it is the one which updates | ||
| 290 | * jiffies, then give up the assignment and let it be taken by | ||
| 291 | * the cpu which runs the tick timer next. If we don't drop | ||
| 292 | * this here the jiffies might be stale and do_timer() never | ||
| 293 | * invoked. | ||
| 294 | */ | ||
| 295 | if (unlikely(!cpu_online(cpu))) { | ||
| 296 | if (cpu == tick_do_timer_cpu) | ||
| 297 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; | ||
| 298 | } | ||
| 299 | |||
| 300 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) | ||
| 301 | goto end; | ||
| 302 | |||
| 303 | if (need_resched()) | ||
| 304 | goto end; | ||
| 305 | |||
| 306 | if (unlikely(local_softirq_pending() && cpu_online(cpu))) { | ||
| 307 | static int ratelimit; | ||
| 308 | |||
| 309 | if (ratelimit < 10) { | ||
| 310 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | ||
| 311 | (unsigned int) local_softirq_pending()); | ||
| 312 | ratelimit++; | ||
| 313 | } | ||
| 314 | goto end; | ||
| 315 | } | ||
| 316 | |||
| 317 | ts->idle_calls++; | ||
| 326 | /* Read jiffies and the time when jiffies were updated last */ | 318 | /* Read jiffies and the time when jiffies were updated last */ |
| 327 | do { | 319 | do { |
| 328 | seq = read_seqbegin(&jiffies_lock); | 320 | seq = read_seqbegin(&xtime_lock); |
| 329 | last_update = last_jiffies_update; | 321 | last_update = last_jiffies_update; |
| 330 | last_jiffies = jiffies; | 322 | last_jiffies = jiffies; |
| 331 | time_delta = timekeeping_max_deferment(); | 323 | time_delta = timekeeping_max_deferment(); |
| 332 | } while (read_seqretry(&jiffies_lock, seq)); | 324 | } while (read_seqretry(&xtime_lock, seq)); |
| 333 | 325 | ||
| 334 | if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) || | 326 | if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || |
| 335 | arch_needs_cpu(cpu)) { | 327 | arch_needs_cpu(cpu)) { |
| 336 | next_jiffies = last_jiffies + 1; | 328 | next_jiffies = last_jiffies + 1; |
| 337 | delta_jiffies = 1; | 329 | delta_jiffies = 1; |
| @@ -339,10 +331,6 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |||
| 339 | /* Get the next timer wheel timer */ | 331 | /* Get the next timer wheel timer */ |
| 340 | next_jiffies = get_next_timer_interrupt(last_jiffies); | 332 | next_jiffies = get_next_timer_interrupt(last_jiffies); |
| 341 | delta_jiffies = next_jiffies - last_jiffies; | 333 | delta_jiffies = next_jiffies - last_jiffies; |
| 342 | if (rcu_delta_jiffies < delta_jiffies) { | ||
| 343 | next_jiffies = last_jiffies + rcu_delta_jiffies; | ||
| 344 | delta_jiffies = rcu_delta_jiffies; | ||
| 345 | } | ||
| 346 | } | 334 | } |
| 347 | /* | 335 | /* |
| 348 | * Do not stop the tick, if we are only one off | 336 | * Do not stop the tick, if we are only one off |
| @@ -401,12 +389,13 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |||
| 401 | else | 389 | else |
| 402 | expires.tv64 = KTIME_MAX; | 390 | expires.tv64 = KTIME_MAX; |
| 403 | 391 | ||
| 392 | if (delta_jiffies > 1) | ||
| 393 | cpumask_set_cpu(cpu, nohz_cpu_mask); | ||
| 394 | |||
| 404 | /* Skip reprogram of event if it's not changed */ | 395 | /* Skip reprogram of event if it's not changed */ |
| 405 | if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) | 396 | if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) |
| 406 | goto out; | 397 | goto out; |
| 407 | 398 | ||
| 408 | ret = expires; | ||
| 409 | |||
| 410 | /* | 399 | /* |
| 411 | * nohz_stop_sched_tick can be called several times before | 400 | * nohz_stop_sched_tick can be called several times before |
| 412 | * the nohz_restart_sched_tick is called. This happens when | 401 | * the nohz_restart_sched_tick is called. This happens when |
| @@ -415,13 +404,19 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |||
| 415 | * the scheduler tick in nohz_restart_sched_tick. | 404 | * the scheduler tick in nohz_restart_sched_tick. |
| 416 | */ | 405 | */ |
| 417 | if (!ts->tick_stopped) { | 406 | if (!ts->tick_stopped) { |
| 418 | nohz_balance_enter_idle(cpu); | 407 | select_nohz_load_balancer(1); |
| 419 | calc_load_enter_idle(); | ||
| 420 | 408 | ||
| 421 | ts->last_tick = hrtimer_get_expires(&ts->sched_timer); | 409 | ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); |
| 422 | ts->tick_stopped = 1; | 410 | ts->tick_stopped = 1; |
| 411 | ts->idle_jiffies = last_jiffies; | ||
| 412 | rcu_enter_nohz(); | ||
| 423 | } | 413 | } |
| 424 | 414 | ||
| 415 | ts->idle_sleeps++; | ||
| 416 | |||
| 417 | /* Mark expires */ | ||
| 418 | ts->idle_expires = expires; | ||
| 419 | |||
| 425 | /* | 420 | /* |
| 426 | * If the expiration time == KTIME_MAX, then | 421 | * If the expiration time == KTIME_MAX, then |
| 427 | * in this case we simply stop the tick timer. | 422 | * in this case we simply stop the tick timer. |
| @@ -446,132 +441,15 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |||
| 446 | * softirq. | 441 | * softirq. |
| 447 | */ | 442 | */ |
| 448 | tick_do_update_jiffies64(ktime_get()); | 443 | tick_do_update_jiffies64(ktime_get()); |
| 444 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | ||
| 449 | } | 445 | } |
| 450 | raise_softirq_irqoff(TIMER_SOFTIRQ); | 446 | raise_softirq_irqoff(TIMER_SOFTIRQ); |
| 451 | out: | 447 | out: |
| 452 | ts->next_jiffies = next_jiffies; | 448 | ts->next_jiffies = next_jiffies; |
| 453 | ts->last_jiffies = last_jiffies; | 449 | ts->last_jiffies = last_jiffies; |
| 454 | ts->sleep_length = ktime_sub(dev->next_event, now); | 450 | ts->sleep_length = ktime_sub(dev->next_event, now); |
| 455 | 451 | end: | |
| 456 | return ret; | 452 | local_irq_restore(flags); |
| 457 | } | ||
| 458 | |||
| 459 | static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) | ||
| 460 | { | ||
| 461 | /* | ||
| 462 | * If this cpu is offline and it is the one which updates | ||
| 463 | * jiffies, then give up the assignment and let it be taken by | ||
| 464 | * the cpu which runs the tick timer next. If we don't drop | ||
| 465 | * this here the jiffies might be stale and do_timer() never | ||
| 466 | * invoked. | ||
| 467 | */ | ||
| 468 | if (unlikely(!cpu_online(cpu))) { | ||
| 469 | if (cpu == tick_do_timer_cpu) | ||
| 470 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; | ||
| 471 | } | ||
| 472 | |||
| 473 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) | ||
| 474 | return false; | ||
| 475 | |||
| 476 | if (need_resched()) | ||
| 477 | return false; | ||
| 478 | |||
| 479 | if (unlikely(local_softirq_pending() && cpu_online(cpu))) { | ||
| 480 | static int ratelimit; | ||
| 481 | |||
| 482 | if (ratelimit < 10 && | ||
| 483 | (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { | ||
| 484 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | ||
| 485 | (unsigned int) local_softirq_pending()); | ||
| 486 | ratelimit++; | ||
| 487 | } | ||
| 488 | return false; | ||
| 489 | } | ||
| 490 | |||
| 491 | return true; | ||
| 492 | } | ||
| 493 | |||
| 494 | static void __tick_nohz_idle_enter(struct tick_sched *ts) | ||
| 495 | { | ||
| 496 | ktime_t now, expires; | ||
| 497 | int cpu = smp_processor_id(); | ||
| 498 | |||
| 499 | now = tick_nohz_start_idle(cpu, ts); | ||
| 500 | |||
| 501 | if (can_stop_idle_tick(cpu, ts)) { | ||
| 502 | int was_stopped = ts->tick_stopped; | ||
| 503 | |||
| 504 | ts->idle_calls++; | ||
| 505 | |||
| 506 | expires = tick_nohz_stop_sched_tick(ts, now, cpu); | ||
| 507 | if (expires.tv64 > 0LL) { | ||
| 508 | ts->idle_sleeps++; | ||
| 509 | ts->idle_expires = expires; | ||
| 510 | } | ||
| 511 | |||
| 512 | if (!was_stopped && ts->tick_stopped) | ||
| 513 | ts->idle_jiffies = ts->last_jiffies; | ||
| 514 | } | ||
| 515 | } | ||
| 516 | |||
| 517 | /** | ||
| 518 | * tick_nohz_idle_enter - stop the idle tick from the idle task | ||
| 519 | * | ||
| 520 | * When the next event is more than a tick into the future, stop the idle tick | ||
| 521 | * Called when we start the idle loop. | ||
| 522 | * | ||
| 523 | * The arch is responsible for calling: | ||
| 524 | * | ||
| 525 | * - rcu_idle_enter() after its last use of RCU before the CPU is put | ||
| 526 | * to sleep. | ||
| 527 | * - rcu_idle_exit() before the first use of RCU after the CPU is woken up. | ||
| 528 | */ | ||
| 529 | void tick_nohz_idle_enter(void) | ||
| 530 | { | ||
| 531 | struct tick_sched *ts; | ||
| 532 | |||
| 533 | WARN_ON_ONCE(irqs_disabled()); | ||
| 534 | |||
| 535 | /* | ||
| 536 | * Update the idle state in the scheduler domain hierarchy | ||
| 537 | * when tick_nohz_stop_sched_tick() is called from the idle loop. | ||
| 538 | * State will be updated to busy during the first busy tick after | ||
| 539 | * exiting idle. | ||
| 540 | */ | ||
| 541 | set_cpu_sd_state_idle(); | ||
| 542 | |||
| 543 | local_irq_disable(); | ||
| 544 | |||
| 545 | ts = &__get_cpu_var(tick_cpu_sched); | ||
| 546 | /* | ||
| 547 | * set ts->inidle unconditionally. even if the system did not | ||
| 548 | * switch to nohz mode the cpu frequency governors rely on the | ||
| 549 | * update of the idle time accounting in tick_nohz_start_idle(). | ||
| 550 | */ | ||
| 551 | ts->inidle = 1; | ||
| 552 | __tick_nohz_idle_enter(ts); | ||
| 553 | |||
| 554 | local_irq_enable(); | ||
| 555 | } | ||
| 556 | |||
| 557 | /** | ||
| 558 | * tick_nohz_irq_exit - update next tick event from interrupt exit | ||
| 559 | * | ||
| 560 | * When an interrupt fires while we are idle and it doesn't cause | ||
| 561 | * a reschedule, it may still add, modify or delete a timer, enqueue | ||
| 562 | * an RCU callback, etc... | ||
| 563 | * So we need to re-calculate and reprogram the next tick event. | ||
| 564 | */ | ||
| 565 | void tick_nohz_irq_exit(void) | ||
| 566 | { | ||
| 567 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | ||
| 568 | |||
| 569 | if (!ts->inidle) | ||
| 570 | return; | ||
| 571 | |||
| 572 | /* Cancel the timer because the CPU has already woken up from the C-states */ | ||
| 573 | menu_hrtimer_cancel(); | ||
| 574 | __tick_nohz_idle_enter(ts); | ||
| 575 | } | 453 | } |
| 576 | 454 | ||
| 577 | /** | 455 | /** |
| @@ -589,7 +467,7 @@ ktime_t tick_nohz_get_sleep_length(void) | |||
| 589 | static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) | 467 | static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) |
| 590 | { | 468 | { |
| 591 | hrtimer_cancel(&ts->sched_timer); | 469 | hrtimer_cancel(&ts->sched_timer); |
| 592 | hrtimer_set_expires(&ts->sched_timer, ts->last_tick); | 470 | hrtimer_set_expires(&ts->sched_timer, ts->idle_tick); |
| 593 | 471 | ||
| 594 | while (1) { | 472 | while (1) { |
| 595 | /* Forward the time to expire in the future */ | 473 | /* Forward the time to expire in the future */ |
| @@ -606,33 +484,49 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) | |||
| 606 | hrtimer_get_expires(&ts->sched_timer), 0)) | 484 | hrtimer_get_expires(&ts->sched_timer), 0)) |
| 607 | break; | 485 | break; |
| 608 | } | 486 | } |
| 609 | /* Reread time and update jiffies */ | 487 | /* Update jiffies and reread time */ |
| 610 | now = ktime_get(); | ||
| 611 | tick_do_update_jiffies64(now); | 488 | tick_do_update_jiffies64(now); |
| 489 | now = ktime_get(); | ||
| 612 | } | 490 | } |
| 613 | } | 491 | } |
| 614 | 492 | ||
| 615 | static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) | 493 | /** |
| 494 | * tick_nohz_restart_sched_tick - restart the idle tick from the idle task | ||
| 495 | * | ||
| 496 | * Restart the idle tick when the CPU is woken up from idle | ||
| 497 | */ | ||
| 498 | void tick_nohz_restart_sched_tick(void) | ||
| 616 | { | 499 | { |
| 617 | /* Update jiffies first */ | 500 | int cpu = smp_processor_id(); |
| 618 | tick_do_update_jiffies64(now); | 501 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
| 619 | update_cpu_load_nohz(); | 502 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING |
| 503 | unsigned long ticks; | ||
| 504 | #endif | ||
| 505 | ktime_t now; | ||
| 620 | 506 | ||
| 621 | calc_load_exit_idle(); | 507 | local_irq_disable(); |
| 622 | touch_softlockup_watchdog(); | 508 | if (ts->idle_active || (ts->inidle && ts->tick_stopped)) |
| 623 | /* | 509 | now = ktime_get(); |
| 624 | * Cancel the scheduled timer and restore the tick | ||
| 625 | */ | ||
| 626 | ts->tick_stopped = 0; | ||
| 627 | ts->idle_exittime = now; | ||
| 628 | 510 | ||
| 629 | tick_nohz_restart(ts, now); | 511 | if (ts->idle_active) |
| 630 | } | 512 | tick_nohz_stop_idle(cpu, now); |
| 513 | |||
| 514 | if (!ts->inidle || !ts->tick_stopped) { | ||
| 515 | ts->inidle = 0; | ||
| 516 | local_irq_enable(); | ||
| 517 | return; | ||
| 518 | } | ||
| 519 | |||
| 520 | ts->inidle = 0; | ||
| 521 | |||
| 522 | rcu_exit_nohz(); | ||
| 523 | |||
| 524 | /* Update jiffies first */ | ||
| 525 | select_nohz_load_balancer(0); | ||
| 526 | tick_do_update_jiffies64(now); | ||
| 527 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | ||
| 631 | 528 | ||
| 632 | static void tick_nohz_account_idle_ticks(struct tick_sched *ts) | ||
| 633 | { | ||
| 634 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | 529 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING |
| 635 | unsigned long ticks; | ||
| 636 | /* | 530 | /* |
| 637 | * We stopped the tick in idle. Update process times would miss the | 531 | * We stopped the tick in idle. Update process times would miss the |
| 638 | * time we slept as update_process_times does only a 1 tick | 532 | * time we slept as update_process_times does only a 1 tick |
| @@ -645,39 +539,15 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts) | |||
| 645 | if (ticks && ticks < LONG_MAX) | 539 | if (ticks && ticks < LONG_MAX) |
| 646 | account_idle_ticks(ticks); | 540 | account_idle_ticks(ticks); |
| 647 | #endif | 541 | #endif |
| 648 | } | ||
| 649 | 542 | ||
| 650 | /** | 543 | touch_softlockup_watchdog(); |
| 651 | * tick_nohz_idle_exit - restart the idle tick from the idle task | 544 | /* |
| 652 | * | 545 | * Cancel the scheduled timer and restore the tick |
| 653 | * Restart the idle tick when the CPU is woken up from idle | 546 | */ |
| 654 | * This also exits the RCU extended quiescent state. The CPU | ||
| 655 | * can use RCU again after this function is called. | 548 | ts->idle_exittime = now; |
| 656 | */ | ||
| 657 | void tick_nohz_idle_exit(void) | ||
| 658 | { | ||
| 659 | int cpu = smp_processor_id(); | ||
| 660 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | ||
| 661 | ktime_t now; | ||
| 662 | |||
| 663 | local_irq_disable(); | ||
| 664 | |||
| 665 | WARN_ON_ONCE(!ts->inidle); | ||
| 666 | |||
| 667 | ts->inidle = 0; | ||
| 668 | |||
| 669 | /* Cancel the timer because the CPU has already woken up from the C-states */ | ||
| 670 | menu_hrtimer_cancel(); | ||
| 671 | if (ts->idle_active || ts->tick_stopped) | ||
| 672 | now = ktime_get(); | ||
| 673 | |||
| 674 | if (ts->idle_active) | ||
| 675 | tick_nohz_stop_idle(cpu, now); | ||
| 676 | 549 | ||
| 677 | if (ts->tick_stopped) { | 550 | tick_nohz_restart(ts, now); |
| 678 | tick_nohz_restart_sched_tick(ts, now); | ||
| 679 | tick_nohz_account_idle_ticks(ts); | ||
| 680 | } | ||
| 681 | 551 | ||
| 682 | local_irq_enable(); | 552 | local_irq_enable(); |
| 683 | } | 553 | } |
| @@ -695,12 +565,40 @@ static void tick_nohz_handler(struct clock_event_device *dev) | |||
| 695 | { | 565 | { |
| 696 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 566 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
| 697 | struct pt_regs *regs = get_irq_regs(); | 567 | struct pt_regs *regs = get_irq_regs(); |
| 568 | int cpu = smp_processor_id(); | ||
| 698 | ktime_t now = ktime_get(); | 569 | ktime_t now = ktime_get(); |
| 699 | 570 | ||
| 700 | dev->next_event.tv64 = KTIME_MAX; | 571 | dev->next_event.tv64 = KTIME_MAX; |
| 701 | 572 | ||
| 702 | tick_sched_do_timer(now); | 573 | /* |
| 703 | tick_sched_handle(ts, regs); | 574 | * Check if the do_timer duty was dropped. We don't care about |
| 575 | * concurrency: This happens only when the cpu in charge went | ||
| 576 | * into a long sleep. If two cpus happen to assign themself to | ||
| 577 | * this duty, then the jiffies update is still serialized by | ||
| 578 | * xtime_lock. | ||
| 579 | */ | ||
| 580 | if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) | ||
| 581 | tick_do_timer_cpu = cpu; | ||
| 582 | |||
| 583 | /* Check, if the jiffies need an update */ | ||
| 584 | if (tick_do_timer_cpu == cpu) | ||
| 585 | tick_do_update_jiffies64(now); | ||
| 586 | |||
| 587 | /* | ||
| 588 | * When we are idle and the tick is stopped, we have to touch | ||
| 589 | * the watchdog as we might not schedule for a really long | ||
| 590 | * time. This happens on complete idle SMP systems while | ||
| 591 | * waiting on the login prompt. We also increment the "start | ||
| 592 | * of idle" jiffy stamp so the idle accounting adjustment we | ||
| 593 | * do when we go busy again does not account for too many ticks. | ||
| 594 | */ | ||
| 595 | if (ts->tick_stopped) { | ||
| 596 | touch_softlockup_watchdog(); | ||
| 597 | ts->idle_jiffies++; | ||
| 598 | } | ||
| 599 | |||
| 600 | update_process_times(user_mode(regs)); | ||
| 601 | profile_tick(CPU_PROFILING); | ||
| 704 | 602 | ||
| 705 | while (tick_nohz_reprogram(ts, now)) { | 603 | while (tick_nohz_reprogram(ts, now)) { |
| 706 | now = ktime_get(); | 604 | now = ktime_get(); |
| @@ -742,6 +640,8 @@ static void tick_nohz_switch_to_nohz(void) | |||
| 742 | next = ktime_add(next, tick_period); | 640 | next = ktime_add(next, tick_period); |
| 743 | } | 641 | } |
| 744 | local_irq_enable(); | 642 | local_irq_enable(); |
| 643 | |||
| 644 | printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); | ||
| 745 | } | 645 | } |
| 746 | 646 | ||
| 747 | /* | 647 | /* |
| @@ -813,7 +713,7 @@ void tick_check_idle(int cpu) | |||
| 813 | #ifdef CONFIG_HIGH_RES_TIMERS | 713 | #ifdef CONFIG_HIGH_RES_TIMERS |
| 814 | /* | 714 | /* |
| 815 | * We rearm the timer until we get disabled by the idle code. | 715 | * We rearm the timer until we get disabled by the idle code. |
| 816 | * Called with interrupts disabled. | 716 | * Called with interrupts disabled and timer->base->cpu_base->lock held. |
| 817 | */ | 717 | */ |
| 818 | static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) | 718 | static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) |
| 819 | { | 719 | { |
| @@ -821,31 +721,50 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) | |||
| 821 | container_of(timer, struct tick_sched, sched_timer); | 721 | container_of(timer, struct tick_sched, sched_timer); |
| 822 | struct pt_regs *regs = get_irq_regs(); | 722 | struct pt_regs *regs = get_irq_regs(); |
| 823 | ktime_t now = ktime_get(); | 723 | ktime_t now = ktime_get(); |
| 724 | int cpu = smp_processor_id(); | ||
| 824 | 725 | ||
| 825 | tick_sched_do_timer(now); | 726 | #ifdef CONFIG_NO_HZ |
| 727 | /* | ||
| 728 | * Check if the do_timer duty was dropped. We don't care about | ||
| 729 | * concurrency: This happens only when the cpu in charge went | ||
| 731 | * into a long sleep. If two cpus happen to assign themselves to | ||
| 731 | * this duty, then the jiffies update is still serialized by | ||
| 732 | * xtime_lock. | ||
| 733 | */ | ||
| 734 | if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) | ||
| 735 | tick_do_timer_cpu = cpu; | ||
| 736 | #endif | ||
| 737 | |||
| 738 | /* Check, if the jiffies need an update */ | ||
| 739 | if (tick_do_timer_cpu == cpu) | ||
| 740 | tick_do_update_jiffies64(now); | ||
| 826 | 741 | ||
| 827 | /* | 742 | /* |
| 828 | * Do not call, when we are not in irq context and have | 743 | * Do not call, when we are not in irq context and have |
| 829 | * no valid regs pointer | 744 | * no valid regs pointer |
| 830 | */ | 745 | */ |
| 831 | if (regs) | 746 | if (regs) { |
| 832 | tick_sched_handle(ts, regs); | 747 | /* |
| 748 | * When we are idle and the tick is stopped, we have to touch | ||
| 749 | * the watchdog as we might not schedule for a really long | ||
| 750 | * time. This happens on complete idle SMP systems while | ||
| 751 | * waiting on the login prompt. We also increment the "start of | ||
| 752 | * idle" jiffy stamp so the idle accounting adjustment we do | ||
| 753 | * when we go busy again does not account too much ticks. | ||
| 754 | */ | ||
| 755 | if (ts->tick_stopped) { | ||
| 756 | touch_softlockup_watchdog(); | ||
| 757 | ts->idle_jiffies++; | ||
| 758 | } | ||
| 759 | update_process_times(user_mode(regs)); | ||
| 760 | profile_tick(CPU_PROFILING); | ||
| 761 | } | ||
| 833 | 762 | ||
| 834 | hrtimer_forward(timer, now, tick_period); | 763 | hrtimer_forward(timer, now, tick_period); |
| 835 | 764 | ||
| 836 | return HRTIMER_RESTART; | 765 | return HRTIMER_RESTART; |
| 837 | } | 766 | } |
| 838 | 767 | ||
| 839 | static int sched_skew_tick; | ||
| 840 | |||
| 841 | static int __init skew_tick(char *str) | ||
| 842 | { | ||
| 843 | get_option(&str, &sched_skew_tick); | ||
| 844 | |||
| 845 | return 0; | ||
| 846 | } | ||
| 847 | early_param("skew_tick", skew_tick); | ||
| 848 | |||
| 849 | /** | 768 | /** |
| 850 | * tick_setup_sched_timer - setup the tick emulation timer | 769 | * tick_setup_sched_timer - setup the tick emulation timer |
| 851 | */ | 770 | */ |
| @@ -863,14 +782,6 @@ void tick_setup_sched_timer(void) | |||
| 863 | /* Get the next period (per cpu) */ | 782 | /* Get the next period (per cpu) */ |
| 864 | hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); | 783 | hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); |
| 865 | 784 | ||
| 866 | /* Offset the tick to avert jiffies_lock contention. */ | ||
| 867 | if (sched_skew_tick) { | ||
| 868 | u64 offset = ktime_to_ns(tick_period) >> 1; | ||
| 869 | do_div(offset, num_possible_cpus()); | ||
| 870 | offset *= smp_processor_id(); | ||
| 871 | hrtimer_add_expires_ns(&ts->sched_timer, offset); | ||
| 872 | } | ||
| 873 | |||
| 874 | for (;;) { | 785 | for (;;) { |
| 875 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 786 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
| 876 | hrtimer_start_expires(&ts->sched_timer, | 787 | hrtimer_start_expires(&ts->sched_timer, |
| @@ -882,8 +793,10 @@ void tick_setup_sched_timer(void) | |||
| 882 | } | 793 | } |
| 883 | 794 | ||
| 884 | #ifdef CONFIG_NO_HZ | 795 | #ifdef CONFIG_NO_HZ |
| 885 | if (tick_nohz_enabled) | 796 | if (tick_nohz_enabled) { |
| 886 | ts->nohz_mode = NOHZ_MODE_HIGHRES; | 797 | ts->nohz_mode = NOHZ_MODE_HIGHRES; |
| 798 | printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); | ||
| 799 | } | ||
| 887 | #endif | 800 | #endif |
| 888 | } | 801 | } |
| 889 | #endif /* HIGH_RES_TIMERS */ | 802 | #endif /* HIGH_RES_TIMERS */ |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index cbc6acb0db3..6f9798bf240 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
| @@ -8,7 +8,6 @@ | |||
| 8 | * | 8 | * |
| 9 | */ | 9 | */ |
| 10 | 10 | ||
| 11 | #include <linux/timekeeper_internal.h> | ||
| 12 | #include <linux/module.h> | 11 | #include <linux/module.h> |
| 13 | #include <linux/interrupt.h> | 12 | #include <linux/interrupt.h> |
| 14 | #include <linux/percpu.h> | 13 | #include <linux/percpu.h> |
| @@ -21,60 +20,37 @@ | |||
| 21 | #include <linux/time.h> | 20 | #include <linux/time.h> |
| 22 | #include <linux/tick.h> | 21 | #include <linux/tick.h> |
| 23 | #include <linux/stop_machine.h> | 22 | #include <linux/stop_machine.h> |
| 24 | #include <linux/pvclock_gtod.h> | ||
| 25 | 23 | ||
| 24 | /* Structure holding internal timekeeping values. */ | ||
| 25 | struct timekeeper { | ||
| 26 | /* Current clocksource used for timekeeping. */ | ||
| 27 | struct clocksource *clock; | ||
| 28 | /* The shift value of the current clocksource. */ | ||
| 29 | int shift; | ||
| 30 | |||
| 31 | /* Number of clock cycles in one NTP interval. */ | ||
| 32 | cycle_t cycle_interval; | ||
| 33 | /* Number of clock shifted nano seconds in one NTP interval. */ | ||
| 34 | u64 xtime_interval; | ||
| 35 | /* shifted nano seconds left over when rounding cycle_interval */ | ||
| 36 | s64 xtime_remainder; | ||
| 37 | /* Raw nano seconds accumulated per NTP interval. */ | ||
| 38 | u32 raw_interval; | ||
| 39 | |||
| 40 | /* Clock shifted nano seconds remainder not stored in xtime.tv_nsec. */ | ||
| 41 | u64 xtime_nsec; | ||
| 42 | /* Difference between accumulated time and NTP time in ntp | ||
| 43 | * shifted nano seconds. */ | ||
| 44 | s64 ntp_error; | ||
| 45 | /* Shift conversion between clock shifted nano seconds and | ||
| 46 | * ntp shifted nano seconds. */ | ||
| 47 | int ntp_error_shift; | ||
| 48 | /* NTP adjusted clock multiplier */ | ||
| 49 | u32 mult; | ||
| 50 | }; | ||
| 26 | 51 | ||
| 27 | static struct timekeeper timekeeper; | 52 | static struct timekeeper timekeeper; |
| 28 | 53 | ||
| 29 | /* flag for if timekeeping is suspended */ | ||
| 30 | int __read_mostly timekeeping_suspended; | ||
| 31 | |||
| 32 | static inline void tk_normalize_xtime(struct timekeeper *tk) | ||
| 33 | { | ||
| 34 | while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) { | ||
| 35 | tk->xtime_nsec -= (u64)NSEC_PER_SEC << tk->shift; | ||
| 36 | tk->xtime_sec++; | ||
| 37 | } | ||
| 38 | } | ||
| 39 | |||
| 40 | static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts) | ||
| 41 | { | ||
| 42 | tk->xtime_sec = ts->tv_sec; | ||
| 43 | tk->xtime_nsec = (u64)ts->tv_nsec << tk->shift; | ||
| 44 | } | ||
| 45 | |||
| 46 | static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts) | ||
| 47 | { | ||
| 48 | tk->xtime_sec += ts->tv_sec; | ||
| 49 | tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift; | ||
| 50 | tk_normalize_xtime(tk); | ||
| 51 | } | ||
| 52 | |||
| 53 | static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm) | ||
| 54 | { | ||
| 55 | struct timespec tmp; | ||
| 56 | |||
| 57 | /* | ||
| 58 | * Verify consistency of: offset_real = -wall_to_monotonic | ||
| 59 | * before modifying anything | ||
| 60 | */ | ||
| 61 | set_normalized_timespec(&tmp, -tk->wall_to_monotonic.tv_sec, | ||
| 62 | -tk->wall_to_monotonic.tv_nsec); | ||
| 63 | WARN_ON_ONCE(tk->offs_real.tv64 != timespec_to_ktime(tmp).tv64); | ||
| 64 | tk->wall_to_monotonic = wtm; | ||
| 65 | set_normalized_timespec(&tmp, -wtm.tv_sec, -wtm.tv_nsec); | ||
| 66 | tk->offs_real = timespec_to_ktime(tmp); | ||
| 67 | } | ||
| 68 | |||
| 69 | static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t) | ||
| 70 | { | ||
| 71 | /* Verify consistency before modifying */ | ||
| 72 | WARN_ON_ONCE(tk->offs_boot.tv64 != timespec_to_ktime(tk->total_sleep_time).tv64); | ||
| 73 | |||
| 74 | tk->total_sleep_time = t; | ||
| 75 | tk->offs_boot = timespec_to_ktime(t); | ||
| 76 | } | ||
| 77 | |||
| 78 | /** | 54 | /** |
| 79 | * timekeeper_setup_internals - Set up internals to use clocksource clock. | 55 | * timekeeper_setup_internals - Set up internals to use clocksource clock. |
| 80 | * | 56 | * |
| @@ -85,14 +61,12 @@ static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t) | |||
| 85 | * | 61 | * |
| 86 | * Unless you're the timekeeping code, you should not be using this! | 62 | * Unless you're the timekeeping code, you should not be using this! |
| 87 | */ | 63 | */ |
| 88 | static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) | 64 | static void timekeeper_setup_internals(struct clocksource *clock) |
| 89 | { | 65 | { |
| 90 | cycle_t interval; | 66 | cycle_t interval; |
| 91 | u64 tmp, ntpinterval; | 67 | u64 tmp, ntpinterval; |
| 92 | struct clocksource *old_clock; | ||
| 93 | 68 | ||
| 94 | old_clock = tk->clock; | 69 | timekeeper.clock = clock; |
| 95 | tk->clock = clock; | ||
| 96 | clock->cycle_last = clock->read(clock); | 70 | clock->cycle_last = clock->read(clock); |
| 97 | 71 | ||
| 98 | /* Do the ns -> cycle conversion first, using original mult */ | 72 | /* Do the ns -> cycle conversion first, using original mult */ |
| @@ -105,133 +79,103 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) | |||
| 105 | tmp = 1; | 79 | tmp = 1; |
| 106 | 80 | ||
| 107 | interval = (cycle_t) tmp; | 81 | interval = (cycle_t) tmp; |
| 108 | tk->cycle_interval = interval; | 82 | timekeeper.cycle_interval = interval; |
| 109 | 83 | ||
| 110 | /* Go back from cycles -> shifted ns */ | 84 | /* Go back from cycles -> shifted ns */ |
| 111 | tk->xtime_interval = (u64) interval * clock->mult; | 85 | timekeeper.xtime_interval = (u64) interval * clock->mult; |
| 112 | tk->xtime_remainder = ntpinterval - tk->xtime_interval; | 86 | timekeeper.xtime_remainder = ntpinterval - timekeeper.xtime_interval; |
| 113 | tk->raw_interval = | 87 | timekeeper.raw_interval = |
| 114 | ((u64) interval * clock->mult) >> clock->shift; | 88 | ((u64) interval * clock->mult) >> clock->shift; |
| 115 | 89 | ||
| 116 | /* if changing clocks, convert xtime_nsec shift units */ | 90 | timekeeper.xtime_nsec = 0; |
| 117 | if (old_clock) { | 91 | timekeeper.shift = clock->shift; |
| 118 | int shift_change = clock->shift - old_clock->shift; | ||
| 119 | if (shift_change < 0) | ||
| 120 | tk->xtime_nsec >>= -shift_change; | ||
| 121 | else | ||
| 122 | tk->xtime_nsec <<= shift_change; | ||
| 123 | } | ||
| 124 | tk->shift = clock->shift; | ||
| 125 | 92 | ||
| 126 | tk->ntp_error = 0; | 93 | timekeeper.ntp_error = 0; |
| 127 | tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; | 94 | timekeeper.ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; |
| 128 | 95 | ||
| 129 | /* | 96 | /* |
| 130 | * The timekeeper keeps its own mult values for the currently | 97 | * The timekeeper keeps its own mult values for the currently |
| 131 | * active clocksource. These value will be adjusted via NTP | 98 | * active clocksource. These value will be adjusted via NTP |
| 132 | * to counteract clock drifting. | 99 | * to counteract clock drifting. |
| 133 | */ | 100 | */ |
| 134 | tk->mult = clock->mult; | 101 | timekeeper.mult = clock->mult; |
| 135 | } | 102 | } |
| 136 | 103 | ||
| 137 | /* Timekeeper helper functions. */ | 104 | /* Timekeeper helper functions. */ |
| 138 | static inline s64 timekeeping_get_ns(struct timekeeper *tk) | 105 | static inline s64 timekeeping_get_ns(void) |
| 139 | { | 106 | { |
| 140 | cycle_t cycle_now, cycle_delta; | 107 | cycle_t cycle_now, cycle_delta; |
| 141 | struct clocksource *clock; | 108 | struct clocksource *clock; |
| 142 | s64 nsec; | ||
| 143 | 109 | ||
| 144 | /* read clocksource: */ | 110 | /* read clocksource: */ |
| 145 | clock = tk->clock; | 111 | clock = timekeeper.clock; |
| 146 | cycle_now = clock->read(clock); | 112 | cycle_now = clock->read(clock); |
| 147 | 113 | ||
| 148 | /* calculate the delta since the last update_wall_time: */ | 114 | /* calculate the delta since the last update_wall_time: */ |
| 149 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; | 115 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; |
| 150 | 116 | ||
| 151 | nsec = cycle_delta * tk->mult + tk->xtime_nsec; | 117 | /* return delta convert to nanoseconds using ntp adjusted mult. */ |
| 152 | nsec >>= tk->shift; | 118 | return clocksource_cyc2ns(cycle_delta, timekeeper.mult, |
| 153 | 119 | timekeeper.shift); | |
| 154 | /* If arch requires, add in gettimeoffset() */ | ||
| 155 | return nsec + arch_gettimeoffset(); | ||
| 156 | } | 120 | } |
| 157 | 121 | ||
| 158 | static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) | 122 | static inline s64 timekeeping_get_ns_raw(void) |
| 159 | { | 123 | { |
| 160 | cycle_t cycle_now, cycle_delta; | 124 | cycle_t cycle_now, cycle_delta; |
| 161 | struct clocksource *clock; | 125 | struct clocksource *clock; |
| 162 | s64 nsec; | ||
| 163 | 126 | ||
| 164 | /* read clocksource: */ | 127 | /* read clocksource: */ |
| 165 | clock = tk->clock; | 128 | clock = timekeeper.clock; |
| 166 | cycle_now = clock->read(clock); | 129 | cycle_now = clock->read(clock); |
| 167 | 130 | ||
| 168 | /* calculate the delta since the last update_wall_time: */ | 131 | /* calculate the delta since the last update_wall_time: */ |
| 169 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; | 132 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; |
| 170 | 133 | ||
| 171 | /* convert delta to nanoseconds. */ | 134 | /* return delta convert to nanoseconds using ntp adjusted mult. */ |
| 172 | nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); | 135 | return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); |
| 173 | |||
| 174 | /* If arch requires, add in gettimeoffset() */ | ||
| 175 | return nsec + arch_gettimeoffset(); | ||
| 176 | } | ||
| 177 | |||
| 178 | static RAW_NOTIFIER_HEAD(pvclock_gtod_chain); | ||
| 179 | |||
| 180 | static void update_pvclock_gtod(struct timekeeper *tk) | ||
| 181 | { | ||
| 182 | raw_notifier_call_chain(&pvclock_gtod_chain, 0, tk); | ||
| 183 | } | 136 | } |
| 184 | 137 | ||
| 185 | /** | 138 | /* |
| 186 | * pvclock_gtod_register_notifier - register a pvclock timedata update listener | 139 | * This read-write spinlock protects us from races in SMP while |
| 187 | * | 140 | * playing with xtime. |
| 188 | * Must hold write on timekeeper.lock | ||
| 189 | */ | 141 | */ |
| 190 | int pvclock_gtod_register_notifier(struct notifier_block *nb) | 142 | __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); |
| 191 | { | ||
| 192 | struct timekeeper *tk = &timekeeper; | ||
| 193 | unsigned long flags; | ||
| 194 | int ret; | ||
| 195 | |||
| 196 | write_seqlock_irqsave(&tk->lock, flags); | ||
| 197 | ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb); | ||
| 198 | /* update timekeeping data */ | ||
| 199 | update_pvclock_gtod(tk); | ||
| 200 | write_sequnlock_irqrestore(&tk->lock, flags); | ||
| 201 | 143 | ||
| 202 | return ret; | ||
| 203 | } | ||
| 204 | EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier); | ||
| 205 | 144 | ||
| 206 | /** | 145 | /* |
| 207 | * pvclock_gtod_unregister_notifier - unregister a pvclock | 146 | * The current time |
| 208 | * timedata update listener | 147 | * wall_to_monotonic is what we need to add to xtime (or xtime corrected |
| 148 | * for sub jiffie times) to get to monotonic time. Monotonic is pegged | ||
| 149 | * at zero at system boot time, so wall_to_monotonic will be negative, | ||
| 150 | * however, we will ALWAYS keep the tv_nsec part positive so we can use | ||
| 151 | * the usual normalization. | ||
| 209 | * | 152 | * |
| 210 | * Must hold write on timekeeper.lock | 153 | * wall_to_monotonic is moved after resume from suspend for the monotonic |
| 154 | * time not to jump. We need to add total_sleep_time to wall_to_monotonic | ||
| 155 | * to get the real boot based time offset. | ||
| 156 | * | ||
| 157 | * - wall_to_monotonic is no longer the boot time, getboottime must be | ||
| 158 | * used instead. | ||
| 211 | */ | 159 | */ |
| 212 | int pvclock_gtod_unregister_notifier(struct notifier_block *nb) | 160 | static struct timespec xtime __attribute__ ((aligned (16))); |
| 213 | { | 161 | static struct timespec wall_to_monotonic __attribute__ ((aligned (16))); |
| 214 | struct timekeeper *tk = &timekeeper; | 162 | static struct timespec total_sleep_time; |
| 215 | unsigned long flags; | ||
| 216 | int ret; | ||
| 217 | 163 | ||
| 218 | write_seqlock_irqsave(&tk->lock, flags); | 164 | /* |
| 219 | ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb); | 165 | * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. |
| 220 | write_sequnlock_irqrestore(&tk->lock, flags); | 166 | */ |
| 167 | static struct timespec raw_time; | ||
| 221 | 168 | ||
| 222 | return ret; | 169 | /* flag for if timekeeping is suspended */ |
| 223 | } | 170 | int __read_mostly timekeeping_suspended; |
| 224 | EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier); | ||
| 225 | 171 | ||
| 226 | /* must hold write on timekeeper.lock */ | 172 | /* must hold xtime_lock */ |
| 227 | static void timekeeping_update(struct timekeeper *tk, bool clearntp) | 173 | void timekeeping_leap_insert(int leapsecond) |
| 228 | { | 174 | { |
| 229 | if (clearntp) { | 175 | xtime.tv_sec += leapsecond; |
| 230 | tk->ntp_error = 0; | 176 | wall_to_monotonic.tv_sec -= leapsecond; |
| 231 | ntp_clear(); | 177 | update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, |
| 232 | } | 178 | timekeeper.mult); |
| 233 | update_vsyscall(tk); | ||
| 234 | update_pvclock_gtod(tk); | ||
| 235 | } | 179 | } |
| 236 | 180 | ||
| 237 | /** | 181 | /** |
| @@ -241,26 +185,27 @@ static void timekeeping_update(struct timekeeper *tk, bool clearntp) | |||
| 241 | * update_wall_time(). This is useful before significant clock changes, | 185 | * update_wall_time(). This is useful before significant clock changes, |
| 242 | * as it avoids having to deal with this time offset explicitly. | 186 | * as it avoids having to deal with this time offset explicitly. |
| 243 | */ | 187 | */ |
| 244 | static void timekeeping_forward_now(struct timekeeper *tk) | 188 | static void timekeeping_forward_now(void) |
| 245 | { | 189 | { |
| 246 | cycle_t cycle_now, cycle_delta; | 190 | cycle_t cycle_now, cycle_delta; |
| 247 | struct clocksource *clock; | 191 | struct clocksource *clock; |
| 248 | s64 nsec; | 192 | s64 nsec; |
| 249 | 193 | ||
| 250 | clock = tk->clock; | 194 | clock = timekeeper.clock; |
| 251 | cycle_now = clock->read(clock); | 195 | cycle_now = clock->read(clock); |
| 252 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; | 196 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; |
| 253 | clock->cycle_last = cycle_now; | 197 | clock->cycle_last = cycle_now; |
| 254 | 198 | ||
| 255 | tk->xtime_nsec += cycle_delta * tk->mult; | 199 | nsec = clocksource_cyc2ns(cycle_delta, timekeeper.mult, |
| 200 | timekeeper.shift); | ||
| 256 | 201 | ||
| 257 | /* If arch requires, add in gettimeoffset() */ | 202 | /* If arch requires, add in gettimeoffset() */ |
| 258 | tk->xtime_nsec += (u64)arch_gettimeoffset() << tk->shift; | 203 | nsec += arch_gettimeoffset(); |
| 259 | 204 | ||
| 260 | tk_normalize_xtime(tk); | 205 | timespec_add_ns(&xtime, nsec); |
| 261 | 206 | ||
| 262 | nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); | 207 | nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); |
| 263 | timespec_add_ns(&tk->raw_time, nsec); | 208 | timespec_add_ns(&raw_time, nsec); |
| 264 | } | 209 | } |
| 265 | 210 | ||
| 266 | /** | 211 | /** |
| @@ -271,39 +216,43 @@ static void timekeeping_forward_now(struct timekeeper *tk) | |||
| 271 | */ | 216 | */ |
| 272 | void getnstimeofday(struct timespec *ts) | 217 | void getnstimeofday(struct timespec *ts) |
| 273 | { | 218 | { |
| 274 | struct timekeeper *tk = &timekeeper; | ||
| 275 | unsigned long seq; | 219 | unsigned long seq; |
| 276 | s64 nsecs = 0; | 220 | s64 nsecs; |
| 277 | 221 | ||
| 278 | WARN_ON(timekeeping_suspended); | 222 | WARN_ON(timekeeping_suspended); |
| 279 | 223 | ||
| 280 | do { | 224 | do { |
| 281 | seq = read_seqbegin(&tk->lock); | 225 | seq = read_seqbegin(&xtime_lock); |
| 282 | 226 | ||
| 283 | ts->tv_sec = tk->xtime_sec; | 227 | *ts = xtime; |
| 284 | nsecs = timekeeping_get_ns(tk); | 228 | nsecs = timekeeping_get_ns(); |
| 285 | 229 | ||
| 286 | } while (read_seqretry(&tk->lock, seq)); | 230 | /* If arch requires, add in gettimeoffset() */ |
| 231 | nsecs += arch_gettimeoffset(); | ||
| 232 | |||
| 233 | } while (read_seqretry(&xtime_lock, seq)); | ||
| 287 | 234 | ||
| 288 | ts->tv_nsec = 0; | ||
| 289 | timespec_add_ns(ts, nsecs); | 235 | timespec_add_ns(ts, nsecs); |
| 290 | } | 236 | } |
| 237 | |||
| 291 | EXPORT_SYMBOL(getnstimeofday); | 238 | EXPORT_SYMBOL(getnstimeofday); |
| 292 | 239 | ||
| 293 | ktime_t ktime_get(void) | 240 | ktime_t ktime_get(void) |
| 294 | { | 241 | { |
| 295 | struct timekeeper *tk = &timekeeper; | ||
| 296 | unsigned int seq; | 242 | unsigned int seq; |
| 297 | s64 secs, nsecs; | 243 | s64 secs, nsecs; |
| 298 | 244 | ||
| 299 | WARN_ON(timekeeping_suspended); | 245 | WARN_ON(timekeeping_suspended); |
| 300 | 246 | ||
| 301 | do { | 247 | do { |
| 302 | seq = read_seqbegin(&tk->lock); | 248 | seq = read_seqbegin(&xtime_lock); |
| 303 | secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; | 249 | secs = xtime.tv_sec + wall_to_monotonic.tv_sec; |
| 304 | nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec; | 250 | nsecs = xtime.tv_nsec + wall_to_monotonic.tv_nsec; |
| 305 | 251 | nsecs += timekeeping_get_ns(); | |
| 306 | } while (read_seqretry(&tk->lock, seq)); | 252 | /* If arch requires, add in gettimeoffset() */ |
| 253 | nsecs += arch_gettimeoffset(); | ||
| 254 | |||
| 255 | } while (read_seqretry(&xtime_lock, seq)); | ||
| 307 | /* | 256 | /* |
| 308 | * Use ktime_set/ktime_add_ns to create a proper ktime on | 257 | * Use ktime_set/ktime_add_ns to create a proper ktime on |
| 309 | * 32-bit architectures without CONFIG_KTIME_SCALAR. | 258 | * 32-bit architectures without CONFIG_KTIME_SCALAR. |
| @@ -322,24 +271,24 @@ EXPORT_SYMBOL_GPL(ktime_get); | |||
| 322 | */ | 271 | */ |
| 323 | void ktime_get_ts(struct timespec *ts) | 272 | void ktime_get_ts(struct timespec *ts) |
| 324 | { | 273 | { |
| 325 | struct timekeeper *tk = &timekeeper; | ||
| 326 | struct timespec tomono; | 274 | struct timespec tomono; |
| 327 | s64 nsec; | ||
| 328 | unsigned int seq; | 275 | unsigned int seq; |
| 276 | s64 nsecs; | ||
| 329 | 277 | ||
| 330 | WARN_ON(timekeeping_suspended); | 278 | WARN_ON(timekeeping_suspended); |
| 331 | 279 | ||
| 332 | do { | 280 | do { |
| 333 | seq = read_seqbegin(&tk->lock); | 281 | seq = read_seqbegin(&xtime_lock); |
| 334 | ts->tv_sec = tk->xtime_sec; | 282 | *ts = xtime; |
| 335 | nsec = timekeeping_get_ns(tk); | 283 | tomono = wall_to_monotonic; |
| 336 | tomono = tk->wall_to_monotonic; | 284 | nsecs = timekeeping_get_ns(); |
| 285 | /* If arch requires, add in gettimeoffset() */ | ||
| 286 | nsecs += arch_gettimeoffset(); | ||
| 337 | 287 | ||
| 338 | } while (read_seqretry(&tk->lock, seq)); | 288 | } while (read_seqretry(&xtime_lock, seq)); |
| 339 | 289 | ||
| 340 | ts->tv_sec += tomono.tv_sec; | 290 | set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec, |
| 341 | ts->tv_nsec = 0; | 291 | ts->tv_nsec + tomono.tv_nsec + nsecs); |
| 342 | timespec_add_ns(ts, nsec + tomono.tv_nsec); | ||
| 343 | } | 292 | } |
| 344 | EXPORT_SYMBOL_GPL(ktime_get_ts); | 293 | EXPORT_SYMBOL_GPL(ktime_get_ts); |
| 345 | 294 | ||
| @@ -356,23 +305,28 @@ EXPORT_SYMBOL_GPL(ktime_get_ts); | |||
| 356 | */ | 305 | */ |
| 357 | void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) | 306 | void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) |
| 358 | { | 307 | { |
| 359 | struct timekeeper *tk = &timekeeper; | ||
| 360 | unsigned long seq; | 308 | unsigned long seq; |
| 361 | s64 nsecs_raw, nsecs_real; | 309 | s64 nsecs_raw, nsecs_real; |
| 362 | 310 | ||
| 363 | WARN_ON_ONCE(timekeeping_suspended); | 311 | WARN_ON_ONCE(timekeeping_suspended); |
| 364 | 312 | ||
| 365 | do { | 313 | do { |
| 366 | seq = read_seqbegin(&tk->lock); | 314 | u32 arch_offset; |
| 367 | 315 | ||
| 368 | *ts_raw = tk->raw_time; | 316 | seq = read_seqbegin(&xtime_lock); |
| 369 | ts_real->tv_sec = tk->xtime_sec; | ||
| 370 | ts_real->tv_nsec = 0; | ||
| 371 | 317 | ||
| 372 | nsecs_raw = timekeeping_get_ns_raw(tk); | 318 | *ts_raw = raw_time; |
| 373 | nsecs_real = timekeeping_get_ns(tk); | 319 | *ts_real = xtime; |
| 374 | 320 | ||
| 375 | } while (read_seqretry(&tk->lock, seq)); | 321 | nsecs_raw = timekeeping_get_ns_raw(); |
| 322 | nsecs_real = timekeeping_get_ns(); | ||
| 323 | |||
| 324 | /* If arch requires, add in gettimeoffset() */ | ||
| 325 | arch_offset = arch_gettimeoffset(); | ||
| 326 | nsecs_raw += arch_offset; | ||
| 327 | nsecs_real += arch_offset; | ||
| 328 | |||
| 329 | } while (read_seqretry(&xtime_lock, seq)); | ||
| 376 | 330 | ||
| 377 | timespec_add_ns(ts_raw, nsecs_raw); | 331 | timespec_add_ns(ts_raw, nsecs_raw); |
| 378 | timespec_add_ns(ts_real, nsecs_real); | 332 | timespec_add_ns(ts_real, nsecs_real); |
| @@ -395,8 +349,8 @@ void do_gettimeofday(struct timeval *tv) | |||
| 395 | tv->tv_sec = now.tv_sec; | 349 | tv->tv_sec = now.tv_sec; |
| 396 | tv->tv_usec = now.tv_nsec/1000; | 350 | tv->tv_usec = now.tv_nsec/1000; |
| 397 | } | 351 | } |
| 398 | EXPORT_SYMBOL(do_gettimeofday); | ||
| 399 | 352 | ||
| 353 | EXPORT_SYMBOL(do_gettimeofday); | ||
| 400 | /** | 354 | /** |
| 401 | * do_settimeofday - Sets the time of day | 355 | * do_settimeofday - Sets the time of day |
| 402 | * @tv: pointer to the timespec variable containing the new time | 356 | * @tv: pointer to the timespec variable containing the new time |
| @@ -405,36 +359,39 @@ EXPORT_SYMBOL(do_gettimeofday); | |||
| 405 | */ | 359 | */ |
| 406 | int do_settimeofday(const struct timespec *tv) | 360 | int do_settimeofday(const struct timespec *tv) |
| 407 | { | 361 | { |
| 408 | struct timekeeper *tk = &timekeeper; | 362 | struct timespec ts_delta; |
| 409 | struct timespec ts_delta, xt; | ||
| 410 | unsigned long flags; | 363 | unsigned long flags; |
| 411 | 364 | ||
| 412 | if (!timespec_valid_strict(tv)) | 365 | if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) |
| 413 | return -EINVAL; | 366 | return -EINVAL; |
| 414 | 367 | ||
| 415 | write_seqlock_irqsave(&tk->lock, flags); | 368 | write_seqlock_irqsave(&xtime_lock, flags); |
| 416 | 369 | ||
| 417 | timekeeping_forward_now(tk); | 370 | timekeeping_forward_now(); |
| 418 | 371 | ||
| 419 | xt = tk_xtime(tk); | 372 | ts_delta.tv_sec = tv->tv_sec - xtime.tv_sec; |
| 420 | ts_delta.tv_sec = tv->tv_sec - xt.tv_sec; | 373 | ts_delta.tv_nsec = tv->tv_nsec - xtime.tv_nsec; |
| 421 | ts_delta.tv_nsec = tv->tv_nsec - xt.tv_nsec; | 374 | wall_to_monotonic = timespec_sub(wall_to_monotonic, ts_delta); |
| 422 | 375 | ||
| 423 | tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, ts_delta)); | 376 | xtime = *tv; |
| 424 | 377 | ||
| 425 | tk_set_xtime(tk, tv); | 378 | timekeeper.ntp_error = 0; |
| 379 | ntp_clear(); | ||
| 426 | 380 | ||
| 427 | timekeeping_update(tk, true); | 381 | update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, |
| 382 | timekeeper.mult); | ||
| 428 | 383 | ||
| 429 | write_sequnlock_irqrestore(&tk->lock, flags); | 384 | write_sequnlock_irqrestore(&xtime_lock, flags); |
| 430 | 385 | ||
| 431 | /* signal hrtimers about time change */ | 386 | /* signal hrtimers about time change */ |
| 432 | clock_was_set(); | 387 | clock_was_set(); |
| 433 | 388 | ||
| 434 | return 0; | 389 | return 0; |
| 435 | } | 390 | } |
| 391 | |||
| 436 | EXPORT_SYMBOL(do_settimeofday); | 392 | EXPORT_SYMBOL(do_settimeofday); |
| 437 | 393 | ||
| 394 | |||
| 438 | /** | 395 | /** |
| 439 | * timekeeping_inject_offset - Adds or subtracts from the current time. | 396 | * timekeeping_inject_offset - Adds or subtracts from the current time. |
| 440 | * @tv: pointer to the timespec variable containing the offset | 397 | * @tv: pointer to the timespec variable containing the offset |
| @@ -443,37 +400,30 @@ EXPORT_SYMBOL(do_settimeofday); | |||
| 443 | */ | 400 | */ |
| 444 | int timekeeping_inject_offset(struct timespec *ts) | 401 | int timekeeping_inject_offset(struct timespec *ts) |
| 445 | { | 402 | { |
| 446 | struct timekeeper *tk = &timekeeper; | ||
| 447 | unsigned long flags; | 403 | unsigned long flags; |
| 448 | struct timespec tmp; | ||
| 449 | int ret = 0; | ||
| 450 | 404 | ||
| 451 | if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) | 405 | if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) |
| 452 | return -EINVAL; | 406 | return -EINVAL; |
| 453 | 407 | ||
| 454 | write_seqlock_irqsave(&tk->lock, flags); | 408 | write_seqlock_irqsave(&xtime_lock, flags); |
| 455 | 409 | ||
| 456 | timekeeping_forward_now(tk); | 410 | timekeeping_forward_now(); |
| 457 | 411 | ||
| 458 | /* Make sure the proposed value is valid */ | 412 | xtime = timespec_add(xtime, *ts); |
| 459 | tmp = timespec_add(tk_xtime(tk), *ts); | 413 | wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts); |
| 460 | if (!timespec_valid_strict(&tmp)) { | ||
| 461 | ret = -EINVAL; | ||
| 462 | goto error; | ||
| 463 | } | ||
| 464 | 414 | ||
| 465 | tk_xtime_add(tk, ts); | 415 | timekeeper.ntp_error = 0; |
| 466 | tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts)); | 416 | ntp_clear(); |
| 467 | 417 | ||
| 468 | error: /* even if we error out, we forwarded the time, so call update */ | 418 | update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, |
| 469 | timekeeping_update(tk, true); | 419 | timekeeper.mult); |
| 470 | 420 | ||
| 471 | write_sequnlock_irqrestore(&tk->lock, flags); | 421 | write_sequnlock_irqrestore(&xtime_lock, flags); |
| 472 | 422 | ||
| 473 | /* signal hrtimers about time change */ | 423 | /* signal hrtimers about time change */ |
| 474 | clock_was_set(); | 424 | clock_was_set(); |
| 475 | 425 | ||
| 476 | return ret; | 426 | return 0; |
| 477 | } | 427 | } |
| 478 | EXPORT_SYMBOL(timekeeping_inject_offset); | 428 | EXPORT_SYMBOL(timekeeping_inject_offset); |
| 479 | 429 | ||
| @@ -484,25 +434,17 @@ EXPORT_SYMBOL(timekeeping_inject_offset); | |||
| 484 | */ | 434 | */ |
| 485 | static int change_clocksource(void *data) | 435 | static int change_clocksource(void *data) |
| 486 | { | 436 | { |
| 487 | struct timekeeper *tk = &timekeeper; | ||
| 488 | struct clocksource *new, *old; | 437 | struct clocksource *new, *old; |
| 489 | unsigned long flags; | ||
| 490 | 438 | ||
| 491 | new = (struct clocksource *) data; | 439 | new = (struct clocksource *) data; |
| 492 | 440 | ||
| 493 | write_seqlock_irqsave(&tk->lock, flags); | 441 | timekeeping_forward_now(); |
| 494 | |||
| 495 | timekeeping_forward_now(tk); | ||
| 496 | if (!new->enable || new->enable(new) == 0) { | 442 | if (!new->enable || new->enable(new) == 0) { |
| 497 | old = tk->clock; | 443 | old = timekeeper.clock; |
| 498 | tk_setup_internals(tk, new); | 444 | timekeeper_setup_internals(new); |
| 499 | if (old->disable) | 445 | if (old->disable) |
| 500 | old->disable(old); | 446 | old->disable(old); |
| 501 | } | 447 | } |
| 502 | timekeeping_update(tk, true); | ||
| 503 | |||
| 504 | write_sequnlock_irqrestore(&tk->lock, flags); | ||
| 505 | |||
| 506 | return 0; | 448 | return 0; |
| 507 | } | 449 | } |
| 508 | 450 | ||
| @@ -515,9 +457,7 @@ static int change_clocksource(void *data) | |||
| 515 | */ | 457 | */ |
| 516 | void timekeeping_notify(struct clocksource *clock) | 458 | void timekeeping_notify(struct clocksource *clock) |
| 517 | { | 459 | { |
| 518 | struct timekeeper *tk = &timekeeper; | 460 | if (timekeeper.clock == clock) |
| 519 | |||
| 520 | if (tk->clock == clock) | ||
| 521 | return; | 461 | return; |
| 522 | stop_machine(change_clocksource, clock, NULL); | 462 | stop_machine(change_clocksource, clock, NULL); |
| 523 | tick_clock_notify(); | 463 | tick_clock_notify(); |
| @@ -546,57 +486,48 @@ EXPORT_SYMBOL_GPL(ktime_get_real); | |||
| 546 | */ | 486 | */ |
| 547 | void getrawmonotonic(struct timespec *ts) | 487 | void getrawmonotonic(struct timespec *ts) |
| 548 | { | 488 | { |
| 549 | struct timekeeper *tk = &timekeeper; | ||
| 550 | unsigned long seq; | 489 | unsigned long seq; |
| 551 | s64 nsecs; | 490 | s64 nsecs; |
| 552 | 491 | ||
| 553 | do { | 492 | do { |
| 554 | seq = read_seqbegin(&tk->lock); | 493 | seq = read_seqbegin(&xtime_lock); |
| 555 | nsecs = timekeeping_get_ns_raw(tk); | 494 | nsecs = timekeeping_get_ns_raw(); |
| 556 | *ts = tk->raw_time; | 495 | *ts = raw_time; |
| 557 | 496 | ||
| 558 | } while (read_seqretry(&tk->lock, seq)); | 497 | } while (read_seqretry(&xtime_lock, seq)); |
| 559 | 498 | ||
| 560 | timespec_add_ns(ts, nsecs); | 499 | timespec_add_ns(ts, nsecs); |
| 561 | } | 500 | } |
| 562 | EXPORT_SYMBOL(getrawmonotonic); | 501 | EXPORT_SYMBOL(getrawmonotonic); |
| 563 | 502 | ||
| 503 | |||
| 564 | /** | 504 | /** |
| 565 | * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres | 505 | * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres |
| 566 | */ | 506 | */ |
| 567 | int timekeeping_valid_for_hres(void) | 507 | int timekeeping_valid_for_hres(void) |
| 568 | { | 508 | { |
| 569 | struct timekeeper *tk = &timekeeper; | ||
| 570 | unsigned long seq; | 509 | unsigned long seq; |
| 571 | int ret; | 510 | int ret; |
| 572 | 511 | ||
| 573 | do { | 512 | do { |
| 574 | seq = read_seqbegin(&tk->lock); | 513 | seq = read_seqbegin(&xtime_lock); |
| 575 | 514 | ||
| 576 | ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; | 515 | ret = timekeeper.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; |
| 577 | 516 | ||
| 578 | } while (read_seqretry(&tk->lock, seq)); | 517 | } while (read_seqretry(&xtime_lock, seq)); |
| 579 | 518 | ||
| 580 | return ret; | 519 | return ret; |
| 581 | } | 520 | } |
| 582 | 521 | ||
| 583 | /** | 522 | /** |
| 584 | * timekeeping_max_deferment - Returns max time the clocksource can be deferred | 523 | * timekeeping_max_deferment - Returns max time the clocksource can be deferred |
| 524 | * | ||
| 525 | * Caller must observe xtime_lock via read_seqbegin/read_seqretry to | ||
| 526 | * ensure that the clocksource does not change! | ||
| 585 | */ | 527 | */ |
| 586 | u64 timekeeping_max_deferment(void) | 528 | u64 timekeeping_max_deferment(void) |
| 587 | { | 529 | { |
| 588 | struct timekeeper *tk = &timekeeper; | 530 | return timekeeper.clock->max_idle_ns; |
| 589 | unsigned long seq; | ||
| 590 | u64 ret; | ||
| 591 | |||
| 592 | do { | ||
| 593 | seq = read_seqbegin(&tk->lock); | ||
| 594 | |||
| 595 | ret = tk->clock->max_idle_ns; | ||
| 596 | |||
| 597 | } while (read_seqretry(&tk->lock, seq)); | ||
| 598 | |||
| 599 | return ret; | ||
| 600 | } | 531 | } |
| 601 | 532 | ||
| 602 | /** | 533 | /** |
| @@ -634,51 +565,35 @@ void __attribute__((weak)) read_boot_clock(struct timespec *ts) | |||
| 634 | */ | 565 | */ |
| 635 | void __init timekeeping_init(void) | 566 | void __init timekeeping_init(void) |
| 636 | { | 567 | { |
| 637 | struct timekeeper *tk = &timekeeper; | ||
| 638 | struct clocksource *clock; | 568 | struct clocksource *clock; |
| 639 | unsigned long flags; | 569 | unsigned long flags; |
| 640 | struct timespec now, boot, tmp; | 570 | struct timespec now, boot; |
| 641 | 571 | ||
| 642 | read_persistent_clock(&now); | 572 | read_persistent_clock(&now); |
| 643 | if (!timespec_valid_strict(&now)) { | ||
| 644 | pr_warn("WARNING: Persistent clock returned invalid value!\n" | ||
| 645 | " Check your CMOS/BIOS settings.\n"); | ||
| 646 | now.tv_sec = 0; | ||
| 647 | now.tv_nsec = 0; | ||
| 648 | } | ||
| 649 | |||
| 650 | read_boot_clock(&boot); | 573 | read_boot_clock(&boot); |
| 651 | if (!timespec_valid_strict(&boot)) { | ||
| 652 | pr_warn("WARNING: Boot clock returned invalid value!\n" | ||
| 653 | " Check your CMOS/BIOS settings.\n"); | ||
| 654 | boot.tv_sec = 0; | ||
| 655 | boot.tv_nsec = 0; | ||
| 656 | } | ||
| 657 | 574 | ||
| 658 | seqlock_init(&tk->lock); | 575 | write_seqlock_irqsave(&xtime_lock, flags); |
| 659 | 576 | ||
| 660 | ntp_init(); | 577 | ntp_init(); |
| 661 | 578 | ||
| 662 | write_seqlock_irqsave(&tk->lock, flags); | ||
| 663 | clock = clocksource_default_clock(); | 579 | clock = clocksource_default_clock(); |
| 664 | if (clock->enable) | 580 | if (clock->enable) |
| 665 | clock->enable(clock); | 581 | clock->enable(clock); |
| 666 | tk_setup_internals(tk, clock); | 582 | timekeeper_setup_internals(clock); |
| 667 | 583 | ||
| 668 | tk_set_xtime(tk, &now); | 584 | xtime.tv_sec = now.tv_sec; |
| 669 | tk->raw_time.tv_sec = 0; | 585 | xtime.tv_nsec = now.tv_nsec; |
| 670 | tk->raw_time.tv_nsec = 0; | 586 | raw_time.tv_sec = 0; |
| 671 | if (boot.tv_sec == 0 && boot.tv_nsec == 0) | 587 | raw_time.tv_nsec = 0; |
| 672 | boot = tk_xtime(tk); | 588 | if (boot.tv_sec == 0 && boot.tv_nsec == 0) { |
| 673 | 589 | boot.tv_sec = xtime.tv_sec; | |
| 674 | set_normalized_timespec(&tmp, -boot.tv_sec, -boot.tv_nsec); | 590 | boot.tv_nsec = xtime.tv_nsec; |
| 675 | tk_set_wall_to_mono(tk, tmp); | 591 | } |
| 676 | 592 | set_normalized_timespec(&wall_to_monotonic, | |
| 677 | tmp.tv_sec = 0; | 593 | -boot.tv_sec, -boot.tv_nsec); |
| 678 | tmp.tv_nsec = 0; | 594 | total_sleep_time.tv_sec = 0; |
| 679 | tk_set_sleep_time(tk, tmp); | 595 | total_sleep_time.tv_nsec = 0; |
| 680 | 596 | write_sequnlock_irqrestore(&xtime_lock, flags); | |
| 681 | write_sequnlock_irqrestore(&tk->lock, flags); | ||
| 682 | } | 597 | } |
| 683 | 598 | ||
| 684 | /* time in seconds when suspend began */ | 599 | /* time in seconds when suspend began */ |
| @@ -691,19 +606,20 @@ static struct timespec timekeeping_suspend_time; | |||
| 691 | * Takes a timespec offset measuring a suspend interval and properly | 606 | * Takes a timespec offset measuring a suspend interval and properly |
| 692 | * adds the sleep offset to the timekeeping variables. | 607 | * adds the sleep offset to the timekeeping variables. |
| 693 | */ | 608 | */ |
| 694 | static void __timekeeping_inject_sleeptime(struct timekeeper *tk, | 609 | static void __timekeeping_inject_sleeptime(struct timespec *delta) |
| 695 | struct timespec *delta) | ||
| 696 | { | 610 | { |
| 697 | if (!timespec_valid_strict(delta)) { | 611 | if (!timespec_valid(delta)) { |
| 698 | printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " | 612 | printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " |
| 699 | "sleep delta value!\n"); | 613 | "sleep delta value!\n"); |
| 700 | return; | 614 | return; |
| 701 | } | 615 | } |
| 702 | tk_xtime_add(tk, delta); | 616 | |
| 703 | tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *delta)); | 617 | xtime = timespec_add(xtime, *delta); |
| 704 | tk_set_sleep_time(tk, timespec_add(tk->total_sleep_time, *delta)); | 618 | wall_to_monotonic = timespec_sub(wall_to_monotonic, *delta); |
| 619 | total_sleep_time = timespec_add(total_sleep_time, *delta); | ||
| 705 | } | 620 | } |
| 706 | 621 | ||
| 622 | |||
| 707 | /** | 623 | /** |
| 708 | * timekeeping_inject_sleeptime - Adds suspend interval to timeekeeping values | 624 | * timekeeping_inject_sleeptime - Adds suspend interval to timeekeeping values |
| 709 | * @delta: pointer to a timespec delta value | 625 | * @delta: pointer to a timespec delta value |
| @@ -716,7 +632,6 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, | |||
| 716 | */ | 632 | */ |
| 717 | void timekeeping_inject_sleeptime(struct timespec *delta) | 633 | void timekeeping_inject_sleeptime(struct timespec *delta) |
| 718 | { | 634 | { |
| 719 | struct timekeeper *tk = &timekeeper; | ||
| 720 | unsigned long flags; | 635 | unsigned long flags; |
| 721 | struct timespec ts; | 636 | struct timespec ts; |
| 722 | 637 | ||
| @@ -725,20 +640,23 @@ void timekeeping_inject_sleeptime(struct timespec *delta) | |||
| 725 | if (!(ts.tv_sec == 0 && ts.tv_nsec == 0)) | 640 | if (!(ts.tv_sec == 0 && ts.tv_nsec == 0)) |
| 726 | return; | 641 | return; |
| 727 | 642 | ||
| 728 | write_seqlock_irqsave(&tk->lock, flags); | 643 | write_seqlock_irqsave(&xtime_lock, flags); |
| 729 | 644 | timekeeping_forward_now(); | |
| 730 | timekeeping_forward_now(tk); | ||
| 731 | 645 | ||
| 732 | __timekeeping_inject_sleeptime(tk, delta); | 646 | __timekeeping_inject_sleeptime(delta); |
| 733 | 647 | ||
| 734 | timekeeping_update(tk, true); | 648 | timekeeper.ntp_error = 0; |
| 649 | ntp_clear(); | ||
| 650 | update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, | ||
| 651 | timekeeper.mult); | ||
| 735 | 652 | ||
| 736 | write_sequnlock_irqrestore(&tk->lock, flags); | 653 | write_sequnlock_irqrestore(&xtime_lock, flags); |
| 737 | 654 | ||
| 738 | /* signal hrtimers about time change */ | 655 | /* signal hrtimers about time change */ |
| 739 | clock_was_set(); | 656 | clock_was_set(); |
| 740 | } | 657 | } |
| 741 | 658 | ||
| 659 | |||
| 742 | /** | 660 | /** |
| 743 | * timekeeping_resume - Resumes the generic timekeeping subsystem. | 661 | * timekeeping_resume - Resumes the generic timekeeping subsystem. |
| 744 | * | 662 | * |
| @@ -748,27 +666,24 @@ void timekeeping_inject_sleeptime(struct timespec *delta) | |||
| 748 | */ | 666 | */ |
| 749 | static void timekeeping_resume(void) | 667 | static void timekeeping_resume(void) |
| 750 | { | 668 | { |
| 751 | struct timekeeper *tk = &timekeeper; | ||
| 752 | unsigned long flags; | 669 | unsigned long flags; |
| 753 | struct timespec ts; | 670 | struct timespec ts; |
| 754 | 671 | ||
| 755 | read_persistent_clock(&ts); | 672 | read_persistent_clock(&ts); |
| 756 | 673 | ||
| 757 | clockevents_resume(); | ||
| 758 | clocksource_resume(); | 674 | clocksource_resume(); |
| 759 | 675 | ||
| 760 | write_seqlock_irqsave(&tk->lock, flags); | 676 | write_seqlock_irqsave(&xtime_lock, flags); |
| 761 | 677 | ||
| 762 | if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { | 678 | if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { |
| 763 | ts = timespec_sub(ts, timekeeping_suspend_time); | 679 | ts = timespec_sub(ts, timekeeping_suspend_time); |
| 764 | __timekeeping_inject_sleeptime(tk, &ts); | 680 | __timekeeping_inject_sleeptime(&ts); |
| 765 | } | 681 | } |
| 766 | /* re-base the last cycle value */ | 682 | /* re-base the last cycle value */ |
| 767 | tk->clock->cycle_last = tk->clock->read(tk->clock); | 683 | timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); |
| 768 | tk->ntp_error = 0; | 684 | timekeeper.ntp_error = 0; |
| 769 | timekeeping_suspended = 0; | 685 | timekeeping_suspended = 0; |
| 770 | timekeeping_update(tk, false); | 686 | write_sequnlock_irqrestore(&xtime_lock, flags); |
| 771 | write_sequnlock_irqrestore(&tk->lock, flags); | ||
| 772 | 687 | ||
| 773 | touch_softlockup_watchdog(); | 688 | touch_softlockup_watchdog(); |
| 774 | 689 | ||
| @@ -780,15 +695,14 @@ static void timekeeping_resume(void) | |||
| 780 | 695 | ||
| 781 | static int timekeeping_suspend(void) | 696 | static int timekeeping_suspend(void) |
| 782 | { | 697 | { |
| 783 | struct timekeeper *tk = &timekeeper; | ||
| 784 | unsigned long flags; | 698 | unsigned long flags; |
| 785 | struct timespec delta, delta_delta; | 699 | struct timespec delta, delta_delta; |
| 786 | static struct timespec old_delta; | 700 | static struct timespec old_delta; |
| 787 | 701 | ||
| 788 | read_persistent_clock(&timekeeping_suspend_time); | 702 | read_persistent_clock(&timekeeping_suspend_time); |
| 789 | 703 | ||
| 790 | write_seqlock_irqsave(&tk->lock, flags); | 704 | write_seqlock_irqsave(&xtime_lock, flags); |
| 791 | timekeeping_forward_now(tk); | 705 | timekeeping_forward_now(); |
| 792 | timekeeping_suspended = 1; | 706 | timekeeping_suspended = 1; |
| 793 | 707 | ||
| 794 | /* | 708 | /* |
| @@ -797,7 +711,7 @@ static int timekeeping_suspend(void) | |||
| 797 | * try to compensate so the difference in system time | 711 | * try to compensate so the difference in system time |
| 798 | * and persistent_clock time stays close to constant. | 712 | * and persistent_clock time stays close to constant. |
| 799 | */ | 713 | */ |
| 800 | delta = timespec_sub(tk_xtime(tk), timekeeping_suspend_time); | 714 | delta = timespec_sub(xtime, timekeeping_suspend_time); |
| 801 | delta_delta = timespec_sub(delta, old_delta); | 715 | delta_delta = timespec_sub(delta, old_delta); |
| 802 | if (abs(delta_delta.tv_sec) >= 2) { | 716 | if (abs(delta_delta.tv_sec) >= 2) { |
| 803 | /* | 717 | /* |
| @@ -810,11 +724,10 @@ static int timekeeping_suspend(void) | |||
| 810 | timekeeping_suspend_time = | 724 | timekeeping_suspend_time = |
| 811 | timespec_add(timekeeping_suspend_time, delta_delta); | 725 | timespec_add(timekeeping_suspend_time, delta_delta); |
| 812 | } | 726 | } |
| 813 | write_sequnlock_irqrestore(&tk->lock, flags); | 727 | write_sequnlock_irqrestore(&xtime_lock, flags); |
| 814 | 728 | ||
| 815 | clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); | 729 | clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); |
| 816 | clocksource_suspend(); | 730 | clocksource_suspend(); |
| 817 | clockevents_suspend(); | ||
| 818 | 731 | ||
| 819 | return 0; | 732 | return 0; |
| 820 | } | 733 | } |
| @@ -837,8 +750,7 @@ device_initcall(timekeeping_init_ops); | |||
| 837 | * If the error is already larger, we look ahead even further | 750 | * If the error is already larger, we look ahead even further |
| 838 | * to compensate for late or lost adjustments. | 751 | * to compensate for late or lost adjustments. |
| 839 | */ | 752 | */ |
| 840 | static __always_inline int timekeeping_bigadjust(struct timekeeper *tk, | 753 | static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, |
| 841 | s64 error, s64 *interval, | ||
| 842 | s64 *offset) | 754 | s64 *offset) |
| 843 | { | 755 | { |
| 844 | s64 tick_error, i; | 756 | s64 tick_error, i; |
| @@ -854,7 +766,7 @@ static __always_inline int timekeeping_bigadjust(struct timekeeper *tk, | |||
| 854 | * here. This is tuned so that an error of about 1 msec is adjusted | 766 | * here. This is tuned so that an error of about 1 msec is adjusted |
| 855 | * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). | 767 | * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). |
| 856 | */ | 768 | */ |
| 857 | error2 = tk->ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); | 769 | error2 = timekeeper.ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); |
| 858 | error2 = abs(error2); | 770 | error2 = abs(error2); |
| 859 | for (look_ahead = 0; error2 > 0; look_ahead++) | 771 | for (look_ahead = 0; error2 > 0; look_ahead++) |
| 860 | error2 >>= 2; | 772 | error2 >>= 2; |
| @@ -863,8 +775,8 @@ static __always_inline int timekeeping_bigadjust(struct timekeeper *tk, | |||
| 863 | * Now calculate the error in (1 << look_ahead) ticks, but first | 775 | * Now calculate the error in (1 << look_ahead) ticks, but first |
| 864 | * remove the single look ahead already included in the error. | 776 | * remove the single look ahead already included in the error. |
| 865 | */ | 777 | */ |
| 866 | tick_error = ntp_tick_length() >> (tk->ntp_error_shift + 1); | 778 | tick_error = tick_length >> (timekeeper.ntp_error_shift + 1); |
| 867 | tick_error -= tk->xtime_interval >> 1; | 779 | tick_error -= timekeeper.xtime_interval >> 1; |
| 868 | error = ((error - tick_error) >> look_ahead) + tick_error; | 780 | error = ((error - tick_error) >> look_ahead) + tick_error; |
| 869 | 781 | ||
| 870 | /* Finally calculate the adjustment shift value. */ | 782 | /* Finally calculate the adjustment shift value. */ |
| @@ -889,181 +801,43 @@ static __always_inline int timekeeping_bigadjust(struct timekeeper *tk, | |||
| 889 | * this is optimized for the most common adjustments of -1,0,1, | 801 | * this is optimized for the most common adjustments of -1,0,1, |
| 890 | * for other values we can do a bit more work. | 802 | * for other values we can do a bit more work. |
| 891 | */ | 803 | */ |
| 892 | static void timekeeping_adjust(struct timekeeper *tk, s64 offset) | 804 | static void timekeeping_adjust(s64 offset) |
| 893 | { | 805 | { |
| 894 | s64 error, interval = tk->cycle_interval; | 806 | s64 error, interval = timekeeper.cycle_interval; |
| 895 | int adj; | 807 | int adj; |
| 896 | 808 | ||
| 897 | /* | 809 | error = timekeeper.ntp_error >> (timekeeper.ntp_error_shift - 1); |
| 898 | * The point of this is to check if the error is greater than half | ||
| 899 | * an interval. | ||
| 900 | * | ||
| 901 | * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs. | ||
| 902 | * | ||
| 903 | * Note we subtract one in the shift, so that error is really error*2. | ||
| 904 | * This "saves" dividing(shifting) interval twice, but keeps the | ||
| 905 | * (error > interval) comparison as still measuring if error is | ||
| 906 | * larger than half an interval. | ||
| 907 | * | ||
| 908 | * Note: It does not "save" on aggravation when reading the code. | ||
| 909 | */ | ||
| 910 | error = tk->ntp_error >> (tk->ntp_error_shift - 1); | ||
| 911 | if (error > interval) { | 810 | if (error > interval) { |
| 912 | /* | ||
| 913 | * We now divide error by 4(via shift), which checks if | ||
| 914 | * the error is greater than twice the interval. | ||
| 915 | * If it is greater, we need a bigadjust, if its smaller, | ||
| 916 | * we can adjust by 1. | ||
| 917 | */ | ||
| 918 | error >>= 2; | 811 | error >>= 2; |
| 919 | /* | ||
| 920 | * XXX - In update_wall_time, we round up to the next | ||
| 921 | * nanosecond, and store the amount rounded up into | ||
| 922 | * the error. This causes the likely below to be unlikely. | ||
| 923 | * | ||
| 924 | * The proper fix is to avoid rounding up by using | ||
| 925 | * the high precision tk->xtime_nsec instead of | ||
| 926 | * xtime.tv_nsec everywhere. Fixing this will take some | ||
| 927 | * time. | ||
| 928 | */ | ||
| 929 | if (likely(error <= interval)) | 812 | if (likely(error <= interval)) |
| 930 | adj = 1; | 813 | adj = 1; |
| 931 | else | 814 | else |
| 932 | adj = timekeeping_bigadjust(tk, error, &interval, &offset); | 815 | adj = timekeeping_bigadjust(error, &interval, &offset); |
| 933 | } else { | 816 | } else if (error < -interval) { |
| 934 | if (error < -interval) { | 817 | error >>= 2; |
| 935 | /* See comment above, this is just switched for the negative */ | 818 | if (likely(error >= -interval)) { |
| 936 | error >>= 2; | 819 | adj = -1; |
| 937 | if (likely(error >= -interval)) { | 820 | interval = -interval; |
| 938 | adj = -1; | 821 | offset = -offset; |
| 939 | interval = -interval; | 822 | } else |
| 940 | offset = -offset; | 823 | adj = timekeeping_bigadjust(error, &interval, &offset); |
| 941 | } else { | 824 | } else |
| 942 | adj = timekeeping_bigadjust(tk, error, &interval, &offset); | 825 | return; |
| 943 | } | ||
| 944 | } else { | ||
| 945 | goto out_adjust; | ||
| 946 | } | ||
| 947 | } | ||
| 948 | |||
| 949 | if (unlikely(tk->clock->maxadj && | ||
| 950 | (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) { | ||
| 951 | printk_once(KERN_WARNING | ||
| 952 | "Adjusting %s more than 11%% (%ld vs %ld)\n", | ||
| 953 | tk->clock->name, (long)tk->mult + adj, | ||
| 954 | (long)tk->clock->mult + tk->clock->maxadj); | ||
| 955 | } | ||
| 956 | /* | ||
| 957 | * So the following can be confusing. | ||
| 958 | * | ||
| 959 | * To keep things simple, lets assume adj == 1 for now. | ||
| 960 | * | ||
| 961 | * When adj != 1, remember that the interval and offset values | ||
| 962 | * have been appropriately scaled so the math is the same. | ||
| 963 | * | ||
| 964 | * The basic idea here is that we're increasing the multiplier | ||
| 965 | * by one, this causes the xtime_interval to be incremented by | ||
| 966 | * one cycle_interval. This is because: | ||
| 967 | * xtime_interval = cycle_interval * mult | ||
| 968 | * So if mult is being incremented by one: | ||
| 969 | * xtime_interval = cycle_interval * (mult + 1) | ||
| 970 | * Its the same as: | ||
| 971 | * xtime_interval = (cycle_interval * mult) + cycle_interval | ||
| 972 | * Which can be shortened to: | ||
| 973 | * xtime_interval += cycle_interval | ||
| 974 | * | ||
| 975 | * So offset stores the non-accumulated cycles. Thus the current | ||
| 976 | * time (in shifted nanoseconds) is: | ||
| 977 | * now = (offset * adj) + xtime_nsec | ||
| 978 | * Now, even though we're adjusting the clock frequency, we have | ||
| 979 | * to keep time consistent. In other words, we can't jump back | ||
| 980 | * in time, and we also want to avoid jumping forward in time. | ||
| 981 | * | ||
| 982 | * So given the same offset value, we need the time to be the same | ||
| 983 | * both before and after the freq adjustment. | ||
| 984 | * now = (offset * adj_1) + xtime_nsec_1 | ||
| 985 | * now = (offset * adj_2) + xtime_nsec_2 | ||
| 986 | * So: | ||
| 987 | * (offset * adj_1) + xtime_nsec_1 = | ||
| 988 | * (offset * adj_2) + xtime_nsec_2 | ||
| 989 | * And we know: | ||
| 990 | * adj_2 = adj_1 + 1 | ||
| 991 | * So: | ||
| 992 | * (offset * adj_1) + xtime_nsec_1 = | ||
| 993 | * (offset * (adj_1+1)) + xtime_nsec_2 | ||
| 994 | * (offset * adj_1) + xtime_nsec_1 = | ||
| 995 | * (offset * adj_1) + offset + xtime_nsec_2 | ||
| 996 | * Canceling the sides: | ||
| 997 | * xtime_nsec_1 = offset + xtime_nsec_2 | ||
| 998 | * Which gives us: | ||
| 999 | * xtime_nsec_2 = xtime_nsec_1 - offset | ||
| 1000 | * Which simplfies to: | ||
| 1001 | * xtime_nsec -= offset | ||
| 1002 | * | ||
| 1003 | * XXX - TODO: Doc ntp_error calculation. | ||
| 1004 | */ | ||
| 1005 | tk->mult += adj; | ||
| 1006 | tk->xtime_interval += interval; | ||
| 1007 | tk->xtime_nsec -= offset; | ||
| 1008 | tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; | ||
| 1009 | |||
| 1010 | out_adjust: | ||
| 1011 | /* | ||
| 1012 | * It may be possible that when we entered this function, xtime_nsec | ||
| 1013 | * was very small. Further, if we're slightly speeding the clocksource | ||
| 1014 | * in the code above, its possible the required corrective factor to | ||
| 1015 | * xtime_nsec could cause it to underflow. | ||
| 1016 | * | ||
| 1017 | * Now, since we already accumulated the second, cannot simply roll | ||
| 1018 | * the accumulated second back, since the NTP subsystem has been | ||
| 1019 | * notified via second_overflow. So instead we push xtime_nsec forward | ||
| 1020 | * by the amount we underflowed, and add that amount into the error. | ||
| 1021 | * | ||
| 1022 | * We'll correct this error next time through this function, when | ||
| 1023 | * xtime_nsec is not as small. | ||
| 1024 | */ | ||
| 1025 | if (unlikely((s64)tk->xtime_nsec < 0)) { | ||
| 1026 | s64 neg = -(s64)tk->xtime_nsec; | ||
| 1027 | tk->xtime_nsec = 0; | ||
| 1028 | tk->ntp_error += neg << tk->ntp_error_shift; | ||
| 1029 | } | ||
| 1030 | 826 | ||
| 827 | WARN_ONCE(timekeeper.clock->maxadj && | ||
| 828 | (timekeeper.mult + adj > timekeeper.clock->mult + | ||
| 829 | timekeeper.clock->maxadj), | ||
| 830 | "Adjusting %s more then 11%% (%ld vs %ld)\n", | ||
| 831 | timekeeper.clock->name, (long)timekeeper.mult + adj, | ||
| 832 | (long)timekeeper.clock->mult + | ||
| 833 | timekeeper.clock->maxadj); | ||
| 834 | timekeeper.mult += adj; | ||
| 835 | timekeeper.xtime_interval += interval; | ||
| 836 | timekeeper.xtime_nsec -= offset; | ||
| 837 | timekeeper.ntp_error -= (interval - offset) << | ||
| 838 | timekeeper.ntp_error_shift; | ||
| 1031 | } | 839 | } |
| 1032 | 840 | ||
| 1033 | /** | ||
| 1034 | * accumulate_nsecs_to_secs - Accumulates nsecs into secs | ||
| 1035 | * | ||
| 1036 | * Helper function that accumulates a the nsecs greater then a second | ||
| 1037 | * from the xtime_nsec field to the xtime_secs field. | ||
| 1038 | * It also calls into the NTP code to handle leapsecond processing. | ||
| 1039 | * | ||
| 1040 | */ | ||
| 1041 | static inline void accumulate_nsecs_to_secs(struct timekeeper *tk) | ||
| 1042 | { | ||
| 1043 | u64 nsecps = (u64)NSEC_PER_SEC << tk->shift; | ||
| 1044 | |||
| 1045 | while (tk->xtime_nsec >= nsecps) { | ||
| 1046 | int leap; | ||
| 1047 | |||
| 1048 | tk->xtime_nsec -= nsecps; | ||
| 1049 | tk->xtime_sec++; | ||
| 1050 | |||
| 1051 | /* Figure out if its a leap sec and apply if needed */ | ||
| 1052 | leap = second_overflow(tk->xtime_sec); | ||
| 1053 | if (unlikely(leap)) { | ||
| 1054 | struct timespec ts; | ||
| 1055 | |||
| 1056 | tk->xtime_sec += leap; | ||
| 1057 | |||
| 1058 | ts.tv_sec = leap; | ||
| 1059 | ts.tv_nsec = 0; | ||
| 1060 | tk_set_wall_to_mono(tk, | ||
| 1061 | timespec_sub(tk->wall_to_monotonic, ts)); | ||
| 1062 | |||
| 1063 | clock_was_set_delayed(); | ||
| 1064 | } | ||
| 1065 | } | ||
| 1066 | } | ||
| 1067 | 841 | ||
| 1068 | /** | 842 | /** |
| 1069 | * logarithmic_accumulation - shifted accumulation of cycles | 843 | * logarithmic_accumulation - shifted accumulation of cycles |
| @@ -1074,136 +848,137 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk) | |||
| 1074 | * | 848 | * |
| 1075 | * Returns the unconsumed cycles. | 849 | * Returns the unconsumed cycles. |
| 1076 | */ | 850 | */ |
| 1077 | static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, | 851 | static cycle_t logarithmic_accumulation(cycle_t offset, int shift) |
| 1078 | u32 shift) | ||
| 1079 | { | 852 | { |
| 853 | u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; | ||
| 1080 | u64 raw_nsecs; | 854 | u64 raw_nsecs; |
| 1081 | 855 | ||
| 1082 | /* If the offset is smaller then a shifted interval, do nothing */ | 856 | /* If the offset is smaller then a shifted interval, do nothing */ |
| 1083 | if (offset < tk->cycle_interval<<shift) | 857 | if (offset < timekeeper.cycle_interval<<shift) |
| 1084 | return offset; | 858 | return offset; |
| 1085 | 859 | ||
| 1086 | /* Accumulate one shifted interval */ | 860 | /* Accumulate one shifted interval */ |
| 1087 | offset -= tk->cycle_interval << shift; | 861 | offset -= timekeeper.cycle_interval << shift; |
| 1088 | tk->clock->cycle_last += tk->cycle_interval << shift; | 862 | timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift; |
| 1089 | 863 | ||
| 1090 | tk->xtime_nsec += tk->xtime_interval << shift; | 864 | timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; |
| 1091 | accumulate_nsecs_to_secs(tk); | 865 | while (timekeeper.xtime_nsec >= nsecps) { |
| 866 | timekeeper.xtime_nsec -= nsecps; | ||
| 867 | xtime.tv_sec++; | ||
| 868 | second_overflow(); | ||
| 869 | } | ||
| 1092 | 870 | ||
| 1093 | /* Accumulate raw time */ | 871 | /* Accumulate raw time */ |
| 1094 | raw_nsecs = (u64)tk->raw_interval << shift; | 872 | raw_nsecs = timekeeper.raw_interval << shift; |
| 1095 | raw_nsecs += tk->raw_time.tv_nsec; | 873 | raw_nsecs += raw_time.tv_nsec; |
| 1096 | if (raw_nsecs >= NSEC_PER_SEC) { | 874 | if (raw_nsecs >= NSEC_PER_SEC) { |
| 1097 | u64 raw_secs = raw_nsecs; | 875 | u64 raw_secs = raw_nsecs; |
| 1098 | raw_nsecs = do_div(raw_secs, NSEC_PER_SEC); | 876 | raw_nsecs = do_div(raw_secs, NSEC_PER_SEC); |
| 1099 | tk->raw_time.tv_sec += raw_secs; | 877 | raw_time.tv_sec += raw_secs; |
| 1100 | } | 878 | } |
| 1101 | tk->raw_time.tv_nsec = raw_nsecs; | 879 | raw_time.tv_nsec = raw_nsecs; |
| 1102 | 880 | ||
| 1103 | /* Accumulate error between NTP and clock interval */ | 881 | /* Accumulate error between NTP and clock interval */ |
| 1104 | tk->ntp_error += ntp_tick_length() << shift; | 882 | timekeeper.ntp_error += tick_length << shift; |
| 1105 | tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) << | 883 | timekeeper.ntp_error -= |
| 1106 | (tk->ntp_error_shift + shift); | 884 | (timekeeper.xtime_interval + timekeeper.xtime_remainder) << |
| 885 | (timekeeper.ntp_error_shift + shift); | ||
| 1107 | 886 | ||
| 1108 | return offset; | 887 | return offset; |
| 1109 | } | 888 | } |
| 1110 | 889 | ||
| 1111 | #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD | ||
| 1112 | static inline void old_vsyscall_fixup(struct timekeeper *tk) | ||
| 1113 | { | ||
| 1114 | s64 remainder; | ||
| 1115 | |||
| 1116 | /* | ||
| 1117 | * Store only full nanoseconds into xtime_nsec after rounding | ||
| 1118 | * it up and add the remainder to the error difference. | ||
| 1119 | * XXX - This is necessary to avoid small 1ns inconsistnecies caused | ||
| 1120 | * by truncating the remainder in vsyscalls. However, it causes | ||
| 1121 | * additional work to be done in timekeeping_adjust(). Once | ||
| 1122 | * the vsyscall implementations are converted to use xtime_nsec | ||
| 1123 | * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD | ||
| 1124 | * users are removed, this can be killed. | ||
| 1125 | */ | ||
| 1126 | remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1); | ||
| 1127 | tk->xtime_nsec -= remainder; | ||
| 1128 | tk->xtime_nsec += 1ULL << tk->shift; | ||
| 1129 | tk->ntp_error += remainder << tk->ntp_error_shift; | ||
| 1130 | |||
| 1131 | } | ||
| 1132 | #else | ||
| 1133 | #define old_vsyscall_fixup(tk) | ||
| 1134 | #endif | ||
| 1135 | |||
| 1136 | |||
| 1137 | 890 | ||
| 1138 | /** | 891 | /** |
| 1139 | * update_wall_time - Uses the current clocksource to increment the wall time | 892 | * update_wall_time - Uses the current clocksource to increment the wall time |
| 1140 | * | 893 | * |
| 894 | * Called from the timer interrupt, must hold a write on xtime_lock. | ||
| 1141 | */ | 895 | */ |
| 1142 | static void update_wall_time(void) | 896 | static void update_wall_time(void) |
| 1143 | { | 897 | { |
| 1144 | struct clocksource *clock; | 898 | struct clocksource *clock; |
| 1145 | struct timekeeper *tk = &timekeeper; | ||
| 1146 | cycle_t offset; | 899 | cycle_t offset; |
| 1147 | int shift = 0, maxshift; | 900 | int shift = 0, maxshift; |
| 1148 | unsigned long flags; | ||
| 1149 | |||
| 1150 | write_seqlock_irqsave(&tk->lock, flags); | ||
| 1151 | 901 | ||
| 1152 | /* Make sure we're fully resumed: */ | 902 | /* Make sure we're fully resumed: */ |
| 1153 | if (unlikely(timekeeping_suspended)) | 903 | if (unlikely(timekeeping_suspended)) |
| 1154 | goto out; | 904 | return; |
| 1155 | 905 | ||
| 1156 | clock = tk->clock; | 906 | clock = timekeeper.clock; |
| 1157 | 907 | ||
| 1158 | #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET | 908 | #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET |
| 1159 | offset = tk->cycle_interval; | 909 | offset = timekeeper.cycle_interval; |
| 1160 | #else | 910 | #else |
| 1161 | offset = (clock->read(clock) - clock->cycle_last) & clock->mask; | 911 | offset = (clock->read(clock) - clock->cycle_last) & clock->mask; |
| 1162 | #endif | 912 | #endif |
| 1163 | 913 | timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift; | |
| 1164 | /* Check if there's really nothing to do */ | ||
| 1165 | if (offset < tk->cycle_interval) | ||
| 1166 | goto out; | ||
| 1167 | 914 | ||
| 1168 | /* | 915 | /* |
| 1169 | * With NO_HZ we may have to accumulate many cycle_intervals | 916 | * With NO_HZ we may have to accumulate many cycle_intervals |
| 1170 | * (think "ticks") worth of time at once. To do this efficiently, | 917 | * (think "ticks") worth of time at once. To do this efficiently, |
| 1171 | * we calculate the largest doubling multiple of cycle_intervals | 918 | * we calculate the largest doubling multiple of cycle_intervals |
| 1172 | * that is smaller than the offset. We then accumulate that | 919 | * that is smaller then the offset. We then accumulate that |
| 1173 | * chunk in one go, and then try to consume the next smaller | 920 | * chunk in one go, and then try to consume the next smaller |
| 1174 | * doubled multiple. | 921 | * doubled multiple. |
| 1175 | */ | 922 | */ |
| 1176 | shift = ilog2(offset) - ilog2(tk->cycle_interval); | 923 | shift = ilog2(offset) - ilog2(timekeeper.cycle_interval); |
| 1177 | shift = max(0, shift); | 924 | shift = max(0, shift); |
| 1178 | /* Bound shift to one less than what overflows tick_length */ | 925 | /* Bound shift to one less then what overflows tick_length */ |
| 1179 | maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; | 926 | maxshift = (8*sizeof(tick_length) - (ilog2(tick_length)+1)) - 1; |
| 1180 | shift = min(shift, maxshift); | 927 | shift = min(shift, maxshift); |
| 1181 | while (offset >= tk->cycle_interval) { | 928 | while (offset >= timekeeper.cycle_interval) { |
| 1182 | offset = logarithmic_accumulation(tk, offset, shift); | 929 | offset = logarithmic_accumulation(offset, shift); |
| 1183 | if (offset < tk->cycle_interval<<shift) | 930 | if(offset < timekeeper.cycle_interval<<shift) |
| 1184 | shift--; | 931 | shift--; |
| 1185 | } | 932 | } |
| 1186 | 933 | ||
| 1187 | /* correct the clock when NTP error is too big */ | 934 | /* correct the clock when NTP error is too big */ |
| 1188 | timekeeping_adjust(tk, offset); | 935 | timekeeping_adjust(offset); |
| 1189 | 936 | ||
| 1190 | /* | 937 | /* |
| 1191 | * XXX This can be killed once everyone converts | 938 | * Since in the loop above, we accumulate any amount of time |
| 1192 | * to the new update_vsyscall. | 939 | * in xtime_nsec over a second into xtime.tv_sec, its possible for |
| 940 | * xtime_nsec to be fairly small after the loop. Further, if we're | ||
| 941 | * slightly speeding the clocksource up in timekeeping_adjust(), | ||
| 942 | * its possible the required corrective factor to xtime_nsec could | ||
| 943 | * cause it to underflow. | ||
| 944 | * | ||
| 945 | * Now, we cannot simply roll the accumulated second back, since | ||
| 946 | * the NTP subsystem has been notified via second_overflow. So | ||
| 947 | * instead we push xtime_nsec forward by the amount we underflowed, | ||
| 948 | * and add that amount into the error. | ||
| 949 | * | ||
| 950 | * We'll correct this error next time through this function, when | ||
| 951 | * xtime_nsec is not as small. | ||
| 1193 | */ | 952 | */ |
| 1194 | old_vsyscall_fixup(tk); | 953 | if (unlikely((s64)timekeeper.xtime_nsec < 0)) { |
| 954 | s64 neg = -(s64)timekeeper.xtime_nsec; | ||
| 955 | timekeeper.xtime_nsec = 0; | ||
| 956 | timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; | ||
| 957 | } | ||
| 958 | |||
| 1195 | 959 | ||
| 1196 | /* | 960 | /* |
| 1197 | * Finally, make sure that after the rounding | 961 | * Store full nanoseconds into xtime after rounding it up and |
| 1198 | * xtime_nsec isn't larger than NSEC_PER_SEC | 962 | * add the remainder to the error difference. |
| 1199 | */ | 963 | */ |
| 1200 | accumulate_nsecs_to_secs(tk); | 964 | xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1; |
| 965 | timekeeper.xtime_nsec -= (s64) xtime.tv_nsec << timekeeper.shift; | ||
| 966 | timekeeper.ntp_error += timekeeper.xtime_nsec << | ||
| 967 | timekeeper.ntp_error_shift; | ||
| 1201 | 968 | ||
| 1202 | timekeeping_update(tk, false); | 969 | /* |
| 1203 | 970 | * Finally, make sure that after the rounding | |
| 1204 | out: | 971 | * xtime.tv_nsec isn't larger then NSEC_PER_SEC |
| 1205 | write_sequnlock_irqrestore(&tk->lock, flags); | 972 | */ |
| 973 | if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) { | ||
| 974 | xtime.tv_nsec -= NSEC_PER_SEC; | ||
| 975 | xtime.tv_sec++; | ||
| 976 | second_overflow(); | ||
| 977 | } | ||
| 1206 | 978 | ||
| 979 | /* check to see if there is a new clocksource to use */ | ||
| 980 | update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, | ||
| 981 | timekeeper.mult); | ||
| 1207 | } | 982 | } |
| 1208 | 983 | ||
| 1209 | /** | 984 | /** |
| @@ -1219,18 +994,16 @@ out: | |||
| 1219 | */ | 994 | */ |
| 1220 | void getboottime(struct timespec *ts) | 995 | void getboottime(struct timespec *ts) |
| 1221 | { | 996 | { |
| 1222 | struct timekeeper *tk = &timekeeper; | ||
| 1223 | struct timespec boottime = { | 997 | struct timespec boottime = { |
| 1224 | .tv_sec = tk->wall_to_monotonic.tv_sec + | 998 | .tv_sec = wall_to_monotonic.tv_sec + total_sleep_time.tv_sec, |
| 1225 | tk->total_sleep_time.tv_sec, | 999 | .tv_nsec = wall_to_monotonic.tv_nsec + total_sleep_time.tv_nsec |
| 1226 | .tv_nsec = tk->wall_to_monotonic.tv_nsec + | ||
| 1227 | tk->total_sleep_time.tv_nsec | ||
| 1228 | }; | 1000 | }; |
| 1229 | 1001 | ||
| 1230 | set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); | 1002 | set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); |
| 1231 | } | 1003 | } |
| 1232 | EXPORT_SYMBOL_GPL(getboottime); | 1004 | EXPORT_SYMBOL_GPL(getboottime); |
| 1233 | 1005 | ||
| 1006 | |||
| 1234 | /** | 1007 | /** |
| 1235 | * get_monotonic_boottime - Returns monotonic time since boot | 1008 | * get_monotonic_boottime - Returns monotonic time since boot |
| 1236 | * @ts: pointer to the timespec to be set | 1009 | * @ts: pointer to the timespec to be set |
| @@ -1242,25 +1015,23 @@ EXPORT_SYMBOL_GPL(getboottime); | |||
| 1242 | */ | 1015 | */ |
| 1243 | void get_monotonic_boottime(struct timespec *ts) | 1016 | void get_monotonic_boottime(struct timespec *ts) |
| 1244 | { | 1017 | { |
| 1245 | struct timekeeper *tk = &timekeeper; | ||
| 1246 | struct timespec tomono, sleep; | 1018 | struct timespec tomono, sleep; |
| 1247 | s64 nsec; | ||
| 1248 | unsigned int seq; | 1019 | unsigned int seq; |
| 1020 | s64 nsecs; | ||
| 1249 | 1021 | ||
| 1250 | WARN_ON(timekeeping_suspended); | 1022 | WARN_ON(timekeeping_suspended); |
| 1251 | 1023 | ||
| 1252 | do { | 1024 | do { |
| 1253 | seq = read_seqbegin(&tk->lock); | 1025 | seq = read_seqbegin(&xtime_lock); |
| 1254 | ts->tv_sec = tk->xtime_sec; | 1026 | *ts = xtime; |
| 1255 | nsec = timekeeping_get_ns(tk); | 1027 | tomono = wall_to_monotonic; |
| 1256 | tomono = tk->wall_to_monotonic; | 1028 | sleep = total_sleep_time; |
| 1257 | sleep = tk->total_sleep_time; | 1029 | nsecs = timekeeping_get_ns(); |
| 1258 | 1030 | ||
| 1259 | } while (read_seqretry(&tk->lock, seq)); | 1031 | } while (read_seqretry(&xtime_lock, seq)); |
| 1260 | 1032 | ||
| 1261 | ts->tv_sec += tomono.tv_sec + sleep.tv_sec; | 1033 | set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec, |
| 1262 | ts->tv_nsec = 0; | 1034 | ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec + nsecs); |
| 1263 | timespec_add_ns(ts, nsec + tomono.tv_nsec + sleep.tv_nsec); | ||
| 1264 | } | 1035 | } |
| 1265 | EXPORT_SYMBOL_GPL(get_monotonic_boottime); | 1036 | EXPORT_SYMBOL_GPL(get_monotonic_boottime); |
| 1266 | 1037 | ||
| @@ -1287,38 +1058,31 @@ EXPORT_SYMBOL_GPL(ktime_get_boottime); | |||
| 1287 | */ | 1058 | */ |
| 1288 | void monotonic_to_bootbased(struct timespec *ts) | 1059 | void monotonic_to_bootbased(struct timespec *ts) |
| 1289 | { | 1060 | { |
| 1290 | struct timekeeper *tk = &timekeeper; | 1061 | *ts = timespec_add(*ts, total_sleep_time); |
| 1291 | |||
| 1292 | *ts = timespec_add(*ts, tk->total_sleep_time); | ||
| 1293 | } | 1062 | } |
| 1294 | EXPORT_SYMBOL_GPL(monotonic_to_bootbased); | 1063 | EXPORT_SYMBOL_GPL(monotonic_to_bootbased); |
| 1295 | 1064 | ||
| 1296 | unsigned long get_seconds(void) | 1065 | unsigned long get_seconds(void) |
| 1297 | { | 1066 | { |
| 1298 | struct timekeeper *tk = &timekeeper; | 1067 | return xtime.tv_sec; |
| 1299 | |||
| 1300 | return tk->xtime_sec; | ||
| 1301 | } | 1068 | } |
| 1302 | EXPORT_SYMBOL(get_seconds); | 1069 | EXPORT_SYMBOL(get_seconds); |
| 1303 | 1070 | ||
| 1304 | struct timespec __current_kernel_time(void) | 1071 | struct timespec __current_kernel_time(void) |
| 1305 | { | 1072 | { |
| 1306 | struct timekeeper *tk = &timekeeper; | 1073 | return xtime; |
| 1307 | |||
| 1308 | return tk_xtime(tk); | ||
| 1309 | } | 1074 | } |
| 1310 | 1075 | ||
| 1311 | struct timespec current_kernel_time(void) | 1076 | struct timespec current_kernel_time(void) |
| 1312 | { | 1077 | { |
| 1313 | struct timekeeper *tk = &timekeeper; | ||
| 1314 | struct timespec now; | 1078 | struct timespec now; |
| 1315 | unsigned long seq; | 1079 | unsigned long seq; |
| 1316 | 1080 | ||
| 1317 | do { | 1081 | do { |
| 1318 | seq = read_seqbegin(&tk->lock); | 1082 | seq = read_seqbegin(&xtime_lock); |
| 1319 | 1083 | ||
| 1320 | now = tk_xtime(tk); | 1084 | now = xtime; |
| 1321 | } while (read_seqretry(&tk->lock, seq)); | 1085 | } while (read_seqretry(&xtime_lock, seq)); |
| 1322 | 1086 | ||
| 1323 | return now; | 1087 | return now; |
| 1324 | } | 1088 | } |
| @@ -1326,16 +1090,15 @@ EXPORT_SYMBOL(current_kernel_time); | |||
| 1326 | 1090 | ||
| 1327 | struct timespec get_monotonic_coarse(void) | 1091 | struct timespec get_monotonic_coarse(void) |
| 1328 | { | 1092 | { |
| 1329 | struct timekeeper *tk = &timekeeper; | ||
| 1330 | struct timespec now, mono; | 1093 | struct timespec now, mono; |
| 1331 | unsigned long seq; | 1094 | unsigned long seq; |
| 1332 | 1095 | ||
| 1333 | do { | 1096 | do { |
| 1334 | seq = read_seqbegin(&tk->lock); | 1097 | seq = read_seqbegin(&xtime_lock); |
| 1335 | 1098 | ||
| 1336 | now = tk_xtime(tk); | 1099 | now = xtime; |
| 1337 | mono = tk->wall_to_monotonic; | 1100 | mono = wall_to_monotonic; |
| 1338 | } while (read_seqretry(&tk->lock, seq)); | 1101 | } while (read_seqretry(&xtime_lock, seq)); |
| 1339 | 1102 | ||
| 1340 | set_normalized_timespec(&now, now.tv_sec + mono.tv_sec, | 1103 | set_normalized_timespec(&now, now.tv_sec + mono.tv_sec, |
| 1341 | now.tv_nsec + mono.tv_nsec); | 1104 | now.tv_nsec + mono.tv_nsec); |
| @@ -1343,7 +1106,9 @@ struct timespec get_monotonic_coarse(void) | |||
| 1343 | } | 1106 | } |
| 1344 | 1107 | ||
| 1345 | /* | 1108 | /* |
| 1346 | * Must hold jiffies_lock | 1109 | * The 64-bit jiffies value is not atomic - you MUST NOT read it |
| 1110 | * without sampling the sequence number in xtime_lock. | ||
| 1111 | * jiffies is defined in the linker script... | ||
| 1347 | */ | 1112 | */ |
| 1348 | void do_timer(unsigned long ticks) | 1113 | void do_timer(unsigned long ticks) |
| 1349 | { | 1114 | { |
| @@ -1362,66 +1127,30 @@ void do_timer(unsigned long ticks) | |||
| 1362 | void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, | 1127 | void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, |
| 1363 | struct timespec *wtom, struct timespec *sleep) | 1128 | struct timespec *wtom, struct timespec *sleep) |
| 1364 | { | 1129 | { |
| 1365 | struct timekeeper *tk = &timekeeper; | ||
| 1366 | unsigned long seq; | 1130 | unsigned long seq; |
| 1367 | 1131 | ||
| 1368 | do { | 1132 | do { |
| 1369 | seq = read_seqbegin(&tk->lock); | 1133 | seq = read_seqbegin(&xtime_lock); |
| 1370 | *xtim = tk_xtime(tk); | 1134 | *xtim = xtime; |
| 1371 | *wtom = tk->wall_to_monotonic; | 1135 | *wtom = wall_to_monotonic; |
| 1372 | *sleep = tk->total_sleep_time; | 1136 | *sleep = total_sleep_time; |
| 1373 | } while (read_seqretry(&tk->lock, seq)); | 1137 | } while (read_seqretry(&xtime_lock, seq)); |
| 1374 | } | ||
| 1375 | |||
| 1376 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
| 1377 | /** | ||
| 1378 | * ktime_get_update_offsets - hrtimer helper | ||
| 1379 | * @offs_real: pointer to storage for monotonic -> realtime offset | ||
| 1380 | * @offs_boot: pointer to storage for monotonic -> boottime offset | ||
| 1381 | * | ||
| 1382 | * Returns current monotonic time and updates the offsets | ||
| 1383 | * Called from hrtimer_interupt() or retrigger_next_event() | ||
| 1384 | */ | ||
| 1385 | ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot) | ||
| 1386 | { | ||
| 1387 | struct timekeeper *tk = &timekeeper; | ||
| 1388 | ktime_t now; | ||
| 1389 | unsigned int seq; | ||
| 1390 | u64 secs, nsecs; | ||
| 1391 | |||
| 1392 | do { | ||
| 1393 | seq = read_seqbegin(&tk->lock); | ||
| 1394 | |||
| 1395 | secs = tk->xtime_sec; | ||
| 1396 | nsecs = timekeeping_get_ns(tk); | ||
| 1397 | |||
| 1398 | *offs_real = tk->offs_real; | ||
| 1399 | *offs_boot = tk->offs_boot; | ||
| 1400 | } while (read_seqretry(&tk->lock, seq)); | ||
| 1401 | |||
| 1402 | now = ktime_add_ns(ktime_set(secs, 0), nsecs); | ||
| 1403 | now = ktime_sub(now, *offs_real); | ||
| 1404 | return now; | ||
| 1405 | } | 1138 | } |
| 1406 | #endif | ||
| 1407 | 1139 | ||
| 1408 | /** | 1140 | /** |
| 1409 | * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format | 1141 | * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format |
| 1410 | */ | 1142 | */ |
| 1411 | ktime_t ktime_get_monotonic_offset(void) | 1143 | ktime_t ktime_get_monotonic_offset(void) |
| 1412 | { | 1144 | { |
| 1413 | struct timekeeper *tk = &timekeeper; | ||
| 1414 | unsigned long seq; | 1145 | unsigned long seq; |
| 1415 | struct timespec wtom; | 1146 | struct timespec wtom; |
| 1416 | 1147 | ||
| 1417 | do { | 1148 | do { |
| 1418 | seq = read_seqbegin(&tk->lock); | 1149 | seq = read_seqbegin(&xtime_lock); |
| 1419 | wtom = tk->wall_to_monotonic; | 1150 | wtom = wall_to_monotonic; |
| 1420 | } while (read_seqretry(&tk->lock, seq)); | 1151 | } while (read_seqretry(&xtime_lock, seq)); |
| 1421 | |||
| 1422 | return timespec_to_ktime(wtom); | 1152 | return timespec_to_ktime(wtom); |
| 1423 | } | 1153 | } |
| 1424 | EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset); | ||
| 1425 | 1154 | ||
| 1426 | /** | 1155 | /** |
| 1427 | * xtime_update() - advances the timekeeping infrastructure | 1156 | * xtime_update() - advances the timekeeping infrastructure |
| @@ -1431,7 +1160,7 @@ EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset); | |||
| 1431 | */ | 1160 | */ |
| 1432 | void xtime_update(unsigned long ticks) | 1161 | void xtime_update(unsigned long ticks) |
| 1433 | { | 1162 | { |
| 1434 | write_seqlock(&jiffies_lock); | 1163 | write_seqlock(&xtime_lock); |
| 1435 | do_timer(ticks); | 1164 | do_timer(ticks); |
| 1436 | write_sequnlock(&jiffies_lock); | 1165 | write_sequnlock(&xtime_lock); |
| 1437 | } | 1166 | } |
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index af5a7e9f164..3258455549f 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
| @@ -167,7 +167,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) | |||
| 167 | { | 167 | { |
| 168 | struct tick_sched *ts = tick_get_tick_sched(cpu); | 168 | struct tick_sched *ts = tick_get_tick_sched(cpu); |
| 169 | P(nohz_mode); | 169 | P(nohz_mode); |
| 170 | P_ns(last_tick); | 170 | P_ns(idle_tick); |
| 171 | P(tick_stopped); | 171 | P(tick_stopped); |
| 172 | P(idle_jiffies); | 172 | P(idle_jiffies); |
| 173 | P(idle_calls); | 173 | P(idle_calls); |
| @@ -259,7 +259,7 @@ static int timer_list_show(struct seq_file *m, void *v) | |||
| 259 | u64 now = ktime_to_ns(ktime_get()); | 259 | u64 now = ktime_to_ns(ktime_get()); |
| 260 | int cpu; | 260 | int cpu; |
| 261 | 261 | ||
| 262 | SEQ_printf(m, "Timer List Version: v0.7\n"); | 262 | SEQ_printf(m, "Timer List Version: v0.6\n"); |
| 263 | SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); | 263 | SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); |
| 264 | SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); | 264 | SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); |
| 265 | 265 | ||
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 0b537f27b55..a5d0a3a85dd 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c | |||
| @@ -81,7 +81,7 @@ struct entry { | |||
| 81 | /* | 81 | /* |
| 82 | * Spinlock protecting the tables - not taken during lookup: | 82 | * Spinlock protecting the tables - not taken during lookup: |
| 83 | */ | 83 | */ |
| 84 | static DEFINE_RAW_SPINLOCK(table_lock); | 84 | static DEFINE_SPINLOCK(table_lock); |
| 85 | 85 | ||
| 86 | /* | 86 | /* |
| 87 | * Per-CPU lookup locks for fast hash lookup: | 87 | * Per-CPU lookup locks for fast hash lookup: |
| @@ -188,7 +188,7 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm) | |||
| 188 | prev = NULL; | 188 | prev = NULL; |
| 189 | curr = *head; | 189 | curr = *head; |
| 190 | 190 | ||
| 191 | raw_spin_lock(&table_lock); | 191 | spin_lock(&table_lock); |
| 192 | /* | 192 | /* |
| 193 | * Make sure we have not raced with another CPU: | 193 | * Make sure we have not raced with another CPU: |
| 194 | */ | 194 | */ |
| @@ -215,7 +215,7 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm) | |||
| 215 | *head = curr; | 215 | *head = curr; |
| 216 | } | 216 | } |
| 217 | out_unlock: | 217 | out_unlock: |
| 218 | raw_spin_unlock(&table_lock); | 218 | spin_unlock(&table_lock); |
| 219 | 219 | ||
| 220 | return curr; | 220 | return curr; |
| 221 | } | 221 | } |
