Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit_tree.c              19
-rw-r--r--  kernel/debug/kdb/kdb_debugger.c   4
-rw-r--r--  kernel/debug/kdb/kdb_io.c        11
-rw-r--r--  kernel/debug/kdb/kdb_main.c      15
-rw-r--r--  kernel/events/callchain.c         9
-rw-r--r--  kernel/events/core.c             30
-rw-r--r--  kernel/events/internal.h          3
-rw-r--r--  kernel/fork.c                     4
-rw-r--r--  kernel/futex.c                   17
-rw-r--r--  kernel/irq/manage.c              15
-rw-r--r--  kernel/power/suspend.c            3
-rw-r--r--  kernel/printk.c                   2
-rw-r--r--  kernel/sched/core.c              39
-rw-r--r--  kernel/sched/cpupri.c            10
-rw-r--r--  kernel/sched/fair.c              40
-rw-r--r--  kernel/sched/rt.c                13
-rw-r--r--  kernel/sched/sched.h              8
-rw-r--r--  kernel/sched/stop_task.c         22
-rw-r--r--  kernel/task_work.c                1
-rw-r--r--  kernel/time/jiffies.c             2
-rw-r--r--  kernel/time/ntp.c                 2
-rw-r--r--  kernel/time/timekeeping.c       442
-rw-r--r--  kernel/timer.c                    9
-rw-r--r--  kernel/trace/trace_event_perf.c   2
-rw-r--r--  kernel/trace/trace_kprobe.c       6
-rw-r--r--  kernel/trace/trace_syscalls.c     8
-rw-r--r--  kernel/trace/trace_uprobe.c       2
-rw-r--r--  kernel/watchdog.c                21
28 files changed, 443 insertions, 316 deletions
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 3a5ca582ba1e..ed206fd88cca 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -250,7 +250,6 @@ static void untag_chunk(struct node *p)
 		spin_unlock(&hash_lock);
 		spin_unlock(&entry->lock);
 		fsnotify_destroy_mark(entry);
-		fsnotify_put_mark(entry);
 		goto out;
 	}
 
@@ -259,7 +258,7 @@ static void untag_chunk(struct node *p)
 
 	fsnotify_duplicate_mark(&new->mark, entry);
 	if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) {
-		free_chunk(new);
+		fsnotify_put_mark(&new->mark);
 		goto Fallback;
 	}
 
@@ -293,7 +292,7 @@ static void untag_chunk(struct node *p)
 	spin_unlock(&hash_lock);
 	spin_unlock(&entry->lock);
 	fsnotify_destroy_mark(entry);
-	fsnotify_put_mark(entry);
+	fsnotify_put_mark(&new->mark);	/* drop initial reference */
 	goto out;
 
 Fallback:
@@ -322,7 +321,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
 
 	entry = &chunk->mark;
 	if (fsnotify_add_mark(entry, audit_tree_group, inode, NULL, 0)) {
-		free_chunk(chunk);
+		fsnotify_put_mark(entry);
 		return -ENOSPC;
 	}
 
@@ -347,6 +346,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
 	insert_hash(chunk);
 	spin_unlock(&hash_lock);
 	spin_unlock(&entry->lock);
+	fsnotify_put_mark(entry);	/* drop initial reference */
 	return 0;
 }
 
@@ -396,7 +396,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	fsnotify_duplicate_mark(chunk_entry, old_entry);
 	if (fsnotify_add_mark(chunk_entry, chunk_entry->group, chunk_entry->i.inode, NULL, 1)) {
 		spin_unlock(&old_entry->lock);
-		free_chunk(chunk);
+		fsnotify_put_mark(chunk_entry);
 		fsnotify_put_mark(old_entry);
 		return -ENOSPC;
 	}
@@ -444,8 +444,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	spin_unlock(&chunk_entry->lock);
 	spin_unlock(&old_entry->lock);
 	fsnotify_destroy_mark(old_entry);
+	fsnotify_put_mark(chunk_entry);	/* drop initial reference */
 	fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
-	fsnotify_put_mark(old_entry); /* and kill it */
 	return 0;
 }
 
@@ -916,7 +916,12 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify
 	struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
 
 	evict_chunk(chunk);
-	fsnotify_put_mark(entry);
+
+	/*
+	 * We are guaranteed to have at least one reference to the mark from
+	 * either the inode or the caller of fsnotify_destroy_mark().
+	 */
+	BUG_ON(atomic_read(&entry->refcnt) < 1);
 }
 
 static bool audit_tree_send_event(struct fsnotify_group *group, struct inode *inode,
diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c
index 8b68ce78ff17..be7b33b73d30 100644
--- a/kernel/debug/kdb/kdb_debugger.c
+++ b/kernel/debug/kdb/kdb_debugger.c
@@ -12,6 +12,7 @@
 #include <linux/kdb.h>
 #include <linux/kdebug.h>
 #include <linux/export.h>
+#include <linux/hardirq.h>
 #include "kdb_private.h"
 #include "../debug_core.h"
 
@@ -52,6 +53,9 @@ int kdb_stub(struct kgdb_state *ks)
 	if (atomic_read(&kgdb_setting_breakpoint))
 		reason = KDB_REASON_KEYBOARD;
 
+	if (in_nmi())
+		reason = KDB_REASON_NMI;
+
 	for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT; i++, bp++) {
 		if ((bp->bp_enabled) && (bp->bp_addr == addr)) {
 			reason = KDB_REASON_BREAK;
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index bb9520f0f6ff..0a69d2adc4f3 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -715,9 +715,6 @@ kdb_printit:
 	/* check for having reached the LINES number of printed lines */
 	if (kdb_nextline == linecount) {
 		char buf1[16] = "";
-#if defined(CONFIG_SMP)
-		char buf2[32];
-#endif
 
 		/* Watch out for recursion here. Any routine that calls
 		 * kdb_printf will come back through here. And kdb_read
@@ -732,14 +729,6 @@ kdb_printit:
 		if (moreprompt == NULL)
 			moreprompt = "more> ";
 
-#if defined(CONFIG_SMP)
-		if (strchr(moreprompt, '%')) {
-			sprintf(buf2, moreprompt, get_cpu());
-			put_cpu();
-			moreprompt = buf2;
-		}
-#endif
-
 		kdb_input_flush();
 		c = console_drivers;
 
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 1f91413edb87..31df1706b9a9 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -139,11 +139,10 @@ static const int __nkdb_err = sizeof(kdbmsgs) / sizeof(kdbmsg_t);
 static char *__env[] = {
 #if defined(CONFIG_SMP)
  "PROMPT=[%d]kdb> ",
- "MOREPROMPT=[%d]more> ",
 #else
  "PROMPT=kdb> ",
- "MOREPROMPT=more> ",
 #endif
+ "MOREPROMPT=more> ",
  "RADIX=16",
  "MDCOUNT=8",			/* lines of md output */
  KDB_PLATFORM_ENV,
@@ -1236,18 +1235,6 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
 		*cmdbuf = '\0';
 		*(cmd_hist[cmd_head]) = '\0';
 
-		if (KDB_FLAG(ONLY_DO_DUMP)) {
-			/* kdb is off but a catastrophic error requires a dump.
-			 * Take the dump and reboot.
-			 * Turn on logging so the kdb output appears in the log
-			 * buffer in the dump.
-			 */
-			const char *setargs[] = { "set", "LOGGING", "1" };
-			kdb_set(2, setargs);
-			kdb_reboot(0, NULL);
-			/*NOTREACHED*/
-		}
-
 do_full_getstr:
 #if defined(CONFIG_SMP)
 		snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"),
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 6581a040f399..98d4597f43d6 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -153,7 +153,8 @@ put_callchain_entry(int rctx)
 	put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
 }
 
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+struct perf_callchain_entry *
+perf_callchain(struct perf_event *event, struct pt_regs *regs)
 {
 	int rctx;
 	struct perf_callchain_entry *entry;
@@ -178,6 +179,12 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 	}
 
 	if (regs) {
+		/*
+		 * Disallow cross-task user callchains.
+		 */
+		if (event->ctx->task && event->ctx->task != current)
+			goto exit_put;
+
 		perf_callchain_store(entry, PERF_CONTEXT_USER);
 		perf_callchain_user(entry, regs);
 	}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f1cf0edeb39a..b7935fcec7d9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4039,7 +4039,7 @@ void perf_prepare_sample(struct perf_event_header *header,
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		int size = 1;
 
-		data->callchain = perf_callchain(regs);
+		data->callchain = perf_callchain(event, regs);
 
 		if (data->callchain)
 			size += data->callchain->nr;
@@ -5209,7 +5209,8 @@ static int perf_tp_event_match(struct perf_event *event,
 }
 
 void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
-		   struct pt_regs *regs, struct hlist_head *head, int rctx)
+		   struct pt_regs *regs, struct hlist_head *head, int rctx,
+		   struct task_struct *task)
 {
 	struct perf_sample_data data;
 	struct perf_event *event;
@@ -5228,6 +5229,31 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
 		perf_swevent_event(event, count, &data, regs);
 	}
 
+	/*
+	 * If we got specified a target task, also iterate its context and
+	 * deliver this event there too.
+	 */
+	if (task && task != current) {
+		struct perf_event_context *ctx;
+		struct trace_entry *entry = record;
+
+		rcu_read_lock();
+		ctx = rcu_dereference(task->perf_event_ctxp[perf_sw_context]);
+		if (!ctx)
+			goto unlock;
+
+		list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+			if (event->attr.type != PERF_TYPE_TRACEPOINT)
+				continue;
+			if (event->attr.config != entry->type)
+				continue;
+			if (perf_tp_event_match(event, &data, regs))
+				perf_swevent_event(event, count, &data, regs);
+		}
+unlock:
+		rcu_read_unlock();
+	}
+
 	perf_swevent_put_recursion_context(rctx);
 }
 EXPORT_SYMBOL_GPL(perf_tp_event);
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index b0b107f90afc..a096c19f2c2a 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -101,7 +101,8 @@ __output_copy(struct perf_output_handle *handle,
 }
 
 /* Callchain handling */
-extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
+extern struct perf_callchain_entry *
+perf_callchain(struct perf_event *event, struct pt_regs *regs);
 extern int get_callchain_buffers(void);
 extern void put_callchain_buffers(void);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 3bd2280d79f6..2c8857e12855 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -455,8 +455,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		if (retval)
 			goto out;
 
-		if (file && uprobe_mmap(tmp))
-			goto out;
+		if (file)
+			uprobe_mmap(tmp);
 	}
 	/* a new mm has just been created */
 	arch_dup_mmap(oldmm, mm);
diff --git a/kernel/futex.c b/kernel/futex.c
index e2b0fb9a0b3b..3717e7b306e0 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2231,11 +2231,11 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
  * @uaddr2:	the pi futex we will take prior to returning to user-space
  *
  * The caller will wait on uaddr and will be requeued by futex_requeue() to
- * uaddr2 which must be PI aware. Normal wakeup will wake on uaddr2 and
- * complete the acquisition of the rt_mutex prior to returning to userspace.
- * This ensures the rt_mutex maintains an owner when it has waiters; without
- * one, the pi logic wouldn't know which task to boost/deboost, if there was a
- * need to.
+ * uaddr2 which must be PI aware and unique from uaddr. Normal wakeup will wake
+ * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to
+ * userspace. This ensures the rt_mutex maintains an owner when it has waiters;
+ * without one, the pi logic would not know which task to boost/deboost, if
+ * there was a need to.
  *
  * We call schedule in futex_wait_queue_me() when we enqueue and return there
  * via the following:
@@ -2272,6 +2272,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 	struct futex_q q = futex_q_init;
 	int res, ret;
 
+	if (uaddr == uaddr2)
+		return -EINVAL;
+
 	if (!bitset)
 		return -EINVAL;
 
@@ -2343,7 +2346,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 		 * signal. futex_unlock_pi() will not destroy the lock_ptr nor
 		 * the pi_state.
 		 */
-		WARN_ON(!&q.pi_state);
+		WARN_ON(!q.pi_state);
 		pi_mutex = &q.pi_state->pi_mutex;
 		ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
 		debug_rt_mutex_free_waiter(&rt_waiter);
@@ -2370,7 +2373,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 	 * fault, unlock the rt_mutex and return the fault to userspace.
 	 */
 	if (ret == -EFAULT) {
-		if (rt_mutex_owner(pi_mutex) == current)
+		if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
 			rt_mutex_unlock(pi_mutex);
 	} else if (ret == -EINTR) {
 		/*
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0a8e8f059627..4c69326aa773 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -944,6 +944,18 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 	}
 
 	/*
+	 * Drivers are often written to work w/o knowledge about the
+	 * underlying irq chip implementation, so a request for a
+	 * threaded irq without a primary hard irq context handler
+	 * requires the ONESHOT flag to be set. Some irq chips like
+	 * MSI based interrupts are per se one shot safe. Check the
+	 * chip flags, so we can avoid the unmask dance at the end of
+	 * the threaded handler for those.
+	 */
+	if (desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE)
+		new->flags &= ~IRQF_ONESHOT;
+
+	/*
 	 * The following block of code has to be executed atomically
 	 */
 	raw_spin_lock_irqsave(&desc->lock, flags);
@@ -1017,7 +1029,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		 */
 		new->thread_mask = 1 << ffz(thread_mask);
 
-	} else if (new->handler == irq_default_primary_handler) {
+	} else if (new->handler == irq_default_primary_handler &&
+		   !(desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE)) {
 		/*
 		 * The interrupt was requested with handler = NULL, so
 		 * we use the default primary handler for it. But it
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 1da39ea248fd..c8b7446b27df 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -178,9 +178,6 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 	arch_suspend_enable_irqs();
 	BUG_ON(irqs_disabled());
 
-	/* Kick the lockup detector */
-	lockup_detector_bootcpu_resume();
-
  Enable_cpus:
 	enable_nonboot_cpus();
 
diff --git a/kernel/printk.c b/kernel/printk.c
index 6a76ab9d4476..66a2ea37b576 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1034,6 +1034,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 		struct log *msg = log_from_idx(idx);
 
 		len += msg_print_text(msg, prev, true, NULL, 0);
+		prev = msg->flags;
 		idx = log_next(idx);
 		seq++;
 	}
@@ -1046,6 +1047,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 		struct log *msg = log_from_idx(idx);
 
 		len -= msg_print_text(msg, prev, true, NULL, 0);
+		prev = msg->flags;
 		idx = log_next(idx);
 		seq++;
 	}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d325c4b2dcbb..fbf1fd098dc6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3142,6 +3142,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 # define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
 #endif
 
+static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
+{
+	u64 temp = (__force u64) rtime;
+
+	temp *= (__force u64) utime;
+
+	if (sizeof(cputime_t) == 4)
+		temp = div_u64(temp, (__force u32) total);
+	else
+		temp = div64_u64(temp, (__force u64) total);
+
+	return (__force cputime_t) temp;
+}
+
 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	cputime_t rtime, utime = p->utime, total = utime + p->stime;
@@ -3151,13 +3165,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	 */
 	rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
 
-	if (total) {
-		u64 temp = (__force u64) rtime;
-
-		temp *= (__force u64) utime;
-		do_div(temp, (__force u32) total);
-		utime = (__force cputime_t) temp;
-	} else
+	if (total)
+		utime = scale_utime(utime, rtime, total);
+	else
 		utime = rtime;
 
 	/*
@@ -3184,13 +3194,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	total = cputime.utime + cputime.stime;
 	rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
 
-	if (total) {
-		u64 temp = (__force u64) rtime;
-
-		temp *= (__force u64) cputime.utime;
-		do_div(temp, (__force u32) total);
-		utime = (__force cputime_t) temp;
-	} else
+	if (total)
+		utime = scale_utime(cputime.utime, rtime, total);
+	else
 		utime = rtime;
 
 	sig->prev_utime = max(sig->prev_utime, utime);
@@ -4340,9 +4346,7 @@ recheck:
 	 */
 	if (unlikely(policy == p->policy && (!rt_policy(policy) ||
 			param->sched_priority == p->rt_priority))) {
-
-		__task_rq_unlock(rq);
-		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		task_rq_unlock(rq, p, &flags);
 		return 0;
 	}
 
@@ -7248,6 +7252,7 @@ int in_sched_functions(unsigned long addr)
 
 #ifdef CONFIG_CGROUP_SCHED
 struct task_group root_task_group;
+LIST_HEAD(task_groups);
 #endif
 
 DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index d72586fdf660..23aa789c53ee 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -65,8 +65,8 @@ static int convert_prio(int prio)
 int cpupri_find(struct cpupri *cp, struct task_struct *p,
 		struct cpumask *lowest_mask)
 {
-	int             idx      = 0;
-	int             task_pri = convert_prio(p->prio);
+	int idx = 0;
+	int task_pri = convert_prio(p->prio);
 
 	if (task_pri >= MAX_RT_PRIO)
 		return 0;
@@ -137,9 +137,9 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
  */
 void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 {
-	int             *currpri = &cp->cpu_to_pri[cpu];
-	int              oldpri  = *currpri;
-	int              do_mb = 0;
+	int *currpri = &cp->cpu_to_pri[cpu];
+	int oldpri = *currpri;
+	int do_mb = 0;
 
 	newpri = convert_prio(newpri);
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 22321db64952..c219bf8d704c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3069,6 +3069,9 @@ struct lb_env {
 	int			new_dst_cpu;
 	enum cpu_idle_type	idle;
 	long			imbalance;
+	/* The set of CPUs under consideration for load-balancing */
+	struct cpumask		*cpus;
+
 	unsigned int		flags;
 
 	unsigned int		loop;
@@ -3384,6 +3387,14 @@ static int tg_load_down(struct task_group *tg, void *data)
 
 static void update_h_load(long cpu)
 {
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long now = jiffies;
+
+	if (rq->h_load_throttle == now)
+		return;
+
+	rq->h_load_throttle = now;
+
 	rcu_read_lock();
 	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 	rcu_read_unlock();
@@ -3653,8 +3664,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
  */
 static inline void update_sg_lb_stats(struct lb_env *env,
 			struct sched_group *group, int load_idx,
-			int local_group, const struct cpumask *cpus,
-			int *balance, struct sg_lb_stats *sgs)
+			int local_group, int *balance, struct sg_lb_stats *sgs)
 {
 	unsigned long nr_running, max_nr_running, min_nr_running;
 	unsigned long load, max_cpu_load, min_cpu_load;
@@ -3671,7 +3681,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 	max_nr_running = 0;
 	min_nr_running = ~0UL;
 
-	for_each_cpu_and(i, sched_group_cpus(group), cpus) {
+	for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
 		struct rq *rq = cpu_rq(i);
 
 		nr_running = rq->nr_running;
@@ -3800,8 +3810,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
  * @sds: variable to hold the statistics for this sched_domain.
  */
 static inline void update_sd_lb_stats(struct lb_env *env,
-					const struct cpumask *cpus,
-					int *balance, struct sd_lb_stats *sds)
+					int *balance, struct sd_lb_stats *sds)
 {
 	struct sched_domain *child = env->sd->child;
 	struct sched_group *sg = env->sd->groups;
@@ -3818,8 +3827,7 @@ static inline void update_sd_lb_stats(struct lb_env *env,
 
 		local_group = cpumask_test_cpu(env->dst_cpu, sched_group_cpus(sg));
 		memset(&sgs, 0, sizeof(sgs));
-		update_sg_lb_stats(env, sg, load_idx, local_group,
-				   cpus, balance, &sgs);
+		update_sg_lb_stats(env, sg, load_idx, local_group, balance, &sgs);
 
 		if (local_group && !(*balance))
 			return;
@@ -4055,7 +4063,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
  * to restore balance.
 *
 * @env: The load balancing environment.
-* @cpus: The set of CPUs under consideration for load-balancing.
 * @balance: Pointer to a variable indicating if this_cpu
 *	is the appropriate cpu to perform load balancing at this_level.
 *
@@ -4065,7 +4072,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 *		put to idle by rebalancing its tasks onto our group.
 */
 static struct sched_group *
-find_busiest_group(struct lb_env *env, const struct cpumask *cpus, int *balance)
+find_busiest_group(struct lb_env *env, int *balance)
 {
 	struct sd_lb_stats sds;
 
@@ -4075,7 +4082,7 @@ find_busiest_group(struct lb_env *env, const struct cpumask *cpus, int *balance)
 	 * Compute the various statistics relavent for load balancing at
 	 * this level.
 	 */
-	update_sd_lb_stats(env, cpus, balance, &sds);
+	update_sd_lb_stats(env, balance, &sds);
 
 	/*
 	 * this_cpu is not the appropriate cpu to perform load balancing at
@@ -4155,8 +4162,7 @@ ret:
 * find_busiest_queue - find the busiest runqueue among the cpus in group.
 */
 static struct rq *find_busiest_queue(struct lb_env *env,
-				     struct sched_group *group,
-				     const struct cpumask *cpus)
+				     struct sched_group *group)
 {
 	struct rq *busiest = NULL, *rq;
 	unsigned long max_load = 0;
@@ -4171,7 +4177,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 		if (!capacity)
 			capacity = fix_small_capacity(env->sd, group);
 
-		if (!cpumask_test_cpu(i, cpus))
+		if (!cpumask_test_cpu(i, env->cpus))
 			continue;
 
 		rq = cpu_rq(i);
@@ -4252,6 +4258,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		.dst_grpmask	= sched_group_cpus(sd->groups),
 		.idle		= idle,
 		.loop_break	= sched_nr_migrate_break,
+		.cpus		= cpus,
 	};
 
 	cpumask_copy(cpus, cpu_active_mask);
@@ -4260,7 +4267,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	schedstat_inc(sd, lb_count[idle]);
 
 redo:
-	group = find_busiest_group(&env, cpus, balance);
+	group = find_busiest_group(&env, balance);
 
 	if (*balance == 0)
 		goto out_balanced;
@@ -4270,7 +4277,7 @@ redo:
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(&env, group, cpus);
+	busiest = find_busiest_queue(&env, group);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[idle]);
 		goto out_balanced;
@@ -4294,11 +4301,10 @@ redo:
 	env.src_rq    = busiest;
 	env.loop_max  = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
+	update_h_load(env.src_cpu);
 more_balance:
 	local_irq_save(flags);
 	double_rq_lock(this_rq, busiest);
-	if (!env.loop)
-		update_h_load(env.src_cpu);
 
 	/*
 	 * cur_ld_moved - load moved in current iteration
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 573e1ca01102..944cb68420e9 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -788,6 +788,19 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 	const struct cpumask *span;
 
 	span = sched_rt_period_mask();
+#ifdef CONFIG_RT_GROUP_SCHED
+	/*
+	 * FIXME: isolated CPUs should really leave the root task group,
+	 * whether they are isolcpus or were isolated via cpusets, lest
+	 * the timer run on a CPU which does not service all runqueues,
+	 * potentially leaving other CPUs indefinitely throttled. If
+	 * isolation is really required, the user will turn the throttle
+	 * off to kill the perturbations it causes anyway. Meanwhile,
+	 * this maintains functionality for boot and/or troubleshooting.
+	 */
+	if (rt_b == &root_task_group.rt_bandwidth)
+		span = cpu_online_mask;
+#endif
 	for_each_cpu(i, span) {
 		int enqueue = 0;
 		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c35a1a7dd4d6..f6714d009e77 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -80,7 +80,7 @@ extern struct mutex sched_domains_mutex;
 struct cfs_rq;
 struct rt_rq;
 
-static LIST_HEAD(task_groups);
+extern struct list_head task_groups;
 
 struct cfs_bandwidth {
 #ifdef CONFIG_CFS_BANDWIDTH
@@ -374,7 +374,11 @@ struct rq {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this cpu: */
 	struct list_head leaf_cfs_rq_list;
-#endif
+#ifdef CONFIG_SMP
+	unsigned long h_load_throttle;
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
 #ifdef CONFIG_RT_GROUP_SCHED
 	struct list_head leaf_rt_rq_list;
 #endif
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 7b386e86fd23..da5eb5bed84a 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -27,8 +27,10 @@ static struct task_struct *pick_next_task_stop(struct rq *rq)
 {
 	struct task_struct *stop = rq->stop;
 
-	if (stop && stop->on_rq)
+	if (stop && stop->on_rq) {
+		stop->se.exec_start = rq->clock_task;
 		return stop;
+	}
 
 	return NULL;
 }
@@ -52,6 +54,21 @@ static void yield_task_stop(struct rq *rq)
 
 static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
 {
+	struct task_struct *curr = rq->curr;
+	u64 delta_exec;
+
+	delta_exec = rq->clock_task - curr->se.exec_start;
+	if (unlikely((s64)delta_exec < 0))
+		delta_exec = 0;
+
+	schedstat_set(curr->se.statistics.exec_max,
+			max(curr->se.statistics.exec_max, delta_exec));
+
+	curr->se.sum_exec_runtime += delta_exec;
+	account_group_exec_runtime(curr, delta_exec);
+
+	curr->se.exec_start = rq->clock_task;
+	cpuacct_charge(curr, delta_exec);
 }
 
 static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
@@ -60,6 +77,9 @@ static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
 
 static void set_curr_task_stop(struct rq *rq)
 {
+	struct task_struct *stop = rq->stop;
+
+	stop->se.exec_start = rq->clock_task;
 }
 
 static void switched_to_stop(struct rq *rq, struct task_struct *p)
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 91d4e1742a0c..d320d44903bd 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -75,6 +75,7 @@ void task_work_run(void)
 			p = q->next;
 			q->func(q);
 			q = p;
+			cond_resched();
 		}
 	}
 }
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index a470154e0408..46da0537c10b 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -37,7 +37,7 @@
  * requested HZ value. It is also not recommended
  * for "tick-less" systems.
  */
-#define NSEC_PER_JIFFY	((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ))
+#define NSEC_PER_JIFFY	((u32)((((u64)NSEC_PER_SEC)<<8)/SHIFTED_HZ))
 
 /* Since jiffies uses a simple NSEC_PER_JIFFY multiplier
  * conversion, the .shift value could be zero. However
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index b7fbadc5c973..24174b4d669b 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -28,7 +28,7 @@ DEFINE_SPINLOCK(ntp_lock);
 /* USER_HZ period (usecs): */
 unsigned long			tick_usec = TICK_USEC;
 
-/* ACTHZ period (nsecs): */
+/* SHIFTED_HZ period (nsecs): */
 unsigned long			tick_nsec;
 
 static u64			tick_length;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f045cc50832d..34e5eac81424 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -65,14 +65,14 @@ struct timekeeper {
65 * used instead. 65 * used instead.
66 */ 66 */
67 struct timespec wall_to_monotonic; 67 struct timespec wall_to_monotonic;
68 /* time spent in suspend */
69 struct timespec total_sleep_time;
70 /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
71 struct timespec raw_time;
72 /* Offset clock monotonic -> clock realtime */ 68 /* Offset clock monotonic -> clock realtime */
73 ktime_t offs_real; 69 ktime_t offs_real;
70 /* time spent in suspend */
71 struct timespec total_sleep_time;
74 /* Offset clock monotonic -> clock boottime */ 72 /* Offset clock monotonic -> clock boottime */
75 ktime_t offs_boot; 73 ktime_t offs_boot;
74 /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
75 struct timespec raw_time;
76 /* Seqlock for all timekeeper values */ 76 /* Seqlock for all timekeeper values */
77 seqlock_t lock; 77 seqlock_t lock;
78}; 78};
@@ -108,13 +108,39 @@ static struct timespec tk_xtime(struct timekeeper *tk)
108static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts) 108static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts)
109{ 109{
110 tk->xtime_sec = ts->tv_sec; 110 tk->xtime_sec = ts->tv_sec;
111 tk->xtime_nsec = ts->tv_nsec << tk->shift; 111 tk->xtime_nsec = (u64)ts->tv_nsec << tk->shift;
112} 112}
113 113
114static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts) 114static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts)
115{ 115{
116 tk->xtime_sec += ts->tv_sec; 116 tk->xtime_sec += ts->tv_sec;
117 tk->xtime_nsec += ts->tv_nsec << tk->shift; 117 tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift;
118 tk_normalize_xtime(tk);
119}
120
121static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm)
122{
123 struct timespec tmp;
124
125 /*
126 * Verify consistency of: offset_real = -wall_to_monotonic
127 * before modifying anything
128 */
129 set_normalized_timespec(&tmp, -tk->wall_to_monotonic.tv_sec,
130 -tk->wall_to_monotonic.tv_nsec);
131 WARN_ON_ONCE(tk->offs_real.tv64 != timespec_to_ktime(tmp).tv64);
132 tk->wall_to_monotonic = wtm;
133 set_normalized_timespec(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
134 tk->offs_real = timespec_to_ktime(tmp);
135}
136
137static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t)
138{
139 /* Verify consistency before modifying */
140 WARN_ON_ONCE(tk->offs_boot.tv64 != timespec_to_ktime(tk->total_sleep_time).tv64);
141
142 tk->total_sleep_time = t;
143 tk->offs_boot = timespec_to_ktime(t);
118} 144}
119 145
120/** 146/**
@@ -217,14 +243,6 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
217 return nsec + arch_gettimeoffset(); 243 return nsec + arch_gettimeoffset();
218} 244}
219 245
220static void update_rt_offset(struct timekeeper *tk)
221{
222 struct timespec tmp, *wtm = &tk->wall_to_monotonic;
223
224 set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec);
225 tk->offs_real = timespec_to_ktime(tmp);
226}
227
228/* must hold write on timekeeper.lock */ 246/* must hold write on timekeeper.lock */
229static void timekeeping_update(struct timekeeper *tk, bool clearntp) 247static void timekeeping_update(struct timekeeper *tk, bool clearntp)
230{ 248{
@@ -234,12 +252,10 @@ static void timekeeping_update(struct timekeeper *tk, bool clearntp)
234 tk->ntp_error = 0; 252 tk->ntp_error = 0;
235 ntp_clear(); 253 ntp_clear();
236 } 254 }
237 update_rt_offset(tk);
238 xt = tk_xtime(tk); 255 xt = tk_xtime(tk);
239 update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult); 256 update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult);
240} 257}
241 258
242
243/** 259/**
244 * timekeeping_forward_now - update clock to the current time 260 * timekeeping_forward_now - update clock to the current time
245 * 261 *
@@ -261,7 +277,7 @@ static void timekeeping_forward_now(struct timekeeper *tk)
261 tk->xtime_nsec += cycle_delta * tk->mult; 277 tk->xtime_nsec += cycle_delta * tk->mult;
262 278
263 /* If arch requires, add in gettimeoffset() */ 279 /* If arch requires, add in gettimeoffset() */
264 tk->xtime_nsec += arch_gettimeoffset() << tk->shift; 280 tk->xtime_nsec += (u64)arch_gettimeoffset() << tk->shift;
265 281
266 tk_normalize_xtime(tk); 282 tk_normalize_xtime(tk);
267 283
@@ -277,18 +293,19 @@ static void timekeeping_forward_now(struct timekeeper *tk)
277 */ 293 */
278void getnstimeofday(struct timespec *ts) 294void getnstimeofday(struct timespec *ts)
279{ 295{
296 struct timekeeper *tk = &timekeeper;
280 unsigned long seq; 297 unsigned long seq;
281 s64 nsecs = 0; 298 s64 nsecs = 0;
282 299
283 WARN_ON(timekeeping_suspended); 300 WARN_ON(timekeeping_suspended);
284 301
285 do { 302 do {
286 seq = read_seqbegin(&timekeeper.lock); 303 seq = read_seqbegin(&tk->lock);
287 304
288 ts->tv_sec = timekeeper.xtime_sec; 305 ts->tv_sec = tk->xtime_sec;
289 ts->tv_nsec = timekeeping_get_ns(&timekeeper); 306 ts->tv_nsec = timekeeping_get_ns(tk);
290 307
291 } while (read_seqretry(&timekeeper.lock, seq)); 308 } while (read_seqretry(&tk->lock, seq));
292 309
293 timespec_add_ns(ts, nsecs); 310 timespec_add_ns(ts, nsecs);
294} 311}
@@ -296,19 +313,18 @@ EXPORT_SYMBOL(getnstimeofday);
296 313
297ktime_t ktime_get(void) 314ktime_t ktime_get(void)
298{ 315{
316 struct timekeeper *tk = &timekeeper;
299 unsigned int seq; 317 unsigned int seq;
300 s64 secs, nsecs; 318 s64 secs, nsecs;
301 319
302 WARN_ON(timekeeping_suspended); 320 WARN_ON(timekeeping_suspended);
303 321
304 do { 322 do {
305 seq = read_seqbegin(&timekeeper.lock); 323 seq = read_seqbegin(&tk->lock);
306 secs = timekeeper.xtime_sec + 324 secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
307 timekeeper.wall_to_monotonic.tv_sec; 325 nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec;
308 nsecs = timekeeping_get_ns(&timekeeper) +
309 timekeeper.wall_to_monotonic.tv_nsec;
310 326
311 } while (read_seqretry(&timekeeper.lock, seq)); 327 } while (read_seqretry(&tk->lock, seq));
312 /* 328 /*
313 * Use ktime_set/ktime_add_ns to create a proper ktime on 329 * Use ktime_set/ktime_add_ns to create a proper ktime on
314 * 32-bit architectures without CONFIG_KTIME_SCALAR. 330 * 32-bit architectures without CONFIG_KTIME_SCALAR.
@@ -327,18 +343,19 @@ EXPORT_SYMBOL_GPL(ktime_get);
327 */ 343 */
328void ktime_get_ts(struct timespec *ts) 344void ktime_get_ts(struct timespec *ts)
329{ 345{
346 struct timekeeper *tk = &timekeeper;
330 struct timespec tomono; 347 struct timespec tomono;
331 unsigned int seq; 348 unsigned int seq;
332 349
333 WARN_ON(timekeeping_suspended); 350 WARN_ON(timekeeping_suspended);
334 351
335 do { 352 do {
336 seq = read_seqbegin(&timekeeper.lock); 353 seq = read_seqbegin(&tk->lock);
337 ts->tv_sec = timekeeper.xtime_sec; 354 ts->tv_sec = tk->xtime_sec;
338 ts->tv_nsec = timekeeping_get_ns(&timekeeper); 355 ts->tv_nsec = timekeeping_get_ns(tk);
339 tomono = timekeeper.wall_to_monotonic; 356 tomono = tk->wall_to_monotonic;
340 357
341 } while (read_seqretry(&timekeeper.lock, seq)); 358 } while (read_seqretry(&tk->lock, seq));
342 359
343 set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec, 360 set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec,
344 ts->tv_nsec + tomono.tv_nsec); 361 ts->tv_nsec + tomono.tv_nsec);
@@ -358,22 +375,23 @@ EXPORT_SYMBOL_GPL(ktime_get_ts);
358 */ 375 */
359void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) 376void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
360{ 377{
378 struct timekeeper *tk = &timekeeper;
361 unsigned long seq; 379 unsigned long seq;
362 s64 nsecs_raw, nsecs_real; 380 s64 nsecs_raw, nsecs_real;
363 381
364 WARN_ON_ONCE(timekeeping_suspended); 382 WARN_ON_ONCE(timekeeping_suspended);
365 383
366 do { 384 do {
367 seq = read_seqbegin(&timekeeper.lock); 385 seq = read_seqbegin(&tk->lock);
368 386
369 *ts_raw = timekeeper.raw_time; 387 *ts_raw = tk->raw_time;
370 ts_real->tv_sec = timekeeper.xtime_sec; 388 ts_real->tv_sec = tk->xtime_sec;
371 ts_real->tv_nsec = 0; 389 ts_real->tv_nsec = 0;
372 390
373 nsecs_raw = timekeeping_get_ns_raw(&timekeeper); 391 nsecs_raw = timekeeping_get_ns_raw(tk);
374 nsecs_real = timekeeping_get_ns(&timekeeper); 392 nsecs_real = timekeeping_get_ns(tk);
375 393
376 } while (read_seqretry(&timekeeper.lock, seq)); 394 } while (read_seqretry(&tk->lock, seq));
377 395
378 timespec_add_ns(ts_raw, nsecs_raw); 396 timespec_add_ns(ts_raw, nsecs_raw);
379 timespec_add_ns(ts_real, nsecs_real); 397 timespec_add_ns(ts_real, nsecs_real);
@@ -406,28 +424,28 @@ EXPORT_SYMBOL(do_gettimeofday);
406 */ 424 */
407int do_settimeofday(const struct timespec *tv) 425int do_settimeofday(const struct timespec *tv)
408{ 426{
427 struct timekeeper *tk = &timekeeper;
409 struct timespec ts_delta, xt; 428 struct timespec ts_delta, xt;
410 unsigned long flags; 429 unsigned long flags;
411 430
412 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) 431 if (!timespec_valid_strict(tv))
413 return -EINVAL; 432 return -EINVAL;
414 433
415 write_seqlock_irqsave(&timekeeper.lock, flags); 434 write_seqlock_irqsave(&tk->lock, flags);
416 435
417 timekeeping_forward_now(&timekeeper); 436 timekeeping_forward_now(tk);
418 437
419 xt = tk_xtime(&timekeeper); 438 xt = tk_xtime(tk);
420 ts_delta.tv_sec = tv->tv_sec - xt.tv_sec; 439 ts_delta.tv_sec = tv->tv_sec - xt.tv_sec;
421 ts_delta.tv_nsec = tv->tv_nsec - xt.tv_nsec; 440 ts_delta.tv_nsec = tv->tv_nsec - xt.tv_nsec;
422 441
423 timekeeper.wall_to_monotonic = 442 tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, ts_delta));
424 timespec_sub(timekeeper.wall_to_monotonic, ts_delta);
425 443
426 tk_set_xtime(&timekeeper, tv); 444 tk_set_xtime(tk, tv);
427 445
428 timekeeping_update(&timekeeper, true); 446 timekeeping_update(tk, true);
429 447
430 write_sequnlock_irqrestore(&timekeeper.lock, flags); 448 write_sequnlock_irqrestore(&tk->lock, flags);
431 449
432 /* signal hrtimers about time change */ 450 /* signal hrtimers about time change */
433 clock_was_set(); 451 clock_was_set();
@@ -436,7 +454,6 @@ int do_settimeofday(const struct timespec *tv)
436} 454}
437EXPORT_SYMBOL(do_settimeofday); 455EXPORT_SYMBOL(do_settimeofday);
438 456
439
440/** 457/**
441 * timekeeping_inject_offset - Adds or subtracts from the current time. 458 * timekeeping_inject_offset - Adds or subtracts from the current time.
442 * @tv: pointer to the timespec variable containing the offset 459 * @tv: pointer to the timespec variable containing the offset
@@ -445,28 +462,37 @@ EXPORT_SYMBOL(do_settimeofday);
445 */ 462 */
446int timekeeping_inject_offset(struct timespec *ts) 463int timekeeping_inject_offset(struct timespec *ts)
447{ 464{
465 struct timekeeper *tk = &timekeeper;
448 unsigned long flags; 466 unsigned long flags;
467 struct timespec tmp;
468 int ret = 0;
449 469
450 if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) 470 if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
451 return -EINVAL; 471 return -EINVAL;
452 472
453 write_seqlock_irqsave(&timekeeper.lock, flags); 473 write_seqlock_irqsave(&tk->lock, flags);
454 474
455 timekeeping_forward_now(&timekeeper); 475 timekeeping_forward_now(tk);
456 476
477 /* Make sure the proposed value is valid */
478 tmp = timespec_add(tk_xtime(tk), *ts);
479 if (!timespec_valid_strict(&tmp)) {
480 ret = -EINVAL;
481 goto error;
482 }
457 483
458 tk_xtime_add(&timekeeper, ts); 484 tk_xtime_add(tk, ts);
459 timekeeper.wall_to_monotonic = 485 tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
460 timespec_sub(timekeeper.wall_to_monotonic, *ts);
461 486
462 timekeeping_update(&timekeeper, true); 487error: /* even if we error out, we forwarded the time, so call update */
488 timekeeping_update(tk, true);
463 489
464 write_sequnlock_irqrestore(&timekeeper.lock, flags); 490 write_sequnlock_irqrestore(&tk->lock, flags);
465 491
466 /* signal hrtimers about time change */ 492 /* signal hrtimers about time change */
467 clock_was_set(); 493 clock_was_set();
468 494
469 return 0; 495 return ret;
470} 496}
471EXPORT_SYMBOL(timekeeping_inject_offset); 497EXPORT_SYMBOL(timekeeping_inject_offset);
472 498
@@ -477,23 +503,24 @@ EXPORT_SYMBOL(timekeeping_inject_offset);
477 */ 503 */
478static int change_clocksource(void *data) 504static int change_clocksource(void *data)
479{ 505{
506 struct timekeeper *tk = &timekeeper;
480 struct clocksource *new, *old; 507 struct clocksource *new, *old;
481 unsigned long flags; 508 unsigned long flags;
482 509
483 new = (struct clocksource *) data; 510 new = (struct clocksource *) data;
484 511
485 write_seqlock_irqsave(&timekeeper.lock, flags); 512 write_seqlock_irqsave(&tk->lock, flags);
486 513
487 timekeeping_forward_now(&timekeeper); 514 timekeeping_forward_now(tk);
488 if (!new->enable || new->enable(new) == 0) { 515 if (!new->enable || new->enable(new) == 0) {
489 old = timekeeper.clock; 516 old = tk->clock;
490 tk_setup_internals(&timekeeper, new); 517 tk_setup_internals(tk, new);
491 if (old->disable) 518 if (old->disable)
492 old->disable(old); 519 old->disable(old);
493 } 520 }
494 timekeeping_update(&timekeeper, true); 521 timekeeping_update(tk, true);
495 522
496 write_sequnlock_irqrestore(&timekeeper.lock, flags); 523 write_sequnlock_irqrestore(&tk->lock, flags);
497 524
498 return 0; 525 return 0;
499} 526}
@@ -507,7 +534,9 @@ static int change_clocksource(void *data)
507 */ 534 */
508void timekeeping_notify(struct clocksource *clock) 535void timekeeping_notify(struct clocksource *clock)
509{ 536{
510 if (timekeeper.clock == clock) 537 struct timekeeper *tk = &timekeeper;
538
539 if (tk->clock == clock)
511 return; 540 return;
512 stop_machine(change_clocksource, clock, NULL); 541 stop_machine(change_clocksource, clock, NULL);
513 tick_clock_notify(); 542 tick_clock_notify();
@@ -536,35 +565,36 @@ EXPORT_SYMBOL_GPL(ktime_get_real);
536 */ 565 */
537void getrawmonotonic(struct timespec *ts) 566void getrawmonotonic(struct timespec *ts)
538{ 567{
568 struct timekeeper *tk = &timekeeper;
539 unsigned long seq; 569 unsigned long seq;
540 s64 nsecs; 570 s64 nsecs;
541 571
542 do { 572 do {
543 seq = read_seqbegin(&timekeeper.lock); 573 seq = read_seqbegin(&tk->lock);
544 nsecs = timekeeping_get_ns_raw(&timekeeper); 574 nsecs = timekeeping_get_ns_raw(tk);
545 *ts = timekeeper.raw_time; 575 *ts = tk->raw_time;
546 576
547 } while (read_seqretry(&timekeeper.lock, seq)); 577 } while (read_seqretry(&tk->lock, seq));
548 578
549 timespec_add_ns(ts, nsecs); 579 timespec_add_ns(ts, nsecs);
550} 580}
551EXPORT_SYMBOL(getrawmonotonic); 581EXPORT_SYMBOL(getrawmonotonic);
552 582
553
554/** 583/**
555 * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres 584 * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres
556 */ 585 */
557int timekeeping_valid_for_hres(void) 586int timekeeping_valid_for_hres(void)
558{ 587{
588 struct timekeeper *tk = &timekeeper;
559 unsigned long seq; 589 unsigned long seq;
560 int ret; 590 int ret;
561 591
562 do { 592 do {
563 seq = read_seqbegin(&timekeeper.lock); 593 seq = read_seqbegin(&tk->lock);
564 594
565 ret = timekeeper.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; 595 ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
566 596
567 } while (read_seqretry(&timekeeper.lock, seq)); 597 } while (read_seqretry(&tk->lock, seq));
568 598
569 return ret; 599 return ret;
570} 600}
@@ -574,15 +604,16 @@ int timekeeping_valid_for_hres(void)
574 */ 604 */
575u64 timekeeping_max_deferment(void) 605u64 timekeeping_max_deferment(void)
576{ 606{
607 struct timekeeper *tk = &timekeeper;
577 unsigned long seq; 608 unsigned long seq;
578 u64 ret; 609 u64 ret;
579 610
580 do { 611 do {
581 seq = read_seqbegin(&timekeeper.lock); 612 seq = read_seqbegin(&tk->lock);
582 613
583 ret = timekeeper.clock->max_idle_ns; 614 ret = tk->clock->max_idle_ns;
584 615
585 } while (read_seqretry(&timekeeper.lock, seq)); 616 } while (read_seqretry(&tk->lock, seq));
586 617
587 return ret; 618 return ret;
588} 619}
@@ -622,46 +653,56 @@ void __attribute__((weak)) read_boot_clock(struct timespec *ts)
622 */ 653 */
623void __init timekeeping_init(void) 654void __init timekeeping_init(void)
624{ 655{
656 struct timekeeper *tk = &timekeeper;
625 struct clocksource *clock; 657 struct clocksource *clock;
626 unsigned long flags; 658 unsigned long flags;
627 struct timespec now, boot; 659 struct timespec now, boot, tmp;
628 660
629 read_persistent_clock(&now); 661 read_persistent_clock(&now);
662 if (!timespec_valid_strict(&now)) {
663 pr_warn("WARNING: Persistent clock returned invalid value!\n"
664 " Check your CMOS/BIOS settings.\n");
665 now.tv_sec = 0;
666 now.tv_nsec = 0;
667 }
668
630 read_boot_clock(&boot); 669 read_boot_clock(&boot);
670 if (!timespec_valid_strict(&boot)) {
671 pr_warn("WARNING: Boot clock returned invalid value!\n"
672 " Check your CMOS/BIOS settings.\n");
673 boot.tv_sec = 0;
674 boot.tv_nsec = 0;
675 }
631 676
632 seqlock_init(&timekeeper.lock); 677 seqlock_init(&tk->lock);
633 678
634 ntp_init(); 679 ntp_init();
635 680
636 write_seqlock_irqsave(&timekeeper.lock, flags); 681 write_seqlock_irqsave(&tk->lock, flags);
637 clock = clocksource_default_clock(); 682 clock = clocksource_default_clock();
638 if (clock->enable) 683 if (clock->enable)
639 clock->enable(clock); 684 clock->enable(clock);
640 tk_setup_internals(&timekeeper, clock); 685 tk_setup_internals(tk, clock);
641 686
642 tk_set_xtime(&timekeeper, &now); 687 tk_set_xtime(tk, &now);
643 timekeeper.raw_time.tv_sec = 0; 688 tk->raw_time.tv_sec = 0;
644 timekeeper.raw_time.tv_nsec = 0; 689 tk->raw_time.tv_nsec = 0;
645 if (boot.tv_sec == 0 && boot.tv_nsec == 0) 690 if (boot.tv_sec == 0 && boot.tv_nsec == 0)
646 boot = tk_xtime(&timekeeper); 691 boot = tk_xtime(tk);
647 692
648 set_normalized_timespec(&timekeeper.wall_to_monotonic, 693 set_normalized_timespec(&tmp, -boot.tv_sec, -boot.tv_nsec);
649 -boot.tv_sec, -boot.tv_nsec); 694 tk_set_wall_to_mono(tk, tmp);
650 update_rt_offset(&timekeeper); 695
651 timekeeper.total_sleep_time.tv_sec = 0; 696 tmp.tv_sec = 0;
652 timekeeper.total_sleep_time.tv_nsec = 0; 697 tmp.tv_nsec = 0;
653 write_sequnlock_irqrestore(&timekeeper.lock, flags); 698 tk_set_sleep_time(tk, tmp);
699
700 write_sequnlock_irqrestore(&tk->lock, flags);
654} 701}
655 702
656/* time in seconds when suspend began */ 703/* time in seconds when suspend began */
657static struct timespec timekeeping_suspend_time; 704static struct timespec timekeeping_suspend_time;
658 705
659static void update_sleep_time(struct timespec t)
660{
661 timekeeper.total_sleep_time = t;
662 timekeeper.offs_boot = timespec_to_ktime(t);
663}
664
665/** 706/**
666 * __timekeeping_inject_sleeptime - Internal function to add sleep interval 707 * __timekeeping_inject_sleeptime - Internal function to add sleep interval
667 * @delta: pointer to a timespec delta value 708 * @delta: pointer to a timespec delta value
@@ -672,18 +713,16 @@ static void update_sleep_time(struct timespec t)
672static void __timekeeping_inject_sleeptime(struct timekeeper *tk, 713static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
673 struct timespec *delta) 714 struct timespec *delta)
674{ 715{
675 if (!timespec_valid(delta)) { 716 if (!timespec_valid_strict(delta)) {
676 printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " 717 printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid "
677 "sleep delta value!\n"); 718 "sleep delta value!\n");
678 return; 719 return;
679 } 720 }
680
681 tk_xtime_add(tk, delta); 721 tk_xtime_add(tk, delta);
682 tk->wall_to_monotonic = timespec_sub(tk->wall_to_monotonic, *delta); 722 tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *delta));
683 update_sleep_time(timespec_add(tk->total_sleep_time, *delta)); 723 tk_set_sleep_time(tk, timespec_add(tk->total_sleep_time, *delta));
684} 724}
685 725
686
687/** 726/**
688 * timekeeping_inject_sleeptime - Adds suspend interval to timeekeeping values 727 * timekeeping_inject_sleeptime - Adds suspend interval to timeekeeping values
689 * @delta: pointer to a timespec delta value 728 * @delta: pointer to a timespec delta value
@@ -696,6 +735,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
696 */ 735 */
697void timekeeping_inject_sleeptime(struct timespec *delta) 736void timekeeping_inject_sleeptime(struct timespec *delta)
698{ 737{
738 struct timekeeper *tk = &timekeeper;
699 unsigned long flags; 739 unsigned long flags;
700 struct timespec ts; 740 struct timespec ts;
701 741
@@ -704,21 +744,20 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
704 if (!(ts.tv_sec == 0 && ts.tv_nsec == 0)) 744 if (!(ts.tv_sec == 0 && ts.tv_nsec == 0))
705 return; 745 return;
706 746
707 write_seqlock_irqsave(&timekeeper.lock, flags); 747 write_seqlock_irqsave(&tk->lock, flags);
708 748
709 timekeeping_forward_now(&timekeeper); 749 timekeeping_forward_now(tk);
710 750
711 __timekeeping_inject_sleeptime(&timekeeper, delta); 751 __timekeeping_inject_sleeptime(tk, delta);
712 752
713 timekeeping_update(&timekeeper, true); 753 timekeeping_update(tk, true);
714 754
715 write_sequnlock_irqrestore(&timekeeper.lock, flags); 755 write_sequnlock_irqrestore(&tk->lock, flags);
716 756
717 /* signal hrtimers about time change */ 757 /* signal hrtimers about time change */
718 clock_was_set(); 758 clock_was_set();
719} 759}
720 760
721
722/** 761/**
723 * timekeeping_resume - Resumes the generic timekeeping subsystem. 762 * timekeeping_resume - Resumes the generic timekeeping subsystem.
724 * 763 *
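The two hunks above rework sleep-time injection around the new tk_set_wall_to_mono()/tk_set_sleep_time() helpers, but the arithmetic is unchanged: the slept interval is added to wall time, subtracted from wall_to_monotonic (so CLOCK_MONOTONIC does not jump) and accumulated into total_sleep_time (so boot-based time does). A toy model of that bookkeeping, assuming simple timespec add/sub helpers:

    #include <time.h>

    #define NSEC_PER_SEC 1000000000L

    struct toy_tk {
        struct timespec xtime;              /* wall clock (CLOCK_REALTIME) */
        struct timespec wall_to_monotonic;  /* monotonic = xtime + wall_to_monotonic */
        struct timespec total_sleep_time;   /* accumulated suspend time */
    };

    static struct timespec ts_add(struct timespec a, struct timespec b)
    {
        a.tv_sec += b.tv_sec;
        a.tv_nsec += b.tv_nsec;
        if (a.tv_nsec >= NSEC_PER_SEC) {
            a.tv_sec++;
            a.tv_nsec -= NSEC_PER_SEC;
        }
        return a;
    }

    static struct timespec ts_sub(struct timespec a, struct timespec b)
    {
        a.tv_sec -= b.tv_sec;
        a.tv_nsec -= b.tv_nsec;
        if (a.tv_nsec < 0) {
            a.tv_sec--;
            a.tv_nsec += NSEC_PER_SEC;
        }
        return a;
    }

    /* mirrors __timekeeping_inject_sleeptime(): wall time jumps forward,
     * monotonic time does not, boot-based time absorbs the whole interval */
    static void inject_sleeptime(struct toy_tk *tk, struct timespec delta)
    {
        tk->xtime             = ts_add(tk->xtime, delta);
        tk->wall_to_monotonic = ts_sub(tk->wall_to_monotonic, delta);
        tk->total_sleep_time  = ts_add(tk->total_sleep_time, delta);
    }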
@@ -728,6 +767,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
728 */ 767 */
729static void timekeeping_resume(void) 768static void timekeeping_resume(void)
730{ 769{
770 struct timekeeper *tk = &timekeeper;
731 unsigned long flags; 771 unsigned long flags;
732 struct timespec ts; 772 struct timespec ts;
733 773
@@ -735,18 +775,18 @@ static void timekeeping_resume(void)
735 775
736 clocksource_resume(); 776 clocksource_resume();
737 777
738 write_seqlock_irqsave(&timekeeper.lock, flags); 778 write_seqlock_irqsave(&tk->lock, flags);
739 779
740 if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { 780 if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) {
741 ts = timespec_sub(ts, timekeeping_suspend_time); 781 ts = timespec_sub(ts, timekeeping_suspend_time);
742 __timekeeping_inject_sleeptime(&timekeeper, &ts); 782 __timekeeping_inject_sleeptime(tk, &ts);
743 } 783 }
744 /* re-base the last cycle value */ 784 /* re-base the last cycle value */
745 timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); 785 tk->clock->cycle_last = tk->clock->read(tk->clock);
746 timekeeper.ntp_error = 0; 786 tk->ntp_error = 0;
747 timekeeping_suspended = 0; 787 timekeeping_suspended = 0;
748 timekeeping_update(&timekeeper, false); 788 timekeeping_update(tk, false);
749 write_sequnlock_irqrestore(&timekeeper.lock, flags); 789 write_sequnlock_irqrestore(&tk->lock, flags);
750 790
751 touch_softlockup_watchdog(); 791 touch_softlockup_watchdog();
752 792
@@ -758,14 +798,15 @@ static void timekeeping_resume(void)
758 798
759static int timekeeping_suspend(void) 799static int timekeeping_suspend(void)
760{ 800{
801 struct timekeeper *tk = &timekeeper;
761 unsigned long flags; 802 unsigned long flags;
762 struct timespec delta, delta_delta; 803 struct timespec delta, delta_delta;
763 static struct timespec old_delta; 804 static struct timespec old_delta;
764 805
765 read_persistent_clock(&timekeeping_suspend_time); 806 read_persistent_clock(&timekeeping_suspend_time);
766 807
767 write_seqlock_irqsave(&timekeeper.lock, flags); 808 write_seqlock_irqsave(&tk->lock, flags);
768 timekeeping_forward_now(&timekeeper); 809 timekeeping_forward_now(tk);
769 timekeeping_suspended = 1; 810 timekeeping_suspended = 1;
770 811
771 /* 812 /*
@@ -774,7 +815,7 @@ static int timekeeping_suspend(void)
774 * try to compensate so the difference in system time 815 * try to compensate so the difference in system time
775 * and persistent_clock time stays close to constant. 816 * and persistent_clock time stays close to constant.
776 */ 817 */
777 delta = timespec_sub(tk_xtime(&timekeeper), timekeeping_suspend_time); 818 delta = timespec_sub(tk_xtime(tk), timekeeping_suspend_time);
778 delta_delta = timespec_sub(delta, old_delta); 819 delta_delta = timespec_sub(delta, old_delta);
779 if (abs(delta_delta.tv_sec) >= 2) { 820 if (abs(delta_delta.tv_sec) >= 2) {
780 /* 821 /*
@@ -787,7 +828,7 @@ static int timekeeping_suspend(void)
787 timekeeping_suspend_time = 828 timekeeping_suspend_time =
788 timespec_add(timekeeping_suspend_time, delta_delta); 829 timespec_add(timekeeping_suspend_time, delta_delta);
789 } 830 }
790 write_sequnlock_irqrestore(&timekeeper.lock, flags); 831 write_sequnlock_irqrestore(&tk->lock, flags);
791 832
792 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); 833 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
793 clocksource_suspend(); 834 clocksource_suspend();
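timekeeping_suspend() keeps comparing the current (system time - persistent clock) difference against the one recorded on the previous suspend: a jump of two seconds or more is treated as a deliberate clock change and only re-baselines the tracking, while a small drift is folded back into the recorded suspend timestamp so the difference stays roughly constant across suspend cycles. The same decision, reduced to whole seconds as an illustration (not the kernel code):

    #include <stdlib.h>

    /* delta_secs: current (system time - persistent clock) difference;
     * *old_delta_secs: the difference remembered from the previous suspend.
     * Returns the number of seconds to fold into the recorded suspend time. */
    static long compensate_suspend_secs(long delta_secs, long *old_delta_secs)
    {
        long delta_delta = delta_secs - *old_delta_secs;

        if (labs(delta_delta) >= 2) {
            /* wall time was probably stepped (e.g. settimeofday):
             * re-baseline and skip compensation this cycle */
            *old_delta_secs = delta_secs;
            return 0;
        }
        /* small drift: absorb it into the suspend timestamp */
        return delta_delta;
    }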
@@ -898,27 +939,29 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
898 * the error. This causes the likely below to be unlikely. 939 * the error. This causes the likely below to be unlikely.
899 * 940 *
900 * The proper fix is to avoid rounding up by using 941 * The proper fix is to avoid rounding up by using
901 * the high precision timekeeper.xtime_nsec instead of 942 * the high precision tk->xtime_nsec instead of
902 * xtime.tv_nsec everywhere. Fixing this will take some 943 * xtime.tv_nsec everywhere. Fixing this will take some
903 * time. 944 * time.
904 */ 945 */
905 if (likely(error <= interval)) 946 if (likely(error <= interval))
906 adj = 1; 947 adj = 1;
907 else 948 else
908 adj = timekeeping_bigadjust(tk, error, &interval, 949 adj = timekeeping_bigadjust(tk, error, &interval, &offset);
909 &offset); 950 } else {
910 } else if (error < -interval) { 951 if (error < -interval) {
911 /* See comment above, this is just switched for the negative */ 952 /* See comment above, this is just switched for the negative */
912 error >>= 2; 953 error >>= 2;
913 if (likely(error >= -interval)) { 954 if (likely(error >= -interval)) {
914 adj = -1; 955 adj = -1;
915 interval = -interval; 956 interval = -interval;
916 offset = -offset; 957 offset = -offset;
917 } else 958 } else {
918 adj = timekeeping_bigadjust(tk, error, &interval, 959 adj = timekeeping_bigadjust(tk, error, &interval, &offset);
919 &offset); 960 }
920 } else 961 } else {
921 return; 962 goto out_adjust;
963 }
964 }
922 965
923 if (unlikely(tk->clock->maxadj && 966 if (unlikely(tk->clock->maxadj &&
924 (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) { 967 (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) {
@@ -981,6 +1024,7 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
981 tk->xtime_nsec -= offset; 1024 tk->xtime_nsec -= offset;
982 tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; 1025 tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
983 1026
1027out_adjust:
984 /* 1028 /*
985 * It may be possible that when we entered this function, xtime_nsec 1029 * It may be possible that when we entered this function, xtime_nsec
986 * was very small. Further, if we're slightly speeding the clocksource 1030 * was very small. Further, if we're slightly speeding the clocksource
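The restructuring above keeps the adjustment policy intact while flattening the nesting: a modest NTP error nudges the clocksource multiplier by one step, a large one goes through timekeeping_bigadjust(), and an error below the threshold now skips the multiplier update via the new out_adjust label, so it still reaches the xtime_nsec underflow fixup that follows instead of returning early. The decision, compressed into a standalone sketch (arithmetic right shift of negative values assumed, as in the kernel):

    #include <stdint.h>

    /* returns the step applied to the multiplier:
     *  0        -> error too small, skip the adjustment
     *  +1 / -1  -> ordinary one-step nudge
     *  +2 / -2  -> stand-in for the timekeeping_bigadjust() path */
    static int pick_mult_adjustment(int64_t error, int64_t interval)
    {
        if (error > interval) {
            error >>= 2;                 /* damp the reaction */
            return (error <= interval) ? 1 : 2;
        }
        if (error < -interval) {
            error >>= 2;
            return (error >= -interval) ? -1 : -2;
        }
        return 0;                        /* |error| <= interval: nothing to do */
    }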
@@ -1003,7 +1047,6 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
1003 1047
1004} 1048}
1005 1049
1006
1007/** 1050/**
1008 * accumulate_nsecs_to_secs - Accumulates nsecs into secs 1051 * accumulate_nsecs_to_secs - Accumulates nsecs into secs
1009 * 1052 *
@@ -1024,15 +1067,21 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
1024 1067
1025 /* Figure out if its a leap sec and apply if needed */ 1068 /* Figure out if its a leap sec and apply if needed */
1026 leap = second_overflow(tk->xtime_sec); 1069 leap = second_overflow(tk->xtime_sec);
1027 tk->xtime_sec += leap; 1070 if (unlikely(leap)) {
1028 tk->wall_to_monotonic.tv_sec -= leap; 1071 struct timespec ts;
1029 if (leap) 1072
1030 clock_was_set_delayed(); 1073 tk->xtime_sec += leap;
1074
1075 ts.tv_sec = leap;
1076 ts.tv_nsec = 0;
1077 tk_set_wall_to_mono(tk,
1078 timespec_sub(tk->wall_to_monotonic, ts));
1031 1079
1080 clock_was_set_delayed();
1081 }
1032 } 1082 }
1033} 1083}
1034 1084
1035
1036/** 1085/**
1037 * logarithmic_accumulation - shifted accumulation of cycles 1086 * logarithmic_accumulation - shifted accumulation of cycles
1038 * 1087 *
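In the leap-second hunk above, accumulate_nsecs_to_secs() now only touches wall_to_monotonic when second_overflow() actually reports a leap: the wall clock absorbs the extra second while the monotonic offset moves the opposite way, so CLOCK_MONOTONIC stays smooth. A toy model of that carry-and-leap step (seconds-only offset, illustrative):

    #include <stdint.h>

    struct toy_accum {
        uint64_t xtime_nsec;   /* nanoseconds, stored shifted left by 'shift' */
        unsigned int shift;
        int64_t xtime_sec;     /* wall clock seconds */
        int64_t wtm_sec;       /* wall_to_monotonic, seconds only for brevity */
    };

    /* leap is -1, 0 or +1 for the second being accumulated */
    static void accumulate_secs(struct toy_accum *tk, int leap)
    {
        const uint64_t nsecps = (uint64_t)1000000000 << tk->shift;

        while (tk->xtime_nsec >= nsecps) {
            tk->xtime_nsec -= nsecps;
            tk->xtime_sec++;

            if (leap) {
                tk->xtime_sec += leap;   /* the wall clock jumps ...        */
                tk->wtm_sec   -= leap;   /* ... the monotonic clock does not */
                leap = 0;                /* real code re-asks NTP each second */
            }
        }
    }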
@@ -1076,7 +1125,6 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
1076 return offset; 1125 return offset;
1077} 1126}
1078 1127
1079
1080/** 1128/**
1081 * update_wall_time - Uses the current clocksource to increment the wall time 1129 * update_wall_time - Uses the current clocksource to increment the wall time
1082 * 1130 *
@@ -1084,25 +1132,30 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
1084static void update_wall_time(void) 1132static void update_wall_time(void)
1085{ 1133{
1086 struct clocksource *clock; 1134 struct clocksource *clock;
1135 struct timekeeper *tk = &timekeeper;
1087 cycle_t offset; 1136 cycle_t offset;
1088 int shift = 0, maxshift; 1137 int shift = 0, maxshift;
1089 unsigned long flags; 1138 unsigned long flags;
1090 s64 remainder; 1139 s64 remainder;
1091 1140
1092 write_seqlock_irqsave(&timekeeper.lock, flags); 1141 write_seqlock_irqsave(&tk->lock, flags);
1093 1142
1094 /* Make sure we're fully resumed: */ 1143 /* Make sure we're fully resumed: */
1095 if (unlikely(timekeeping_suspended)) 1144 if (unlikely(timekeeping_suspended))
1096 goto out; 1145 goto out;
1097 1146
1098 clock = timekeeper.clock; 1147 clock = tk->clock;
1099 1148
1100#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET 1149#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
1101 offset = timekeeper.cycle_interval; 1150 offset = tk->cycle_interval;
1102#else 1151#else
1103 offset = (clock->read(clock) - clock->cycle_last) & clock->mask; 1152 offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
1104#endif 1153#endif
1105 1154
1155 /* Check if there's really nothing to do */
1156 if (offset < tk->cycle_interval)
1157 goto out;
1158
1106 /* 1159 /*
1107 * With NO_HZ we may have to accumulate many cycle_intervals 1160 * With NO_HZ we may have to accumulate many cycle_intervals
1108 * (think "ticks") worth of time at once. To do this efficiently, 1161 * (think "ticks") worth of time at once. To do this efficiently,
@@ -1111,19 +1164,19 @@ static void update_wall_time(void)
1111 * chunk in one go, and then try to consume the next smaller 1164 * chunk in one go, and then try to consume the next smaller
1112 * doubled multiple. 1165 * doubled multiple.
1113 */ 1166 */
1114 shift = ilog2(offset) - ilog2(timekeeper.cycle_interval); 1167 shift = ilog2(offset) - ilog2(tk->cycle_interval);
1115 shift = max(0, shift); 1168 shift = max(0, shift);
1116 /* Bound shift to one less than what overflows tick_length */ 1169 /* Bound shift to one less than what overflows tick_length */
1117 maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; 1170 maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
1118 shift = min(shift, maxshift); 1171 shift = min(shift, maxshift);
1119 while (offset >= timekeeper.cycle_interval) { 1172 while (offset >= tk->cycle_interval) {
1120 offset = logarithmic_accumulation(&timekeeper, offset, shift); 1173 offset = logarithmic_accumulation(tk, offset, shift);
1121 if(offset < timekeeper.cycle_interval<<shift) 1174 if (offset < tk->cycle_interval<<shift)
1122 shift--; 1175 shift--;
1123 } 1176 }
1124 1177
1125 /* correct the clock when NTP error is too big */ 1178 /* correct the clock when NTP error is too big */
1126 timekeeping_adjust(&timekeeper, offset); 1179 timekeeping_adjust(tk, offset);
1127 1180
1128 1181
1129 /* 1182 /*
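The accumulation loop above consumes a potentially large cycle offset in power-of-two multiples of the base tick interval, biggest chunk first, so a CPU waking from a long NO_HZ idle period does not have to account thousands of ticks one by one. A self-contained sketch of the same idea (it omits the kernel's extra clamp of the shift against ntp_tick_length()):

    #include <stdint.h>

    static int ilog2_u64(uint64_t v)     /* floor(log2(v)) for v > 0 */
    {
        int l = -1;

        while (v) {
            v >>= 1;
            l++;
        }
        return l;
    }

    /* account one chunk of (interval << shift) cycles if enough is left */
    static uint64_t do_chunk(uint64_t offset, uint64_t interval, int shift)
    {
        if (offset < (interval << shift))
            return offset;
        return offset - (interval << shift);
    }

    static uint64_t accumulate(uint64_t offset, uint64_t interval)
    {
        int shift = ilog2_u64(offset) - ilog2_u64(interval);

        if (shift < 0)
            shift = 0;

        while (offset >= interval) {
            offset = do_chunk(offset, interval, shift);
            if (offset < (interval << shift))
                shift--;                 /* next chunk is half the size */
        }
        return offset;                   /* leftover: less than one interval */
    }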
@@ -1135,21 +1188,21 @@ static void update_wall_time(void)
1135 * the vsyscall implementations are converted to use xtime_nsec 1188 * the vsyscall implementations are converted to use xtime_nsec
1136 * (shifted nanoseconds), this can be killed. 1189 * (shifted nanoseconds), this can be killed.
1137 */ 1190 */
1138 remainder = timekeeper.xtime_nsec & ((1 << timekeeper.shift) - 1); 1191 remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);
1139 timekeeper.xtime_nsec -= remainder; 1192 tk->xtime_nsec -= remainder;
1140 timekeeper.xtime_nsec += 1 << timekeeper.shift; 1193 tk->xtime_nsec += 1ULL << tk->shift;
1141 timekeeper.ntp_error += remainder << timekeeper.ntp_error_shift; 1194 tk->ntp_error += remainder << tk->ntp_error_shift;
1142 1195
1143 /* 1196 /*
1144 * Finally, make sure that after the rounding 1197 * Finally, make sure that after the rounding
1145 * xtime_nsec isn't larger than NSEC_PER_SEC 1198 * xtime_nsec isn't larger than NSEC_PER_SEC
1146 */ 1199 */
1147 accumulate_nsecs_to_secs(&timekeeper); 1200 accumulate_nsecs_to_secs(tk);
1148 1201
1149 timekeeping_update(&timekeeper, false); 1202 timekeeping_update(tk, false);
1150 1203
1151out: 1204out:
1152 write_sequnlock_irqrestore(&timekeeper.lock, flags); 1205 write_sequnlock_irqrestore(&tk->lock, flags);
1153 1206
1154} 1207}
1155 1208
@@ -1166,18 +1219,18 @@ out:
1166 */ 1219 */
1167void getboottime(struct timespec *ts) 1220void getboottime(struct timespec *ts)
1168{ 1221{
1222 struct timekeeper *tk = &timekeeper;
1169 struct timespec boottime = { 1223 struct timespec boottime = {
1170 .tv_sec = timekeeper.wall_to_monotonic.tv_sec + 1224 .tv_sec = tk->wall_to_monotonic.tv_sec +
1171 timekeeper.total_sleep_time.tv_sec, 1225 tk->total_sleep_time.tv_sec,
1172 .tv_nsec = timekeeper.wall_to_monotonic.tv_nsec + 1226 .tv_nsec = tk->wall_to_monotonic.tv_nsec +
1173 timekeeper.total_sleep_time.tv_nsec 1227 tk->total_sleep_time.tv_nsec
1174 }; 1228 };
1175 1229
1176 set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); 1230 set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec);
1177} 1231}
1178EXPORT_SYMBOL_GPL(getboottime); 1232EXPORT_SYMBOL_GPL(getboottime);
1179 1233
1180
1181/** 1234/**
1182 * get_monotonic_boottime - Returns monotonic time since boot 1235 * get_monotonic_boottime - Returns monotonic time since boot
1183 * @ts: pointer to the timespec to be set 1236 * @ts: pointer to the timespec to be set
@@ -1189,19 +1242,20 @@ EXPORT_SYMBOL_GPL(getboottime);
1189 */ 1242 */
1190void get_monotonic_boottime(struct timespec *ts) 1243void get_monotonic_boottime(struct timespec *ts)
1191{ 1244{
1245 struct timekeeper *tk = &timekeeper;
1192 struct timespec tomono, sleep; 1246 struct timespec tomono, sleep;
1193 unsigned int seq; 1247 unsigned int seq;
1194 1248
1195 WARN_ON(timekeeping_suspended); 1249 WARN_ON(timekeeping_suspended);
1196 1250
1197 do { 1251 do {
1198 seq = read_seqbegin(&timekeeper.lock); 1252 seq = read_seqbegin(&tk->lock);
1199 ts->tv_sec = timekeeper.xtime_sec; 1253 ts->tv_sec = tk->xtime_sec;
1200 ts->tv_nsec = timekeeping_get_ns(&timekeeper); 1254 ts->tv_nsec = timekeeping_get_ns(tk);
1201 tomono = timekeeper.wall_to_monotonic; 1255 tomono = tk->wall_to_monotonic;
1202 sleep = timekeeper.total_sleep_time; 1256 sleep = tk->total_sleep_time;
1203 1257
1204 } while (read_seqretry(&timekeeper.lock, seq)); 1258 } while (read_seqretry(&tk->lock, seq));
1205 1259
1206 set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec, 1260 set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec,
1207 ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec); 1261 ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec);
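getboottime() and get_monotonic_boottime() above are pure bookkeeping on two offsets kept by the timekeeper: wall_to_monotonic converts the wall clock into CLOCK_MONOTONIC, and total_sleep_time lifts that further to boot-based time. The relations, written out with seconds-only helpers (illustrative, not kernel code):

    /* monotonic   = realtime + wall_to_monotonic
     * boot-based  = monotonic + total_sleep_time
     * boot moment = -(wall_to_monotonic + total_sleep_time), expressed in wall time,
     *               which is exactly what getboottime() computes above. */
    struct toy_offsets {
        long wall_to_monotonic;
        long total_sleep_time;
    };

    static long to_monotonic(const struct toy_offsets *o, long realtime)
    {
        return realtime + o->wall_to_monotonic;
    }

    static long to_boottime(const struct toy_offsets *o, long realtime)
    {
        return to_monotonic(o, realtime) + o->total_sleep_time;
    }

    static long boot_moment_walltime(const struct toy_offsets *o)
    {
        return -(o->wall_to_monotonic + o->total_sleep_time);
    }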
@@ -1231,31 +1285,38 @@ EXPORT_SYMBOL_GPL(ktime_get_boottime);
1231 */ 1285 */
1232void monotonic_to_bootbased(struct timespec *ts) 1286void monotonic_to_bootbased(struct timespec *ts)
1233{ 1287{
1234 *ts = timespec_add(*ts, timekeeper.total_sleep_time); 1288 struct timekeeper *tk = &timekeeper;
1289
1290 *ts = timespec_add(*ts, tk->total_sleep_time);
1235} 1291}
1236EXPORT_SYMBOL_GPL(monotonic_to_bootbased); 1292EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
1237 1293
1238unsigned long get_seconds(void) 1294unsigned long get_seconds(void)
1239{ 1295{
1240 return timekeeper.xtime_sec; 1296 struct timekeeper *tk = &timekeeper;
1297
1298 return tk->xtime_sec;
1241} 1299}
1242EXPORT_SYMBOL(get_seconds); 1300EXPORT_SYMBOL(get_seconds);
1243 1301
1244struct timespec __current_kernel_time(void) 1302struct timespec __current_kernel_time(void)
1245{ 1303{
1246 return tk_xtime(&timekeeper); 1304 struct timekeeper *tk = &timekeeper;
1305
1306 return tk_xtime(tk);
1247} 1307}
1248 1308
1249struct timespec current_kernel_time(void) 1309struct timespec current_kernel_time(void)
1250{ 1310{
1311 struct timekeeper *tk = &timekeeper;
1251 struct timespec now; 1312 struct timespec now;
1252 unsigned long seq; 1313 unsigned long seq;
1253 1314
1254 do { 1315 do {
1255 seq = read_seqbegin(&timekeeper.lock); 1316 seq = read_seqbegin(&tk->lock);
1256 1317
1257 now = tk_xtime(&timekeeper); 1318 now = tk_xtime(tk);
1258 } while (read_seqretry(&timekeeper.lock, seq)); 1319 } while (read_seqretry(&tk->lock, seq));
1259 1320
1260 return now; 1321 return now;
1261} 1322}
@@ -1263,15 +1324,16 @@ EXPORT_SYMBOL(current_kernel_time);
1263 1324
1264struct timespec get_monotonic_coarse(void) 1325struct timespec get_monotonic_coarse(void)
1265{ 1326{
1327 struct timekeeper *tk = &timekeeper;
1266 struct timespec now, mono; 1328 struct timespec now, mono;
1267 unsigned long seq; 1329 unsigned long seq;
1268 1330
1269 do { 1331 do {
1270 seq = read_seqbegin(&timekeeper.lock); 1332 seq = read_seqbegin(&tk->lock);
1271 1333
1272 now = tk_xtime(&timekeeper); 1334 now = tk_xtime(tk);
1273 mono = timekeeper.wall_to_monotonic; 1335 mono = tk->wall_to_monotonic;
1274 } while (read_seqretry(&timekeeper.lock, seq)); 1336 } while (read_seqretry(&tk->lock, seq));
1275 1337
1276 set_normalized_timespec(&now, now.tv_sec + mono.tv_sec, 1338 set_normalized_timespec(&now, now.tv_sec + mono.tv_sec,
1277 now.tv_nsec + mono.tv_nsec); 1339 now.tv_nsec + mono.tv_nsec);
@@ -1300,14 +1362,15 @@ void do_timer(unsigned long ticks)
1300void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, 1362void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
1301 struct timespec *wtom, struct timespec *sleep) 1363 struct timespec *wtom, struct timespec *sleep)
1302{ 1364{
1365 struct timekeeper *tk = &timekeeper;
1303 unsigned long seq; 1366 unsigned long seq;
1304 1367
1305 do { 1368 do {
1306 seq = read_seqbegin(&timekeeper.lock); 1369 seq = read_seqbegin(&tk->lock);
1307 *xtim = tk_xtime(&timekeeper); 1370 *xtim = tk_xtime(tk);
1308 *wtom = timekeeper.wall_to_monotonic; 1371 *wtom = tk->wall_to_monotonic;
1309 *sleep = timekeeper.total_sleep_time; 1372 *sleep = tk->total_sleep_time;
1310 } while (read_seqretry(&timekeeper.lock, seq)); 1373 } while (read_seqretry(&tk->lock, seq));
1311} 1374}
1312 1375
1313#ifdef CONFIG_HIGH_RES_TIMERS 1376#ifdef CONFIG_HIGH_RES_TIMERS
@@ -1321,19 +1384,20 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
1321 */ 1384 */
1322ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot) 1385ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot)
1323{ 1386{
1387 struct timekeeper *tk = &timekeeper;
1324 ktime_t now; 1388 ktime_t now;
1325 unsigned int seq; 1389 unsigned int seq;
1326 u64 secs, nsecs; 1390 u64 secs, nsecs;
1327 1391
1328 do { 1392 do {
1329 seq = read_seqbegin(&timekeeper.lock); 1393 seq = read_seqbegin(&tk->lock);
1330 1394
1331 secs = timekeeper.xtime_sec; 1395 secs = tk->xtime_sec;
1332 nsecs = timekeeping_get_ns(&timekeeper); 1396 nsecs = timekeeping_get_ns(tk);
1333 1397
1334 *offs_real = timekeeper.offs_real; 1398 *offs_real = tk->offs_real;
1335 *offs_boot = timekeeper.offs_boot; 1399 *offs_boot = tk->offs_boot;
1336 } while (read_seqretry(&timekeeper.lock, seq)); 1400 } while (read_seqretry(&tk->lock, seq));
1337 1401
1338 now = ktime_add_ns(ktime_set(secs, 0), nsecs); 1402 now = ktime_add_ns(ktime_set(secs, 0), nsecs);
1339 now = ktime_sub(now, *offs_real); 1403 now = ktime_sub(now, *offs_real);
@@ -1346,19 +1410,19 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot)
1346 */ 1410 */
1347ktime_t ktime_get_monotonic_offset(void) 1411ktime_t ktime_get_monotonic_offset(void)
1348{ 1412{
1413 struct timekeeper *tk = &timekeeper;
1349 unsigned long seq; 1414 unsigned long seq;
1350 struct timespec wtom; 1415 struct timespec wtom;
1351 1416
1352 do { 1417 do {
1353 seq = read_seqbegin(&timekeeper.lock); 1418 seq = read_seqbegin(&tk->lock);
1354 wtom = timekeeper.wall_to_monotonic; 1419 wtom = tk->wall_to_monotonic;
1355 } while (read_seqretry(&timekeeper.lock, seq)); 1420 } while (read_seqretry(&tk->lock, seq));
1356 1421
1357 return timespec_to_ktime(wtom); 1422 return timespec_to_ktime(wtom);
1358} 1423}
1359EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset); 1424EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset);
1360 1425
1361
1362/** 1426/**
1363 * xtime_update() - advances the timekeeping infrastructure 1427 * xtime_update() - advances the timekeeping infrastructure
1364 * @ticks: number of ticks, that have elapsed since the last call. 1428 * @ticks: number of ticks, that have elapsed since the last call.
diff --git a/kernel/timer.c b/kernel/timer.c
index a61c09374eba..8c5e7b908c68 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1407,13 +1407,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, seconds)
1407 1407
1408#endif 1408#endif
1409 1409
1410#ifndef __alpha__
1411
1412/*
1413 * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this
1414 * should be moved into arch/i386 instead?
1415 */
1416
1417/** 1410/**
1418 * sys_getpid - return the thread group id of the current process 1411 * sys_getpid - return the thread group id of the current process
1419 * 1412 *
@@ -1469,8 +1462,6 @@ SYSCALL_DEFINE0(getegid)
1469 return from_kgid_munged(current_user_ns(), current_egid()); 1462 return from_kgid_munged(current_user_ns(), current_egid());
1470} 1463}
1471 1464
1472#endif
1473
1474static void process_timeout(unsigned long __data) 1465static void process_timeout(unsigned long __data)
1475{ 1466{
1476 wake_up_process((struct task_struct *)__data); 1467 wake_up_process((struct task_struct *)__data);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index fee3752ae8f6..8a6d2ee2086c 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -281,7 +281,7 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip)
281 281
282 head = this_cpu_ptr(event_function.perf_events); 282 head = this_cpu_ptr(event_function.perf_events);
283 perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0, 283 perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
284 1, &regs, head); 284 1, &regs, head, NULL);
285 285
286#undef ENTRY_SIZE 286#undef ENTRY_SIZE
287} 287}
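This call site (and the kprobe, syscall and uprobe tracers below) gains a trailing NULL argument, which matches perf_trace_buf_submit() having grown an extra optional pointer parameter elsewhere in this series; it apparently lets a caller direct the sample at a specific task, and passing NULL keeps the old "current context" behaviour. Extending a C API with an optional, may-be-NULL trailing parameter is the usual way to do this; a generic illustration (names here are made up, not the perf API):

    #include <stddef.h>
    #include <stdio.h>

    struct target;  /* opaque recipient; NULL means "no specific target" */

    /* hypothetical submit helper whose last parameter is optional */
    static void submit_record(const void *payload, size_t len, struct target *tgt)
    {
        (void)payload;
        if (tgt == NULL) {
            printf("submit %zu bytes to the current context\n", len);
            return;
        }
        /* targeted delivery would go here */
    }

    int main(void)
    {
        submit_record("data", 4, NULL);  /* callers without a target just pass NULL */
        return 0;
    }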
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index b31d3d5699fe..1a2117043bb1 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1002,7 +1002,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
1002 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1002 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1003 1003
1004 head = this_cpu_ptr(call->perf_events); 1004 head = this_cpu_ptr(call->perf_events);
1005 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); 1005 perf_trace_buf_submit(entry, size, rctx,
1006 entry->ip, 1, regs, head, NULL);
1006} 1007}
1007 1008
1008/* Kretprobe profile handler */ 1009/* Kretprobe profile handler */
@@ -1033,7 +1034,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1033 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1034 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1034 1035
1035 head = this_cpu_ptr(call->perf_events); 1036 head = this_cpu_ptr(call->perf_events);
1036 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); 1037 perf_trace_buf_submit(entry, size, rctx,
1038 entry->ret_ip, 1, regs, head, NULL);
1037} 1039}
1038#endif /* CONFIG_PERF_EVENTS */ 1040#endif /* CONFIG_PERF_EVENTS */
1039 1041
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 96fc73369099..6b245f64c8dd 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -506,6 +506,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
506 int size; 506 int size;
507 507
508 syscall_nr = syscall_get_nr(current, regs); 508 syscall_nr = syscall_get_nr(current, regs);
509 if (syscall_nr < 0)
510 return;
509 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) 511 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
510 return; 512 return;
511 513
@@ -532,7 +534,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
532 (unsigned long *)&rec->args); 534 (unsigned long *)&rec->args);
533 535
534 head = this_cpu_ptr(sys_data->enter_event->perf_events); 536 head = this_cpu_ptr(sys_data->enter_event->perf_events);
535 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); 537 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
536} 538}
537 539
538int perf_sysenter_enable(struct ftrace_event_call *call) 540int perf_sysenter_enable(struct ftrace_event_call *call)
@@ -580,6 +582,8 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
580 int size; 582 int size;
581 583
582 syscall_nr = syscall_get_nr(current, regs); 584 syscall_nr = syscall_get_nr(current, regs);
585 if (syscall_nr < 0)
586 return;
583 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) 587 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
584 return; 588 return;
585 589
@@ -608,7 +612,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
608 rec->ret = syscall_get_return_value(current, regs); 612 rec->ret = syscall_get_return_value(current, regs);
609 613
610 head = this_cpu_ptr(sys_data->exit_event->perf_events); 614 head = this_cpu_ptr(sys_data->exit_event->perf_events);
611 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); 615 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
612} 616}
613 617
614int perf_sysexit_enable(struct ftrace_event_call *call) 618int perf_sysexit_enable(struct ftrace_event_call *call)
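Both perf syscall hooks above now bail out when syscall_get_nr() reports a negative number before using it as a bit index; on some architectures the helper can return -1 when the task is not inside a real system call, and feeding a negative index to test_bit() would read outside the enabled_perf_*_syscalls bitmap. A reduced illustration of the hazard and the guard:

    #include <stdbool.h>
    #include <stdio.h>

    #define NR_TOY_SYSCALLS 512
    #define BITS_PER_LONG   (8 * sizeof(unsigned long))

    static unsigned long enabled[NR_TOY_SYSCALLS / BITS_PER_LONG];

    static bool syscall_enabled(int nr)
    {
        if (nr < 0)                      /* the guard this patch adds */
            return false;
        if (nr >= NR_TOY_SYSCALLS)       /* extra belt-and-braces in the toy version */
            return false;
        return enabled[nr / BITS_PER_LONG] & (1UL << (nr % BITS_PER_LONG));
    }

    int main(void)
    {
        printf("%d\n", syscall_enabled(-1));  /* safely 0 instead of an out-of-bounds read */
        return 0;
    }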
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 2b36ac68549e..03003cd7dd96 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -670,7 +670,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
670 call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); 670 call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
671 671
672 head = this_cpu_ptr(call->perf_events); 672 head = this_cpu_ptr(call->perf_events);
673 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); 673 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head, NULL);
674 674
675 out: 675 out:
676 preempt_enable(); 676 preempt_enable();
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 69add8a9da68..4b1dfba70f7c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -575,7 +575,7 @@ out:
575/* 575/*
576 * Create/destroy watchdog threads as CPUs come and go: 576 * Create/destroy watchdog threads as CPUs come and go:
577 */ 577 */
578static int 578static int __cpuinit
579cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) 579cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
580{ 580{
581 int hotcpu = (unsigned long)hcpu; 581 int hotcpu = (unsigned long)hcpu;
@@ -610,27 +610,10 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
610 return NOTIFY_OK; 610 return NOTIFY_OK;
611} 611}
612 612
613static struct notifier_block cpu_nfb = { 613static struct notifier_block __cpuinitdata cpu_nfb = {
614 .notifier_call = cpu_callback 614 .notifier_call = cpu_callback
615}; 615};
616 616
617#ifdef CONFIG_SUSPEND
618/*
619 * On exit from suspend we force an offline->online transition on the boot CPU
620 * so that the PMU state that was lost while in suspended state gets set up
621 * properly for the boot CPU. This information is required for restarting the
622 * NMI watchdog.
623 */
624void lockup_detector_bootcpu_resume(void)
625{
626 void *cpu = (void *)(long)smp_processor_id();
627
628 cpu_callback(&cpu_nfb, CPU_DEAD_FROZEN, cpu);
629 cpu_callback(&cpu_nfb, CPU_UP_PREPARE_FROZEN, cpu);
630 cpu_callback(&cpu_nfb, CPU_ONLINE_FROZEN, cpu);
631}
632#endif
633
634void __init lockup_detector_init(void) 617void __init lockup_detector_init(void)
635{ 618{
636 void *cpu = (void *)(long)smp_processor_id(); 619 void *cpu = (void *)(long)smp_processor_id();