Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit_tree.c              |  19
-rw-r--r--  kernel/debug/kdb/kdb_debugger.c  |   4
-rw-r--r--  kernel/debug/kdb/kdb_io.c        |  11
-rw-r--r--  kernel/debug/kdb/kdb_main.c      |  15
-rw-r--r--  kernel/events/callchain.c        |   9
-rw-r--r--  kernel/events/core.c             |  94
-rw-r--r--  kernel/events/hw_breakpoint.c    |  11
-rw-r--r--  kernel/events/internal.h         |   3
-rw-r--r--  kernel/fork.c                    |   4
-rw-r--r--  kernel/futex.c                   |  17
-rw-r--r--  kernel/irq/manage.c              |  15
-rw-r--r--  kernel/pid_namespace.c           |   6
-rw-r--r--  kernel/power/suspend.c           |   3
-rw-r--r--  kernel/printk.c                  |   2
-rw-r--r--  kernel/sched/core.c              | 112
-rw-r--r--  kernel/sched/cpupri.c            |  10
-rw-r--r--  kernel/sched/fair.c              |  77
-rw-r--r--  kernel/sched/rt.c                |  14
-rw-r--r--  kernel/sched/sched.h             |   9
-rw-r--r--  kernel/sched/stop_task.c         |  22
-rw-r--r--  kernel/task_work.c               |   1
-rw-r--r--  kernel/time/jiffies.c            |   2
-rw-r--r--  kernel/time/ntp.c                |   2
-rw-r--r--  kernel/time/tick-sched.c         |   1
-rw-r--r--  kernel/time/timekeeping.c        | 455
-rw-r--r--  kernel/timer.c                   |   9
-rw-r--r--  kernel/trace/trace_event_perf.c  |   2
-rw-r--r--  kernel/trace/trace_kprobe.c      |   6
-rw-r--r--  kernel/trace/trace_syscalls.c    |   8
-rw-r--r--  kernel/trace/trace_uprobe.c      |   2
-rw-r--r--  kernel/watchdog.c                |  21
-rw-r--r--  kernel/workqueue.c               | 147
32 files changed, 647 insertions, 466 deletions
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 3a5ca582ba1e..ed206fd88cca 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -250,7 +250,6 @@ static void untag_chunk(struct node *p)
 		spin_unlock(&hash_lock);
 		spin_unlock(&entry->lock);
 		fsnotify_destroy_mark(entry);
-		fsnotify_put_mark(entry);
 		goto out;
 	}
 
@@ -259,7 +258,7 @@ static void untag_chunk(struct node *p)
 
 	fsnotify_duplicate_mark(&new->mark, entry);
 	if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) {
-		free_chunk(new);
+		fsnotify_put_mark(&new->mark);
 		goto Fallback;
 	}
 
@@ -293,7 +292,7 @@ static void untag_chunk(struct node *p)
 	spin_unlock(&hash_lock);
 	spin_unlock(&entry->lock);
 	fsnotify_destroy_mark(entry);
-	fsnotify_put_mark(entry);
+	fsnotify_put_mark(&new->mark);	/* drop initial reference */
 	goto out;
 
 Fallback:
@@ -322,7 +321,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
 
 	entry = &chunk->mark;
 	if (fsnotify_add_mark(entry, audit_tree_group, inode, NULL, 0)) {
-		free_chunk(chunk);
+		fsnotify_put_mark(entry);
 		return -ENOSPC;
 	}
 
@@ -347,6 +346,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
 	insert_hash(chunk);
 	spin_unlock(&hash_lock);
 	spin_unlock(&entry->lock);
+	fsnotify_put_mark(entry);	/* drop initial reference */
 	return 0;
 }
 
@@ -396,7 +396,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	fsnotify_duplicate_mark(chunk_entry, old_entry);
 	if (fsnotify_add_mark(chunk_entry, chunk_entry->group, chunk_entry->i.inode, NULL, 1)) {
 		spin_unlock(&old_entry->lock);
-		free_chunk(chunk);
+		fsnotify_put_mark(chunk_entry);
 		fsnotify_put_mark(old_entry);
 		return -ENOSPC;
 	}
@@ -444,8 +444,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	spin_unlock(&chunk_entry->lock);
 	spin_unlock(&old_entry->lock);
 	fsnotify_destroy_mark(old_entry);
+	fsnotify_put_mark(chunk_entry);	/* drop initial reference */
 	fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
-	fsnotify_put_mark(old_entry); /* and kill it */
 	return 0;
 }
 
@@ -916,7 +916,12 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify
 	struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
 
 	evict_chunk(chunk);
-	fsnotify_put_mark(entry);
+
+	/*
+	 * We are guaranteed to have at least one reference to the mark from
+	 * either the inode or the caller of fsnotify_destroy_mark().
+	 */
+	BUG_ON(atomic_read(&entry->refcnt) < 1);
 }
 
 static bool audit_tree_send_event(struct fsnotify_group *group, struct inode *inode,
diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c
index 8b68ce78ff17..be7b33b73d30 100644
--- a/kernel/debug/kdb/kdb_debugger.c
+++ b/kernel/debug/kdb/kdb_debugger.c
@@ -12,6 +12,7 @@
 #include <linux/kdb.h>
 #include <linux/kdebug.h>
 #include <linux/export.h>
+#include <linux/hardirq.h>
 #include "kdb_private.h"
 #include "../debug_core.h"
 
@@ -52,6 +53,9 @@ int kdb_stub(struct kgdb_state *ks)
 	if (atomic_read(&kgdb_setting_breakpoint))
 		reason = KDB_REASON_KEYBOARD;
 
+	if (in_nmi())
+		reason = KDB_REASON_NMI;
+
 	for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT; i++, bp++) {
 		if ((bp->bp_enabled) && (bp->bp_addr == addr)) {
 			reason = KDB_REASON_BREAK;
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index bb9520f0f6ff..0a69d2adc4f3 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -715,9 +715,6 @@ kdb_printit:
 	/* check for having reached the LINES number of printed lines */
 	if (kdb_nextline == linecount) {
 		char buf1[16] = "";
-#if defined(CONFIG_SMP)
-		char buf2[32];
-#endif
 
 		/* Watch out for recursion here. Any routine that calls
 		 * kdb_printf will come back through here. And kdb_read
@@ -732,14 +729,6 @@ kdb_printit:
 		if (moreprompt == NULL)
 			moreprompt = "more> ";
 
-#if defined(CONFIG_SMP)
-		if (strchr(moreprompt, '%')) {
-			sprintf(buf2, moreprompt, get_cpu());
-			put_cpu();
-			moreprompt = buf2;
-		}
-#endif
-
 		kdb_input_flush();
 		c = console_drivers;
 
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 1f91413edb87..31df1706b9a9 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -139,11 +139,10 @@ static const int __nkdb_err = sizeof(kdbmsgs) / sizeof(kdbmsg_t);
 static char *__env[] = {
 #if defined(CONFIG_SMP)
  "PROMPT=[%d]kdb> ",
- "MOREPROMPT=[%d]more> ",
 #else
  "PROMPT=kdb> ",
- "MOREPROMPT=more> ",
 #endif
+ "MOREPROMPT=more> ",
  "RADIX=16",
  "MDCOUNT=8",			/* lines of md output */
  KDB_PLATFORM_ENV,
@@ -1236,18 +1235,6 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
 		*cmdbuf = '\0';
 		*(cmd_hist[cmd_head]) = '\0';
 
-		if (KDB_FLAG(ONLY_DO_DUMP)) {
-			/* kdb is off but a catastrophic error requires a dump.
-			 * Take the dump and reboot.
-			 * Turn on logging so the kdb output appears in the log
-			 * buffer in the dump.
-			 */
-			const char *setargs[] = { "set", "LOGGING", "1" };
-			kdb_set(2, setargs);
-			kdb_reboot(0, NULL);
-			/*NOTREACHED*/
-		}
-
 do_full_getstr:
 #if defined(CONFIG_SMP)
 		snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"),
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 6581a040f399..98d4597f43d6 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -153,7 +153,8 @@ put_callchain_entry(int rctx)
153 put_recursion_context(__get_cpu_var(callchain_recursion), rctx); 153 put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
154} 154}
155 155
156struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) 156struct perf_callchain_entry *
157perf_callchain(struct perf_event *event, struct pt_regs *regs)
157{ 158{
158 int rctx; 159 int rctx;
159 struct perf_callchain_entry *entry; 160 struct perf_callchain_entry *entry;
@@ -178,6 +179,12 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
178 } 179 }
179 180
180 if (regs) { 181 if (regs) {
182 /*
183 * Disallow cross-task user callchains.
184 */
185 if (event->ctx->task && event->ctx->task != current)
186 goto exit_put;
187
181 perf_callchain_store(entry, PERF_CONTEXT_USER); 188 perf_callchain_store(entry, PERF_CONTEXT_USER);
182 perf_callchain_user(entry, regs); 189 perf_callchain_user(entry, regs);
183 } 190 }
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f1cf0edeb39a..7fee567153f0 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1253,7 +1253,7 @@ retry:
 /*
  * Cross CPU call to disable a performance event
  */
-static int __perf_event_disable(void *info)
+int __perf_event_disable(void *info)
 {
 	struct perf_event *event = info;
 	struct perf_event_context *ctx = event->ctx;
@@ -2935,12 +2935,12 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 /*
  * Called when the last reference to the file is gone.
  */
-static int perf_release(struct inode *inode, struct file *file)
+static void put_event(struct perf_event *event)
 {
-	struct perf_event *event = file->private_data;
 	struct task_struct *owner;
 
-	file->private_data = NULL;
+	if (!atomic_long_dec_and_test(&event->refcount))
+		return;
 
 	rcu_read_lock();
 	owner = ACCESS_ONCE(event->owner);
@@ -2975,7 +2975,13 @@ static int perf_release(struct inode *inode, struct file *file)
 		put_task_struct(owner);
 	}
 
-	return perf_event_release_kernel(event);
+	perf_event_release_kernel(event);
+}
+
+static int perf_release(struct inode *inode, struct file *file)
+{
+	put_event(file->private_data);
+	return 0;
 }
 
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
@@ -3227,7 +3233,7 @@ unlock:
 
 static const struct file_operations perf_fops;
 
-static struct perf_event *perf_fget_light(int fd, int *fput_needed)
+static struct file *perf_fget_light(int fd, int *fput_needed)
 {
 	struct file *file;
 
@@ -3241,7 +3247,7 @@ static struct perf_event *perf_fget_light(int fd, int *fput_needed)
 		return ERR_PTR(-EBADF);
 	}
 
-	return file->private_data;
+	return file;
 }
 
 static int perf_event_set_output(struct perf_event *event,
@@ -3273,19 +3279,21 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 	case PERF_EVENT_IOC_SET_OUTPUT:
 	{
+		struct file *output_file = NULL;
 		struct perf_event *output_event = NULL;
 		int fput_needed = 0;
 		int ret;
 
 		if (arg != -1) {
-			output_event = perf_fget_light(arg, &fput_needed);
-			if (IS_ERR(output_event))
-				return PTR_ERR(output_event);
+			output_file = perf_fget_light(arg, &fput_needed);
+			if (IS_ERR(output_file))
+				return PTR_ERR(output_file);
+			output_event = output_file->private_data;
 		}
 
 		ret = perf_event_set_output(event, output_event);
 		if (output_event)
-			fput_light(output_event->filp, fput_needed);
+			fput_light(output_file, fput_needed);
 
 		return ret;
 	}
@@ -4039,7 +4047,7 @@ void perf_prepare_sample(struct perf_event_header *header,
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		int size = 1;
 
-		data->callchain = perf_callchain(regs);
+		data->callchain = perf_callchain(event, regs);
 
 		if (data->callchain)
 			size += data->callchain->nr;
@@ -5209,7 +5217,8 @@ static int perf_tp_event_match(struct perf_event *event,
 }
 
 void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
-		   struct pt_regs *regs, struct hlist_head *head, int rctx)
+		   struct pt_regs *regs, struct hlist_head *head, int rctx,
+		   struct task_struct *task)
 {
 	struct perf_sample_data data;
 	struct perf_event *event;
@@ -5228,6 +5237,31 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
 			perf_swevent_event(event, count, &data, regs);
 	}
 
+	/*
+	 * If we got specified a target task, also iterate its context and
+	 * deliver this event there too.
+	 */
+	if (task && task != current) {
+		struct perf_event_context *ctx;
+		struct trace_entry *entry = record;
+
+		rcu_read_lock();
+		ctx = rcu_dereference(task->perf_event_ctxp[perf_sw_context]);
+		if (!ctx)
+			goto unlock;
+
+		list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+			if (event->attr.type != PERF_TYPE_TRACEPOINT)
+				continue;
+			if (event->attr.config != entry->type)
+				continue;
+			if (perf_tp_event_match(event, &data, regs))
+				perf_swevent_event(event, count, &data, regs);
+		}
+unlock:
+		rcu_read_unlock();
+	}
+
 	perf_swevent_put_recursion_context(rctx);
 }
 EXPORT_SYMBOL_GPL(perf_tp_event);
@@ -5924,6 +5958,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
 	mutex_init(&event->mmap_mutex);
 
+	atomic_long_set(&event->refcount, 1);
 	event->cpu		= cpu;
 	event->attr		= *attr;
 	event->group_leader	= group_leader;
@@ -6234,12 +6269,12 @@ SYSCALL_DEFINE5(perf_event_open,
 		return event_fd;
 
 	if (group_fd != -1) {
-		group_leader = perf_fget_light(group_fd, &fput_needed);
-		if (IS_ERR(group_leader)) {
-			err = PTR_ERR(group_leader);
+		group_file = perf_fget_light(group_fd, &fput_needed);
+		if (IS_ERR(group_file)) {
+			err = PTR_ERR(group_file);
 			goto err_fd;
 		}
-		group_file = group_leader->filp;
+		group_leader = group_file->private_data;
 		if (flags & PERF_FLAG_FD_OUTPUT)
 			output_event = group_leader;
 		if (flags & PERF_FLAG_FD_NO_GROUP)
@@ -6376,7 +6411,6 @@ SYSCALL_DEFINE5(perf_event_open,
 		put_ctx(gctx);
 	}
 
-	event->filp = event_file;
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
 
@@ -6470,7 +6504,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 		goto err_free;
 	}
 
-	event->filp = NULL;
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
 	perf_install_in_context(ctx, event, cpu);
@@ -6552,7 +6585,7 @@ static void sync_child_event(struct perf_event *child_event,
 	 * Release the parent event, if this was the last
 	 * reference to it.
 	 */
-	fput(parent_event->filp);
+	put_event(parent_event);
 }
 
 static void
@@ -6628,9 +6661,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	 *
 	 *   __perf_event_exit_task()
	 *     sync_child_event()
-	 *       fput(parent_event->filp)
-	 *         perf_release()
-	 *           mutex_lock(&ctx->mutex)
+	 *       put_event()
+	 *         mutex_lock(&ctx->mutex)
 	 *
 	 * But since its the parent context it won't be the same instance.
 	 */
@@ -6698,7 +6730,7 @@ static void perf_free_event(struct perf_event *event,
 	list_del_init(&event->child_list);
 	mutex_unlock(&parent->child_mutex);
 
-	fput(parent->filp);
+	put_event(parent);
 
 	perf_group_detach(event);
 	list_del_event(event, ctx);
@@ -6778,6 +6810,12 @@ inherit_event(struct perf_event *parent_event,
 					   NULL, NULL);
 	if (IS_ERR(child_event))
 		return child_event;
+
+	if (!atomic_long_inc_not_zero(&parent_event->refcount)) {
+		free_event(child_event);
+		return NULL;
+	}
+
 	get_ctx(child_ctx);
 
 	/*
@@ -6819,14 +6857,6 @@ inherit_event(struct perf_event *parent_event,
 	raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
 
 	/*
-	 * Get a reference to the parent filp - we will fput it
-	 * when the child event exits. This is safe to do because
-	 * we are in the parent and we know that the filp still
-	 * exists and has a nonzero count:
-	 */
-	atomic_long_inc(&parent_event->filp->f_count);
-
-	/*
 	 * Link this into the parent event's child list
 	 */
 	WARN_ON_ONCE(parent_event->ctx->parent_ctx);
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index bb38c4d3ee12..9a7b487c6fe2 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -453,7 +453,16 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
453 int old_type = bp->attr.bp_type; 453 int old_type = bp->attr.bp_type;
454 int err = 0; 454 int err = 0;
455 455
456 perf_event_disable(bp); 456 /*
457 * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
458 * will not be possible to raise IPIs that invoke __perf_event_disable.
459 * So call the function directly after making sure we are targeting the
460 * current task.
461 */
462 if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
463 __perf_event_disable(bp);
464 else
465 perf_event_disable(bp);
457 466
458 bp->attr.bp_addr = attr->bp_addr; 467 bp->attr.bp_addr = attr->bp_addr;
459 bp->attr.bp_type = attr->bp_type; 468 bp->attr.bp_type = attr->bp_type;
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index b0b107f90afc..a096c19f2c2a 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -101,7 +101,8 @@ __output_copy(struct perf_output_handle *handle,
101} 101}
102 102
103/* Callchain handling */ 103/* Callchain handling */
104extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); 104extern struct perf_callchain_entry *
105perf_callchain(struct perf_event *event, struct pt_regs *regs);
105extern int get_callchain_buffers(void); 106extern int get_callchain_buffers(void);
106extern void put_callchain_buffers(void); 107extern void put_callchain_buffers(void);
107 108
diff --git a/kernel/fork.c b/kernel/fork.c
index 3bd2280d79f6..2c8857e12855 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -455,8 +455,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
455 if (retval) 455 if (retval)
456 goto out; 456 goto out;
457 457
458 if (file && uprobe_mmap(tmp)) 458 if (file)
459 goto out; 459 uprobe_mmap(tmp);
460 } 460 }
461 /* a new mm has just been created */ 461 /* a new mm has just been created */
462 arch_dup_mmap(oldmm, mm); 462 arch_dup_mmap(oldmm, mm);
diff --git a/kernel/futex.c b/kernel/futex.c
index e2b0fb9a0b3b..3717e7b306e0 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2231,11 +2231,11 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2231 * @uaddr2: the pi futex we will take prior to returning to user-space 2231 * @uaddr2: the pi futex we will take prior to returning to user-space
2232 * 2232 *
2233 * The caller will wait on uaddr and will be requeued by futex_requeue() to 2233 * The caller will wait on uaddr and will be requeued by futex_requeue() to
2234 * uaddr2 which must be PI aware. Normal wakeup will wake on uaddr2 and 2234 * uaddr2 which must be PI aware and unique from uaddr. Normal wakeup will wake
2235 * complete the acquisition of the rt_mutex prior to returning to userspace. 2235 * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to
2236 * This ensures the rt_mutex maintains an owner when it has waiters; without 2236 * userspace. This ensures the rt_mutex maintains an owner when it has waiters;
2237 * one, the pi logic wouldn't know which task to boost/deboost, if there was a 2237 * without one, the pi logic would not know which task to boost/deboost, if
2238 * need to. 2238 * there was a need to.
2239 * 2239 *
2240 * We call schedule in futex_wait_queue_me() when we enqueue and return there 2240 * We call schedule in futex_wait_queue_me() when we enqueue and return there
2241 * via the following: 2241 * via the following:
@@ -2272,6 +2272,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
2272 struct futex_q q = futex_q_init; 2272 struct futex_q q = futex_q_init;
2273 int res, ret; 2273 int res, ret;
2274 2274
2275 if (uaddr == uaddr2)
2276 return -EINVAL;
2277
2275 if (!bitset) 2278 if (!bitset)
2276 return -EINVAL; 2279 return -EINVAL;
2277 2280
@@ -2343,7 +2346,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
2343 * signal. futex_unlock_pi() will not destroy the lock_ptr nor 2346 * signal. futex_unlock_pi() will not destroy the lock_ptr nor
2344 * the pi_state. 2347 * the pi_state.
2345 */ 2348 */
2346 WARN_ON(!&q.pi_state); 2349 WARN_ON(!q.pi_state);
2347 pi_mutex = &q.pi_state->pi_mutex; 2350 pi_mutex = &q.pi_state->pi_mutex;
2348 ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1); 2351 ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
2349 debug_rt_mutex_free_waiter(&rt_waiter); 2352 debug_rt_mutex_free_waiter(&rt_waiter);
@@ -2370,7 +2373,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
2370 * fault, unlock the rt_mutex and return the fault to userspace. 2373 * fault, unlock the rt_mutex and return the fault to userspace.
2371 */ 2374 */
2372 if (ret == -EFAULT) { 2375 if (ret == -EFAULT) {
2373 if (rt_mutex_owner(pi_mutex) == current) 2376 if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
2374 rt_mutex_unlock(pi_mutex); 2377 rt_mutex_unlock(pi_mutex);
2375 } else if (ret == -EINTR) { 2378 } else if (ret == -EINTR) {
2376 /* 2379 /*
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0a8e8f059627..4c69326aa773 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -944,6 +944,18 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
944 } 944 }
945 945
946 /* 946 /*
947 * Drivers are often written to work w/o knowledge about the
948 * underlying irq chip implementation, so a request for a
949 * threaded irq without a primary hard irq context handler
950 * requires the ONESHOT flag to be set. Some irq chips like
951 * MSI based interrupts are per se one shot safe. Check the
952 * chip flags, so we can avoid the unmask dance at the end of
953 * the threaded handler for those.
954 */
955 if (desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE)
956 new->flags &= ~IRQF_ONESHOT;
957
958 /*
947 * The following block of code has to be executed atomically 959 * The following block of code has to be executed atomically
948 */ 960 */
949 raw_spin_lock_irqsave(&desc->lock, flags); 961 raw_spin_lock_irqsave(&desc->lock, flags);
@@ -1017,7 +1029,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
1017 */ 1029 */
1018 new->thread_mask = 1 << ffz(thread_mask); 1030 new->thread_mask = 1 << ffz(thread_mask);
1019 1031
1020 } else if (new->handler == irq_default_primary_handler) { 1032 } else if (new->handler == irq_default_primary_handler &&
1033 !(desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE)) {
1021 /* 1034 /*
1022 * The interrupt was requested with handler = NULL, so 1035 * The interrupt was requested with handler = NULL, so
1023 * we use the default primary handler for it. But it 1036 * we use the default primary handler for it. But it
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index b3c7fd554250..6144bab8fd8e 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -232,15 +232,19 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write,
232 */ 232 */
233 233
234 tmp.data = &current->nsproxy->pid_ns->last_pid; 234 tmp.data = &current->nsproxy->pid_ns->last_pid;
235 return proc_dointvec(&tmp, write, buffer, lenp, ppos); 235 return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
236} 236}
237 237
238extern int pid_max;
239static int zero = 0;
238static struct ctl_table pid_ns_ctl_table[] = { 240static struct ctl_table pid_ns_ctl_table[] = {
239 { 241 {
240 .procname = "ns_last_pid", 242 .procname = "ns_last_pid",
241 .maxlen = sizeof(int), 243 .maxlen = sizeof(int),
242 .mode = 0666, /* permissions are checked in the handler */ 244 .mode = 0666, /* permissions are checked in the handler */
243 .proc_handler = pid_ns_ctl_handler, 245 .proc_handler = pid_ns_ctl_handler,
246 .extra1 = &zero,
247 .extra2 = &pid_max,
244 }, 248 },
245 { } 249 { }
246}; 250};
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 1da39ea248fd..c8b7446b27df 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -178,9 +178,6 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
178 arch_suspend_enable_irqs(); 178 arch_suspend_enable_irqs();
179 BUG_ON(irqs_disabled()); 179 BUG_ON(irqs_disabled());
180 180
181 /* Kick the lockup detector */
182 lockup_detector_bootcpu_resume();
183
184 Enable_cpus: 181 Enable_cpus:
185 enable_nonboot_cpus(); 182 enable_nonboot_cpus();
186 183
diff --git a/kernel/printk.c b/kernel/printk.c
index 6a76ab9d4476..66a2ea37b576 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1034,6 +1034,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
1034 struct log *msg = log_from_idx(idx); 1034 struct log *msg = log_from_idx(idx);
1035 1035
1036 len += msg_print_text(msg, prev, true, NULL, 0); 1036 len += msg_print_text(msg, prev, true, NULL, 0);
1037 prev = msg->flags;
1037 idx = log_next(idx); 1038 idx = log_next(idx);
1038 seq++; 1039 seq++;
1039 } 1040 }
@@ -1046,6 +1047,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
1046 struct log *msg = log_from_idx(idx); 1047 struct log *msg = log_from_idx(idx);
1047 1048
1048 len -= msg_print_text(msg, prev, true, NULL, 0); 1049 len -= msg_print_text(msg, prev, true, NULL, 0);
1050 prev = msg->flags;
1049 idx = log_next(idx); 1051 idx = log_next(idx);
1050 seq++; 1052 seq++;
1051 } 1053 }
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d325c4b2dcbb..649c9f876cb1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3142,6 +3142,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 # define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
 #endif
 
+static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
+{
+	u64 temp = (__force u64) rtime;
+
+	temp *= (__force u64) utime;
+
+	if (sizeof(cputime_t) == 4)
+		temp = div_u64(temp, (__force u32) total);
+	else
+		temp = div64_u64(temp, (__force u64) total);
+
+	return (__force cputime_t) temp;
+}
+
 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	cputime_t rtime, utime = p->utime, total = utime + p->stime;
@@ -3151,13 +3165,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	 */
 	rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
 
-	if (total) {
-		u64 temp = (__force u64) rtime;
-
-		temp *= (__force u64) utime;
-		do_div(temp, (__force u32) total);
-		utime = (__force cputime_t) temp;
-	} else
+	if (total)
+		utime = scale_utime(utime, rtime, total);
+	else
 		utime = rtime;
 
 	/*
@@ -3184,13 +3194,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	total = cputime.utime + cputime.stime;
 	rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
 
-	if (total) {
-		u64 temp = (__force u64) rtime;
-
-		temp *= (__force u64) cputime.utime;
-		do_div(temp, (__force u32) total);
-		utime = (__force cputime_t) temp;
-	} else
+	if (total)
+		utime = scale_utime(cputime.utime, rtime, total);
+	else
 		utime = rtime;
 
 	sig->prev_utime = max(sig->prev_utime, utime);
@@ -4340,9 +4346,7 @@ recheck:
 	 */
 	if (unlikely(policy == p->policy && (!rt_policy(policy) ||
 			param->sched_priority == p->rt_priority))) {
-
-		__task_rq_unlock(rq);
-		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		task_rq_unlock(rq, p, &flags);
 		return 0;
 	}
 
@@ -5300,27 +5304,17 @@ void idle_task_exit(void)
 }
 
 /*
- * While a dead CPU has no uninterruptible tasks queued at this point,
- * it might still have a nonzero ->nr_uninterruptible counter, because
- * for performance reasons the counter is not stricly tracking tasks to
- * their home CPUs. So we just add the counter to another CPU's counter,
- * to keep the global sum constant after CPU-down:
- */
-static void migrate_nr_uninterruptible(struct rq *rq_src)
-{
-	struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
-
-	rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible;
-	rq_src->nr_uninterruptible = 0;
-}
-
-/*
- * remove the tasks which were accounted by rq from calc_load_tasks.
+ * Since this CPU is going 'away' for a while, fold any nr_active delta
+ * we might have. Assumes we're called after migrate_tasks() so that the
+ * nr_active count is stable.
+ *
+ * Also see the comment "Global load-average calculations".
  */
-static void calc_global_load_remove(struct rq *rq)
+static void calc_load_migrate(struct rq *rq)
 {
-	atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
-	rq->calc_load_active = 0;
+	long delta = calc_load_fold_active(rq);
+	if (delta)
+		atomic_long_add(delta, &calc_load_tasks);
 }
 
 /*
@@ -5348,9 +5342,6 @@ static void migrate_tasks(unsigned int dead_cpu)
 	 */
 	rq->stop = NULL;
 
-	/* Ensure any throttled groups are reachable by pick_next_task */
-	unthrottle_offline_cfs_rqs(rq);
-
 	for ( ; ; ) {
 		/*
 		 * There's this thread running, bail when that's the only
@@ -5614,8 +5605,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		BUG_ON(rq->nr_running != 1); /* the migration thread */
 		raw_spin_unlock_irqrestore(&rq->lock, flags);
 
-		migrate_nr_uninterruptible(rq);
-		calc_global_load_remove(rq);
+		calc_load_migrate(rq);
 		break;
 #endif
 	}
@@ -6024,11 +6014,6 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
  * SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this
  * allows us to avoid some pointer chasing select_idle_sibling().
  *
- * Iterate domains and sched_groups downward, assigning CPUs to be
- * select_idle_sibling() hw buddy. Cross-wiring hw makes bouncing
- * due to random perturbation self canceling, ie sw buddies pull
- * their counterpart to their CPU's hw counterpart.
- *
  * Also keep a unique ID per domain (we use the first cpu number in
  * the cpumask of the domain), this allows us to quickly tell if
  * two cpus are in the same cache domain, see cpus_share_cache().
@@ -6042,40 +6027,8 @@ static void update_top_cache_domain(int cpu)
 	int id = cpu;
 
 	sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
-	if (sd) {
-		struct sched_domain *tmp = sd;
-		struct sched_group *sg, *prev;
-		bool right;
-
-		/*
-		 * Traverse to first CPU in group, and count hops
-		 * to cpu from there, switching direction on each
-		 * hop, never ever pointing the last CPU rightward.
-		 */
-		do {
-			id = cpumask_first(sched_domain_span(tmp));
-			prev = sg = tmp->groups;
-			right = 1;
-
-			while (cpumask_first(sched_group_cpus(sg)) != id)
-				sg = sg->next;
-
-			while (!cpumask_test_cpu(cpu, sched_group_cpus(sg))) {
-				prev = sg;
-				sg = sg->next;
-				right = !right;
-			}
-
-			/* A CPU went down, never point back to domain start. */
-			if (right && cpumask_first(sched_group_cpus(sg->next)) == id)
-				right = false;
-
-			sg = right ? sg->next : prev;
-			tmp->idle_buddy = cpumask_first(sched_group_cpus(sg));
-		} while ((tmp = tmp->child));
-
+	if (sd)
 		id = cpumask_first(sched_domain_span(sd));
-	}
 
 	rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
 	per_cpu(sd_llc_id, cpu) = id;
@@ -7248,6 +7201,7 @@ int in_sched_functions(unsigned long addr)
 
 #ifdef CONFIG_CGROUP_SCHED
 struct task_group root_task_group;
+LIST_HEAD(task_groups);
 #endif
 
 DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index d72586fdf660..23aa789c53ee 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -65,8 +65,8 @@ static int convert_prio(int prio)
65int cpupri_find(struct cpupri *cp, struct task_struct *p, 65int cpupri_find(struct cpupri *cp, struct task_struct *p,
66 struct cpumask *lowest_mask) 66 struct cpumask *lowest_mask)
67{ 67{
68 int idx = 0; 68 int idx = 0;
69 int task_pri = convert_prio(p->prio); 69 int task_pri = convert_prio(p->prio);
70 70
71 if (task_pri >= MAX_RT_PRIO) 71 if (task_pri >= MAX_RT_PRIO)
72 return 0; 72 return 0;
@@ -137,9 +137,9 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
137 */ 137 */
138void cpupri_set(struct cpupri *cp, int cpu, int newpri) 138void cpupri_set(struct cpupri *cp, int cpu, int newpri)
139{ 139{
140 int *currpri = &cp->cpu_to_pri[cpu]; 140 int *currpri = &cp->cpu_to_pri[cpu];
141 int oldpri = *currpri; 141 int oldpri = *currpri;
142 int do_mb = 0; 142 int do_mb = 0;
143 143
144 newpri = convert_prio(newpri); 144 newpri = convert_prio(newpri);
145 145
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 22321db64952..96e2b18b6283 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2052,7 +2052,7 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	hrtimer_cancel(&cfs_b->slack_timer);
 }
 
-void unthrottle_offline_cfs_rqs(struct rq *rq)
+static void unthrottle_offline_cfs_rqs(struct rq *rq)
 {
 	struct cfs_rq *cfs_rq;
 
@@ -2106,7 +2106,7 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
 	return NULL;
 }
 static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
-void unthrottle_offline_cfs_rqs(struct rq *rq) {}
+static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {}
 
 #endif /* CONFIG_CFS_BANDWIDTH */
 
@@ -2637,6 +2637,8 @@ static int select_idle_sibling(struct task_struct *p, int target)
 	int cpu = smp_processor_id();
 	int prev_cpu = task_cpu(p);
 	struct sched_domain *sd;
+	struct sched_group *sg;
+	int i;
 
 	/*
 	 * If the task is going to be woken-up on this cpu and if it is
@@ -2653,17 +2655,29 @@
 		return prev_cpu;
 
 	/*
-	 * Otherwise, check assigned siblings to find an elegible idle cpu.
+	 * Otherwise, iterate the domains and find an elegible idle cpu.
 	 */
 	sd = rcu_dereference(per_cpu(sd_llc, target));
-
 	for_each_lower_domain(sd) {
-		if (!cpumask_test_cpu(sd->idle_buddy, tsk_cpus_allowed(p)))
-			continue;
-		if (idle_cpu(sd->idle_buddy))
-			return sd->idle_buddy;
-	}
-
+		sg = sd->groups;
+		do {
+			if (!cpumask_intersects(sched_group_cpus(sg),
+						tsk_cpus_allowed(p)))
+				goto next;
+
+			for_each_cpu(i, sched_group_cpus(sg)) {
+				if (!idle_cpu(i))
+					goto next;
+			}
+
+			target = cpumask_first_and(sched_group_cpus(sg),
+					tsk_cpus_allowed(p));
+			goto done;
+next:
+			sg = sg->next;
+		} while (sg != sd->groups);
+	}
+done:
 	return target;
 }
 
@@ -3069,6 +3083,9 @@ struct lb_env {
 	int			new_dst_cpu;
 	enum cpu_idle_type	idle;
 	long			imbalance;
+	/* The set of CPUs under consideration for load-balancing */
+	struct cpumask		*cpus;
+
 	unsigned int		flags;
 
 	unsigned int		loop;
@@ -3384,6 +3401,14 @@ static int tg_load_down(struct task_group *tg, void *data)
 
 static void update_h_load(long cpu)
 {
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long now = jiffies;
+
+	if (rq->h_load_throttle == now)
+		return;
+
+	rq->h_load_throttle = now;
+
 	rcu_read_lock();
 	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 	rcu_read_unlock();
@@ -3647,14 +3672,12 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
  * @group: sched_group whose statistics are to be updated.
  * @load_idx: Load index of sched_domain of this_cpu for load calc.
  * @local_group: Does group contain this_cpu.
- * @cpus: Set of cpus considered for load balancing.
 * @balance: Should we balance.
 * @sgs: variable to hold the statistics for this group.
 */
 static inline void update_sg_lb_stats(struct lb_env *env,
 			struct sched_group *group, int load_idx,
-			int local_group, const struct cpumask *cpus,
-			int *balance, struct sg_lb_stats *sgs)
+			int local_group, int *balance, struct sg_lb_stats *sgs)
 {
 	unsigned long nr_running, max_nr_running, min_nr_running;
 	unsigned long load, max_cpu_load, min_cpu_load;
@@ -3671,7 +3694,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 	max_nr_running = 0;
 	min_nr_running = ~0UL;
 
-	for_each_cpu_and(i, sched_group_cpus(group), cpus) {
+	for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
 		struct rq *rq = cpu_rq(i);
 
 		nr_running = rq->nr_running;
@@ -3795,13 +3818,11 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 /**
 * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
 * @env: The load balancing environment.
- * @cpus: Set of cpus considered for load balancing.
 * @balance: Should we balance.
 * @sds: variable to hold the statistics for this sched_domain.
 */
 static inline void update_sd_lb_stats(struct lb_env *env,
-					const struct cpumask *cpus,
-					int *balance, struct sd_lb_stats *sds)
+					int *balance, struct sd_lb_stats *sds)
 {
 	struct sched_domain *child = env->sd->child;
 	struct sched_group *sg = env->sd->groups;
@@ -3818,8 +3839,7 @@ static inline void update_sd_lb_stats(struct lb_env *env,
 
 		local_group = cpumask_test_cpu(env->dst_cpu, sched_group_cpus(sg));
 		memset(&sgs, 0, sizeof(sgs));
-		update_sg_lb_stats(env, sg, load_idx, local_group,
-				   cpus, balance, &sgs);
+		update_sg_lb_stats(env, sg, load_idx, local_group, balance, &sgs);
 
 		if (local_group && !(*balance))
 			return;
@@ -4055,7 +4075,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 * to restore balance.
 *
 * @env: The load balancing environment.
- * @cpus: The set of CPUs under consideration for load-balancing.
 * @balance: Pointer to a variable indicating if this_cpu
 *	is the appropriate cpu to perform load balancing at this_level.
 *
@@ -4065,7 +4084,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 *		   put to idle by rebalancing its tasks onto our group.
 */
 static struct sched_group *
-find_busiest_group(struct lb_env *env, const struct cpumask *cpus, int *balance)
+find_busiest_group(struct lb_env *env, int *balance)
 {
 	struct sd_lb_stats sds;
 
@@ -4075,7 +4094,7 @@ find_busiest_group(struct lb_env *env, const struct cpumask *cpus, int *balance)
 	 * Compute the various statistics relavent for load balancing at
 	 * this level.
 	 */
-	update_sd_lb_stats(env, cpus, balance, &sds);
+	update_sd_lb_stats(env, balance, &sds);
 
 	/*
 	 * this_cpu is not the appropriate cpu to perform load balancing at
@@ -4155,8 +4174,7 @@ ret:
 * find_busiest_queue - find the busiest runqueue among the cpus in group.
 */
 static struct rq *find_busiest_queue(struct lb_env *env,
-				     struct sched_group *group,
-				     const struct cpumask *cpus)
+				     struct sched_group *group)
 {
 	struct rq *busiest = NULL, *rq;
 	unsigned long max_load = 0;
@@ -4171,7 +4189,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 		if (!capacity)
 			capacity = fix_small_capacity(env->sd, group);
 
-		if (!cpumask_test_cpu(i, cpus))
+		if (!cpumask_test_cpu(i, env->cpus))
 			continue;
 
 		rq = cpu_rq(i);
@@ -4252,6 +4270,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		.dst_grpmask    = sched_group_cpus(sd->groups),
 		.idle		= idle,
 		.loop_break	= sched_nr_migrate_break,
+		.cpus		= cpus,
 	};
 
 	cpumask_copy(cpus, cpu_active_mask);
@@ -4260,7 +4279,7 @@
 	schedstat_inc(sd, lb_count[idle]);
 
 redo:
-	group = find_busiest_group(&env, cpus, balance);
+	group = find_busiest_group(&env, balance);
 
 	if (*balance == 0)
 		goto out_balanced;
@@ -4270,7 +4289,7 @@ redo:
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(&env, group, cpus);
+	busiest = find_busiest_queue(&env, group);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[idle]);
 		goto out_balanced;
@@ -4294,11 +4313,10 @@ redo:
 	env.src_rq    = busiest;
 	env.loop_max  = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
+	update_h_load(env.src_cpu);
more_balance:
 	local_irq_save(flags);
 	double_rq_lock(this_rq, busiest);
-	if (!env.loop)
-		update_h_load(env.src_cpu);
 
 	/*
 	 * cur_ld_moved - load moved in current iteration
@@ -4950,6 +4968,9 @@ static void rq_online_fair(struct rq *rq)
 static void rq_offline_fair(struct rq *rq)
 {
 	update_sysctl();
+
+	/* Ensure any throttled groups are reachable by pick_next_task */
+	unthrottle_offline_cfs_rqs(rq);
 }
 
 #endif /* CONFIG_SMP */
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 573e1ca01102..e0b7ba9c040f 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -691,6 +691,7 @@ balanced:
691 * runtime - in which case borrowing doesn't make sense. 691 * runtime - in which case borrowing doesn't make sense.
692 */ 692 */
693 rt_rq->rt_runtime = RUNTIME_INF; 693 rt_rq->rt_runtime = RUNTIME_INF;
694 rt_rq->rt_throttled = 0;
694 raw_spin_unlock(&rt_rq->rt_runtime_lock); 695 raw_spin_unlock(&rt_rq->rt_runtime_lock);
695 raw_spin_unlock(&rt_b->rt_runtime_lock); 696 raw_spin_unlock(&rt_b->rt_runtime_lock);
696 } 697 }
@@ -788,6 +789,19 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
788 const struct cpumask *span; 789 const struct cpumask *span;
789 790
790 span = sched_rt_period_mask(); 791 span = sched_rt_period_mask();
792#ifdef CONFIG_RT_GROUP_SCHED
793 /*
794 * FIXME: isolated CPUs should really leave the root task group,
795 * whether they are isolcpus or were isolated via cpusets, lest
796 * the timer run on a CPU which does not service all runqueues,
797 * potentially leaving other CPUs indefinitely throttled. If
798 * isolation is really required, the user will turn the throttle
799 * off to kill the perturbations it causes anyway. Meanwhile,
800 * this maintains functionality for boot and/or troubleshooting.
801 */
802 if (rt_b == &root_task_group.rt_bandwidth)
803 span = cpu_online_mask;
804#endif
791 for_each_cpu(i, span) { 805 for_each_cpu(i, span) {
792 int enqueue = 0; 806 int enqueue = 0;
793 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); 807 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c35a1a7dd4d6..0848fa36c383 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -80,7 +80,7 @@ extern struct mutex sched_domains_mutex;
80struct cfs_rq; 80struct cfs_rq;
81struct rt_rq; 81struct rt_rq;
82 82
83static LIST_HEAD(task_groups); 83extern struct list_head task_groups;
84 84
85struct cfs_bandwidth { 85struct cfs_bandwidth {
86#ifdef CONFIG_CFS_BANDWIDTH 86#ifdef CONFIG_CFS_BANDWIDTH
@@ -374,7 +374,11 @@ struct rq {
374#ifdef CONFIG_FAIR_GROUP_SCHED 374#ifdef CONFIG_FAIR_GROUP_SCHED
375 /* list of leaf cfs_rq on this cpu: */ 375 /* list of leaf cfs_rq on this cpu: */
376 struct list_head leaf_cfs_rq_list; 376 struct list_head leaf_cfs_rq_list;
377#endif 377#ifdef CONFIG_SMP
378 unsigned long h_load_throttle;
379#endif /* CONFIG_SMP */
380#endif /* CONFIG_FAIR_GROUP_SCHED */
381
378#ifdef CONFIG_RT_GROUP_SCHED 382#ifdef CONFIG_RT_GROUP_SCHED
379 struct list_head leaf_rt_rq_list; 383 struct list_head leaf_rt_rq_list;
380#endif 384#endif
@@ -1140,7 +1144,6 @@ extern void print_rt_stats(struct seq_file *m, int cpu);
1140 1144
1141extern void init_cfs_rq(struct cfs_rq *cfs_rq); 1145extern void init_cfs_rq(struct cfs_rq *cfs_rq);
1142extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq); 1146extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq);
1143extern void unthrottle_offline_cfs_rqs(struct rq *rq);
1144 1147
1145extern void account_cfs_bandwidth_used(int enabled, int was_enabled); 1148extern void account_cfs_bandwidth_used(int enabled, int was_enabled);
1146 1149
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 7b386e86fd23..da5eb5bed84a 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -27,8 +27,10 @@ static struct task_struct *pick_next_task_stop(struct rq *rq)
27{ 27{
28 struct task_struct *stop = rq->stop; 28 struct task_struct *stop = rq->stop;
29 29
30 if (stop && stop->on_rq) 30 if (stop && stop->on_rq) {
31 stop->se.exec_start = rq->clock_task;
31 return stop; 32 return stop;
33 }
32 34
33 return NULL; 35 return NULL;
34} 36}
@@ -52,6 +54,21 @@ static void yield_task_stop(struct rq *rq)
52 54
53static void put_prev_task_stop(struct rq *rq, struct task_struct *prev) 55static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
54{ 56{
57 struct task_struct *curr = rq->curr;
58 u64 delta_exec;
59
60 delta_exec = rq->clock_task - curr->se.exec_start;
61 if (unlikely((s64)delta_exec < 0))
62 delta_exec = 0;
63
64 schedstat_set(curr->se.statistics.exec_max,
65 max(curr->se.statistics.exec_max, delta_exec));
66
67 curr->se.sum_exec_runtime += delta_exec;
68 account_group_exec_runtime(curr, delta_exec);
69
70 curr->se.exec_start = rq->clock_task;
71 cpuacct_charge(curr, delta_exec);
55} 72}
56 73
57static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued) 74static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
@@ -60,6 +77,9 @@ static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
60 77
61static void set_curr_task_stop(struct rq *rq) 78static void set_curr_task_stop(struct rq *rq)
62{ 79{
80 struct task_struct *stop = rq->stop;
81
82 stop->se.exec_start = rq->clock_task;
63} 83}
64 84
65static void switched_to_stop(struct rq *rq, struct task_struct *p) 85static void switched_to_stop(struct rq *rq, struct task_struct *p)
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 91d4e1742a0c..d320d44903bd 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -75,6 +75,7 @@ void task_work_run(void)
75 p = q->next; 75 p = q->next;
76 q->func(q); 76 q->func(q);
77 q = p; 77 q = p;
78 cond_resched();
78 } 79 }
79 } 80 }
80} 81}
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index a470154e0408..46da0537c10b 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -37,7 +37,7 @@
37 * requested HZ value. It is also not recommended 37 * requested HZ value. It is also not recommended
38 * for "tick-less" systems. 38 * for "tick-less" systems.
39 */ 39 */
40#define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ)) 40#define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/SHIFTED_HZ))
41 41
42/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier 42/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier
43 * conversion, the .shift value could be zero. However 43 * conversion, the .shift value could be zero. However
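The ACTHZ to SHIFTED_HZ rename above does not change the arithmetic: both the numerator and the divisor carry an extra 8-bit shift, so the division still yields nanoseconds per jiffy. A quick standalone check of that math, assuming HZ=250 and ignoring the small timer-latch correction the real SHIFTED_HZ folds in:

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC	1000000000ULL
#define HZ		250
/* Assumption: the real SHIFTED_HZ applies a small correction on top of HZ<<8 */
#define SHIFTED_HZ_SKETCH	((uint64_t)HZ << 8)

int main(void)
{
	uint32_t nsec_per_jiffy =
		(uint32_t)((NSEC_PER_SEC << 8) / SHIFTED_HZ_SKETCH);

	printf("NSEC_PER_JIFFY ~= %u ns at HZ=%d\n", nsec_per_jiffy, HZ);
	return 0;
}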
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index b7fbadc5c973..24174b4d669b 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -28,7 +28,7 @@ DEFINE_SPINLOCK(ntp_lock);
28/* USER_HZ period (usecs): */ 28/* USER_HZ period (usecs): */
29unsigned long tick_usec = TICK_USEC; 29unsigned long tick_usec = TICK_USEC;
30 30
31/* ACTHZ period (nsecs): */ 31/* SHIFTED_HZ period (nsecs): */
32unsigned long tick_nsec; 32unsigned long tick_nsec;
33 33
34static u64 tick_length; 34static u64 tick_length;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 024540f97f74..3a9e5d5c1091 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -573,6 +573,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
573 tick_do_update_jiffies64(now); 573 tick_do_update_jiffies64(now);
574 update_cpu_load_nohz(); 574 update_cpu_load_nohz();
575 575
576 calc_load_exit_idle();
576 touch_softlockup_watchdog(); 577 touch_softlockup_watchdog();
577 /* 578 /*
578 * Cancel the scheduled timer and restore the tick 579 * Cancel the scheduled timer and restore the tick
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f045cc50832d..d3b91e75cecd 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -65,14 +65,14 @@ struct timekeeper {
65 * used instead. 65 * used instead.
66 */ 66 */
67 struct timespec wall_to_monotonic; 67 struct timespec wall_to_monotonic;
68 /* time spent in suspend */
69 struct timespec total_sleep_time;
70 /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
71 struct timespec raw_time;
72 /* Offset clock monotonic -> clock realtime */ 68 /* Offset clock monotonic -> clock realtime */
73 ktime_t offs_real; 69 ktime_t offs_real;
70 /* time spent in suspend */
71 struct timespec total_sleep_time;
74 /* Offset clock monotonic -> clock boottime */ 72 /* Offset clock monotonic -> clock boottime */
75 ktime_t offs_boot; 73 ktime_t offs_boot;
74 /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
75 struct timespec raw_time;
76 /* Seqlock for all timekeeper values */ 76 /* Seqlock for all timekeeper values */
77 seqlock_t lock; 77 seqlock_t lock;
78}; 78};
@@ -108,13 +108,39 @@ static struct timespec tk_xtime(struct timekeeper *tk)
108static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts) 108static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts)
109{ 109{
110 tk->xtime_sec = ts->tv_sec; 110 tk->xtime_sec = ts->tv_sec;
111 tk->xtime_nsec = ts->tv_nsec << tk->shift; 111 tk->xtime_nsec = (u64)ts->tv_nsec << tk->shift;
112} 112}
113 113
114static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts) 114static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts)
115{ 115{
116 tk->xtime_sec += ts->tv_sec; 116 tk->xtime_sec += ts->tv_sec;
117 tk->xtime_nsec += ts->tv_nsec << tk->shift; 117 tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift;
118 tk_normalize_xtime(tk);
119}
120
121static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm)
122{
123 struct timespec tmp;
124
125 /*
126 * Verify consistency of: offset_real = -wall_to_monotonic
127 * before modifying anything
128 */
129 set_normalized_timespec(&tmp, -tk->wall_to_monotonic.tv_sec,
130 -tk->wall_to_monotonic.tv_nsec);
131 WARN_ON_ONCE(tk->offs_real.tv64 != timespec_to_ktime(tmp).tv64);
132 tk->wall_to_monotonic = wtm;
133 set_normalized_timespec(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
134 tk->offs_real = timespec_to_ktime(tmp);
135}
136
137static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t)
138{
139 /* Verify consistency before modifying */
140 WARN_ON_ONCE(tk->offs_boot.tv64 != timespec_to_ktime(tk->total_sleep_time).tv64);
141
142 tk->total_sleep_time = t;
143 tk->offs_boot = timespec_to_ktime(t);
118} 144}
119 145
120/** 146/**
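The two setters added above exist to keep the ktime offsets in lock-step with the timespec fields they mirror: offs_real must always equal the negated wall_to_monotonic, and offs_boot must always equal total_sleep_time, which is why callers now go through tk_set_wall_to_mono()/tk_set_sleep_time() instead of writing the fields directly. A minimal userspace sketch of that invariant; the struct and helper names here are illustrative, not the kernel's:

#include <stdio.h>
#include <time.h>

/* Illustrative only: offs_real must always equal -wall_to_monotonic,
 * so both are updated through a single setter. */
struct tk_sketch {
	struct timespec wall_to_monotonic;	/* normally negative: boot happened after the epoch */
	long long offs_real_ns;			/* stand-in for the ktime_t offset */
};

static void set_wall_to_mono(struct tk_sketch *tk, struct timespec wtm)
{
	tk->wall_to_monotonic = wtm;
	/* offset clock monotonic -> clock realtime is the negation */
	tk->offs_real_ns = -(wtm.tv_sec * 1000000000LL + wtm.tv_nsec);
}

int main(void)
{
	struct tk_sketch tk;
	struct timespec wtm = { .tv_sec = -100, .tv_nsec = 250000000 };

	set_wall_to_mono(&tk, wtm);
	printf("offs_real = %lld ns\n", tk.offs_real_ns);
	return 0;
}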
@@ -217,14 +243,6 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
217 return nsec + arch_gettimeoffset(); 243 return nsec + arch_gettimeoffset();
218} 244}
219 245
220static void update_rt_offset(struct timekeeper *tk)
221{
222 struct timespec tmp, *wtm = &tk->wall_to_monotonic;
223
224 set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec);
225 tk->offs_real = timespec_to_ktime(tmp);
226}
227
228/* must hold write on timekeeper.lock */ 246/* must hold write on timekeeper.lock */
229static void timekeeping_update(struct timekeeper *tk, bool clearntp) 247static void timekeeping_update(struct timekeeper *tk, bool clearntp)
230{ 248{
@@ -234,12 +252,10 @@ static void timekeeping_update(struct timekeeper *tk, bool clearntp)
234 tk->ntp_error = 0; 252 tk->ntp_error = 0;
235 ntp_clear(); 253 ntp_clear();
236 } 254 }
237 update_rt_offset(tk);
238 xt = tk_xtime(tk); 255 xt = tk_xtime(tk);
239 update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult); 256 update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult);
240} 257}
241 258
242
243/** 259/**
244 * timekeeping_forward_now - update clock to the current time 260 * timekeeping_forward_now - update clock to the current time
245 * 261 *
@@ -261,7 +277,7 @@ static void timekeeping_forward_now(struct timekeeper *tk)
261 tk->xtime_nsec += cycle_delta * tk->mult; 277 tk->xtime_nsec += cycle_delta * tk->mult;
262 278
263 /* If arch requires, add in gettimeoffset() */ 279 /* If arch requires, add in gettimeoffset() */
264 tk->xtime_nsec += arch_gettimeoffset() << tk->shift; 280 tk->xtime_nsec += (u64)arch_gettimeoffset() << tk->shift;
265 281
266 tk_normalize_xtime(tk); 282 tk_normalize_xtime(tk);
267 283
@@ -277,38 +293,39 @@ static void timekeeping_forward_now(struct timekeeper *tk)
277 */ 293 */
278void getnstimeofday(struct timespec *ts) 294void getnstimeofday(struct timespec *ts)
279{ 295{
296 struct timekeeper *tk = &timekeeper;
280 unsigned long seq; 297 unsigned long seq;
281 s64 nsecs = 0; 298 s64 nsecs = 0;
282 299
283 WARN_ON(timekeeping_suspended); 300 WARN_ON(timekeeping_suspended);
284 301
285 do { 302 do {
286 seq = read_seqbegin(&timekeeper.lock); 303 seq = read_seqbegin(&tk->lock);
287 304
288 ts->tv_sec = timekeeper.xtime_sec; 305 ts->tv_sec = tk->xtime_sec;
289 ts->tv_nsec = timekeeping_get_ns(&timekeeper); 306 nsecs = timekeeping_get_ns(tk);
290 307
291 } while (read_seqretry(&timekeeper.lock, seq)); 308 } while (read_seqretry(&tk->lock, seq));
292 309
310 ts->tv_nsec = 0;
293 timespec_add_ns(ts, nsecs); 311 timespec_add_ns(ts, nsecs);
294} 312}
295EXPORT_SYMBOL(getnstimeofday); 313EXPORT_SYMBOL(getnstimeofday);
296 314
297ktime_t ktime_get(void) 315ktime_t ktime_get(void)
298{ 316{
317 struct timekeeper *tk = &timekeeper;
299 unsigned int seq; 318 unsigned int seq;
300 s64 secs, nsecs; 319 s64 secs, nsecs;
301 320
302 WARN_ON(timekeeping_suspended); 321 WARN_ON(timekeeping_suspended);
303 322
304 do { 323 do {
305 seq = read_seqbegin(&timekeeper.lock); 324 seq = read_seqbegin(&tk->lock);
306 secs = timekeeper.xtime_sec + 325 secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
307 timekeeper.wall_to_monotonic.tv_sec; 326 nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec;
308 nsecs = timekeeping_get_ns(&timekeeper) +
309 timekeeper.wall_to_monotonic.tv_nsec;
310 327
311 } while (read_seqretry(&timekeeper.lock, seq)); 328 } while (read_seqretry(&tk->lock, seq));
312 /* 329 /*
313 * Use ktime_set/ktime_add_ns to create a proper ktime on 330 * Use ktime_set/ktime_add_ns to create a proper ktime on
314 * 32-bit architectures without CONFIG_KTIME_SCALAR. 331 * 32-bit architectures without CONFIG_KTIME_SCALAR.
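getnstimeofday() and the other readers above now keep the nanosecond part in a 64-bit value inside the seqlock retry loop and fold it in afterwards with timespec_add_ns(), rather than assigning tv_nsec directly, because the shifted xtime_nsec accumulator can legitimately yield a value at or above NSEC_PER_SEC. A small sketch of why that normalization matters; add_ns() here stands in for what timespec_add_ns() does:

#include <stdio.h>
#include <time.h>

#define NSEC_PER_SEC	1000000000LL

/* Illustrative: add a possibly >= 1s nanosecond delta to a timespec,
 * carrying the overflow into tv_sec. */
static void add_ns(struct timespec *ts, long long ns)
{
	ns += ts->tv_nsec;
	ts->tv_sec += ns / NSEC_PER_SEC;
	ts->tv_nsec = ns % NSEC_PER_SEC;
}

int main(void)
{
	struct timespec ts = { .tv_sec = 10, .tv_nsec = 0 };

	add_ns(&ts, 2500000000LL);	/* 2.5s worth of accumulated nsecs */
	printf("%ld.%09ld\n", (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}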
@@ -327,21 +344,24 @@ EXPORT_SYMBOL_GPL(ktime_get);
327 */ 344 */
328void ktime_get_ts(struct timespec *ts) 345void ktime_get_ts(struct timespec *ts)
329{ 346{
347 struct timekeeper *tk = &timekeeper;
330 struct timespec tomono; 348 struct timespec tomono;
349 s64 nsec;
331 unsigned int seq; 350 unsigned int seq;
332 351
333 WARN_ON(timekeeping_suspended); 352 WARN_ON(timekeeping_suspended);
334 353
335 do { 354 do {
336 seq = read_seqbegin(&timekeeper.lock); 355 seq = read_seqbegin(&tk->lock);
337 ts->tv_sec = timekeeper.xtime_sec; 356 ts->tv_sec = tk->xtime_sec;
338 ts->tv_nsec = timekeeping_get_ns(&timekeeper); 357 nsec = timekeeping_get_ns(tk);
339 tomono = timekeeper.wall_to_monotonic; 358 tomono = tk->wall_to_monotonic;
340 359
341 } while (read_seqretry(&timekeeper.lock, seq)); 360 } while (read_seqretry(&tk->lock, seq));
342 361
343 set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec, 362 ts->tv_sec += tomono.tv_sec;
344 ts->tv_nsec + tomono.tv_nsec); 363 ts->tv_nsec = 0;
364 timespec_add_ns(ts, nsec + tomono.tv_nsec);
345} 365}
346EXPORT_SYMBOL_GPL(ktime_get_ts); 366EXPORT_SYMBOL_GPL(ktime_get_ts);
347 367
@@ -358,22 +378,23 @@ EXPORT_SYMBOL_GPL(ktime_get_ts);
358 */ 378 */
359void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) 379void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
360{ 380{
381 struct timekeeper *tk = &timekeeper;
361 unsigned long seq; 382 unsigned long seq;
362 s64 nsecs_raw, nsecs_real; 383 s64 nsecs_raw, nsecs_real;
363 384
364 WARN_ON_ONCE(timekeeping_suspended); 385 WARN_ON_ONCE(timekeeping_suspended);
365 386
366 do { 387 do {
367 seq = read_seqbegin(&timekeeper.lock); 388 seq = read_seqbegin(&tk->lock);
368 389
369 *ts_raw = timekeeper.raw_time; 390 *ts_raw = tk->raw_time;
370 ts_real->tv_sec = timekeeper.xtime_sec; 391 ts_real->tv_sec = tk->xtime_sec;
371 ts_real->tv_nsec = 0; 392 ts_real->tv_nsec = 0;
372 393
373 nsecs_raw = timekeeping_get_ns_raw(&timekeeper); 394 nsecs_raw = timekeeping_get_ns_raw(tk);
374 nsecs_real = timekeeping_get_ns(&timekeeper); 395 nsecs_real = timekeeping_get_ns(tk);
375 396
376 } while (read_seqretry(&timekeeper.lock, seq)); 397 } while (read_seqretry(&tk->lock, seq));
377 398
378 timespec_add_ns(ts_raw, nsecs_raw); 399 timespec_add_ns(ts_raw, nsecs_raw);
379 timespec_add_ns(ts_real, nsecs_real); 400 timespec_add_ns(ts_real, nsecs_real);
@@ -406,28 +427,28 @@ EXPORT_SYMBOL(do_gettimeofday);
406 */ 427 */
407int do_settimeofday(const struct timespec *tv) 428int do_settimeofday(const struct timespec *tv)
408{ 429{
430 struct timekeeper *tk = &timekeeper;
409 struct timespec ts_delta, xt; 431 struct timespec ts_delta, xt;
410 unsigned long flags; 432 unsigned long flags;
411 433
412 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) 434 if (!timespec_valid_strict(tv))
413 return -EINVAL; 435 return -EINVAL;
414 436
415 write_seqlock_irqsave(&timekeeper.lock, flags); 437 write_seqlock_irqsave(&tk->lock, flags);
416 438
417 timekeeping_forward_now(&timekeeper); 439 timekeeping_forward_now(tk);
418 440
419 xt = tk_xtime(&timekeeper); 441 xt = tk_xtime(tk);
420 ts_delta.tv_sec = tv->tv_sec - xt.tv_sec; 442 ts_delta.tv_sec = tv->tv_sec - xt.tv_sec;
421 ts_delta.tv_nsec = tv->tv_nsec - xt.tv_nsec; 443 ts_delta.tv_nsec = tv->tv_nsec - xt.tv_nsec;
422 444
423 timekeeper.wall_to_monotonic = 445 tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, ts_delta));
424 timespec_sub(timekeeper.wall_to_monotonic, ts_delta);
425 446
426 tk_set_xtime(&timekeeper, tv); 447 tk_set_xtime(tk, tv);
427 448
428 timekeeping_update(&timekeeper, true); 449 timekeeping_update(tk, true);
429 450
430 write_sequnlock_irqrestore(&timekeeper.lock, flags); 451 write_sequnlock_irqrestore(&tk->lock, flags);
431 452
432 /* signal hrtimers about time change */ 453 /* signal hrtimers about time change */
433 clock_was_set(); 454 clock_was_set();
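do_settimeofday() above now rejects its argument with timespec_valid_strict() instead of only range-checking tv_nsec; roughly, the strict variant also refuses negative seconds and second counts large enough to overflow a signed 64-bit nanosecond ktime. A hedged sketch of that shape of check; the limit macro below is an assumption standing in for the kernel's KTIME_SEC_MAX:

#include <stdbool.h>
#include <stdio.h>
#include <limits.h>
#include <time.h>

#define NSEC_PER_SEC	1000000000L
/* Assumption: largest seconds value a signed 64-bit nanosecond counter can hold */
#define KTIME_SEC_MAX_SKETCH	(LLONG_MAX / NSEC_PER_SEC)

/* Sketch of the stricter check: non-negative, normalized nanoseconds,
 * and not so far in the future that it overflows a ktime_t. */
static bool timespec_valid_strict_sketch(const struct timespec *ts)
{
	if (ts->tv_sec < 0)
		return false;
	if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
		return false;
	if (ts->tv_sec > KTIME_SEC_MAX_SKETCH)
		return false;
	return true;
}

int main(void)
{
	struct timespec bogus = { .tv_sec = -1, .tv_nsec = 0 };

	printf("valid: %d\n", timespec_valid_strict_sketch(&bogus));
	return 0;
}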
@@ -436,7 +457,6 @@ int do_settimeofday(const struct timespec *tv)
436} 457}
437EXPORT_SYMBOL(do_settimeofday); 458EXPORT_SYMBOL(do_settimeofday);
438 459
439
440/** 460/**
441 * timekeeping_inject_offset - Adds or subtracts from the current time. 461 * timekeeping_inject_offset - Adds or subtracts from the current time.
442 * @tv: pointer to the timespec variable containing the offset 462 * @tv: pointer to the timespec variable containing the offset
@@ -445,28 +465,37 @@ EXPORT_SYMBOL(do_settimeofday);
445 */ 465 */
446int timekeeping_inject_offset(struct timespec *ts) 466int timekeeping_inject_offset(struct timespec *ts)
447{ 467{
468 struct timekeeper *tk = &timekeeper;
448 unsigned long flags; 469 unsigned long flags;
470 struct timespec tmp;
471 int ret = 0;
449 472
450 if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) 473 if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
451 return -EINVAL; 474 return -EINVAL;
452 475
453 write_seqlock_irqsave(&timekeeper.lock, flags); 476 write_seqlock_irqsave(&tk->lock, flags);
454 477
455 timekeeping_forward_now(&timekeeper); 478 timekeeping_forward_now(tk);
456 479
480 /* Make sure the proposed value is valid */
481 tmp = timespec_add(tk_xtime(tk), *ts);
482 if (!timespec_valid_strict(&tmp)) {
483 ret = -EINVAL;
484 goto error;
485 }
457 486
458 tk_xtime_add(&timekeeper, ts); 487 tk_xtime_add(tk, ts);
459 timekeeper.wall_to_monotonic = 488 tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
460 timespec_sub(timekeeper.wall_to_monotonic, *ts);
461 489
462 timekeeping_update(&timekeeper, true); 490error: /* even if we error out, we forwarded the time, so call update */
491 timekeeping_update(tk, true);
463 492
464 write_sequnlock_irqrestore(&timekeeper.lock, flags); 493 write_sequnlock_irqrestore(&tk->lock, flags);
465 494
466 /* signal hrtimers about time change */ 495 /* signal hrtimers about time change */
467 clock_was_set(); 496 clock_was_set();
468 497
469 return 0; 498 return ret;
470} 499}
471EXPORT_SYMBOL(timekeeping_inject_offset); 500EXPORT_SYMBOL(timekeeping_inject_offset);
472 501
@@ -477,23 +506,24 @@ EXPORT_SYMBOL(timekeeping_inject_offset);
477 */ 506 */
478static int change_clocksource(void *data) 507static int change_clocksource(void *data)
479{ 508{
509 struct timekeeper *tk = &timekeeper;
480 struct clocksource *new, *old; 510 struct clocksource *new, *old;
481 unsigned long flags; 511 unsigned long flags;
482 512
483 new = (struct clocksource *) data; 513 new = (struct clocksource *) data;
484 514
485 write_seqlock_irqsave(&timekeeper.lock, flags); 515 write_seqlock_irqsave(&tk->lock, flags);
486 516
487 timekeeping_forward_now(&timekeeper); 517 timekeeping_forward_now(tk);
488 if (!new->enable || new->enable(new) == 0) { 518 if (!new->enable || new->enable(new) == 0) {
489 old = timekeeper.clock; 519 old = tk->clock;
490 tk_setup_internals(&timekeeper, new); 520 tk_setup_internals(tk, new);
491 if (old->disable) 521 if (old->disable)
492 old->disable(old); 522 old->disable(old);
493 } 523 }
494 timekeeping_update(&timekeeper, true); 524 timekeeping_update(tk, true);
495 525
496 write_sequnlock_irqrestore(&timekeeper.lock, flags); 526 write_sequnlock_irqrestore(&tk->lock, flags);
497 527
498 return 0; 528 return 0;
499} 529}
@@ -507,7 +537,9 @@ static int change_clocksource(void *data)
507 */ 537 */
508void timekeeping_notify(struct clocksource *clock) 538void timekeeping_notify(struct clocksource *clock)
509{ 539{
510 if (timekeeper.clock == clock) 540 struct timekeeper *tk = &timekeeper;
541
542 if (tk->clock == clock)
511 return; 543 return;
512 stop_machine(change_clocksource, clock, NULL); 544 stop_machine(change_clocksource, clock, NULL);
513 tick_clock_notify(); 545 tick_clock_notify();
@@ -536,35 +568,36 @@ EXPORT_SYMBOL_GPL(ktime_get_real);
536 */ 568 */
537void getrawmonotonic(struct timespec *ts) 569void getrawmonotonic(struct timespec *ts)
538{ 570{
571 struct timekeeper *tk = &timekeeper;
539 unsigned long seq; 572 unsigned long seq;
540 s64 nsecs; 573 s64 nsecs;
541 574
542 do { 575 do {
543 seq = read_seqbegin(&timekeeper.lock); 576 seq = read_seqbegin(&tk->lock);
544 nsecs = timekeeping_get_ns_raw(&timekeeper); 577 nsecs = timekeeping_get_ns_raw(tk);
545 *ts = timekeeper.raw_time; 578 *ts = tk->raw_time;
546 579
547 } while (read_seqretry(&timekeeper.lock, seq)); 580 } while (read_seqretry(&tk->lock, seq));
548 581
549 timespec_add_ns(ts, nsecs); 582 timespec_add_ns(ts, nsecs);
550} 583}
551EXPORT_SYMBOL(getrawmonotonic); 584EXPORT_SYMBOL(getrawmonotonic);
552 585
553
554/** 586/**
555 * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres 587 * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres
556 */ 588 */
557int timekeeping_valid_for_hres(void) 589int timekeeping_valid_for_hres(void)
558{ 590{
591 struct timekeeper *tk = &timekeeper;
559 unsigned long seq; 592 unsigned long seq;
560 int ret; 593 int ret;
561 594
562 do { 595 do {
563 seq = read_seqbegin(&timekeeper.lock); 596 seq = read_seqbegin(&tk->lock);
564 597
565 ret = timekeeper.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; 598 ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
566 599
567 } while (read_seqretry(&timekeeper.lock, seq)); 600 } while (read_seqretry(&tk->lock, seq));
568 601
569 return ret; 602 return ret;
570} 603}
@@ -574,15 +607,16 @@ int timekeeping_valid_for_hres(void)
574 */ 607 */
575u64 timekeeping_max_deferment(void) 608u64 timekeeping_max_deferment(void)
576{ 609{
610 struct timekeeper *tk = &timekeeper;
577 unsigned long seq; 611 unsigned long seq;
578 u64 ret; 612 u64 ret;
579 613
580 do { 614 do {
581 seq = read_seqbegin(&timekeeper.lock); 615 seq = read_seqbegin(&tk->lock);
582 616
583 ret = timekeeper.clock->max_idle_ns; 617 ret = tk->clock->max_idle_ns;
584 618
585 } while (read_seqretry(&timekeeper.lock, seq)); 619 } while (read_seqretry(&tk->lock, seq));
586 620
587 return ret; 621 return ret;
588} 622}
@@ -622,46 +656,56 @@ void __attribute__((weak)) read_boot_clock(struct timespec *ts)
622 */ 656 */
623void __init timekeeping_init(void) 657void __init timekeeping_init(void)
624{ 658{
659 struct timekeeper *tk = &timekeeper;
625 struct clocksource *clock; 660 struct clocksource *clock;
626 unsigned long flags; 661 unsigned long flags;
627 struct timespec now, boot; 662 struct timespec now, boot, tmp;
628 663
629 read_persistent_clock(&now); 664 read_persistent_clock(&now);
665 if (!timespec_valid_strict(&now)) {
666 pr_warn("WARNING: Persistent clock returned invalid value!\n"
667 " Check your CMOS/BIOS settings.\n");
668 now.tv_sec = 0;
669 now.tv_nsec = 0;
670 }
671
630 read_boot_clock(&boot); 672 read_boot_clock(&boot);
673 if (!timespec_valid_strict(&boot)) {
674 pr_warn("WARNING: Boot clock returned invalid value!\n"
675 " Check your CMOS/BIOS settings.\n");
676 boot.tv_sec = 0;
677 boot.tv_nsec = 0;
678 }
631 679
632 seqlock_init(&timekeeper.lock); 680 seqlock_init(&tk->lock);
633 681
634 ntp_init(); 682 ntp_init();
635 683
636 write_seqlock_irqsave(&timekeeper.lock, flags); 684 write_seqlock_irqsave(&tk->lock, flags);
637 clock = clocksource_default_clock(); 685 clock = clocksource_default_clock();
638 if (clock->enable) 686 if (clock->enable)
639 clock->enable(clock); 687 clock->enable(clock);
640 tk_setup_internals(&timekeeper, clock); 688 tk_setup_internals(tk, clock);
641 689
642 tk_set_xtime(&timekeeper, &now); 690 tk_set_xtime(tk, &now);
643 timekeeper.raw_time.tv_sec = 0; 691 tk->raw_time.tv_sec = 0;
644 timekeeper.raw_time.tv_nsec = 0; 692 tk->raw_time.tv_nsec = 0;
645 if (boot.tv_sec == 0 && boot.tv_nsec == 0) 693 if (boot.tv_sec == 0 && boot.tv_nsec == 0)
646 boot = tk_xtime(&timekeeper); 694 boot = tk_xtime(tk);
647 695
648 set_normalized_timespec(&timekeeper.wall_to_monotonic, 696 set_normalized_timespec(&tmp, -boot.tv_sec, -boot.tv_nsec);
649 -boot.tv_sec, -boot.tv_nsec); 697 tk_set_wall_to_mono(tk, tmp);
650 update_rt_offset(&timekeeper); 698
651 timekeeper.total_sleep_time.tv_sec = 0; 699 tmp.tv_sec = 0;
652 timekeeper.total_sleep_time.tv_nsec = 0; 700 tmp.tv_nsec = 0;
653 write_sequnlock_irqrestore(&timekeeper.lock, flags); 701 tk_set_sleep_time(tk, tmp);
702
703 write_sequnlock_irqrestore(&tk->lock, flags);
654} 704}
655 705
656/* time in seconds when suspend began */ 706/* time in seconds when suspend began */
657static struct timespec timekeeping_suspend_time; 707static struct timespec timekeeping_suspend_time;
658 708
659static void update_sleep_time(struct timespec t)
660{
661 timekeeper.total_sleep_time = t;
662 timekeeper.offs_boot = timespec_to_ktime(t);
663}
664
665/** 709/**
666 * __timekeeping_inject_sleeptime - Internal function to add sleep interval 710 * __timekeeping_inject_sleeptime - Internal function to add sleep interval
667 * @delta: pointer to a timespec delta value 711 * @delta: pointer to a timespec delta value
@@ -672,18 +716,16 @@ static void update_sleep_time(struct timespec t)
672static void __timekeeping_inject_sleeptime(struct timekeeper *tk, 716static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
673 struct timespec *delta) 717 struct timespec *delta)
674{ 718{
675 if (!timespec_valid(delta)) { 719 if (!timespec_valid_strict(delta)) {
676 printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " 720 printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid "
677 "sleep delta value!\n"); 721 "sleep delta value!\n");
678 return; 722 return;
679 } 723 }
680
681 tk_xtime_add(tk, delta); 724 tk_xtime_add(tk, delta);
682 tk->wall_to_monotonic = timespec_sub(tk->wall_to_monotonic, *delta); 725 tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *delta));
683 update_sleep_time(timespec_add(tk->total_sleep_time, *delta)); 726 tk_set_sleep_time(tk, timespec_add(tk->total_sleep_time, *delta));
684} 727}
685 728
686
687/** 729/**
688 * timekeeping_inject_sleeptime - Adds suspend interval to timekeeping values 730 * timekeeping_inject_sleeptime - Adds suspend interval to timekeeping values

689 * @delta: pointer to a timespec delta value 731 * @delta: pointer to a timespec delta value
@@ -696,6 +738,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
696 */ 738 */
697void timekeeping_inject_sleeptime(struct timespec *delta) 739void timekeeping_inject_sleeptime(struct timespec *delta)
698{ 740{
741 struct timekeeper *tk = &timekeeper;
699 unsigned long flags; 742 unsigned long flags;
700 struct timespec ts; 743 struct timespec ts;
701 744
@@ -704,21 +747,20 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
704 if (!(ts.tv_sec == 0 && ts.tv_nsec == 0)) 747 if (!(ts.tv_sec == 0 && ts.tv_nsec == 0))
705 return; 748 return;
706 749
707 write_seqlock_irqsave(&timekeeper.lock, flags); 750 write_seqlock_irqsave(&tk->lock, flags);
708 751
709 timekeeping_forward_now(&timekeeper); 752 timekeeping_forward_now(tk);
710 753
711 __timekeeping_inject_sleeptime(&timekeeper, delta); 754 __timekeeping_inject_sleeptime(tk, delta);
712 755
713 timekeeping_update(&timekeeper, true); 756 timekeeping_update(tk, true);
714 757
715 write_sequnlock_irqrestore(&timekeeper.lock, flags); 758 write_sequnlock_irqrestore(&tk->lock, flags);
716 759
717 /* signal hrtimers about time change */ 760 /* signal hrtimers about time change */
718 clock_was_set(); 761 clock_was_set();
719} 762}
720 763
721
722/** 764/**
723 * timekeeping_resume - Resumes the generic timekeeping subsystem. 765 * timekeeping_resume - Resumes the generic timekeeping subsystem.
724 * 766 *
@@ -728,6 +770,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
728 */ 770 */
729static void timekeeping_resume(void) 771static void timekeeping_resume(void)
730{ 772{
773 struct timekeeper *tk = &timekeeper;
731 unsigned long flags; 774 unsigned long flags;
732 struct timespec ts; 775 struct timespec ts;
733 776
@@ -735,18 +778,18 @@ static void timekeeping_resume(void)
735 778
736 clocksource_resume(); 779 clocksource_resume();
737 780
738 write_seqlock_irqsave(&timekeeper.lock, flags); 781 write_seqlock_irqsave(&tk->lock, flags);
739 782
740 if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { 783 if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) {
741 ts = timespec_sub(ts, timekeeping_suspend_time); 784 ts = timespec_sub(ts, timekeeping_suspend_time);
742 __timekeeping_inject_sleeptime(&timekeeper, &ts); 785 __timekeeping_inject_sleeptime(tk, &ts);
743 } 786 }
744 /* re-base the last cycle value */ 787 /* re-base the last cycle value */
745 timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); 788 tk->clock->cycle_last = tk->clock->read(tk->clock);
746 timekeeper.ntp_error = 0; 789 tk->ntp_error = 0;
747 timekeeping_suspended = 0; 790 timekeeping_suspended = 0;
748 timekeeping_update(&timekeeper, false); 791 timekeeping_update(tk, false);
749 write_sequnlock_irqrestore(&timekeeper.lock, flags); 792 write_sequnlock_irqrestore(&tk->lock, flags);
750 793
751 touch_softlockup_watchdog(); 794 touch_softlockup_watchdog();
752 795
@@ -758,14 +801,15 @@ static void timekeeping_resume(void)
758 801
759static int timekeeping_suspend(void) 802static int timekeeping_suspend(void)
760{ 803{
804 struct timekeeper *tk = &timekeeper;
761 unsigned long flags; 805 unsigned long flags;
762 struct timespec delta, delta_delta; 806 struct timespec delta, delta_delta;
763 static struct timespec old_delta; 807 static struct timespec old_delta;
764 808
765 read_persistent_clock(&timekeeping_suspend_time); 809 read_persistent_clock(&timekeeping_suspend_time);
766 810
767 write_seqlock_irqsave(&timekeeper.lock, flags); 811 write_seqlock_irqsave(&tk->lock, flags);
768 timekeeping_forward_now(&timekeeper); 812 timekeeping_forward_now(tk);
769 timekeeping_suspended = 1; 813 timekeeping_suspended = 1;
770 814
771 /* 815 /*
@@ -774,7 +818,7 @@ static int timekeeping_suspend(void)
774 * try to compensate so the difference in system time 818 * try to compensate so the difference in system time
775 * and persistent_clock time stays close to constant. 819 * and persistent_clock time stays close to constant.
776 */ 820 */
777 delta = timespec_sub(tk_xtime(&timekeeper), timekeeping_suspend_time); 821 delta = timespec_sub(tk_xtime(tk), timekeeping_suspend_time);
778 delta_delta = timespec_sub(delta, old_delta); 822 delta_delta = timespec_sub(delta, old_delta);
779 if (abs(delta_delta.tv_sec) >= 2) { 823 if (abs(delta_delta.tv_sec) >= 2) {
780 /* 824 /*
@@ -787,7 +831,7 @@ static int timekeeping_suspend(void)
787 timekeeping_suspend_time = 831 timekeeping_suspend_time =
788 timespec_add(timekeeping_suspend_time, delta_delta); 832 timespec_add(timekeeping_suspend_time, delta_delta);
789 } 833 }
790 write_sequnlock_irqrestore(&timekeeper.lock, flags); 834 write_sequnlock_irqrestore(&tk->lock, flags);
791 835
792 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); 836 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
793 clocksource_suspend(); 837 clocksource_suspend();
@@ -898,27 +942,29 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
898 * the error. This causes the likely below to be unlikely. 942 * the error. This causes the likely below to be unlikely.
899 * 943 *
900 * The proper fix is to avoid rounding up by using 944 * The proper fix is to avoid rounding up by using
901 * the high precision timekeeper.xtime_nsec instead of 945 * the high precision tk->xtime_nsec instead of
902 * xtime.tv_nsec everywhere. Fixing this will take some 946 * xtime.tv_nsec everywhere. Fixing this will take some
903 * time. 947 * time.
904 */ 948 */
905 if (likely(error <= interval)) 949 if (likely(error <= interval))
906 adj = 1; 950 adj = 1;
907 else 951 else
908 adj = timekeeping_bigadjust(tk, error, &interval, 952 adj = timekeeping_bigadjust(tk, error, &interval, &offset);
909 &offset); 953 } else {
910 } else if (error < -interval) { 954 if (error < -interval) {
911 /* See comment above, this is just switched for the negative */ 955 /* See comment above, this is just switched for the negative */
912 error >>= 2; 956 error >>= 2;
913 if (likely(error >= -interval)) { 957 if (likely(error >= -interval)) {
914 adj = -1; 958 adj = -1;
915 interval = -interval; 959 interval = -interval;
916 offset = -offset; 960 offset = -offset;
917 } else 961 } else {
918 adj = timekeeping_bigadjust(tk, error, &interval, 962 adj = timekeeping_bigadjust(tk, error, &interval, &offset);
919 &offset); 963 }
920 } else 964 } else {
921 return; 965 goto out_adjust;
966 }
967 }
922 968
923 if (unlikely(tk->clock->maxadj && 969 if (unlikely(tk->clock->maxadj &&
924 (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) { 970 (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) {
@@ -981,6 +1027,7 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
981 tk->xtime_nsec -= offset; 1027 tk->xtime_nsec -= offset;
982 tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; 1028 tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
983 1029
1030out_adjust:
984 /* 1031 /*
985 * It may be possible that when we entered this function, xtime_nsec 1032 * It may be possible that when we entered this function, xtime_nsec
986 * was very small. Further, if we're slightly speeding the clocksource 1033 * was very small. Further, if we're slightly speeding the clocksource
@@ -1003,7 +1050,6 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
1003 1050
1004} 1051}
1005 1052
1006
1007/** 1053/**
1008 * accumulate_nsecs_to_secs - Accumulates nsecs into secs 1054 * accumulate_nsecs_to_secs - Accumulates nsecs into secs
1009 * 1055 *
@@ -1024,15 +1070,21 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
1024 1070
1025 /* Figure out if its a leap sec and apply if needed */ 1071 /* Figure out if its a leap sec and apply if needed */
1026 leap = second_overflow(tk->xtime_sec); 1072 leap = second_overflow(tk->xtime_sec);
1027 tk->xtime_sec += leap; 1073 if (unlikely(leap)) {
1028 tk->wall_to_monotonic.tv_sec -= leap; 1074 struct timespec ts;
1029 if (leap) 1075
1030 clock_was_set_delayed(); 1076 tk->xtime_sec += leap;
1077
1078 ts.tv_sec = leap;
1079 ts.tv_nsec = 0;
1080 tk_set_wall_to_mono(tk,
1081 timespec_sub(tk->wall_to_monotonic, ts));
1031 1082
1083 clock_was_set_delayed();
1084 }
1032 } 1085 }
1033} 1086}
1034 1087
1035
1036/** 1088/**
1037 * logarithmic_accumulation - shifted accumulation of cycles 1089 * logarithmic_accumulation - shifted accumulation of cycles
1038 * 1090 *
@@ -1076,7 +1128,6 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
1076 return offset; 1128 return offset;
1077} 1129}
1078 1130
1079
1080/** 1131/**
1081 * update_wall_time - Uses the current clocksource to increment the wall time 1132 * update_wall_time - Uses the current clocksource to increment the wall time
1082 * 1133 *
@@ -1084,25 +1135,30 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
1084static void update_wall_time(void) 1135static void update_wall_time(void)
1085{ 1136{
1086 struct clocksource *clock; 1137 struct clocksource *clock;
1138 struct timekeeper *tk = &timekeeper;
1087 cycle_t offset; 1139 cycle_t offset;
1088 int shift = 0, maxshift; 1140 int shift = 0, maxshift;
1089 unsigned long flags; 1141 unsigned long flags;
1090 s64 remainder; 1142 s64 remainder;
1091 1143
1092 write_seqlock_irqsave(&timekeeper.lock, flags); 1144 write_seqlock_irqsave(&tk->lock, flags);
1093 1145
1094 /* Make sure we're fully resumed: */ 1146 /* Make sure we're fully resumed: */
1095 if (unlikely(timekeeping_suspended)) 1147 if (unlikely(timekeeping_suspended))
1096 goto out; 1148 goto out;
1097 1149
1098 clock = timekeeper.clock; 1150 clock = tk->clock;
1099 1151
1100#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET 1152#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
1101 offset = timekeeper.cycle_interval; 1153 offset = tk->cycle_interval;
1102#else 1154#else
1103 offset = (clock->read(clock) - clock->cycle_last) & clock->mask; 1155 offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
1104#endif 1156#endif
1105 1157
1158 /* Check if there's really nothing to do */
1159 if (offset < tk->cycle_interval)
1160 goto out;
1161
1106 /* 1162 /*
1107 * With NO_HZ we may have to accumulate many cycle_intervals 1163 * With NO_HZ we may have to accumulate many cycle_intervals
1108 * (think "ticks") worth of time at once. To do this efficiently, 1164 * (think "ticks") worth of time at once. To do this efficiently,
@@ -1111,19 +1167,19 @@ static void update_wall_time(void)
1111 * chunk in one go, and then try to consume the next smaller 1167 * chunk in one go, and then try to consume the next smaller
1112 * doubled multiple. 1168 * doubled multiple.
1113 */ 1169 */
1114 shift = ilog2(offset) - ilog2(timekeeper.cycle_interval); 1170 shift = ilog2(offset) - ilog2(tk->cycle_interval);
1115 shift = max(0, shift); 1171 shift = max(0, shift);
1116 /* Bound shift to one less than what overflows tick_length */ 1172 /* Bound shift to one less than what overflows tick_length */
1117 maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; 1173 maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
1118 shift = min(shift, maxshift); 1174 shift = min(shift, maxshift);
1119 while (offset >= timekeeper.cycle_interval) { 1175 while (offset >= tk->cycle_interval) {
1120 offset = logarithmic_accumulation(&timekeeper, offset, shift); 1176 offset = logarithmic_accumulation(tk, offset, shift);
1121 if(offset < timekeeper.cycle_interval<<shift) 1177 if (offset < tk->cycle_interval<<shift)
1122 shift--; 1178 shift--;
1123 } 1179 }
1124 1180
1125 /* correct the clock when NTP error is too big */ 1181 /* correct the clock when NTP error is too big */
1126 timekeeping_adjust(&timekeeper, offset); 1182 timekeeping_adjust(tk, offset);
1127 1183
1128 1184
1129 /* 1185 /*
@@ -1135,21 +1191,21 @@ static void update_wall_time(void)
1135 * the vsyscall implementations are converted to use xtime_nsec 1191 * the vsyscall implementations are converted to use xtime_nsec
1136 * (shifted nanoseconds), this can be killed. 1192 * (shifted nanoseconds), this can be killed.
1137 */ 1193 */
1138 remainder = timekeeper.xtime_nsec & ((1 << timekeeper.shift) - 1); 1194 remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);
1139 timekeeper.xtime_nsec -= remainder; 1195 tk->xtime_nsec -= remainder;
1140 timekeeper.xtime_nsec += 1 << timekeeper.shift; 1196 tk->xtime_nsec += 1ULL << tk->shift;
1141 timekeeper.ntp_error += remainder << timekeeper.ntp_error_shift; 1197 tk->ntp_error += remainder << tk->ntp_error_shift;
1142 1198
1143 /* 1199 /*
1144 * Finally, make sure that after the rounding 1200 * Finally, make sure that after the rounding
1145 * xtime_nsec isn't larger than NSEC_PER_SEC 1201 * xtime_nsec isn't larger than NSEC_PER_SEC
1146 */ 1202 */
1147 accumulate_nsecs_to_secs(&timekeeper); 1203 accumulate_nsecs_to_secs(tk);
1148 1204
1149 timekeeping_update(&timekeeper, false); 1205 timekeeping_update(tk, false);
1150 1206
1151out: 1207out:
1152 write_sequnlock_irqrestore(&timekeeper.lock, flags); 1208 write_sequnlock_irqrestore(&tk->lock, flags);
1153 1209
1154} 1210}
1155 1211
@@ -1166,18 +1222,18 @@ out:
1166 */ 1222 */
1167void getboottime(struct timespec *ts) 1223void getboottime(struct timespec *ts)
1168{ 1224{
1225 struct timekeeper *tk = &timekeeper;
1169 struct timespec boottime = { 1226 struct timespec boottime = {
1170 .tv_sec = timekeeper.wall_to_monotonic.tv_sec + 1227 .tv_sec = tk->wall_to_monotonic.tv_sec +
1171 timekeeper.total_sleep_time.tv_sec, 1228 tk->total_sleep_time.tv_sec,
1172 .tv_nsec = timekeeper.wall_to_monotonic.tv_nsec + 1229 .tv_nsec = tk->wall_to_monotonic.tv_nsec +
1173 timekeeper.total_sleep_time.tv_nsec 1230 tk->total_sleep_time.tv_nsec
1174 }; 1231 };
1175 1232
1176 set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); 1233 set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec);
1177} 1234}
1178EXPORT_SYMBOL_GPL(getboottime); 1235EXPORT_SYMBOL_GPL(getboottime);
1179 1236
1180
1181/** 1237/**
1182 * get_monotonic_boottime - Returns monotonic time since boot 1238 * get_monotonic_boottime - Returns monotonic time since boot
1183 * @ts: pointer to the timespec to be set 1239 * @ts: pointer to the timespec to be set
@@ -1189,22 +1245,25 @@ EXPORT_SYMBOL_GPL(getboottime);
1189 */ 1245 */
1190void get_monotonic_boottime(struct timespec *ts) 1246void get_monotonic_boottime(struct timespec *ts)
1191{ 1247{
1248 struct timekeeper *tk = &timekeeper;
1192 struct timespec tomono, sleep; 1249 struct timespec tomono, sleep;
1250 s64 nsec;
1193 unsigned int seq; 1251 unsigned int seq;
1194 1252
1195 WARN_ON(timekeeping_suspended); 1253 WARN_ON(timekeeping_suspended);
1196 1254
1197 do { 1255 do {
1198 seq = read_seqbegin(&timekeeper.lock); 1256 seq = read_seqbegin(&tk->lock);
1199 ts->tv_sec = timekeeper.xtime_sec; 1257 ts->tv_sec = tk->xtime_sec;
1200 ts->tv_nsec = timekeeping_get_ns(&timekeeper); 1258 nsec = timekeeping_get_ns(tk);
1201 tomono = timekeeper.wall_to_monotonic; 1259 tomono = tk->wall_to_monotonic;
1202 sleep = timekeeper.total_sleep_time; 1260 sleep = tk->total_sleep_time;
1203 1261
1204 } while (read_seqretry(&timekeeper.lock, seq)); 1262 } while (read_seqretry(&tk->lock, seq));
1205 1263
1206 set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec, 1264 ts->tv_sec += tomono.tv_sec + sleep.tv_sec;
1207 ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec); 1265 ts->tv_nsec = 0;
1266 timespec_add_ns(ts, nsec + tomono.tv_nsec + sleep.tv_nsec);
1208} 1267}
1209EXPORT_SYMBOL_GPL(get_monotonic_boottime); 1268EXPORT_SYMBOL_GPL(get_monotonic_boottime);
1210 1269
@@ -1231,31 +1290,38 @@ EXPORT_SYMBOL_GPL(ktime_get_boottime);
1231 */ 1290 */
1232void monotonic_to_bootbased(struct timespec *ts) 1291void monotonic_to_bootbased(struct timespec *ts)
1233{ 1292{
1234 *ts = timespec_add(*ts, timekeeper.total_sleep_time); 1293 struct timekeeper *tk = &timekeeper;
1294
1295 *ts = timespec_add(*ts, tk->total_sleep_time);
1235} 1296}
1236EXPORT_SYMBOL_GPL(monotonic_to_bootbased); 1297EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
1237 1298
1238unsigned long get_seconds(void) 1299unsigned long get_seconds(void)
1239{ 1300{
1240 return timekeeper.xtime_sec; 1301 struct timekeeper *tk = &timekeeper;
1302
1303 return tk->xtime_sec;
1241} 1304}
1242EXPORT_SYMBOL(get_seconds); 1305EXPORT_SYMBOL(get_seconds);
1243 1306
1244struct timespec __current_kernel_time(void) 1307struct timespec __current_kernel_time(void)
1245{ 1308{
1246 return tk_xtime(&timekeeper); 1309 struct timekeeper *tk = &timekeeper;
1310
1311 return tk_xtime(tk);
1247} 1312}
1248 1313
1249struct timespec current_kernel_time(void) 1314struct timespec current_kernel_time(void)
1250{ 1315{
1316 struct timekeeper *tk = &timekeeper;
1251 struct timespec now; 1317 struct timespec now;
1252 unsigned long seq; 1318 unsigned long seq;
1253 1319
1254 do { 1320 do {
1255 seq = read_seqbegin(&timekeeper.lock); 1321 seq = read_seqbegin(&tk->lock);
1256 1322
1257 now = tk_xtime(&timekeeper); 1323 now = tk_xtime(tk);
1258 } while (read_seqretry(&timekeeper.lock, seq)); 1324 } while (read_seqretry(&tk->lock, seq));
1259 1325
1260 return now; 1326 return now;
1261} 1327}
@@ -1263,15 +1329,16 @@ EXPORT_SYMBOL(current_kernel_time);
1263 1329
1264struct timespec get_monotonic_coarse(void) 1330struct timespec get_monotonic_coarse(void)
1265{ 1331{
1332 struct timekeeper *tk = &timekeeper;
1266 struct timespec now, mono; 1333 struct timespec now, mono;
1267 unsigned long seq; 1334 unsigned long seq;
1268 1335
1269 do { 1336 do {
1270 seq = read_seqbegin(&timekeeper.lock); 1337 seq = read_seqbegin(&tk->lock);
1271 1338
1272 now = tk_xtime(&timekeeper); 1339 now = tk_xtime(tk);
1273 mono = timekeeper.wall_to_monotonic; 1340 mono = tk->wall_to_monotonic;
1274 } while (read_seqretry(&timekeeper.lock, seq)); 1341 } while (read_seqretry(&tk->lock, seq));
1275 1342
1276 set_normalized_timespec(&now, now.tv_sec + mono.tv_sec, 1343 set_normalized_timespec(&now, now.tv_sec + mono.tv_sec,
1277 now.tv_nsec + mono.tv_nsec); 1344 now.tv_nsec + mono.tv_nsec);
@@ -1300,14 +1367,15 @@ void do_timer(unsigned long ticks)
1300void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, 1367void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
1301 struct timespec *wtom, struct timespec *sleep) 1368 struct timespec *wtom, struct timespec *sleep)
1302{ 1369{
1370 struct timekeeper *tk = &timekeeper;
1303 unsigned long seq; 1371 unsigned long seq;
1304 1372
1305 do { 1373 do {
1306 seq = read_seqbegin(&timekeeper.lock); 1374 seq = read_seqbegin(&tk->lock);
1307 *xtim = tk_xtime(&timekeeper); 1375 *xtim = tk_xtime(tk);
1308 *wtom = timekeeper.wall_to_monotonic; 1376 *wtom = tk->wall_to_monotonic;
1309 *sleep = timekeeper.total_sleep_time; 1377 *sleep = tk->total_sleep_time;
1310 } while (read_seqretry(&timekeeper.lock, seq)); 1378 } while (read_seqretry(&tk->lock, seq));
1311} 1379}
1312 1380
1313#ifdef CONFIG_HIGH_RES_TIMERS 1381#ifdef CONFIG_HIGH_RES_TIMERS
@@ -1321,19 +1389,20 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
1321 */ 1389 */
1322ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot) 1390ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot)
1323{ 1391{
1392 struct timekeeper *tk = &timekeeper;
1324 ktime_t now; 1393 ktime_t now;
1325 unsigned int seq; 1394 unsigned int seq;
1326 u64 secs, nsecs; 1395 u64 secs, nsecs;
1327 1396
1328 do { 1397 do {
1329 seq = read_seqbegin(&timekeeper.lock); 1398 seq = read_seqbegin(&tk->lock);
1330 1399
1331 secs = timekeeper.xtime_sec; 1400 secs = tk->xtime_sec;
1332 nsecs = timekeeping_get_ns(&timekeeper); 1401 nsecs = timekeeping_get_ns(tk);
1333 1402
1334 *offs_real = timekeeper.offs_real; 1403 *offs_real = tk->offs_real;
1335 *offs_boot = timekeeper.offs_boot; 1404 *offs_boot = tk->offs_boot;
1336 } while (read_seqretry(&timekeeper.lock, seq)); 1405 } while (read_seqretry(&tk->lock, seq));
1337 1406
1338 now = ktime_add_ns(ktime_set(secs, 0), nsecs); 1407 now = ktime_add_ns(ktime_set(secs, 0), nsecs);
1339 now = ktime_sub(now, *offs_real); 1408 now = ktime_sub(now, *offs_real);
@@ -1346,19 +1415,19 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot)
1346 */ 1415 */
1347ktime_t ktime_get_monotonic_offset(void) 1416ktime_t ktime_get_monotonic_offset(void)
1348{ 1417{
1418 struct timekeeper *tk = &timekeeper;
1349 unsigned long seq; 1419 unsigned long seq;
1350 struct timespec wtom; 1420 struct timespec wtom;
1351 1421
1352 do { 1422 do {
1353 seq = read_seqbegin(&timekeeper.lock); 1423 seq = read_seqbegin(&tk->lock);
1354 wtom = timekeeper.wall_to_monotonic; 1424 wtom = tk->wall_to_monotonic;
1355 } while (read_seqretry(&timekeeper.lock, seq)); 1425 } while (read_seqretry(&tk->lock, seq));
1356 1426
1357 return timespec_to_ktime(wtom); 1427 return timespec_to_ktime(wtom);
1358} 1428}
1359EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset); 1429EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset);
1360 1430
1361
1362/** 1431/**
1363 * xtime_update() - advances the timekeeping infrastructure 1432 * xtime_update() - advances the timekeeping infrastructure
1364 * @ticks: number of ticks, that have elapsed since the last call. 1433 * @ticks: number of ticks, that have elapsed since the last call.
diff --git a/kernel/timer.c b/kernel/timer.c
index a61c09374eba..8c5e7b908c68 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1407,13 +1407,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, seconds)
1407 1407
1408#endif 1408#endif
1409 1409
1410#ifndef __alpha__
1411
1412/*
1413 * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this
1414 * should be moved into arch/i386 instead?
1415 */
1416
1417/** 1410/**
1418 * sys_getpid - return the thread group id of the current process 1411 * sys_getpid - return the thread group id of the current process
1419 * 1412 *
@@ -1469,8 +1462,6 @@ SYSCALL_DEFINE0(getegid)
1469 return from_kgid_munged(current_user_ns(), current_egid()); 1462 return from_kgid_munged(current_user_ns(), current_egid());
1470} 1463}
1471 1464
1472#endif
1473
1474static void process_timeout(unsigned long __data) 1465static void process_timeout(unsigned long __data)
1475{ 1466{
1476 wake_up_process((struct task_struct *)__data); 1467 wake_up_process((struct task_struct *)__data);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index fee3752ae8f6..8a6d2ee2086c 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -281,7 +281,7 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip)
281 281
282 head = this_cpu_ptr(event_function.perf_events); 282 head = this_cpu_ptr(event_function.perf_events);
283 perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0, 283 perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
284 1, &regs, head); 284 1, &regs, head, NULL);
285 285
286#undef ENTRY_SIZE 286#undef ENTRY_SIZE
287} 287}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index b31d3d5699fe..1a2117043bb1 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1002,7 +1002,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
1002 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1002 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1003 1003
1004 head = this_cpu_ptr(call->perf_events); 1004 head = this_cpu_ptr(call->perf_events);
1005 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); 1005 perf_trace_buf_submit(entry, size, rctx,
1006 entry->ip, 1, regs, head, NULL);
1006} 1007}
1007 1008
1008/* Kretprobe profile handler */ 1009/* Kretprobe profile handler */
@@ -1033,7 +1034,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1033 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1034 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1034 1035
1035 head = this_cpu_ptr(call->perf_events); 1036 head = this_cpu_ptr(call->perf_events);
1036 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); 1037 perf_trace_buf_submit(entry, size, rctx,
1038 entry->ret_ip, 1, regs, head, NULL);
1037} 1039}
1038#endif /* CONFIG_PERF_EVENTS */ 1040#endif /* CONFIG_PERF_EVENTS */
1039 1041
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 96fc73369099..6b245f64c8dd 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -506,6 +506,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
506 int size; 506 int size;
507 507
508 syscall_nr = syscall_get_nr(current, regs); 508 syscall_nr = syscall_get_nr(current, regs);
509 if (syscall_nr < 0)
510 return;
509 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) 511 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
510 return; 512 return;
511 513
@@ -532,7 +534,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
532 (unsigned long *)&rec->args); 534 (unsigned long *)&rec->args);
533 535
534 head = this_cpu_ptr(sys_data->enter_event->perf_events); 536 head = this_cpu_ptr(sys_data->enter_event->perf_events);
535 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); 537 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
536} 538}
537 539
538int perf_sysenter_enable(struct ftrace_event_call *call) 540int perf_sysenter_enable(struct ftrace_event_call *call)
@@ -580,6 +582,8 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
580 int size; 582 int size;
581 583
582 syscall_nr = syscall_get_nr(current, regs); 584 syscall_nr = syscall_get_nr(current, regs);
585 if (syscall_nr < 0)
586 return;
583 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) 587 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
584 return; 588 return;
585 589
@@ -608,7 +612,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
608 rec->ret = syscall_get_return_value(current, regs); 612 rec->ret = syscall_get_return_value(current, regs);
609 613
610 head = this_cpu_ptr(sys_data->exit_event->perf_events); 614 head = this_cpu_ptr(sys_data->exit_event->perf_events);
611 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); 615 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
612} 616}
613 617
614int perf_sysexit_enable(struct ftrace_event_call *call) 618int perf_sysexit_enable(struct ftrace_event_call *call)
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 2b36ac68549e..03003cd7dd96 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -670,7 +670,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
670 call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); 670 call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
671 671
672 head = this_cpu_ptr(call->perf_events); 672 head = this_cpu_ptr(call->perf_events);
673 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); 673 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head, NULL);
674 674
675 out: 675 out:
676 preempt_enable(); 676 preempt_enable();
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 69add8a9da68..4b1dfba70f7c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -575,7 +575,7 @@ out:
575/* 575/*
576 * Create/destroy watchdog threads as CPUs come and go: 576 * Create/destroy watchdog threads as CPUs come and go:
577 */ 577 */
578static int 578static int __cpuinit
579cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) 579cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
580{ 580{
581 int hotcpu = (unsigned long)hcpu; 581 int hotcpu = (unsigned long)hcpu;
@@ -610,27 +610,10 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
610 return NOTIFY_OK; 610 return NOTIFY_OK;
611} 611}
612 612
613static struct notifier_block cpu_nfb = { 613static struct notifier_block __cpuinitdata cpu_nfb = {
614 .notifier_call = cpu_callback 614 .notifier_call = cpu_callback
615}; 615};
616 616
617#ifdef CONFIG_SUSPEND
618/*
619 * On exit from suspend we force an offline->online transition on the boot CPU
620 * so that the PMU state that was lost while in suspended state gets set up
621 * properly for the boot CPU. This information is required for restarting the
622 * NMI watchdog.
623 */
624void lockup_detector_bootcpu_resume(void)
625{
626 void *cpu = (void *)(long)smp_processor_id();
627
628 cpu_callback(&cpu_nfb, CPU_DEAD_FROZEN, cpu);
629 cpu_callback(&cpu_nfb, CPU_UP_PREPARE_FROZEN, cpu);
630 cpu_callback(&cpu_nfb, CPU_ONLINE_FROZEN, cpu);
631}
632#endif
633
634void __init lockup_detector_init(void) 617void __init lockup_detector_init(void)
635{ 618{
636 void *cpu = (void *)(long)smp_processor_id(); 619 void *cpu = (void *)(long)smp_processor_id();
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 692d97628a10..3c5a79e2134c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -66,6 +66,7 @@ enum {
66 66
67 /* pool flags */ 67 /* pool flags */
68 POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ 68 POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
69 POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */
69 70
70 /* worker flags */ 71 /* worker flags */
71 WORKER_STARTED = 1 << 0, /* started */ 72 WORKER_STARTED = 1 << 0, /* started */
@@ -652,7 +653,7 @@ static bool need_to_manage_workers(struct worker_pool *pool)
652/* Do we have too many workers and should some go away? */ 653/* Do we have too many workers and should some go away? */
653static bool too_many_workers(struct worker_pool *pool) 654static bool too_many_workers(struct worker_pool *pool)
654{ 655{
655 bool managing = mutex_is_locked(&pool->manager_mutex); 656 bool managing = pool->flags & POOL_MANAGING_WORKERS;
656 int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ 657 int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
657 int nr_busy = pool->nr_workers - nr_idle; 658 int nr_busy = pool->nr_workers - nr_idle;
658 659
@@ -1326,6 +1327,15 @@ static void idle_worker_rebind(struct worker *worker)
1326 1327
1327 /* we did our part, wait for rebind_workers() to finish up */ 1328 /* we did our part, wait for rebind_workers() to finish up */
1328 wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND)); 1329 wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND));
1330
1331 /*
1332 * rebind_workers() shouldn't finish until all workers passed the
1333 * above WORKER_REBIND wait. Tell it when done.
1334 */
1335 spin_lock_irq(&worker->pool->gcwq->lock);
1336 if (!--worker->idle_rebind->cnt)
1337 complete(&worker->idle_rebind->done);
1338 spin_unlock_irq(&worker->pool->gcwq->lock);
1329} 1339}
1330 1340
1331/* 1341/*
@@ -1339,8 +1349,16 @@ static void busy_worker_rebind_fn(struct work_struct *work)
1339 struct worker *worker = container_of(work, struct worker, rebind_work); 1349 struct worker *worker = container_of(work, struct worker, rebind_work);
1340 struct global_cwq *gcwq = worker->pool->gcwq; 1350 struct global_cwq *gcwq = worker->pool->gcwq;
1341 1351
1342 if (worker_maybe_bind_and_lock(worker)) 1352 worker_maybe_bind_and_lock(worker);
1343 worker_clr_flags(worker, WORKER_REBIND); 1353
1354 /*
1355 * %WORKER_REBIND must be cleared even if the above binding failed;
1356 * otherwise, we may confuse the next CPU_UP cycle or oops / get
1357 * stuck by calling idle_worker_rebind() prematurely. If CPU went
 1358 * down again in between, %WORKER_UNBOUND would be set, so clearing
1359 * %WORKER_REBIND is always safe.
1360 */
1361 worker_clr_flags(worker, WORKER_REBIND);
1344 1362
1345 spin_unlock_irq(&gcwq->lock); 1363 spin_unlock_irq(&gcwq->lock);
1346} 1364}
@@ -1396,12 +1414,15 @@ retry:
1396 /* set REBIND and kick idle ones, we'll wait for these later */ 1414 /* set REBIND and kick idle ones, we'll wait for these later */
1397 for_each_worker_pool(pool, gcwq) { 1415 for_each_worker_pool(pool, gcwq) {
1398 list_for_each_entry(worker, &pool->idle_list, entry) { 1416 list_for_each_entry(worker, &pool->idle_list, entry) {
1417 unsigned long worker_flags = worker->flags;
1418
1399 if (worker->flags & WORKER_REBIND) 1419 if (worker->flags & WORKER_REBIND)
1400 continue; 1420 continue;
1401 1421
1402 /* morph UNBOUND to REBIND */ 1422 /* morph UNBOUND to REBIND atomically */
1403 worker->flags &= ~WORKER_UNBOUND; 1423 worker_flags &= ~WORKER_UNBOUND;
1404 worker->flags |= WORKER_REBIND; 1424 worker_flags |= WORKER_REBIND;
1425 ACCESS_ONCE(worker->flags) = worker_flags;
1405 1426
1406 idle_rebind.cnt++; 1427 idle_rebind.cnt++;
1407 worker->idle_rebind = &idle_rebind; 1428 worker->idle_rebind = &idle_rebind;
@@ -1419,25 +1440,15 @@ retry:
1419 goto retry; 1440 goto retry;
1420 } 1441 }
1421 1442
1422 /* 1443 /* all idle workers are rebound, rebind busy workers */
1423 * All idle workers are rebound and waiting for %WORKER_REBIND to
1424 * be cleared inside idle_worker_rebind(). Clear and release.
1425 * Clearing %WORKER_REBIND from this foreign context is safe
1426 * because these workers are still guaranteed to be idle.
1427 */
1428 for_each_worker_pool(pool, gcwq)
1429 list_for_each_entry(worker, &pool->idle_list, entry)
1430 worker->flags &= ~WORKER_REBIND;
1431
1432 wake_up_all(&gcwq->rebind_hold);
1433
1434 /* rebind busy workers */
1435 for_each_busy_worker(worker, i, pos, gcwq) { 1444 for_each_busy_worker(worker, i, pos, gcwq) {
1436 struct work_struct *rebind_work = &worker->rebind_work; 1445 struct work_struct *rebind_work = &worker->rebind_work;
1446 unsigned long worker_flags = worker->flags;
1437 1447
1438 /* morph UNBOUND to REBIND */ 1448 /* morph UNBOUND to REBIND atomically */
1439 worker->flags &= ~WORKER_UNBOUND; 1449 worker_flags &= ~WORKER_UNBOUND;
1440 worker->flags |= WORKER_REBIND; 1450 worker_flags |= WORKER_REBIND;
1451 ACCESS_ONCE(worker->flags) = worker_flags;
1441 1452
1442 if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, 1453 if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
1443 work_data_bits(rebind_work))) 1454 work_data_bits(rebind_work)))
@@ -1449,6 +1460,34 @@ retry:
1449 worker->scheduled.next, 1460 worker->scheduled.next,
1450 work_color_to_flags(WORK_NO_COLOR)); 1461 work_color_to_flags(WORK_NO_COLOR));
1451 } 1462 }
1463
1464 /*
1465 * All idle workers are rebound and waiting for %WORKER_REBIND to
1466 * be cleared inside idle_worker_rebind(). Clear and release.
1467 * Clearing %WORKER_REBIND from this foreign context is safe
1468 * because these workers are still guaranteed to be idle.
1469 *
1470 * We need to make sure all idle workers passed WORKER_REBIND wait
1471 * in idle_worker_rebind() before returning; otherwise, workers can
1472 * get stuck at the wait if hotplug cycle repeats.
1473 */
1474 idle_rebind.cnt = 1;
1475 INIT_COMPLETION(idle_rebind.done);
1476
1477 for_each_worker_pool(pool, gcwq) {
1478 list_for_each_entry(worker, &pool->idle_list, entry) {
1479 worker->flags &= ~WORKER_REBIND;
1480 idle_rebind.cnt++;
1481 }
1482 }
1483
1484 wake_up_all(&gcwq->rebind_hold);
1485
1486 if (--idle_rebind.cnt) {
1487 spin_unlock_irq(&gcwq->lock);
1488 wait_for_completion(&idle_rebind.done);
1489 spin_lock_irq(&gcwq->lock);
1490 }
1452} 1491}
1453 1492
1454static struct worker *alloc_worker(void) 1493static struct worker *alloc_worker(void)
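The rebind changes above turn idle_rebind into a counted completion used as a barrier in both directions: the manager holds one reference, every idle worker adds one, each side drops its reference once it has passed the WORKER_REBIND wait, and whoever brings the count to zero signals the waiter. A minimal userspace sketch of that pattern with pthreads; the names are illustrative, not the workqueue API:

#include <pthread.h>
#include <stdio.h>

/* Illustrative counted "completion": start at 1 for the waiter, each
 * worker adds a reference and drops it when done; the last one signals. */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t done = PTHREAD_COND_INITIALIZER;
static int cnt = 1;			/* the waiter's own reference */

static void put_ref(void)
{
	pthread_mutex_lock(&lock);
	if (--cnt == 0)
		pthread_cond_signal(&done);
	pthread_mutex_unlock(&lock);
}

static void *worker(void *arg)
{
	(void)arg;			/* ... rebind work would go here ... */
	put_ref();
	return NULL;
}

int main(void)
{
	pthread_t tid[4];
	int i;

	for (i = 0; i < 4; i++) {
		pthread_mutex_lock(&lock);
		cnt++;			/* one reference per idle worker */
		pthread_mutex_unlock(&lock);
		pthread_create(&tid[i], NULL, worker, NULL);
	}

	pthread_mutex_lock(&lock);
	if (--cnt)			/* drop our reference, wait if others remain */
		while (cnt)
			pthread_cond_wait(&done, &lock);
	pthread_mutex_unlock(&lock);

	for (i = 0; i < 4; i++)
		pthread_join(tid[i], NULL);
	printf("all workers passed the barrier\n");
	return 0;
}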
@@ -1794,9 +1833,45 @@ static bool manage_workers(struct worker *worker)
1794 struct worker_pool *pool = worker->pool; 1833 struct worker_pool *pool = worker->pool;
1795 bool ret = false; 1834 bool ret = false;
1796 1835
1797 if (!mutex_trylock(&pool->manager_mutex)) 1836 if (pool->flags & POOL_MANAGING_WORKERS)
1798 return ret; 1837 return ret;
1799 1838
1839 pool->flags |= POOL_MANAGING_WORKERS;
1840
1841 /*
1842 * To simplify both worker management and CPU hotplug, hold off
1843 * management while hotplug is in progress. CPU hotplug path can't
1844 * grab %POOL_MANAGING_WORKERS to achieve this because that can
1845 * lead to idle worker depletion (all become busy thinking someone
1846 * else is managing) which in turn can result in deadlock under
1847 * extreme circumstances. Use @pool->manager_mutex to synchronize
1848 * manager against CPU hotplug.
1849 *
1850 * manager_mutex is always free unless CPU hotplug is in
1851 * progress, so do a trylock first without dropping @gcwq->lock.
1852 */
1853 if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
1854 spin_unlock_irq(&pool->gcwq->lock);
1855 mutex_lock(&pool->manager_mutex);
1856 /*
1857 * CPU hotplug could have happened while we were waiting
1858 * for manager_mutex. Hotplug itself can't handle us
1859 * because the manager is on neither the idle nor busy list, and
1860 * @gcwq's state and ours could have deviated.
1861 *
1862 * As hotplug is now excluded via manager_mutex, we can
1863 * simply try to bind. It will succeed or fail depending
1864 * on @gcwq's current state. Try it and adjust
1865 * %WORKER_UNBOUND accordingly.
1866 */
1867 if (worker_maybe_bind_and_lock(worker))
1868 worker->flags &= ~WORKER_UNBOUND;
1869 else
1870 worker->flags |= WORKER_UNBOUND;
1871
1872 ret = true;
1873 }
1874
1800 pool->flags &= ~POOL_MANAGE_WORKERS; 1875 pool->flags &= ~POOL_MANAGE_WORKERS;
1801 1876
1802 /* 1877 /*
@@ -1806,6 +1881,7 @@ static bool manage_workers(struct worker *worker)
1806 ret |= maybe_destroy_workers(pool); 1881 ret |= maybe_destroy_workers(pool);
1807 ret |= maybe_create_worker(pool); 1882 ret |= maybe_create_worker(pool);
1808 1883
1884 pool->flags &= ~POOL_MANAGING_WORKERS;
1809 mutex_unlock(&pool->manager_mutex); 1885 mutex_unlock(&pool->manager_mutex);
1810 return ret; 1886 return ret;
1811} 1887}
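The trylock above only makes sense together with the hotplug side, which this hunk does not show: per the comment, the CPU hotplug path is expected to hold every pool's manager_mutex while it unbinds or rebinds workers. A hedged sketch of what that counterpart presumably looks like; the helper names are illustrative:

#include <linux/mutex.h>

/*
 * Assumed shape of the hotplug-side counterpart (not part of this hunk):
 * take every pool's manager_mutex around unbind/rebind, which is what the
 * trylock in manage_workers() synchronizes against.  The nested annotation
 * gives each pool's mutex its own lockdep subclass so acquiring them in a
 * loop does not raise a false positive.
 */
static void demo_claim_management(struct global_cwq *gcwq)
{
	struct worker_pool *pool;

	for_each_worker_pool(pool, gcwq)
		mutex_lock_nested(&pool->manager_mutex, pool - gcwq->pools);
}

static void demo_release_management(struct global_cwq *gcwq)
{
	struct worker_pool *pool;

	for_each_worker_pool(pool, gcwq)
		mutex_unlock(&pool->manager_mutex);
}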
@@ -3500,18 +3576,17 @@ static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb,
3500#ifdef CONFIG_SMP 3576#ifdef CONFIG_SMP
3501 3577
3502struct work_for_cpu { 3578struct work_for_cpu {
3503 struct completion completion; 3579 struct work_struct work;
3504 long (*fn)(void *); 3580 long (*fn)(void *);
3505 void *arg; 3581 void *arg;
3506 long ret; 3582 long ret;
3507}; 3583};
3508 3584
3509static int do_work_for_cpu(void *_wfc) 3585static void work_for_cpu_fn(struct work_struct *work)
3510{ 3586{
3511 struct work_for_cpu *wfc = _wfc; 3587 struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
3588
3512 wfc->ret = wfc->fn(wfc->arg); 3589 wfc->ret = wfc->fn(wfc->arg);
3513 complete(&wfc->completion);
3514 return 0;
3515} 3590}
3516 3591
3517/** 3592/**
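The dedicated kthread is replaced by the usual on-stack work item pattern: embed a work_struct in a small request structure, recover the wrapper with container_of() in the callback, and flush the work before the stack frame disappears. A minimal sketch of that pattern with hypothetical names (demo_req, demo_fn, demo_run_on):

#include <linux/kernel.h>
#include <linux/workqueue.h>

/* hypothetical request type; stands in for struct work_for_cpu */
struct demo_req {
	struct work_struct	work;
	int			in;
	int			out;
};

static void demo_fn(struct work_struct *work)
{
	/* recover the wrapper from the embedded work_struct */
	struct demo_req *req = container_of(work, struct demo_req, work);

	req->out = req->in * 2;		/* runs in a kworker, not the caller */
}

static int demo_run_on(int cpu, int in)
{
	struct demo_req req = { .in = in };

	INIT_WORK_ONSTACK(&req.work, demo_fn);	/* on-stack items need _ONSTACK */
	schedule_work_on(cpu, &req.work);
	flush_work(&req.work);			/* must finish before @req dies */
	return req.out;
}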
@@ -3526,19 +3601,11 @@ static int do_work_for_cpu(void *_wfc)
3526 */ 3601 */
3527long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) 3602long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
3528{ 3603{
3529 struct task_struct *sub_thread; 3604 struct work_for_cpu wfc = { .fn = fn, .arg = arg };
3530 struct work_for_cpu wfc = {
3531 .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
3532 .fn = fn,
3533 .arg = arg,
3534 };
3535 3605
3536 sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu"); 3606 INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
3537 if (IS_ERR(sub_thread)) 3607 schedule_work_on(cpu, &wfc.work);
3538 return PTR_ERR(sub_thread); 3608 flush_work(&wfc.work);
3539 kthread_bind(sub_thread, cpu);
3540 wake_up_process(sub_thread);
3541 wait_for_completion(&wfc.completion);
3542 return wfc.ret; 3609 return wfc.ret;
3543} 3610}
3544EXPORT_SYMBOL_GPL(work_on_cpu); 3611EXPORT_SYMBOL_GPL(work_on_cpu);
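With the rework above, work_on_cpu() reduces to "queue on the target CPU, flush, return the result". A hedged usage example; demo_node_of_cpu() and demo_report() are made up for illustration:

#include <linux/kernel.h>
#include <linux/topology.h>
#include <linux/workqueue.h>

/* hypothetical callback: reports which NUMA node the target CPU sits on */
static long demo_node_of_cpu(void *arg)
{
	return numa_node_id();	/* executes on the CPU passed to work_on_cpu() */
}

static void demo_report(unsigned int cpu)
{
	long nid = work_on_cpu(cpu, demo_node_of_cpu, NULL);

	pr_info("cpu%u sits on node %ld\n", cpu, nid);
}

Because @fn now runs from schedule_work_on(), callers must be able to sleep, and @fn should not itself flush or wait on work queued on the same system workqueue.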