author		Thomas Gleixner <tglx@linutronix.de>	2016-09-26 15:47:03 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2016-09-26 15:47:03 -0400
commit		1e1b37273cf719545da50b76f214f983a710aaf4 (patch)
tree		033f6062325ef7aaeefe8559bb409ab7d2be3c76 /kernel
parent		c183a603e8d8a5a189729b77d0c623a3d5950e5f (diff)
parent		c291b015158577be533dd5a959dfc09bab119eed (diff)
Merge branch 'x86/urgent' into x86/apic
Bring in the upstream modifications so we can fix up the silent merge conflict introduced by this merge.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/audit_watch.c            |   8
-rw-r--r--	kernel/bpf/hashtab.c            |  84
-rw-r--r--	kernel/bpf/verifier.c           |   7
-rw-r--r--	kernel/configs/tiny.config      |   8
-rw-r--r--	kernel/cpuset.c                 |  15
-rw-r--r--	kernel/events/core.c            |  97
-rw-r--r--	kernel/events/uprobes.c         |   5
-rw-r--r--	kernel/exit.c                   |   7
-rw-r--r--	kernel/fork.c                   |  37
-rw-r--r--	kernel/irq/affinity.c           |   2
-rw-r--r--	kernel/irq/chip.c               |  11
-rw-r--r--	kernel/irq/manage.c             |   8
-rw-r--r--	kernel/kexec_file.c             |   3
-rw-r--r--	kernel/power/snapshot.c         |  10
-rw-r--r--	kernel/printk/braille.c         |   4
-rw-r--r--	kernel/printk/nmi.c             |  38
-rw-r--r--	kernel/sched/cputime.c          |  33
-rw-r--r--	kernel/seccomp.c                |  12
-rw-r--r--	kernel/sysctl.c                 |  45
-rw-r--r--	kernel/time/tick-sched.c        |   3
-rw-r--r--	kernel/time/timekeeping.c       |   5
-rw-r--r--	kernel/time/timekeeping_debug.c |   9
-rw-r--r--	kernel/trace/blktrace.c         |   2
23 files changed, 349 insertions, 104 deletions
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index d6709eb70970..0d302a87f21b 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -19,6 +19,7 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
+#include <linux/file.h>
 #include <linux/kernel.h>
 #include <linux/audit.h>
 #include <linux/kthread.h>
@@ -544,10 +545,11 @@ int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark)
 	unsigned long ino;
 	dev_t dev;
 
-	rcu_read_lock();
-	exe_file = rcu_dereference(tsk->mm->exe_file);
+	exe_file = get_task_exe_file(tsk);
+	if (!exe_file)
+		return 0;
 	ino = exe_file->f_inode->i_ino;
 	dev = exe_file->f_inode->i_sb->s_dev;
-	rcu_read_unlock();
+	fput(exe_file);
 	return audit_mark_compare(mark, ino, dev);
 }
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index fff3650d52fc..570eeca7bdfa 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -26,11 +26,18 @@ struct bpf_htab {
 	struct bucket *buckets;
 	void *elems;
 	struct pcpu_freelist freelist;
+	void __percpu *extra_elems;
 	atomic_t count;	/* number of elements in this hashtable */
 	u32 n_buckets;	/* number of hash buckets */
 	u32 elem_size;	/* size of each element in bytes */
 };
 
+enum extra_elem_state {
+	HTAB_NOT_AN_EXTRA_ELEM = 0,
+	HTAB_EXTRA_ELEM_FREE,
+	HTAB_EXTRA_ELEM_USED
+};
+
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
 	union {
@@ -38,7 +45,10 @@ struct htab_elem {
 		struct bpf_htab *htab;
 		struct pcpu_freelist_node fnode;
 	};
-	struct rcu_head rcu;
+	union {
+		struct rcu_head rcu;
+		enum extra_elem_state state;
+	};
 	u32 hash;
 	char key[0] __aligned(8);
 };
@@ -113,6 +123,23 @@ free_elems:
 	return err;
 }
 
+static int alloc_extra_elems(struct bpf_htab *htab)
+{
+	void __percpu *pptr;
+	int cpu;
+
+	pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+	if (!pptr)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
+			HTAB_EXTRA_ELEM_FREE;
+	}
+	htab->extra_elems = pptr;
+	return 0;
+}
+
 /* Called from syscall */
 static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 {
@@ -185,6 +212,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	if (percpu)
 		cost += (u64) round_up(htab->map.value_size, 8) *
 			num_possible_cpus() * htab->map.max_entries;
+	else
+		cost += (u64) htab->elem_size * num_possible_cpus();
 
 	if (cost >= U32_MAX - PAGE_SIZE)
 		/* make sure page count doesn't overflow */
@@ -212,14 +241,22 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		raw_spin_lock_init(&htab->buckets[i].lock);
 	}
 
+	if (!percpu) {
+		err = alloc_extra_elems(htab);
+		if (err)
+			goto free_buckets;
+	}
+
 	if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
 		err = prealloc_elems_and_freelist(htab);
 		if (err)
-			goto free_buckets;
+			goto free_extra_elems;
 	}
 
 	return &htab->map;
 
+free_extra_elems:
+	free_percpu(htab->extra_elems);
 free_buckets:
 	kvfree(htab->buckets);
 free_htab:
@@ -349,7 +386,6 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
 	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
 		free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
 	kfree(l);
-
 }
 
 static void htab_elem_free_rcu(struct rcu_head *head)
@@ -370,6 +406,11 @@ static void htab_elem_free_rcu(struct rcu_head *head)
 
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
+	if (l->state == HTAB_EXTRA_ELEM_USED) {
+		l->state = HTAB_EXTRA_ELEM_FREE;
+		return;
+	}
+
 	if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
 		pcpu_freelist_push(&htab->freelist, &l->fnode);
 	} else {
@@ -381,25 +422,44 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
-					 bool percpu, bool onallcpus)
+					 bool percpu, bool onallcpus,
+					 bool old_elem_exists)
 {
 	u32 size = htab->map.value_size;
 	bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
 	struct htab_elem *l_new;
 	void __percpu *pptr;
+	int err = 0;
 
 	if (prealloc) {
 		l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
 		if (!l_new)
-			return ERR_PTR(-E2BIG);
+			err = -E2BIG;
 	} else {
 		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
 			atomic_dec(&htab->count);
-			return ERR_PTR(-E2BIG);
+			err = -E2BIG;
+		} else {
+			l_new = kmalloc(htab->elem_size,
+					GFP_ATOMIC | __GFP_NOWARN);
+			if (!l_new)
+				return ERR_PTR(-ENOMEM);
 		}
-		l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
-		if (!l_new)
-			return ERR_PTR(-ENOMEM);
+	}
+
+	if (err) {
+		if (!old_elem_exists)
+			return ERR_PTR(err);
+
+		/* if we're updating the existing element and the hash table
+		 * is full, use per-cpu extra elems
+		 */
+		l_new = this_cpu_ptr(htab->extra_elems);
+		if (l_new->state != HTAB_EXTRA_ELEM_FREE)
+			return ERR_PTR(-E2BIG);
+		l_new->state = HTAB_EXTRA_ELEM_USED;
+	} else {
+		l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
 	}
 
 	memcpy(l_new->key, key, key_size);
@@ -489,7 +549,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	if (ret)
 		goto err;
 
-	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false);
+	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
+				!!l_old);
 	if (IS_ERR(l_new)) {
 		/* all pre-allocated elements are in use or memory exhausted */
 		ret = PTR_ERR(l_new);
@@ -563,7 +624,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 		}
 	} else {
 		l_new = alloc_htab_elem(htab, key, value, key_size,
-					hash, true, onallcpus);
+					hash, true, onallcpus, false);
 		if (IS_ERR(l_new)) {
 			ret = PTR_ERR(l_new);
 			goto err;
@@ -652,6 +713,7 @@ static void htab_map_free(struct bpf_map *map)
 		htab_free_elems(htab);
 		pcpu_freelist_destroy(&htab->freelist);
 	}
+	free_percpu(htab->extra_elems);
 	kvfree(htab->buckets);
 	kfree(htab);
 }
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f72f23b8fdab..daea765d72e6 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -194,6 +194,7 @@ struct verifier_env {
 	struct verifier_state_list **explored_states; /* search pruning optimization */
 	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
 	u32 used_map_cnt;		/* number of used maps */
+	u32 id_gen;			/* used to generate unique reg IDs */
 	bool allow_ptr_leaks;
 };
 
@@ -1052,7 +1053,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_CGROUP_ARRAY:
-		if (func_id != BPF_FUNC_skb_in_cgroup)
+		if (func_id != BPF_FUNC_skb_under_cgroup)
 			goto error;
 		break;
 	default:
@@ -1074,7 +1075,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
 			goto error;
 		break;
-	case BPF_FUNC_skb_in_cgroup:
+	case BPF_FUNC_skb_under_cgroup:
 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
 			goto error;
 		break;
@@ -1301,7 +1302,7 @@ add_imm:
 		/* dst_reg stays as pkt_ptr type and since some positive
 		 * integer value was added to the pointer, increment its 'id'
 		 */
-		dst_reg->id++;
+		dst_reg->id = ++env->id_gen;
 
 		/* something was added to pkt_ptr, set range and off to zero */
 		dst_reg->off = 0;
diff --git a/kernel/configs/tiny.config b/kernel/configs/tiny.config
index c2de56ab0fce..7fa0c4ae6394 100644
--- a/kernel/configs/tiny.config
+++ b/kernel/configs/tiny.config
@@ -1,4 +1,12 @@
+# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+# CONFIG_KERNEL_GZIP is not set
+# CONFIG_KERNEL_BZIP2 is not set
+# CONFIG_KERNEL_LZMA is not set
 CONFIG_KERNEL_XZ=y
+# CONFIG_KERNEL_LZO is not set
+# CONFIG_KERNEL_LZ4 is not set
 CONFIG_OPTIMIZE_INLINING=y
+# CONFIG_SLAB is not set
+# CONFIG_SLUB is not set
 CONFIG_SLOB=y
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index c7fd2778ed50..c27e53326bef 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2069,6 +2069,20 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
 	mutex_unlock(&cpuset_mutex);
 }
 
+/*
+ * Make sure the new task conform to the current state of its parent,
+ * which could have been changed by cpuset just after it inherits the
+ * state from the parent and before it sits on the cgroup's task list.
+ */
+void cpuset_fork(struct task_struct *task)
+{
+	if (task_css_is_root(task, cpuset_cgrp_id))
+		return;
+
+	set_cpus_allowed_ptr(task, &current->cpus_allowed);
+	task->mems_allowed = current->mems_allowed;
+}
+
 struct cgroup_subsys cpuset_cgrp_subsys = {
 	.css_alloc	= cpuset_css_alloc,
 	.css_online	= cpuset_css_online,
@@ -2079,6 +2093,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
 	.attach		= cpuset_attach,
 	.post_attach	= cpuset_post_attach,
 	.bind		= cpuset_bind,
+	.fork		= cpuset_fork,
 	.legacy_cftypes	= files,
 	.early_init	= true,
 };
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1903b8f3a705..3cfabdf7b942 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -242,18 +242,6 @@ unlock:
 	return ret;
 }
 
-static void event_function_local(struct perf_event *event, event_f func, void *data)
-{
-	struct event_function_struct efs = {
-		.event = event,
-		.func = func,
-		.data = data,
-	};
-
-	int ret = event_function(&efs);
-	WARN_ON_ONCE(ret);
-}
-
 static void event_function_call(struct perf_event *event, event_f func, void *data)
 {
 	struct perf_event_context *ctx = event->ctx;
@@ -303,6 +291,54 @@ again:
 	raw_spin_unlock_irq(&ctx->lock);
 }
 
+/*
+ * Similar to event_function_call() + event_function(), but hard assumes IRQs
+ * are already disabled and we're on the right CPU.
+ */
+static void event_function_local(struct perf_event *event, event_f func, void *data)
+{
+	struct perf_event_context *ctx = event->ctx;
+	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+	struct task_struct *task = READ_ONCE(ctx->task);
+	struct perf_event_context *task_ctx = NULL;
+
+	WARN_ON_ONCE(!irqs_disabled());
+
+	if (task) {
+		if (task == TASK_TOMBSTONE)
+			return;
+
+		task_ctx = ctx;
+	}
+
+	perf_ctx_lock(cpuctx, task_ctx);
+
+	task = ctx->task;
+	if (task == TASK_TOMBSTONE)
+		goto unlock;
+
+	if (task) {
+		/*
+		 * We must be either inactive or active and the right task,
+		 * otherwise we're screwed, since we cannot IPI to somewhere
+		 * else.
+		 */
+		if (ctx->is_active) {
+			if (WARN_ON_ONCE(task != current))
+				goto unlock;
+
+			if (WARN_ON_ONCE(cpuctx->task_ctx != ctx))
+				goto unlock;
+		}
+	} else {
+		WARN_ON_ONCE(&cpuctx->ctx != ctx);
+	}
+
+	func(event, cpuctx, ctx, data);
+unlock:
+	perf_ctx_unlock(cpuctx, task_ctx);
+}
+
 #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
 		       PERF_FLAG_FD_OUTPUT  |\
 		       PERF_FLAG_PID_CGROUP |\
@@ -3513,9 +3549,10 @@ static int perf_event_read(struct perf_event *event, bool group)
 			.group = group,
 			.ret = 0,
 		};
-		smp_call_function_single(event->oncpu,
-					 __perf_event_read, &data, 1);
-		ret = data.ret;
+		ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
+		/* The event must have been read from an online CPU: */
+		WARN_ON_ONCE(ret);
+		ret = ret ? : data.ret;
 	} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
 		struct perf_event_context *ctx = event->ctx;
 		unsigned long flags;
@@ -6129,7 +6166,7 @@ static int __perf_pmu_output_stop(void *info)
 {
 	struct perf_event *event = info;
 	struct pmu *pmu = event->pmu;
-	struct perf_cpu_context *cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+	struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
 	struct remote_output ro = {
 		.rb	= event->rb,
 	};
@@ -6584,15 +6621,6 @@ got_name:
 }
 
 /*
- * Whether this @filter depends on a dynamic object which is not loaded
- * yet or its load addresses are not known.
- */
-static bool perf_addr_filter_needs_mmap(struct perf_addr_filter *filter)
-{
-	return filter->filter && filter->inode;
-}
-
-/*
  * Check whether inode and address range match filter criteria.
  */
 static bool perf_addr_filter_match(struct perf_addr_filter *filter,
@@ -6653,6 +6681,13 @@ static void perf_addr_filters_adjust(struct vm_area_struct *vma)
 	struct perf_event_context *ctx;
 	int ctxn;
 
+	/*
+	 * Data tracing isn't supported yet and as such there is no need
+	 * to keep track of anything that isn't related to executable code:
+	 */
+	if (!(vma->vm_flags & VM_EXEC))
+		return;
+
 	rcu_read_lock();
 	for_each_task_context_nr(ctxn) {
 		ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
@@ -7805,7 +7840,11 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
 	list_for_each_entry(filter, &ifh->list, entry) {
 		event->addr_filters_offs[count] = 0;
 
-		if (perf_addr_filter_needs_mmap(filter))
+		/*
+		 * Adjust base offset if the filter is associated to a binary
+		 * that needs to be mapped:
+		 */
+		if (filter->inode)
 			event->addr_filters_offs[count] =
 				perf_addr_filter_apply(filter, mm);
 
@@ -7936,8 +7975,10 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
 			goto fail;
 		}
 
-		if (token == IF_SRC_FILE) {
-			filename = match_strdup(&args[2]);
+		if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) {
+			int fpos = filter->range ? 2 : 1;
+
+			filename = match_strdup(&args[fpos]);
 			if (!filename) {
 				ret = -ENOMEM;
 				goto fail;
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index b7a525ab2083..8c50276b60d1 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -172,8 +172,10 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	err = -EAGAIN;
 	ptep = page_check_address(page, mm, addr, &ptl, 0);
-	if (!ptep)
+	if (!ptep) {
+		mem_cgroup_cancel_charge(kpage, memcg, false);
 		goto unlock;
+	}
 
 	get_page(kpage);
 	page_add_new_anon_rmap(kpage, vma, addr, false);
@@ -200,7 +202,6 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
 	err = 0;
  unlock:
-	mem_cgroup_cancel_charge(kpage, memcg, false);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	unlock_page(page);
 	return err;
diff --git a/kernel/exit.c b/kernel/exit.c
index 2f974ae042a6..091a78be3b09 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -848,12 +848,7 @@ void do_exit(long code)
 	TASKS_RCU(preempt_enable());
 	exit_notify(tsk, group_dead);
 	proc_exit_connector(tsk);
-#ifdef CONFIG_NUMA
-	task_lock(tsk);
-	mpol_put(tsk->mempolicy);
-	tsk->mempolicy = NULL;
-	task_unlock(tsk);
-#endif
+	mpol_put_task_policy(tsk);
 #ifdef CONFIG_FUTEX
 	if (unlikely(current->pi_state_cache))
 		kfree(current->pi_state_cache);
diff --git a/kernel/fork.c b/kernel/fork.c
index 52e725d4a866..beb31725f7e2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -799,6 +799,29 @@ struct file *get_mm_exe_file(struct mm_struct *mm)
 EXPORT_SYMBOL(get_mm_exe_file);
 
 /**
+ * get_task_exe_file - acquire a reference to the task's executable file
+ *
+ * Returns %NULL if task's mm (if any) has no associated executable file or
+ * this is a kernel thread with borrowed mm (see the comment above get_task_mm).
+ * User must release file via fput().
+ */
+struct file *get_task_exe_file(struct task_struct *task)
+{
+	struct file *exe_file = NULL;
+	struct mm_struct *mm;
+
+	task_lock(task);
+	mm = task->mm;
+	if (mm) {
+		if (!(task->flags & PF_KTHREAD))
+			exe_file = get_mm_exe_file(mm);
+	}
+	task_unlock(task);
+	return exe_file;
+}
+EXPORT_SYMBOL(get_task_exe_file);
+
+/**
  * get_task_mm - acquire a reference to the task's mm
  *
  * Returns %NULL if the task has no mm.  Checks PF_KTHREAD (meaning
@@ -913,14 +936,12 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 	deactivate_mm(tsk, mm);
 
 	/*
-	 * If we're exiting normally, clear a user-space tid field if
-	 * requested.  We leave this alone when dying by signal, to leave
-	 * the value intact in a core dump, and to save the unnecessary
-	 * trouble, say, a killed vfork parent shouldn't touch this mm.
-	 * Userland only wants this done for a sys_exit.
+	 * Signal userspace if we're not exiting with a core dump
+	 * because we want to leave the value intact for debugging
+	 * purposes.
 	 */
 	if (tsk->clear_child_tid) {
-		if (!(tsk->flags & PF_SIGNALED) &&
+		if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
 		    atomic_read(&mm->mm_users) > 1) {
 			/*
 			 * We don't check the error code - if userspace has
@@ -1404,7 +1425,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->real_start_time = ktime_get_boot_ns();
 	p->io_context = NULL;
 	p->audit_context = NULL;
-	threadgroup_change_begin(current);
 	cgroup_fork(p);
 #ifdef CONFIG_NUMA
 	p->mempolicy = mpol_dup(p->mempolicy);
@@ -1556,6 +1576,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	INIT_LIST_HEAD(&p->thread_group);
 	p->task_works = NULL;
 
+	threadgroup_change_begin(current);
 	/*
 	 * Ensure that the cgroup subsystem policies allow the new process to be
 	 * forked. It should be noted the the new process's css_set can be changed
@@ -1656,6 +1677,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 bad_fork_cancel_cgroup:
 	cgroup_cancel_fork(p);
 bad_fork_free_pid:
+	threadgroup_change_end(current);
 	if (pid != &init_struct_pid)
 		free_pid(pid);
 bad_fork_cleanup_thread:
@@ -1688,7 +1710,6 @@ bad_fork_cleanup_policy:
 	mpol_put(p->mempolicy);
 bad_fork_cleanup_threadgroup_lock:
 #endif
-	threadgroup_change_end(current);
 	delayacct_tsk_free(p);
 bad_fork_cleanup_count:
 	atomic_dec(&p->cred->user->processes);
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index f68959341c0f..32f6cfcff212 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -39,6 +39,7 @@ struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs)
 		return NULL;
 	}
 
+	get_online_cpus();
 	if (max_vecs >= num_online_cpus()) {
 		cpumask_copy(affinity_mask, cpu_online_mask);
 		*nr_vecs = num_online_cpus();
@@ -56,6 +57,7 @@ struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs)
 		}
 		*nr_vecs = vecs;
 	}
+	put_online_cpus();
 
 	return affinity_mask;
 }
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index b4c1bc7c9ca2..637389088b3f 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -820,6 +820,17 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
 		desc->name = name;
 
 	if (handle != handle_bad_irq && is_chained) {
+		/*
+		 * We're about to start this interrupt immediately,
+		 * hence the need to set the trigger configuration.
+		 * But the .set_type callback may have overridden the
+		 * flow handler, ignoring that we're dealing with a
+		 * chained interrupt. Reset it immediately because we
+		 * do know better.
+		 */
+		__irq_set_trigger(desc, irqd_get_trigger_type(&desc->irq_data));
+		desc->handle_irq = handle;
+
 		irq_settings_set_noprobe(desc);
 		irq_settings_set_norequest(desc);
 		irq_settings_set_nothread(desc);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 73a2b786b5e9..9530fcd27704 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1681,8 +1681,10 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
 	action->dev_id = dev_id;
 
 	retval = irq_chip_pm_get(&desc->irq_data);
-	if (retval < 0)
+	if (retval < 0) {
+		kfree(action);
 		return retval;
+	}
 
 	chip_bus_lock(desc);
 	retval = __setup_irq(irq, desc, action);
@@ -1985,8 +1987,10 @@ int request_percpu_irq(unsigned int irq, irq_handler_t handler,
 	action->percpu_dev_id = dev_id;
 
 	retval = irq_chip_pm_get(&desc->irq_data);
-	if (retval < 0)
+	if (retval < 0) {
+		kfree(action);
 		return retval;
+	}
 
 	chip_bus_lock(desc);
 	retval = __setup_irq(irq, desc, action);
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 503bc2d348e5..037c321c5618 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -887,7 +887,10 @@ int kexec_load_purgatory(struct kimage *image, unsigned long min,
 	return 0;
 out:
 	vfree(pi->sechdrs);
+	pi->sechdrs = NULL;
+
 	vfree(pi->purgatory_buf);
+	pi->purgatory_buf = NULL;
 	return ret;
 }
 
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 9a0178c2ac1d..b02228411d57 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -835,9 +835,9 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
  */
 static bool rtree_next_node(struct memory_bitmap *bm)
 {
-	bm->cur.node = list_entry(bm->cur.node->list.next,
-				  struct rtree_node, list);
-	if (&bm->cur.node->list != &bm->cur.zone->leaves) {
+	if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) {
+		bm->cur.node = list_entry(bm->cur.node->list.next,
+					  struct rtree_node, list);
 		bm->cur.node_pfn += BM_BITS_PER_BLOCK;
 		bm->cur.node_bit = 0;
 		touch_softlockup_watchdog();
@@ -845,9 +845,9 @@ static bool rtree_next_node(struct memory_bitmap *bm)
 	}
 
 	/* No more nodes, goto next zone */
-	bm->cur.zone = list_entry(bm->cur.zone->list.next,
+	if (!list_is_last(&bm->cur.zone->list, &bm->zones)) {
+		bm->cur.zone = list_entry(bm->cur.zone->list.next,
 				  struct mem_zone_bm_rtree, list);
-	if (&bm->cur.zone->list != &bm->zones) {
 		bm->cur.node = list_entry(bm->cur.zone->leaves.next,
 					  struct rtree_node, list);
 		bm->cur.node_pfn = 0;
diff --git a/kernel/printk/braille.c b/kernel/printk/braille.c
index 276762f3a460..d5760c42f042 100644
--- a/kernel/printk/braille.c
+++ b/kernel/printk/braille.c
@@ -9,10 +9,10 @@
 
 char *_braille_console_setup(char **str, char **brl_options)
 {
-	if (!memcmp(*str, "brl,", 4)) {
+	if (!strncmp(*str, "brl,", 4)) {
 		*brl_options = "";
 		*str += 4;
-	} else if (!memcmp(str, "brl=", 4)) {
+	} else if (!strncmp(*str, "brl=", 4)) {
 		*brl_options = *str + 4;
 		*str = strchr(*brl_options, ',');
 		if (!*str)
diff --git a/kernel/printk/nmi.c b/kernel/printk/nmi.c
index b69eb8a2876f..16bab471c7e2 100644
--- a/kernel/printk/nmi.c
+++ b/kernel/printk/nmi.c
@@ -99,27 +99,33 @@ again:
 	return add;
 }
 
-/*
- * printk one line from the temporary buffer from @start index until
- * and including the @end index.
- */
-static void print_nmi_seq_line(struct nmi_seq_buf *s, int start, int end)
+static void printk_nmi_flush_line(const char *text, int len)
 {
-	const char *buf = s->buffer + start;
-
 	/*
 	 * The buffers are flushed in NMI only on panic. The messages must
 	 * go only into the ring buffer at this stage. Consoles will get
 	 * explicitly called later when a crashdump is not generated.
 	 */
 	if (in_nmi())
-		printk_deferred("%.*s", (end - start) + 1, buf);
+		printk_deferred("%.*s", len, text);
 	else
-		printk("%.*s", (end - start) + 1, buf);
+		printk("%.*s", len, text);
 
 }
 
 /*
+ * printk one line from the temporary buffer from @start index until
+ * and including the @end index.
+ */
+static void printk_nmi_flush_seq_line(struct nmi_seq_buf *s,
+					int start, int end)
+{
+	const char *buf = s->buffer + start;
+
+	printk_nmi_flush_line(buf, (end - start) + 1);
+}
+
+/*
  * Flush data from the associated per_CPU buffer. The function
  * can be called either via IRQ work or independently.
  */
@@ -150,9 +156,11 @@ more:
 	 * the buffer an unexpected way. If we printed something then
	 * @len must only increase.
 	 */
-	if (i && i >= len)
-		pr_err("printk_nmi_flush: internal error: i=%d >= len=%zu\n",
-		       i, len);
+	if (i && i >= len) {
+		const char *msg = "printk_nmi_flush: internal error\n";
+
+		printk_nmi_flush_line(msg, strlen(msg));
+	}
 
 	if (!len)
 		goto out; /* Someone else has already flushed the buffer. */
@@ -166,14 +174,14 @@ more:
 	/* Print line by line. */
 	for (; i < size; i++) {
 		if (s->buffer[i] == '\n') {
-			print_nmi_seq_line(s, last_i, i);
+			printk_nmi_flush_seq_line(s, last_i, i);
 			last_i = i + 1;
 		}
 	}
 	/* Check if there was a partial line. */
 	if (last_i < size) {
-		print_nmi_seq_line(s, last_i, size - 1);
-		pr_cont("\n");
+		printk_nmi_flush_seq_line(s, last_i, size - 1);
+		printk_nmi_flush_line("\n", strlen("\n"));
 	}
 
 	/*
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 9858266fb0b3..a846cf89eb96 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -263,6 +263,11 @@ void account_idle_time(cputime_t cputime)
 		cpustat[CPUTIME_IDLE] += (__force u64) cputime;
 }
 
+/*
+ * When a guest is interrupted for a longer amount of time, missed clock
+ * ticks are not redelivered later. Due to that, this function may on
+ * occasion account more time than the calling functions think elapsed.
+ */
 static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
 {
 #ifdef CONFIG_PARAVIRT
@@ -371,7 +376,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 	 * idle, or potentially user or system time. Due to rounding,
 	 * other time can exceed ticks occasionally.
 	 */
-	other = account_other_time(cputime);
+	other = account_other_time(ULONG_MAX);
 	if (other >= cputime)
 		return;
 	cputime -= other;
@@ -486,7 +491,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
 	}
 
 	cputime = cputime_one_jiffy;
-	steal = steal_account_process_time(cputime);
+	steal = steal_account_process_time(ULONG_MAX);
 
 	if (steal >= cputime)
 		return;
@@ -516,7 +521,7 @@ void account_idle_ticks(unsigned long ticks)
 	}
 
 	cputime = jiffies_to_cputime(ticks);
-	steal = steal_account_process_time(cputime);
+	steal = steal_account_process_time(ULONG_MAX);
 
 	if (steal >= cputime)
 		return;
@@ -614,19 +619,25 @@ static void cputime_adjust(struct task_cputime *curr,
 	stime = curr->stime;
 	utime = curr->utime;
 
-	if (utime == 0) {
-		stime = rtime;
+	/*
+	 * If either stime or both stime and utime are 0, assume all runtime is
+	 * userspace. Once a task gets some ticks, the monotonicy code at
+	 * 'update' will ensure things converge to the observed ratio.
+	 */
+	if (stime == 0) {
+		utime = rtime;
 		goto update;
 	}
 
-	if (stime == 0) {
-		utime = rtime;
+	if (utime == 0) {
+		stime = rtime;
 		goto update;
 	}
 
 	stime = scale_stime((__force u64)stime, (__force u64)rtime,
 			    (__force u64)(stime + utime));
 
+update:
 	/*
 	 * Make sure stime doesn't go backwards; this preserves monotonicity
 	 * for utime because rtime is monotonic.
@@ -649,7 +660,6 @@ static void cputime_adjust(struct task_cputime *curr,
 		stime = rtime - utime;
 	}
 
-update:
 	prev->stime = stime;
 	prev->utime = utime;
 out:
@@ -694,6 +704,13 @@ static cputime_t get_vtime_delta(struct task_struct *tsk)
 	unsigned long now = READ_ONCE(jiffies);
 	cputime_t delta, other;
 
+	/*
+	 * Unlike tick based timing, vtime based timing never has lost
+	 * ticks, and no need for steal time accounting to make up for
+	 * lost ticks. Vtime accounts a rounded version of actual
+	 * elapsed time. Limit account_other_time to prevent rounding
+	 * errors from causing elapsed vtime to go negative.
+	 */
 	delta = jiffies_to_cputime(now - tsk->vtime_snap);
 	other = account_other_time(delta);
 	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index ef6c6c3f9d8a..0db7c8a2afe2 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -605,12 +605,16 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 		ptrace_event(PTRACE_EVENT_SECCOMP, data);
 		/*
 		 * The delivery of a fatal signal during event
-		 * notification may silently skip tracer notification.
-		 * Terminating the task now avoids executing a system
-		 * call that may not be intended.
+		 * notification may silently skip tracer notification,
+		 * which could leave us with a potentially unmodified
+		 * syscall that the tracer would have liked to have
+		 * changed. Since the process is about to die, we just
+		 * force the syscall to be skipped and let the signal
+		 * kill the process and correctly handle any tracer exit
+		 * notifications.
 		 */
 		if (fatal_signal_pending(current))
-			do_exit(SIGSYS);
+			goto skip;
 		/* Check if the tracer forced the syscall to be skipped. */
 		this_syscall = syscall_get_nr(current, task_pt_regs(current));
 		if (this_syscall < 0)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b43d0b27c1fe..a13bbdaab47d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2140,6 +2140,21 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
 	return 0;
 }
 
+static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp,
+				  int *valp,
+				  int write, void *data)
+{
+	if (write) {
+		if (*negp)
+			return -EINVAL;
+		*valp = *lvalp;
+	} else {
+		unsigned int val = *valp;
+		*lvalp = (unsigned long)val;
+	}
+	return 0;
+}
+
 static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
 
 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
@@ -2259,8 +2274,27 @@ static int do_proc_dointvec(struct ctl_table *table, int write,
 int proc_dointvec(struct ctl_table *table, int write,
 		  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	return do_proc_dointvec(table,write,buffer,lenp,ppos,
-				NULL,NULL);
+	return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
+}
+
+/**
+ * proc_douintvec - read a vector of unsigned integers
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
+ * values from/to the user buffer, treated as an ASCII string.
+ *
+ * Returns 0 on success.
+ */
+int proc_douintvec(struct ctl_table *table, int write,
+		   void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return do_proc_dointvec(table, write, buffer, lenp, ppos,
+				do_proc_douintvec_conv, NULL);
 }
 
 /*
@@ -2858,6 +2892,12 @@ int proc_dointvec(struct ctl_table *table, int write,
 	return -ENOSYS;
 }
 
+int proc_douintvec(struct ctl_table *table, int write,
+		   void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+
 int proc_dointvec_minmax(struct ctl_table *table, int write,
 		    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -2903,6 +2943,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
  * exception granted :-)
  */
 EXPORT_SYMBOL(proc_dointvec);
+EXPORT_SYMBOL(proc_douintvec);
 EXPORT_SYMBOL(proc_dointvec_jiffies);
 EXPORT_SYMBOL(proc_dointvec_minmax);
 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 204fdc86863d..2ec7c00228f3 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -908,10 +908,11 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts)
 	ktime_t now, expires;
 	int cpu = smp_processor_id();
 
+	now = tick_nohz_start_idle(ts);
+
 	if (can_stop_idle_tick(cpu, ts)) {
 		int was_stopped = ts->tick_stopped;
 
-		now = tick_nohz_start_idle(ts);
 		ts->idle_calls++;
 
 		expires = tick_nohz_stop_sched_tick(ts, now, cpu);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3b65746c7f15..e07fb093f819 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -401,7 +401,10 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
 	do {
 		seq = raw_read_seqcount_latch(&tkf->seq);
 		tkr = tkf->base + (seq & 0x01);
-		now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
+		now = ktime_to_ns(tkr->base);
+
+		now += clocksource_delta(tkr->read(tkr->clock),
+					 tkr->cycle_last, tkr->mask);
 	} while (read_seqcount_retry(&tkf->seq, seq));
 
 	return now;
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index f6bd65236712..107310a6f36f 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -23,7 +23,9 @@
 
 #include "timekeeping_internal.h"
 
-static unsigned int sleep_time_bin[32] = {0};
+#define NUM_BINS 32
+
+static unsigned int sleep_time_bin[NUM_BINS] = {0};
 
 static int tk_debug_show_sleep_time(struct seq_file *s, void *data)
 {
@@ -69,6 +71,9 @@ late_initcall(tk_debug_sleep_time_init);
 
 void tk_debug_account_sleep_time(struct timespec64 *t)
 {
-	sleep_time_bin[fls(t->tv_sec)]++;
+	/* Cap bin index so we don't overflow the array */
+	int bin = min(fls(t->tv_sec), NUM_BINS-1);
+
+	sleep_time_bin[bin]++;
 }
 
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 7598e6ca817a..dbafc5df03f3 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -223,7 +223,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
 	what |= MASK_TC_BIT(op_flags, META);
 	what |= MASK_TC_BIT(op_flags, PREFLUSH);
 	what |= MASK_TC_BIT(op_flags, FUA);
-	if (op == REQ_OP_DISCARD)
+	if (op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)
 		what |= BLK_TC_ACT(BLK_TC_DISCARD);
 	if (op == REQ_OP_FLUSH)
 		what |= BLK_TC_ACT(BLK_TC_FLUSH);