Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/hashtab.c                |  84
-rw-r--r--  kernel/bpf/verifier.c               |   7
-rw-r--r--  kernel/events/core.c                | 174
-rw-r--r--  kernel/events/uprobes.c             |   5
-rw-r--r--  kernel/futex.c                      |  23
-rw-r--r--  kernel/irq/affinity.c               |   2
-rw-r--r--  kernel/irq/chip.c                   |  11
-rw-r--r--  kernel/irq/manage.c                 |   8
-rw-r--r--  kernel/irq/msi.c                    |  11
-rw-r--r--  kernel/locking/qspinlock_paravirt.h |   2
-rw-r--r--  kernel/locking/qspinlock_stat.h     |   1
-rw-r--r--  kernel/power/hibernate.c            |   4
-rw-r--r--  kernel/power/snapshot.c             |  10
-rw-r--r--  kernel/printk/braille.c             |   4
-rw-r--r--  kernel/sched/core.c                 |  19
-rw-r--r--  kernel/sched/cpudeadline.c          |   2
-rw-r--r--  kernel/sched/cputime.c              |  41
-rw-r--r--  kernel/sched/deadline.c             |   5
-rw-r--r--  kernel/sched/fair.c                 |   2
-rw-r--r--  kernel/sysctl.c                     |  45
-rw-r--r--  kernel/time/timekeeping.c           |   5
-rw-r--r--  kernel/time/timekeeping_debug.c     |   9
-rw-r--r--  kernel/time/timer.c                 |   5
-rw-r--r--  kernel/trace/blktrace.c             |   2
24 files changed, 382 insertions(+), 99 deletions(-)
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index fff3650d52fc..570eeca7bdfa 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -26,11 +26,18 @@ struct bpf_htab { | |||
26 | struct bucket *buckets; | 26 | struct bucket *buckets; |
27 | void *elems; | 27 | void *elems; |
28 | struct pcpu_freelist freelist; | 28 | struct pcpu_freelist freelist; |
29 | void __percpu *extra_elems; | ||
29 | atomic_t count; /* number of elements in this hashtable */ | 30 | atomic_t count; /* number of elements in this hashtable */ |
30 | u32 n_buckets; /* number of hash buckets */ | 31 | u32 n_buckets; /* number of hash buckets */ |
31 | u32 elem_size; /* size of each element in bytes */ | 32 | u32 elem_size; /* size of each element in bytes */ |
32 | }; | 33 | }; |
33 | 34 | ||
35 | enum extra_elem_state { | ||
36 | HTAB_NOT_AN_EXTRA_ELEM = 0, | ||
37 | HTAB_EXTRA_ELEM_FREE, | ||
38 | HTAB_EXTRA_ELEM_USED | ||
39 | }; | ||
40 | |||
34 | /* each htab element is struct htab_elem + key + value */ | 41 | /* each htab element is struct htab_elem + key + value */ |
35 | struct htab_elem { | 42 | struct htab_elem { |
36 | union { | 43 | union { |
@@ -38,7 +45,10 @@ struct htab_elem { | |||
38 | struct bpf_htab *htab; | 45 | struct bpf_htab *htab; |
39 | struct pcpu_freelist_node fnode; | 46 | struct pcpu_freelist_node fnode; |
40 | }; | 47 | }; |
41 | struct rcu_head rcu; | 48 | union { |
49 | struct rcu_head rcu; | ||
50 | enum extra_elem_state state; | ||
51 | }; | ||
42 | u32 hash; | 52 | u32 hash; |
43 | char key[0] __aligned(8); | 53 | char key[0] __aligned(8); |
44 | }; | 54 | }; |
@@ -113,6 +123,23 @@ free_elems: | |||
113 | return err; | 123 | return err; |
114 | } | 124 | } |
115 | 125 | ||
126 | static int alloc_extra_elems(struct bpf_htab *htab) | ||
127 | { | ||
128 | void __percpu *pptr; | ||
129 | int cpu; | ||
130 | |||
131 | pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN); | ||
132 | if (!pptr) | ||
133 | return -ENOMEM; | ||
134 | |||
135 | for_each_possible_cpu(cpu) { | ||
136 | ((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state = | ||
137 | HTAB_EXTRA_ELEM_FREE; | ||
138 | } | ||
139 | htab->extra_elems = pptr; | ||
140 | return 0; | ||
141 | } | ||
142 | |||
116 | /* Called from syscall */ | 143 | /* Called from syscall */ |
117 | static struct bpf_map *htab_map_alloc(union bpf_attr *attr) | 144 | static struct bpf_map *htab_map_alloc(union bpf_attr *attr) |
118 | { | 145 | { |
@@ -185,6 +212,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) | |||
185 | if (percpu) | 212 | if (percpu) |
186 | cost += (u64) round_up(htab->map.value_size, 8) * | 213 | cost += (u64) round_up(htab->map.value_size, 8) * |
187 | num_possible_cpus() * htab->map.max_entries; | 214 | num_possible_cpus() * htab->map.max_entries; |
215 | else | ||
216 | cost += (u64) htab->elem_size * num_possible_cpus(); | ||
188 | 217 | ||
189 | if (cost >= U32_MAX - PAGE_SIZE) | 218 | if (cost >= U32_MAX - PAGE_SIZE) |
190 | /* make sure page count doesn't overflow */ | 219 | /* make sure page count doesn't overflow */ |
@@ -212,14 +241,22 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) | |||
212 | raw_spin_lock_init(&htab->buckets[i].lock); | 241 | raw_spin_lock_init(&htab->buckets[i].lock); |
213 | } | 242 | } |
214 | 243 | ||
244 | if (!percpu) { | ||
245 | err = alloc_extra_elems(htab); | ||
246 | if (err) | ||
247 | goto free_buckets; | ||
248 | } | ||
249 | |||
215 | if (!(attr->map_flags & BPF_F_NO_PREALLOC)) { | 250 | if (!(attr->map_flags & BPF_F_NO_PREALLOC)) { |
216 | err = prealloc_elems_and_freelist(htab); | 251 | err = prealloc_elems_and_freelist(htab); |
217 | if (err) | 252 | if (err) |
218 | goto free_buckets; | 253 | goto free_extra_elems; |
219 | } | 254 | } |
220 | 255 | ||
221 | return &htab->map; | 256 | return &htab->map; |
222 | 257 | ||
258 | free_extra_elems: | ||
259 | free_percpu(htab->extra_elems); | ||
223 | free_buckets: | 260 | free_buckets: |
224 | kvfree(htab->buckets); | 261 | kvfree(htab->buckets); |
225 | free_htab: | 262 | free_htab: |
@@ -349,7 +386,6 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l) | |||
349 | if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) | 386 | if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) |
350 | free_percpu(htab_elem_get_ptr(l, htab->map.key_size)); | 387 | free_percpu(htab_elem_get_ptr(l, htab->map.key_size)); |
351 | kfree(l); | 388 | kfree(l); |
352 | |||
353 | } | 389 | } |
354 | 390 | ||
355 | static void htab_elem_free_rcu(struct rcu_head *head) | 391 | static void htab_elem_free_rcu(struct rcu_head *head) |
@@ -370,6 +406,11 @@ static void htab_elem_free_rcu(struct rcu_head *head) | |||
370 | 406 | ||
371 | static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) | 407 | static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) |
372 | { | 408 | { |
409 | if (l->state == HTAB_EXTRA_ELEM_USED) { | ||
410 | l->state = HTAB_EXTRA_ELEM_FREE; | ||
411 | return; | ||
412 | } | ||
413 | |||
373 | if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) { | 414 | if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) { |
374 | pcpu_freelist_push(&htab->freelist, &l->fnode); | 415 | pcpu_freelist_push(&htab->freelist, &l->fnode); |
375 | } else { | 416 | } else { |
@@ -381,25 +422,44 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) | |||
381 | 422 | ||
382 | static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, | 423 | static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, |
383 | void *value, u32 key_size, u32 hash, | 424 | void *value, u32 key_size, u32 hash, |
384 | bool percpu, bool onallcpus) | 425 | bool percpu, bool onallcpus, |
426 | bool old_elem_exists) | ||
385 | { | 427 | { |
386 | u32 size = htab->map.value_size; | 428 | u32 size = htab->map.value_size; |
387 | bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC); | 429 | bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC); |
388 | struct htab_elem *l_new; | 430 | struct htab_elem *l_new; |
389 | void __percpu *pptr; | 431 | void __percpu *pptr; |
432 | int err = 0; | ||
390 | 433 | ||
391 | if (prealloc) { | 434 | if (prealloc) { |
392 | l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist); | 435 | l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist); |
393 | if (!l_new) | 436 | if (!l_new) |
394 | return ERR_PTR(-E2BIG); | 437 | err = -E2BIG; |
395 | } else { | 438 | } else { |
396 | if (atomic_inc_return(&htab->count) > htab->map.max_entries) { | 439 | if (atomic_inc_return(&htab->count) > htab->map.max_entries) { |
397 | atomic_dec(&htab->count); | 440 | atomic_dec(&htab->count); |
398 | return ERR_PTR(-E2BIG); | 441 | err = -E2BIG; |
442 | } else { | ||
443 | l_new = kmalloc(htab->elem_size, | ||
444 | GFP_ATOMIC | __GFP_NOWARN); | ||
445 | if (!l_new) | ||
446 | return ERR_PTR(-ENOMEM); | ||
399 | } | 447 | } |
400 | l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN); | 448 | } |
401 | if (!l_new) | 449 | |
402 | return ERR_PTR(-ENOMEM); | 450 | if (err) { |
451 | if (!old_elem_exists) | ||
452 | return ERR_PTR(err); | ||
453 | |||
454 | /* if we're updating the existing element and the hash table | ||
455 | * is full, use per-cpu extra elems | ||
456 | */ | ||
457 | l_new = this_cpu_ptr(htab->extra_elems); | ||
458 | if (l_new->state != HTAB_EXTRA_ELEM_FREE) | ||
459 | return ERR_PTR(-E2BIG); | ||
460 | l_new->state = HTAB_EXTRA_ELEM_USED; | ||
461 | } else { | ||
462 | l_new->state = HTAB_NOT_AN_EXTRA_ELEM; | ||
403 | } | 463 | } |
404 | 464 | ||
405 | memcpy(l_new->key, key, key_size); | 465 | memcpy(l_new->key, key, key_size); |
@@ -489,7 +549,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, | |||
489 | if (ret) | 549 | if (ret) |
490 | goto err; | 550 | goto err; |
491 | 551 | ||
492 | l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false); | 552 | l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false, |
553 | !!l_old); | ||
493 | if (IS_ERR(l_new)) { | 554 | if (IS_ERR(l_new)) { |
494 | /* all pre-allocated elements are in use or memory exhausted */ | 555 | /* all pre-allocated elements are in use or memory exhausted */ |
495 | ret = PTR_ERR(l_new); | 556 | ret = PTR_ERR(l_new); |
@@ -563,7 +624,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, | |||
563 | } | 624 | } |
564 | } else { | 625 | } else { |
565 | l_new = alloc_htab_elem(htab, key, value, key_size, | 626 | l_new = alloc_htab_elem(htab, key, value, key_size, |
566 | hash, true, onallcpus); | 627 | hash, true, onallcpus, false); |
567 | if (IS_ERR(l_new)) { | 628 | if (IS_ERR(l_new)) { |
568 | ret = PTR_ERR(l_new); | 629 | ret = PTR_ERR(l_new); |
569 | goto err; | 630 | goto err; |
@@ -652,6 +713,7 @@ static void htab_map_free(struct bpf_map *map) | |||
652 | htab_free_elems(htab); | 713 | htab_free_elems(htab); |
653 | pcpu_freelist_destroy(&htab->freelist); | 714 | pcpu_freelist_destroy(&htab->freelist); |
654 | } | 715 | } |
716 | free_percpu(htab->extra_elems); | ||
655 | kvfree(htab->buckets); | 717 | kvfree(htab->buckets); |
656 | kfree(htab); | 718 | kfree(htab); |
657 | } | 719 | } |
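
The hashtab.c change above reserves one extra element per possible CPU so that an update of an existing key can still succeed when the map already holds max_entries elements; the borrowed element is flagged HTAB_EXTRA_ELEM_USED while it sits in the table and flips back to HTAB_EXTRA_ELEM_FREE when it is freed. Below is a minimal userspace sketch of the same idea, not the kernel code: the names (spare_state, alloc_elem, free_elem) and the single static spare slot are illustrative stand-ins for the per-CPU extra_elems.

#include <stdio.h>

enum spare_state { NOT_A_SPARE, SPARE_FREE, SPARE_USED };

struct elem { int key, val, in_use; enum spare_state state; };

#define MAX_ENTRIES 2
static struct elem pool[MAX_ENTRIES];                 /* "preallocated" elements */
static struct elem spare = { .state = SPARE_FREE };   /* one per CPU in the kernel */

static struct elem *alloc_elem(int updating_existing_key)
{
    for (int i = 0; i < MAX_ENTRIES; i++) {
        if (!pool[i].in_use) {
            pool[i].in_use = 1;
            pool[i].state = NOT_A_SPARE;
            return &pool[i];
        }
    }
    /* Table full: only an in-place update of an existing key may borrow the spare. */
    if (!updating_existing_key || spare.state != SPARE_FREE)
        return NULL;
    spare.state = SPARE_USED;
    return &spare;
}

static void free_elem(struct elem *e)
{
    if (e->state == SPARE_USED) {   /* hand the spare back, nothing to release */
        e->state = SPARE_FREE;
        return;
    }
    e->in_use = 0;
}

int main(void)
{
    struct elem *a = alloc_elem(0), *b = alloc_elem(0);   /* fill the table */
    struct elem *n = alloc_elem(1);                       /* update an existing key anyway */
    printf("update used the %s\n", n == &spare ? "spare element" : "pool");
    free_elem(a);   /* the old copy of the updated key goes away */
    (void)b;
    return 0;
}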
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f72f23b8fdab..daea765d72e6 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -194,6 +194,7 @@ struct verifier_env { | |||
194 | struct verifier_state_list **explored_states; /* search pruning optimization */ | 194 | struct verifier_state_list **explored_states; /* search pruning optimization */ |
195 | struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ | 195 | struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ |
196 | u32 used_map_cnt; /* number of used maps */ | 196 | u32 used_map_cnt; /* number of used maps */ |
197 | u32 id_gen; /* used to generate unique reg IDs */ | ||
197 | bool allow_ptr_leaks; | 198 | bool allow_ptr_leaks; |
198 | }; | 199 | }; |
199 | 200 | ||
@@ -1052,7 +1053,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) | |||
1052 | goto error; | 1053 | goto error; |
1053 | break; | 1054 | break; |
1054 | case BPF_MAP_TYPE_CGROUP_ARRAY: | 1055 | case BPF_MAP_TYPE_CGROUP_ARRAY: |
1055 | if (func_id != BPF_FUNC_skb_in_cgroup) | 1056 | if (func_id != BPF_FUNC_skb_under_cgroup) |
1056 | goto error; | 1057 | goto error; |
1057 | break; | 1058 | break; |
1058 | default: | 1059 | default: |
@@ -1074,7 +1075,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) | |||
1074 | if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) | 1075 | if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) |
1075 | goto error; | 1076 | goto error; |
1076 | break; | 1077 | break; |
1077 | case BPF_FUNC_skb_in_cgroup: | 1078 | case BPF_FUNC_skb_under_cgroup: |
1078 | if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) | 1079 | if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) |
1079 | goto error; | 1080 | goto error; |
1080 | break; | 1081 | break; |
@@ -1301,7 +1302,7 @@ add_imm: | |||
1301 | /* dst_reg stays as pkt_ptr type and since some positive | 1302 | /* dst_reg stays as pkt_ptr type and since some positive |
1302 | * integer value was added to the pointer, increment its 'id' | 1303 | * integer value was added to the pointer, increment its 'id' |
1303 | */ | 1304 | */ |
1304 | dst_reg->id++; | 1305 | dst_reg->id = ++env->id_gen; |
1305 | 1306 | ||
1306 | /* something was added to pkt_ptr, set range and off to zero */ | 1307 | /* something was added to pkt_ptr, set range and off to zero */ |
1307 | dst_reg->off = 0; | 1308 | dst_reg->off = 0; |
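
The verifier.c change above replaces the per-register "dst_reg->id++" with a counter held in the verifier environment, so two packet pointers that started as copies of the same register can no longer end up sharing an id after independent adjustments. A small sketch of the difference follows; the struct and function names are invented for illustration, not the verifier's.

#include <stdio.h>

struct reg { unsigned int id; };

static unsigned int id_gen;                      /* like verifier_env::id_gen */

static void adjust_old(struct reg *r) { r->id++; }          /* pre-fix behaviour  */
static void adjust_new(struct reg *r) { r->id = ++id_gen; } /* post-fix behaviour */

int main(void)
{
    struct reg a = { 0 }, b = a;                 /* b starts as a copy of a */

    adjust_old(&a);
    adjust_old(&b);
    printf("old scheme: a.id=%u b.id=%u (collide)\n", a.id, b.id);

    a.id = b.id = 0;
    adjust_new(&a);
    adjust_new(&b);
    printf("new scheme: a.id=%u b.id=%u (unique)\n", a.id, b.id);
    return 0;
}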
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a19550d80ab1..3cfabdf7b942 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -242,18 +242,6 @@ unlock: | |||
242 | return ret; | 242 | return ret; |
243 | } | 243 | } |
244 | 244 | ||
245 | static void event_function_local(struct perf_event *event, event_f func, void *data) | ||
246 | { | ||
247 | struct event_function_struct efs = { | ||
248 | .event = event, | ||
249 | .func = func, | ||
250 | .data = data, | ||
251 | }; | ||
252 | |||
253 | int ret = event_function(&efs); | ||
254 | WARN_ON_ONCE(ret); | ||
255 | } | ||
256 | |||
257 | static void event_function_call(struct perf_event *event, event_f func, void *data) | 245 | static void event_function_call(struct perf_event *event, event_f func, void *data) |
258 | { | 246 | { |
259 | struct perf_event_context *ctx = event->ctx; | 247 | struct perf_event_context *ctx = event->ctx; |
@@ -303,6 +291,54 @@ again: | |||
303 | raw_spin_unlock_irq(&ctx->lock); | 291 | raw_spin_unlock_irq(&ctx->lock); |
304 | } | 292 | } |
305 | 293 | ||
294 | /* | ||
295 | * Similar to event_function_call() + event_function(), but hard assumes IRQs | ||
296 | * are already disabled and we're on the right CPU. | ||
297 | */ | ||
298 | static void event_function_local(struct perf_event *event, event_f func, void *data) | ||
299 | { | ||
300 | struct perf_event_context *ctx = event->ctx; | ||
301 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | ||
302 | struct task_struct *task = READ_ONCE(ctx->task); | ||
303 | struct perf_event_context *task_ctx = NULL; | ||
304 | |||
305 | WARN_ON_ONCE(!irqs_disabled()); | ||
306 | |||
307 | if (task) { | ||
308 | if (task == TASK_TOMBSTONE) | ||
309 | return; | ||
310 | |||
311 | task_ctx = ctx; | ||
312 | } | ||
313 | |||
314 | perf_ctx_lock(cpuctx, task_ctx); | ||
315 | |||
316 | task = ctx->task; | ||
317 | if (task == TASK_TOMBSTONE) | ||
318 | goto unlock; | ||
319 | |||
320 | if (task) { | ||
321 | /* | ||
322 | * We must be either inactive or active and the right task, | ||
323 | * otherwise we're screwed, since we cannot IPI to somewhere | ||
324 | * else. | ||
325 | */ | ||
326 | if (ctx->is_active) { | ||
327 | if (WARN_ON_ONCE(task != current)) | ||
328 | goto unlock; | ||
329 | |||
330 | if (WARN_ON_ONCE(cpuctx->task_ctx != ctx)) | ||
331 | goto unlock; | ||
332 | } | ||
333 | } else { | ||
334 | WARN_ON_ONCE(&cpuctx->ctx != ctx); | ||
335 | } | ||
336 | |||
337 | func(event, cpuctx, ctx, data); | ||
338 | unlock: | ||
339 | perf_ctx_unlock(cpuctx, task_ctx); | ||
340 | } | ||
341 | |||
306 | #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\ | 342 | #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\ |
307 | PERF_FLAG_FD_OUTPUT |\ | 343 | PERF_FLAG_FD_OUTPUT |\ |
308 | PERF_FLAG_PID_CGROUP |\ | 344 | PERF_FLAG_PID_CGROUP |\ |
@@ -843,6 +879,32 @@ perf_cgroup_mark_enabled(struct perf_event *event, | |||
843 | } | 879 | } |
844 | } | 880 | } |
845 | } | 881 | } |
882 | |||
883 | /* | ||
884 | * Update cpuctx->cgrp so that it is set when first cgroup event is added and | ||
885 | * cleared when last cgroup event is removed. | ||
886 | */ | ||
887 | static inline void | ||
888 | list_update_cgroup_event(struct perf_event *event, | ||
889 | struct perf_event_context *ctx, bool add) | ||
890 | { | ||
891 | struct perf_cpu_context *cpuctx; | ||
892 | |||
893 | if (!is_cgroup_event(event)) | ||
894 | return; | ||
895 | |||
896 | if (add && ctx->nr_cgroups++) | ||
897 | return; | ||
898 | else if (!add && --ctx->nr_cgroups) | ||
899 | return; | ||
900 | /* | ||
901 | * Because cgroup events are always per-cpu events, | ||
902 | * this will always be called from the right CPU. | ||
903 | */ | ||
904 | cpuctx = __get_cpu_context(ctx); | ||
905 | cpuctx->cgrp = add ? event->cgrp : NULL; | ||
906 | } | ||
907 | |||
846 | #else /* !CONFIG_CGROUP_PERF */ | 908 | #else /* !CONFIG_CGROUP_PERF */ |
847 | 909 | ||
848 | static inline bool | 910 | static inline bool |
@@ -920,6 +982,13 @@ perf_cgroup_mark_enabled(struct perf_event *event, | |||
920 | struct perf_event_context *ctx) | 982 | struct perf_event_context *ctx) |
921 | { | 983 | { |
922 | } | 984 | } |
985 | |||
986 | static inline void | ||
987 | list_update_cgroup_event(struct perf_event *event, | ||
988 | struct perf_event_context *ctx, bool add) | ||
989 | { | ||
990 | } | ||
991 | |||
923 | #endif | 992 | #endif |
924 | 993 | ||
925 | /* | 994 | /* |
@@ -1392,6 +1461,7 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx) | |||
1392 | static void | 1461 | static void |
1393 | list_add_event(struct perf_event *event, struct perf_event_context *ctx) | 1462 | list_add_event(struct perf_event *event, struct perf_event_context *ctx) |
1394 | { | 1463 | { |
1464 | |||
1395 | lockdep_assert_held(&ctx->lock); | 1465 | lockdep_assert_held(&ctx->lock); |
1396 | 1466 | ||
1397 | WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); | 1467 | WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); |
@@ -1412,8 +1482,7 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
1412 | list_add_tail(&event->group_entry, list); | 1482 | list_add_tail(&event->group_entry, list); |
1413 | } | 1483 | } |
1414 | 1484 | ||
1415 | if (is_cgroup_event(event)) | 1485 | list_update_cgroup_event(event, ctx, true); |
1416 | ctx->nr_cgroups++; | ||
1417 | 1486 | ||
1418 | list_add_rcu(&event->event_entry, &ctx->event_list); | 1487 | list_add_rcu(&event->event_entry, &ctx->event_list); |
1419 | ctx->nr_events++; | 1488 | ctx->nr_events++; |
@@ -1581,8 +1650,6 @@ static void perf_group_attach(struct perf_event *event) | |||
1581 | static void | 1650 | static void |
1582 | list_del_event(struct perf_event *event, struct perf_event_context *ctx) | 1651 | list_del_event(struct perf_event *event, struct perf_event_context *ctx) |
1583 | { | 1652 | { |
1584 | struct perf_cpu_context *cpuctx; | ||
1585 | |||
1586 | WARN_ON_ONCE(event->ctx != ctx); | 1653 | WARN_ON_ONCE(event->ctx != ctx); |
1587 | lockdep_assert_held(&ctx->lock); | 1654 | lockdep_assert_held(&ctx->lock); |
1588 | 1655 | ||
@@ -1594,20 +1661,7 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
1594 | 1661 | ||
1595 | event->attach_state &= ~PERF_ATTACH_CONTEXT; | 1662 | event->attach_state &= ~PERF_ATTACH_CONTEXT; |
1596 | 1663 | ||
1597 | if (is_cgroup_event(event)) { | 1664 | list_update_cgroup_event(event, ctx, false); |
1598 | ctx->nr_cgroups--; | ||
1599 | /* | ||
1600 | * Because cgroup events are always per-cpu events, this will | ||
1601 | * always be called from the right CPU. | ||
1602 | */ | ||
1603 | cpuctx = __get_cpu_context(ctx); | ||
1604 | /* | ||
1605 | * If there are no more cgroup events then clear cgrp to avoid | ||
1606 | * stale pointer in update_cgrp_time_from_cpuctx(). | ||
1607 | */ | ||
1608 | if (!ctx->nr_cgroups) | ||
1609 | cpuctx->cgrp = NULL; | ||
1610 | } | ||
1611 | 1665 | ||
1612 | ctx->nr_events--; | 1666 | ctx->nr_events--; |
1613 | if (event->attr.inherit_stat) | 1667 | if (event->attr.inherit_stat) |
@@ -1716,8 +1770,8 @@ static inline int pmu_filter_match(struct perf_event *event) | |||
1716 | static inline int | 1770 | static inline int |
1717 | event_filter_match(struct perf_event *event) | 1771 | event_filter_match(struct perf_event *event) |
1718 | { | 1772 | { |
1719 | return (event->cpu == -1 || event->cpu == smp_processor_id()) | 1773 | return (event->cpu == -1 || event->cpu == smp_processor_id()) && |
1720 | && perf_cgroup_match(event) && pmu_filter_match(event); | 1774 | perf_cgroup_match(event) && pmu_filter_match(event); |
1721 | } | 1775 | } |
1722 | 1776 | ||
1723 | static void | 1777 | static void |
@@ -1737,8 +1791,8 @@ event_sched_out(struct perf_event *event, | |||
1737 | * maintained, otherwise bogus information is return | 1791 | * maintained, otherwise bogus information is return |
1738 | * via read() for time_enabled, time_running: | 1792 | * via read() for time_enabled, time_running: |
1739 | */ | 1793 | */ |
1740 | if (event->state == PERF_EVENT_STATE_INACTIVE | 1794 | if (event->state == PERF_EVENT_STATE_INACTIVE && |
1741 | && !event_filter_match(event)) { | 1795 | !event_filter_match(event)) { |
1742 | delta = tstamp - event->tstamp_stopped; | 1796 | delta = tstamp - event->tstamp_stopped; |
1743 | event->tstamp_running += delta; | 1797 | event->tstamp_running += delta; |
1744 | event->tstamp_stopped = tstamp; | 1798 | event->tstamp_stopped = tstamp; |
@@ -2236,10 +2290,15 @@ perf_install_in_context(struct perf_event_context *ctx, | |||
2236 | 2290 | ||
2237 | lockdep_assert_held(&ctx->mutex); | 2291 | lockdep_assert_held(&ctx->mutex); |
2238 | 2292 | ||
2239 | event->ctx = ctx; | ||
2240 | if (event->cpu != -1) | 2293 | if (event->cpu != -1) |
2241 | event->cpu = cpu; | 2294 | event->cpu = cpu; |
2242 | 2295 | ||
2296 | /* | ||
2297 | * Ensures that if we can observe event->ctx, both the event and ctx | ||
2298 | * will be 'complete'. See perf_iterate_sb_cpu(). | ||
2299 | */ | ||
2300 | smp_store_release(&event->ctx, ctx); | ||
2301 | |||
2243 | if (!task) { | 2302 | if (!task) { |
2244 | cpu_function_call(cpu, __perf_install_in_context, event); | 2303 | cpu_function_call(cpu, __perf_install_in_context, event); |
2245 | return; | 2304 | return; |
@@ -3490,9 +3549,10 @@ static int perf_event_read(struct perf_event *event, bool group) | |||
3490 | .group = group, | 3549 | .group = group, |
3491 | .ret = 0, | 3550 | .ret = 0, |
3492 | }; | 3551 | }; |
3493 | smp_call_function_single(event->oncpu, | 3552 | ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1); |
3494 | __perf_event_read, &data, 1); | 3553 | /* The event must have been read from an online CPU: */ |
3495 | ret = data.ret; | 3554 | WARN_ON_ONCE(ret); |
3555 | ret = ret ? : data.ret; | ||
3496 | } else if (event->state == PERF_EVENT_STATE_INACTIVE) { | 3556 | } else if (event->state == PERF_EVENT_STATE_INACTIVE) { |
3497 | struct perf_event_context *ctx = event->ctx; | 3557 | struct perf_event_context *ctx = event->ctx; |
3498 | unsigned long flags; | 3558 | unsigned long flags; |
@@ -5969,6 +6029,14 @@ static void perf_iterate_sb_cpu(perf_iterate_f output, void *data) | |||
5969 | struct perf_event *event; | 6029 | struct perf_event *event; |
5970 | 6030 | ||
5971 | list_for_each_entry_rcu(event, &pel->list, sb_list) { | 6031 | list_for_each_entry_rcu(event, &pel->list, sb_list) { |
6032 | /* | ||
6033 | * Skip events that are not fully formed yet; ensure that | ||
6034 | * if we observe event->ctx, both event and ctx will be | ||
6035 | * complete enough. See perf_install_in_context(). | ||
6036 | */ | ||
6037 | if (!smp_load_acquire(&event->ctx)) | ||
6038 | continue; | ||
6039 | |||
5972 | if (event->state < PERF_EVENT_STATE_INACTIVE) | 6040 | if (event->state < PERF_EVENT_STATE_INACTIVE) |
5973 | continue; | 6041 | continue; |
5974 | if (!event_filter_match(event)) | 6042 | if (!event_filter_match(event)) |
@@ -6098,7 +6166,7 @@ static int __perf_pmu_output_stop(void *info) | |||
6098 | { | 6166 | { |
6099 | struct perf_event *event = info; | 6167 | struct perf_event *event = info; |
6100 | struct pmu *pmu = event->pmu; | 6168 | struct pmu *pmu = event->pmu; |
6101 | struct perf_cpu_context *cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 6169 | struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); |
6102 | struct remote_output ro = { | 6170 | struct remote_output ro = { |
6103 | .rb = event->rb, | 6171 | .rb = event->rb, |
6104 | }; | 6172 | }; |
@@ -6553,15 +6621,6 @@ got_name: | |||
6553 | } | 6621 | } |
6554 | 6622 | ||
6555 | /* | 6623 | /* |
6556 | * Whether this @filter depends on a dynamic object which is not loaded | ||
6557 | * yet or its load addresses are not known. | ||
6558 | */ | ||
6559 | static bool perf_addr_filter_needs_mmap(struct perf_addr_filter *filter) | ||
6560 | { | ||
6561 | return filter->filter && filter->inode; | ||
6562 | } | ||
6563 | |||
6564 | /* | ||
6565 | * Check whether inode and address range match filter criteria. | 6624 | * Check whether inode and address range match filter criteria. |
6566 | */ | 6625 | */ |
6567 | static bool perf_addr_filter_match(struct perf_addr_filter *filter, | 6626 | static bool perf_addr_filter_match(struct perf_addr_filter *filter, |
@@ -6622,6 +6681,13 @@ static void perf_addr_filters_adjust(struct vm_area_struct *vma) | |||
6622 | struct perf_event_context *ctx; | 6681 | struct perf_event_context *ctx; |
6623 | int ctxn; | 6682 | int ctxn; |
6624 | 6683 | ||
6684 | /* | ||
6685 | * Data tracing isn't supported yet and as such there is no need | ||
6686 | * to keep track of anything that isn't related to executable code: | ||
6687 | */ | ||
6688 | if (!(vma->vm_flags & VM_EXEC)) | ||
6689 | return; | ||
6690 | |||
6625 | rcu_read_lock(); | 6691 | rcu_read_lock(); |
6626 | for_each_task_context_nr(ctxn) { | 6692 | for_each_task_context_nr(ctxn) { |
6627 | ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); | 6693 | ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); |
@@ -7774,7 +7840,11 @@ static void perf_event_addr_filters_apply(struct perf_event *event) | |||
7774 | list_for_each_entry(filter, &ifh->list, entry) { | 7840 | list_for_each_entry(filter, &ifh->list, entry) { |
7775 | event->addr_filters_offs[count] = 0; | 7841 | event->addr_filters_offs[count] = 0; |
7776 | 7842 | ||
7777 | if (perf_addr_filter_needs_mmap(filter)) | 7843 | /* |
7844 | * Adjust base offset if the filter is associated to a binary | ||
7845 | * that needs to be mapped: | ||
7846 | */ | ||
7847 | if (filter->inode) | ||
7778 | event->addr_filters_offs[count] = | 7848 | event->addr_filters_offs[count] = |
7779 | perf_addr_filter_apply(filter, mm); | 7849 | perf_addr_filter_apply(filter, mm); |
7780 | 7850 | ||
@@ -7905,8 +7975,10 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, | |||
7905 | goto fail; | 7975 | goto fail; |
7906 | } | 7976 | } |
7907 | 7977 | ||
7908 | if (token == IF_SRC_FILE) { | 7978 | if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) { |
7909 | filename = match_strdup(&args[2]); | 7979 | int fpos = filter->range ? 2 : 1; |
7980 | |||
7981 | filename = match_strdup(&args[fpos]); | ||
7910 | if (!filename) { | 7982 | if (!filename) { |
7911 | ret = -ENOMEM; | 7983 | ret = -ENOMEM; |
7912 | goto fail; | 7984 | goto fail; |
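
Among the perf changes above, perf_install_in_context() now publishes event->ctx with smp_store_release() and perf_iterate_sb_cpu() skips events whose ctx is not yet visible via smp_load_acquire(), so a side-band iterator never observes a half-initialized event. Below is a userspace C11 analogue of that publish/observe pairing; the struct layout and the_ctx variable are invented for the sketch, and it runs single-threaded only to show the API.

#include <stdatomic.h>
#include <stdio.h>

struct event { int cpu; _Atomic(void *) ctx; };

static struct event ev;
static int the_ctx;

static void publisher(void)
{
    ev.cpu = 7;                                  /* ordinary initialization ... */
    atomic_store_explicit(&ev.ctx, (void *)&the_ctx,
                          memory_order_release); /* ... then publish the pointer */
}

static void observer(void)
{
    void *ctx = atomic_load_explicit(&ev.ctx, memory_order_acquire);
    if (!ctx)
        return;   /* not fully formed yet: skip it, like perf_iterate_sb_cpu() */
    printf("once ctx is visible, cpu=%d is too\n", ev.cpu);
}

int main(void)
{
    publisher();
    observer();
    return 0;
}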
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index b7a525ab2083..8c50276b60d1 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -172,8 +172,10 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, | |||
172 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); | 172 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); |
173 | err = -EAGAIN; | 173 | err = -EAGAIN; |
174 | ptep = page_check_address(page, mm, addr, &ptl, 0); | 174 | ptep = page_check_address(page, mm, addr, &ptl, 0); |
175 | if (!ptep) | 175 | if (!ptep) { |
176 | mem_cgroup_cancel_charge(kpage, memcg, false); | ||
176 | goto unlock; | 177 | goto unlock; |
178 | } | ||
177 | 179 | ||
178 | get_page(kpage); | 180 | get_page(kpage); |
179 | page_add_new_anon_rmap(kpage, vma, addr, false); | 181 | page_add_new_anon_rmap(kpage, vma, addr, false); |
@@ -200,7 +202,6 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, | |||
200 | 202 | ||
201 | err = 0; | 203 | err = 0; |
202 | unlock: | 204 | unlock: |
203 | mem_cgroup_cancel_charge(kpage, memcg, false); | ||
204 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); | 205 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); |
205 | unlock_page(page); | 206 | unlock_page(page); |
206 | return err; | 207 | return err; |
diff --git a/kernel/futex.c b/kernel/futex.c
index 33664f70e2d2..46cb3a301bc1 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -179,7 +179,15 @@ int __read_mostly futex_cmpxchg_enabled; | |||
179 | * Futex flags used to encode options to functions and preserve them across | 179 | * Futex flags used to encode options to functions and preserve them across |
180 | * restarts. | 180 | * restarts. |
181 | */ | 181 | */ |
182 | #define FLAGS_SHARED 0x01 | 182 | #ifdef CONFIG_MMU |
183 | # define FLAGS_SHARED 0x01 | ||
184 | #else | ||
185 | /* | ||
186 | * NOMMU does not have per process address space. Let the compiler optimize | ||
187 | * code away. | ||
188 | */ | ||
189 | # define FLAGS_SHARED 0x00 | ||
190 | #endif | ||
183 | #define FLAGS_CLOCKRT 0x02 | 191 | #define FLAGS_CLOCKRT 0x02 |
184 | #define FLAGS_HAS_TIMEOUT 0x04 | 192 | #define FLAGS_HAS_TIMEOUT 0x04 |
185 | 193 | ||
@@ -405,6 +413,16 @@ static void get_futex_key_refs(union futex_key *key) | |||
405 | if (!key->both.ptr) | 413 | if (!key->both.ptr) |
406 | return; | 414 | return; |
407 | 415 | ||
416 | /* | ||
417 | * On MMU less systems futexes are always "private" as there is no per | ||
418 | * process address space. We need the smp wmb nevertheless - yes, | ||
419 | * arch/blackfin has MMU less SMP ... | ||
420 | */ | ||
421 | if (!IS_ENABLED(CONFIG_MMU)) { | ||
422 | smp_mb(); /* explicit smp_mb(); (B) */ | ||
423 | return; | ||
424 | } | ||
425 | |||
408 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { | 426 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { |
409 | case FUT_OFF_INODE: | 427 | case FUT_OFF_INODE: |
410 | ihold(key->shared.inode); /* implies smp_mb(); (B) */ | 428 | ihold(key->shared.inode); /* implies smp_mb(); (B) */ |
@@ -436,6 +454,9 @@ static void drop_futex_key_refs(union futex_key *key) | |||
436 | return; | 454 | return; |
437 | } | 455 | } |
438 | 456 | ||
457 | if (!IS_ENABLED(CONFIG_MMU)) | ||
458 | return; | ||
459 | |||
439 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { | 460 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { |
440 | case FUT_OFF_INODE: | 461 | case FUT_OFF_INODE: |
441 | iput(key->shared.inode); | 462 | iput(key->shared.inode); |
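
The futex.c change above makes FLAGS_SHARED and the MMU checks compile-time constants on NOMMU configurations, so the shared-futex paths become dead code the compiler can drop while still being type-checked. A sketch of that pattern follows; CONFIG_HAS_MMU below is a plain macro standing in for the kernel's IS_ENABLED(CONFIG_MMU), not the real config machinery.

#include <stdio.h>

#define CONFIG_HAS_MMU 0   /* pretend this build target has no MMU */

static void get_key_refs(int shared)
{
    if (!CONFIG_HAS_MMU) {
        /* NOMMU: every futex is process-private; the code below is still
         * compiled and type-checked, but discarded as dead code at -O2.
         */
        printf("private futex, nothing extra to pin\n");
        return;
    }
    if (shared)
        printf("pin the shared inode/mm reference\n");
}

int main(void)
{
    get_key_refs(1);
    return 0;
}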
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index f68959341c0f..32f6cfcff212 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -39,6 +39,7 @@ struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs) | |||
39 | return NULL; | 39 | return NULL; |
40 | } | 40 | } |
41 | 41 | ||
42 | get_online_cpus(); | ||
42 | if (max_vecs >= num_online_cpus()) { | 43 | if (max_vecs >= num_online_cpus()) { |
43 | cpumask_copy(affinity_mask, cpu_online_mask); | 44 | cpumask_copy(affinity_mask, cpu_online_mask); |
44 | *nr_vecs = num_online_cpus(); | 45 | *nr_vecs = num_online_cpus(); |
@@ -56,6 +57,7 @@ struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs) | |||
56 | } | 57 | } |
57 | *nr_vecs = vecs; | 58 | *nr_vecs = vecs; |
58 | } | 59 | } |
60 | put_online_cpus(); | ||
59 | 61 | ||
60 | return affinity_mask; | 62 | return affinity_mask; |
61 | } | 63 | } |
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index b4c1bc7c9ca2..637389088b3f 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -820,6 +820,17 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, | |||
820 | desc->name = name; | 820 | desc->name = name; |
821 | 821 | ||
822 | if (handle != handle_bad_irq && is_chained) { | 822 | if (handle != handle_bad_irq && is_chained) { |
823 | /* | ||
824 | * We're about to start this interrupt immediately, | ||
825 | * hence the need to set the trigger configuration. | ||
826 | * But the .set_type callback may have overridden the | ||
827 | * flow handler, ignoring that we're dealing with a | ||
828 | * chained interrupt. Reset it immediately because we | ||
829 | * do know better. | ||
830 | */ | ||
831 | __irq_set_trigger(desc, irqd_get_trigger_type(&desc->irq_data)); | ||
832 | desc->handle_irq = handle; | ||
833 | |||
823 | irq_settings_set_noprobe(desc); | 834 | irq_settings_set_noprobe(desc); |
824 | irq_settings_set_norequest(desc); | 835 | irq_settings_set_norequest(desc); |
825 | irq_settings_set_nothread(desc); | 836 | irq_settings_set_nothread(desc); |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 73a2b786b5e9..9530fcd27704 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1681,8 +1681,10 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, | |||
1681 | action->dev_id = dev_id; | 1681 | action->dev_id = dev_id; |
1682 | 1682 | ||
1683 | retval = irq_chip_pm_get(&desc->irq_data); | 1683 | retval = irq_chip_pm_get(&desc->irq_data); |
1684 | if (retval < 0) | 1684 | if (retval < 0) { |
1685 | kfree(action); | ||
1685 | return retval; | 1686 | return retval; |
1687 | } | ||
1686 | 1688 | ||
1687 | chip_bus_lock(desc); | 1689 | chip_bus_lock(desc); |
1688 | retval = __setup_irq(irq, desc, action); | 1690 | retval = __setup_irq(irq, desc, action); |
@@ -1985,8 +1987,10 @@ int request_percpu_irq(unsigned int irq, irq_handler_t handler, | |||
1985 | action->percpu_dev_id = dev_id; | 1987 | action->percpu_dev_id = dev_id; |
1986 | 1988 | ||
1987 | retval = irq_chip_pm_get(&desc->irq_data); | 1989 | retval = irq_chip_pm_get(&desc->irq_data); |
1988 | if (retval < 0) | 1990 | if (retval < 0) { |
1991 | kfree(action); | ||
1989 | return retval; | 1992 | return retval; |
1993 | } | ||
1990 | 1994 | ||
1991 | chip_bus_lock(desc); | 1995 | chip_bus_lock(desc); |
1992 | retval = __setup_irq(irq, desc, action); | 1996 | retval = __setup_irq(irq, desc, action); |
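
The manage.c hunks above plug a memory leak: the freshly allocated irqaction was not freed when irq_chip_pm_get() failed. Below is a userspace sketch of the fixed error path; the helper names (pm_get, request_irq_like) are invented for illustration rather than the kernel API.

#include <errno.h>
#include <stdlib.h>

struct action { void (*handler)(void); void *dev_id; };

static int pm_get(void) { return -EAGAIN; }   /* pretend the PM reference fails */

static int request_irq_like(void (*handler)(void), void *dev_id)
{
    struct action *action = malloc(sizeof(*action));
    int ret;

    if (!action)
        return -ENOMEM;
    action->handler = handler;
    action->dev_id = dev_id;

    ret = pm_get();
    if (ret < 0) {
        free(action);   /* the cleanup the patch adds */
        return ret;
    }
    /* ... hand the action to __setup_irq() in the real code ... */
    free(action);       /* demo only: nothing registered it */
    return 0;
}

int main(void)
{
    request_irq_like(NULL, NULL);
    return 0;
}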
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 54999350162c..19e9dfbe97fa 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -359,6 +359,17 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, | |||
359 | else | 359 | else |
360 | dev_dbg(dev, "irq [%d-%d] for MSI\n", | 360 | dev_dbg(dev, "irq [%d-%d] for MSI\n", |
361 | virq, virq + desc->nvec_used - 1); | 361 | virq, virq + desc->nvec_used - 1); |
362 | /* | ||
363 | * This flag is set by the PCI layer as we need to activate | ||
364 | * the MSI entries before the PCI layer enables MSI in the | ||
365 | * card. Otherwise the card latches a random msi message. | ||
366 | */ | ||
367 | if (info->flags & MSI_FLAG_ACTIVATE_EARLY) { | ||
368 | struct irq_data *irq_data; | ||
369 | |||
370 | irq_data = irq_domain_get_irq_data(domain, desc->irq); | ||
371 | irq_domain_activate_irq(irq_data); | ||
372 | } | ||
362 | } | 373 | } |
363 | 374 | ||
364 | return 0; | 375 | return 0; |
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 37649e69056c..8a99abf58080 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -450,7 +450,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) | |||
450 | goto gotlock; | 450 | goto gotlock; |
451 | } | 451 | } |
452 | } | 452 | } |
453 | WRITE_ONCE(pn->state, vcpu_halted); | 453 | WRITE_ONCE(pn->state, vcpu_hashed); |
454 | qstat_inc(qstat_pv_wait_head, true); | 454 | qstat_inc(qstat_pv_wait_head, true); |
455 | qstat_inc(qstat_pv_wait_again, waitcnt); | 455 | qstat_inc(qstat_pv_wait_again, waitcnt); |
456 | pv_wait(&l->locked, _Q_SLOW_VAL); | 456 | pv_wait(&l->locked, _Q_SLOW_VAL); |
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index 22e025309845..b9d031516254 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -153,7 +153,6 @@ static ssize_t qstat_read(struct file *file, char __user *user_buf, | |||
153 | */ | 153 | */ |
154 | if ((counter == qstat_pv_latency_kick) || | 154 | if ((counter == qstat_pv_latency_kick) || |
155 | (counter == qstat_pv_latency_wake)) { | 155 | (counter == qstat_pv_latency_wake)) { |
156 | stat = 0; | ||
157 | if (kicks) | 156 | if (kicks) |
158 | stat = DIV_ROUND_CLOSEST_ULL(stat, kicks); | 157 | stat = DIV_ROUND_CLOSEST_ULL(stat, kicks); |
159 | } | 158 | } |
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index a881c6a7ba74..33c79b6105c5 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -300,12 +300,12 @@ static int create_image(int platform_mode) | |||
300 | save_processor_state(); | 300 | save_processor_state(); |
301 | trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true); | 301 | trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true); |
302 | error = swsusp_arch_suspend(); | 302 | error = swsusp_arch_suspend(); |
303 | /* Restore control flow magically appears here */ | ||
304 | restore_processor_state(); | ||
303 | trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false); | 305 | trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false); |
304 | if (error) | 306 | if (error) |
305 | printk(KERN_ERR "PM: Error %d creating hibernation image\n", | 307 | printk(KERN_ERR "PM: Error %d creating hibernation image\n", |
306 | error); | 308 | error); |
307 | /* Restore control flow magically appears here */ | ||
308 | restore_processor_state(); | ||
309 | if (!in_suspend) | 309 | if (!in_suspend) |
310 | events_check_enabled = false; | 310 | events_check_enabled = false; |
311 | 311 | ||
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 9a0178c2ac1d..b02228411d57 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -835,9 +835,9 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) | |||
835 | */ | 835 | */ |
836 | static bool rtree_next_node(struct memory_bitmap *bm) | 836 | static bool rtree_next_node(struct memory_bitmap *bm) |
837 | { | 837 | { |
838 | bm->cur.node = list_entry(bm->cur.node->list.next, | 838 | if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) { |
839 | struct rtree_node, list); | 839 | bm->cur.node = list_entry(bm->cur.node->list.next, |
840 | if (&bm->cur.node->list != &bm->cur.zone->leaves) { | 840 | struct rtree_node, list); |
841 | bm->cur.node_pfn += BM_BITS_PER_BLOCK; | 841 | bm->cur.node_pfn += BM_BITS_PER_BLOCK; |
842 | bm->cur.node_bit = 0; | 842 | bm->cur.node_bit = 0; |
843 | touch_softlockup_watchdog(); | 843 | touch_softlockup_watchdog(); |
@@ -845,9 +845,9 @@ static bool rtree_next_node(struct memory_bitmap *bm) | |||
845 | } | 845 | } |
846 | 846 | ||
847 | /* No more nodes, goto next zone */ | 847 | /* No more nodes, goto next zone */ |
848 | bm->cur.zone = list_entry(bm->cur.zone->list.next, | 848 | if (!list_is_last(&bm->cur.zone->list, &bm->zones)) { |
849 | bm->cur.zone = list_entry(bm->cur.zone->list.next, | ||
849 | struct mem_zone_bm_rtree, list); | 850 | struct mem_zone_bm_rtree, list); |
850 | if (&bm->cur.zone->list != &bm->zones) { | ||
851 | bm->cur.node = list_entry(bm->cur.zone->leaves.next, | 851 | bm->cur.node = list_entry(bm->cur.zone->leaves.next, |
852 | struct rtree_node, list); | 852 | struct rtree_node, list); |
853 | bm->cur.node_pfn = 0; | 853 | bm->cur.node_pfn = 0; |
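
The snapshot.c fix above checks list_is_last() before converting the next list entry into a node, instead of converting first and checking afterwards, which briefly turned the list head itself into a bogus rtree node pointer. Below is a minimal circular-list sketch of that ordering; the list helpers are simplified re-implementations for the example, not <linux/list.h>.

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

static int list_is_last(const struct list_head *list, const struct list_head *head)
{
    return list->next == head;
}

struct node { int pfn; struct list_head list; };

int main(void)
{
    struct list_head head;
    struct node a = { .pfn = 1 }, b = { .pfn = 2 };

    /* head <-> a <-> b <-> head */
    head.next = &a.list; a.list.prev = &head;
    a.list.next = &b.list; b.list.prev = &a.list;
    b.list.next = &head; head.prev = &b.list;

    struct node *cur = &a;
    while (!list_is_last(&cur->list, &head)) {   /* test before advancing */
        cur = container_of(cur->list.next, struct node, list);
        printf("visited pfn %d\n", cur->pfn);
    }
    return 0;
}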
diff --git a/kernel/printk/braille.c b/kernel/printk/braille.c
index 276762f3a460..d5760c42f042 100644
--- a/kernel/printk/braille.c
+++ b/kernel/printk/braille.c
@@ -9,10 +9,10 @@ | |||
9 | 9 | ||
10 | char *_braille_console_setup(char **str, char **brl_options) | 10 | char *_braille_console_setup(char **str, char **brl_options) |
11 | { | 11 | { |
12 | if (!memcmp(*str, "brl,", 4)) { | 12 | if (!strncmp(*str, "brl,", 4)) { |
13 | *brl_options = ""; | 13 | *brl_options = ""; |
14 | *str += 4; | 14 | *str += 4; |
15 | } else if (!memcmp(str, "brl=", 4)) { | 15 | } else if (!strncmp(*str, "brl=", 4)) { |
16 | *brl_options = *str + 4; | 16 | *brl_options = *str + 4; |
17 | *str = strchr(*brl_options, ','); | 17 | *str = strchr(*brl_options, ','); |
18 | if (!*str) | 18 | if (!*str) |
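
The braille console change above switches the prefix tests to strncmp() and fixes the missing dereference in the "brl=" branch; strncmp() also stops at the NUL terminator if the option string is shorter than the prefix. A small sketch of the corrected parsing follows; parse() is an illustrative wrapper, not the kernel function.

#include <stdio.h>
#include <string.h>

static const char *parse(const char *str)
{
    if (!strncmp(str, "brl,", 4))
        return str + 4;                 /* default braille options */
    if (!strncmp(str, "brl=", 4))
        return strchr(str + 4, ',');    /* options end at the next comma (may be NULL) */
    return str;
}

int main(void)
{
    /* "brl" is only four bytes including its NUL; strncmp() is still safe. */
    printf("%s\n", parse("brl,ttyS0"));
    return 0;
}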
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5c883fe8e440..2a906f20fba7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -74,6 +74,7 @@ | |||
74 | #include <linux/context_tracking.h> | 74 | #include <linux/context_tracking.h> |
75 | #include <linux/compiler.h> | 75 | #include <linux/compiler.h> |
76 | #include <linux/frame.h> | 76 | #include <linux/frame.h> |
77 | #include <linux/prefetch.h> | ||
77 | 78 | ||
78 | #include <asm/switch_to.h> | 79 | #include <asm/switch_to.h> |
79 | #include <asm/tlb.h> | 80 | #include <asm/tlb.h> |
@@ -2972,6 +2973,23 @@ EXPORT_PER_CPU_SYMBOL(kstat); | |||
2972 | EXPORT_PER_CPU_SYMBOL(kernel_cpustat); | 2973 | EXPORT_PER_CPU_SYMBOL(kernel_cpustat); |
2973 | 2974 | ||
2974 | /* | 2975 | /* |
2976 | * The function fair_sched_class.update_curr accesses the struct curr | ||
2977 | * and its field curr->exec_start; when called from task_sched_runtime(), | ||
2978 | * we observe a high rate of cache misses in practice. | ||
2979 | * Prefetching this data results in improved performance. | ||
2980 | */ | ||
2981 | static inline void prefetch_curr_exec_start(struct task_struct *p) | ||
2982 | { | ||
2983 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
2984 | struct sched_entity *curr = (&p->se)->cfs_rq->curr; | ||
2985 | #else | ||
2986 | struct sched_entity *curr = (&task_rq(p)->cfs)->curr; | ||
2987 | #endif | ||
2988 | prefetch(curr); | ||
2989 | prefetch(&curr->exec_start); | ||
2990 | } | ||
2991 | |||
2992 | /* | ||
2975 | * Return accounted runtime for the task. | 2993 | * Return accounted runtime for the task. |
2976 | * In case the task is currently running, return the runtime plus current's | 2994 | * In case the task is currently running, return the runtime plus current's |
2977 | * pending runtime that have not been accounted yet. | 2995 | * pending runtime that have not been accounted yet. |
@@ -3005,6 +3023,7 @@ unsigned long long task_sched_runtime(struct task_struct *p) | |||
3005 | * thread, breaking clock_gettime(). | 3023 | * thread, breaking clock_gettime(). |
3006 | */ | 3024 | */ |
3007 | if (task_current(rq, p) && task_on_rq_queued(p)) { | 3025 | if (task_current(rq, p) && task_on_rq_queued(p)) { |
3026 | prefetch_curr_exec_start(p); | ||
3008 | update_rq_clock(rq); | 3027 | update_rq_clock(rq); |
3009 | p->sched_class->update_curr(rq); | 3028 | p->sched_class->update_curr(rq); |
3010 | } | 3029 | } |
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 5be58820465c..d4184498c9f5 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -168,7 +168,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid) | |||
168 | 168 | ||
169 | if (old_idx == IDX_INVALID) { | 169 | if (old_idx == IDX_INVALID) { |
170 | cp->size++; | 170 | cp->size++; |
171 | cp->elements[cp->size - 1].dl = 0; | 171 | cp->elements[cp->size - 1].dl = dl; |
172 | cp->elements[cp->size - 1].cpu = cpu; | 172 | cp->elements[cp->size - 1].cpu = cpu; |
173 | cp->elements[cpu].idx = cp->size - 1; | 173 | cp->elements[cpu].idx = cp->size - 1; |
174 | cpudl_change_key(cp, cp->size - 1, dl); | 174 | cpudl_change_key(cp, cp->size - 1, dl); |
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 1934f658c036..a846cf89eb96 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -263,6 +263,11 @@ void account_idle_time(cputime_t cputime) | |||
263 | cpustat[CPUTIME_IDLE] += (__force u64) cputime; | 263 | cpustat[CPUTIME_IDLE] += (__force u64) cputime; |
264 | } | 264 | } |
265 | 265 | ||
266 | /* | ||
267 | * When a guest is interrupted for a longer amount of time, missed clock | ||
268 | * ticks are not redelivered later. Due to that, this function may on | ||
269 | * occasion account more time than the calling functions think elapsed. | ||
270 | */ | ||
266 | static __always_inline cputime_t steal_account_process_time(cputime_t maxtime) | 271 | static __always_inline cputime_t steal_account_process_time(cputime_t maxtime) |
267 | { | 272 | { |
268 | #ifdef CONFIG_PARAVIRT | 273 | #ifdef CONFIG_PARAVIRT |
@@ -371,7 +376,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | |||
371 | * idle, or potentially user or system time. Due to rounding, | 376 | * idle, or potentially user or system time. Due to rounding, |
372 | * other time can exceed ticks occasionally. | 377 | * other time can exceed ticks occasionally. |
373 | */ | 378 | */ |
374 | other = account_other_time(cputime); | 379 | other = account_other_time(ULONG_MAX); |
375 | if (other >= cputime) | 380 | if (other >= cputime) |
376 | return; | 381 | return; |
377 | cputime -= other; | 382 | cputime -= other; |
@@ -486,7 +491,7 @@ void account_process_tick(struct task_struct *p, int user_tick) | |||
486 | } | 491 | } |
487 | 492 | ||
488 | cputime = cputime_one_jiffy; | 493 | cputime = cputime_one_jiffy; |
489 | steal = steal_account_process_time(cputime); | 494 | steal = steal_account_process_time(ULONG_MAX); |
490 | 495 | ||
491 | if (steal >= cputime) | 496 | if (steal >= cputime) |
492 | return; | 497 | return; |
@@ -508,13 +513,21 @@ void account_process_tick(struct task_struct *p, int user_tick) | |||
508 | */ | 513 | */ |
509 | void account_idle_ticks(unsigned long ticks) | 514 | void account_idle_ticks(unsigned long ticks) |
510 | { | 515 | { |
516 | cputime_t cputime, steal; | ||
511 | 517 | ||
512 | if (sched_clock_irqtime) { | 518 | if (sched_clock_irqtime) { |
513 | irqtime_account_idle_ticks(ticks); | 519 | irqtime_account_idle_ticks(ticks); |
514 | return; | 520 | return; |
515 | } | 521 | } |
516 | 522 | ||
517 | account_idle_time(jiffies_to_cputime(ticks)); | 523 | cputime = jiffies_to_cputime(ticks); |
524 | steal = steal_account_process_time(ULONG_MAX); | ||
525 | |||
526 | if (steal >= cputime) | ||
527 | return; | ||
528 | |||
529 | cputime -= steal; | ||
530 | account_idle_time(cputime); | ||
518 | } | 531 | } |
519 | 532 | ||
520 | /* | 533 | /* |
@@ -606,19 +619,25 @@ static void cputime_adjust(struct task_cputime *curr, | |||
606 | stime = curr->stime; | 619 | stime = curr->stime; |
607 | utime = curr->utime; | 620 | utime = curr->utime; |
608 | 621 | ||
609 | if (utime == 0) { | 622 | /* |
610 | stime = rtime; | 623 | * If either stime or both stime and utime are 0, assume all runtime is |
624 | * userspace. Once a task gets some ticks, the monotonicy code at | ||
625 | * 'update' will ensure things converge to the observed ratio. | ||
626 | */ | ||
627 | if (stime == 0) { | ||
628 | utime = rtime; | ||
611 | goto update; | 629 | goto update; |
612 | } | 630 | } |
613 | 631 | ||
614 | if (stime == 0) { | 632 | if (utime == 0) { |
615 | utime = rtime; | 633 | stime = rtime; |
616 | goto update; | 634 | goto update; |
617 | } | 635 | } |
618 | 636 | ||
619 | stime = scale_stime((__force u64)stime, (__force u64)rtime, | 637 | stime = scale_stime((__force u64)stime, (__force u64)rtime, |
620 | (__force u64)(stime + utime)); | 638 | (__force u64)(stime + utime)); |
621 | 639 | ||
640 | update: | ||
622 | /* | 641 | /* |
623 | * Make sure stime doesn't go backwards; this preserves monotonicity | 642 | * Make sure stime doesn't go backwards; this preserves monotonicity |
624 | * for utime because rtime is monotonic. | 643 | * for utime because rtime is monotonic. |
@@ -641,7 +660,6 @@ static void cputime_adjust(struct task_cputime *curr, | |||
641 | stime = rtime - utime; | 660 | stime = rtime - utime; |
642 | } | 661 | } |
643 | 662 | ||
644 | update: | ||
645 | prev->stime = stime; | 663 | prev->stime = stime; |
646 | prev->utime = utime; | 664 | prev->utime = utime; |
647 | out: | 665 | out: |
@@ -686,6 +704,13 @@ static cputime_t get_vtime_delta(struct task_struct *tsk) | |||
686 | unsigned long now = READ_ONCE(jiffies); | 704 | unsigned long now = READ_ONCE(jiffies); |
687 | cputime_t delta, other; | 705 | cputime_t delta, other; |
688 | 706 | ||
707 | /* | ||
708 | * Unlike tick based timing, vtime based timing never has lost | ||
709 | * ticks, and no need for steal time accounting to make up for | ||
710 | * lost ticks. Vtime accounts a rounded version of actual | ||
711 | * elapsed time. Limit account_other_time to prevent rounding | ||
712 | * errors from causing elapsed vtime to go negative. | ||
713 | */ | ||
689 | delta = jiffies_to_cputime(now - tsk->vtime_snap); | 714 | delta = jiffies_to_cputime(now - tsk->vtime_snap); |
690 | other = account_other_time(delta); | 715 | other = account_other_time(delta); |
691 | WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE); | 716 | WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE); |
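
cputime_adjust() above now tests stime == 0 before utime == 0, so a task that has only ever accumulated user ticks gets all of rtime attributed to user time; otherwise rtime is split in the observed stime:utime ratio. The arithmetic sketch below shows just that split, leaves out the monotonicity clamp against previously reported values, and uses plain integers in place of cputime_t.

#include <stdio.h>

static void adjust(unsigned long long stime, unsigned long long utime,
                   unsigned long long rtime,
                   unsigned long long *out_s, unsigned long long *out_u)
{
    if (stime == 0) {            /* checked before utime since this change */
        *out_u = rtime;
        *out_s = 0;
        return;
    }
    if (utime == 0) {
        *out_s = rtime;
        *out_u = 0;
        return;
    }
    *out_s = stime * rtime / (stime + utime);   /* scale_stime() equivalent */
    *out_u = rtime - *out_s;
}

int main(void)
{
    unsigned long long s, u;

    adjust(0, 5, 100, &s, &u);   /* no system ticks yet: all runtime is user time */
    printf("stime=%llu utime=%llu\n", s, u);
    adjust(1, 3, 100, &s, &u);   /* 1:3 ratio applied to a 100-unit runtime */
    printf("stime=%llu utime=%llu\n", s, u);
    return 0;
}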
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index fcb7f0217ff4..1ce8867283dc 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -658,8 +658,11 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) | |||
658 | * | 658 | * |
659 | * XXX figure out if select_task_rq_dl() deals with offline cpus. | 659 | * XXX figure out if select_task_rq_dl() deals with offline cpus. |
660 | */ | 660 | */ |
661 | if (unlikely(!rq->online)) | 661 | if (unlikely(!rq->online)) { |
662 | lockdep_unpin_lock(&rq->lock, rf.cookie); | ||
662 | rq = dl_task_offline_migration(rq, p); | 663 | rq = dl_task_offline_migration(rq, p); |
664 | rf.cookie = lockdep_pin_lock(&rq->lock); | ||
665 | } | ||
663 | 666 | ||
664 | /* | 667 | /* |
665 | * Queueing this task back might have overloaded rq, check if we need | 668 | * Queueing this task back might have overloaded rq, check if we need |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4088eedea763..039de34f1521 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4269,7 +4269,7 @@ static void sync_throttle(struct task_group *tg, int cpu) | |||
4269 | pcfs_rq = tg->parent->cfs_rq[cpu]; | 4269 | pcfs_rq = tg->parent->cfs_rq[cpu]; |
4270 | 4270 | ||
4271 | cfs_rq->throttle_count = pcfs_rq->throttle_count; | 4271 | cfs_rq->throttle_count = pcfs_rq->throttle_count; |
4272 | pcfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu)); | 4272 | cfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu)); |
4273 | } | 4273 | } |
4274 | 4274 | ||
4275 | /* conditionally throttle active cfs_rq's from put_prev_entity() */ | 4275 | /* conditionally throttle active cfs_rq's from put_prev_entity() */ |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b43d0b27c1fe..a13bbdaab47d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2140,6 +2140,21 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, | |||
2140 | return 0; | 2140 | return 0; |
2141 | } | 2141 | } |
2142 | 2142 | ||
2143 | static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp, | ||
2144 | int *valp, | ||
2145 | int write, void *data) | ||
2146 | { | ||
2147 | if (write) { | ||
2148 | if (*negp) | ||
2149 | return -EINVAL; | ||
2150 | *valp = *lvalp; | ||
2151 | } else { | ||
2152 | unsigned int val = *valp; | ||
2153 | *lvalp = (unsigned long)val; | ||
2154 | } | ||
2155 | return 0; | ||
2156 | } | ||
2157 | |||
2143 | static const char proc_wspace_sep[] = { ' ', '\t', '\n' }; | 2158 | static const char proc_wspace_sep[] = { ' ', '\t', '\n' }; |
2144 | 2159 | ||
2145 | static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, | 2160 | static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, |
@@ -2259,8 +2274,27 @@ static int do_proc_dointvec(struct ctl_table *table, int write, | |||
2259 | int proc_dointvec(struct ctl_table *table, int write, | 2274 | int proc_dointvec(struct ctl_table *table, int write, |
2260 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2275 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2261 | { | 2276 | { |
2262 | return do_proc_dointvec(table,write,buffer,lenp,ppos, | 2277 | return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL); |
2263 | NULL,NULL); | 2278 | } |
2279 | |||
2280 | /** | ||
2281 | * proc_douintvec - read a vector of unsigned integers | ||
2282 | * @table: the sysctl table | ||
2283 | * @write: %TRUE if this is a write to the sysctl file | ||
2284 | * @buffer: the user buffer | ||
2285 | * @lenp: the size of the user buffer | ||
2286 | * @ppos: file position | ||
2287 | * | ||
2288 | * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer | ||
2289 | * values from/to the user buffer, treated as an ASCII string. | ||
2290 | * | ||
2291 | * Returns 0 on success. | ||
2292 | */ | ||
2293 | int proc_douintvec(struct ctl_table *table, int write, | ||
2294 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
2295 | { | ||
2296 | return do_proc_dointvec(table, write, buffer, lenp, ppos, | ||
2297 | do_proc_douintvec_conv, NULL); | ||
2264 | } | 2298 | } |
2265 | 2299 | ||
2266 | /* | 2300 | /* |
@@ -2858,6 +2892,12 @@ int proc_dointvec(struct ctl_table *table, int write, | |||
2858 | return -ENOSYS; | 2892 | return -ENOSYS; |
2859 | } | 2893 | } |
2860 | 2894 | ||
2895 | int proc_douintvec(struct ctl_table *table, int write, | ||
2896 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
2897 | { | ||
2898 | return -ENOSYS; | ||
2899 | } | ||
2900 | |||
2861 | int proc_dointvec_minmax(struct ctl_table *table, int write, | 2901 | int proc_dointvec_minmax(struct ctl_table *table, int write, |
2862 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2902 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2863 | { | 2903 | { |
@@ -2903,6 +2943,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, | |||
2903 | * exception granted :-) | 2943 | * exception granted :-) |
2904 | */ | 2944 | */ |
2905 | EXPORT_SYMBOL(proc_dointvec); | 2945 | EXPORT_SYMBOL(proc_dointvec); |
2946 | EXPORT_SYMBOL(proc_douintvec); | ||
2906 | EXPORT_SYMBOL(proc_dointvec_jiffies); | 2947 | EXPORT_SYMBOL(proc_dointvec_jiffies); |
2907 | EXPORT_SYMBOL(proc_dointvec_minmax); | 2948 | EXPORT_SYMBOL(proc_dointvec_minmax); |
2908 | EXPORT_SYMBOL(proc_dointvec_userhz_jiffies); | 2949 | EXPORT_SYMBOL(proc_dointvec_userhz_jiffies); |
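
proc_douintvec(), added above, reads and writes full unsigned int values; its conversion helper rejects a negative write instead of letting it wrap. Below is a userspace sketch of that conversion rule only; uint_conv() mirrors the shape of do_proc_douintvec_conv() but is not the kernel interface.

#include <errno.h>
#include <stdio.h>

static int uint_conv(int negp, unsigned long *lvalp, unsigned int *valp, int write)
{
    if (write) {
        if (negp)
            return -EINVAL;      /* "-42" is not a valid unsigned value */
        *valp = *lvalp;
    } else {
        *lvalp = *valp;          /* widen for printing */
    }
    return 0;
}

int main(void)
{
    unsigned int val = 0;
    unsigned long parsed = 42;
    unsigned long shown;

    printf("write 42:  %d\n", uint_conv(0, &parsed, &val, 1));  /* 0, val becomes 42 */
    printf("write -42: %d\n", uint_conv(1, &parsed, &val, 1));  /* -EINVAL */

    uint_conv(0, &shown, &val, 0);
    printf("read back: %lu\n", shown);
    return 0;
}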
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3b65746c7f15..e07fb093f819 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -401,7 +401,10 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) | |||
401 | do { | 401 | do { |
402 | seq = raw_read_seqcount_latch(&tkf->seq); | 402 | seq = raw_read_seqcount_latch(&tkf->seq); |
403 | tkr = tkf->base + (seq & 0x01); | 403 | tkr = tkf->base + (seq & 0x01); |
404 | now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr); | 404 | now = ktime_to_ns(tkr->base); |
405 | |||
406 | now += clocksource_delta(tkr->read(tkr->clock), | ||
407 | tkr->cycle_last, tkr->mask); | ||
405 | } while (read_seqcount_retry(&tkf->seq, seq)); | 408 | } while (read_seqcount_retry(&tkf->seq, seq)); |
406 | 409 | ||
407 | return now; | 410 | return now; |
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index f6bd65236712..107310a6f36f 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -23,7 +23,9 @@ | |||
23 | 23 | ||
24 | #include "timekeeping_internal.h" | 24 | #include "timekeeping_internal.h" |
25 | 25 | ||
26 | static unsigned int sleep_time_bin[32] = {0}; | 26 | #define NUM_BINS 32 |
27 | |||
28 | static unsigned int sleep_time_bin[NUM_BINS] = {0}; | ||
27 | 29 | ||
28 | static int tk_debug_show_sleep_time(struct seq_file *s, void *data) | 30 | static int tk_debug_show_sleep_time(struct seq_file *s, void *data) |
29 | { | 31 | { |
@@ -69,6 +71,9 @@ late_initcall(tk_debug_sleep_time_init); | |||
69 | 71 | ||
70 | void tk_debug_account_sleep_time(struct timespec64 *t) | 72 | void tk_debug_account_sleep_time(struct timespec64 *t) |
71 | { | 73 | { |
72 | sleep_time_bin[fls(t->tv_sec)]++; | 74 | /* Cap bin index so we don't overflow the array */ |
75 | int bin = min(fls(t->tv_sec), NUM_BINS-1); | ||
76 | |||
77 | sleep_time_bin[bin]++; | ||
73 | } | 78 | } |
74 | 79 | ||
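
The timekeeping_debug.c fix above caps the fls()-derived histogram index: a reported sleep time of 2^31 seconds or more would otherwise index past the 32-entry array. A sketch of the clamp follows, using a portable fls() stand-in rather than the kernel's.

#include <stdio.h>

#define NUM_BINS 32

static unsigned int sleep_time_bin[NUM_BINS];

static int fls_ull(unsigned long long x)   /* highest set bit, 1-based; 0 for x == 0 */
{
    int bit = 0;

    while (x) {
        bit++;
        x >>= 1;
    }
    return bit;
}

static void account_sleep_time(unsigned long long tv_sec)
{
    int bin = fls_ull(tv_sec);

    if (bin > NUM_BINS - 1)                /* the overflow the patch prevents */
        bin = NUM_BINS - 1;
    sleep_time_bin[bin]++;
}

int main(void)
{
    account_sleep_time(3);                 /* lands in bin 2 */
    account_sleep_time(1ULL << 40);        /* would be bin 41, clamped to 31 */
    printf("bin2=%u bin31=%u\n", sleep_time_bin[2], sleep_time_bin[NUM_BINS - 1]);
    return 0;
}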
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 555670a5143c..32bf6f75a8fe 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1496,6 +1496,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) | |||
1496 | struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); | 1496 | struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); |
1497 | u64 expires = KTIME_MAX; | 1497 | u64 expires = KTIME_MAX; |
1498 | unsigned long nextevt; | 1498 | unsigned long nextevt; |
1499 | bool is_max_delta; | ||
1499 | 1500 | ||
1500 | /* | 1501 | /* |
1501 | * Pretend that there is no timer pending if the cpu is offline. | 1502 | * Pretend that there is no timer pending if the cpu is offline. |
@@ -1506,6 +1507,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) | |||
1506 | 1507 | ||
1507 | spin_lock(&base->lock); | 1508 | spin_lock(&base->lock); |
1508 | nextevt = __next_timer_interrupt(base); | 1509 | nextevt = __next_timer_interrupt(base); |
1510 | is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA); | ||
1509 | base->next_expiry = nextevt; | 1511 | base->next_expiry = nextevt; |
1510 | /* | 1512 | /* |
1511 | * We have a fresh next event. Check whether we can forward the base: | 1513 | * We have a fresh next event. Check whether we can forward the base: |
@@ -1519,7 +1521,8 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) | |||
1519 | expires = basem; | 1521 | expires = basem; |
1520 | base->is_idle = false; | 1522 | base->is_idle = false; |
1521 | } else { | 1523 | } else { |
1522 | expires = basem + (nextevt - basej) * TICK_NSEC; | 1524 | if (!is_max_delta) |
1525 | expires = basem + (nextevt - basej) * TICK_NSEC; | ||
1523 | /* | 1526 | /* |
1524 | * If we expect to sleep more than a tick, mark the base idle: | 1527 | * If we expect to sleep more than a tick, mark the base idle: |
1525 | */ | 1528 | */ |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 7598e6ca817a..dbafc5df03f3 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -223,7 +223,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, | |||
223 | what |= MASK_TC_BIT(op_flags, META); | 223 | what |= MASK_TC_BIT(op_flags, META); |
224 | what |= MASK_TC_BIT(op_flags, PREFLUSH); | 224 | what |= MASK_TC_BIT(op_flags, PREFLUSH); |
225 | what |= MASK_TC_BIT(op_flags, FUA); | 225 | what |= MASK_TC_BIT(op_flags, FUA); |
226 | if (op == REQ_OP_DISCARD) | 226 | if (op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE) |
227 | what |= BLK_TC_ACT(BLK_TC_DISCARD); | 227 | what |= BLK_TC_ACT(BLK_TC_DISCARD); |
228 | if (op == REQ_OP_FLUSH) | 228 | if (op == REQ_OP_FLUSH) |
229 | what |= BLK_TC_ACT(BLK_TC_FLUSH); | 229 | what |= BLK_TC_ACT(BLK_TC_FLUSH); |