author     Ingo Molnar <mingo@kernel.org>  2016-05-12 03:18:13 -0400
committer  Ingo Molnar <mingo@kernel.org>  2016-05-12 03:18:13 -0400
commit     eb60b3e5e8dfdd590e586a6fc22daf2f63a7b7e6 (patch)
tree       1b06e2c1beca8f970685eb13096c7a12480526c6 /kernel
parent     58fe9c4621b7219e724c0b7af053112f974a08c3 (diff)
parent     53d3bc773eaa7ab1cf63585e76af7ee869d5e709 (diff)
Merge branch 'sched/urgent' into sched/core to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/inode.c               7
-rw-r--r--  kernel/bpf/syscall.c            24
-rw-r--r--  kernel/bpf/verifier.c           77
-rw-r--r--  kernel/cgroup.c                  7
-rw-r--r--  kernel/cpuset.c                  4
-rw-r--r--  kernel/events/core.c            57
-rw-r--r--  kernel/kcov.c                    3
-rw-r--r--  kernel/kexec_core.c              7
-rw-r--r--  kernel/locking/lockdep.c        37
-rw-r--r--  kernel/locking/lockdep_proc.c    2
-rw-r--r--  kernel/sched/deadline.c          1
-rw-r--r--  kernel/sched/fair.c             29
-rw-r--r--  kernel/sched/rt.c                1
-rw-r--r--  kernel/trace/trace_events.c      9
-rw-r--r--  kernel/workqueue.c              29
15 files changed, 212 insertions(+), 82 deletions(-)
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index f2ece3c174a5..8f94ca1860cf 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -31,10 +31,10 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
 {
 	switch (type) {
 	case BPF_TYPE_PROG:
-		atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt);
+		raw = bpf_prog_inc(raw);
 		break;
 	case BPF_TYPE_MAP:
-		bpf_map_inc(raw, true);
+		raw = bpf_map_inc(raw, true);
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -297,7 +297,8 @@ static void *bpf_obj_do_get(const struct filename *pathname,
 		goto out;
 
 	raw = bpf_any_get(inode->i_private, *type);
-	touch_atime(&path);
+	if (!IS_ERR(raw))
+		touch_atime(&path);
 
 	path_put(&path);
 	return raw;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index adc5e4bd74f8..cf5e9f7ad13a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -218,11 +218,18 @@ struct bpf_map *__bpf_map_get(struct fd f)
 	return f.file->private_data;
 }
 
-void bpf_map_inc(struct bpf_map *map, bool uref)
+/* prog's and map's refcnt limit */
+#define BPF_MAX_REFCNT 32768
+
+struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
 {
-	atomic_inc(&map->refcnt);
+	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
+		atomic_dec(&map->refcnt);
+		return ERR_PTR(-EBUSY);
+	}
 	if (uref)
 		atomic_inc(&map->usercnt);
+	return map;
 }
 
 struct bpf_map *bpf_map_get_with_uref(u32 ufd)
@@ -234,7 +241,7 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
 	if (IS_ERR(map))
 		return map;
 
-	bpf_map_inc(map, true);
+	map = bpf_map_inc(map, true);
 	fdput(f);
 
 	return map;
@@ -658,6 +665,15 @@ static struct bpf_prog *__bpf_prog_get(struct fd f)
 	return f.file->private_data;
 }
 
+struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
+{
+	if (atomic_inc_return(&prog->aux->refcnt) > BPF_MAX_REFCNT) {
+		atomic_dec(&prog->aux->refcnt);
+		return ERR_PTR(-EBUSY);
+	}
+	return prog;
+}
+
 /* called by sockets/tracing/seccomp before attaching program to an event
  * pairs with bpf_prog_put()
  */
@@ -670,7 +686,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
 	if (IS_ERR(prog))
 		return prog;
 
-	atomic_inc(&prog->aux->refcnt);
+	prog = bpf_prog_inc(prog);
 	fdput(f);
 
 	return prog;
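
Taken together, the bpf changes make every refcount-taking path failable: bpf_map_inc()/bpf_prog_inc() saturate at BPF_MAX_REFCNT and hand back ERR_PTR(-EBUSY), and callers such as bpf_any_get() now check IS_ERR() before side effects like touch_atime(). A minimal userspace sketch of the same saturating-get idiom, using C11 atomics and a re-creation of the kernel's ERR_PTR()/IS_ERR() pointer encoding; all names below are illustrative, not the kernel's:

/*
 * Sketch of the saturating-get idiom, assuming C11 <stdatomic.h>.
 * MAX_REFCNT mirrors BPF_MAX_REFCNT; 'struct object' is illustrative.
 */
#include <errno.h>
#include <stdatomic.h>
#include <stdio.h>

#define MAX_ERRNO	4095
#define MAX_REFCNT	32768

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct object {
	atomic_int refcnt;
};

static struct object *object_get(struct object *obj)
{
	/* atomic_inc_return() analogue: bump, then inspect the result;
	 * saturate instead of risking overflow, undoing the increment. */
	if (atomic_fetch_add(&obj->refcnt, 1) + 1 > MAX_REFCNT) {
		atomic_fetch_sub(&obj->refcnt, 1);
		return ERR_PTR(-EBUSY);
	}
	return obj;
}

int main(void)
{
	struct object obj = { .refcnt = 1 };
	struct object *ref = object_get(&obj);

	if (IS_ERR(ref)) {	/* check before any side effect */
		fprintf(stderr, "get failed: %ld\n", PTR_ERR(ref));
		return 1;
	}
	printf("refcnt now %d\n", atomic_load(&obj.refcnt));
	return 0;
}

Returning the pointer (rather than void) forces the caller to consume the result, so a saturated refcount cannot be silently ignored.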
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 618ef77c302a..c5c17a62f509 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -239,16 +239,6 @@ static const char * const reg_type_str[] = {
 	[CONST_IMM]		= "imm",
 };
 
-static const struct {
-	int map_type;
-	int func_id;
-} func_limit[] = {
-	{BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
-	{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
-	{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output},
-	{BPF_MAP_TYPE_STACK_TRACE, BPF_FUNC_get_stackid},
-};
-
 static void print_verifier_state(struct verifier_env *env)
 {
 	enum bpf_reg_type t;
@@ -921,27 +911,52 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
 
 static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 {
-	bool bool_map, bool_func;
-	int i;
-
 	if (!map)
 		return 0;
 
-	for (i = 0; i < ARRAY_SIZE(func_limit); i++) {
-		bool_map = (map->map_type == func_limit[i].map_type);
-		bool_func = (func_id == func_limit[i].func_id);
-		/* only when map & func pair match it can continue.
-		 * don't allow any other map type to be passed into
-		 * the special func;
-		 */
-		if (bool_func && bool_map != bool_func) {
-			verbose("cannot pass map_type %d into func %d\n",
-				map->map_type, func_id);
-			return -EINVAL;
-		}
+	/* We need a two way check, first is from map perspective ... */
+	switch (map->map_type) {
+	case BPF_MAP_TYPE_PROG_ARRAY:
+		if (func_id != BPF_FUNC_tail_call)
+			goto error;
+		break;
+	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+		if (func_id != BPF_FUNC_perf_event_read &&
+		    func_id != BPF_FUNC_perf_event_output)
+			goto error;
+		break;
+	case BPF_MAP_TYPE_STACK_TRACE:
+		if (func_id != BPF_FUNC_get_stackid)
+			goto error;
+		break;
+	default:
+		break;
+	}
+
+	/* ... and second from the function itself. */
+	switch (func_id) {
+	case BPF_FUNC_tail_call:
+		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
+			goto error;
+		break;
+	case BPF_FUNC_perf_event_read:
+	case BPF_FUNC_perf_event_output:
+		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
+			goto error;
+		break;
+	case BPF_FUNC_get_stackid:
+		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
+			goto error;
+		break;
+	default:
+		break;
 	}
 
 	return 0;
+error:
+	verbose("cannot pass map_type %d into func %d\n",
+		map->map_type, func_id);
+	return -EINVAL;
 }
 
 static int check_call(struct verifier_env *env, int func_id)
@@ -2030,7 +2045,6 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
 			if (IS_ERR(map)) {
 				verbose("fd %d is not pointing to valid bpf_map\n",
 					insn->imm);
-				fdput(f);
 				return PTR_ERR(map);
 			}
 
@@ -2050,15 +2064,18 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
 				return -E2BIG;
 			}
 
-			/* remember this map */
-			env->used_maps[env->used_map_cnt++] = map;
-
 			/* hold the map. If the program is rejected by verifier,
 			 * the map will be released by release_maps() or it
 			 * will be used by the valid program until it's unloaded
 			 * and all maps are released in free_bpf_prog_info()
 			 */
-			bpf_map_inc(map, false);
+			map = bpf_map_inc(map, false);
+			if (IS_ERR(map)) {
+				fdput(f);
+				return PTR_ERR(map);
+			}
+			env->used_maps[env->used_map_cnt++] = map;
+
 			fdput(f);
 next_insn:
 			insn++;
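
The verifier rewrite replaces the func_limit[] table with an explicitly two-way check: the first switch constrains what a given map type may be passed to, the second constrains what a given helper may receive, so a newly added map type or helper fails closed until both sides are taught about it. A compact sketch of that validation shape, with invented enum values:

/*
 * Sketch of a two-way compatibility check between a resource type and
 * an operation, mirroring the shape of check_map_func_compatibility().
 * The enums and allowed pairings are invented for illustration.
 */
#include <stdio.h>

enum res_type { RES_A, RES_B, RES_PLAIN };
enum op_id    { OP_ONLY_A, OP_ONLY_B, OP_GENERIC };

static int check_compat(enum res_type type, enum op_id op)
{
	/* First direction: what may this resource type be passed to? */
	switch (type) {
	case RES_A:
		if (op != OP_ONLY_A)
			goto error;
		break;
	case RES_B:
		if (op != OP_ONLY_B)
			goto error;
		break;
	default:
		break;			/* unrestricted resource */
	}

	/* Second direction: what may this operation receive? */
	switch (op) {
	case OP_ONLY_A:
		if (type != RES_A)
			goto error;
		break;
	case OP_ONLY_B:
		if (type != RES_B)
			goto error;
		break;
	default:
		break;			/* unrestricted operation */
	}

	return 0;
error:
	fprintf(stderr, "cannot pass type %d into op %d\n", type, op);
	return -1;
}

int main(void)
{
	int ok  = check_compat(RES_A, OP_ONLY_A);	/* accepted */
	int bad = check_compat(RES_A, OP_GENERIC);	/* rejected by side one */

	printf("ok=%d bad=%d\n", ok, bad);
	return 0;
}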
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 671dc05c0b0f..909a7d31ffd3 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2825,9 +2825,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
 				    size_t nbytes, loff_t off, bool threadgroup)
 {
 	struct task_struct *tsk;
+	struct cgroup_subsys *ss;
 	struct cgroup *cgrp;
 	pid_t pid;
-	int ret;
+	int ssid, ret;
 
 	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
 		return -EINVAL;
@@ -2875,8 +2876,10 @@ out_unlock_rcu:
 	rcu_read_unlock();
 out_unlock_threadgroup:
 	percpu_up_write(&cgroup_threadgroup_rwsem);
+	for_each_subsys(ss, ssid)
+		if (ss->post_attach)
+			ss->post_attach();
 	cgroup_kn_unlock(of->kn);
-	cpuset_post_attach_flush();
 	return ret ?: nbytes;
 }
 
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 00ab5c2b7c5b..1902956baba1 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -58,7 +58,6 @@
 #include <asm/uaccess.h>
 #include <linux/atomic.h>
 #include <linux/mutex.h>
-#include <linux/workqueue.h>
 #include <linux/cgroup.h>
 #include <linux/wait.h>
 
@@ -1016,7 +1015,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 	}
 }
 
-void cpuset_post_attach_flush(void)
+static void cpuset_post_attach(void)
 {
 	flush_workqueue(cpuset_migrate_mm_wq);
 }
@@ -2087,6 +2086,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
 	.can_attach	= cpuset_can_attach,
 	.cancel_attach	= cpuset_cancel_attach,
 	.attach		= cpuset_attach,
+	.post_attach	= cpuset_post_attach,
 	.bind		= cpuset_bind,
 	.legacy_cftypes	= files,
 	.early_init	= true,
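
The cgroup/cpuset pair converts a hard-coded cpuset_post_attach_flush() call in cgroup core into a generic ss->post_attach() callback: cpuset registers its flush as the hook, and __cgroup_procs_write() just walks all subsystems and calls the hook where one is set. The optional-callback iteration looks roughly like this self-contained sketch (names are illustrative):

/*
 * Sketch of the optional per-subsystem hook pattern behind
 * for_each_subsys()/ss->post_attach. Everything here is illustrative;
 * the real cpuset hook flushes a memory-migration workqueue.
 */
#include <stddef.h>
#include <stdio.h>

struct subsys {
	const char *name;
	void (*post_attach)(void);	/* optional, may be NULL */
};

static void cpuset_like_post_attach(void)
{
	puts("cpuset: flushing deferred mm migration");
}

static struct subsys subsystems[] = {
	{ .name = "cpu" },					/* no hook */
	{ .name = "cpuset", .post_attach = cpuset_like_post_attach },
};

int main(void)
{
	size_t ssid;

	/* Core code stays controller-agnostic: call each hook if set. */
	for (ssid = 0; ssid < sizeof(subsystems) / sizeof(subsystems[0]); ssid++)
		if (subsystems[ssid].post_attach)
			subsystems[ssid].post_attach();
	return 0;
}

Compared with the old direct call, a second controller that needs post-attach work now sets one struct field instead of patching cgroup.c.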
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 52bedc5a5aaa..c0ded2416615 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -351,7 +351,7 @@ static struct srcu_struct pmus_srcu;
  *   1 - disallow cpu events for unpriv
  *   2 - disallow kernel profiling for unpriv
  */
-int sysctl_perf_event_paranoid __read_mostly = 1;
+int sysctl_perf_event_paranoid __read_mostly = 2;
 
 /* Minimum for 512 kiB + 1 user control page */
 int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */
@@ -412,7 +412,8 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
 	if (ret || !write)
 		return ret;
 
-	if (sysctl_perf_cpu_time_max_percent == 100) {
+	if (sysctl_perf_cpu_time_max_percent == 100 ||
+	    sysctl_perf_cpu_time_max_percent == 0) {
 		printk(KERN_WARNING
 		       "perf: Dynamic interrupt throttling disabled, can hang your system!\n");
 		WRITE_ONCE(perf_sample_allowed_ns, 0);
@@ -1105,6 +1106,7 @@ static void put_ctx(struct perf_event_context *ctx)
  * function.
  *
  * Lock order:
+ *    cred_guard_mutex
  *	task_struct::perf_event_mutex
  *	  perf_event_context::mutex
  *	    perf_event::child_mutex;
@@ -3420,7 +3422,6 @@ static struct task_struct *
 find_lively_task_by_vpid(pid_t vpid)
 {
 	struct task_struct *task;
-	int err;
 
 	rcu_read_lock();
 	if (!vpid)
@@ -3434,16 +3435,7 @@ find_lively_task_by_vpid(pid_t vpid)
 	if (!task)
 		return ERR_PTR(-ESRCH);
 
-	/* Reuse ptrace permission checks for now. */
-	err = -EACCES;
-	if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
-		goto errout;
-
 	return task;
-errout:
-	put_task_struct(task);
-	return ERR_PTR(err);
-
 }
 
 /*
@@ -8413,6 +8405,24 @@ SYSCALL_DEFINE5(perf_event_open,
 
 	get_online_cpus();
 
+	if (task) {
+		err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
+		if (err)
+			goto err_cpus;
+
+		/*
+		 * Reuse ptrace permission checks for now.
+		 *
+		 * We must hold cred_guard_mutex across this and any potential
+		 * perf_install_in_context() call for this new event to
+		 * serialize against exec() altering our credentials (and the
+		 * perf_event_exit_task() that could imply).
+		 */
+		err = -EACCES;
+		if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
+			goto err_cred;
+	}
+
 	if (flags & PERF_FLAG_PID_CGROUP)
 		cgroup_fd = pid;
 
@@ -8420,7 +8430,7 @@ SYSCALL_DEFINE5(perf_event_open,
 				 NULL, NULL, cgroup_fd);
 	if (IS_ERR(event)) {
 		err = PTR_ERR(event);
-		goto err_cpus;
+		goto err_cred;
 	}
 
 	if (is_sampling_event(event)) {
@@ -8479,11 +8489,6 @@ SYSCALL_DEFINE5(perf_event_open,
 			goto err_context;
 	}
 
-	if (task) {
-		put_task_struct(task);
-		task = NULL;
-	}
-
 	/*
 	 * Look up the group leader (we will attach this event to it):
 	 */
@@ -8581,6 +8586,11 @@ SYSCALL_DEFINE5(perf_event_open,
 
 	WARN_ON_ONCE(ctx->parent_ctx);
 
+	/*
+	 * This is the point of no return; we cannot fail hereafter. This is
+	 * where we start modifying current state.
+	 */
+
 	if (move_group) {
 		/*
 		 * See perf_event_ctx_lock() for comments on the details
@@ -8652,6 +8662,11 @@ SYSCALL_DEFINE5(perf_event_open,
 	mutex_unlock(&gctx->mutex);
 	mutex_unlock(&ctx->mutex);
 
+	if (task) {
+		mutex_unlock(&task->signal->cred_guard_mutex);
+		put_task_struct(task);
+	}
+
 	put_online_cpus();
 
 	mutex_lock(&current->perf_event_mutex);
@@ -8684,6 +8699,9 @@ err_alloc:
 	 */
 	if (!event_file)
 		free_event(event);
+err_cred:
+	if (task)
+		mutex_unlock(&task->signal->cred_guard_mutex);
 err_cpus:
 	put_online_cpus();
 err_task:
@@ -8968,6 +8986,9 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 
 /*
  * When a child task exits, feed back event values to parent events.
+ *
+ * Can be called with cred_guard_mutex held when called from
+ * install_exec_creds().
 */
 void perf_event_exit_task(struct task_struct *child)
 {
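
The perf change is a TOCTOU fix: the ptrace_may_access() check moves out of find_lively_task_by_vpid() and under the target's cred_guard_mutex in perf_event_open(), which stays held across perf_install_in_context(), so a concurrent exec() cannot swap credentials between the check and the use; the new err_cred label unlocks it on every failure path. A pthread-based sketch of the check-and-use-under-one-lock shape; the target/credential types are invented for illustration:

/*
 * Sketch of check-and-use under one lock, assuming pthreads.
 * 'struct target', may_access() and the uid field are invented; the
 * kernel equivalents are task_struct, ptrace_may_access() and real
 * credentials guarded by cred_guard_mutex.
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

struct target {
	pthread_mutex_t cred_guard;	/* serializes credential changes */
	int uid;
};

static int may_access(const struct target *t)
{
	return t->uid == 0;		/* stand-in permission check */
}

static int attach_monitor(struct target *t)
{
	int err = pthread_mutex_lock(&t->cred_guard);

	if (err)
		return -err;

	if (!may_access(t)) {
		err = -EACCES;		/* unwind mirrors err_cred */
		goto out_unlock;
	}

	/* ... install monitoring state while credentials are pinned:
	 * nothing between the check and this point can race an exec(). */
	printf("attached to uid %d\n", t->uid);
	err = 0;

out_unlock:
	pthread_mutex_unlock(&t->cred_guard);
	return err;
}

int main(void)
{
	struct target t = { PTHREAD_MUTEX_INITIALIZER, 0 };

	return attach_monitor(&t) ? 1 : 0;
}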
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 3efbee0834a8..a02f2dddd1d7 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -1,5 +1,6 @@
 #define pr_fmt(fmt) "kcov: " fmt
 
+#define DISABLE_BRANCH_PROFILING
 #include <linux/compiler.h>
 #include <linux/types.h>
 #include <linux/file.h>
@@ -43,7 +44,7 @@ struct kcov {
  * Entry point from instrumented code.
  * This is called once per basic-block/edge.
  */
-void __sanitizer_cov_trace_pc(void)
+void notrace __sanitizer_cov_trace_pc(void)
 {
 	struct task_struct *t;
 	enum kcov_mode mode;
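
Both kcov hunks defend against the same recursion: the coverage hook is itself code, so branch profiling of likely()/unlikely() inside it, or an ftrace hook on its entry, would call back into the tracer. DISABLE_BRANCH_PROFILING turns the profiling macros off for this file, and notrace, which the kernel defines on most configurations roughly as below, exempts the function from instrumentation:

/*
 * Roughly how the kernel spells notrace on most configurations
 * (include/linux/compiler.h); the attribute keeps the compiler from
 * emitting instrumentation calls for the function. The hook below is
 * an illustrative stand-in for __sanitizer_cov_trace_pc().
 */
#define notrace __attribute__((no_instrument_function))

static unsigned long hits;

/* If this function were itself instrumented, every entry would emit a
 * call back into it, recursing until the stack overflows. */
void notrace trace_hook(void)
{
	hits++;
}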
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 8d34308ea449..1391d3ee3b86 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -1415,6 +1415,9 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_OFFSET(page, lru);
 	VMCOREINFO_OFFSET(page, _mapcount);
 	VMCOREINFO_OFFSET(page, private);
+	VMCOREINFO_OFFSET(page, compound_dtor);
+	VMCOREINFO_OFFSET(page, compound_order);
+	VMCOREINFO_OFFSET(page, compound_head);
 	VMCOREINFO_OFFSET(pglist_data, node_zones);
 	VMCOREINFO_OFFSET(pglist_data, nr_zones);
 #ifdef CONFIG_FLAT_NODE_MEM_MAP
@@ -1447,8 +1450,8 @@ static int __init crash_save_vmcoreinfo_init(void)
 #ifdef CONFIG_X86
 	VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
 #endif
-#ifdef CONFIG_HUGETLBFS
-	VMCOREINFO_SYMBOL(free_huge_page);
+#ifdef CONFIG_HUGETLB_PAGE
+	VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR);
 #endif
 
 	arch_crash_save_vmcoreinfo();
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index d7f94f4c811d..68bc6a654ca3 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2177,15 +2177,37 @@ cache_hit:
 	chain->irq_context = hlock->irq_context;
 	i = get_first_held_lock(curr, hlock);
 	chain->depth = curr->lockdep_depth + 1 - i;
+
+	BUILD_BUG_ON((1UL << 24) <= ARRAY_SIZE(chain_hlocks));
+	BUILD_BUG_ON((1UL << 6)  <= ARRAY_SIZE(curr->held_locks));
+	BUILD_BUG_ON((1UL << 8*sizeof(chain_hlocks[0])) <= ARRAY_SIZE(lock_classes));
+
 	if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
 		chain->base = nr_chain_hlocks;
-		nr_chain_hlocks += chain->depth;
 		for (j = 0; j < chain->depth - 1; j++, i++) {
 			int lock_id = curr->held_locks[i].class_idx - 1;
 			chain_hlocks[chain->base + j] = lock_id;
 		}
 		chain_hlocks[chain->base + j] = class - lock_classes;
 	}
+
+	if (nr_chain_hlocks < MAX_LOCKDEP_CHAIN_HLOCKS)
+		nr_chain_hlocks += chain->depth;
+
+#ifdef CONFIG_DEBUG_LOCKDEP
+	/*
+	 * Important for check_no_collision().
+	 */
+	if (unlikely(nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)) {
+		if (debug_locks_off_graph_unlock())
+			return 0;
+
+		print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!");
+		dump_stack();
+		return 0;
+	}
+#endif
+
 	hlist_add_head_rcu(&chain->entry, hash_head);
 	debug_atomic_inc(chain_lookup_misses);
 	inc_chains();
@@ -2933,6 +2955,11 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
 	return 1;
 }
 
+static inline unsigned int task_irq_context(struct task_struct *task)
+{
+	return 2 * !!task->hardirq_context + !!task->softirq_context;
+}
+
 static int separate_irq_context(struct task_struct *curr,
 		struct held_lock *hlock)
 {
@@ -2941,8 +2968,6 @@ static int separate_irq_context(struct task_struct *curr,
 	/*
 	 * Keep track of points where we cross into an interrupt context:
 	 */
-	hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) +
-		curr->softirq_context;
 	if (depth) {
 		struct held_lock *prev_hlock;
 
@@ -2974,6 +2999,11 @@ static inline int mark_irqflags(struct task_struct *curr,
 	return 1;
 }
 
+static inline unsigned int task_irq_context(struct task_struct *task)
+{
+	return 0;
+}
+
 static inline int separate_irq_context(struct task_struct *curr,
 		struct held_lock *hlock)
 {
@@ -3242,6 +3272,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	hlock->acquire_ip = ip;
 	hlock->instance = lock;
 	hlock->nest_lock = nest_lock;
+	hlock->irq_context = task_irq_context(curr);
 	hlock->trylock = trylock;
 	hlock->read = read;
 	hlock->check = check;
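
task_irq_context() centralizes the two-bit irq-context encoding (2 * hardirq + softirq) that separate_irq_context() previously computed inline, and __lock_acquire() now stamps it on the held lock up front. The encoding can be checked in isolation:

/*
 * Standalone check of task_irq_context()'s two-bit encoding:
 * 0 = process context, 1 = softirq, 2 = hardirq, 3 = hardirq nested
 * over softirq. 'struct task_like' stands in for task_struct, whose
 * context fields are counters, hence the !! normalization.
 */
#include <assert.h>

struct task_like {
	unsigned int hardirq_context;
	unsigned int softirq_context;
};

static unsigned int task_irq_context(const struct task_like *task)
{
	return 2 * !!task->hardirq_context + !!task->softirq_context;
}

int main(void)
{
	assert(task_irq_context(&(struct task_like){ 0, 0 }) == 0);
	assert(task_irq_context(&(struct task_like){ 0, 1 }) == 1);
	assert(task_irq_context(&(struct task_like){ 1, 0 }) == 2);
	assert(task_irq_context(&(struct task_like){ 2, 3 }) == 3);
	return 0;
}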
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index dbb61a302548..a0f61effad25 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -141,6 +141,8 @@ static int lc_show(struct seq_file *m, void *v)
 	int i;
 
 	if (v == SEQ_START_TOKEN) {
+		if (nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)
+			seq_printf(m, "(buggered) ");
 		seq_printf(m, "all lock chains:\n");
 		return 0;
 	}
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index ba53a87bb978..0ac6c84f3371 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1395,6 +1395,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
 				     !cpumask_test_cpu(later_rq->cpu,
 						       &task->cpus_allowed) ||
 				     task_running(rq, task) ||
+				     !dl_task(task) ||
 				     !task_on_rq_queued(task))) {
 			double_unlock_balance(rq, later_rq);
 			later_rq = NULL;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 51f7a4b62985..39fde3660f97 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3099,7 +3099,14 @@ static int idle_balance(struct rq *this_rq);
 
 #else /* CONFIG_SMP */
 
-static inline void update_load_avg(struct sched_entity *se, int update_tg) {}
+static inline void update_load_avg(struct sched_entity *se, int not_used)
+{
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+	struct rq *rq = rq_of(cfs_rq);
+
+	cpufreq_trigger_update(rq_clock(rq));
+}
+
 static inline void
 enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
 static inline void
@@ -3250,25 +3257,17 @@ static inline void check_schedstat_required(void)
 static void
 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
-	bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING);
-	bool curr = cfs_rq->curr == se;
-
 	/*
-	 * If we're the current task, we must renormalise before calling
-	 * update_curr().
+	 * Update the normalized vruntime before updating min_vruntime
+	 * through calling update_curr().
 	 */
-	if (renorm && curr)
+	if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
 		se->vruntime += cfs_rq->min_vruntime;
 
-	update_curr(cfs_rq);
-
 	/*
-	 * Otherwise, renormalise after, such that we're placed at the current
-	 * moment in time, instead of some random moment in the past.
+	 * Update run-time statistics of the 'current'.
 	 */
-	if (renorm && !curr)
-		se->vruntime += cfs_rq->min_vruntime;
-
+	update_curr(cfs_rq);
 	enqueue_entity_load_avg(cfs_rq, se);
 	account_entity_enqueue(cfs_rq, se);
 	update_cfs_shares(cfs_rq);
@@ -3284,7 +3283,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		update_stats_enqueue(cfs_rq, se);
 		check_spread(cfs_rq, se);
 	}
-	if (!curr)
+	if (se != cfs_rq->curr)
 		__enqueue_entity(cfs_rq, se);
 	se->on_rq = 1;
 
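
The enqueue_entity() hunk restores the order in which a sleeping task's vruntime, kept relative to the queue's min_vruntime while dequeued, is re-based before update_curr() advances min_vruntime. The relative-storage idea in isolation:

/*
 * Sketch of CFS-style relative vruntime bookkeeping: while an entity
 * is off the queue its vruntime is kept relative to min_vruntime, and
 * enqueue re-bases it against the current min_vruntime. Plain structs,
 * no scheduler, illustrative only.
 */
#include <assert.h>

struct rq_like { unsigned long long min_vruntime; };
struct se_like { unsigned long long vruntime; };

static void dequeue(struct rq_like *rq, struct se_like *se)
{
	se->vruntime -= rq->min_vruntime;	/* make relative */
}

static void enqueue(struct rq_like *rq, struct se_like *se)
{
	se->vruntime += rq->min_vruntime;	/* re-base on wakeup */
}

int main(void)
{
	struct rq_like rq = { .min_vruntime = 1000 };
	struct se_like se = { .vruntime = 1200 };

	dequeue(&rq, &se);		/* stored as +200 */
	rq.min_vruntime = 5000;		/* queue kept running meanwhile */
	enqueue(&rq, &se);

	assert(se.vruntime == 5200);	/* no unfair credit from sleeping */
	return 0;
}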
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 68deaf901a12..67afa06cc8bc 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1729,6 +1729,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 				     !cpumask_test_cpu(lowest_rq->cpu,
 						       tsk_cpus_allowed(task)) ||
 				     task_running(rq, task) ||
+				     !rt_task(task) ||
 				     !task_on_rq_queued(task))) {
 
 			double_unlock_balance(rq, lowest_rq);
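
This rt.c hunk and the deadline.c hunk above close the same window: find_lock_lowest_rq()/find_lock_later_rq() drop the run-queue lock to acquire a second queue, and by the time both locks are held the task may have left the scheduling class, so !rt_task()/!dl_task() joins the revalidation. The general drop-relock-revalidate pattern, sketched with pthreads and an illustrative state field:

/*
 * Sketch of drop/retake/revalidate, assuming pthreads. 'struct queue'
 * and its state field are illustrative; the kernel revalidates cpu
 * affinity, task_running(), the scheduling class and queued status
 * after the locks may have been dropped.
 */
#include <pthread.h>
#include <stdbool.h>

#define STATE_ELIGIBLE	1

struct queue {
	pthread_mutex_t lock;
	int state;		/* may change while unlocked */
};

static bool lock_both_and_validate(struct queue *src, struct queue *dst)
{
	/* Caller holds src->lock; lock ordering forces dropping it
	 * before taking both, which opens a window for changes. */
	pthread_mutex_unlock(&src->lock);
	pthread_mutex_lock(&dst->lock);
	pthread_mutex_lock(&src->lock);

	/* Recheck everything the earlier decision relied on. */
	if (src->state != STATE_ELIGIBLE) {
		pthread_mutex_unlock(&dst->lock);
		return false;	/* caller retries or gives up */
	}
	return true;		/* both locks held, decision still valid */
}

int main(void)
{
	struct queue a = { PTHREAD_MUTEX_INITIALIZER, STATE_ELIGIBLE };
	struct queue b = { PTHREAD_MUTEX_INITIALIZER, 0 };

	pthread_mutex_lock(&a.lock);
	if (lock_both_and_validate(&a, &b))
		pthread_mutex_unlock(&b.lock);
	pthread_mutex_unlock(&a.lock);
	return 0;
}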
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 05ddc0820771..6f965864cc02 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2095,8 +2095,13 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
 	trace_create_file("filter", 0644, file->dir, file,
 			  &ftrace_event_filter_fops);
 
-	trace_create_file("trigger", 0644, file->dir, file,
-			  &event_trigger_fops);
+	/*
+	 * Only event directories that can be enabled should have
+	 * triggers.
+	 */
+	if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
+		trace_create_file("trigger", 0644, file->dir, file,
+				  &event_trigger_fops);
 
 	trace_create_file("format", 0444, file->dir, call,
 			  &ftrace_event_format_fops);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2232ae3e3ad6..3bfdff06eea7 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -666,6 +666,35 @@ static void set_work_pool_and_clear_pending(struct work_struct *work,
 	 */
 	smp_wmb();
 	set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
+	/*
+	 * The following mb guarantees that previous clear of a PENDING bit
+	 * will not be reordered with any speculative LOADS or STORES from
+	 * work->current_func, which is executed afterwards. This possible
+	 * reordering can lead to a missed execution on attempt to queue
+	 * the same @work. E.g. consider this case:
+	 *
+	 *   CPU#0                         CPU#1
+	 *   ----------------------------  --------------------------------
+	 *
+	 * 1  STORE event_indicated
+	 * 2  queue_work_on() {
+	 * 3    test_and_set_bit(PENDING)
+	 * 4  }                            set_..._and_clear_pending() {
+	 * 5                                 set_work_data() # clear bit
+	 * 6                                 smp_mb()
+	 * 7                               work->current_func() {
+	 * 8                                 LOAD event_indicated
+	 *                                 }
+	 *
+	 * Without an explicit full barrier speculative LOAD on line 8 can
+	 * be executed before CPU#0 does STORE on line 1. If that happens,
+	 * CPU#0 observes the PENDING bit is still set and new execution of
+	 * a @work is not queued in the hope that CPU#1 will eventually
+	 * finish the queued @work. Meanwhile CPU#1 does not see
+	 * event_indicated is set, because speculative LOAD was executed
+	 * before actual STORE.
+	 */
+	smp_mb();
 }
 
 static void clear_work_data(struct work_struct *work)
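
The scenario in the new comment can be shrunk to a C11-atomics sketch: the work side must issue a full barrier between clearing PENDING and reading the caller's flag, pairing with the queueing side's test_and_set_bit(), or the event can be lost with neither side re-running the work. A compressed model of the two sides (illustrative only, not the kernel code):

/*
 * Model of the barrier pairing described above, assuming C11 atomics.
 * queue_side() plays CPU#0, work_side() plays CPU#1; 'pending' and
 * 'event_indicated' stand in for the PENDING bit and the caller's flag.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool pending = true;	/* a work item is queued */
static atomic_bool event_indicated;

static void queue_side(void)		/* CPU#0 */
{
	atomic_store_explicit(&event_indicated, true, memory_order_relaxed);

	/* test_and_set_bit(PENDING): if it was still set, we skip the
	 * queue and count on the running work seeing our flag, which is
	 * only safe if the work side fences after clearing PENDING. */
	if (atomic_exchange(&pending, true))
		printf("CPU#0: not requeued, relying on running work\n");
}

static void work_side(void)		/* CPU#1 */
{
	atomic_store_explicit(&pending, false, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* the added smp_mb() */

	if (atomic_load_explicit(&event_indicated, memory_order_relaxed))
		printf("CPU#1: event observed, work runs again\n");
}

int main(void)
{
	/* Run sequentially here; the lost-wakeup interleaving needs two
	 * CPUs, but the placement of the fence is the point. */
	queue_side();
	work_side();
	return 0;
}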