aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/events/core.c 41
-rw-r--r-- kernel/irq/dummychip.c 1
-rw-r--r-- kernel/locking/rtmutex.c 12
-rw-r--r-- kernel/rcu/tree.c 16
-rw-r--r-- kernel/sched/core.c 54
-rw-r--r-- kernel/time/clockevents.c 6
-rw-r--r-- kernel/trace/trace_output.c 3
-rw-r--r-- kernel/watchdog.c 20
8 files changed, 94 insertions, 59 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 81aa3a4ece9f..1a3bf48743ce 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -913,10 +913,30 @@ static void put_ctx(struct perf_event_context *ctx)
913 * Those places that change perf_event::ctx will hold both 913 * Those places that change perf_event::ctx will hold both
914 * perf_event_ctx::mutex of the 'old' and 'new' ctx value. 914 * perf_event_ctx::mutex of the 'old' and 'new' ctx value.
915 * 915 *
916 * Lock ordering is by mutex address. There is one other site where 916 * Lock ordering is by mutex address. There are two other sites where
917 * perf_event_context::mutex nests and that is put_event(). But remember that 917 * perf_event_context::mutex nests and those are:
918 * that is a parent<->child context relation, and migration does not affect 918 *
919 * children, therefore these two orderings should not interact. 919 * - perf_event_exit_task_context() [ child , 0 ]
920 * __perf_event_exit_task()
921 * sync_child_event()
922 * put_event() [ parent, 1 ]
923 *
924 * - perf_event_init_context() [ parent, 0 ]
925 * inherit_task_group()
926 * inherit_group()
927 * inherit_event()
928 * perf_event_alloc()
929 * perf_init_event()
930 * perf_try_init_event() [ child , 1 ]
931 *
932 * While it appears there is an obvious deadlock here -- the parent and child
933 * nesting levels are inverted between the two -- this is in fact safe because
934 * life-time rules separate them. That is, an exiting task cannot fork, and a
935 * spawning task cannot (yet) exit.
936 *
937 * But remember that these are parent<->child context relations, and
938 * migration does not affect children, therefore these two orderings should not
939 * interact.
920 * 940 *
921 * The change in perf_event::ctx does not affect children (as claimed above) 941 * The change in perf_event::ctx does not affect children (as claimed above)
922 * because the sys_perf_event_open() case will install a new event and break 942 * because the sys_perf_event_open() case will install a new event and break
@@ -3657,9 +3677,6 @@ static void perf_remove_from_owner(struct perf_event *event)
3657 } 3677 }
3658} 3678}
3659 3679
3660/*
3661 * Called when the last reference to the file is gone.
3662 */
3663static void put_event(struct perf_event *event) 3680static void put_event(struct perf_event *event)
3664{ 3681{
3665 struct perf_event_context *ctx; 3682 struct perf_event_context *ctx;
@@ -3697,6 +3714,9 @@ int perf_event_release_kernel(struct perf_event *event)
3697} 3714}
3698EXPORT_SYMBOL_GPL(perf_event_release_kernel); 3715EXPORT_SYMBOL_GPL(perf_event_release_kernel);
3699 3716
3717/*
3718 * Called when the last reference to the file is gone.
3719 */
3700static int perf_release(struct inode *inode, struct file *file) 3720static int perf_release(struct inode *inode, struct file *file)
3701{ 3721{
3702 put_event(file->private_data); 3722 put_event(file->private_data);
@@ -7364,7 +7384,12 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
7364 return -ENODEV; 7384 return -ENODEV;
7365 7385
7366 if (event->group_leader != event) { 7386 if (event->group_leader != event) {
7367 ctx = perf_event_ctx_lock(event->group_leader); 7387 /*
7388 * This ctx->mutex can nest when we're called through
7389 * inheritance. See the perf_event_ctx_lock_nested() comment.
7390 */
7391 ctx = perf_event_ctx_lock_nested(event->group_leader,
7392 SINGLE_DEPTH_NESTING);
7368 BUG_ON(!ctx); 7393 BUG_ON(!ctx);
7369 } 7394 }
7370 7395
diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c
index 988dc58e8847..2feb6feca0cc 100644
--- a/kernel/irq/dummychip.c
+++ b/kernel/irq/dummychip.c
@@ -57,5 +57,6 @@ struct irq_chip dummy_irq_chip = {
57 .irq_ack = noop, 57 .irq_ack = noop,
58 .irq_mask = noop, 58 .irq_mask = noop,
59 .irq_unmask = noop, 59 .irq_unmask = noop,
60 .flags = IRQCHIP_SKIP_SET_WAKE,
60}; 61};
61EXPORT_SYMBOL_GPL(dummy_irq_chip); 62EXPORT_SYMBOL_GPL(dummy_irq_chip);
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index b73279367087..b025295f4966 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -265,15 +265,17 @@ struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
265} 265}
266 266
267/* 267/*
268 * Called by sched_setscheduler() to check whether the priority change 268 * Called by sched_setscheduler() to get the priority which will be
269 * is overruled by a possible priority boosting. 269 * effective after the change.
270 */ 270 */
271int rt_mutex_check_prio(struct task_struct *task, int newprio) 271int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
272{ 272{
273 if (!task_has_pi_waiters(task)) 273 if (!task_has_pi_waiters(task))
274 return 0; 274 return newprio;
275 275
276 return task_top_pi_waiter(task)->task->prio <= newprio; 276 if (task_top_pi_waiter(task)->task->prio <= newprio)
277 return task_top_pi_waiter(task)->task->prio;
278 return newprio;
277} 279}
278 280
279/* 281/*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 233165da782f..8cf7304b2867 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -162,11 +162,14 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
162static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; 162static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO;
163module_param(kthread_prio, int, 0644); 163module_param(kthread_prio, int, 0644);
164 164
165/* Delay in jiffies for grace-period initialization delays. */ 165/* Delay in jiffies for grace-period initialization delays, debug only. */
166static int gp_init_delay = IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) 166#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT
167 ? CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY 167static int gp_init_delay = CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY;
168 : 0;
169module_param(gp_init_delay, int, 0644); 168module_param(gp_init_delay, int, 0644);
169#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */
170static const int gp_init_delay;
171#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */
172#define PER_RCU_NODE_PERIOD 10 /* Number of grace periods between delays. */
170 173
171/* 174/*
172 * Track the rcutorture test sequence number and the update version 175 * Track the rcutorture test sequence number and the update version
@@ -1843,9 +1846,8 @@ static int rcu_gp_init(struct rcu_state *rsp)
1843 raw_spin_unlock_irq(&rnp->lock); 1846 raw_spin_unlock_irq(&rnp->lock);
1844 cond_resched_rcu_qs(); 1847 cond_resched_rcu_qs();
1845 ACCESS_ONCE(rsp->gp_activity) = jiffies; 1848 ACCESS_ONCE(rsp->gp_activity) = jiffies;
1846 if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) && 1849 if (gp_init_delay > 0 &&
1847 gp_init_delay > 0 && 1850 !(rsp->gpnum % (rcu_num_nodes * PER_RCU_NODE_PERIOD)))
1848 !(rsp->gpnum % (rcu_num_nodes * 10)))
1849 schedule_timeout_uninterruptible(gp_init_delay); 1851 schedule_timeout_uninterruptible(gp_init_delay);
1850 } 1852 }
1851 1853
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fe22f7510bce..57bd333bc4ab 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3300,15 +3300,18 @@ static void __setscheduler_params(struct task_struct *p,
3300 3300
3301/* Actually do priority change: must hold pi & rq lock. */ 3301/* Actually do priority change: must hold pi & rq lock. */
3302static void __setscheduler(struct rq *rq, struct task_struct *p, 3302static void __setscheduler(struct rq *rq, struct task_struct *p,
3303 const struct sched_attr *attr) 3303 const struct sched_attr *attr, bool keep_boost)
3304{ 3304{
3305 __setscheduler_params(p, attr); 3305 __setscheduler_params(p, attr);
3306 3306
3307 /* 3307 /*
3308 * If we get here, there was no pi waiters boosting the 3308 * Keep a potential priority boosting if called from
3309 * task. It is safe to use the normal prio. 3309 * sched_setscheduler().
3310 */ 3310 */
3311 p->prio = normal_prio(p); 3311 if (keep_boost)
3312 p->prio = rt_mutex_get_effective_prio(p, normal_prio(p));
3313 else
3314 p->prio = normal_prio(p);
3312 3315
3313 if (dl_prio(p->prio)) 3316 if (dl_prio(p->prio))
3314 p->sched_class = &dl_sched_class; 3317 p->sched_class = &dl_sched_class;
@@ -3408,7 +3411,7 @@ static int __sched_setscheduler(struct task_struct *p,
3408 int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 : 3411 int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
3409 MAX_RT_PRIO - 1 - attr->sched_priority; 3412 MAX_RT_PRIO - 1 - attr->sched_priority;
3410 int retval, oldprio, oldpolicy = -1, queued, running; 3413 int retval, oldprio, oldpolicy = -1, queued, running;
3411 int policy = attr->sched_policy; 3414 int new_effective_prio, policy = attr->sched_policy;
3412 unsigned long flags; 3415 unsigned long flags;
3413 const struct sched_class *prev_class; 3416 const struct sched_class *prev_class;
3414 struct rq *rq; 3417 struct rq *rq;
@@ -3590,15 +3593,14 @@ change:
3590 oldprio = p->prio; 3593 oldprio = p->prio;
3591 3594
3592 /* 3595 /*
3593 * Special case for priority boosted tasks. 3596 * Take priority boosted tasks into account. If the new
3594 * 3597 * effective priority is unchanged, we just store the new
3595 * If the new priority is lower or equal (user space view)
3596 * than the current (boosted) priority, we just store the new
3597 * normal parameters and do not touch the scheduler class and 3598 * normal parameters and do not touch the scheduler class and
3598 * the runqueue. This will be done when the task deboost 3599 * the runqueue. This will be done when the task deboost
3599 * itself. 3600 * itself.
3600 */ 3601 */
3601 if (rt_mutex_check_prio(p, newprio)) { 3602 new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
3603 if (new_effective_prio == oldprio) {
3602 __setscheduler_params(p, attr); 3604 __setscheduler_params(p, attr);
3603 task_rq_unlock(rq, p, &flags); 3605 task_rq_unlock(rq, p, &flags);
3604 return 0; 3606 return 0;
@@ -3612,7 +3614,7 @@ change:
3612 put_prev_task(rq, p); 3614 put_prev_task(rq, p);
3613 3615
3614 prev_class = p->sched_class; 3616 prev_class = p->sched_class;
3615 __setscheduler(rq, p, attr); 3617 __setscheduler(rq, p, attr, true);
3616 3618
3617 if (running) 3619 if (running)
3618 p->sched_class->set_curr_task(rq); 3620 p->sched_class->set_curr_task(rq);
@@ -6997,27 +6999,23 @@ static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
6997 unsigned long flags; 6999 unsigned long flags;
6998 long cpu = (long)hcpu; 7000 long cpu = (long)hcpu;
6999 struct dl_bw *dl_b; 7001 struct dl_bw *dl_b;
7002 bool overflow;
7003 int cpus;
7000 7004
7001 switch (action & ~CPU_TASKS_FROZEN) { 7005 switch (action) {
7002 case CPU_DOWN_PREPARE: 7006 case CPU_DOWN_PREPARE:
7003 /* explicitly allow suspend */ 7007 rcu_read_lock_sched();
7004 if (!(action & CPU_TASKS_FROZEN)) { 7008 dl_b = dl_bw_of(cpu);
7005 bool overflow;
7006 int cpus;
7007
7008 rcu_read_lock_sched();
7009 dl_b = dl_bw_of(cpu);
7010 7009
7011 raw_spin_lock_irqsave(&dl_b->lock, flags); 7010 raw_spin_lock_irqsave(&dl_b->lock, flags);
7012 cpus = dl_bw_cpus(cpu); 7011 cpus = dl_bw_cpus(cpu);
7013 overflow = __dl_overflow(dl_b, cpus, 0, 0); 7012 overflow = __dl_overflow(dl_b, cpus, 0, 0);
7014 raw_spin_unlock_irqrestore(&dl_b->lock, flags); 7013 raw_spin_unlock_irqrestore(&dl_b->lock, flags);
7015 7014
7016 rcu_read_unlock_sched(); 7015 rcu_read_unlock_sched();
7017 7016
7018 if (overflow) 7017 if (overflow)
7019 return notifier_from_errno(-EBUSY); 7018 return notifier_from_errno(-EBUSY);
7020 }
7021 cpuset_update_active_cpus(false); 7019 cpuset_update_active_cpus(false);
7022 break; 7020 break;
7023 case CPU_DOWN_PREPARE_FROZEN: 7021 case CPU_DOWN_PREPARE_FROZEN:
@@ -7346,7 +7344,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
7346 queued = task_on_rq_queued(p); 7344 queued = task_on_rq_queued(p);
7347 if (queued) 7345 if (queued)
7348 dequeue_task(rq, p, 0); 7346 dequeue_task(rq, p, 0);
7349 __setscheduler(rq, p, &attr); 7347 __setscheduler(rq, p, &attr, false);
7350 if (queued) { 7348 if (queued) {
7351 enqueue_task(rq, p, 0); 7349 enqueue_task(rq, p, 0);
7352 resched_curr(rq); 7350 resched_curr(rq);
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 11dc22a6983b..637a09461c1d 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -117,11 +117,7 @@ static int __clockevents_set_state(struct clock_event_device *dev,
117 /* Transition with new state-specific callbacks */ 117 /* Transition with new state-specific callbacks */
118 switch (state) { 118 switch (state) {
119 case CLOCK_EVT_STATE_DETACHED: 119 case CLOCK_EVT_STATE_DETACHED:
120 /* 120 /* The clockevent device is getting replaced. Shut it down. */
121 * This is an internal state, which is guaranteed to go from
122 * SHUTDOWN to DETACHED. No driver interaction required.
123 */
124 return 0;
125 121
126 case CLOCK_EVT_STATE_SHUTDOWN: 122 case CLOCK_EVT_STATE_SHUTDOWN:
127 return dev->set_state_shutdown(dev); 123 return dev->set_state_shutdown(dev);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 692bf7184c8c..25a086bcb700 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -178,12 +178,13 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
178EXPORT_SYMBOL(ftrace_print_hex_seq); 178EXPORT_SYMBOL(ftrace_print_hex_seq);
179 179
180const char * 180const char *
181ftrace_print_array_seq(struct trace_seq *p, const void *buf, int buf_len, 181ftrace_print_array_seq(struct trace_seq *p, const void *buf, int count,
182 size_t el_size) 182 size_t el_size)
183{ 183{
184 const char *ret = trace_seq_buffer_ptr(p); 184 const char *ret = trace_seq_buffer_ptr(p);
185 const char *prefix = ""; 185 const char *prefix = "";
186 void *ptr = (void *)buf; 186 void *ptr = (void *)buf;
187 size_t buf_len = count * el_size;
187 188
188 trace_seq_putc(p, '{'); 189 trace_seq_putc(p, '{');
189 190
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 2316f50b07a4..581a68a04c64 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -41,6 +41,8 @@
41#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) 41#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT)
42#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) 42#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT)
43 43
44static DEFINE_MUTEX(watchdog_proc_mutex);
45
44#ifdef CONFIG_HARDLOCKUP_DETECTOR 46#ifdef CONFIG_HARDLOCKUP_DETECTOR
45static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; 47static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
46#else 48#else
@@ -608,26 +610,36 @@ void watchdog_nmi_enable_all(void)
608{ 610{
609 int cpu; 611 int cpu;
610 612
611 if (!watchdog_user_enabled) 613 mutex_lock(&watchdog_proc_mutex);
612 return; 614
615 if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
616 goto unlock;
613 617
614 get_online_cpus(); 618 get_online_cpus();
615 for_each_online_cpu(cpu) 619 for_each_online_cpu(cpu)
616 watchdog_nmi_enable(cpu); 620 watchdog_nmi_enable(cpu);
617 put_online_cpus(); 621 put_online_cpus();
622
623unlock:
624 mutex_unlock(&watchdog_proc_mutex);
618} 625}
619 626
620void watchdog_nmi_disable_all(void) 627void watchdog_nmi_disable_all(void)
621{ 628{
622 int cpu; 629 int cpu;
623 630
631 mutex_lock(&watchdog_proc_mutex);
632
624 if (!watchdog_running) 633 if (!watchdog_running)
625 return; 634 goto unlock;
626 635
627 get_online_cpus(); 636 get_online_cpus();
628 for_each_online_cpu(cpu) 637 for_each_online_cpu(cpu)
629 watchdog_nmi_disable(cpu); 638 watchdog_nmi_disable(cpu);
630 put_online_cpus(); 639 put_online_cpus();
640
641unlock:
642 mutex_unlock(&watchdog_proc_mutex);
631} 643}
632#else 644#else
633static int watchdog_nmi_enable(unsigned int cpu) { return 0; } 645static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
@@ -744,8 +756,6 @@ static int proc_watchdog_update(void)
744 756
745} 757}
746 758
747static DEFINE_MUTEX(watchdog_proc_mutex);
748
749/* 759/*
750 * common function for watchdog, nmi_watchdog and soft_watchdog parameter 760 * common function for watchdog, nmi_watchdog and soft_watchdog parameter
751 * 761 *