aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniel Vetter <daniel.vetter@ffwll.ch>2019-08-26 16:14:23 -0400
committerJason Gunthorpe <jgg@mellanox.com>2019-09-07 03:28:05 -0400
commit312364f3534cc974b79a96d062bde2386315201f (patch)
tree19103ff516fbff6a78ce059b63cb5d4cbf598f1a
parentf2bc09e9519181c7ca7ad4778d46b804c5b4c8c9 (diff)
kernel.h: Add non_block_start/end()
In some special cases we must not block, but there's not a spinlock, preempt-off, irqs-off or similar critical section already that arms the might_sleep() debug checks. Add a non_block_start/end() pair to annotate these.

This will be used in the oom paths of mmu-notifiers, where blocking is not allowed to make sure there's forward progress. Quoting Michal:

"The notifier is called from quite a restricted context - oom_reaper - which shouldn't depend on any locks or sleepable conditionals. The code should be swift as well but we mostly do care about it to make a forward progress. Checking for sleepable context is the best thing we could come up with that would describe these demands at least partially."

Peter also asked whether we want to catch spinlocks on top, but Michal said those are less of a problem because spinlocks can't have an indirect dependency upon the page allocator and hence close the loop with the oom reaper.

Suggested by Michal Hocko.

Link: https://lore.kernel.org/r/20190826201425.17547-4-daniel.vetter@ffwll.ch
Acked-by: Christian König <christian.koenig@amd.com> (v1)
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
-rw-r--r--include/linux/kernel.h23
-rw-r--r--include/linux/sched.h4
-rw-r--r--kernel/sched/core.c19
3 files changed, 40 insertions, 6 deletions
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 4fa360a13c1e..d83d403dac2e 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -217,7 +217,9 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset);
  * might_sleep - annotation for functions that can sleep
  *
  * this macro will print a stack trace if it is executed in an atomic
- * context (spinlock, irq-handler, ...).
+ * context (spinlock, irq-handler, ...). Additional sections where blocking is
+ * not allowed can be annotated with non_block_start() and non_block_end()
+ * pairs.
  *
  * This is a useful debugging help to be able to catch problems early and not
  * be bitten later when the calling function happens to sleep when it is not
@@ -233,6 +235,23 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset);
 # define cant_sleep() \
 	do { __cant_sleep(__FILE__, __LINE__, 0); } while (0)
 # define sched_annotate_sleep()	(current->task_state_change = 0)
+/**
+ * non_block_start - annotate the start of section where sleeping is prohibited
+ *
+ * This is on behalf of the oom reaper, specifically when it is calling the mmu
+ * notifiers. The problem is that if the notifier were to block on, for example,
+ * mutex_lock() and if the process which holds that mutex were to perform a
+ * sleeping memory allocation, the oom reaper is now blocked on completion of
+ * that memory allocation. Other blocking calls like wait_event() pose similar
+ * issues.
+ */
+# define non_block_start() (current->non_block_count++)
+/**
+ * non_block_end - annotate the end of section where sleeping is prohibited
+ *
+ * Closes a section opened by non_block_start().
+ */
+# define non_block_end() WARN_ON(current->non_block_count-- == 0)
 #else
   static inline void ___might_sleep(const char *file, int line,
 				   int preempt_offset) { }
@@ -241,6 +260,8 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset);
 # define might_sleep() do { might_resched(); } while (0)
 # define cant_sleep() do { } while (0)
 # define sched_annotate_sleep() do { } while (0)
+# define non_block_start() do { } while (0)
+# define non_block_end() do { } while (0)
 #endif

 #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9f51932bd543..c5630f3dca1f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -974,6 +974,10 @@ struct task_struct {
 	struct mutex_waiter *blocked_on;
 #endif

+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+	int non_block_count;
+#endif
+
 #ifdef CONFIG_TRACE_IRQFLAGS
 	unsigned int irq_events;
 	unsigned long hardirq_enable_ip;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2b037f195473..57245770d6cc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3700,13 +3700,22 @@ static noinline void __schedule_bug(struct task_struct *prev)
 /*
  * Various schedule()-time debugging checks and statistics:
  */
-static inline void schedule_debug(struct task_struct *prev)
+static inline void schedule_debug(struct task_struct *prev, bool preempt)
 {
 #ifdef CONFIG_SCHED_STACK_END_CHECK
 	if (task_stack_end_corrupted(prev))
 		panic("corrupted stack end detected inside scheduler\n");
 #endif

+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+	if (!preempt && prev->state && prev->non_block_count) {
+		printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n",
+			prev->comm, prev->pid, prev->non_block_count);
+		dump_stack();
+		add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
+	}
+#endif
+
 	if (unlikely(in_atomic_preempt_off())) {
 		__schedule_bug(prev);
 		preempt_count_set(PREEMPT_DISABLED);
@@ -3813,7 +3822,7 @@ static void __sched notrace __schedule(bool preempt)
 	rq = cpu_rq(cpu);
 	prev = rq->curr;

-	schedule_debug(prev);
+	schedule_debug(prev, preempt);

 	if (sched_feat(HRTICK))
 		hrtick_clear(rq);
@@ -6570,7 +6579,7 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
 	rcu_sleep_check();

 	if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
-	     !is_idle_task(current)) ||
+	     !is_idle_task(current) && !current->non_block_count) ||
 	    system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING ||
 	    oops_in_progress)
 		return;
@@ -6586,8 +6595,8 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
 		"BUG: sleeping function called from invalid context at %s:%d\n",
 		file, line);
 	printk(KERN_ERR
-		"in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
-		in_atomic(), irqs_disabled(),
+		"in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n",
+		in_atomic(), irqs_disabled(), current->non_block_count,
 		current->pid, current->comm);

 	if (task_stack_end_corrupted(current))