author    Linus Torvalds <torvalds@linux-foundation.org>  2014-12-10 00:21:34 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-12-10 00:21:34 -0500
commit    86c6a2fddf0b89b494c7616f2c06cf915c4bff01 (patch)
tree      0e6930c93e5d49ead71b17fcadf0cc9ba28c3d2d /include
parent    bee2782f30f66898be3f74ad02e4d1f87a969694 (diff)
parent    fd7de1e8d5b2b2b35e71332fafb899f584597150 (diff)
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
 "The main changes in this cycle are:

   - 'Nested Sleep Debugging', activated when CONFIG_DEBUG_ATOMIC_SLEEP=y.

     This instruments might_sleep() checks to catch places that nest
     blocking primitives - such as mutex usage in a wait loop.  Such
     bugs can result in hard to debug races/hangs.

     Another category of invalid nesting that this facility will detect
     is the calling of blocking functions from within schedule() ->
     sched_submit_work() -> blk_schedule_flush_plug().

     There's some potential for false positives (if secondary blocking
     primitives themselves are not ready yet for this facility), but the
     kernel will warn once about such bugs per bootup, so the warning
     isn't much of a nuisance.

     This feature comes with a number of fixes, for problems uncovered
     with it, so no messages are expected normally.

   - Another round of sched/numa optimizations and refinements, for
     CONFIG_NUMA_BALANCING=y.

   - Another round of sched/dl fixes and refinements.

  Plus various smaller fixes and cleanups"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (54 commits)
  sched: Add missing rcu protection to wake_up_all_idle_cpus
  sched/deadline: Introduce start_hrtick_dl() for !CONFIG_SCHED_HRTICK
  sched/numa: Init numa balancing fields of init_task
  sched/deadline: Remove unnecessary definitions in cpudeadline.h
  sched/cpupri: Remove unnecessary definitions in cpupri.h
  sched/deadline: Fix rq->dl.pushable_tasks bug in push_dl_task()
  sched/fair: Fix stale overloaded status in the busiest group finding logic
  sched: Move p->nr_cpus_allowed check to select_task_rq()
  sched/completion: Document when to use wait_for_completion_io_*()
  sched: Update comments about CLONE_NEWUTS and CLONE_NEWIPC
  sched/fair: Kill task_struct::numa_entry and numa_group::task_list
  sched: Refactor task_struct to use numa_faults instead of numa_* pointers
  sched/deadline: Don't check CONFIG_SMP in switched_from_dl()
  sched/deadline: Reschedule from switched_from_dl() after a successful pull
  sched/deadline: Push task away if the deadline is equal to curr during wakeup
  sched/deadline: Add deadline rq status print
  sched/deadline: Fix artificial overrun introduced by yield_task_dl()
  sched/rt: Clean up check_preempt_equal_prio()
  sched/core: Use dl_bw_of() under rcu_read_lock_sched()
  sched: Check if we got a shallowest_idle_cpu before searching for least_loaded_cpu
  ...
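For illustration, the class of bug the new nested-sleep checks flag looks roughly like the sketch below; the wait queue, mutex and flag are hypothetical, not taken from this tree:

#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/wait.h>

static DEFINE_MUTEX(my_lock);		/* hypothetical */
static DECLARE_WAIT_QUEUE_HEAD(my_wq);	/* hypothetical */
static int my_cond;			/* hypothetical */

static void my_buggy_wait(void)
{
	DEFINE_WAIT(wait);

	for (;;) {
		/* sets current->state to TASK_INTERRUPTIBLE */
		prepare_to_wait(&my_wq, &wait, TASK_INTERRUPTIBLE);

		/*
		 * Nested blocking primitive: mutex_lock() may sleep and
		 * clobber the task state, losing the outer wakeup.  The
		 * might_sleep() inside it now warns once per bootup when
		 * CONFIG_DEBUG_ATOMIC_SLEEP=y.
		 */
		mutex_lock(&my_lock);
		if (my_cond) {
			mutex_unlock(&my_lock);
			break;
		}
		mutex_unlock(&my_lock);

		schedule();
	}
	finish_wait(&my_wq, &wait);
}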
Diffstat (limited to 'include')
-rw-r--r--	include/asm-generic/preempt.h	3
-rw-r--r--	include/linux/freezer.h	50
-rw-r--r--	include/linux/init_task.h	10
-rw-r--r--	include/linux/kernel.h	5
-rw-r--r--	include/linux/sched.h	87
-rw-r--r--	include/linux/wait.h	80
-rw-r--r--	include/net/sock.h	1
-rw-r--r--	include/trace/events/sched.h	9
-rw-r--r--	include/uapi/linux/sched.h	4
9 files changed, 167 insertions(+), 82 deletions(-)
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
index 1cd3f5d767a8..eb6f9e6c3075 100644
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -23,9 +23,6 @@ static __always_inline void preempt_count_set(int pc)
 /*
  * must be macros to avoid header recursion hell
  */
-#define task_preempt_count(p) \
-	(task_thread_info(p)->preempt_count & ~PREEMPT_NEED_RESCHED)
-
 #define init_task_preempt_count(p) do { \
 	task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \
 } while (0)
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 7fd81b8c4897..6b7fd9cf5ea2 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -246,15 +246,6 @@ static inline int freezable_schedule_hrtimeout_range(ktime_t *expires,
  * defined in <linux/wait.h>
  */

-#define wait_event_freezekillable(wq, condition)		\
-({								\
-	int __retval;						\
-	freezer_do_not_count();					\
-	__retval = wait_event_killable(wq, (condition));	\
-	freezer_count();					\
-	__retval;						\
-})
-
 /* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
 #define wait_event_freezekillable_unsafe(wq, condition)	\
 ({								\
@@ -265,35 +256,6 @@ static inline int freezable_schedule_hrtimeout_range(ktime_t *expires,
 	__retval;						\
 })

-#define wait_event_freezable(wq, condition)			\
-({								\
-	int __retval;						\
-	freezer_do_not_count();					\
-	__retval = wait_event_interruptible(wq, (condition));	\
-	freezer_count();					\
-	__retval;						\
-})
-
-#define wait_event_freezable_timeout(wq, condition, timeout)	\
-({								\
-	long __retval = timeout;				\
-	freezer_do_not_count();					\
-	__retval = wait_event_interruptible_timeout(wq, (condition),	\
-				__retval);			\
-	freezer_count();					\
-	__retval;						\
-})
-
-#define wait_event_freezable_exclusive(wq, condition)		\
-({								\
-	int __retval;						\
-	freezer_do_not_count();					\
-	__retval = wait_event_interruptible_exclusive(wq, condition);\
-	freezer_count();					\
-	__retval;						\
-})
-
-
 #else /* !CONFIG_FREEZER */
 static inline bool frozen(struct task_struct *p) { return false; }
 static inline bool freezing(struct task_struct *p) { return false; }
@@ -331,18 +293,6 @@ static inline void set_freezable(void) {}
 #define freezable_schedule_hrtimeout_range(expires, delta, mode)	\
 	schedule_hrtimeout_range(expires, delta, mode)

-#define wait_event_freezable(wq, condition)		\
-	wait_event_interruptible(wq, condition)
-
-#define wait_event_freezable_timeout(wq, condition, timeout)	\
-	wait_event_interruptible_timeout(wq, condition, timeout)
-
-#define wait_event_freezable_exclusive(wq, condition)	\
-	wait_event_interruptible_exclusive(wq, condition)
-
-#define wait_event_freezekillable(wq, condition)	\
-	wait_event_killable(wq, condition)
-
 #define wait_event_freezekillable_unsafe(wq, condition)	\
 	wait_event_killable(wq, condition)

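The freezable wrappers removed here are not dropped from the kernel: they reappear below in include/linux/wait.h, reimplemented on top of ___wait_event(). A typical caller keeps the same shape; a minimal sketch with hypothetical names (my_wq, my_todo):

#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(my_wq);	/* hypothetical */
static int my_todo;			/* hypothetical */

static int my_thread_fn(void *data)
{
	set_freezable();	/* opt this kthread in to the freezer */

	while (!kthread_should_stop()) {
		/*
		 * Sleeps TASK_INTERRUPTIBLE and freezes transparently
		 * across suspend; returns -ERESTARTSYS if interrupted.
		 */
		if (wait_event_freezable(my_wq,
					 my_todo || kthread_should_stop()))
			continue;
		my_todo = 0;
		/* ... do the actual work ... */
	}
	return 0;
}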
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index d996aef8044f..3037fc085e8e 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -166,6 +166,15 @@ extern struct task_group root_task_group;
 # define INIT_RT_MUTEXES(tsk)
 #endif

+#ifdef CONFIG_NUMA_BALANCING
+# define INIT_NUMA_BALANCING(tsk)					\
+	.numa_preferred_nid = -1,					\
+	.numa_group = NULL,						\
+	.numa_faults = NULL,
+#else
+# define INIT_NUMA_BALANCING(tsk)
+#endif
+
 /*
  * INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -237,6 +246,7 @@ extern struct task_group root_task_group;
 	INIT_CPUSET_SEQ(tsk)						\
 	INIT_RT_MUTEXES(tsk)						\
 	INIT_VTIME(tsk)							\
+	INIT_NUMA_BALANCING(tsk)					\
 }


diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3d770f5564b8..446d76a87ba1 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -162,6 +162,7 @@ extern int _cond_resched(void);
 #endif

 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+  void ___might_sleep(const char *file, int line, int preempt_offset);
   void __might_sleep(const char *file, int line, int preempt_offset);
 /**
  * might_sleep - annotation for functions that can sleep
@@ -175,10 +176,14 @@ extern int _cond_resched(void);
  */
 # define might_sleep() \
 	do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
+# define sched_annotate_sleep()	__set_current_state(TASK_RUNNING)
 #else
+  static inline void ___might_sleep(const char *file, int line,
+				   int preempt_offset) { }
   static inline void __might_sleep(const char *file, int line,
 				   int preempt_offset) { }
 # define might_sleep() do { might_resched(); } while (0)
+# define sched_annotate_sleep() do { } while (0)
 #endif

 #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0)
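sched_annotate_sleep() is the opt-out for wait loops that intentionally block again -- for instance taking a sleeping lock -- before rechecking their condition; the net/sock.h hunk below uses it in sk_wait_event(). A hedged sketch of the intended placement, with hypothetical names (my_wq, my_lock, my_cond):

#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/wait.h>

static DEFINE_MUTEX(my_lock);		/* hypothetical */
static DECLARE_WAIT_QUEUE_HEAD(my_wq);	/* hypothetical */
static int my_cond;			/* hypothetical */

static int my_wait(long timeo)
{
	DEFINE_WAIT(wait);

	while (!my_cond && timeo) {
		prepare_to_wait(&my_wq, &wait, TASK_INTERRUPTIBLE);
		if (!my_cond)
			timeo = schedule_timeout(timeo);
		/*
		 * We can still be TASK_INTERRUPTIBLE here (the condition
		 * may have raced true before schedule_timeout()), and
		 * mutex_lock() sleeps: declare the nesting as known-safe.
		 */
		sched_annotate_sleep();
		mutex_lock(&my_lock);
		/* ... recheck shared state under the lock ... */
		mutex_unlock(&my_lock);
	}
	finish_wait(&my_wq, &wait);
	return my_cond;
}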
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 706a9f744909..55f5ee7cc3d3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -243,6 +243,43 @@ extern char ___assert_task_state[1 - 2*!!(
 	((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
 	 (task->flags & PF_FROZEN) == 0)

+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+
+#define __set_task_state(tsk, state_value)			\
+	do {							\
+		(tsk)->task_state_change = _THIS_IP_;		\
+		(tsk)->state = (state_value);			\
+	} while (0)
+#define set_task_state(tsk, state_value)			\
+	do {							\
+		(tsk)->task_state_change = _THIS_IP_;		\
+		set_mb((tsk)->state, (state_value));		\
+	} while (0)
+
+/*
+ * set_current_state() includes a barrier so that the write of current->state
+ * is correctly serialised wrt the caller's subsequent test of whether to
+ * actually sleep:
+ *
+ *	set_current_state(TASK_UNINTERRUPTIBLE);
+ *	if (do_i_need_to_sleep())
+ *		schedule();
+ *
+ * If the caller does not need such serialisation then use __set_current_state()
+ */
+#define __set_current_state(state_value)			\
+	do {							\
+		current->task_state_change = _THIS_IP_;		\
+		current->state = (state_value);			\
+	} while (0)
+#define set_current_state(state_value)				\
+	do {							\
+		current->task_state_change = _THIS_IP_;		\
+		set_mb(current->state, (state_value));		\
+	} while (0)
+
+#else
+
 #define __set_task_state(tsk, state_value)		\
 	do { (tsk)->state = (state_value); } while (0)
 #define set_task_state(tsk, state_value)		\
@@ -259,11 +296,13 @@ extern char ___assert_task_state[1 - 2*!!(
  *
  * If the caller does not need such serialisation then use __set_current_state()
  */
 #define __set_current_state(state_value)		\
 	do { current->state = (state_value); } while (0)
 #define set_current_state(state_value)			\
 	set_mb(current->state, (state_value))

+#endif
+
 /* Task command name length */
 #define TASK_COMM_LEN 16

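Spelled out, the pattern that comment describes pairs a waiter with a waker; a minimal sketch (my_cond and my_waiter are hypothetical, and my_waiter is assumed to point at the waiting task):

#include <linux/sched.h>

static int my_cond;			/* hypothetical condition */
static struct task_struct *my_waiter;	/* hypothetical waiter task */

static void wait_side(void)
{
	set_current_state(TASK_UNINTERRUPTIBLE); /* state write + barrier */
	if (!my_cond)				 /* tested after the barrier */
		schedule();
	__set_current_state(TASK_RUNNING);
}

static void wake_side(void)
{
	my_cond = 1;			/* publish the condition first */
	wake_up_process(my_waiter);	/* try_to_wake_up() orders this
					 * against the waiter's checks */
}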
@@ -1558,28 +1597,23 @@ struct task_struct {
 	struct numa_group *numa_group;

 	/*
-	 * Exponential decaying average of faults on a per-node basis.
-	 * Scheduling placement decisions are made based on the these counts.
-	 * The values remain static for the duration of a PTE scan
+	 * numa_faults is an array split into four regions:
+	 * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
+	 * in this precise order.
+	 *
+	 * faults_memory: Exponential decaying average of faults on a per-node
+	 * basis. Scheduling placement decisions are made based on these
+	 * counts. The values remain static for the duration of a PTE scan.
+	 * faults_cpu: Track the nodes the process was running on when a NUMA
+	 * hinting fault was incurred.
+	 * faults_memory_buffer and faults_cpu_buffer: Record faults per node
+	 * during the current scan window. When the scan completes, the counts
+	 * in faults_memory and faults_cpu decay and these values are copied.
 	 */
-	unsigned long *numa_faults_memory;
+	unsigned long *numa_faults;
 	unsigned long total_numa_faults;

 	/*
-	 * numa_faults_buffer records faults per node during the current
-	 * scan window. When the scan completes, the counts in
-	 * numa_faults_memory decay and these values are copied.
-	 */
-	unsigned long *numa_faults_buffer_memory;
-
-	/*
-	 * Track the nodes the process was running on when a NUMA hinting
-	 * fault was incurred.
-	 */
-	unsigned long *numa_faults_cpu;
-	unsigned long *numa_faults_buffer_cpu;
-
-	/*
 	 * numa_faults_locality tracks if faults recorded during the last
 	 * scan window were remote/local. The task scan period is adapted
 	 * based on the locality of the faults with different weights
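An accessor consistent with the four-region layout documented above -- essentially the helper the refactoring commit adds to kernel/sched/fair.c, reproduced here as a sketch; NR_NUMA_HINT_FAULT_TYPES (private/shared) and nr_node_ids are assumed from that context:

/* Statistics region selectors, in the array order documented above. */
enum numa_faults_stats {
	NUMA_MEM = 0,	/* faults_memory */
	NUMA_CPU,	/* faults_cpu */
	NUMA_MEMBUF,	/* faults_memory_buffer */
	NUMA_CPUBUF	/* faults_cpu_buffer */
};

/*
 * Each region holds NR_NUMA_HINT_FAULT_TYPES slots per node, so a single
 * unsigned long array replaces the four old per-purpose pointers.
 */
static inline int task_faults_idx(enum numa_faults_stats s, int nid, int priv)
{
	return NR_NUMA_HINT_FAULT_TYPES * (s * nr_node_ids + nid) + priv;
}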
@@ -1661,6 +1695,9 @@ struct task_struct {
 	unsigned int sequential_io;
 	unsigned int sequential_io_avg;
 #endif
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+	unsigned long	task_state_change;
+#endif
 };

 /* Future-safe accessor for struct task_struct's cpus_allowed. */
@@ -2052,6 +2089,10 @@ static inline void tsk_restore_flags(struct task_struct *task,
 	task->flags |= orig_flags & flags;
 }

+extern int cpuset_cpumask_can_shrink(const struct cpumask *cur,
+				     const struct cpumask *trial);
+extern int task_can_attach(struct task_struct *p,
+			   const struct cpumask *cs_cpus_allowed);
 #ifdef CONFIG_SMP
 extern void do_set_cpus_allowed(struct task_struct *p,
 			       const struct cpumask *new_mask);
@@ -2760,7 +2801,7 @@ static inline int signal_pending_state(long state, struct task_struct *p)
 extern int _cond_resched(void);

 #define cond_resched() ({			\
-	__might_sleep(__FILE__, __LINE__, 0);	\
+	___might_sleep(__FILE__, __LINE__, 0);	\
 	_cond_resched();			\
 })

@@ -2773,14 +2814,14 @@ extern int __cond_resched_lock(spinlock_t *lock);
 #endif

 #define cond_resched_lock(lock) ({				\
-	__might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);	\
+	___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
 	__cond_resched_lock(lock);				\
 })

 extern int __cond_resched_softirq(void);

 #define cond_resched_softirq() ({					\
-	__might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET);	\
+	___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET);	\
 	__cond_resched_softirq();					\
 })

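The switch from __might_sleep() to the new ___might_sleep() exempts cond_resched() and friends from the nested-sleep test: being preempted preserves task state, so they stay legal inside wait loops while still checking for atomic context. The common call pattern, with hypothetical helpers (struct item, process_one):

#include <linux/sched.h>
#include <linux/types.h>

struct item;				/* hypothetical */
void process_one(struct item *it);	/* hypothetical */

static void process_many(struct item **items, size_t n)
{
	size_t i;

	for (i = 0; i < n; i++) {
		process_one(items[i]);
		cond_resched();	/* yield if needed; still checks for
				 * atomic context via ___might_sleep() */
	}
}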
diff --git a/include/linux/wait.h b/include/linux/wait.h
index e4a8eb9312ea..2232ed16635a 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -13,9 +13,12 @@ typedef struct __wait_queue wait_queue_t;
 typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
 int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);

+/* __wait_queue::flags */
+#define WQ_FLAG_EXCLUSIVE	0x01
+#define WQ_FLAG_WOKEN		0x02
+
 struct __wait_queue {
 	unsigned int		flags;
-#define WQ_FLAG_EXCLUSIVE	0x01
 	void			*private;
 	wait_queue_func_t	func;
 	struct list_head	task_list;
@@ -258,11 +261,37 @@ __out: __ret; \
  */
 #define wait_event(wq, condition)					\
 do {									\
+	might_sleep();							\
 	if (condition)							\
 		break;							\
 	__wait_event(wq, condition);					\
 } while (0)

+#define __wait_event_freezable(wq, condition)				\
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,		\
+			    schedule(); try_to_freeze())
+
+/**
+ * wait_event_freezable - sleep (or freeze) until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE -- so as not to contribute
+ * to system load) until the @condition evaluates to true. The
+ * @condition is checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ */
+#define wait_event_freezable(wq, condition)				\
+({									\
+	int __ret = 0;							\
+	might_sleep();							\
+	if (!(condition))						\
+		__ret = __wait_event_freezable(wq, condition);		\
+	__ret;								\
+})
+
 #define __wait_event_timeout(wq, condition, timeout)			\
 	___wait_event(wq, ___wait_cond_timeout(condition),		\
 		      TASK_UNINTERRUPTIBLE, 0, timeout,			\
@@ -290,11 +319,30 @@ do { \
 #define wait_event_timeout(wq, condition, timeout)			\
 ({									\
 	long __ret = timeout;						\
+	might_sleep();							\
 	if (!___wait_cond_timeout(condition))				\
 		__ret = __wait_event_timeout(wq, condition, timeout);	\
 	__ret;								\
 })

+#define __wait_event_freezable_timeout(wq, condition, timeout)		\
+	___wait_event(wq, ___wait_cond_timeout(condition),		\
+		      TASK_INTERRUPTIBLE, 0, timeout,			\
+		      __ret = schedule_timeout(__ret); try_to_freeze())
+
+/*
+ * like wait_event_timeout() -- except it uses TASK_INTERRUPTIBLE to avoid
+ * increasing load and is freezable.
+ */
+#define wait_event_freezable_timeout(wq, condition, timeout)		\
+({									\
+	long __ret = timeout;						\
+	might_sleep();							\
+	if (!___wait_cond_timeout(condition))				\
+		__ret = __wait_event_freezable_timeout(wq, condition, timeout); \
+	__ret;								\
+})
+
 #define __wait_event_cmd(wq, condition, cmd1, cmd2)			\
 	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
 			    cmd1; schedule(); cmd2)
@@ -315,6 +363,7 @@ do { \
  */
 #define wait_event_cmd(wq, condition, cmd1, cmd2)			\
 do {									\
+	might_sleep();							\
 	if (condition)							\
 		break;							\
 	__wait_event_cmd(wq, condition, cmd1, cmd2);			\
@@ -342,6 +391,7 @@ do { \
 #define wait_event_interruptible(wq, condition)				\
 ({									\
 	int __ret = 0;							\
+	might_sleep();							\
 	if (!(condition))						\
 		__ret = __wait_event_interruptible(wq, condition);	\
 	__ret;								\
@@ -375,6 +425,7 @@ do { \
 #define wait_event_interruptible_timeout(wq, condition, timeout)	\
 ({									\
 	long __ret = timeout;						\
+	might_sleep();							\
 	if (!___wait_cond_timeout(condition))				\
 		__ret = __wait_event_interruptible_timeout(wq,		\
 						condition, timeout);	\
@@ -425,6 +476,7 @@ do { \
 #define wait_event_hrtimeout(wq, condition, timeout)			\
 ({									\
 	int __ret = 0;							\
+	might_sleep();							\
 	if (!(condition))						\
 		__ret = __wait_event_hrtimeout(wq, condition, timeout,	\
 					       TASK_UNINTERRUPTIBLE);	\
@@ -450,6 +502,7 @@ do { \
 #define wait_event_interruptible_hrtimeout(wq, condition, timeout)	\
 ({									\
 	long __ret = 0;							\
+	might_sleep();							\
 	if (!(condition))						\
 		__ret = __wait_event_hrtimeout(wq, condition, timeout,	\
 					       TASK_INTERRUPTIBLE);	\
@@ -463,12 +516,27 @@ do { \
 #define wait_event_interruptible_exclusive(wq, condition)		\
 ({									\
 	int __ret = 0;							\
+	might_sleep();							\
 	if (!(condition))						\
 		__ret = __wait_event_interruptible_exclusive(wq, condition);\
 	__ret;								\
 })


+#define __wait_event_freezable_exclusive(wq, condition)			\
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0,		\
+			schedule(); try_to_freeze())
+
+#define wait_event_freezable_exclusive(wq, condition)			\
+({									\
+	int __ret = 0;							\
+	might_sleep();							\
+	if (!(condition))						\
+		__ret = __wait_event_freezable_exclusive(wq, condition);\
+	__ret;								\
+})
+
+
 #define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \
 ({									\
 	int __ret = 0;							\
@@ -637,6 +705,7 @@ do { \
 #define wait_event_killable(wq, condition)				\
 ({									\
 	int __ret = 0;							\
+	might_sleep();							\
 	if (!(condition))						\
 		__ret = __wait_event_killable(wq, condition);		\
 	__ret;								\
@@ -830,6 +899,8 @@ void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int sta
 long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state);
 void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
 void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, unsigned int mode, void *key);
+long wait_woken(wait_queue_t *wait, unsigned mode, long timeout);
+int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);

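wait_woken() and woken_wake_function() are this cycle's infrastructure for nested blocking: together with the WQ_FLAG_WOKEN bit added at the top of this file, they close the race between testing a condition and sleeping without depending on the task state. A minimal consumer sketch, with a hypothetical wait queue and condition:

#include <linux/sched.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(my_wq);	/* hypothetical */
static int my_cond;			/* hypothetical */

static long my_wait_woken(long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);

	add_wait_queue(&my_wq, &wait);
	while (!my_cond) {
		/*
		 * No set_current_state() here: wait_woken() manages the
		 * task state itself and checks WQ_FLAG_WOKEN, so blocking
		 * calls in this loop body cannot lose a wakeup.
		 */
		timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
		if (!timeout || signal_pending(current))
			break;
	}
	remove_wait_queue(&my_wq, &wait);
	return timeout;
}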
@@ -886,6 +957,7 @@ extern int bit_wait_io_timeout(struct wait_bit_key *);
 static inline int
 wait_on_bit(void *word, int bit, unsigned mode)
 {
+	might_sleep();
 	if (!test_bit(bit, word))
 		return 0;
 	return out_of_line_wait_on_bit(word, bit,
@@ -910,6 +982,7 @@ wait_on_bit(void *word, int bit, unsigned mode)
 static inline int
 wait_on_bit_io(void *word, int bit, unsigned mode)
 {
+	might_sleep();
 	if (!test_bit(bit, word))
 		return 0;
 	return out_of_line_wait_on_bit(word, bit,
@@ -936,6 +1009,7 @@ wait_on_bit_io(void *word, int bit, unsigned mode)
 static inline int
 wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode)
 {
+	might_sleep();
 	if (!test_bit(bit, word))
 		return 0;
 	return out_of_line_wait_on_bit(word, bit, action, mode);
@@ -963,6 +1037,7 @@ wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode
 static inline int
 wait_on_bit_lock(void *word, int bit, unsigned mode)
 {
+	might_sleep();
 	if (!test_and_set_bit(bit, word))
 		return 0;
 	return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode);
@@ -986,6 +1061,7 @@ wait_on_bit_lock(void *word, int bit, unsigned mode)
 static inline int
 wait_on_bit_lock_io(void *word, int bit, unsigned mode)
 {
+	might_sleep();
 	if (!test_and_set_bit(bit, word))
 		return 0;
 	return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode);
@@ -1011,6 +1087,7 @@ wait_on_bit_lock_io(void *word, int bit, unsigned mode)
 static inline int
 wait_on_bit_lock_action(void *word, int bit, wait_bit_action_f *action, unsigned mode)
 {
+	might_sleep();
 	if (!test_and_set_bit(bit, word))
 		return 0;
 	return out_of_line_wait_on_bit_lock(word, bit, action, mode);
@@ -1029,6 +1106,7 @@ wait_on_bit_lock_action(void *word, int bit, wait_bit_action_f *action, unsigned
 static inline
 int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode)
 {
+	might_sleep();
 	if (atomic_read(val) == 0)
 		return 0;
 	return out_of_line_wait_on_atomic_t(val, action, mode);
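With might_sleep() ahead of the fast-path test, all of these helpers can now warn when called from atomic context even if the bit is already clear and no sleep would have happened. A sketch of the usual bit-wait pairing, with a hypothetical flag word:

#include <linux/bitops.h>
#include <linux/wait.h>

static unsigned long my_flags;	/* hypothetical flag word */
#define MY_BUSY 0		/* hypothetical bit number */

static int wait_until_idle(void)
{
	/* returns 0 once the bit is clear; might_sleep() fires first */
	return wait_on_bit(&my_flags, MY_BUSY, TASK_UNINTERRUPTIBLE);
}

static void mark_idle(void)
{
	clear_bit(MY_BUSY, &my_flags);
	smp_mb__after_atomic();	/* order the clear before the wakeup */
	wake_up_bit(&my_flags, MY_BUSY);
}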
diff --git a/include/net/sock.h b/include/net/sock.h
index 7db3db112baa..e6f235ebf6c9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -897,6 +897,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
 		if (!__rc) {					\
 			*(__timeo) = schedule_timeout(*(__timeo)); \
 		}						\
+		sched_annotate_sleep();				\
 		lock_sock(__sk);				\
 		__rc = __condition;				\
 		__rc;						\
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 0a68d5ae584e..30fedaf3e56a 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -97,16 +97,19 @@ static inline long __trace_sched_switch_state(struct task_struct *p)
 	long state = p->state;

 #ifdef CONFIG_PREEMPT
+#ifdef CONFIG_SCHED_DEBUG
+	BUG_ON(p != current);
+#endif /* CONFIG_SCHED_DEBUG */
 	/*
 	 * For all intents and purposes a preempted task is a running task.
 	 */
-	if (task_preempt_count(p) & PREEMPT_ACTIVE)
+	if (preempt_count() & PREEMPT_ACTIVE)
 		state = TASK_RUNNING | TASK_STATE_MAX;
-#endif
+#endif /* CONFIG_PREEMPT */

 	return state;
 }
-#endif
+#endif /* CREATE_TRACE_POINTS */

 /*
  * Tracepoint for task switches, performed by the scheduler:
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index b932be9f5c5b..cc89ddefa926 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -23,8 +23,8 @@
 #define CLONE_CHILD_SETTID	0x01000000	/* set the TID in the child */
 /* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state)
    and is now available for re-use. */
-#define CLONE_NEWUTS		0x04000000	/* New utsname group? */
-#define CLONE_NEWIPC		0x08000000	/* New ipcs */
+#define CLONE_NEWUTS		0x04000000	/* New utsname namespace */
+#define CLONE_NEWIPC		0x08000000	/* New ipc namespace */
 #define CLONE_NEWUSER		0x10000000	/* New user namespace */
 #define CLONE_NEWPID		0x20000000	/* New pid namespace */
 #define CLONE_NEWNET		0x40000000	/* New network namespace */