Diffstat (limited to 'include/linux/sched.h')
-rw-r--r--	include/linux/sched.h	167
1 file changed, 150 insertions(+), 17 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e27baeeda3f4..045b0d227846 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -22,6 +22,7 @@ struct sched_param {
 #include <linux/errno.h>
 #include <linux/nodemask.h>
 #include <linux/mm_types.h>
+#include <linux/preempt.h>
 
 #include <asm/page.h>
 #include <asm/ptrace.h>
@@ -427,6 +428,14 @@ struct task_cputime {
 		.sum_exec_runtime = 0,				\
 	}
 
+#define PREEMPT_ENABLED	(PREEMPT_NEED_RESCHED)
+
+#ifdef CONFIG_PREEMPT_COUNT
+#define PREEMPT_DISABLED	(1 + PREEMPT_ENABLED)
+#else
+#define PREEMPT_DISABLED	PREEMPT_ENABLED
+#endif
+
 /*
  * Disable preemption until the scheduler is running.
  * Reset by start_kernel()->sched_init()->init_idle().
@@ -434,7 +443,7 @@ struct task_cputime {
  * We include PREEMPT_ACTIVE to avoid cond_resched() from working
  * before the scheduler is active -- see should_resched().
  */
-#define INIT_PREEMPT_COUNT	(1 + PREEMPT_ACTIVE)
+#define INIT_PREEMPT_COUNT	(PREEMPT_DISABLED + PREEMPT_ACTIVE)
 
 /**
  * struct thread_group_cputimer - thread group interval timer counts
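For reference, a standalone sketch of how the new macro expands in both configurations; the numeric values below are hypothetical placeholders, since the real PREEMPT_NEED_RESCHED and PREEMPT_ACTIVE come from architecture headers and depend on the config:

/* Illustrative expansion only; values are made-up placeholders. */
#include <stdio.h>

#define PREEMPT_NEED_RESCHED	0x80000000UL	/* placeholder */
#define PREEMPT_ACTIVE		0x10000000UL	/* placeholder */
#define CONFIG_PREEMPT_COUNT	1		/* comment out to see the other case */

#define PREEMPT_ENABLED		(PREEMPT_NEED_RESCHED)

#ifdef CONFIG_PREEMPT_COUNT
#define PREEMPT_DISABLED	(1 + PREEMPT_ENABLED)
#else
#define PREEMPT_DISABLED	PREEMPT_ENABLED
#endif

#define INIT_PREEMPT_COUNT	(PREEMPT_DISABLED + PREEMPT_ACTIVE)

int main(void)
{
	/* 0x90000001 with CONFIG_PREEMPT_COUNT, 0x90000000 without;
	 * either way PREEMPT_ACTIVE is set, so cond_resched() stays
	 * inert until init_idle() resets the count. */
	printf("INIT_PREEMPT_COUNT = %#lx\n", (unsigned long)INIT_PREEMPT_COUNT);
	return 0;
}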
@@ -768,6 +777,7 @@ enum cpu_idle_type {
 #define SD_ASYM_PACKING		0x0800	/* Place busy groups earlier in the domain */
 #define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */
 #define SD_OVERLAP		0x2000	/* sched_domains of this level overlap */
+#define SD_NUMA			0x4000	/* cross-node balancing */
 
 extern int __weak arch_sd_sibiling_asym_packing(void);
 
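Like the other SD_* topology flags above, SD_NUMA would be tested against a domain's ->flags word. A minimal sketch of walking a CPU's domain hierarchy to find the first cross-node level; for_each_domain() lives in the private kernel/sched/sched.h and must be called under rcu_read_lock(), and the helper name here is made up:

/* Sketch only: find the lowest domain above @cpu that balances across nodes. */
static struct sched_domain *lowest_numa_domain(int cpu)
{
	struct sched_domain *sd;

	for_each_domain(cpu, sd) {
		if (sd->flags & SD_NUMA)
			return sd;
	}
	return NULL;
}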
@@ -811,6 +821,10 @@ struct sched_domain {
 
 	u64 last_update;
 
+	/* idle_balance() stats */
+	u64 max_newidle_lb_cost;
+	unsigned long next_decay_max_lb_cost;
+
 #ifdef CONFIG_SCHEDSTATS
 	/* load_balance() stats */
 	unsigned int lb_count[CPU_MAX_IDLE_TYPES];
@@ -1029,6 +1043,8 @@ struct task_struct {
 	struct task_struct *last_wakee;
 	unsigned long wakee_flips;
 	unsigned long wakee_flip_decay_ts;
+
+	int wake_cpu;
 #endif
 	int on_rq;
 
@@ -1324,10 +1340,41 @@ struct task_struct {
 #endif
 #ifdef CONFIG_NUMA_BALANCING
 	int numa_scan_seq;
-	int numa_migrate_seq;
 	unsigned int numa_scan_period;
+	unsigned int numa_scan_period_max;
+	int numa_preferred_nid;
+	int numa_migrate_deferred;
+	unsigned long numa_migrate_retry;
 	u64 node_stamp;			/* migration stamp */
 	struct callback_head numa_work;
+
+	struct list_head numa_entry;
+	struct numa_group *numa_group;
+
+	/*
+	 * Exponential decaying average of faults on a per-node basis.
+	 * Scheduling placement decisions are made based on these counts.
+	 * The values remain static for the duration of a PTE scan.
+	 */
+	unsigned long *numa_faults;
+	unsigned long total_numa_faults;
+
+	/*
+	 * numa_faults_buffer records faults per node during the current
+	 * scan window. When the scan completes, the counts in numa_faults
+	 * decay and these values are copied.
+	 */
+	unsigned long *numa_faults_buffer;
+
+	/*
+	 * numa_faults_locality tracks if faults recorded during the last
+	 * scan window were remote/local. The task scan period is adapted
+	 * based on the locality of the faults with different weights
+	 * depending on whether they were shared or private faults.
+	 */
+	unsigned long numa_faults_locality[2];
+
+	unsigned long numa_pages_migrated;
 #endif /* CONFIG_NUMA_BALANCING */
 
 	struct rcu_head rcu;
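The comments above describe numa_faults as a per-node decaying average that absorbs numa_faults_buffer at the end of each scan window. A rough sketch of that decay-and-copy step follows; the actual code lives in kernel/sched/fair.c, indexes the arrays per node and per shared/private fault class, and may use a different decay, so treat this as illustrative only (numa_fold_faults is a made-up name):

/* Sketch: fold one scan window's fault counts into the decaying averages. */
static void numa_fold_faults(struct task_struct *p)
{
	unsigned long total = 0;
	int nid;

	for (nid = 0; nid < nr_node_ids; nid++) {
		/* halve the long-term average, then add this window's faults */
		p->numa_faults[nid] = (p->numa_faults[nid] >> 1) +
				      p->numa_faults_buffer[nid];
		p->numa_faults_buffer[nid] = 0;

		total += p->numa_faults[nid];
	}
	p->total_numa_faults = total;
}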
@@ -1412,16 +1459,33 @@ struct task_struct {
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
+#define TNF_MIGRATED	0x01
+#define TNF_NO_GROUP	0x02
+#define TNF_SHARED	0x04
+#define TNF_FAULT_LOCAL	0x08
+
 #ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int node, int pages, bool migrated);
+extern void task_numa_fault(int last_node, int node, int pages, int flags);
+extern pid_t task_numa_group_id(struct task_struct *p);
 extern void set_numabalancing_state(bool enabled);
+extern void task_numa_free(struct task_struct *p);
+
+extern unsigned int sysctl_numa_balancing_migrate_deferred;
 #else
-static inline void task_numa_fault(int node, int pages, bool migrated)
+static inline void task_numa_fault(int last_node, int node, int pages,
+				   int flags)
 {
 }
+static inline pid_t task_numa_group_id(struct task_struct *p)
+{
+	return 0;
+}
 static inline void set_numabalancing_state(bool enabled)
 {
 }
+static inline void task_numa_free(struct task_struct *p)
+{
+}
 #endif
 
 static inline struct pid *task_pid(struct task_struct *task)
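With the signature change above, callers in the NUMA hinting-fault path report the node the data was last accessed from plus a TNF_* flag mask rather than a bare "migrated" boolean. A hedged sketch of what such a call site might look like; the function and variable names here are illustrative, not the actual mm/ code:

/* Sketch of a hinting-fault report under the new task_numa_fault() API. */
static void report_numa_hinting_fault(int last_nid, int page_nid,
				      int nr_pages, bool migrated, bool shared)
{
	int flags = 0;

	if (migrated)
		flags |= TNF_MIGRATED;
	if (shared)
		flags |= TNF_SHARED;
	if (page_nid == numa_node_id())
		flags |= TNF_FAULT_LOCAL;

	task_numa_fault(last_nid, page_nid, nr_pages, flags);
}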
@@ -1974,7 +2038,7 @@ extern void wake_up_new_task(struct task_struct *tsk);
 #else
 static inline void kick_process(struct task_struct *tsk) { }
 #endif
-extern void sched_fork(struct task_struct *p);
+extern void sched_fork(unsigned long clone_flags, struct task_struct *p);
 extern void sched_dead(struct task_struct *p);
 
 extern void proc_caches_init(void);
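Since sched_fork() still returns void but now takes the clone flags, the call in copy_process() (kernel/fork.c) would presumably just pass them through, along these lines:

	/* Sketch: the fork path forwards the caller's clone flags. */
	sched_fork(clone_flags, p);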
@@ -2401,11 +2465,6 @@ static inline int signal_pending_state(long state, struct task_struct *p)
 	return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
 }
 
-static inline int need_resched(void)
-{
-	return unlikely(test_thread_flag(TIF_NEED_RESCHED));
-}
-
 /*
  * cond_resched() and cond_resched_lock(): latency reduction via
  * explicit rescheduling in places that are safe. The return
@@ -2474,36 +2533,105 @@ static inline int tsk_is_polling(struct task_struct *p)
 {
 	return task_thread_info(p)->status & TS_POLLING;
 }
-static inline void current_set_polling(void)
+static inline void __current_set_polling(void)
 {
 	current_thread_info()->status |= TS_POLLING;
 }
 
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+	__current_set_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 */
+	smp_mb();
+
+	return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
 {
 	current_thread_info()->status &= ~TS_POLLING;
-	smp_mb__after_clear_bit();
+}
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+	__current_clr_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 */
+	smp_mb();
+
+	return unlikely(tif_need_resched());
 }
 #elif defined(TIF_POLLING_NRFLAG)
 static inline int tsk_is_polling(struct task_struct *p)
 {
 	return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
 }
-static inline void current_set_polling(void)
+
+static inline void __current_set_polling(void)
 {
 	set_thread_flag(TIF_POLLING_NRFLAG);
 }
 
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+	__current_set_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 *
+	 * XXX: assumes set/clear bit are identical barrier wise.
+	 */
+	smp_mb__after_clear_bit();
+
+	return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
 {
 	clear_thread_flag(TIF_POLLING_NRFLAG);
 }
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+	__current_clr_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 */
+	smp_mb__after_clear_bit();
+
+	return unlikely(tif_need_resched());
+}
+
 #else
 static inline int tsk_is_polling(struct task_struct *p) { return 0; }
-static inline void current_set_polling(void) { }
-static inline void current_clr_polling(void) { }
+static inline void __current_set_polling(void) { }
+static inline void __current_clr_polling(void) { }
+
+static inline bool __must_check current_set_polling_and_test(void)
+{
+	return unlikely(tif_need_resched());
+}
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+	return unlikely(tif_need_resched());
+}
 #endif
 
+static __always_inline bool need_resched(void)
+{
+	return unlikely(tif_need_resched());
+}
+
 /*
  * Thread group CPU time accounting.
  */
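The __current_set_polling()/__current_clr_polling() primitives and their __must_check *_and_test() variants are meant for idle loops that poll on TIF_NEED_RESCHED. A rough sketch of how such a loop might use them; this is illustrative only (cpu_relax() stands in for a real low-power idle entry), not the actual kernel/cpu/idle.c code:

/* Sketch only: an idle-style loop built on the new polling helpers. */
static void example_poll_idle(void)
{
	__current_set_polling();

	while (!need_resched()) {
		/*
		 * Before entering a non-polling idle state, stop advertising
		 * polling and re-check; the _and_test() variant closes the
		 * race with resched_task() setting TIF_NEED_RESCHED right
		 * before the clear became visible.
		 */
		if (!current_clr_polling_and_test())
			cpu_relax();		/* stand-in for a real idle state */
		__current_set_polling();
	}

	__current_clr_polling();
}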
@@ -2545,6 +2673,11 @@ static inline unsigned int task_cpu(const struct task_struct *p)
 	return task_thread_info(p)->cpu;
 }
 
+static inline int task_node(const struct task_struct *p)
+{
+	return cpu_to_node(task_cpu(p));
+}
+
 extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
 
 #else
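task_node() resolves the NUMA node of the CPU the task last ran on. A small hedged example of the kind of use it enables, such as node-local allocation on behalf of a task (illustrative only; alloc_page_near_task() is not a real kernel function):

/* Sketch: allocate an order-0 page on the task's current node. */
static struct page *alloc_page_near_task(struct task_struct *p, gfp_t gfp)
{
	return alloc_pages_node(task_node(p), gfp, 0);
}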