Diffstat (limited to 'include/linux/sched.h')
 -rw-r--r--  include/linux/sched.h | 167
 1 file changed, 150 insertions(+), 17 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e27baeeda3f4..045b0d227846 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -22,6 +22,7 @@ struct sched_param {
 #include <linux/errno.h>
 #include <linux/nodemask.h>
 #include <linux/mm_types.h>
+#include <linux/preempt.h>
 
 #include <asm/page.h>
 #include <asm/ptrace.h>
@@ -427,6 +428,14 @@ struct task_cputime {
 		.sum_exec_runtime = 0,		\
 	}
 
+#define PREEMPT_ENABLED		(PREEMPT_NEED_RESCHED)
+
+#ifdef CONFIG_PREEMPT_COUNT
+#define PREEMPT_DISABLED	(1 + PREEMPT_ENABLED)
+#else
+#define PREEMPT_DISABLED	PREEMPT_ENABLED
+#endif
+
 /*
  * Disable preemption until the scheduler is running.
  * Reset by start_kernel()->sched_init()->init_idle().
@@ -434,7 +443,7 @@ struct task_cputime {
  * We include PREEMPT_ACTIVE to avoid cond_resched() from working
  * before the scheduler is active -- see should_resched().
  */
-#define INIT_PREEMPT_COUNT	(1 + PREEMPT_ACTIVE)
+#define INIT_PREEMPT_COUNT	(PREEMPT_DISABLED + PREEMPT_ACTIVE)
 
 /**
  * struct thread_group_cputimer - thread group interval timer counts
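The new PREEMPT_ENABLED/PREEMPT_DISABLED pair lets INIT_PREEMPT_COUNT fold the PREEMPT_NEED_RESCHED bit into the boot-time count; the real definitions come from linux/preempt.h (included above). A minimal user-space model of the arithmetic, using made-up placeholder bit values rather than the kernel's real ones:

#include <stdio.h>

/* Placeholder bit values for illustration only. */
#define PREEMPT_NEED_RESCHED	0x80000000u
#define PREEMPT_ACTIVE		0x10000000u

#define PREEMPT_ENABLED		(PREEMPT_NEED_RESCHED)
#define PREEMPT_DISABLED	(1 + PREEMPT_ENABLED)	/* CONFIG_PREEMPT_COUNT=y case */
#define INIT_PREEMPT_COUNT	(PREEMPT_DISABLED + PREEMPT_ACTIVE)

int main(void)
{
	/* A freshly created task carries one disable level plus PREEMPT_ACTIVE,
	 * so cond_resched() stays inert until init_idle() resets the count
	 * back to PREEMPT_ENABLED. */
	printf("INIT_PREEMPT_COUNT = %#x\n", INIT_PREEMPT_COUNT);
	return 0;
}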
@@ -768,6 +777,7 @@ enum cpu_idle_type {
 #define SD_ASYM_PACKING		0x0800	/* Place busy groups earlier in the domain */
 #define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */
 #define SD_OVERLAP		0x2000	/* sched_domains of this level overlap */
+#define SD_NUMA			0x4000	/* cross-node balancing */
 
 extern int __weak arch_sd_sibiling_asym_packing(void);
 
@@ -811,6 +821,10 @@ struct sched_domain {
 
 	u64 last_update;
 
+	/* idle_balance() stats */
+	u64 max_newidle_lb_cost;
+	unsigned long next_decay_max_lb_cost;
+
 #ifdef CONFIG_SCHEDSTATS
 	/* load_balance() stats */
 	unsigned int lb_count[CPU_MAX_IDLE_TYPES];
@@ -1029,6 +1043,8 @@ struct task_struct {
 	struct task_struct *last_wakee;
 	unsigned long wakee_flips;
 	unsigned long wakee_flip_decay_ts;
+
+	int wake_cpu;
 #endif
 	int on_rq;
 
@@ -1324,10 +1340,41 @@ struct task_struct {
 #endif
 #ifdef CONFIG_NUMA_BALANCING
 	int numa_scan_seq;
-	int numa_migrate_seq;
 	unsigned int numa_scan_period;
+	unsigned int numa_scan_period_max;
+	int numa_preferred_nid;
+	int numa_migrate_deferred;
+	unsigned long numa_migrate_retry;
 	u64 node_stamp;			/* migration stamp */
 	struct callback_head numa_work;
+
+	struct list_head numa_entry;
+	struct numa_group *numa_group;
+
+	/*
+	 * Exponential decaying average of faults on a per-node basis.
+	 * Scheduling placement decisions are made based on these counts.
+	 * The values remain static for the duration of a PTE scan.
+	 */
+	unsigned long *numa_faults;
+	unsigned long total_numa_faults;
+
+	/*
+	 * numa_faults_buffer records faults per node during the current
+	 * scan window. When the scan completes, the counts in numa_faults
+	 * decay and these values are copied.
+	 */
+	unsigned long *numa_faults_buffer;
+
+	/*
+	 * numa_faults_locality tracks if faults recorded during the last
+	 * scan window were remote/local. The task scan period is adapted
+	 * based on the locality of the faults with different weights
+	 * depending on whether they were shared or private faults.
+	 */
+	unsigned long numa_faults_locality[2];
+
+	unsigned long numa_pages_migrated;
 #endif /* CONFIG_NUMA_BALANCING */
 
 	struct rcu_head rcu;
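As the comments above describe, numa_faults holds a per-node decaying average while numa_faults_buffer collects the current scan window. A hedged sketch of the decay-and-copy step at the end of a window (plain C model, not the scheduler's actual placement code; the function and parameter names are made up):

/* Illustration only: decay numa_faults, fold in the buffered counts,
 * then zero the buffer for the next scan window. */
static void example_decay_numa_faults(unsigned long *numa_faults,
				      unsigned long *numa_faults_buffer,
				      unsigned long *total_numa_faults,
				      int nr_node_ids)
{
	int nid;

	for (nid = 0; nid < nr_node_ids; nid++) {
		unsigned long old = numa_faults[nid];

		/* Halve the old average, then add this window's faults. */
		numa_faults[nid] = (old >> 1) + numa_faults_buffer[nid];
		numa_faults_buffer[nid] = 0;

		/* total_numa_faults tracks the sum of all per-node averages. */
		*total_numa_faults += numa_faults[nid] - old;
	}
}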
@@ -1412,16 +1459,33 @@ struct task_struct {
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
+#define TNF_MIGRATED	0x01
+#define TNF_NO_GROUP	0x02
+#define TNF_SHARED	0x04
+#define TNF_FAULT_LOCAL	0x08
+
 #ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int node, int pages, bool migrated);
+extern void task_numa_fault(int last_node, int node, int pages, int flags);
+extern pid_t task_numa_group_id(struct task_struct *p);
 extern void set_numabalancing_state(bool enabled);
+extern void task_numa_free(struct task_struct *p);
+
+extern unsigned int sysctl_numa_balancing_migrate_deferred;
 #else
-static inline void task_numa_fault(int node, int pages, bool migrated)
+static inline void task_numa_fault(int last_node, int node, int pages,
+				   int flags)
 {
 }
+static inline pid_t task_numa_group_id(struct task_struct *p)
+{
+	return 0;
+}
 static inline void set_numabalancing_state(bool enabled)
 {
 }
+static inline void task_numa_free(struct task_struct *p)
+{
+}
 #endif
 
 static inline struct pid *task_pid(struct task_struct *task)
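task_numa_fault() now takes the node the data was last faulted from plus a TNF_ flag mask instead of a single migrated boolean. A hedged sketch of how a hinting-fault handler might compose the call; the helper and its parameters (page_nid, last_nid, migrated, shared) are placeholders, not the actual mm/ code:

#include <linux/sched.h>
#include <linux/topology.h>

/* Hypothetical caller for illustration only. */
static void example_record_numa_fault(int page_nid, int last_nid, int nr_pages,
				      bool migrated, bool shared)
{
	int flags = 0;

	if (page_nid == numa_node_id())
		flags |= TNF_FAULT_LOCAL;	/* fault hit the faulting CPU's node */
	if (migrated)
		flags |= TNF_MIGRATED;		/* page was moved toward the task */
	if (shared)
		flags |= TNF_SHARED;		/* page is shared with other tasks */

	task_numa_fault(last_nid, page_nid, nr_pages, flags);
}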
@@ -1974,7 +2038,7 @@ extern void wake_up_new_task(struct task_struct *tsk);
 #else
 static inline void kick_process(struct task_struct *tsk) { }
 #endif
-extern void sched_fork(struct task_struct *p);
+extern void sched_fork(unsigned long clone_flags, struct task_struct *p);
 extern void sched_dead(struct task_struct *p);
 
 extern void proc_caches_init(void);
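sched_fork() now receives the clone flags of the fork in progress. A hedged sketch of a caller adapting to the new signature (the surrounding fork logic is omitted):

/* Illustration only: the real call site is copy_process() in kernel/fork.c. */
static void example_fork_setup(unsigned long clone_flags, struct task_struct *p)
{
	sched_fork(clone_flags, p);	/* previously: sched_fork(p) */
}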
@@ -2401,11 +2465,6 @@ static inline int signal_pending_state(long state, struct task_struct *p)
 	return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
 }
 
-static inline int need_resched(void)
-{
-	return unlikely(test_thread_flag(TIF_NEED_RESCHED));
-}
-
 /*
  * cond_resched() and cond_resched_lock(): latency reduction via
  * explicit rescheduling in places that are safe. The return
@@ -2474,36 +2533,105 @@ static inline int tsk_is_polling(struct task_struct *p)
 {
 	return task_thread_info(p)->status & TS_POLLING;
 }
-static inline void current_set_polling(void)
+static inline void __current_set_polling(void)
 {
 	current_thread_info()->status |= TS_POLLING;
 }
 
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+	__current_set_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 */
+	smp_mb();
+
+	return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
 {
 	current_thread_info()->status &= ~TS_POLLING;
-	smp_mb__after_clear_bit();
+}
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+	__current_clr_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 */
+	smp_mb();
+
+	return unlikely(tif_need_resched());
 }
 #elif defined(TIF_POLLING_NRFLAG)
 static inline int tsk_is_polling(struct task_struct *p)
 {
 	return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
 }
-static inline void current_set_polling(void)
+
+static inline void __current_set_polling(void)
 {
 	set_thread_flag(TIF_POLLING_NRFLAG);
 }
 
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+	__current_set_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 *
+	 * XXX: assumes set/clear bit are identical barrier wise.
+	 */
+	smp_mb__after_clear_bit();
+
+	return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
 {
 	clear_thread_flag(TIF_POLLING_NRFLAG);
 }
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+	__current_clr_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 */
+	smp_mb__after_clear_bit();
+
+	return unlikely(tif_need_resched());
+}
+
 #else
 static inline int tsk_is_polling(struct task_struct *p) { return 0; }
-static inline void current_set_polling(void) { }
-static inline void current_clr_polling(void) { }
+static inline void __current_set_polling(void) { }
+static inline void __current_clr_polling(void) { }
+
+static inline bool __must_check current_set_polling_and_test(void)
+{
+	return unlikely(tif_need_resched());
+}
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+	return unlikely(tif_need_resched());
+}
 #endif
 
+static __always_inline bool need_resched(void)
+{
+	return unlikely(tif_need_resched());
+}
+
 /*
  * Thread group CPU time accounting.
  */
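The *_and_test() variants publish the polling state and then re-check TIF_NEED_RESCHED across a barrier, closing the window in which a remote resched_task() could be missed. A hedged sketch of the idle-loop pattern they are meant to support (illustrative only, not the actual cpuidle code):

#include <linux/sched.h>

/* Illustration only: a simplified polling idle loop. */
static void example_poll_idle(void)
{
	/*
	 * Advertise that we poll TIF_NEED_RESCHED so remote CPUs can skip
	 * the resched IPI. If NEED_RESCHED was already set when we looked,
	 * don't idle at all.
	 */
	if (current_set_polling_and_test())
		goto out;

	while (!need_resched())
		cpu_relax();	/* spin instead of halting */
out:
	__current_clr_polling();
}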
@@ -2545,6 +2673,11 @@ static inline unsigned int task_cpu(const struct task_struct *p)
 	return task_thread_info(p)->cpu;
 }
 
+static inline int task_node(const struct task_struct *p)
+{
+	return cpu_to_node(task_cpu(p));
+}
+
 extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
 
 #else
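task_node() maps a task to the NUMA node of the CPU it is (or was last) running on. A hedged sketch of the kind of caller it serves, e.g. preferring node-local memory for per-task data (the helper name is made up):

#include <linux/gfp.h>
#include <linux/sched.h>

/* Hypothetical helper for illustration only. */
static struct page *example_alloc_near_task(struct task_struct *p)
{
	/* Allocate one page on the node the task last ran on. */
	return alloc_pages_node(task_node(p), GFP_KERNEL, 0);
}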