author		Linus Torvalds <torvalds@linux-foundation.org>	2014-06-03 17:00:15 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-06-03 17:00:15 -0400
commit		c84a1e32ee58fc1cc9d3fd42619b917cce67e30a (patch)
tree		d3e5bed273f747e7c9e399864219bea76f4c30ea /kernel
parent		3d521f9151dacab566904d1f57dcb3e7080cdd8f (diff)
parent		096aa33863a5e48de52d2ff30e0801b7487944f4 (diff)
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into next
Pull scheduler updates from Ingo Molnar:
"The main scheduling related changes in this cycle were:
- various sched/numa updates, for better performance
- tree wide cleanup of open coded nice levels
- nohz fix related to rq->nr_running use
- cpuidle changes and continued consolidation to improve the
kernel/sched/idle.c high level idle scheduling logic. As part of
this effort I pulled cpuidle driver changes from Rafael as well.
- standardized idle polling amongst architectures
- continued work on preparing better power/energy aware scheduling
- sched/rt updates
- misc fixlets and cleanups"
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (49 commits)
sched/numa: Decay ->wakee_flips instead of zeroing
sched/numa: Update migrate_improves/degrades_locality()
sched/numa: Allow task switch if load imbalance improves
sched/rt: Fix 'struct sched_dl_entity' and dl_task_time() comments, to match the current upstream code
sched: Consolidate open coded implementations of nice level frobbing into nice_to_rlimit() and rlimit_to_nice()
sched: Initialize rq->age_stamp on processor start
sched, nohz: Change rq->nr_running to always use wrappers
sched: Fix the rq->next_balance logic in rebalance_domains() and idle_balance()
sched: Use clamp() and clamp_val() to make sys_nice() more readable
sched: Do not zero sg->cpumask and sg->sgp->power in build_sched_groups()
sched/numa: Fix initialization of sched_domain_topology for NUMA
sched: Call select_idle_sibling() when not affine_sd
sched: Simplify return logic in sched_read_attr()
sched: Simplify return logic in sched_copy_attr()
sched: Fix exec_start/task_hot on migrated tasks
arm64: Remove TIF_POLLING_NRFLAG
metag: Remove TIF_POLLING_NRFLAG
sched/idle: Make cpuidle_idle_call() void
sched/idle: Reflow cpuidle_idle_call()
sched/idle: Delay clearing the polling bit
...
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/locking/locktorture.c	2
-rw-r--r--	kernel/power/suspend.c	2
-rw-r--r--	kernel/sched/core.c	324
-rw-r--r--	kernel/sched/deadline.c	6
-rw-r--r--	kernel/sched/fair.c	226
-rw-r--r--	kernel/sched/idle.c	140
-rw-r--r--	kernel/sched/rt.c	119
-rw-r--r--	kernel/sched/sched.h	26
-rw-r--r--	kernel/sched/stop_task.c	4
-rw-r--r--	kernel/sys.c	6
-rw-r--r--	kernel/workqueue.c	6
11 files changed, 522 insertions, 339 deletions
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index dbafeac18e4d..0955b885d0dc 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -216,7 +216,7 @@ static int lock_torture_writer(void *arg)
 	static DEFINE_TORTURE_RANDOM(rand);
 
 	VERBOSE_TOROUT_STRING("lock_torture_writer task started");
-	set_user_nice(current, 19);
+	set_user_nice(current, MAX_NICE);
 
 	do {
 		if ((torture_random(&rand) & 0xfffff) == 0)
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 8233cd4047d7..155721f7f909 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -54,9 +54,11 @@ static void freeze_begin(void)
 
 static void freeze_enter(void)
 {
+	cpuidle_use_deepest_state(true);
 	cpuidle_resume();
 	wait_event(suspend_freeze_wait_head, suspend_freeze_wake);
 	cpuidle_pause();
+	cpuidle_use_deepest_state(false);
 }
 
 void freeze_wake(void)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a62a7dec3986..913c6d6cc2c1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -522,6 +522,39 @@ static inline void init_hrtick(void)
 #endif	/* CONFIG_SCHED_HRTICK */
 
 /*
+ * cmpxchg based fetch_or, macro so it works for different integer types
+ */
+#define fetch_or(ptr, val)						\
+({	typeof(*(ptr)) __old, __val = *(ptr);				\
+	for (;;) {							\
+		__old = cmpxchg((ptr), __val, __val | (val));		\
+		if (__old == __val)					\
+			break;						\
+		__val = __old;						\
+	}								\
+	__old;								\
+})
+
+#ifdef TIF_POLLING_NRFLAG
+/*
+ * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
+ * this avoids any races wrt polling state changes and thereby avoids
+ * spurious IPIs.
+ */
+static bool set_nr_and_not_polling(struct task_struct *p)
+{
+	struct thread_info *ti = task_thread_info(p);
+	return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG);
+}
+#else
+static bool set_nr_and_not_polling(struct task_struct *p)
+{
+	set_tsk_need_resched(p);
+	return true;
+}
+#endif
+
+/*
  * resched_task - mark a task 'to be rescheduled now'.
  *
  * On UP this means the setting of the need_resched flag, on SMP it
@@ -537,17 +570,15 @@ void resched_task(struct task_struct *p)
 	if (test_tsk_need_resched(p))
 		return;
 
-	set_tsk_need_resched(p);
-
 	cpu = task_cpu(p);
+
 	if (cpu == smp_processor_id()) {
+		set_tsk_need_resched(p);
 		set_preempt_need_resched();
 		return;
 	}
 
-	/* NEED_RESCHED must be visible before we test polling */
-	smp_mb();
-	if (!tsk_is_polling(p))
+	if (set_nr_and_not_polling(p))
 		smp_send_reschedule(cpu);
 }
 
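[Illustrative sketch, not part of the patch.] The fetch_or() added above is a cmpxchg loop that atomically ORs a value into a word and hands back the previous value, so resched_task() can set TIF_NEED_RESCHED and learn in the same step whether the target CPU was polling; the IPI is only sent when it was not. As a rough user-space analogue, C11's atomic_fetch_or() has the same return-the-old-value semantics; the flag values below are invented for the sketch and are not the kernel's TIF_* encoding.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative flag values only. */
#define FLAG_NEED_RESCHED	0x1
#define FLAG_POLLING		0x2

/* Same idea as the kernel helper: set NEED_RESCHED and report whether the
 * CPU was *not* polling, i.e. whether a reschedule IPI is still needed. */
static bool set_nr_and_not_polling(atomic_uint *flags)
{
	unsigned int old = atomic_fetch_or(flags, FLAG_NEED_RESCHED);
	return !(old & FLAG_POLLING);
}

int main(void)
{
	atomic_uint polling_cpu = FLAG_POLLING;
	atomic_uint idle_cpu = 0;

	/* A polling CPU notices NEED_RESCHED on its own: no IPI. */
	printf("IPI to polling CPU? %d\n", set_nr_and_not_polling(&polling_cpu));
	/* A non-polling CPU must be kicked. */
	printf("IPI to idle CPU?    %d\n", set_nr_and_not_polling(&idle_cpu));
	return 0;
}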
@@ -3018,7 +3049,7 @@ EXPORT_SYMBOL(set_user_nice);
 int can_nice(const struct task_struct *p, const int nice)
 {
 	/* convert nice value [19,-20] to rlimit style value [1,40] */
-	int nice_rlim = 20 - nice;
+	int nice_rlim = nice_to_rlimit(nice);
 
 	return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
 		capable(CAP_SYS_NICE));
@@ -3042,17 +3073,10 @@ SYSCALL_DEFINE1(nice, int, increment)
 	 * We don't have to worry. Conceptually one call occurs first
 	 * and we have a single winner.
 	 */
-	if (increment < -40)
-		increment = -40;
-	if (increment > 40)
-		increment = 40;
-
+	increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH);
 	nice = task_nice(current) + increment;
-	if (nice < MIN_NICE)
-		nice = MIN_NICE;
-	if (nice > MAX_NICE)
-		nice = MAX_NICE;
 
+	nice = clamp_val(nice, MIN_NICE, MAX_NICE);
 	if (increment < 0 && !can_nice(current, nice))
 		return -EPERM;
 
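[Illustrative sketch, not part of the patch.] The open-coded "20 - nice" in the can_nice() hunk above is replaced by nice_to_rlimit(), but the helper itself is defined elsewhere and not shown here. The definitions below are an assumption inferred from the value being replaced: nice 19..-20 should map onto the RLIMIT_NICE scale 1..40 and round-trip back through rlimit_to_nice().

#include <stdio.h>

#define MAX_NICE	 19
#define MIN_NICE	-20

/* Assumed shape of the helpers, inferred from the removed "20 - nice". */
static long nice_to_rlimit(long nice)
{
	return MAX_NICE - nice + 1;	/* 19 -> 1, -20 -> 40 */
}

static long rlimit_to_nice(long prio)
{
	return MAX_NICE - prio + 1;	/* 1 -> 19, 40 -> -20 */
}

int main(void)
{
	for (long nice = MIN_NICE; nice <= MAX_NICE; nice++)
		if (rlimit_to_nice(nice_to_rlimit(nice)) != nice)
			printf("round-trip broke at nice=%ld\n", nice);

	printf("nice 19 -> rlimit %ld, nice -20 -> rlimit %ld\n",
	       nice_to_rlimit(19), nice_to_rlimit(-20));
	return 0;
}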
@@ -3642,13 +3666,11 @@ static int sched_copy_attr(struct sched_attr __user *uattr,
 	 */
 	attr->sched_nice = clamp(attr->sched_nice, MIN_NICE, MAX_NICE);
 
-out:
-	return ret;
+	return 0;
 
 err_size:
 	put_user(sizeof(*attr), &uattr->size);
-	ret = -E2BIG;
-	goto out;
+	return -E2BIG;
 }
 
 /**
@@ -3808,7 +3830,7 @@ static int sched_read_attr(struct sched_attr __user *uattr,
 
 	for (; addr < end; addr++) {
 		if (*addr)
-			goto err_size;
+			return -EFBIG;
 	}
 
 	attr->size = usize;
@@ -3818,12 +3840,7 @@ static int sched_read_attr(struct sched_attr __user *uattr,
 	if (ret)
 		return -EFAULT;
 
-out:
-	return ret;
-
-err_size:
-	ret = -E2BIG;
-	goto out;
+	return 0;
 }
 
 /**
@@ -5093,10 +5110,20 @@ static struct notifier_block migration_notifier = {
 	.priority = CPU_PRI_MIGRATION,
 };
 
+static void __cpuinit set_cpu_rq_start_time(void)
+{
+	int cpu = smp_processor_id();
+	struct rq *rq = cpu_rq(cpu);
+	rq->age_stamp = sched_clock_cpu(cpu);
+}
+
 static int sched_cpu_active(struct notifier_block *nfb,
 				      unsigned long action, void *hcpu)
 {
 	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_STARTING:
+		set_cpu_rq_start_time();
+		return NOTIFY_OK;
 	case CPU_DOWN_FAILED:
 		set_cpu_active((long)hcpu, true);
 		return NOTIFY_OK;
@@ -5305,7 +5332,8 @@ static int sd_degenerate(struct sched_domain *sd)
 			 SD_BALANCE_FORK |
 			 SD_BALANCE_EXEC |
 			 SD_SHARE_CPUPOWER |
-			 SD_SHARE_PKG_RESOURCES)) {
+			 SD_SHARE_PKG_RESOURCES |
+			 SD_SHARE_POWERDOMAIN)) {
 		if (sd->groups != sd->groups->next)
 			return 0;
 	}
@@ -5336,7 +5364,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 				SD_BALANCE_EXEC |
 				SD_SHARE_CPUPOWER |
 				SD_SHARE_PKG_RESOURCES |
-				SD_PREFER_SIBLING);
+				SD_PREFER_SIBLING |
+				SD_SHARE_POWERDOMAIN);
 		if (nr_node_ids == 1)
 			pflags &= ~SD_SERIALIZE;
 	}
@@ -5610,17 +5639,6 @@ static int __init isolated_cpu_setup(char *str)
 
 __setup("isolcpus=", isolated_cpu_setup);
 
-static const struct cpumask *cpu_cpu_mask(int cpu)
-{
-	return cpumask_of_node(cpu_to_node(cpu));
-}
-
-struct sd_data {
-	struct sched_domain **__percpu sd;
-	struct sched_group **__percpu sg;
-	struct sched_group_power **__percpu sgp;
-};
-
 struct s_data {
 	struct sched_domain ** __percpu sd;
 	struct root_domain *rd;
@@ -5633,21 +5651,6 @@ enum s_alloc {
 	sa_none,
 };
 
-struct sched_domain_topology_level;
-
-typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
-typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
-
-#define SDTL_OVERLAP		0x01
-
-struct sched_domain_topology_level {
-	sched_domain_init_f init;
-	sched_domain_mask_f mask;
-	int		    flags;
-	int		    numa_level;
-	struct sd_data      data;
-};
-
 /*
  * Build an iteration mask that can exclude certain CPUs from the upwards
  * domain traversal.
@@ -5815,8 +5818,6 @@ build_sched_groups(struct sched_domain *sd, int cpu)
 			continue;
 
 		group = get_group(i, sdd, &sg);
-		cpumask_clear(sched_group_cpus(sg));
-		sg->sgp->power = 0;
 		cpumask_setall(sched_group_mask(sg));
 
 		for_each_cpu(j, span) {
@@ -5866,44 +5867,11 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 		atomic_set(&sg->sgp->nr_busy_cpus, sg->group_weight);
 }
 
-int __weak arch_sd_sibling_asym_packing(void)
-{
-	return 0*SD_ASYM_PACKING;
-}
-
 /*
  * Initializers for schedule domains
  * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
  */
 
-#ifdef CONFIG_SCHED_DEBUG
-# define SD_INIT_NAME(sd, type)		sd->name = #type
-#else
-# define SD_INIT_NAME(sd, type)		do { } while (0)
-#endif
-
-#define SD_INIT_FUNC(type)						\
-static noinline struct sched_domain *					\
-sd_init_##type(struct sched_domain_topology_level *tl, int cpu)	\
-{									\
-	struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);	\
-	*sd = SD_##type##_INIT;						\
-	SD_INIT_NAME(sd, type);						\
-	sd->private = &tl->data;					\
-	return sd;							\
-}
-
-SD_INIT_FUNC(CPU)
-#ifdef CONFIG_SCHED_SMT
- SD_INIT_FUNC(SIBLING)
-#endif
-#ifdef CONFIG_SCHED_MC
- SD_INIT_FUNC(MC)
-#endif
-#ifdef CONFIG_SCHED_BOOK
- SD_INIT_FUNC(BOOK)
-#endif
-
 static int default_relax_domain_level = -1;
 int sched_domain_level_max;
 
@@ -5991,99 +5959,154 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
 	*per_cpu_ptr(sdd->sgp, cpu) = NULL;
 }
 
-#ifdef CONFIG_SCHED_SMT
-static const struct cpumask *cpu_smt_mask(int cpu)
-{
-	return topology_thread_cpumask(cpu);
-}
-#endif
-
-/*
- * Topology list, bottom-up.
- */
-static struct sched_domain_topology_level default_topology[] = {
-#ifdef CONFIG_SCHED_SMT
-	{ sd_init_SIBLING, cpu_smt_mask, },
-#endif
-#ifdef CONFIG_SCHED_MC
-	{ sd_init_MC, cpu_coregroup_mask, },
-#endif
-#ifdef CONFIG_SCHED_BOOK
-	{ sd_init_BOOK, cpu_book_mask, },
-#endif
-	{ sd_init_CPU, cpu_cpu_mask, },
-	{ NULL, },
-};
-
-static struct sched_domain_topology_level *sched_domain_topology = default_topology;
-
-#define for_each_sd_topology(tl)			\
-	for (tl = sched_domain_topology; tl->init; tl++)
-
 #ifdef CONFIG_NUMA
-
 static int sched_domains_numa_levels;
 static int *sched_domains_numa_distance;
 static struct cpumask ***sched_domains_numa_masks;
 static int sched_domains_curr_level;
+#endif
 
-static inline int sd_local_flags(int level)
-{
-	if (sched_domains_numa_distance[level] > RECLAIM_DISTANCE)
-		return 0;
-
-	return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE;
-}
+/*
+ * SD_flags allowed in topology descriptions.
+ *
+ * SD_SHARE_CPUPOWER      - describes SMT topologies
+ * SD_SHARE_PKG_RESOURCES - describes shared caches
+ * SD_NUMA                - describes NUMA topologies
+ * SD_SHARE_POWERDOMAIN   - describes shared power domain
+ *
+ * Odd one out:
+ * SD_ASYM_PACKING        - describes SMT quirks
+ */
+#define TOPOLOGY_SD_FLAGS		\
+	(SD_SHARE_CPUPOWER |		\
+	 SD_SHARE_PKG_RESOURCES |	\
+	 SD_NUMA |			\
+	 SD_ASYM_PACKING |		\
+	 SD_SHARE_POWERDOMAIN)
 
 static struct sched_domain *
-sd_numa_init(struct sched_domain_topology_level *tl, int cpu)
+sd_init(struct sched_domain_topology_level *tl, int cpu)
 {
 	struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);
-	int level = tl->numa_level;
-	int sd_weight = cpumask_weight(
-			sched_domains_numa_masks[level][cpu_to_node(cpu)]);
+	int sd_weight, sd_flags = 0;
+
+#ifdef CONFIG_NUMA
+	/*
+	 * Ugly hack to pass state to sd_numa_mask()...
+	 */
+	sched_domains_curr_level = tl->numa_level;
+#endif
+
+	sd_weight = cpumask_weight(tl->mask(cpu));
+
+	if (tl->sd_flags)
+		sd_flags = (*tl->sd_flags)();
+	if (WARN_ONCE(sd_flags & ~TOPOLOGY_SD_FLAGS,
+			"wrong sd_flags in topology description\n"))
+		sd_flags &= ~TOPOLOGY_SD_FLAGS;
 
 	*sd = (struct sched_domain){
 		.min_interval		= sd_weight,
 		.max_interval		= 2*sd_weight,
 		.busy_factor		= 32,
 		.imbalance_pct		= 125,
-		.cache_nice_tries	= 2,
-		.busy_idx		= 3,
-		.idle_idx		= 2,
+
+		.cache_nice_tries	= 0,
+		.busy_idx		= 0,
+		.idle_idx		= 0,
 		.newidle_idx		= 0,
 		.wake_idx		= 0,
 		.forkexec_idx		= 0,
 
 		.flags			= 1*SD_LOAD_BALANCE
 					| 1*SD_BALANCE_NEWIDLE
-					| 0*SD_BALANCE_EXEC
-					| 0*SD_BALANCE_FORK
+					| 1*SD_BALANCE_EXEC
+					| 1*SD_BALANCE_FORK
 					| 0*SD_BALANCE_WAKE
-					| 0*SD_WAKE_AFFINE
+					| 1*SD_WAKE_AFFINE
 					| 0*SD_SHARE_CPUPOWER
 					| 0*SD_SHARE_PKG_RESOURCES
-					| 1*SD_SERIALIZE
+					| 0*SD_SERIALIZE
 					| 0*SD_PREFER_SIBLING
-					| 1*SD_NUMA
-					| sd_local_flags(level)
+					| 0*SD_NUMA
+					| sd_flags
 					,
+
 		.last_balance		= jiffies,
 		.balance_interval	= sd_weight,
+		.smt_gain		= 0,
 		.max_newidle_lb_cost	= 0,
 		.next_decay_max_lb_cost	= jiffies,
+#ifdef CONFIG_SCHED_DEBUG
+		.name			= tl->name,
+#endif
 	};
-	SD_INIT_NAME(sd, NUMA);
-	sd->private = &tl->data;
 
 	/*
-	 * Ugly hack to pass state to sd_numa_mask()...
+	 * Convert topological properties into behaviour.
 	 */
-	sched_domains_curr_level = tl->numa_level;
+
+	if (sd->flags & SD_SHARE_CPUPOWER) {
+		sd->imbalance_pct = 110;
+		sd->smt_gain = 1178; /* ~15% */
+
+	} else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
+		sd->imbalance_pct = 117;
+		sd->cache_nice_tries = 1;
+		sd->busy_idx = 2;
+
+#ifdef CONFIG_NUMA
+	} else if (sd->flags & SD_NUMA) {
+		sd->cache_nice_tries = 2;
+		sd->busy_idx = 3;
+		sd->idle_idx = 2;
+
+		sd->flags |= SD_SERIALIZE;
+		if (sched_domains_numa_distance[tl->numa_level] > RECLAIM_DISTANCE) {
+			sd->flags &= ~(SD_BALANCE_EXEC |
+				       SD_BALANCE_FORK |
+				       SD_WAKE_AFFINE);
+		}
+
+#endif
+	} else {
+		sd->flags |= SD_PREFER_SIBLING;
+		sd->cache_nice_tries = 1;
+		sd->busy_idx = 2;
+		sd->idle_idx = 1;
+	}
+
+	sd->private = &tl->data;
 
 	return sd;
 }
 
+/*
+ * Topology list, bottom-up.
+ */
+static struct sched_domain_topology_level default_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+#ifdef CONFIG_SCHED_MC
+	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+#endif
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
+struct sched_domain_topology_level *sched_domain_topology = default_topology;
+
+#define for_each_sd_topology(tl)			\
+	for (tl = sched_domain_topology; tl->mask; tl++)
+
+void set_sched_topology(struct sched_domain_topology_level *tl)
+{
+	sched_domain_topology = tl;
+}
+
+#ifdef CONFIG_NUMA
+
 static const struct cpumask *sd_numa_mask(int cpu)
 {
 	return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
@@ -6227,7 +6250,10 @@ static void sched_init_numa(void)
 		}
 	}
 
-	tl = kzalloc((ARRAY_SIZE(default_topology) + level) *
+	/* Compute default topology size */
+	for (i = 0; sched_domain_topology[i].mask; i++);
+
+	tl = kzalloc((i + level + 1) *
 			sizeof(struct sched_domain_topology_level), GFP_KERNEL);
 	if (!tl)
 		return;
@@ -6235,18 +6261,19 @@ static void sched_init_numa(void)
 	/*
 	 * Copy the default topology bits..
 	 */
-	for (i = 0; default_topology[i].init; i++)
-		tl[i] = default_topology[i];
+	for (i = 0; sched_domain_topology[i].mask; i++)
+		tl[i] = sched_domain_topology[i];
 
 	/*
 	 * .. and append 'j' levels of NUMA goodness.
 	 */
 	for (j = 0; j < level; i++, j++) {
 		tl[i] = (struct sched_domain_topology_level){
-			.init = sd_numa_init,
 			.mask = sd_numa_mask,
+			.sd_flags = cpu_numa_flags,
 			.flags = SDTL_OVERLAP,
 			.numa_level = j,
+			SD_INIT_NAME(NUMA)
 		};
 	}
 
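[Illustrative sketch, not part of the patch.] With the hunks above, sched_init_numa() sizes and copies whatever table sched_domain_topology points at instead of the compiled-in default_topology[], and each level now carries a mask callback, an optional sd_flags callback and a debug name. Below is a small user-space model of that table-driven walk; the entry names mirror the diff, but the callbacks, flag values and printed strings are invented for illustration only.

#include <stdio.h>

/* Minimal stand-in for the new sched_domain_topology_level format. */
struct topology_level {
	const char *(*mask)(int cpu);	/* which CPUs this level spans */
	int (*sd_flags)(void);		/* topology SD_* flags, may be NULL */
	const char *name;
};

static const char *smt_mask(int cpu)  { return "thread siblings"; }
static const char *core_mask(int cpu) { return "cores in the package"; }
static const char *die_mask(int cpu)  { return "all cpus in the node"; }
static int smt_flags(void)  { return 0x1; /* stand-in for SD_SHARE_CPUPOWER */ }
static int core_flags(void) { return 0x2; /* stand-in for SD_SHARE_PKG_RESOURCES */ }

static struct topology_level default_topology[] = {
	{ smt_mask,  smt_flags,  "SMT" },
	{ core_mask, core_flags, "MC"  },
	{ die_mask,  NULL,       "DIE" },
	{ NULL, },
};

static struct topology_level *topology = default_topology;

int main(void)
{
	/* Same walk as for_each_sd_topology(): stop at the NULL ->mask entry. */
	for (struct topology_level *tl = topology; tl->mask; tl++)
		printf("%-3s spans %s, flags %#x\n", tl->name,
		       tl->mask(0), tl->sd_flags ? tl->sd_flags() : 0);
	return 0;
}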
@@ -6404,7 +6431,7 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
 		const struct cpumask *cpu_map, struct sched_domain_attr *attr,
 		struct sched_domain *child, int cpu)
 {
-	struct sched_domain *sd = tl->init(tl, cpu);
+	struct sched_domain *sd = sd_init(tl, cpu);
 	if (!sd)
 		return child;
 
@@ -6974,6 +7001,7 @@ void __init sched_init(void)
 	if (cpu_isolated_map == NULL)
 		zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 	idle_thread_set_boot_cpu();
+	set_cpu_rq_start_time();
 #endif
 	init_sched_fair_class();
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 800e99b99075..f9ca7d19781a 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -520,7 +520,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 	 * We need to take care of a possible races here. In fact, the
 	 * task might have changed its scheduling policy to something
 	 * different from SCHED_DEADLINE or changed its reservation
-	 * parameters (through sched_setscheduler()).
+	 * parameters (through sched_setattr()).
 	 */
 	if (!dl_task(p) || dl_se->dl_new)
 		goto unlock;
@@ -741,7 +741,7 @@ void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 
 	WARN_ON(!dl_prio(prio));
 	dl_rq->dl_nr_running++;
-	inc_nr_running(rq_of_dl_rq(dl_rq));
+	add_nr_running(rq_of_dl_rq(dl_rq), 1);
 
 	inc_dl_deadline(dl_rq, deadline);
 	inc_dl_migration(dl_se, dl_rq);
@@ -755,7 +755,7 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 	WARN_ON(!dl_prio(prio));
 	WARN_ON(!dl_rq->dl_nr_running);
 	dl_rq->dl_nr_running--;
-	dec_nr_running(rq_of_dl_rq(dl_rq));
+	sub_nr_running(rq_of_dl_rq(dl_rq), 1);
 
 	dec_dl_deadline(dl_rq, dl_se->deadline);
 	dec_dl_migration(dl_se, dl_rq);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0fdb96de81a5..c9617b73bcc0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1095,6 +1095,34 @@ static void task_numa_assign(struct task_numa_env *env,
 	env->best_cpu = env->dst_cpu;
 }
 
+static bool load_too_imbalanced(long orig_src_load, long orig_dst_load,
+				long src_load, long dst_load,
+				struct task_numa_env *env)
+{
+	long imb, old_imb;
+
+	/* We care about the slope of the imbalance, not the direction. */
+	if (dst_load < src_load)
+		swap(dst_load, src_load);
+
+	/* Is the difference below the threshold? */
+	imb = dst_load * 100 - src_load * env->imbalance_pct;
+	if (imb <= 0)
+		return false;
+
+	/*
+	 * The imbalance is above the allowed threshold.
+	 * Compare it with the old imbalance.
+	 */
+	if (orig_dst_load < orig_src_load)
+		swap(orig_dst_load, orig_src_load);
+
+	old_imb = orig_dst_load * 100 - orig_src_load * env->imbalance_pct;
+
+	/* Would this change make things worse? */
+	return (old_imb > imb);
+}
+
 /*
  * This checks if the overall compute and NUMA accesses of the system would
  * be improved if the source tasks was migrated to the target dst_cpu taking
@@ -1107,7 +1135,8 @@ static void task_numa_compare(struct task_numa_env *env,
 	struct rq *src_rq = cpu_rq(env->src_cpu);
 	struct rq *dst_rq = cpu_rq(env->dst_cpu);
 	struct task_struct *cur;
-	long dst_load, src_load;
+	long orig_src_load, src_load;
+	long orig_dst_load, dst_load;
 	long load;
 	long imp = (groupimp > 0) ? groupimp : taskimp;
 
@@ -1181,13 +1210,13 @@ static void task_numa_compare(struct task_numa_env *env,
 	 * In the overloaded case, try and keep the load balanced.
 	 */
 balance:
-	dst_load = env->dst_stats.load;
-	src_load = env->src_stats.load;
+	orig_dst_load = env->dst_stats.load;
+	orig_src_load = env->src_stats.load;
 
 	/* XXX missing power terms */
 	load = task_h_load(env->p);
-	dst_load += load;
-	src_load -= load;
+	dst_load = orig_dst_load + load;
+	src_load = orig_src_load - load;
 
 	if (cur) {
 		load = task_h_load(cur);
@@ -1195,11 +1224,8 @@ balance:
 		src_load += load;
 	}
 
-	/* make src_load the smaller */
-	if (dst_load < src_load)
-		swap(dst_load, src_load);
-
-	if (src_load * env->imbalance_pct < dst_load * 100)
+	if (load_too_imbalanced(orig_src_load, orig_dst_load,
+				src_load, dst_load, env))
 		goto unlock;
 
 assign:
@@ -1301,7 +1327,16 @@ static int task_numa_migrate(struct task_struct *p)
 	if (env.best_cpu == -1)
 		return -EAGAIN;
 
-	sched_setnuma(p, env.dst_nid);
+	/*
+	 * If the task is part of a workload that spans multiple NUMA nodes,
+	 * and is migrating into one of the workload's active nodes, remember
+	 * this node as the task's preferred numa node, so the workload can
+	 * settle down.
+	 * A task that migrated to a second choice node will be better off
+	 * trying for a better one later. Do not set the preferred node here.
+	 */
+	if (p->numa_group && node_isset(env.dst_nid, p->numa_group->active_nodes))
+		sched_setnuma(p, env.dst_nid);
 
 	/*
 	 * Reset the scan period if the task is being rescheduled on an
@@ -1326,12 +1361,15 @@ static int task_numa_migrate(struct task_struct *p)
 /* Attempt to migrate a task to a CPU on the preferred node. */
 static void numa_migrate_preferred(struct task_struct *p)
 {
+	unsigned long interval = HZ;
+
 	/* This task has no NUMA fault statistics yet */
 	if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults_memory))
 		return;
 
 	/* Periodically retry migrating the task to the preferred node */
-	p->numa_migrate_retry = jiffies + HZ;
+	interval = min(interval, msecs_to_jiffies(p->numa_scan_period) / 16);
+	p->numa_migrate_retry = jiffies + interval;
 
 	/* Success if task is already running on preferred CPU */
 	if (task_node(p) == p->numa_preferred_nid)
@@ -1738,6 +1776,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
 	struct task_struct *p = current;
 	bool migrated = flags & TNF_MIGRATED;
 	int cpu_node = task_node(current);
+	int local = !!(flags & TNF_FAULT_LOCAL);
 	int priv;
 
 	if (!numabalancing_enabled)
@@ -1786,6 +1825,17 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
 		task_numa_group(p, last_cpupid, flags, &priv);
 	}
 
+	/*
+	 * If a workload spans multiple NUMA nodes, a shared fault that
+	 * occurs wholly within the set of nodes that the workload is
+	 * actively using should be counted as local. This allows the
+	 * scan rate to slow down when a workload has settled down.
+	 */
+	if (!priv && !local && p->numa_group &&
+			node_isset(cpu_node, p->numa_group->active_nodes) &&
+			node_isset(mem_node, p->numa_group->active_nodes))
+		local = 1;
+
 	task_numa_placement(p);
 
 	/*
@@ -1800,7 +1850,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
 
 	p->numa_faults_buffer_memory[task_faults_idx(mem_node, priv)] += pages;
 	p->numa_faults_buffer_cpu[task_faults_idx(cpu_node, priv)] += pages;
-	p->numa_faults_locality[!!(flags & TNF_FAULT_LOCAL)] += pages;
+	p->numa_faults_locality[local] += pages;
 }
 
 static void reset_ptenuma_scan(struct task_struct *p)
@@ -3301,7 +3351,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	}
 
 	if (!se)
-		rq->nr_running -= task_delta;
+		sub_nr_running(rq, task_delta);
 
 	cfs_rq->throttled = 1;
 	cfs_rq->throttled_clock = rq_clock(rq);
@@ -3352,7 +3402,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 	}
 
 	if (!se)
-		rq->nr_running += task_delta;
+		add_nr_running(rq, task_delta);
 
 	/* determine whether we need to wake up potentially idle cpu */
 	if (rq->curr == rq->idle && rq->cfs.nr_running)
@@ -3884,7 +3934,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 	if (!se) {
 		update_rq_runnable_avg(rq, rq->nr_running);
-		inc_nr_running(rq);
+		add_nr_running(rq, 1);
 	}
 	hrtick_update(rq);
 }
@@ -3944,7 +3994,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	}
 
 	if (!se) {
-		dec_nr_running(rq);
+		sub_nr_running(rq, 1);
 		update_rq_runnable_avg(rq, 1);
 	}
 	hrtick_update(rq);
@@ -4015,7 +4065,7 @@ static void record_wakee(struct task_struct *p)
 	 * about the loss.
 	 */
 	if (jiffies > current->wakee_flip_decay_ts + HZ) {
-		current->wakee_flips = 0;
+		current->wakee_flips >>= 1;
 		current->wakee_flip_decay_ts = jiffies;
 	}
 
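[Illustrative sketch, not part of the patch.] The record_wakee() change above decays wakee_flips by halving it once per second instead of zeroing it, so the flip count becomes a running, decaying estimate instead of losing all history at each decay point. A toy calculation of the difference follows; the 64 flips per "second" is an arbitrary number chosen for the sketch.

#include <stdio.h>

int main(void)
{
	unsigned int reset = 0, halve = 0;

	for (int second = 1; second <= 5; second++) {
		reset = 0;		/* old behaviour: wakee_flips = 0;   */
		halve >>= 1;		/* new behaviour: wakee_flips >>= 1; */
		reset += 64;		/* flips recorded during this second */
		halve += 64;
		printf("second %d: reset-style=%u decayed=%u\n",
		       second, reset, halve);
	}
	return 0;
}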
@@ -4449,10 +4499,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 			sd = tmp;
 	}
 
-	if (affine_sd) {
-		if (cpu != prev_cpu && wake_affine(affine_sd, p, sync))
-			prev_cpu = cpu;
+	if (affine_sd && cpu != prev_cpu && wake_affine(affine_sd, p, sync))
+		prev_cpu = cpu;
 
+	if (sd_flag & SD_BALANCE_WAKE) {
 		new_cpu = select_idle_sibling(p, prev_cpu);
 		goto unlock;
 	}
@@ -4520,6 +4570,9 @@ migrate_task_rq_fair(struct task_struct *p, int next_cpu)
 		atomic_long_add(se->avg.load_avg_contrib,
 						&cfs_rq->removed_load);
 	}
+
+	/* We have migrated, no longer consider this task hot */
+	se->exec_start = 0;
 }
 #endif /* CONFIG_SMP */
 
@@ -5070,6 +5123,7 @@ task_hot(struct task_struct *p, u64 now)
 /* Returns true if the destination node has incurred more faults */
 static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env)
 {
+	struct numa_group *numa_group = rcu_dereference(p->numa_group);
 	int src_nid, dst_nid;
 
 	if (!sched_feat(NUMA_FAVOUR_HIGHER) || !p->numa_faults_memory ||
@@ -5083,21 +5137,29 @@ static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env)
 	if (src_nid == dst_nid)
 		return false;
 
-	/* Always encourage migration to the preferred node. */
-	if (dst_nid == p->numa_preferred_nid)
-		return true;
+	if (numa_group) {
+		/* Task is already in the group's interleave set. */
+		if (node_isset(src_nid, numa_group->active_nodes))
+			return false;
+
+		/* Task is moving into the group's interleave set. */
+		if (node_isset(dst_nid, numa_group->active_nodes))
+			return true;
 
-	/* If both task and group weight improve, this move is a winner. */
-	if (task_weight(p, dst_nid) > task_weight(p, src_nid) &&
-	    group_weight(p, dst_nid) > group_weight(p, src_nid))
+		return group_faults(p, dst_nid) > group_faults(p, src_nid);
+	}
+
+	/* Encourage migration to the preferred node. */
+	if (dst_nid == p->numa_preferred_nid)
 		return true;
 
-	return false;
+	return task_faults(p, dst_nid) > task_faults(p, src_nid);
 }
 
 
 static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
 {
+	struct numa_group *numa_group = rcu_dereference(p->numa_group);
 	int src_nid, dst_nid;
 
 	if (!sched_feat(NUMA) || !sched_feat(NUMA_RESIST_LOWER))
@@ -5112,16 +5174,23 @@ static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
 	if (src_nid == dst_nid)
 		return false;
 
+	if (numa_group) {
+		/* Task is moving within/into the group's interleave set. */
+		if (node_isset(dst_nid, numa_group->active_nodes))
+			return false;
+
+		/* Task is moving out of the group's interleave set. */
+		if (node_isset(src_nid, numa_group->active_nodes))
+			return true;
+
+		return group_faults(p, dst_nid) < group_faults(p, src_nid);
+	}
+
 	/* Migrating away from the preferred node is always bad. */
 	if (src_nid == p->numa_preferred_nid)
 		return true;
 
-	/* If either task or group weight get worse, don't do it. */
-	if (task_weight(p, dst_nid) < task_weight(p, src_nid) ||
-	    group_weight(p, dst_nid) < group_weight(p, src_nid))
-		return true;
-
-	return false;
+	return task_faults(p, dst_nid) < task_faults(p, src_nid);
 }
 
 #else
@@ -5564,6 +5633,7 @@ static unsigned long scale_rt_power(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
 	u64 total, available, age_stamp, avg;
+	s64 delta;
 
 	/*
 	 * Since we're reading these variables without serialization make sure
@@ -5572,7 +5642,11 @@ static unsigned long scale_rt_power(int cpu)
 	age_stamp = ACCESS_ONCE(rq->age_stamp);
 	avg = ACCESS_ONCE(rq->rt_avg);
 
-	total = sched_avg_period() + (rq_clock(rq) - age_stamp);
+	delta = rq_clock(rq) - age_stamp;
+	if (unlikely(delta < 0))
+		delta = 0;
+
+	total = sched_avg_period() + delta;
 
 	if (unlikely(total < avg)) {
 		/* Ensures that power won't end up being negative */
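[Illustrative sketch, not part of the patch.] The scale_rt_power() change above matters because rq_clock() and rq->age_stamp are u64: if age_stamp is ahead of the clock (possible now that CPU_STARTING initializes it elsewhere in this series), the unsigned subtraction wraps to an enormous value, so the fix computes the difference as a signed s64 and clamps it at zero. A tiny standalone demonstration of the wraparound, with arbitrary sample values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t now = 1000, age_stamp = 1500;	/* age_stamp ahead of the clock */

	uint64_t unsigned_delta = now - age_stamp;	/* wraps to a huge value */
	int64_t delta = (int64_t)(now - age_stamp);	/* what the fix computes */

	if (delta < 0)
		delta = 0;

	printf("u64 delta:         %llu\n", (unsigned long long)unsigned_delta);
	printf("clamped s64 delta: %lld\n", (long long)delta);
	return 0;
}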
@@ -6640,17 +6714,44 @@ out:
 	return ld_moved;
 }
 
+static inline unsigned long
+get_sd_balance_interval(struct sched_domain *sd, int cpu_busy)
+{
+	unsigned long interval = sd->balance_interval;
+
+	if (cpu_busy)
+		interval *= sd->busy_factor;
+
+	/* scale ms to jiffies */
+	interval = msecs_to_jiffies(interval);
+	interval = clamp(interval, 1UL, max_load_balance_interval);
+
+	return interval;
+}
+
+static inline void
+update_next_balance(struct sched_domain *sd, int cpu_busy, unsigned long *next_balance)
+{
+	unsigned long interval, next;
+
+	interval = get_sd_balance_interval(sd, cpu_busy);
+	next = sd->last_balance + interval;
+
+	if (time_after(*next_balance, next))
+		*next_balance = next;
+}
+
 /*
  * idle_balance is called by schedule() if this_cpu is about to become
  * idle. Attempts to pull tasks from other CPUs.
  */
 static int idle_balance(struct rq *this_rq)
 {
+	unsigned long next_balance = jiffies + HZ;
+	int this_cpu = this_rq->cpu;
 	struct sched_domain *sd;
 	int pulled_task = 0;
-	unsigned long next_balance = jiffies + HZ;
 	u64 curr_cost = 0;
-	int this_cpu = this_rq->cpu;
 
 	idle_enter_fair(this_rq);
 
@@ -6660,8 +6761,15 @@ static int idle_balance(struct rq *this_rq)
 	 */
 	this_rq->idle_stamp = rq_clock(this_rq);
 
-	if (this_rq->avg_idle < sysctl_sched_migration_cost)
+	if (this_rq->avg_idle < sysctl_sched_migration_cost) {
+		rcu_read_lock();
+		sd = rcu_dereference_check_sched_domain(this_rq->sd);
+		if (sd)
+			update_next_balance(sd, 0, &next_balance);
+		rcu_read_unlock();
+
 		goto out;
+	}
 
 	/*
 	 * Drop the rq->lock, but keep IRQ/preempt disabled.
@@ -6671,20 +6779,20 @@ static int idle_balance(struct rq *this_rq)
 	update_blocked_averages(this_cpu);
 	rcu_read_lock();
 	for_each_domain(this_cpu, sd) {
-		unsigned long interval;
 		int continue_balancing = 1;
 		u64 t0, domain_cost;
 
 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;
 
-		if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost)
+		if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) {
+			update_next_balance(sd, 0, &next_balance);
 			break;
+		}
 
 		if (sd->flags & SD_BALANCE_NEWIDLE) {
 			t0 = sched_clock_cpu(this_cpu);
 
-			/* If we've pulled tasks over stop searching: */
 			pulled_task = load_balance(this_cpu, this_rq,
 						   sd, CPU_NEWLY_IDLE,
 						   &continue_balancing);
@@ -6696,10 +6804,13 @@ static int idle_balance(struct rq *this_rq)
 			curr_cost += domain_cost;
 		}
 
-		interval = msecs_to_jiffies(sd->balance_interval);
-		if (time_after(next_balance, sd->last_balance + interval))
-			next_balance = sd->last_balance + interval;
-		if (pulled_task)
+		update_next_balance(sd, 0, &next_balance);
+
+		/*
+		 * Stop searching for tasks to pull if there are
+		 * now runnable tasks on this rq.
+		 */
+		if (pulled_task || this_rq->nr_running > 0)
 			break;
 	}
 	rcu_read_unlock();
@@ -6717,20 +6828,13 @@ static int idle_balance(struct rq *this_rq)
 	if (this_rq->cfs.h_nr_running && !pulled_task)
 		pulled_task = 1;
 
-	if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
-		/*
-		 * We are going idle. next_balance may be set based on
-		 * a busy processor. So reset next_balance.
-		 */
+out:
+	/* Move the next balance forward */
+	if (time_after(this_rq->next_balance, next_balance))
 		this_rq->next_balance = next_balance;
-	}
 
-out:
 	/* Is there a task of a high priority class? */
-	if (this_rq->nr_running != this_rq->cfs.h_nr_running &&
-	    ((this_rq->stop && this_rq->stop->on_rq) ||
-	     this_rq->dl.dl_nr_running ||
-	     (this_rq->rt.rt_nr_running && !rt_rq_throttled(&this_rq->rt))))
+	if (this_rq->nr_running != this_rq->cfs.h_nr_running)
 		pulled_task = -1;
 
 	if (pulled_task) {
@@ -7011,16 +7115,9 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
 			break;
 		}
 
-		interval = sd->balance_interval;
-		if (idle != CPU_IDLE)
-			interval *= sd->busy_factor;
-
-		/* scale ms to jiffies */
-		interval = msecs_to_jiffies(interval);
-		interval = clamp(interval, 1UL, max_load_balance_interval);
+		interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
 
 		need_serialize = sd->flags & SD_SERIALIZE;
-
 		if (need_serialize) {
 			if (!spin_trylock(&balancing))
 				goto out;
@@ -7036,6 +7133,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
 				idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
 			}
 			sd->last_balance = jiffies;
+			interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
 		}
 		if (need_serialize)
 			spin_unlock(&balancing);
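[Illustrative sketch, not part of the patch.] get_sd_balance_interval(), added earlier in this file's diff, centralizes the "multiply by busy_factor when busy, convert ms to jiffies, clamp" computation that rebalance_domains() used to open-code and that idle_balance() now reuses via update_next_balance(). Below is a user-space rendering of that arithmetic; HZ and the clamp ceiling are assumed values for the example, not the kernel's.

#include <stdio.h>

#define HZ			250		/* assumed for the example */
#define MAX_INTERVAL_JIFFIES	(HZ / 10)	/* stand-in for max_load_balance_interval */

static unsigned long msecs_to_jiffies(unsigned long ms)
{
	return ms * HZ / 1000;
}

static unsigned long clamp_ul(unsigned long v, unsigned long lo, unsigned long hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

/* Same shape as the helper added in the hunk above. */
static unsigned long get_sd_balance_interval(unsigned long balance_interval_ms,
					     unsigned int busy_factor, int cpu_busy)
{
	unsigned long interval = balance_interval_ms;

	if (cpu_busy)
		interval *= busy_factor;

	interval = msecs_to_jiffies(interval);
	return clamp_ul(interval, 1UL, MAX_INTERVAL_JIFFIES);
}

int main(void)
{
	printf("idle: %lu jiffies, busy: %lu jiffies\n",
	       get_sd_balance_interval(8, 32, 0),
	       get_sd_balance_interval(8, 32, 1));
	return 0;
}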
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 8f4390a079c7..25b9423abce9 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c | |||
@@ -67,24 +67,21 @@ void __weak arch_cpu_idle(void) | |||
67 | * cpuidle_idle_call - the main idle function | 67 | * cpuidle_idle_call - the main idle function |
68 | * | 68 | * |
69 | * NOTE: no locks or semaphores should be used here | 69 | * NOTE: no locks or semaphores should be used here |
70 | * return non-zero on failure | ||
71 | */ | 70 | */ |
72 | static int cpuidle_idle_call(void) | 71 | static void cpuidle_idle_call(void) |
73 | { | 72 | { |
74 | struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); | 73 | struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); |
75 | struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); | 74 | struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); |
76 | int next_state, entered_state, ret; | 75 | int next_state, entered_state; |
77 | bool broadcast; | 76 | bool broadcast; |
78 | 77 | ||
79 | /* | 78 | /* |
80 | * Check if the idle task must be rescheduled. If it is the | 79 | * Check if the idle task must be rescheduled. If it is the |
81 | * case, exit the function after re-enabling the local irq and | 80 | * case, exit the function after re-enabling the local irq. |
82 | * set again the polling flag | ||
83 | */ | 81 | */ |
84 | if (current_clr_polling_and_test()) { | 82 | if (need_resched()) { |
85 | local_irq_enable(); | 83 | local_irq_enable(); |
86 | __current_set_polling(); | 84 | return; |
87 | return 0; | ||
88 | } | 85 | } |
89 | 86 | ||
90 | /* | 87 | /* |
@@ -101,96 +98,79 @@ static int cpuidle_idle_call(void) | |||
101 | rcu_idle_enter(); | 98 | rcu_idle_enter(); |
102 | 99 | ||
103 | /* | 100 | /* |
104 | * Check if the cpuidle framework is ready, otherwise fallback | 101 | * Ask the cpuidle framework to choose a convenient idle state. |
105 | * to the default arch specific idle method | 102 | * Fall back to the default arch idle method on errors. |
106 | */ | 103 | */ |
107 | ret = cpuidle_enabled(drv, dev); | 104 | next_state = cpuidle_select(drv, dev); |
108 | 105 | if (next_state < 0) { | |
109 | if (!ret) { | 106 | use_default: |
110 | /* | 107 | /* |
111 | * Ask the governor to choose an idle state it thinks | 108 | * We can't use the cpuidle framework, let's use the default |
112 | * it is convenient to go to. There is *always* a | 109 | * idle routine. |
113 | * convenient idle state | ||
114 | */ | 110 | */ |
115 | next_state = cpuidle_select(drv, dev); | 111 | if (current_clr_polling_and_test()) |
116 | |||
117 | /* | ||
118 | * The idle task must be scheduled, it is pointless to | ||
119 | * go to idle, just update no idle residency and get | ||
120 | * out of this function | ||
121 | */ | ||
122 | if (current_clr_polling_and_test()) { | ||
123 | dev->last_residency = 0; | ||
124 | entered_state = next_state; | ||
125 | local_irq_enable(); | 112 | local_irq_enable(); |
126 | } else { | 113 | else |
127 | broadcast = !!(drv->states[next_state].flags & | 114 | arch_cpu_idle(); |
128 | CPUIDLE_FLAG_TIMER_STOP); | 115 | |
129 | 116 | goto exit_idle; | |
130 | if (broadcast) | ||
131 | /* | ||
132 | * Tell the time framework to switch | ||
133 | * to a broadcast timer because our | ||
134 | * local timer will be shutdown. If a | ||
135 | * local timer is used from another | ||
136 | * cpu as a broadcast timer, this call | ||
137 | * may fail if it is not available | ||
138 | */ | ||
139 | ret = clockevents_notify( | ||
140 | CLOCK_EVT_NOTIFY_BROADCAST_ENTER, | ||
141 | &dev->cpu); | ||
142 | |||
143 | if (!ret) { | ||
144 | trace_cpu_idle_rcuidle(next_state, dev->cpu); | ||
145 | |||
146 | /* | ||
147 | * Enter the idle state previously | ||
148 | * returned by the governor | ||
149 | * decision. This function will block | ||
150 | * until an interrupt occurs and will | ||
151 | * take care of re-enabling the local | ||
152 | * interrupts | ||
153 | */ | ||
154 | entered_state = cpuidle_enter(drv, dev, | ||
155 | next_state); | ||
156 | |||
157 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, | ||
158 | dev->cpu); | ||
159 | |||
160 | if (broadcast) | ||
161 | clockevents_notify( | ||
162 | CLOCK_EVT_NOTIFY_BROADCAST_EXIT, | ||
163 | &dev->cpu); | ||
164 | |||
165 | /* | ||
166 | * Give the governor an opportunity to reflect on the | ||
167 | * outcome | ||
168 | */ | ||
169 | cpuidle_reflect(dev, entered_state); | ||
170 | } | ||
171 | } | ||
172 | } | 117 | } |
173 | 118 | ||
119 | |||
174 | /* | 120 | /* |
175 | * We can't use the cpuidle framework, let's use the default | 121 | * The idle task must be scheduled, it is pointless to |
176 | * idle routine | 122 | * go to idle, just update no idle residency and get |
123 | * out of this function | ||
177 | */ | 124 | */ |
178 | if (ret) | 125 | if (current_clr_polling_and_test()) { |
179 | arch_cpu_idle(); | 126 | dev->last_residency = 0; |
127 | entered_state = next_state; | ||
128 | local_irq_enable(); | ||
129 | goto exit_idle; | ||
130 | } | ||
131 | |||
132 | broadcast = !!(drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP); | ||
180 | 133 | ||
134 | /* | ||
135 | * Tell the time framework to switch to a broadcast timer | ||
136 | because our local timer will be shut down. If a local timer ||
137 | * is used from another cpu as a broadcast timer, this call may | ||
138 | * fail if it is not available | ||
139 | */ | ||
140 | if (broadcast && | ||
141 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu)) | ||
142 | goto use_default; | ||
143 | |||
144 | trace_cpu_idle_rcuidle(next_state, dev->cpu); | ||
145 | |||
146 | /* | ||
147 | * Enter the idle state previously returned by the governor decision. | ||
148 | * This function will block until an interrupt occurs and will take | ||
149 | * care of re-enabling the local interrupts | ||
150 | */ | ||
151 | entered_state = cpuidle_enter(drv, dev, next_state); | ||
152 | |||
153 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); | ||
154 | |||
155 | if (broadcast) | ||
156 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); | ||
157 | |||
158 | /* | ||
159 | * Give the governor an opportunity to reflect on the outcome | ||
160 | */ | ||
161 | cpuidle_reflect(dev, entered_state); | ||
162 | |||
163 | exit_idle: | ||
181 | __current_set_polling(); | 164 | __current_set_polling(); |
182 | 165 | ||
183 | /* | 166 | /* |
184 | * It is up to the idle functions to enable back the local | 167 | * It is up to the idle functions to reenable local interrupts |
185 | * interrupt | ||
186 | */ | 168 | */ |
187 | if (WARN_ON_ONCE(irqs_disabled())) | 169 | if (WARN_ON_ONCE(irqs_disabled())) |
188 | local_irq_enable(); | 170 | local_irq_enable(); |
189 | 171 | ||
190 | rcu_idle_exit(); | 172 | rcu_idle_exit(); |
191 | start_critical_timings(); | 173 | start_critical_timings(); |
192 | |||
193 | return 0; | ||
194 | } | 174 | } |
195 | 175 | ||
196 | /* | 176 | /* |
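The reflowed cpuidle_idle_call() above replaces the old nested if/else ladder with a flat path plus two labels (use_default for the arch fallback, exit_idle for the common tail). The stand-alone C model below mirrors only that control-flow shape; select_state(), enter_state() and default_idle() are made-up stubs, not the kernel's cpuidle hooks.

#include <stdio.h>

/* Pretend governor: returning a negative value means "can't pick a state". */
static int select_state(void)  { return -1; }
static int enter_state(int s)  { printf("enter state %d\n", s); return s; }
static void default_idle(void) { printf("default arch idle\n"); }

static void idle_call_model(void)
{
        int next_state = select_state();

        if (next_state < 0) {
use_default:
                /* Can't use the framework, fall back to the default idle. */
                default_idle();
                goto exit_idle;
        }

        /* In the kernel, a failed broadcast-timer switch also jumps back. */
        if (enter_state(next_state) < 0)
                goto use_default;

exit_idle:
        printf("leaving idle\n");
}

int main(void)
{
        idle_call_model();
        return 0;
}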
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index bd2267ad404f..0ebfd7a29472 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c | |||
@@ -79,6 +79,8 @@ void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) | |||
79 | rt_rq->overloaded = 0; | 79 | rt_rq->overloaded = 0; |
80 | plist_head_init(&rt_rq->pushable_tasks); | 80 | plist_head_init(&rt_rq->pushable_tasks); |
81 | #endif | 81 | #endif |
82 | /* We start in dequeued state, because no RT tasks are queued */ ||
83 | rt_rq->rt_queued = 0; | ||
82 | 84 | ||
83 | rt_rq->rt_time = 0; | 85 | rt_rq->rt_time = 0; |
84 | rt_rq->rt_throttled = 0; | 86 | rt_rq->rt_throttled = 0; |
@@ -112,6 +114,13 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) | |||
112 | return rt_se->rt_rq; | 114 | return rt_se->rt_rq; |
113 | } | 115 | } |
114 | 116 | ||
117 | static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se) | ||
118 | { | ||
119 | struct rt_rq *rt_rq = rt_se->rt_rq; | ||
120 | |||
121 | return rt_rq->rq; | ||
122 | } | ||
123 | |||
115 | void free_rt_sched_group(struct task_group *tg) | 124 | void free_rt_sched_group(struct task_group *tg) |
116 | { | 125 | { |
117 | int i; | 126 | int i; |
@@ -211,10 +220,16 @@ static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) | |||
211 | return container_of(rt_rq, struct rq, rt); | 220 | return container_of(rt_rq, struct rq, rt); |
212 | } | 221 | } |
213 | 222 | ||
214 | static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) | 223 | static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se) |
215 | { | 224 | { |
216 | struct task_struct *p = rt_task_of(rt_se); | 225 | struct task_struct *p = rt_task_of(rt_se); |
217 | struct rq *rq = task_rq(p); | 226 | |
227 | return task_rq(p); | ||
228 | } | ||
229 | |||
230 | static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) | ||
231 | { | ||
232 | struct rq *rq = rq_of_rt_se(rt_se); | ||
218 | 233 | ||
219 | return &rq->rt; | 234 | return &rq->rt; |
220 | } | 235 | } |
@@ -391,6 +406,9 @@ static inline void set_post_schedule(struct rq *rq) | |||
391 | } | 406 | } |
392 | #endif /* CONFIG_SMP */ | 407 | #endif /* CONFIG_SMP */ |
393 | 408 | ||
409 | static void enqueue_top_rt_rq(struct rt_rq *rt_rq); | ||
410 | static void dequeue_top_rt_rq(struct rt_rq *rt_rq); | ||
411 | |||
394 | static inline int on_rt_rq(struct sched_rt_entity *rt_se) | 412 | static inline int on_rt_rq(struct sched_rt_entity *rt_se) |
395 | { | 413 | { |
396 | return !list_empty(&rt_se->run_list); | 414 | return !list_empty(&rt_se->run_list); |
@@ -452,8 +470,11 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) | |||
452 | rt_se = rt_rq->tg->rt_se[cpu]; | 470 | rt_se = rt_rq->tg->rt_se[cpu]; |
453 | 471 | ||
454 | if (rt_rq->rt_nr_running) { | 472 | if (rt_rq->rt_nr_running) { |
455 | if (rt_se && !on_rt_rq(rt_se)) | 473 | if (!rt_se) |
474 | enqueue_top_rt_rq(rt_rq); | ||
475 | else if (!on_rt_rq(rt_se)) | ||
456 | enqueue_rt_entity(rt_se, false); | 476 | enqueue_rt_entity(rt_se, false); |
477 | |||
457 | if (rt_rq->highest_prio.curr < curr->prio) | 478 | if (rt_rq->highest_prio.curr < curr->prio) |
458 | resched_task(curr); | 479 | resched_task(curr); |
459 | } | 480 | } |
@@ -466,10 +487,17 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) | |||
466 | 487 | ||
467 | rt_se = rt_rq->tg->rt_se[cpu]; | 488 | rt_se = rt_rq->tg->rt_se[cpu]; |
468 | 489 | ||
469 | if (rt_se && on_rt_rq(rt_se)) | 490 | if (!rt_se) |
491 | dequeue_top_rt_rq(rt_rq); | ||
492 | else if (on_rt_rq(rt_se)) | ||
470 | dequeue_rt_entity(rt_se); | 493 | dequeue_rt_entity(rt_se); |
471 | } | 494 | } |
472 | 495 | ||
496 | static inline int rt_rq_throttled(struct rt_rq *rt_rq) | ||
497 | { | ||
498 | return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted; | ||
499 | } | ||
500 | |||
473 | static int rt_se_boosted(struct sched_rt_entity *rt_se) | 501 | static int rt_se_boosted(struct sched_rt_entity *rt_se) |
474 | { | 502 | { |
475 | struct rt_rq *rt_rq = group_rt_rq(rt_se); | 503 | struct rt_rq *rt_rq = group_rt_rq(rt_se); |
@@ -532,12 +560,23 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) | |||
532 | 560 | ||
533 | static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) | 561 | static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) |
534 | { | 562 | { |
535 | if (rt_rq->rt_nr_running) | 563 | struct rq *rq = rq_of_rt_rq(rt_rq); |
536 | resched_task(rq_of_rt_rq(rt_rq)->curr); | 564 | |
565 | if (!rt_rq->rt_nr_running) | ||
566 | return; | ||
567 | |||
568 | enqueue_top_rt_rq(rt_rq); | ||
569 | resched_task(rq->curr); | ||
537 | } | 570 | } |
538 | 571 | ||
539 | static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) | 572 | static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) |
540 | { | 573 | { |
574 | dequeue_top_rt_rq(rt_rq); | ||
575 | } | ||
576 | |||
577 | static inline int rt_rq_throttled(struct rt_rq *rt_rq) | ||
578 | { | ||
579 | return rt_rq->rt_throttled; | ||
541 | } | 580 | } |
542 | 581 | ||
543 | static inline const struct cpumask *sched_rt_period_mask(void) | 582 | static inline const struct cpumask *sched_rt_period_mask(void) |
@@ -922,6 +961,38 @@ static void update_curr_rt(struct rq *rq) | |||
922 | } | 961 | } |
923 | } | 962 | } |
924 | 963 | ||
964 | static void | ||
965 | dequeue_top_rt_rq(struct rt_rq *rt_rq) | ||
966 | { | ||
967 | struct rq *rq = rq_of_rt_rq(rt_rq); | ||
968 | |||
969 | BUG_ON(&rq->rt != rt_rq); | ||
970 | |||
971 | if (!rt_rq->rt_queued) | ||
972 | return; | ||
973 | |||
974 | BUG_ON(!rq->nr_running); | ||
975 | |||
976 | sub_nr_running(rq, rt_rq->rt_nr_running); | ||
977 | rt_rq->rt_queued = 0; | ||
978 | } | ||
979 | |||
980 | static void | ||
981 | enqueue_top_rt_rq(struct rt_rq *rt_rq) | ||
982 | { | ||
983 | struct rq *rq = rq_of_rt_rq(rt_rq); | ||
984 | |||
985 | BUG_ON(&rq->rt != rt_rq); | ||
986 | |||
987 | if (rt_rq->rt_queued) | ||
988 | return; | ||
989 | if (rt_rq_throttled(rt_rq) || !rt_rq->rt_nr_running) | ||
990 | return; | ||
991 | |||
992 | add_nr_running(rq, rt_rq->rt_nr_running); | ||
993 | rt_rq->rt_queued = 1; | ||
994 | } | ||
995 | |||
925 | #if defined CONFIG_SMP | 996 | #if defined CONFIG_SMP |
926 | 997 | ||
927 | static void | 998 | static void |
@@ -1045,12 +1116,23 @@ void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {} | |||
1045 | #endif /* CONFIG_RT_GROUP_SCHED */ | 1116 | #endif /* CONFIG_RT_GROUP_SCHED */ |
1046 | 1117 | ||
1047 | static inline | 1118 | static inline |
1119 | unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se) | ||
1120 | { | ||
1121 | struct rt_rq *group_rq = group_rt_rq(rt_se); | ||
1122 | |||
1123 | if (group_rq) | ||
1124 | return group_rq->rt_nr_running; | ||
1125 | else | ||
1126 | return 1; | ||
1127 | } | ||
1128 | |||
1129 | static inline | ||
1048 | void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | 1130 | void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) |
1049 | { | 1131 | { |
1050 | int prio = rt_se_prio(rt_se); | 1132 | int prio = rt_se_prio(rt_se); |
1051 | 1133 | ||
1052 | WARN_ON(!rt_prio(prio)); | 1134 | WARN_ON(!rt_prio(prio)); |
1053 | rt_rq->rt_nr_running++; | 1135 | rt_rq->rt_nr_running += rt_se_nr_running(rt_se); |
1054 | 1136 | ||
1055 | inc_rt_prio(rt_rq, prio); | 1137 | inc_rt_prio(rt_rq, prio); |
1056 | inc_rt_migration(rt_se, rt_rq); | 1138 | inc_rt_migration(rt_se, rt_rq); |
@@ -1062,7 +1144,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
1062 | { | 1144 | { |
1063 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); | 1145 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); |
1064 | WARN_ON(!rt_rq->rt_nr_running); | 1146 | WARN_ON(!rt_rq->rt_nr_running); |
1065 | rt_rq->rt_nr_running--; | 1147 | rt_rq->rt_nr_running -= rt_se_nr_running(rt_se); |
1066 | 1148 | ||
1067 | dec_rt_prio(rt_rq, rt_se_prio(rt_se)); | 1149 | dec_rt_prio(rt_rq, rt_se_prio(rt_se)); |
1068 | dec_rt_migration(rt_se, rt_rq); | 1150 | dec_rt_migration(rt_se, rt_rq); |
@@ -1119,6 +1201,8 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se) | |||
1119 | back = rt_se; | 1201 | back = rt_se; |
1120 | } | 1202 | } |
1121 | 1203 | ||
1204 | dequeue_top_rt_rq(rt_rq_of_se(back)); | ||
1205 | |||
1122 | for (rt_se = back; rt_se; rt_se = rt_se->back) { | 1206 | for (rt_se = back; rt_se; rt_se = rt_se->back) { |
1123 | if (on_rt_rq(rt_se)) | 1207 | if (on_rt_rq(rt_se)) |
1124 | __dequeue_rt_entity(rt_se); | 1208 | __dequeue_rt_entity(rt_se); |
@@ -1127,13 +1211,18 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se) | |||
1127 | 1211 | ||
1128 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head) | 1212 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head) |
1129 | { | 1213 | { |
1214 | struct rq *rq = rq_of_rt_se(rt_se); | ||
1215 | |||
1130 | dequeue_rt_stack(rt_se); | 1216 | dequeue_rt_stack(rt_se); |
1131 | for_each_sched_rt_entity(rt_se) | 1217 | for_each_sched_rt_entity(rt_se) |
1132 | __enqueue_rt_entity(rt_se, head); | 1218 | __enqueue_rt_entity(rt_se, head); |
1219 | enqueue_top_rt_rq(&rq->rt); | ||
1133 | } | 1220 | } |
1134 | 1221 | ||
1135 | static void dequeue_rt_entity(struct sched_rt_entity *rt_se) | 1222 | static void dequeue_rt_entity(struct sched_rt_entity *rt_se) |
1136 | { | 1223 | { |
1224 | struct rq *rq = rq_of_rt_se(rt_se); | ||
1225 | |||
1137 | dequeue_rt_stack(rt_se); | 1226 | dequeue_rt_stack(rt_se); |
1138 | 1227 | ||
1139 | for_each_sched_rt_entity(rt_se) { | 1228 | for_each_sched_rt_entity(rt_se) { |
@@ -1142,6 +1231,7 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se) | |||
1142 | if (rt_rq && rt_rq->rt_nr_running) | 1231 | if (rt_rq && rt_rq->rt_nr_running) |
1143 | __enqueue_rt_entity(rt_se, false); | 1232 | __enqueue_rt_entity(rt_se, false); |
1144 | } | 1233 | } |
1234 | enqueue_top_rt_rq(&rq->rt); | ||
1145 | } | 1235 | } |
1146 | 1236 | ||
1147 | /* | 1237 | /* |
@@ -1159,8 +1249,6 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) | |||
1159 | 1249 | ||
1160 | if (!task_current(rq, p) && p->nr_cpus_allowed > 1) | 1250 | if (!task_current(rq, p) && p->nr_cpus_allowed > 1) |
1161 | enqueue_pushable_task(rq, p); | 1251 | enqueue_pushable_task(rq, p); |
1162 | |||
1163 | inc_nr_running(rq); | ||
1164 | } | 1252 | } |
1165 | 1253 | ||
1166 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) | 1254 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) |
@@ -1171,8 +1259,6 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) | |||
1171 | dequeue_rt_entity(rt_se); | 1259 | dequeue_rt_entity(rt_se); |
1172 | 1260 | ||
1173 | dequeue_pushable_task(rq, p); | 1261 | dequeue_pushable_task(rq, p); |
1174 | |||
1175 | dec_nr_running(rq); | ||
1176 | } | 1262 | } |
1177 | 1263 | ||
1178 | /* | 1264 | /* |
@@ -1377,10 +1463,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev) | |||
1377 | if (prev->sched_class == &rt_sched_class) | 1463 | if (prev->sched_class == &rt_sched_class) |
1378 | update_curr_rt(rq); | 1464 | update_curr_rt(rq); |
1379 | 1465 | ||
1380 | if (!rt_rq->rt_nr_running) | 1466 | if (!rt_rq->rt_queued) |
1381 | return NULL; | ||
1382 | |||
1383 | if (rt_rq_throttled(rt_rq)) | ||
1384 | return NULL; | 1467 | return NULL; |
1385 | 1468 | ||
1386 | put_prev_task(rq, prev); | 1469 | put_prev_task(rq, prev); |
@@ -1892,9 +1975,9 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) | |||
1892 | */ | 1975 | */ |
1893 | if (p->on_rq && rq->curr != p) { | 1976 | if (p->on_rq && rq->curr != p) { |
1894 | #ifdef CONFIG_SMP | 1977 | #ifdef CONFIG_SMP |
1895 | if (rq->rt.overloaded && push_rt_task(rq) && | 1978 | if (p->nr_cpus_allowed > 1 && rq->rt.overloaded && |
1896 | /* Don't resched if we changed runqueues */ | 1979 | /* Don't resched if we changed runqueues */ |
1897 | rq != task_rq(p)) | 1980 | push_rt_task(rq) && rq != task_rq(p)) |
1898 | check_resched = 0; | 1981 | check_resched = 0; |
1899 | #endif /* CONFIG_SMP */ | 1982 | #endif /* CONFIG_SMP */ |
1900 | if (check_resched && p->prio < rq->curr->prio) | 1983 | if (check_resched && p->prio < rq->curr->prio) |
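The rt.c changes above add an rt_queued flag so that a root rt_rq's tasks are folded into rq->nr_running exactly once, via enqueue_top_rt_rq()/dequeue_top_rt_rq(), while rt_se_nr_running() lets a group entity contribute its whole group count. The toy user-space model below only demonstrates the once-per-cycle guard; the toy_* names are illustrative, not kernel code.

#include <assert.h>
#include <stdio.h>

struct toy_rq    { unsigned int nr_running; };
struct toy_rt_rq { unsigned int rt_nr_running; int rt_queued; };

static void enqueue_top(struct toy_rq *rq, struct toy_rt_rq *rt)
{
        if (rt->rt_queued || !rt->rt_nr_running)
                return;                         /* already counted, or empty */
        rq->nr_running += rt->rt_nr_running;
        rt->rt_queued = 1;
}

static void dequeue_top(struct toy_rq *rq, struct toy_rt_rq *rt)
{
        if (!rt->rt_queued)
                return;                         /* nothing was counted */
        rq->nr_running -= rt->rt_nr_running;
        rt->rt_queued = 0;
}

int main(void)
{
        struct toy_rq rq = { 0 };
        struct toy_rt_rq rt = { .rt_nr_running = 3, .rt_queued = 0 };

        enqueue_top(&rq, &rt);
        enqueue_top(&rq, &rt);                  /* second call is a no-op */
        assert(rq.nr_running == 3);

        dequeue_top(&rq, &rt);
        assert(rq.nr_running == 0);
        printf("nr_running accounting ok\n");
        return 0;
}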
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 456e492a3dca..600e2291a75c 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -409,6 +409,8 @@ struct rt_rq { | |||
409 | int overloaded; | 409 | int overloaded; |
410 | struct plist_head pushable_tasks; | 410 | struct plist_head pushable_tasks; |
411 | #endif | 411 | #endif |
412 | int rt_queued; | ||
413 | |||
412 | int rt_throttled; | 414 | int rt_throttled; |
413 | u64 rt_time; | 415 | u64 rt_time; |
414 | u64 rt_runtime; | 416 | u64 rt_runtime; |
@@ -423,18 +425,6 @@ struct rt_rq { | |||
423 | #endif | 425 | #endif |
424 | }; | 426 | }; |
425 | 427 | ||
426 | #ifdef CONFIG_RT_GROUP_SCHED | ||
427 | static inline int rt_rq_throttled(struct rt_rq *rt_rq) | ||
428 | { | ||
429 | return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted; | ||
430 | } | ||
431 | #else | ||
432 | static inline int rt_rq_throttled(struct rt_rq *rt_rq) | ||
433 | { | ||
434 | return rt_rq->rt_throttled; | ||
435 | } | ||
436 | #endif | ||
437 | |||
438 | /* Deadline class' related fields in a runqueue */ | 428 | /* Deadline class' related fields in a runqueue */ |
439 | struct dl_rq { | 429 | struct dl_rq { |
440 | /* runqueue is an rbtree, ordered by deadline */ | 430 | /* runqueue is an rbtree, ordered by deadline */ |
@@ -1216,12 +1206,14 @@ extern void update_idle_cpu_load(struct rq *this_rq); | |||
1216 | 1206 | ||
1217 | extern void init_task_runnable_average(struct task_struct *p); | 1207 | extern void init_task_runnable_average(struct task_struct *p); |
1218 | 1208 | ||
1219 | static inline void inc_nr_running(struct rq *rq) | 1209 | static inline void add_nr_running(struct rq *rq, unsigned count) |
1220 | { | 1210 | { |
1221 | rq->nr_running++; | 1211 | unsigned prev_nr = rq->nr_running; |
1212 | |||
1213 | rq->nr_running = prev_nr + count; | ||
1222 | 1214 | ||
1223 | #ifdef CONFIG_NO_HZ_FULL | 1215 | #ifdef CONFIG_NO_HZ_FULL |
1224 | if (rq->nr_running == 2) { | 1216 | if (prev_nr < 2 && rq->nr_running >= 2) { |
1225 | if (tick_nohz_full_cpu(rq->cpu)) { | 1217 | if (tick_nohz_full_cpu(rq->cpu)) { |
1226 | /* Order rq->nr_running write against the IPI */ | 1218 | /* Order rq->nr_running write against the IPI */ |
1227 | smp_wmb(); | 1219 | smp_wmb(); |
@@ -1231,9 +1223,9 @@ static inline void inc_nr_running(struct rq *rq) | |||
1231 | #endif | 1223 | #endif |
1232 | } | 1224 | } |
1233 | 1225 | ||
1234 | static inline void dec_nr_running(struct rq *rq) | 1226 | static inline void sub_nr_running(struct rq *rq, unsigned count) |
1235 | { | 1227 | { |
1236 | rq->nr_running--; | 1228 | rq->nr_running -= count; |
1237 | } | 1229 | } |
1238 | 1230 | ||
1239 | static inline void rq_last_tick_reset(struct rq *rq) | 1231 | static inline void rq_last_tick_reset(struct rq *rq) |
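Since add_nr_running() may now add several tasks at once (e.g. a whole rt group's rt_nr_running), the nohz-full trigger above compares the previous value with the new total instead of testing for exactly 2. The stand-alone snippet below shows why the old "== 2" test would miss a batched crossing; tick_restarts just stands in for the IPI/tick kick and is not a kernel symbol.

#include <assert.h>
#include <stdio.h>

static unsigned int nr_running;
static int tick_restarts;

static void add_nr_running(unsigned int count)
{
        unsigned int prev_nr = nr_running;

        nr_running = prev_nr + count;

        /* Old check was "if (nr_running == 2)": a jump from 1 to 4 skips it. */
        if (prev_nr < 2 && nr_running >= 2)
                tick_restarts++;
}

int main(void)
{
        add_nr_running(1);                      /* 0 -> 1, below threshold */
        add_nr_running(3);                      /* 1 -> 4, crosses 2 once  */
        add_nr_running(1);                      /* 4 -> 5, already above   */
        assert(tick_restarts == 1);
        printf("tick restarted %d time(s)\n", tick_restarts);
        return 0;
}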
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index d6ce65dde541..bfe0edadbfbb 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c | |||
@@ -41,13 +41,13 @@ pick_next_task_stop(struct rq *rq, struct task_struct *prev) | |||
41 | static void | 41 | static void |
42 | enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags) | 42 | enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags) |
43 | { | 43 | { |
44 | inc_nr_running(rq); | 44 | add_nr_running(rq, 1); |
45 | } | 45 | } |
46 | 46 | ||
47 | static void | 47 | static void |
48 | dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags) | 48 | dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags) |
49 | { | 49 | { |
50 | dec_nr_running(rq); | 50 | sub_nr_running(rq, 1); |
51 | } | 51 | } |
52 | 52 | ||
53 | static void yield_task_stop(struct rq *rq) | 53 | static void yield_task_stop(struct rq *rq) |
diff --git a/kernel/sys.c b/kernel/sys.c index fba0f29401ea..66a751ebf9d9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -250,7 +250,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) | |||
250 | else | 250 | else |
251 | p = current; | 251 | p = current; |
252 | if (p) { | 252 | if (p) { |
253 | niceval = 20 - task_nice(p); | 253 | niceval = nice_to_rlimit(task_nice(p)); |
254 | if (niceval > retval) | 254 | if (niceval > retval) |
255 | retval = niceval; | 255 | retval = niceval; |
256 | } | 256 | } |
@@ -261,7 +261,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) | |||
261 | else | 261 | else |
262 | pgrp = task_pgrp(current); | 262 | pgrp = task_pgrp(current); |
263 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { | 263 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { |
264 | niceval = 20 - task_nice(p); | 264 | niceval = nice_to_rlimit(task_nice(p)); |
265 | if (niceval > retval) | 265 | if (niceval > retval) |
266 | retval = niceval; | 266 | retval = niceval; |
267 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); | 267 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); |
@@ -277,7 +277,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) | |||
277 | 277 | ||
278 | do_each_thread(g, p) { | 278 | do_each_thread(g, p) { |
279 | if (uid_eq(task_uid(p), uid)) { | 279 | if (uid_eq(task_uid(p), uid)) { |
280 | niceval = 20 - task_nice(p); | 280 | niceval = nice_to_rlimit(task_nice(p)); |
281 | if (niceval > retval) | 281 | if (niceval > retval) |
282 | retval = niceval; | 282 | retval = niceval; |
283 | } | 283 | } |
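The sys.c hunks above replace the open-coded "20 - task_nice(p)" with nice_to_rlimit(), which maps nice values [-20, 19] onto the RLIMIT_NICE scale [1, 40]. A minimal user-space sketch of that mapping and its assumed inverse rlimit_to_nice(); these are local stand-ins, not the kernel's definitions.

#include <assert.h>
#include <stdio.h>

/* nice -20..19  <->  rlimit units 40..1 (higher rlimit = higher priority) */
static long nice_to_rlimit(long nice) { return 20 - nice; }
static long rlimit_to_nice(long prio) { return 20 - prio; }

int main(void)
{
        assert(nice_to_rlimit(-20) == 40);      /* most favourable nice  */
        assert(nice_to_rlimit(19)  == 1);       /* least favourable nice */
        assert(rlimit_to_nice(nice_to_rlimit(5)) == 5);
        printf("nice 0 maps to rlimit %ld\n", nice_to_rlimit(0));
        return 0;
}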
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8edc87185427..a4bab46cd38e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -100,10 +100,10 @@ enum { | |||
100 | 100 | ||
101 | /* | 101 | /* |
102 | * Rescue workers are used only on emergencies and shared by | 102 | * Rescue workers are used only on emergencies and shared by |
103 | * all cpus. Give -20. | 103 | * all cpus. Give MIN_NICE. |
104 | */ | 104 | */ |
105 | RESCUER_NICE_LEVEL = -20, | 105 | RESCUER_NICE_LEVEL = MIN_NICE, |
106 | HIGHPRI_NICE_LEVEL = -20, | 106 | HIGHPRI_NICE_LEVEL = MIN_NICE, |
107 | 107 | ||
108 | WQ_NAME_LEN = 24, | 108 | WQ_NAME_LEN = 24, |
109 | }; | 109 | }; |
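The workqueue hunk is part of the same tree-wide cleanup: the hard-coded -20 becomes MIN_NICE, naming the end of the nice range instead of spelling out the literal. A tiny stand-alone sketch with the values these macros are assumed to carry:

#include <stdio.h>

#define MIN_NICE   (-20)
#define MAX_NICE     19
#define NICE_WIDTH  (MAX_NICE - MIN_NICE + 1)

int main(void)
{
        printf("nice spans [%d, %d], %d levels\n", MIN_NICE, MAX_NICE, NICE_WIDTH);
        return 0;
}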