Diffstat (limited to 'kernel')

 kernel/sched.c           | 223
 kernel/time/tick-sched.c |   9
 2 files changed, 219 insertions(+), 13 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index ba053d88c8c6..74599286230c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -224,6 +224,9 @@ struct rq {
 #ifdef CONFIG_SMP
 	unsigned long cpu_load[3];
 	unsigned char idle_at_tick;
+#ifdef CONFIG_NO_HZ
+	unsigned char in_nohz_recently;
+#endif
 #endif
 	unsigned long long nr_switches;
 
@@ -1050,6 +1053,17 @@ static void resched_task(struct task_struct *p)
 	if (!tsk_is_polling(p))
 		smp_send_reschedule(cpu);
 }
+
+static void resched_cpu(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	if (!spin_trylock_irqsave(&rq->lock, flags))
+		return;
+	resched_task(cpu_curr(cpu));
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
 #else
 static inline void resched_task(struct task_struct *p)
 {
@@ -2658,6 +2672,12 @@ redo:
 		double_rq_unlock(this_rq, busiest);
 		local_irq_restore(flags);
 
+		/*
+		 * some other cpu did the load balance for us.
+		 */
+		if (nr_moved && this_cpu != smp_processor_id())
+			resched_cpu(this_cpu);
+
 		/* All tasks on this runqueue were pinned by CPU affinity */
 		if (unlikely(all_pinned)) {
 			cpu_clear(cpu_of(busiest), cpus);
@@ -2928,27 +2948,98 @@ static void update_load(struct rq *this_rq)
 	}
 }
 
+#ifdef CONFIG_NO_HZ
+static struct {
+	atomic_t load_balancer;
+	cpumask_t cpu_mask;
+} nohz ____cacheline_aligned = {
+	.load_balancer = ATOMIC_INIT(-1),
+	.cpu_mask = CPU_MASK_NONE,
+};
+
 /*
- * run_rebalance_domains is triggered when needed from the scheduler tick.
+ * This routine will try to nominate the ilb (idle load balancing)
+ * owner among the cpus whose ticks are stopped. ilb owner will do the idle
+ * load balancing on behalf of all those cpus. If all the cpus in the system
+ * go into this tickless mode, then there will be no ilb owner (as there is
+ * no need for one) and all the cpus will sleep till the next wakeup event
+ * arrives...
+ *
+ * For the ilb owner, tick is not stopped. And this tick will be used
+ * for idle load balancing. ilb owner will still be part of
+ * nohz.cpu_mask..
+ *
+ * While stopping the tick, this cpu will become the ilb owner if there
+ * is no other owner. And will be the owner till that cpu becomes busy
+ * or if all cpus in the system stop their ticks at which point
+ * there is no need for ilb owner.
  *
+ * When the ilb owner becomes busy, it nominates another owner, during the
+ * next busy scheduler_tick()
+ */
+int select_nohz_load_balancer(int stop_tick)
+{
+	int cpu = smp_processor_id();
+
+	if (stop_tick) {
+		cpu_set(cpu, nohz.cpu_mask);
+		cpu_rq(cpu)->in_nohz_recently = 1;
+
+		/*
+		 * If we are going offline and still the leader, give up!
+		 */
+		if (cpu_is_offline(cpu) &&
+		    atomic_read(&nohz.load_balancer) == cpu) {
+			if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
+				BUG();
+			return 0;
+		}
+
+		/* time for ilb owner also to sleep */
+		if (cpus_weight(nohz.cpu_mask) == num_online_cpus()) {
+			if (atomic_read(&nohz.load_balancer) == cpu)
+				atomic_set(&nohz.load_balancer, -1);
+			return 0;
+		}
+
+		if (atomic_read(&nohz.load_balancer) == -1) {
+			/* make me the ilb owner */
+			if (atomic_cmpxchg(&nohz.load_balancer, -1, cpu) == -1)
+				return 1;
+		} else if (atomic_read(&nohz.load_balancer) == cpu)
+			return 1;
+	} else {
+		if (!cpu_isset(cpu, nohz.cpu_mask))
+			return 0;
+
+		cpu_clear(cpu, nohz.cpu_mask);
+
+		if (atomic_read(&nohz.load_balancer) == cpu)
+			if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
+				BUG();
+	}
+	return 0;
+}
+#endif
+
+static DEFINE_SPINLOCK(balancing);
+
+/*
  * It checks each scheduling domain to see if it is due to be balanced,
  * and initiates a balancing operation if so.
  *
  * Balancing parameters are set up in arch_init_sched_domains.
  */
-static DEFINE_SPINLOCK(balancing);
-
-static void run_rebalance_domains(struct softirq_action *h)
+static inline void rebalance_domains(int cpu, enum idle_type idle)
 {
-	int this_cpu = smp_processor_id(), balance = 1;
-	struct rq *this_rq = cpu_rq(this_cpu);
+	int balance = 1;
+	struct rq *rq = cpu_rq(cpu);
 	unsigned long interval;
 	struct sched_domain *sd;
-	enum idle_type idle = this_rq->idle_at_tick ? SCHED_IDLE : NOT_IDLE;
-	/* Earliest time when we have to call run_rebalance_domains again */
+	/* Earliest time when we have to do rebalance again */
 	unsigned long next_balance = jiffies + 60*HZ;
 
-	for_each_domain(this_cpu, sd) {
+	for_each_domain(cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;
 
@@ -2967,7 +3058,7 @@ static void run_rebalance_domains(struct softirq_action *h)
 		}
 
 		if (time_after_eq(jiffies, sd->last_balance + interval)) {
-			if (load_balance(this_cpu, this_rq, sd, idle, &balance)) {
+			if (load_balance(cpu, rq, sd, idle, &balance)) {
 				/*
 				 * We've pulled tasks over so either we're no
 				 * longer idle, or one of our SMT siblings is
@@ -2991,7 +3082,114 @@ out:
 		if (!balance)
 			break;
 	}
-	this_rq->next_balance = next_balance;
+	rq->next_balance = next_balance;
+}
+
+/*
+ * run_rebalance_domains is triggered when needed from the scheduler tick.
+ * In CONFIG_NO_HZ case, the idle load balance owner will do the
+ * rebalancing for all the cpus for whom scheduler ticks are stopped.
+ */
+static void run_rebalance_domains(struct softirq_action *h)
+{
+	int local_cpu = smp_processor_id();
+	struct rq *local_rq = cpu_rq(local_cpu);
+	enum idle_type idle = local_rq->idle_at_tick ? SCHED_IDLE : NOT_IDLE;
+
+	rebalance_domains(local_cpu, idle);
+
+#ifdef CONFIG_NO_HZ
+	/*
+	 * If this cpu is the owner for idle load balancing, then do the
+	 * balancing on behalf of the other idle cpus whose ticks are
+	 * stopped.
+	 */
+	if (local_rq->idle_at_tick &&
+	    atomic_read(&nohz.load_balancer) == local_cpu) {
+		cpumask_t cpus = nohz.cpu_mask;
+		struct rq *rq;
+		int balance_cpu;
+
+		cpu_clear(local_cpu, cpus);
+		for_each_cpu_mask(balance_cpu, cpus) {
+			/*
+			 * If this cpu gets work to do, stop the load balancing
+			 * work being done for other cpus. Next load
+			 * balancing owner will pick it up.
+			 */
+			if (need_resched())
+				break;
+
+			rebalance_domains(balance_cpu, SCHED_IDLE);
+
+			rq = cpu_rq(balance_cpu);
+			if (time_after(local_rq->next_balance, rq->next_balance))
+				local_rq->next_balance = rq->next_balance;
+		}
+	}
+#endif
+}
+
+/*
+ * Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing.
+ *
+ * In case of CONFIG_NO_HZ, this is the place where we nominate a new
+ * idle load balancing owner or decide to stop the periodic load balancing,
+ * if the whole system is idle.
+ */
+static inline void trigger_load_balance(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+#ifdef CONFIG_NO_HZ
+	/*
+	 * If we were in the nohz mode recently and busy at the current
+	 * scheduler tick, then check if we need to nominate new idle
+	 * load balancer.
+	 */
+	if (rq->in_nohz_recently && !rq->idle_at_tick) {
+		rq->in_nohz_recently = 0;
+
+		if (atomic_read(&nohz.load_balancer) == cpu) {
+			cpu_clear(cpu, nohz.cpu_mask);
+			atomic_set(&nohz.load_balancer, -1);
+		}
+
+		if (atomic_read(&nohz.load_balancer) == -1) {
+			/*
+			 * simple selection for now: Nominate the
+			 * first cpu in the nohz list to be the next
+			 * ilb owner.
+			 *
+			 * TBD: Traverse the sched domains and nominate
+			 * the nearest cpu in the nohz.cpu_mask.
+			 */
+			int ilb = first_cpu(nohz.cpu_mask);
+
+			if (ilb != NR_CPUS)
+				resched_cpu(ilb);
+		}
+	}
+
+	/*
+	 * If this cpu is idle and doing idle load balancing for all the
+	 * cpus with ticks stopped, is it time for that to stop?
+	 */
+	if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) == cpu &&
+	    cpus_weight(nohz.cpu_mask) == num_online_cpus()) {
+		resched_cpu(cpu);
+		return;
+	}
+
+	/*
+	 * If this cpu is idle and the idle load balancing is done by
+	 * someone else, then no need raise the SCHED_SOFTIRQ
+	 */
+	if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) != cpu &&
+	    cpu_isset(cpu, nohz.cpu_mask))
+		return;
+#endif
+	if (time_after_eq(jiffies, rq->next_balance))
+		raise_softirq(SCHED_SOFTIRQ);
 }
 #else
 /*
@@ -3224,8 +3422,7 @@ void scheduler_tick(void)
 #ifdef CONFIG_SMP
 	update_load(rq);
 	rq->idle_at_tick = idle_at_tick;
-	if (time_after_eq(jiffies, rq->next_balance))
-		raise_softirq(SCHED_SOFTIRQ);
+	trigger_load_balance(cpu);
 #endif
 }
 
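The ilb owner that balances on behalf of the tickless cpus in run_rebalance_domains() above pulls its own rq->next_balance back to the earliest next_balance among them, so its tick fires no later than any stopped cpu would have needed to rebalance itself. The standalone userspace sketch below is not part of the patch; it only illustrates that comparison. Its time_after() mirrors the kernel macro's wraparound-safe jiffies test, and the deadline values are made up for the example.

#include <stdio.h>

typedef unsigned long jiffies_t;

/* same wraparound-safe comparison as the kernel's time_after() macro */
static int time_after(jiffies_t a, jiffies_t b)
{
	return (long)(b - a) < 0;
}

int main(void)
{
	/* made-up next_balance deadlines of three cpus with stopped ticks */
	jiffies_t next[] = { 1200, 950, 1100 };
	/* the ilb owner's own deadline before walking nohz.cpu_mask */
	jiffies_t owner_next = 1000;
	unsigned int i;

	for (i = 0; i < sizeof(next) / sizeof(next[0]); i++) {
		/* same test as run_rebalance_domains(): keep the earliest */
		if (time_after(owner_next, next[i]))
			owner_next = next[i];
	}

	/* prints 950: the owner must run the balancer again by then */
	printf("owner's next_balance = %lu\n", owner_next);
	return 0;
}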
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f4fc867f467d..3483e6cb9549 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -217,6 +217,14 @@ void tick_nohz_stop_sched_tick(void)
 		 * the scheduler tick in nohz_restart_sched_tick.
 		 */
 		if (!ts->tick_stopped) {
+			if (select_nohz_load_balancer(1)) {
+				/*
+				 * sched tick not stopped!
+				 */
+				cpu_clear(cpu, nohz_cpu_mask);
+				goto out;
+			}
+
 			ts->idle_tick = ts->sched_timer.expires;
 			ts->tick_stopped = 1;
 			ts->idle_jiffies = last_jiffies;
@@ -285,6 +293,7 @@ void tick_nohz_restart_sched_tick(void)
 	now = ktime_get();
 
 	local_irq_disable();
+	select_nohz_load_balancer(0);
 	tick_do_update_jiffies64(now);
 	cpu_clear(cpu, nohz_cpu_mask);
 
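In the tick-stop path above, the tick is only stopped when select_nohz_load_balancer(1) returns 0; a return of 1 means this cpu has just been nominated the idle load balancing owner and must keep ticking, and select_nohz_load_balancer(0) on tick restart drops the cpu out of nohz.cpu_mask (and out of ownership if it held it). The nomination itself is a single atomic_cmpxchg() on nohz.load_balancer. The standalone sketch below is not part of the patch: it models that handshake with C11 atomics and pthreads instead of the kernel primitives, and claim_ilb(), release_ilb(), cpu_goes_idle() and NCPUS are names invented for the illustration.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS	4

/* -1 means "no idle load balancing owner", otherwise the owning cpu id */
static atomic_int load_balancer = -1;

/* a cpu stopping its tick tries to become owner; returns 1 if it is owner */
static int claim_ilb(int cpu)
{
	int none = -1;

	if (atomic_load(&load_balancer) == cpu)
		return 1;
	/* the compare-and-swap lets exactly one claimant win the slot */
	return atomic_compare_exchange_strong(&load_balancer, &none, cpu);
}

/* a cpu going busy again drops ownership if (and only if) it held it */
static void release_ilb(int cpu)
{
	int me = cpu;

	atomic_compare_exchange_strong(&load_balancer, &me, -1);
}

static void *cpu_goes_idle(void *arg)
{
	int cpu = (int)(long)arg;

	if (claim_ilb(cpu))
		printf("cpu%d: nominated ilb owner, tick keeps running\n", cpu);
	else
		printf("cpu%d: tick stopped, cpu%d balances for me\n",
		       cpu, atomic_load(&load_balancer));
	return NULL;
}

int main(void)
{
	pthread_t tid[NCPUS];
	long cpu;

	for (cpu = 0; cpu < NCPUS; cpu++)
		pthread_create(&tid[cpu], NULL, cpu_goes_idle, (void *)cpu);
	for (cpu = 0; cpu < NCPUS; cpu++)
		pthread_join(tid[cpu], NULL);

	/* the owner goes busy again: ownership returns to -1 (unowned) */
	release_ilb(atomic_load(&load_balancer));
	printf("owner after release: %d\n", atomic_load(&load_balancer));
	return 0;
}

Built with e.g. "cc -pthread ilb_sketch.c", exactly one thread prints the "nominated" line, which is the single-owner guarantee the cmpxchg provides in select_nohz_load_balancer().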
