Diffstat (limited to 'kernel/sched.c')
 -rw-r--r--  kernel/sched.c  87
 1 file changed, 56 insertions(+), 31 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index deb5ac8c12f3..e72485033c48 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -125,6 +125,9 @@ DEFINE_TRACE(sched_switch);
 DEFINE_TRACE(sched_migrate_task);
 
 #ifdef CONFIG_SMP
+
+static void double_rq_lock(struct rq *rq1, struct rq *rq2);
+
 /*
  * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
  * Since cpu_power is a 'constant', we can use a reciprocal divide.
@@ -1320,8 +1323,8 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
  * slice expiry etc.
  */
 
-#define WEIGHT_IDLEPRIO         2
-#define WMULT_IDLEPRIO          (1 << 31)
+#define WEIGHT_IDLEPRIO         3
+#define WMULT_IDLEPRIO          1431655765
 
 /*
  * Nice levels are multiplicative, with a gentle 10% change for every
@@ -2263,6 +2266,16 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
         if (!sched_feat(SYNC_WAKEUPS))
                 sync = 0;
 
+        if (!sync) {
+                if (current->se.avg_overlap < sysctl_sched_migration_cost &&
+                          p->se.avg_overlap < sysctl_sched_migration_cost)
+                        sync = 1;
+        } else {
+                if (current->se.avg_overlap >= sysctl_sched_migration_cost ||
+                          p->se.avg_overlap >= sysctl_sched_migration_cost)
+                        sync = 0;
+        }
+
 #ifdef CONFIG_SMP
         if (sched_feat(LB_WAKEUP_UPDATE)) {
                 struct sched_domain *sd;
@@ -3877,19 +3890,24 @@ int select_nohz_load_balancer(int stop_tick)
         int cpu = smp_processor_id();
 
         if (stop_tick) {
-                cpumask_set_cpu(cpu, nohz.cpu_mask);
                 cpu_rq(cpu)->in_nohz_recently = 1;
 
-                /*
-                 * If we are going offline and still the leader, give up!
-                 */
-                if (!cpu_active(cpu) &&
-                    atomic_read(&nohz.load_balancer) == cpu) {
+                if (!cpu_active(cpu)) {
+                        if (atomic_read(&nohz.load_balancer) != cpu)
+                                return 0;
+
+                        /*
+                         * If we are going offline and still the leader,
+                         * give up!
+                         */
                         if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
                                 BUG();
+
                         return 0;
                 }
 
+                cpumask_set_cpu(cpu, nohz.cpu_mask);
+
                 /* time for ilb owner also to sleep */
                 if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
                         if (atomic_read(&nohz.load_balancer) == cpu)
@@ -4437,7 +4455,7 @@ void __kprobes sub_preempt_count(int val)
         /*
          * Underflow?
          */
-        if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
+        if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
                 return;
         /*
          * Is the spinlock portion underflowing?
@@ -4684,8 +4702,8 @@ EXPORT_SYMBOL(default_wake_function);
  * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
  * zero in this (rare) case, and we handle it by continuing to scan the queue.
  */
-static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
-                             int nr_exclusive, int sync, void *key)
+void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+                        int nr_exclusive, int sync, void *key)
 {
         wait_queue_t *curr, *next;
 
@@ -5123,7 +5141,7 @@ int can_nice(const struct task_struct *p, const int nice)
  * sys_setpriority is a more generic, but much slower function that
  * does similar things.
  */
-asmlinkage long sys_nice(int increment)
+SYSCALL_DEFINE1(nice, int, increment)
 {
         long nice, retval;
 
@@ -5430,8 +5448,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
  * @policy: new policy.
  * @param: structure containing the new RT priority.
  */
-asmlinkage long
-sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
+                struct sched_param __user *, param)
 {
         /* negative values for policy are not valid */
         if (policy < 0)
@@ -5445,7 +5463,7 @@ sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
  * @pid: the pid in question.
  * @param: structure containing the new RT priority.
  */
-asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
 {
         return do_sched_setscheduler(pid, -1, param);
 }
@@ -5454,7 +5472,7 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
  * sys_sched_getscheduler - get the policy (scheduling class) of a thread
  * @pid: the pid in question.
  */
-asmlinkage long sys_sched_getscheduler(pid_t pid)
+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
 {
         struct task_struct *p;
         int retval;
@@ -5479,7 +5497,7 @@ asmlinkage long sys_sched_getscheduler(pid_t pid)
  * @pid: the pid in question.
  * @param: structure containing the RT priority.
  */
-asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)
+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
 {
         struct sched_param lp;
         struct task_struct *p;
@@ -5597,8 +5615,8 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
  * @len: length in bytes of the bitmask pointed to by user_mask_ptr
  * @user_mask_ptr: user-space pointer to the new cpu mask
  */
-asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
-                                      unsigned long __user *user_mask_ptr)
+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
+                unsigned long __user *, user_mask_ptr)
 {
         cpumask_var_t new_mask;
         int retval;
@@ -5645,8 +5663,8 @@ out_unlock:
  * @len: length in bytes of the bitmask pointed to by user_mask_ptr
  * @user_mask_ptr: user-space pointer to hold the current cpu mask
  */
-asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
-                                      unsigned long __user *user_mask_ptr)
+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
+                unsigned long __user *, user_mask_ptr)
 {
         int ret;
         cpumask_var_t mask;
@@ -5675,7 +5693,7 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
  * This function yields the current CPU to other tasks. If there are no
  * other threads running on this CPU then this function will return.
  */
-asmlinkage long sys_sched_yield(void)
+SYSCALL_DEFINE0(sched_yield)
 {
         struct rq *rq = this_rq_lock();
 
@@ -5816,7 +5834,7 @@ long __sched io_schedule_timeout(long timeout)
  * this syscall returns the maximum rt_priority that can be used
  * by a given scheduling class.
  */
-asmlinkage long sys_sched_get_priority_max(int policy)
+SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
 {
         int ret = -EINVAL;
 
@@ -5841,7 +5859,7 @@ asmlinkage long sys_sched_get_priority_max(int policy)
  * this syscall returns the minimum rt_priority that can be used
  * by a given scheduling class.
  */
-asmlinkage long sys_sched_get_priority_min(int policy)
+SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
 {
         int ret = -EINVAL;
 
@@ -5866,8 +5884,8 @@ asmlinkage long sys_sched_get_priority_min(int policy)
  * this syscall writes the default timeslice value of a given process
  * into the user-space timespec buffer. A value of '0' means infinity.
  */
-asmlinkage
-long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+                struct timespec __user *, interval)
 {
         struct task_struct *p;
         unsigned int time_slice;
@@ -7282,10 +7300,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
  * groups, so roll our own. Now each node has its own list of groups which
  * gets dynamically allocated.
  */
-static DEFINE_PER_CPU(struct sched_domain, node_domains);
+static DEFINE_PER_CPU(struct static_sched_domain, node_domains);
 static struct sched_group ***sched_group_nodes_bycpu;
 
-static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
+static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
 static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
 
 static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
@@ -7560,7 +7578,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
 #ifdef CONFIG_NUMA
                 if (cpumask_weight(cpu_map) >
                         SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
-                        sd = &per_cpu(allnodes_domains, i);
+                        sd = &per_cpu(allnodes_domains, i).sd;
                         SD_INIT(sd, ALLNODES);
                         set_domain_attribute(sd, attr);
                         cpumask_copy(sched_domain_span(sd), cpu_map);
@@ -7570,7 +7588,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
                 } else
                         p = NULL;
 
-                sd = &per_cpu(node_domains, i);
+                sd = &per_cpu(node_domains, i).sd;
                 SD_INIT(sd, NODE);
                 set_domain_attribute(sd, attr);
                 sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
@@ -7688,7 +7706,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
                 for_each_cpu(j, nodemask) {
                         struct sched_domain *sd;
 
-                        sd = &per_cpu(node_domains, j);
+                        sd = &per_cpu(node_domains, j).sd;
                         sd->groups = sg;
                 }
                 sg->__cpu_power = 0;
@@ -9047,6 +9065,13 @@ static int tg_schedulable(struct task_group *tg, void *data)
                 runtime = d->rt_runtime;
         }
 
+#ifdef CONFIG_USER_SCHED
+        if (tg == &root_task_group) {
+                period = global_rt_period();
+                runtime = global_rt_runtime();
+        }
+#endif
+
         /*
          * Cannot have more runtime than the period.
          */
