Diffstat (limited to 'kernel/sched.c')
 kernel/sched.c | 99
 1 file changed, 59 insertions(+), 40 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index deb5ac8c12f3..7d97ff7c4478 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -125,6 +125,9 @@ DEFINE_TRACE(sched_switch);
 DEFINE_TRACE(sched_migrate_task);
 
 #ifdef CONFIG_SMP
+
+static void double_rq_lock(struct rq *rq1, struct rq *rq2);
+
 /*
  * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
  * Since cpu_power is a 'constant', we can use a reciprocal divide.
@@ -1320,8 +1323,8 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
  * slice expiry etc.
  */
 
-#define WEIGHT_IDLEPRIO         2
-#define WMULT_IDLEPRIO          (1 << 31)
+#define WEIGHT_IDLEPRIO         3
+#define WMULT_IDLEPRIO          1431655765
 
 /*
  * Nice levels are multiplicative, with a gentle 10% change for every
@@ -3877,19 +3880,24 @@ int select_nohz_load_balancer(int stop_tick)
        int cpu = smp_processor_id();
 
        if (stop_tick) {
-               cpumask_set_cpu(cpu, nohz.cpu_mask);
                cpu_rq(cpu)->in_nohz_recently = 1;
 
-               /*
-                * If we are going offline and still the leader, give up!
-                */
-               if (!cpu_active(cpu) &&
-                   atomic_read(&nohz.load_balancer) == cpu) {
+               if (!cpu_active(cpu)) {
+                       if (atomic_read(&nohz.load_balancer) != cpu)
+                               return 0;
+
+                       /*
+                        * If we are going offline and still the leader,
+                        * give up!
+                        */
                        if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
                                BUG();
+
                        return 0;
                }
 
+               cpumask_set_cpu(cpu, nohz.cpu_mask);
+
                /* time for ilb owner also to sleep */
                if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
                        if (atomic_read(&nohz.load_balancer) == cpu)
@@ -4437,7 +4445,7 @@ void __kprobes sub_preempt_count(int val)
        /*
         * Underflow?
         */
-       if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
+       if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
                return;
        /*
         * Is the spinlock portion underflowing?
@@ -4684,8 +4692,8 @@ EXPORT_SYMBOL(default_wake_function);
  * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
  * zero in this (rare) case, and we handle it by continuing to scan the queue.
  */
-static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
-                            int nr_exclusive, int sync, void *key)
+void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+                       int nr_exclusive, int sync, void *key)
 {
        wait_queue_t *curr, *next;
 
@@ -5123,7 +5131,7 @@ int can_nice(const struct task_struct *p, const int nice)
  * sys_setpriority is a more generic, but much slower function that
  * does similar things.
  */
-asmlinkage long sys_nice(int increment)
+SYSCALL_DEFINE1(nice, int, increment)
 {
        long nice, retval;
 
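This and the following hunks convert the scheduler system calls from open-coded
asmlinkage definitions to the SYSCALL_DEFINEn() macros. The macro still produces the
familiar sys_* entry point; on architectures that need it, it can additionally emit a
wrapper that receives every argument as a register-sized long and casts it back to the
declared type. A simplified sketch of that idea (illustrative only -- the kernel's
actual macro machinery in <linux/syscalls.h> is more involved, and the helper names
here are placeholders):

        /*
         * Illustrative sketch of what SYSCALL_DEFINE1(nice, int, increment)
         * conceptually provides: the real body keeps its typed arguments,
         * and an optional per-arch wrapper narrows the register-sized value
         * to the declared type before calling it.
         */
        asmlinkage long SYSC_nice(int increment);      /* the function body below   */
        asmlinkage long sys_nice(long increment)       /* optional per-arch wrapper */
        {
                return SYSC_nice((int)increment);
        }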
@@ -5430,8 +5438,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
  * @policy: new policy.
  * @param: structure containing the new RT priority.
  */
-asmlinkage long
-sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
+               struct sched_param __user *, param)
 {
        /* negative values for policy are not valid */
        if (policy < 0)
@@ -5445,7 +5453,7 @@ sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
  * @pid: the pid in question.
  * @param: structure containing the new RT priority.
  */
-asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
 {
        return do_sched_setscheduler(pid, -1, param);
 }
@@ -5454,7 +5462,7 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
  * sys_sched_getscheduler - get the policy (scheduling class) of a thread
  * @pid: the pid in question.
  */
-asmlinkage long sys_sched_getscheduler(pid_t pid)
+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
 {
        struct task_struct *p;
        int retval;
@@ -5479,7 +5487,7 @@ asmlinkage long sys_sched_getscheduler(pid_t pid)
  * @pid: the pid in question.
  * @param: structure containing the RT priority.
  */
-asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)
+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
 {
        struct sched_param lp;
        struct task_struct *p;
@@ -5597,8 +5605,8 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
  * @len: length in bytes of the bitmask pointed to by user_mask_ptr
  * @user_mask_ptr: user-space pointer to the new cpu mask
  */
-asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
-                                     unsigned long __user *user_mask_ptr)
+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
+               unsigned long __user *, user_mask_ptr)
 {
        cpumask_var_t new_mask;
        int retval;
@@ -5645,8 +5653,8 @@ out_unlock:
  * @len: length in bytes of the bitmask pointed to by user_mask_ptr
  * @user_mask_ptr: user-space pointer to hold the current cpu mask
  */
-asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
-                                     unsigned long __user *user_mask_ptr)
+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
+               unsigned long __user *, user_mask_ptr)
 {
        int ret;
        cpumask_var_t mask;
@@ -5675,7 +5683,7 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
  * This function yields the current CPU to other tasks. If there are no
  * other threads running on this CPU then this function will return.
  */
-asmlinkage long sys_sched_yield(void)
+SYSCALL_DEFINE0(sched_yield)
 {
        struct rq *rq = this_rq_lock();
 
@@ -5816,7 +5824,7 @@ long __sched io_schedule_timeout(long timeout)
  * this syscall returns the maximum rt_priority that can be used
  * by a given scheduling class.
  */
-asmlinkage long sys_sched_get_priority_max(int policy)
+SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
 {
        int ret = -EINVAL;
 
@@ -5841,7 +5849,7 @@ asmlinkage long sys_sched_get_priority_max(int policy)
  * this syscall returns the minimum rt_priority that can be used
  * by a given scheduling class.
  */
-asmlinkage long sys_sched_get_priority_min(int policy)
+SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
 {
        int ret = -EINVAL;
 
@@ -5866,8 +5874,8 @@ asmlinkage long sys_sched_get_priority_min(int policy)
  * this syscall writes the default timeslice value of a given process
  * into the user-space timespec buffer. A value of '0' means infinity.
  */
-asmlinkage
-long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+               struct timespec __user *, interval)
 {
        struct task_struct *p;
        unsigned int time_slice;
@@ -5936,12 +5944,7 @@ void sched_show_task(struct task_struct *p)
        printk(KERN_CONT " %016lx ", thread_saved_pc(p));
 #endif
 #ifdef CONFIG_DEBUG_STACK_USAGE
-       {
-               unsigned long *n = end_of_stack(p);
-               while (!*n)
-                       n++;
-               free = (unsigned long)n - (unsigned long)end_of_stack(p);
-       }
+       free = stack_not_used(p);
 #endif
        printk(KERN_CONT "%5lu %5d %6d\n", free,
                task_pid_nr(p), task_pid_nr(p->real_parent));
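The open-coded scan of the thread stack is replaced by the stack_not_used() helper.
Judging from the code removed here, the helper encapsulates the same walk -- start at
end_of_stack() and skip words that are still zero -- along these lines (sketch based on
the removed block; the authoritative definition lives in the sched headers):

        static inline unsigned long stack_not_used(struct task_struct *p)
        {
                unsigned long *n = end_of_stack(p);

                while (!*n)     /* words never written to are still zero */
                        n++;

                return (unsigned long)n - (unsigned long)end_of_stack(p);
        }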
@@ -6936,20 +6939,26 @@ static void free_rootdomain(struct root_domain *rd)
 
 static void rq_attach_root(struct rq *rq, struct root_domain *rd)
 {
+       struct root_domain *old_rd = NULL;
        unsigned long flags;
 
        spin_lock_irqsave(&rq->lock, flags);
 
        if (rq->rd) {
-               struct root_domain *old_rd = rq->rd;
+               old_rd = rq->rd;
 
                if (cpumask_test_cpu(rq->cpu, old_rd->online))
                        set_rq_offline(rq);
 
                cpumask_clear_cpu(rq->cpu, old_rd->span);
 
-               if (atomic_dec_and_test(&old_rd->refcount))
-                       free_rootdomain(old_rd);
+               /*
+                * If we dont want to free the old_rt yet then
+                * set old_rd to NULL to skip the freeing later
+                * in this function:
+                */
+               if (!atomic_dec_and_test(&old_rd->refcount))
+                       old_rd = NULL;
        }
 
        atomic_inc(&rd->refcount);
@@ -6960,6 +6969,9 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
                set_rq_online(rq);
 
        spin_unlock_irqrestore(&rq->lock, flags);
+
+       if (old_rd)
+               free_rootdomain(old_rd);
 }
 
 static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem)
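Taken together, the two rq_attach_root() hunks only change where the old root domain is
released: old_rd is now tracked outside the locked region, reset to NULL when its
refcount has not yet dropped to zero, and free_rootdomain() runs only after
spin_unlock_irqrestore(), so the free no longer happens while rq->lock is held with
interrupts disabled.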
@@ -7282,10 +7294,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
  * groups, so roll our own. Now each node has its own list of groups which
  * gets dynamically allocated.
  */
-static DEFINE_PER_CPU(struct sched_domain, node_domains);
+static DEFINE_PER_CPU(struct static_sched_domain, node_domains);
 static struct sched_group ***sched_group_nodes_bycpu;
 
-static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
+static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
 static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
 
 static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
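node_domains and allnodes_domains now use the same per-CPU struct static_sched_domain
wrapper as the other domain levels in this file, which is why the three
__build_sched_domains() hunks that follow reach the embedded domain through the .sd
member instead of taking the per-CPU variable's address directly.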
@@ -7560,7 +7572,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
 #ifdef CONFIG_NUMA
                if (cpumask_weight(cpu_map) >
                                SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
-                       sd = &per_cpu(allnodes_domains, i);
+                       sd = &per_cpu(allnodes_domains, i).sd;
                        SD_INIT(sd, ALLNODES);
                        set_domain_attribute(sd, attr);
                        cpumask_copy(sched_domain_span(sd), cpu_map);
@@ -7570,7 +7582,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
                } else
                        p = NULL;
 
-               sd = &per_cpu(node_domains, i);
+               sd = &per_cpu(node_domains, i).sd;
                SD_INIT(sd, NODE);
                set_domain_attribute(sd, attr);
                sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
@@ -7688,7 +7700,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
                for_each_cpu(j, nodemask) {
                        struct sched_domain *sd;
 
-                       sd = &per_cpu(node_domains, j);
+                       sd = &per_cpu(node_domains, j).sd;
                        sd->groups = sg;
                }
                sg->__cpu_power = 0;
@@ -9047,6 +9059,13 @@ static int tg_schedulable(struct task_group *tg, void *data)
                runtime = d->rt_runtime;
        }
 
+#ifdef CONFIG_USER_SCHED
+       if (tg == &root_task_group) {
+               period = global_rt_period();
+               runtime = global_rt_runtime();
+       }
+#endif
+
        /*
         * Cannot have more runtime than the period.
         */
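With CONFIG_USER_SCHED, the root task group is special-cased here: its schedulability
check uses the global RT bandwidth (global_rt_period()/global_rt_runtime()) instead of
the per-group values gathered just above, before the runtime-versus-period comparison
that follows.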