about summary refs log tree commit diff stats
path: root/kernel/sched.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- kernel/sched.c 99
1 files changed, 59 insertions, 40 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index deb5ac8c12f3..7d97ff7c4478 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -125,6 +125,9 @@ DEFINE_TRACE(sched_switch);
125DEFINE_TRACE(sched_migrate_task); 125DEFINE_TRACE(sched_migrate_task);
126 126
127#ifdef CONFIG_SMP 127#ifdef CONFIG_SMP
128
129static void double_rq_lock(struct rq *rq1, struct rq *rq2);
130
128/* 131/*
129 * Divide a load by a sched group cpu_power : (load / sg->__cpu_power) 132 * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
130 * Since cpu_power is a 'constant', we can use a reciprocal divide. 133 * Since cpu_power is a 'constant', we can use a reciprocal divide.
@@ -1320,8 +1323,8 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
1320 * slice expiry etc. 1323 * slice expiry etc.
1321 */ 1324 */
1322 1325
1323#define WEIGHT_IDLEPRIO 2 1326#define WEIGHT_IDLEPRIO 3
1324#define WMULT_IDLEPRIO (1 << 31) 1327#define WMULT_IDLEPRIO 1431655765
1325 1328
1326/* 1329/*
1327 * Nice levels are multiplicative, with a gentle 10% change for every 1330 * Nice levels are multiplicative, with a gentle 10% change for every
@@ -3877,19 +3880,24 @@ int select_nohz_load_balancer(int stop_tick)
3877 int cpu = smp_processor_id(); 3880 int cpu = smp_processor_id();
3878 3881
3879 if (stop_tick) { 3882 if (stop_tick) {
3880 cpumask_set_cpu(cpu, nohz.cpu_mask);
3881 cpu_rq(cpu)->in_nohz_recently = 1; 3883 cpu_rq(cpu)->in_nohz_recently = 1;
3882 3884
3883 /* 3885 if (!cpu_active(cpu)) {
3884 * If we are going offline and still the leader, give up! 3886 if (atomic_read(&nohz.load_balancer) != cpu)
3885 */ 3887 return 0;
3886 if (!cpu_active(cpu) && 3888
3887 atomic_read(&nohz.load_balancer) == cpu) { 3889 /*
3890 * If we are going offline and still the leader,
3891 * give up!
3892 */
3888 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) 3893 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
3889 BUG(); 3894 BUG();
3895
3890 return 0; 3896 return 0;
3891 } 3897 }
3892 3898
3899 cpumask_set_cpu(cpu, nohz.cpu_mask);
3900
3893 /* time for ilb owner also to sleep */ 3901 /* time for ilb owner also to sleep */
3894 if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { 3902 if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
3895 if (atomic_read(&nohz.load_balancer) == cpu) 3903 if (atomic_read(&nohz.load_balancer) == cpu)
@@ -4437,7 +4445,7 @@ void __kprobes sub_preempt_count(int val)
4437 /* 4445 /*
4438 * Underflow? 4446 * Underflow?
4439 */ 4447 */
4440 if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked()))) 4448 if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
4441 return; 4449 return;
4442 /* 4450 /*
4443 * Is the spinlock portion underflowing? 4451 * Is the spinlock portion underflowing?
@@ -4684,8 +4692,8 @@ EXPORT_SYMBOL(default_wake_function);
4684 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns 4692 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
4685 * zero in this (rare) case, and we handle it by continuing to scan the queue. 4693 * zero in this (rare) case, and we handle it by continuing to scan the queue.
4686 */ 4694 */
4687static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, 4695void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
4688 int nr_exclusive, int sync, void *key) 4696 int nr_exclusive, int sync, void *key)
4689{ 4697{
4690 wait_queue_t *curr, *next; 4698 wait_queue_t *curr, *next;
4691 4699
@@ -5123,7 +5131,7 @@ int can_nice(const struct task_struct *p, const int nice)
5123 * sys_setpriority is a more generic, but much slower function that 5131 * sys_setpriority is a more generic, but much slower function that
5124 * does similar things. 5132 * does similar things.
5125 */ 5133 */
5126asmlinkage long sys_nice(int increment) 5134SYSCALL_DEFINE1(nice, int, increment)
5127{ 5135{
5128 long nice, retval; 5136 long nice, retval;
5129 5137
@@ -5430,8 +5438,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
5430 * @policy: new policy. 5438 * @policy: new policy.
5431 * @param: structure containing the new RT priority. 5439 * @param: structure containing the new RT priority.
5432 */ 5440 */
5433asmlinkage long 5441SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
5434sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) 5442 struct sched_param __user *, param)
5435{ 5443{
5436 /* negative values for policy are not valid */ 5444 /* negative values for policy are not valid */
5437 if (policy < 0) 5445 if (policy < 0)
@@ -5445,7 +5453,7 @@ sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
5445 * @pid: the pid in question. 5453 * @pid: the pid in question.
5446 * @param: structure containing the new RT priority. 5454 * @param: structure containing the new RT priority.
5447 */ 5455 */
5448asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param) 5456SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
5449{ 5457{
5450 return do_sched_setscheduler(pid, -1, param); 5458 return do_sched_setscheduler(pid, -1, param);
5451} 5459}
@@ -5454,7 +5462,7 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
5454 * sys_sched_getscheduler - get the policy (scheduling class) of a thread 5462 * sys_sched_getscheduler - get the policy (scheduling class) of a thread
5455 * @pid: the pid in question. 5463 * @pid: the pid in question.
5456 */ 5464 */
5457asmlinkage long sys_sched_getscheduler(pid_t pid) 5465SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
5458{ 5466{
5459 struct task_struct *p; 5467 struct task_struct *p;
5460 int retval; 5468 int retval;
@@ -5479,7 +5487,7 @@ asmlinkage long sys_sched_getscheduler(pid_t pid)
5479 * @pid: the pid in question. 5487 * @pid: the pid in question.
5480 * @param: structure containing the RT priority. 5488 * @param: structure containing the RT priority.
5481 */ 5489 */
5482asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) 5490SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
5483{ 5491{
5484 struct sched_param lp; 5492 struct sched_param lp;
5485 struct task_struct *p; 5493 struct task_struct *p;
@@ -5597,8 +5605,8 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
5597 * @len: length in bytes of the bitmask pointed to by user_mask_ptr 5605 * @len: length in bytes of the bitmask pointed to by user_mask_ptr
5598 * @user_mask_ptr: user-space pointer to the new cpu mask 5606 * @user_mask_ptr: user-space pointer to the new cpu mask
5599 */ 5607 */
5600asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, 5608SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
5601 unsigned long __user *user_mask_ptr) 5609 unsigned long __user *, user_mask_ptr)
5602{ 5610{
5603 cpumask_var_t new_mask; 5611 cpumask_var_t new_mask;
5604 int retval; 5612 int retval;
@@ -5645,8 +5653,8 @@ out_unlock:
5645 * @len: length in bytes of the bitmask pointed to by user_mask_ptr 5653 * @len: length in bytes of the bitmask pointed to by user_mask_ptr
5646 * @user_mask_ptr: user-space pointer to hold the current cpu mask 5654 * @user_mask_ptr: user-space pointer to hold the current cpu mask
5647 */ 5655 */
5648asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, 5656SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
5649 unsigned long __user *user_mask_ptr) 5657 unsigned long __user *, user_mask_ptr)
5650{ 5658{
5651 int ret; 5659 int ret;
5652 cpumask_var_t mask; 5660 cpumask_var_t mask;
@@ -5675,7 +5683,7 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
5675 * This function yields the current CPU to other tasks. If there are no 5683 * This function yields the current CPU to other tasks. If there are no
5676 * other threads running on this CPU then this function will return. 5684 * other threads running on this CPU then this function will return.
5677 */ 5685 */
5678asmlinkage long sys_sched_yield(void) 5686SYSCALL_DEFINE0(sched_yield)
5679{ 5687{
5680 struct rq *rq = this_rq_lock(); 5688 struct rq *rq = this_rq_lock();
5681 5689
@@ -5816,7 +5824,7 @@ long __sched io_schedule_timeout(long timeout)
5816 * this syscall returns the maximum rt_priority that can be used 5824 * this syscall returns the maximum rt_priority that can be used
5817 * by a given scheduling class. 5825 * by a given scheduling class.
5818 */ 5826 */
5819asmlinkage long sys_sched_get_priority_max(int policy) 5827SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
5820{ 5828{
5821 int ret = -EINVAL; 5829 int ret = -EINVAL;
5822 5830
@@ -5841,7 +5849,7 @@ asmlinkage long sys_sched_get_priority_max(int policy)
5841 * this syscall returns the minimum rt_priority that can be used 5849 * this syscall returns the minimum rt_priority that can be used
5842 * by a given scheduling class. 5850 * by a given scheduling class.
5843 */ 5851 */
5844asmlinkage long sys_sched_get_priority_min(int policy) 5852SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
5845{ 5853{
5846 int ret = -EINVAL; 5854 int ret = -EINVAL;
5847 5855
@@ -5866,8 +5874,8 @@ asmlinkage long sys_sched_get_priority_min(int policy)
5866 * this syscall writes the default timeslice value of a given process 5874 * this syscall writes the default timeslice value of a given process
5867 * into the user-space timespec buffer. A value of '0' means infinity. 5875 * into the user-space timespec buffer. A value of '0' means infinity.
5868 */ 5876 */
5869asmlinkage 5877SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
5870long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) 5878 struct timespec __user *, interval)
5871{ 5879{
5872 struct task_struct *p; 5880 struct task_struct *p;
5873 unsigned int time_slice; 5881 unsigned int time_slice;
@@ -5936,12 +5944,7 @@ void sched_show_task(struct task_struct *p)
5936 printk(KERN_CONT " %016lx ", thread_saved_pc(p)); 5944 printk(KERN_CONT " %016lx ", thread_saved_pc(p));
5937#endif 5945#endif
5938#ifdef CONFIG_DEBUG_STACK_USAGE 5946#ifdef CONFIG_DEBUG_STACK_USAGE
5939 { 5947 free = stack_not_used(p);
5940 unsigned long *n = end_of_stack(p);
5941 while (!*n)
5942 n++;
5943 free = (unsigned long)n - (unsigned long)end_of_stack(p);
5944 }
5945#endif 5948#endif
5946 printk(KERN_CONT "%5lu %5d %6d\n", free, 5949 printk(KERN_CONT "%5lu %5d %6d\n", free,
5947 task_pid_nr(p), task_pid_nr(p->real_parent)); 5950 task_pid_nr(p), task_pid_nr(p->real_parent));
@@ -6936,20 +6939,26 @@ static void free_rootdomain(struct root_domain *rd)
6936 6939
6937static void rq_attach_root(struct rq *rq, struct root_domain *rd) 6940static void rq_attach_root(struct rq *rq, struct root_domain *rd)
6938{ 6941{
6942 struct root_domain *old_rd = NULL;
6939 unsigned long flags; 6943 unsigned long flags;
6940 6944
6941 spin_lock_irqsave(&rq->lock, flags); 6945 spin_lock_irqsave(&rq->lock, flags);
6942 6946
6943 if (rq->rd) { 6947 if (rq->rd) {
6944 struct root_domain *old_rd = rq->rd; 6948 old_rd = rq->rd;
6945 6949
6946 if (cpumask_test_cpu(rq->cpu, old_rd->online)) 6950 if (cpumask_test_cpu(rq->cpu, old_rd->online))
6947 set_rq_offline(rq); 6951 set_rq_offline(rq);
6948 6952
6949 cpumask_clear_cpu(rq->cpu, old_rd->span); 6953 cpumask_clear_cpu(rq->cpu, old_rd->span);
6950 6954
6951 if (atomic_dec_and_test(&old_rd->refcount)) 6955 /*
6952 free_rootdomain(old_rd); 6956 * If we don't want to free the old_rd yet then
6957 * set old_rd to NULL to skip the freeing later
6958 * in this function:
6959 */
6960 if (!atomic_dec_and_test(&old_rd->refcount))
6961 old_rd = NULL;
6953 } 6962 }
6954 6963
6955 atomic_inc(&rd->refcount); 6964 atomic_inc(&rd->refcount);
@@ -6960,6 +6969,9 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
6960 set_rq_online(rq); 6969 set_rq_online(rq);
6961 6970
6962 spin_unlock_irqrestore(&rq->lock, flags); 6971 spin_unlock_irqrestore(&rq->lock, flags);
6972
6973 if (old_rd)
6974 free_rootdomain(old_rd);
6963} 6975}
6964 6976
6965static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem) 6977static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem)
@@ -7282,10 +7294,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
7282 * groups, so roll our own. Now each node has its own list of groups which 7294 * groups, so roll our own. Now each node has its own list of groups which
7283 * gets dynamically allocated. 7295 * gets dynamically allocated.
7284 */ 7296 */
7285static DEFINE_PER_CPU(struct sched_domain, node_domains); 7297static DEFINE_PER_CPU(struct static_sched_domain, node_domains);
7286static struct sched_group ***sched_group_nodes_bycpu; 7298static struct sched_group ***sched_group_nodes_bycpu;
7287 7299
7288static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); 7300static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
7289static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes); 7301static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
7290 7302
7291static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map, 7303static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
@@ -7560,7 +7572,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
7560#ifdef CONFIG_NUMA 7572#ifdef CONFIG_NUMA
7561 if (cpumask_weight(cpu_map) > 7573 if (cpumask_weight(cpu_map) >
7562 SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) { 7574 SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
7563 sd = &per_cpu(allnodes_domains, i); 7575 sd = &per_cpu(allnodes_domains, i).sd;
7564 SD_INIT(sd, ALLNODES); 7576 SD_INIT(sd, ALLNODES);
7565 set_domain_attribute(sd, attr); 7577 set_domain_attribute(sd, attr);
7566 cpumask_copy(sched_domain_span(sd), cpu_map); 7578 cpumask_copy(sched_domain_span(sd), cpu_map);
@@ -7570,7 +7582,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
7570 } else 7582 } else
7571 p = NULL; 7583 p = NULL;
7572 7584
7573 sd = &per_cpu(node_domains, i); 7585 sd = &per_cpu(node_domains, i).sd;
7574 SD_INIT(sd, NODE); 7586 SD_INIT(sd, NODE);
7575 set_domain_attribute(sd, attr); 7587 set_domain_attribute(sd, attr);
7576 sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd)); 7588 sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
@@ -7688,7 +7700,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
7688 for_each_cpu(j, nodemask) { 7700 for_each_cpu(j, nodemask) {
7689 struct sched_domain *sd; 7701 struct sched_domain *sd;
7690 7702
7691 sd = &per_cpu(node_domains, j); 7703 sd = &per_cpu(node_domains, j).sd;
7692 sd->groups = sg; 7704 sd->groups = sg;
7693 } 7705 }
7694 sg->__cpu_power = 0; 7706 sg->__cpu_power = 0;
@@ -9047,6 +9059,13 @@ static int tg_schedulable(struct task_group *tg, void *data)
9047 runtime = d->rt_runtime; 9059 runtime = d->rt_runtime;
9048 } 9060 }
9049 9061
9062#ifdef CONFIG_USER_SCHED
9063 if (tg == &root_task_group) {
9064 period = global_rt_period();
9065 runtime = global_rt_runtime();
9066 }
9067#endif
9068
9050 /* 9069 /*
9051 * Cannot have more runtime than the period. 9070 * Cannot have more runtime than the period.
9052 */ 9071 */