aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--kernel/sched.c84
1 files changed, 47 insertions, 37 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index deb5ac8c12f3..61245b8d0f16 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -125,6 +125,9 @@ DEFINE_TRACE(sched_switch);
125DEFINE_TRACE(sched_migrate_task); 125DEFINE_TRACE(sched_migrate_task);
126 126
127#ifdef CONFIG_SMP 127#ifdef CONFIG_SMP
128
129static void double_rq_lock(struct rq *rq1, struct rq *rq2);
130
128/* 131/*
129 * Divide a load by a sched group cpu_power : (load / sg->__cpu_power) 132 * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
130 * Since cpu_power is a 'constant', we can use a reciprocal divide. 133 * Since cpu_power is a 'constant', we can use a reciprocal divide.
@@ -1320,8 +1323,8 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
1320 * slice expiry etc. 1323 * slice expiry etc.
1321 */ 1324 */
1322 1325
1323#define WEIGHT_IDLEPRIO 2 1326#define WEIGHT_IDLEPRIO 3
1324#define WMULT_IDLEPRIO (1 << 31) 1327#define WMULT_IDLEPRIO 1431655765
1325 1328
1326/* 1329/*
1327 * Nice levels are multiplicative, with a gentle 10% change for every 1330 * Nice levels are multiplicative, with a gentle 10% change for every
@@ -3877,19 +3880,24 @@ int select_nohz_load_balancer(int stop_tick)
3877 int cpu = smp_processor_id(); 3880 int cpu = smp_processor_id();
3878 3881
3879 if (stop_tick) { 3882 if (stop_tick) {
3880 cpumask_set_cpu(cpu, nohz.cpu_mask);
3881 cpu_rq(cpu)->in_nohz_recently = 1; 3883 cpu_rq(cpu)->in_nohz_recently = 1;
3882 3884
3883 /* 3885 if (!cpu_active(cpu)) {
3884 * If we are going offline and still the leader, give up! 3886 if (atomic_read(&nohz.load_balancer) != cpu)
3885 */ 3887 return 0;
3886 if (!cpu_active(cpu) && 3888
3887 atomic_read(&nohz.load_balancer) == cpu) { 3889 /*
3890 * If we are going offline and still the leader,
3891 * give up!
3892 */
3888 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) 3893 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
3889 BUG(); 3894 BUG();
3895
3890 return 0; 3896 return 0;
3891 } 3897 }
3892 3898
3899 cpumask_set_cpu(cpu, nohz.cpu_mask);
3900
3893 /* time for ilb owner also to sleep */ 3901 /* time for ilb owner also to sleep */
3894 if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { 3902 if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
3895 if (atomic_read(&nohz.load_balancer) == cpu) 3903 if (atomic_read(&nohz.load_balancer) == cpu)
@@ -4437,7 +4445,7 @@ void __kprobes sub_preempt_count(int val)
4437 /* 4445 /*
4438 * Underflow? 4446 * Underflow?
4439 */ 4447 */
4440 if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked()))) 4448 if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
4441 return; 4449 return;
4442 /* 4450 /*
4443 * Is the spinlock portion underflowing? 4451 * Is the spinlock portion underflowing?
@@ -4684,8 +4692,8 @@ EXPORT_SYMBOL(default_wake_function);
4684 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns 4692 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
4685 * zero in this (rare) case, and we handle it by continuing to scan the queue. 4693 * zero in this (rare) case, and we handle it by continuing to scan the queue.
4686 */ 4694 */
4687static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, 4695void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
4688 int nr_exclusive, int sync, void *key) 4696 int nr_exclusive, int sync, void *key)
4689{ 4697{
4690 wait_queue_t *curr, *next; 4698 wait_queue_t *curr, *next;
4691 4699
@@ -5123,7 +5131,7 @@ int can_nice(const struct task_struct *p, const int nice)
5123 * sys_setpriority is a more generic, but much slower function that 5131 * sys_setpriority is a more generic, but much slower function that
5124 * does similar things. 5132 * does similar things.
5125 */ 5133 */
5126asmlinkage long sys_nice(int increment) 5134SYSCALL_DEFINE1(nice, int, increment)
5127{ 5135{
5128 long nice, retval; 5136 long nice, retval;
5129 5137
@@ -5430,8 +5438,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
5430 * @policy: new policy. 5438 * @policy: new policy.
5431 * @param: structure containing the new RT priority. 5439 * @param: structure containing the new RT priority.
5432 */ 5440 */
5433asmlinkage long 5441SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
5434sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) 5442 struct sched_param __user *, param)
5435{ 5443{
5436 /* negative values for policy are not valid */ 5444 /* negative values for policy are not valid */
5437 if (policy < 0) 5445 if (policy < 0)
@@ -5445,7 +5453,7 @@ sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
5445 * @pid: the pid in question. 5453 * @pid: the pid in question.
5446 * @param: structure containing the new RT priority. 5454 * @param: structure containing the new RT priority.
5447 */ 5455 */
5448asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param) 5456SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
5449{ 5457{
5450 return do_sched_setscheduler(pid, -1, param); 5458 return do_sched_setscheduler(pid, -1, param);
5451} 5459}
@@ -5454,7 +5462,7 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
5454 * sys_sched_getscheduler - get the policy (scheduling class) of a thread 5462 * sys_sched_getscheduler - get the policy (scheduling class) of a thread
5455 * @pid: the pid in question. 5463 * @pid: the pid in question.
5456 */ 5464 */
5457asmlinkage long sys_sched_getscheduler(pid_t pid) 5465SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
5458{ 5466{
5459 struct task_struct *p; 5467 struct task_struct *p;
5460 int retval; 5468 int retval;
@@ -5479,7 +5487,7 @@ asmlinkage long sys_sched_getscheduler(pid_t pid)
5479 * @pid: the pid in question. 5487 * @pid: the pid in question.
5480 * @param: structure containing the RT priority. 5488 * @param: structure containing the RT priority.
5481 */ 5489 */
5482asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) 5490SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
5483{ 5491{
5484 struct sched_param lp; 5492 struct sched_param lp;
5485 struct task_struct *p; 5493 struct task_struct *p;
@@ -5597,8 +5605,8 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
5597 * @len: length in bytes of the bitmask pointed to by user_mask_ptr 5605 * @len: length in bytes of the bitmask pointed to by user_mask_ptr
5598 * @user_mask_ptr: user-space pointer to the new cpu mask 5606 * @user_mask_ptr: user-space pointer to the new cpu mask
5599 */ 5607 */
5600asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, 5608SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
5601 unsigned long __user *user_mask_ptr) 5609 unsigned long __user *, user_mask_ptr)
5602{ 5610{
5603 cpumask_var_t new_mask; 5611 cpumask_var_t new_mask;
5604 int retval; 5612 int retval;
@@ -5645,8 +5653,8 @@ out_unlock:
5645 * @len: length in bytes of the bitmask pointed to by user_mask_ptr 5653 * @len: length in bytes of the bitmask pointed to by user_mask_ptr
5646 * @user_mask_ptr: user-space pointer to hold the current cpu mask 5654 * @user_mask_ptr: user-space pointer to hold the current cpu mask
5647 */ 5655 */
5648asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, 5656SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
5649 unsigned long __user *user_mask_ptr) 5657 unsigned long __user *, user_mask_ptr)
5650{ 5658{
5651 int ret; 5659 int ret;
5652 cpumask_var_t mask; 5660 cpumask_var_t mask;
@@ -5675,7 +5683,7 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
5675 * This function yields the current CPU to other tasks. If there are no 5683 * This function yields the current CPU to other tasks. If there are no
5676 * other threads running on this CPU then this function will return. 5684 * other threads running on this CPU then this function will return.
5677 */ 5685 */
5678asmlinkage long sys_sched_yield(void) 5686SYSCALL_DEFINE0(sched_yield)
5679{ 5687{
5680 struct rq *rq = this_rq_lock(); 5688 struct rq *rq = this_rq_lock();
5681 5689
@@ -5816,7 +5824,7 @@ long __sched io_schedule_timeout(long timeout)
5816 * this syscall returns the maximum rt_priority that can be used 5824 * this syscall returns the maximum rt_priority that can be used
5817 * by a given scheduling class. 5825 * by a given scheduling class.
5818 */ 5826 */
5819asmlinkage long sys_sched_get_priority_max(int policy) 5827SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
5820{ 5828{
5821 int ret = -EINVAL; 5829 int ret = -EINVAL;
5822 5830
@@ -5841,7 +5849,7 @@ asmlinkage long sys_sched_get_priority_max(int policy)
5841 * this syscall returns the minimum rt_priority that can be used 5849 * this syscall returns the minimum rt_priority that can be used
5842 * by a given scheduling class. 5850 * by a given scheduling class.
5843 */ 5851 */
5844asmlinkage long sys_sched_get_priority_min(int policy) 5852SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
5845{ 5853{
5846 int ret = -EINVAL; 5854 int ret = -EINVAL;
5847 5855
@@ -5866,8 +5874,8 @@ asmlinkage long sys_sched_get_priority_min(int policy)
5866 * this syscall writes the default timeslice value of a given process 5874 * this syscall writes the default timeslice value of a given process
5867 * into the user-space timespec buffer. A value of '0' means infinity. 5875 * into the user-space timespec buffer. A value of '0' means infinity.
5868 */ 5876 */
5869asmlinkage 5877SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
5870long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) 5878 struct timespec __user *, interval)
5871{ 5879{
5872 struct task_struct *p; 5880 struct task_struct *p;
5873 unsigned int time_slice; 5881 unsigned int time_slice;
@@ -5936,12 +5944,7 @@ void sched_show_task(struct task_struct *p)
5936 printk(KERN_CONT " %016lx ", thread_saved_pc(p)); 5944 printk(KERN_CONT " %016lx ", thread_saved_pc(p));
5937#endif 5945#endif
5938#ifdef CONFIG_DEBUG_STACK_USAGE 5946#ifdef CONFIG_DEBUG_STACK_USAGE
5939 { 5947 free = stack_not_used(p);
5940 unsigned long *n = end_of_stack(p);
5941 while (!*n)
5942 n++;
5943 free = (unsigned long)n - (unsigned long)end_of_stack(p);
5944 }
5945#endif 5948#endif
5946 printk(KERN_CONT "%5lu %5d %6d\n", free, 5949 printk(KERN_CONT "%5lu %5d %6d\n", free,
5947 task_pid_nr(p), task_pid_nr(p->real_parent)); 5950 task_pid_nr(p), task_pid_nr(p->real_parent));
@@ -7282,10 +7285,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
7282 * groups, so roll our own. Now each node has its own list of groups which 7285 * groups, so roll our own. Now each node has its own list of groups which
7283 * gets dynamically allocated. 7286 * gets dynamically allocated.
7284 */ 7287 */
7285static DEFINE_PER_CPU(struct sched_domain, node_domains); 7288static DEFINE_PER_CPU(struct static_sched_domain, node_domains);
7286static struct sched_group ***sched_group_nodes_bycpu; 7289static struct sched_group ***sched_group_nodes_bycpu;
7287 7290
7288static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); 7291static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
7289static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes); 7292static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
7290 7293
7291static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map, 7294static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
@@ -7560,7 +7563,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
7560#ifdef CONFIG_NUMA 7563#ifdef CONFIG_NUMA
7561 if (cpumask_weight(cpu_map) > 7564 if (cpumask_weight(cpu_map) >
7562 SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) { 7565 SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
7563 sd = &per_cpu(allnodes_domains, i); 7566 sd = &per_cpu(allnodes_domains, i).sd;
7564 SD_INIT(sd, ALLNODES); 7567 SD_INIT(sd, ALLNODES);
7565 set_domain_attribute(sd, attr); 7568 set_domain_attribute(sd, attr);
7566 cpumask_copy(sched_domain_span(sd), cpu_map); 7569 cpumask_copy(sched_domain_span(sd), cpu_map);
@@ -7570,7 +7573,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
7570 } else 7573 } else
7571 p = NULL; 7574 p = NULL;
7572 7575
7573 sd = &per_cpu(node_domains, i); 7576 sd = &per_cpu(node_domains, i).sd;
7574 SD_INIT(sd, NODE); 7577 SD_INIT(sd, NODE);
7575 set_domain_attribute(sd, attr); 7578 set_domain_attribute(sd, attr);
7576 sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd)); 7579 sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
@@ -7688,7 +7691,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
7688 for_each_cpu(j, nodemask) { 7691 for_each_cpu(j, nodemask) {
7689 struct sched_domain *sd; 7692 struct sched_domain *sd;
7690 7693
7691 sd = &per_cpu(node_domains, j); 7694 sd = &per_cpu(node_domains, j).sd;
7692 sd->groups = sg; 7695 sd->groups = sg;
7693 } 7696 }
7694 sg->__cpu_power = 0; 7697 sg->__cpu_power = 0;
@@ -9047,6 +9050,13 @@ static int tg_schedulable(struct task_group *tg, void *data)
9047 runtime = d->rt_runtime; 9050 runtime = d->rt_runtime;
9048 } 9051 }
9049 9052
9053#ifdef CONFIG_USER_SCHED
9054 if (tg == &root_task_group) {
9055 period = global_rt_period();
9056 runtime = global_rt_runtime();
9057 }
9058#endif
9059
9050 /* 9060 /*
9051 * Cannot have more runtime than the period. 9061 * Cannot have more runtime than the period.
9052 */ 9062 */