Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	87
1 file changed, 56 insertions(+), 31 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index deb5ac8c12f3..e72485033c48 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -125,6 +125,9 @@ DEFINE_TRACE(sched_switch);
 DEFINE_TRACE(sched_migrate_task);
 
 #ifdef CONFIG_SMP
+
+static void double_rq_lock(struct rq *rq1, struct rq *rq2);
+
 /*
  * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
  * Since cpu_power is a 'constant', we can use a reciprocal divide.
@@ -1320,8 +1323,8 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
  * slice expiry etc.
  */
 
-#define WEIGHT_IDLEPRIO		2
-#define WMULT_IDLEPRIO		(1 << 31)
+#define WEIGHT_IDLEPRIO		3
+#define WMULT_IDLEPRIO		1431655765
 
 /*
  * Nice levels are multiplicative, with a gentle 10% change for every
@@ -2263,6 +2266,16 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 	if (!sched_feat(SYNC_WAKEUPS))
 		sync = 0;
 
+	if (!sync) {
+		if (current->se.avg_overlap < sysctl_sched_migration_cost &&
+			  p->se.avg_overlap < sysctl_sched_migration_cost)
+			sync = 1;
+	} else {
+		if (current->se.avg_overlap >= sysctl_sched_migration_cost ||
+			  p->se.avg_overlap >= sysctl_sched_migration_cost)
+			sync = 0;
+	}
+
 #ifdef CONFIG_SMP
 	if (sched_feat(LB_WAKEUP_UPDATE)) {
 		struct sched_domain *sd;
@@ -3877,19 +3890,24 @@ int select_nohz_load_balancer(int stop_tick)
 	int cpu = smp_processor_id();
 
 	if (stop_tick) {
-		cpumask_set_cpu(cpu, nohz.cpu_mask);
 		cpu_rq(cpu)->in_nohz_recently = 1;
 
-		/*
-		 * If we are going offline and still the leader, give up!
-		 */
-		if (!cpu_active(cpu) &&
-		    atomic_read(&nohz.load_balancer) == cpu) {
+		if (!cpu_active(cpu)) {
+			if (atomic_read(&nohz.load_balancer) != cpu)
+				return 0;
+
+			/*
+			 * If we are going offline and still the leader,
+			 * give up!
+			 */
 			if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
 				BUG();
+
 			return 0;
 		}
 
+		cpumask_set_cpu(cpu, nohz.cpu_mask);
+
 		/* time for ilb owner also to sleep */
 		if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
 			if (atomic_read(&nohz.load_balancer) == cpu)
@@ -4437,7 +4455,7 @@ void __kprobes sub_preempt_count(int val)
 	/*
 	 * Underflow?
 	 */
-	if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
+	if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
 		return;
 	/*
 	 * Is the spinlock portion underflowing?
@@ -4684,8 +4702,8 @@ EXPORT_SYMBOL(default_wake_function);
  * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
  * zero in this (rare) case, and we handle it by continuing to scan the queue.
  */
-static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
-			     int nr_exclusive, int sync, void *key)
+void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+			int nr_exclusive, int sync, void *key)
 {
 	wait_queue_t *curr, *next;
 
@@ -5123,7 +5141,7 @@ int can_nice(const struct task_struct *p, const int nice)
  * sys_setpriority is a more generic, but much slower function that
  * does similar things.
  */
-asmlinkage long sys_nice(int increment)
+SYSCALL_DEFINE1(nice, int, increment)
 {
 	long nice, retval;
 
@@ -5430,8 +5448,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
  * @policy: new policy.
  * @param: structure containing the new RT priority.
  */
-asmlinkage long
-sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
+		struct sched_param __user *, param)
 {
 	/* negative values for policy are not valid */
 	if (policy < 0)
@@ -5445,7 +5463,7 @@ sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
  * @pid: the pid in question.
  * @param: structure containing the new RT priority.
  */
-asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
 {
 	return do_sched_setscheduler(pid, -1, param);
 }
@@ -5454,7 +5472,7 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
  * sys_sched_getscheduler - get the policy (scheduling class) of a thread
  * @pid: the pid in question.
  */
-asmlinkage long sys_sched_getscheduler(pid_t pid)
+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
 {
 	struct task_struct *p;
 	int retval;
@@ -5479,7 +5497,7 @@ asmlinkage long sys_sched_getscheduler(pid_t pid)
  * @pid: the pid in question.
  * @param: structure containing the RT priority.
  */
-asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)
+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
 {
 	struct sched_param lp;
 	struct task_struct *p;
@@ -5597,8 +5615,8 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
  * @len: length in bytes of the bitmask pointed to by user_mask_ptr
  * @user_mask_ptr: user-space pointer to the new cpu mask
  */
-asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
-				      unsigned long __user *user_mask_ptr)
+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
+		unsigned long __user *, user_mask_ptr)
 {
 	cpumask_var_t new_mask;
 	int retval;
@@ -5645,8 +5663,8 @@ out_unlock:
  * @len: length in bytes of the bitmask pointed to by user_mask_ptr
  * @user_mask_ptr: user-space pointer to hold the current cpu mask
  */
-asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
-				      unsigned long __user *user_mask_ptr)
+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
+		unsigned long __user *, user_mask_ptr)
 {
 	int ret;
 	cpumask_var_t mask;
@@ -5675,7 +5693,7 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
  * This function yields the current CPU to other tasks. If there are no
  * other threads running on this CPU then this function will return.
  */
-asmlinkage long sys_sched_yield(void)
+SYSCALL_DEFINE0(sched_yield)
 {
 	struct rq *rq = this_rq_lock();
 
@@ -5816,7 +5834,7 @@ long __sched io_schedule_timeout(long timeout)
  * this syscall returns the maximum rt_priority that can be used
  * by a given scheduling class.
  */
-asmlinkage long sys_sched_get_priority_max(int policy)
+SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
 {
 	int ret = -EINVAL;
 
@@ -5841,7 +5859,7 @@ asmlinkage long sys_sched_get_priority_max(int policy)
  * this syscall returns the minimum rt_priority that can be used
  * by a given scheduling class.
  */
-asmlinkage long sys_sched_get_priority_min(int policy)
+SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
 {
 	int ret = -EINVAL;
 
@@ -5866,8 +5884,8 @@ asmlinkage long sys_sched_get_priority_min(int policy)
  * this syscall writes the default timeslice value of a given process
  * into the user-space timespec buffer. A value of '0' means infinity.
  */
-asmlinkage
-long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+		struct timespec __user *, interval)
 {
 	struct task_struct *p;
 	unsigned int time_slice;
@@ -7282,10 +7300,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
  * groups, so roll our own. Now each node has its own list of groups which
  * gets dynamically allocated.
  */
-static DEFINE_PER_CPU(struct sched_domain, node_domains);
+static DEFINE_PER_CPU(struct static_sched_domain, node_domains);
 static struct sched_group ***sched_group_nodes_bycpu;
 
-static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
+static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
 static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
 
 static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
@@ -7560,7 +7578,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
 #ifdef CONFIG_NUMA
 		if (cpumask_weight(cpu_map) >
 			SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
-			sd = &per_cpu(allnodes_domains, i);
+			sd = &per_cpu(allnodes_domains, i).sd;
 			SD_INIT(sd, ALLNODES);
 			set_domain_attribute(sd, attr);
 			cpumask_copy(sched_domain_span(sd), cpu_map);
@@ -7570,7 +7588,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
 		} else
 			p = NULL;
 
-		sd = &per_cpu(node_domains, i);
+		sd = &per_cpu(node_domains, i).sd;
 		SD_INIT(sd, NODE);
 		set_domain_attribute(sd, attr);
 		sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
@@ -7688,7 +7706,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
 		for_each_cpu(j, nodemask) {
 			struct sched_domain *sd;
 
-			sd = &per_cpu(node_domains, j);
+			sd = &per_cpu(node_domains, j).sd;
 			sd->groups = sg;
 		}
 		sg->__cpu_power = 0;
@@ -9047,6 +9065,13 @@ static int tg_schedulable(struct task_group *tg, void *data)
 		runtime = d->rt_runtime;
 	}
 
+#ifdef CONFIG_USER_SCHED
+	if (tg == &root_task_group) {
+		period = global_rt_period();
+		runtime = global_rt_runtime();
+	}
+#endif
+
 	/*
 	 * Cannot have more runtime than the period.
 	 */