path: root/kernel/sched.c
author		Ingo Molnar <mingo@elte.hu>	2009-03-11 05:49:34 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-03-11 05:49:34 -0400
commit		d95c3578120e5bc4784069439f00ccb1b5f87717 (patch)
tree		c819de31de3983f3d69f223ede07667ff23bf7da /kernel/sched.c
parent		ba1d755a36f66101aa88ac9ebb54694def6ec38d (diff)
parent		78b020d035074fc3aa4d017353bb2c32e2aff56f (diff)
Merge branch 'x86/core' into cpus4096
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	107
1 file changed, 66 insertions(+), 41 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 8be2c13b50d0..0a76d0b6f215 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -223,7 +223,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 {
 	ktime_t now;
 
-	if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF)
+	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
 		return;
 
 	if (hrtimer_active(&rt_b->rt_period_timer))
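The reworked guard returns early both when RT bandwidth is globally disabled and when the runtime is unlimited, whereas the old check only skipped the enabled-but-unlimited case; in either situation there is nothing for the period timer to throttle. A standalone comparison of the two predicates (plain C, not kernel code; `enabled` and `unlimited` stand in for rt_bandwidth_enabled() and the RUNTIME_INF test):

    #include <stdbool.h>
    #include <stdio.h>

    int main(void)
    {
        /* enumerate the four combinations of the two conditions */
        for (int enabled = 0; enabled <= 1; enabled++) {
            for (int unlimited = 0; unlimited <= 1; unlimited++) {
                bool old_skip = enabled && unlimited;   /* old guard */
                bool new_skip = !enabled || unlimited;  /* new guard */
                printf("enabled=%d unlimited=%d old_skip=%d new_skip=%d\n",
                       enabled, unlimited, old_skip, new_skip);
            }
        }
        return 0;
    }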
@@ -1323,8 +1323,8 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
  * slice expiry etc.
  */
 
-#define WEIGHT_IDLEPRIO		2
-#define WMULT_IDLEPRIO		(1 << 31)
+#define WEIGHT_IDLEPRIO		3
+#define WMULT_IDLEPRIO		1431655765
 
 /*
  * Nice levels are multiplicative, with a gentle 10% change for every
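The WMULT_* constants in sched.c are inverse weights scaled by 2^32, so that weight * inv_weight is roughly 2^32 and the load division can be done with a multiply and shift. With the idle weight raised from 2 to 3, the matching multiplier becomes 2^32 / 3 = 1431655765; the old (1 << 31) was simply 2^32 / 2. A standalone arithmetic check, not kernel code:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t two32 = 1ULL << 32;

        printf("2^32 / 3 = %llu\n", (unsigned long long)(two32 / 3)); /* 1431655765 */
        printf("2^32 / 2 = %llu\n", (unsigned long long)(two32 / 2)); /* 2147483648 == 1 << 31 */
        return 0;
    }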
@@ -3880,19 +3880,24 @@ int select_nohz_load_balancer(int stop_tick)
 	int cpu = smp_processor_id();
 
 	if (stop_tick) {
-		cpumask_set_cpu(cpu, nohz.cpu_mask);
 		cpu_rq(cpu)->in_nohz_recently = 1;
 
-		/*
-		 * If we are going offline and still the leader, give up!
-		 */
-		if (!cpu_active(cpu) &&
-		    atomic_read(&nohz.load_balancer) == cpu) {
+		if (!cpu_active(cpu)) {
+			if (atomic_read(&nohz.load_balancer) != cpu)
+				return 0;
+
+			/*
+			 * If we are going offline and still the leader,
+			 * give up!
+			 */
 			if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
 				BUG();
+
 			return 0;
 		}
 
+		cpumask_set_cpu(cpu, nohz.cpu_mask);
+
 		/* time for ilb owner also to sleep */
 		if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
 			if (atomic_read(&nohz.load_balancer) == cpu)
@@ -4440,7 +4445,7 @@ void __kprobes sub_preempt_count(int val)
 	/*
 	 * Underflow?
 	 */
-	if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
+	if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
 		return;
 	/*
 	 * Is the spinlock portion underflowing?
@@ -4687,8 +4692,8 @@ EXPORT_SYMBOL(default_wake_function);
  * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
  * zero in this (rare) case, and we handle it by continuing to scan the queue.
  */
-static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
-			     int nr_exclusive, int sync, void *key)
+void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+			int nr_exclusive, int sync, void *key)
 {
 	wait_queue_t *curr, *next;
 
@@ -5126,7 +5131,7 @@ int can_nice(const struct task_struct *p, const int nice)
  * sys_setpriority is a more generic, but much slower function that
  * does similar things.
  */
-asmlinkage long sys_nice(int increment)
+SYSCALL_DEFINE1(nice, int, increment)
 {
 	long nice, retval;
 
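This hunk and the ones that follow convert the scheduler syscalls from open-coded asmlinkage definitions to the SYSCALL_DEFINEn() macros, which take the bare syscall name followed by alternating type/name pairs for each parameter. A userspace mock of the calling convention (the real macros in <linux/syscalls.h> do considerably more, e.g. emit metadata and per-architecture wrappers; this only shows the shape of the expansion):

    /* mock, not the kernel's definition */
    #define asmlinkage
    #define SYSCALL_DEFINE1(name, t1, a1) asmlinkage long sys_##name(t1 a1)

    SYSCALL_DEFINE1(nice, int, increment)   /* expands to: long sys_nice(int increment) */
    {
        return increment;                   /* placeholder body */
    }

    int main(void)
    {
        return (int)sys_nice(0);
    }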
@@ -5433,8 +5438,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
  * @policy: new policy.
  * @param: structure containing the new RT priority.
  */
-asmlinkage long
-sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
+		struct sched_param __user *, param)
 {
 	/* negative values for policy are not valid */
 	if (policy < 0)
@@ -5448,7 +5453,7 @@ sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
  * @pid: the pid in question.
  * @param: structure containing the new RT priority.
  */
-asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
 {
 	return do_sched_setscheduler(pid, -1, param);
 }
@@ -5457,7 +5462,7 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
  * sys_sched_getscheduler - get the policy (scheduling class) of a thread
  * @pid: the pid in question.
  */
-asmlinkage long sys_sched_getscheduler(pid_t pid)
+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
 {
 	struct task_struct *p;
 	int retval;
@@ -5482,7 +5487,7 @@ asmlinkage long sys_sched_getscheduler(pid_t pid)
  * @pid: the pid in question.
  * @param: structure containing the RT priority.
  */
-asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)
+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
 {
 	struct sched_param lp;
 	struct task_struct *p;
@@ -5600,8 +5605,8 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
  * @len: length in bytes of the bitmask pointed to by user_mask_ptr
  * @user_mask_ptr: user-space pointer to the new cpu mask
  */
-asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
-				      unsigned long __user *user_mask_ptr)
+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
+		unsigned long __user *, user_mask_ptr)
 {
 	cpumask_var_t new_mask;
 	int retval;
@@ -5648,8 +5653,8 @@ out_unlock:
  * @len: length in bytes of the bitmask pointed to by user_mask_ptr
  * @user_mask_ptr: user-space pointer to hold the current cpu mask
  */
-asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
-				      unsigned long __user *user_mask_ptr)
+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
+		unsigned long __user *, user_mask_ptr)
 {
 	int ret;
 	cpumask_var_t mask;
@@ -5678,7 +5683,7 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
  * This function yields the current CPU to other tasks. If there are no
  * other threads running on this CPU then this function will return.
  */
-asmlinkage long sys_sched_yield(void)
+SYSCALL_DEFINE0(sched_yield)
 {
 	struct rq *rq = this_rq_lock();
 
@@ -5819,7 +5824,7 @@ long __sched io_schedule_timeout(long timeout)
  * this syscall returns the maximum rt_priority that can be used
  * by a given scheduling class.
  */
-asmlinkage long sys_sched_get_priority_max(int policy)
+SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
 {
 	int ret = -EINVAL;
 
@@ -5844,7 +5849,7 @@ asmlinkage long sys_sched_get_priority_max(int policy)
  * this syscall returns the minimum rt_priority that can be used
  * by a given scheduling class.
  */
-asmlinkage long sys_sched_get_priority_min(int policy)
+SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
 {
 	int ret = -EINVAL;
 
@@ -5869,8 +5874,8 @@ asmlinkage long sys_sched_get_priority_min(int policy)
  * this syscall writes the default timeslice value of a given process
  * into the user-space timespec buffer. A value of '0' means infinity.
  */
-asmlinkage
-long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+		struct timespec __user *, interval)
 {
 	struct task_struct *p;
 	unsigned int time_slice;
@@ -5939,12 +5944,7 @@ void sched_show_task(struct task_struct *p)
 	printk(KERN_CONT " %016lx ", thread_saved_pc(p));
 #endif
 #ifdef CONFIG_DEBUG_STACK_USAGE
-	{
-		unsigned long *n = end_of_stack(p);
-		while (!*n)
-			n++;
-		free = (unsigned long)n - (unsigned long)end_of_stack(p);
-	}
+	free = stack_not_used(p);
 #endif
 	printk(KERN_CONT "%5lu %5d %6d\n", free,
 		task_pid_nr(p), task_pid_nr(p->real_parent));
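The open-coded stack scan is replaced by the stack_not_used() helper, declared in <linux/sched.h> under CONFIG_DEBUG_STACK_USAGE. Conceptually it does what the removed block did: walk up from end_of_stack() past the words that are still zero and report how many untouched bytes were found. A sketch based on the removed code (not necessarily the exact in-tree definition):

    /* sketch: how much of a zero-initialized stack region was never written */
    unsigned long stack_not_used_sketch(unsigned long *stack_end)
    {
        unsigned long *n = stack_end;

        while (!*n)     /* skip words that still hold their initial zero */
            n++;

        return (unsigned long)n - (unsigned long)stack_end;
    }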
@@ -6939,20 +6939,26 @@ static void free_rootdomain(struct root_domain *rd)
 
 static void rq_attach_root(struct rq *rq, struct root_domain *rd)
 {
+	struct root_domain *old_rd = NULL;
 	unsigned long flags;
 
 	spin_lock_irqsave(&rq->lock, flags);
 
 	if (rq->rd) {
-		struct root_domain *old_rd = rq->rd;
+		old_rd = rq->rd;
 
 		if (cpumask_test_cpu(rq->cpu, old_rd->online))
 			set_rq_offline(rq);
 
 		cpumask_clear_cpu(rq->cpu, old_rd->span);
 
-		if (atomic_dec_and_test(&old_rd->refcount))
-			free_rootdomain(old_rd);
+		/*
+		 * If we dont want to free the old_rt yet then
+		 * set old_rd to NULL to skip the freeing later
+		 * in this function:
+		 */
+		if (!atomic_dec_and_test(&old_rd->refcount))
+			old_rd = NULL;
 	}
 
 	atomic_inc(&rd->refcount);
@@ -6963,6 +6969,9 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
 		set_rq_online(rq);
 
 	spin_unlock_irqrestore(&rq->lock, flags);
+
+	if (old_rd)
+		free_rootdomain(old_rd);
 }
 
 static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem)
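Taken together, the two rq_attach_root() hunks defer the actual freeing: the refcount is still dropped under rq->lock, but free_rootdomain() is now called only after spin_unlock_irqrestore(), so the freeing work no longer runs while the runqueue lock is held with interrupts disabled. The skeleton of the pattern (illustrative userspace sketch with a mutex standing in for the runqueue spinlock, not the kernel code):

    #include <pthread.h>
    #include <stdlib.h>

    struct obj {
        int refcount;
        /* ... payload ... */
    };

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    /* Drop a reference under the lock, but free outside it. */
    void put_obj(struct obj **slot)
    {
        struct obj *victim = NULL;

        pthread_mutex_lock(&lock);
        if (*slot && --(*slot)->refcount == 0) {
            victim = *slot;     /* remember the dying object ... */
            *slot = NULL;
        }
        pthread_mutex_unlock(&lock);

        free(victim);           /* ... and release it after unlocking; free(NULL) is a no-op */
    }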
@@ -9050,6 +9059,13 @@ static int tg_schedulable(struct task_group *tg, void *data)
 		runtime = d->rt_runtime;
 	}
 
+#ifdef CONFIG_USER_SCHED
+	if (tg == &root_task_group) {
+		period = global_rt_period();
+		runtime = global_rt_runtime();
+	}
+#endif
+
 	/*
 	 * Cannot have more runtime than the period.
 	 */
@@ -9203,6 +9219,16 @@ static int sched_rt_global_constraints(void)
 
 	return ret;
 }
+
+int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
+{
+	/* Don't accept realtime tasks when there is no way for them to run */
+	if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
+		return 0;
+
+	return 1;
+}
+
 #else /* !CONFIG_RT_GROUP_SCHED */
 static int sched_rt_global_constraints(void)
 {
@@ -9296,8 +9322,7 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 		      struct task_struct *tsk)
 {
 #ifdef CONFIG_RT_GROUP_SCHED
-	/* Don't accept realtime tasks when there is no way for them to run */
-	if (rt_task(tsk) && cgroup_tg(cgrp)->rt_bandwidth.rt_runtime == 0)
+	if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
 		return -EINVAL;
 #else
 	/* We don't support RT-tasks being in separate groups */
@@ -9460,7 +9485,7 @@ cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 
 static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
 {
-	u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
 	u64 data;
 
 #ifndef CONFIG_64BIT
@@ -9479,7 +9504,7 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
 
 static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
 {
-	u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
 
 #ifndef CONFIG_64BIT
 	/*
@@ -9575,7 +9600,7 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 	ca = task_ca(tsk);
 
 	for (; ca; ca = ca->parent) {
-		u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+		u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
 		*cpuusage += cputime;
 	}
 }
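The last three hunks switch cpuacct from percpu_ptr() to per_cpu_ptr(), the accessor used with dynamically allocated per-cpu data (ca->cpuusage is allocated with alloc_percpu() elsewhere in this file). The usage pattern, as a kernel-style sketch rather than code taken from sched.c:

    #include <linux/percpu.h>
    #include <linux/cpumask.h>
    #include <linux/types.h>

    static u64 *usage;      /* assumed to be set up with alloc_percpu(u64) */

    /* fast path: touch only this CPU's slot */
    static void charge(int cpu, u64 delta)
    {
        *per_cpu_ptr(usage, cpu) += delta;
    }

    /* slow path: fold all per-CPU slots into one total */
    static u64 total_usage(void)
    {
        u64 sum = 0;
        int cpu;

        for_each_possible_cpu(cpu)
            sum += *per_cpu_ptr(usage, cpu);

        return sum;
    }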