author	Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>	2009-04-07 16:34:16 -0400
committer	Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>	2009-04-07 16:34:16 -0400
commit	38f4b8c0da01ae7cd9b93386842ce272d6fde9ab (patch)
tree	3c8c52201aac038094bfea7efdd0984a8f62045e	/kernel/sched.c
parent	a811454027352c762e0d5bba1b1d8f7d26bf96ae (diff)
parent	8e2c4f2844c0e8dcdfe312e5f2204854ca8532c6 (diff)
Merge commit 'origin/master' into for-linus/xen/master
* commit 'origin/master': (4825 commits)
  Fix build errors due to CONFIG_BRANCH_TRACER=y
  parport: Use the PCI IRQ if offered
  tty: jsm cleanups
  Adjust path to gpio headers
  KGDB_SERIAL_CONSOLE check for module
  Change KCONFIG name
  tty: Blackin CTS/RTS
  Change hardware flow control from poll to interrupt driven
  Add support for the MAX3100 SPI UART.
  lanana: assign a device name and numbering for MAX3100
  serqt: initial clean up pass for tty side
  tty: Use the generic RS485 ioctl on CRIS
  tty: Correct inline types for tty_driver_kref_get()
  splice: fix deadlock in splicing to file
  nilfs2: support nanosecond timestamp
  nilfs2: introduce secondary super block
  nilfs2: simplify handling of active state of segments
  nilfs2: mark minor flag for checkpoint created by internal operation
  nilfs2: clean up sketch file
  nilfs2: super block operations fix endian bug
  ...

Conflicts:
	arch/x86/include/asm/thread_info.h
	arch/x86/lguest/boot.c
	drivers/xen/manage.c
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	161
1 files changed, 125 insertions, 36 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 133762aece50..b38bd96098f6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -231,13 +231,20 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 
 	spin_lock(&rt_b->rt_runtime_lock);
 	for (;;) {
+		unsigned long delta;
+		ktime_t soft, hard;
+
 		if (hrtimer_active(&rt_b->rt_period_timer))
 			break;
 
 		now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
 		hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
-		hrtimer_start_expires(&rt_b->rt_period_timer,
-				HRTIMER_MODE_ABS);
+
+		soft = hrtimer_get_softexpires(&rt_b->rt_period_timer);
+		hard = hrtimer_get_expires(&rt_b->rt_period_timer);
+		delta = ktime_to_ns(ktime_sub(hard, soft));
+		__hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
+				HRTIMER_MODE_ABS, 0);
 	}
 	spin_unlock(&rt_b->rt_runtime_lock);
 }
@@ -1110,7 +1117,7 @@ static void hrtick_start(struct rq *rq, u64 delay)
 	if (rq == this_rq()) {
 		hrtimer_restart(timer);
 	} else if (!rq->hrtick_csd_pending) {
-		__smp_call_function_single(cpu_of(rq), &rq->hrtick_csd);
+		__smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0);
 		rq->hrtick_csd_pending = 1;
 	}
 }
@@ -1146,7 +1153,8 @@ static __init void init_hrtick(void)
  */
 static void hrtick_start(struct rq *rq, u64 delay)
 {
-	hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
+	__hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
+			HRTIMER_MODE_REL, 0);
 }
 
 static inline void init_hrtick(void)
@@ -3818,19 +3826,23 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
  */
 #define MAX_PINNED_INTERVAL	512
 
+/* Working cpumask for load_balance and load_balance_newidle. */
+static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
+
 /*
  * Check this_cpu to ensure it is balanced within domain. Attempt to move
  * tasks if there is an imbalance.
  */
 static int load_balance(int this_cpu, struct rq *this_rq,
 			struct sched_domain *sd, enum cpu_idle_type idle,
-			int *balance, struct cpumask *cpus)
+			int *balance)
 {
 	int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
 	struct sched_group *group;
 	unsigned long imbalance;
 	struct rq *busiest;
 	unsigned long flags;
+	struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
 	cpumask_setall(cpus);
 
@@ -3985,8 +3997,7 @@ out:
  * this_rq is locked.
  */
 static int
-load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
-			struct cpumask *cpus)
+load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
 {
 	struct sched_group *group;
 	struct rq *busiest = NULL;
@@ -3994,6 +4005,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
 	int ld_moved = 0;
 	int sd_idle = 0;
 	int all_pinned = 0;
+	struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
 	cpumask_setall(cpus);
 
@@ -4134,10 +4146,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 	struct sched_domain *sd;
 	int pulled_task = 0;
 	unsigned long next_balance = jiffies + HZ;
-	cpumask_var_t tmpmask;
-
-	if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
-		return;
 
 	for_each_domain(this_cpu, sd) {
 		unsigned long interval;
@@ -4148,7 +4156,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 		if (sd->flags & SD_BALANCE_NEWIDLE)
 			/* If we've pulled tasks over stop searching: */
 			pulled_task = load_balance_newidle(this_cpu, this_rq,
-							   sd, tmpmask);
+							   sd);
 
 		interval = msecs_to_jiffies(sd->balance_interval);
 		if (time_after(next_balance, sd->last_balance + interval))
@@ -4163,7 +4171,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 		 */
 		this_rq->next_balance = next_balance;
 	}
-	free_cpumask_var(tmpmask);
 }
 
 /*
@@ -4313,11 +4320,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 	unsigned long next_balance = jiffies + 60*HZ;
 	int update_next_balance = 0;
 	int need_serialize;
-	cpumask_var_t tmp;
-
-	/* Fails alloc? Rebalancing probably not a priority right now. */
-	if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
-		return;
 
 	for_each_domain(cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
@@ -4342,7 +4344,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 		}
 
 		if (time_after_eq(jiffies, sd->last_balance + interval)) {
-			if (load_balance(cpu, rq, sd, idle, &balance, tmp)) {
+			if (load_balance(cpu, rq, sd, idle, &balance)) {
 				/*
 				 * We've pulled tasks over so either we're no
 				 * longer idle, or one of our SMT siblings is
@@ -4376,8 +4378,6 @@ out:
 	 */
 	if (likely(update_next_balance))
 		rq->next_balance = next_balance;
-
-	free_cpumask_var(tmp);
 }
 
 /*
@@ -4781,10 +4781,7 @@ void scheduler_tick(void)
 #endif
 }
 
-#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
-				defined(CONFIG_PREEMPT_TRACER))
-
-static inline unsigned long get_parent_ip(unsigned long addr)
+unsigned long get_parent_ip(unsigned long addr)
 {
 	if (in_lock_functions(addr)) {
 		addr = CALLER_ADDR2;
@@ -4794,6 +4791,9 @@ static inline unsigned long get_parent_ip(unsigned long addr)
 	return addr;
 }
 
+#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
+				defined(CONFIG_PREEMPT_TRACER))
+
 void __kprobes add_preempt_count(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
@@ -4942,15 +4942,13 @@ pick_next_task(struct rq *rq)
 /*
  * schedule() is the main scheduler function.
  */
-asmlinkage void __sched schedule(void)
+asmlinkage void __sched __schedule(void)
 {
 	struct task_struct *prev, *next;
 	unsigned long *switch_count;
 	struct rq *rq;
 	int cpu;
 
-need_resched:
-	preempt_disable();
 	cpu = smp_processor_id();
 	rq = cpu_rq(cpu);
 	rcu_qsctr_inc(cpu);
@@ -5007,13 +5005,80 @@ need_resched_nonpreemptible:
 
 	if (unlikely(reacquire_kernel_lock(current) < 0))
 		goto need_resched_nonpreemptible;
+}
 
+asmlinkage void __sched schedule(void)
+{
+need_resched:
+	preempt_disable();
+	__schedule();
 	preempt_enable_no_resched();
 	if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
 		goto need_resched;
 }
 EXPORT_SYMBOL(schedule);
 
+#ifdef CONFIG_SMP
+/*
+ * Look out! "owner" is an entirely speculative pointer
+ * access and not reliable.
+ */
+int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
+{
+	unsigned int cpu;
+	struct rq *rq;
+
+	if (!sched_feat(OWNER_SPIN))
+		return 0;
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	/*
+	 * Need to access the cpu field knowing that
+	 * DEBUG_PAGEALLOC could have unmapped it if
+	 * the mutex owner just released it and exited.
+	 */
+	if (probe_kernel_address(&owner->cpu, cpu))
+		goto out;
+#else
+	cpu = owner->cpu;
+#endif
+
+	/*
+	 * Even if the access succeeded (likely case),
+	 * the cpu field may no longer be valid.
+	 */
+	if (cpu >= nr_cpumask_bits)
+		goto out;
+
+	/*
+	 * We need to validate that we can do a
+	 * get_cpu() and that we have the percpu area.
+	 */
+	if (!cpu_online(cpu))
+		goto out;
+
+	rq = cpu_rq(cpu);
+
+	for (;;) {
+		/*
+		 * Owner changed, break to re-assess state.
+		 */
+		if (lock->owner != owner)
+			break;
+
+		/*
+		 * Is that owner really running on that cpu?
+		 */
+		if (task_thread_info(rq->curr) != owner || need_resched())
+			return 0;
+
+		cpu_relax();
+	}
+out:
+	return 1;
+}
+#endif
+
 #ifdef CONFIG_PREEMPT
 /*
  * this is the entry point to schedule() from in-kernel preemption
@@ -5131,11 +5196,17 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
 	__wake_up_common(q, mode, 1, 0, NULL);
 }
 
+void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
+{
+	__wake_up_common(q, mode, 1, 0, key);
+}
+
 /**
- * __wake_up_sync - wake up threads blocked on a waitqueue.
+ * __wake_up_sync_key - wake up threads blocked on a waitqueue.
  * @q: the waitqueue
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
+ * @key: opaque value to be passed to wakeup targets
  *
 * The sync wakeup differs that the waker knows that it will schedule
 * away soon, so while the target thread will be woken up, it will not
@@ -5144,8 +5215,8 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
  *
  * On UP it can prevent extra preemption.
  */
-void
-__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
+			int nr_exclusive, void *key)
 {
 	unsigned long flags;
 	int sync = 1;
@@ -5157,9 +5228,18 @@ __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
 		sync = 0;
 
 	spin_lock_irqsave(&q->lock, flags);
-	__wake_up_common(q, mode, nr_exclusive, sync, NULL);
+	__wake_up_common(q, mode, nr_exclusive, sync, key);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
+EXPORT_SYMBOL_GPL(__wake_up_sync_key);
+
+/*
+ * __wake_up_sync - see __wake_up_sync_key()
+ */
+void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+{
+	__wake_up_sync_key(q, mode, nr_exclusive, NULL);
+}
 EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
 
 /**
@@ -7648,7 +7728,7 @@ cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
 {
 	int group;
 
-	cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+	cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
 	group = cpumask_first(mask);
 	if (sg)
 		*sg = &per_cpu(sched_group_core, group).sg;
@@ -7677,7 +7757,7 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
 	cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
 	group = cpumask_first(mask);
 #elif defined(CONFIG_SCHED_SMT)
-	cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+	cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
 	group = cpumask_first(mask);
 #else
 	group = cpu;
@@ -8020,7 +8100,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
 		SD_INIT(sd, SIBLING);
 		set_domain_attribute(sd, attr);
 		cpumask_and(sched_domain_span(sd),
-			    &per_cpu(cpu_sibling_map, i), cpu_map);
+			    topology_thread_cpumask(i), cpu_map);
 		sd->parent = p;
 		p->child = sd;
 		cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
@@ -8031,7 +8111,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
 	/* Set up CPU (sibling) groups */
 	for_each_cpu(i, cpu_map) {
 		cpumask_and(this_sibling_map,
-			    &per_cpu(cpu_sibling_map, i), cpu_map);
+			    topology_thread_cpumask(i), cpu_map);
 		if (i != cpumask_first(this_sibling_map))
 			continue;
 
@@ -8707,6 +8787,9 @@ void __init sched_init(void)
 #ifdef CONFIG_USER_SCHED
 	alloc_size *= 2;
 #endif
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	alloc_size += num_possible_cpus() * cpumask_size();
+#endif
 	/*
 	 * As sched_init() is called before page_alloc is setup,
 	 * we use alloc_bootmem().
@@ -8744,6 +8827,12 @@ void __init sched_init(void)
 		ptr += nr_cpu_ids * sizeof(void **);
 #endif /* CONFIG_USER_SCHED */
 #endif /* CONFIG_RT_GROUP_SCHED */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+		for_each_possible_cpu(i) {
+			per_cpu(load_balance_tmpmask, i) = (void *)ptr;
+			ptr += cpumask_size();
+		}
+#endif /* CONFIG_CPUMASK_OFFSTACK */
 	}
 
 #ifdef CONFIG_SMP