-rw-r--r--  include/asm-arm/system.h       30
-rw-r--r--  include/asm-ia64/system.h      10
-rw-r--r--  include/asm-mips/system.h      10
-rw-r--r--  include/asm-s390/system.h      17
-rw-r--r--  include/asm-sparc/system.h      4
-rw-r--r--  include/asm-sparc64/system.h   14
-rw-r--r--  include/linux/init_task.h       1
-rw-r--r--  include/linux/sched.h          10
-rw-r--r--  kernel/sched.c                132
9 files changed, 131 insertions(+), 97 deletions(-)
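In short, the patch removes the per-architecture prepare_arch_switch(rq, next) / finish_arch_switch(rq, prev) / task_running(rq, p) macros and moves the runqueue-lock handling into kernel/sched.c, where generic prepare_lock_switch()/finish_lock_switch() helpers are selected by two opt-in symbols: __ARCH_WANT_UNLOCKED_CTXSW (drop rq->lock before the switch) and __ARCH_WANT_INTERRUPTS_ON_CTXSW (also re-enable interrupts, which implies the former). What follows is a hypothetical, standalone C model of the three resulting modes, not kernel code; ARCH_UNLOCKED_CTXSW / ARCH_IRQS_ON_CTXSW below merely stand in for the kernel's __ARCH_WANT_* symbols.

/*
 * Hypothetical standalone model of the three context-switch locking modes
 * introduced by this patch (not kernel code).  Build with one of:
 *   cc -o model model.c
 *   cc -DARCH_UNLOCKED_CTXSW -o model model.c
 *   cc -DARCH_IRQS_ON_CTXSW -o model model.c
 */
#include <stdio.h>

#ifdef ARCH_IRQS_ON_CTXSW
/* Mirrors the new sched.h rule: interrupts-on implies unlocked. */
# define ARCH_UNLOCKED_CTXSW
#endif

static void prepare_lock_switch(void)
{
#ifdef ARCH_UNLOCKED_CTXSW
# ifdef ARCH_IRQS_ON_CTXSW
	puts("prepare: spin_unlock_irq(&rq->lock) - lock dropped, IRQs back on");
# else
	puts("prepare: spin_unlock(&rq->lock) - lock dropped, IRQs stay off");
# endif
#else
	puts("prepare: nothing - rq->lock stays held across the switch");
#endif
}

static void finish_lock_switch(void)
{
#ifdef ARCH_UNLOCKED_CTXSW
# ifdef ARCH_IRQS_ON_CTXSW
	puts("finish: on SMP clear prev->oncpu; nothing else left to undo");
# else
	puts("finish: on SMP clear prev->oncpu, then local_irq_enable()");
# endif
#else
	puts("finish: spin_unlock_irq(&rq->lock)");
#endif
}

int main(void)
{
	prepare_lock_switch();
	puts("  ... context_switch() / switch_to() runs here ...");
	finish_lock_switch();
	return 0;
}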
diff --git a/include/asm-arm/system.h b/include/asm-arm/system.h
index 39dd7008013c..3d0d2860b6db 100644
--- a/include/asm-arm/system.h
+++ b/include/asm-arm/system.h
@@ -145,34 +145,12 @@ extern unsigned int user_debug;
 #define set_wmb(var, value) do { var = value; wmb(); } while (0)
 #define nop() __asm__ __volatile__("mov\tr0,r0\t@ nop\n\t");
 
-#ifdef CONFIG_SMP
 /*
- * Define our own context switch locking. This allows us to enable
- * interrupts over the context switch, otherwise we end up with high
- * interrupt latency. The real problem area is switch_mm() which may
- * do a full cache flush.
+ * switch_mm() may do a full cache flush over the context switch,
+ * so enable interrupts over the context switch to avoid high
+ * latency.
  */
-#define prepare_arch_switch(rq,next) \
-do { \
-	spin_lock(&(next)->switch_lock); \
-	spin_unlock_irq(&(rq)->lock); \
-} while (0)
-
-#define finish_arch_switch(rq,prev) \
-	spin_unlock(&(prev)->switch_lock)
-
-#define task_running(rq,p) \
-	((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
-#else
-/*
- * Our UP-case is more simple, but we assume knowledge of how
- * spin_unlock_irq() and friends are implemented. This avoids
- * us needlessly decrementing and incrementing the preempt count.
- */
-#define prepare_arch_switch(rq,next)	local_irq_enable()
-#define finish_arch_switch(rq,prev)	spin_unlock(&(rq)->lock)
-#define task_running(rq,p)	((rq)->curr == (p))
-#endif
+#define __ARCH_WANT_INTERRUPTS_ON_CTXSW
 
 /*
  * switch_to(prev, next) should switch from task `prev' to `next'
diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h
index 6f516e76d1f0..cd2cf76b2db1 100644
--- a/include/asm-ia64/system.h
+++ b/include/asm-ia64/system.h
@@ -183,8 +183,6 @@ do { \
 
 #ifdef __KERNEL__
 
-#define prepare_to_switch()	do { } while(0)
-
 #ifdef CONFIG_IA32_SUPPORT
 # define IS_IA32_PROCESS(regs)	(ia64_psr(regs)->is != 0)
 #else
@@ -274,13 +272,7 @@ extern void ia64_load_extra (struct task_struct *task);
  * of that CPU which will not be released, because there we wait for the
  * tasklist_lock to become available.
  */
-#define prepare_arch_switch(rq, next) \
-do { \
-	spin_lock(&(next)->switch_lock); \
-	spin_unlock(&(rq)->lock); \
-} while (0)
-#define finish_arch_switch(rq, prev)	spin_unlock_irq(&(prev)->switch_lock)
-#define task_running(rq, p)		((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
+#define __ARCH_WANT_UNLOCKED_CTXSW
 
 #define ia64_platform_is(x) (strcmp(x, platform_name) == 0)
 
diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h
index 888fd8908467..169f3d4265b1 100644
--- a/include/asm-mips/system.h
+++ b/include/asm-mips/system.h
@@ -422,16 +422,10 @@ extern void __die_if_kernel(const char *, struct pt_regs *, const char *file,
 extern int stop_a_enabled;
 
 /*
- * Taken from include/asm-ia64/system.h; prevents deadlock on SMP
+ * See include/asm-ia64/system.h; prevents deadlock on SMP
  * systems.
  */
-#define prepare_arch_switch(rq, next) \
-do { \
-	spin_lock(&(next)->switch_lock); \
-	spin_unlock(&(rq)->lock); \
-} while (0)
-#define finish_arch_switch(rq, prev)	spin_unlock_irq(&(prev)->switch_lock)
-#define task_running(rq, p)		((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
+#define __ARCH_WANT_UNLOCKED_CTXSW
 
 #define arch_align_stack(x) (x)
 
diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h
index e3cb3ce1d24a..b4a9f05a93d6 100644
--- a/include/asm-s390/system.h
+++ b/include/asm-s390/system.h
@@ -104,29 +104,18 @@ static inline void restore_access_regs(unsigned int *acrs)
 	prev = __switch_to(prev,next); \
 } while (0)
 
-#define prepare_arch_switch(rq, next)	do { } while(0)
-#define task_running(rq, p)		((rq)->curr == (p))
-
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 extern void account_user_vtime(struct task_struct *);
 extern void account_system_vtime(struct task_struct *);
-
-#define finish_arch_switch(rq, prev) do { \
-	set_fs(current->thread.mm_segment); \
-	spin_unlock(&(rq)->lock); \
-	account_system_vtime(prev); \
-	local_irq_enable(); \
-} while (0)
-
 #else
+#define account_system_vtime(prev) do { } while (0)
+#endif
 
 #define finish_arch_switch(rq, prev) do { \
 	set_fs(current->thread.mm_segment); \
-	spin_unlock_irq(&(rq)->lock); \
+	account_system_vtime(prev); \
 } while (0)
 
-#endif
-
 #define nop() __asm__ __volatile__ ("nop")
 
 #define xchg(ptr,x) \
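The s390 hunk above keeps only one finish_arch_switch() definition: the runqueue unlocking and IRQ enabling move into the generic scheduler, and when CONFIG_VIRT_CPU_ACCOUNTING is off a no-op account_system_vtime() macro is supplied so the remaining macro can call it unconditionally. A hypothetical standalone illustration of that no-op-stub pattern follows (not kernel code; task_t is replaced by a plain string, and the set_fs() call is omitted):

#include <stdio.h>

/* Flip this to model a kernel built with virtual CPU time accounting. */
/* #define CONFIG_VIRT_CPU_ACCOUNTING */

#ifdef CONFIG_VIRT_CPU_ACCOUNTING
static void account_system_vtime(const char *prev)
{
	printf("accounting system time for %s\n", prev);
}
#else
/* Stub, as in the patch: callers need no #ifdef of their own. */
# define account_system_vtime(prev) do { } while (0)
#endif

static void finish_arch_switch(const char *prev)
{
	/* set_fs(current->thread.mm_segment) would go here in the real code. */
	account_system_vtime(prev);
}

int main(void)
{
	finish_arch_switch("prev_task");
	return 0;
}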
diff --git a/include/asm-sparc/system.h b/include/asm-sparc/system.h
index 80cf20cfaee1..898562ebe94c 100644
--- a/include/asm-sparc/system.h
+++ b/include/asm-sparc/system.h
@@ -101,7 +101,7 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
  * SWITCH_ENTER and SWITH_DO_LAZY_FPU do not work yet (e.g. SMP does not work)
  * XXX WTF is the above comment? Found in late teen 2.4.x.
  */
-#define prepare_arch_switch(rq, next) do { \
+#define prepare_arch_switch(next) do { \
 	__asm__ __volatile__( \
 	".globl\tflush_patch_switch\nflush_patch_switch:\n\t" \
 	"save %sp, -0x40, %sp; save %sp, -0x40, %sp; save %sp, -0x40, %sp\n\t" \
@@ -109,8 +109,6 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
 	"save %sp, -0x40, %sp\n\t" \
 	"restore; restore; restore; restore; restore; restore; restore"); \
 } while(0)
-#define finish_arch_switch(rq, next)	spin_unlock_irq(&(rq)->lock)
-#define task_running(rq, p)		((rq)->curr == (p))
 
  /* Much care has gone into this code, do not touch it.
  *
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h
index fd12ca386f48..f9be2c5b4dc9 100644
--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -139,19 +139,13 @@ extern void __flushw_user(void);
 #define flush_user_windows flushw_user
 #define flush_register_windows flushw_all
 
-#define prepare_arch_switch(rq, next) \
-do {	spin_lock(&(next)->switch_lock); \
-	spin_unlock(&(rq)->lock); \
+/* Don't hold the runqueue lock over context switch */
+#define __ARCH_WANT_UNLOCKED_CTXSW
+#define prepare_arch_switch(next) \
+do { \
 	flushw_all(); \
 } while (0)
 
-#define finish_arch_switch(rq, prev) \
-do {	spin_unlock_irq(&(prev)->switch_lock); \
-} while (0)
-
-#define task_running(rq, p) \
-	((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
-
  /* See what happens when you design the chip correctly?
  *
  * We tell gcc we clobber all non-fixed-usage registers except
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index a6a8c1a38d5e..03206a425d7a 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -108,7 +108,6 @@ extern struct group_info init_groups;
 	.blocked	= {{0}}, \
 	.alloc_lock	= SPIN_LOCK_UNLOCKED, \
 	.proc_lock	= SPIN_LOCK_UNLOCKED, \
-	.switch_lock	= SPIN_LOCK_UNLOCKED, \
 	.journal_info	= NULL, \
 	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers), \
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 36a10781c3f3..d27be9337425 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -368,6 +368,11 @@ struct signal_struct {
 #endif
 };
 
+/* Context switch must be unlocked if interrupts are to be enabled */
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+# define __ARCH_WANT_UNLOCKED_CTXSW
+#endif
+
 /*
  * Bits in flags field of signal_struct.
  */
@@ -594,6 +599,9 @@ struct task_struct {
 
 	int lock_depth;		/* BKL lock depth */
 
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
+	int oncpu;
+#endif
 	int prio, static_prio;
 	struct list_head run_list;
 	prio_array_t *array;
@@ -716,8 +724,6 @@ struct task_struct {
 	spinlock_t alloc_lock;
 /* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */
 	spinlock_t proc_lock;
-/* context-switch lock */
-	spinlock_t switch_lock;
 
 /* journalling filesystem info */
 	void *journal_info;
diff --git a/kernel/sched.c b/kernel/sched.c
index 98bf1c091da5..b1410577f9a8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -268,14 +268,71 @@ static DEFINE_PER_CPU(struct runqueue, runqueues);
 #define task_rq(p)		cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 
-/*
- * Default context-switch locking:
- */
 #ifndef prepare_arch_switch
-# define prepare_arch_switch(rq, next)	do { } while (0)
-# define finish_arch_switch(rq, next)	spin_unlock_irq(&(rq)->lock)
-# define task_running(rq, p)		((rq)->curr == (p))
+# define prepare_arch_switch(next)	do { } while (0)
+#endif
+#ifndef finish_arch_switch
+# define finish_arch_switch(prev)	do { } while (0)
+#endif
+
+#ifndef __ARCH_WANT_UNLOCKED_CTXSW
+static inline int task_running(runqueue_t *rq, task_t *p)
+{
+	return rq->curr == p;
+}
+
+static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)
+{
+}
+
+static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
+{
+	spin_unlock_irq(&rq->lock);
+}
+
+#else /* __ARCH_WANT_UNLOCKED_CTXSW */
+static inline int task_running(runqueue_t *rq, task_t *p)
+{
+#ifdef CONFIG_SMP
+	return p->oncpu;
+#else
+	return rq->curr == p;
+#endif
+}
+
+static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)
+{
+#ifdef CONFIG_SMP
+	/*
+	 * We can optimise this out completely for !SMP, because the
+	 * SMP rebalancing from interrupt is the only thing that cares
+	 * here.
+	 */
+	next->oncpu = 1;
+#endif
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+	spin_unlock_irq(&rq->lock);
+#else
+	spin_unlock(&rq->lock);
 #endif
+}
+
+static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
+{
+#ifdef CONFIG_SMP
+	/*
+	 * After ->oncpu is cleared, the task can be moved to a different CPU.
+	 * We must ensure this doesn't happen until the switch is completely
+	 * finished.
+	 */
+	smp_wmb();
+	prev->oncpu = 0;
+#endif
+#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+	local_irq_enable();
+#endif
+}
+#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
 
 /*
  * task_rq_lock - lock the runqueue a given task resides on and disable
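Under __ARCH_WANT_UNLOCKED_CTXSW the runqueue lock no longer pins the outgoing task, so task_running() switches to polling p->oncpu, and finish_lock_switch() issues smp_wmb() before clearing it: another CPU must not observe ->oncpu == 0 (and migrate or run the task) before the old CPU's switch-related stores are visible. Below is a hypothetical userspace model of that handshake, not kernel code, using C11 release/acquire atomics in place of smp_wmb() plus a plain store (build with cc -pthread):

#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct task {
	long saved_sp;		/* stands in for state saved by switch_to() */
	atomic_int oncpu;	/* 1 while the old CPU is still switching it out */
};

static struct task t;

static void *old_cpu(void *arg)
{
	(void)arg;
	t.saved_sp = 0xdead;	/* "the context switch completes" */
	/* In the patch: smp_wmb(); prev->oncpu = 0;  modelled as a release store. */
	atomic_store_explicit(&t.oncpu, 0, memory_order_release);
	return NULL;
}

static void *other_cpu(void *arg)
{
	(void)arg;
	/* task_running(rq, p) is now p->oncpu: spin until the old CPU is done. */
	while (atomic_load_explicit(&t.oncpu, memory_order_acquire))
		;	/* cpu_relax() in the kernel */
	assert(t.saved_sp == 0xdead);	/* the switched-out state is visible */
	puts("task fully switched out; safe to run it on this CPU");
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	t.saved_sp = 0;
	atomic_init(&t.oncpu, 1);	/* prepare_lock_switch(): next->oncpu = 1 */
	pthread_create(&b, NULL, other_cpu, NULL);
	pthread_create(&a, NULL, old_cpu, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}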
@@ -1196,17 +1253,14 @@ void fastcall sched_fork(task_t *p)
 	p->state = TASK_RUNNING;
 	INIT_LIST_HEAD(&p->run_list);
 	p->array = NULL;
-	spin_lock_init(&p->switch_lock);
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->sched_info, 0, sizeof(p->sched_info));
 #endif
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
+	p->oncpu = 0;
+#endif
 #ifdef CONFIG_PREEMPT
-	/*
-	 * During context-switch we hold precisely one spinlock, which
-	 * schedule_tail drops. (in the common case it's this_rq()->lock,
-	 * but it also can be p->switch_lock.) So we compensate with a count
-	 * of 1. Also, we want to start with kernel preemption disabled.
-	 */
+	/* Want to start with kernel preemption disabled. */
 	p->thread_info->preempt_count = 1;
 #endif
 	/*
@@ -1388,22 +1442,40 @@ void fastcall sched_exit(task_t * p)
 }
 
 /**
+ * prepare_task_switch - prepare to switch tasks
+ * @rq: the runqueue preparing to switch
+ * @next: the task we are going to switch to.
+ *
+ * This is called with the rq lock held and interrupts off. It must
+ * be paired with a subsequent finish_task_switch after the context
+ * switch.
+ *
+ * prepare_task_switch sets up locking and calls architecture specific
+ * hooks.
+ */
+static inline void prepare_task_switch(runqueue_t *rq, task_t *next)
+{
+	prepare_lock_switch(rq, next);
+	prepare_arch_switch(next);
+}
+
+/**
  * finish_task_switch - clean up after a task-switch
  * @prev: the thread we just switched away from.
  *
- * We enter this with the runqueue still locked, and finish_arch_switch()
- * will unlock it along with doing any other architecture-specific cleanup
- * actions.
+ * finish_task_switch must be called after the context switch, paired
+ * with a prepare_task_switch call before the context switch.
+ * finish_task_switch will reconcile locking set up by prepare_task_switch,
+ * and do any other architecture-specific cleanup actions.
  *
  * Note that we may have delayed dropping an mm in context_switch(). If
  * so, we finish that here outside of the runqueue lock. (Doing it
  * with the lock held can cause deadlocks; see schedule() for
  * details.)
  */
-static inline void finish_task_switch(task_t *prev)
+static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
 	__releases(rq->lock)
 {
-	runqueue_t *rq = this_rq();
 	struct mm_struct *mm = rq->prev_mm;
 	unsigned long prev_task_flags;
 
@@ -1421,7 +1493,8 @@ static inline void finish_task_switch(task_t *prev)
 	 * Manfred Spraul <manfred@colorfullife.com>
 	 */
 	prev_task_flags = prev->flags;
-	finish_arch_switch(rq, prev);
+	finish_arch_switch(prev);
+	finish_lock_switch(rq, prev);
 	if (mm)
 		mmdrop(mm);
 	if (unlikely(prev_task_flags & PF_DEAD))
@@ -1435,8 +1508,12 @@ static inline void finish_task_switch(task_t *prev)
 asmlinkage void schedule_tail(task_t *prev)
 	__releases(rq->lock)
 {
-	finish_task_switch(prev);
-
+	runqueue_t *rq = this_rq();
+	finish_task_switch(rq, prev);
+#ifdef __ARCH_WANT_UNLOCKED_CTXSW
+	/* In this case, finish_task_switch does not reenable preemption */
+	preempt_enable();
+#endif
 	if (current->set_child_tid)
 		put_user(current->pid, current->set_child_tid);
 }
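One subtlety the schedule_tail() hunk handles: a newly forked task starts with preempt_count == 1 (set in sched_fork above). In the default mode, finish_lock_switch()'s spin_unlock_irq(&rq->lock) drops that count; with __ARCH_WANT_UNLOCKED_CTXSW the lock was already released before the switch and finish_lock_switch() only re-enables IRQs, so schedule_tail() must call preempt_enable() itself. A hypothetical standalone model of that bookkeeping (not kernel code):

#include <assert.h>
#include <stdio.h>

static int preempt_count;

/* Under CONFIG_PREEMPT, unlocking a spinlock decrements preempt_count. */
static void spin_unlock_irq(void)	{ preempt_count--; }
static void local_irq_enable(void)	{ /* no preempt_count change */ }
static void preempt_enable(void)	{ preempt_count--; }

static void schedule_tail(int arch_unlocked_ctxsw)
{
	/* sched_fork(): the child enters its first schedule_tail() at count 1. */
	preempt_count = 1;

	/* finish_task_switch() -> finish_lock_switch(): */
	if (arch_unlocked_ctxsw)
		local_irq_enable();	/* rq->lock was dropped before the switch */
	else
		spin_unlock_irq();	/* drops rq->lock and one preempt count */

	/* The explicit preempt_enable() added to schedule_tail() by this patch. */
	if (arch_unlocked_ctxsw)
		preempt_enable();

	assert(preempt_count == 0);
	printf("%s: preempt_count balanced at 0\n",
	       arch_unlocked_ctxsw ? "__ARCH_WANT_UNLOCKED_CTXSW" : "default");
}

int main(void)
{
	schedule_tail(0);
	schedule_tail(1);
	return 0;
}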
@@ -2816,11 +2893,15 @@ switch_tasks:
 		rq->curr = next;
 		++*switch_count;
 
-		prepare_arch_switch(rq, next);
+		prepare_task_switch(rq, next);
 		prev = context_switch(rq, prev, next);
 		barrier();
-
-		finish_task_switch(prev);
+		/*
+		 * this_rq must be evaluated again because prev may have moved
+		 * CPUs since it called schedule(), thus the 'rq' on its stack
+		 * frame will be invalid.
+		 */
+		finish_task_switch(this_rq(), prev);
 	} else
 		spin_unlock_irq(&rq->lock);
 
@@ -4085,6 +4166,9 @@ void __devinit init_idle(task_t *idle, int cpu)
 
 	spin_lock_irqsave(&rq->lock, flags);
 	rq->curr = rq->idle = idle;
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
+	idle->oncpu = 1;
+#endif
 	set_tsk_need_resched(idle);
 	spin_unlock_irqrestore(&rq->lock, flags);
 