about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Nick Piggin <nickpiggin@yahoo.com.au> 2005-06-25 17:57:23 -0400
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-06-25 19:24:43 -0400
commit: 4866cde064afbb6c2a488c265e696879de616daa (patch)
tree: 6effad1ab6271129fc607b98273086409876563a
parent: 48c08d3f8ff94fa118187e4d8d4a5707bb85e59d (diff)
[PATCH] sched: cleanup context switch locking
Instead of requiring architecture code to interact with the scheduler's locking implementation, provide a couple of defines that can be used by the architecture to request runqueue unlocked context switches, and ask for interrupts to be enabled over the context switch.

Also replaces the "switch_lock" used by these architectures with an oncpu flag (note, not a potentially slow bitflag). This eliminates one bus locked memory operation when context switching, and simplifies the task_running function.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--  include/asm-arm/system.h      30
-rw-r--r--  include/asm-ia64/system.h     10
-rw-r--r--  include/asm-mips/system.h     10
-rw-r--r--  include/asm-s390/system.h     17
-rw-r--r--  include/asm-sparc/system.h     4
-rw-r--r--  include/asm-sparc64/system.h  14
-rw-r--r--  include/linux/init_task.h      1
-rw-r--r--  include/linux/sched.h         10
-rw-r--r--  kernel/sched.c               132
9 files changed, 131 insertions, 97 deletions
diff --git a/include/asm-arm/system.h b/include/asm-arm/system.h
index 39dd7008013c..3d0d2860b6db 100644
--- a/include/asm-arm/system.h
+++ b/include/asm-arm/system.h
@@ -145,34 +145,12 @@ extern unsigned int user_debug;
145#define set_wmb(var, value) do { var = value; wmb(); } while (0) 145#define set_wmb(var, value) do { var = value; wmb(); } while (0)
146#define nop() __asm__ __volatile__("mov\tr0,r0\t@ nop\n\t"); 146#define nop() __asm__ __volatile__("mov\tr0,r0\t@ nop\n\t");
147 147
148#ifdef CONFIG_SMP
149/* 148/*
150 * Define our own context switch locking. This allows us to enable 149 * switch_mm() may do a full cache flush over the context switch,
151 * interrupts over the context switch, otherwise we end up with high 150 * so enable interrupts over the context switch to avoid high
152 * interrupt latency. The real problem area is switch_mm() which may 151 * latency.
153 * do a full cache flush.
154 */ 152 */
155#define prepare_arch_switch(rq,next) \ 153#define __ARCH_WANT_INTERRUPTS_ON_CTXSW
156do { \
157 spin_lock(&(next)->switch_lock); \
158 spin_unlock_irq(&(rq)->lock); \
159} while (0)
160
161#define finish_arch_switch(rq,prev) \
162 spin_unlock(&(prev)->switch_lock)
163
164#define task_running(rq,p) \
165 ((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
166#else
167/*
168 * Our UP-case is more simple, but we assume knowledge of how
169 * spin_unlock_irq() and friends are implemented. This avoids
170 * us needlessly decrementing and incrementing the preempt count.
171 */
172#define prepare_arch_switch(rq,next) local_irq_enable()
173#define finish_arch_switch(rq,prev) spin_unlock(&(rq)->lock)
174#define task_running(rq,p) ((rq)->curr == (p))
175#endif
176 154
177/* 155/*
178 * switch_to(prev, next) should switch from task `prev' to `next' 156 * switch_to(prev, next) should switch from task `prev' to `next'
diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h
index 6f516e76d1f0..cd2cf76b2db1 100644
--- a/include/asm-ia64/system.h
+++ b/include/asm-ia64/system.h
@@ -183,8 +183,6 @@ do { \
183 183
184#ifdef __KERNEL__ 184#ifdef __KERNEL__
185 185
186#define prepare_to_switch() do { } while(0)
187
188#ifdef CONFIG_IA32_SUPPORT 186#ifdef CONFIG_IA32_SUPPORT
189# define IS_IA32_PROCESS(regs) (ia64_psr(regs)->is != 0) 187# define IS_IA32_PROCESS(regs) (ia64_psr(regs)->is != 0)
190#else 188#else
@@ -274,13 +272,7 @@ extern void ia64_load_extra (struct task_struct *task);
274 * of that CPU which will not be released, because there we wait for the 272 * of that CPU which will not be released, because there we wait for the
275 * tasklist_lock to become available. 273 * tasklist_lock to become available.
276 */ 274 */
277#define prepare_arch_switch(rq, next) \ 275#define __ARCH_WANT_UNLOCKED_CTXSW
278do { \
279 spin_lock(&(next)->switch_lock); \
280 spin_unlock(&(rq)->lock); \
281} while (0)
282#define finish_arch_switch(rq, prev) spin_unlock_irq(&(prev)->switch_lock)
283#define task_running(rq, p) ((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
284 276
285#define ia64_platform_is(x) (strcmp(x, platform_name) == 0) 277#define ia64_platform_is(x) (strcmp(x, platform_name) == 0)
286 278
diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h
index 888fd8908467..169f3d4265b1 100644
--- a/include/asm-mips/system.h
+++ b/include/asm-mips/system.h
@@ -422,16 +422,10 @@ extern void __die_if_kernel(const char *, struct pt_regs *, const char *file,
422extern int stop_a_enabled; 422extern int stop_a_enabled;
423 423
424/* 424/*
425 * Taken from include/asm-ia64/system.h; prevents deadlock on SMP 425 * See include/asm-ia64/system.h; prevents deadlock on SMP
426 * systems. 426 * systems.
427 */ 427 */
428#define prepare_arch_switch(rq, next) \ 428#define __ARCH_WANT_UNLOCKED_CTXSW
429do { \
430 spin_lock(&(next)->switch_lock); \
431 spin_unlock(&(rq)->lock); \
432} while (0)
433#define finish_arch_switch(rq, prev) spin_unlock_irq(&(prev)->switch_lock)
434#define task_running(rq, p) ((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
435 429
436#define arch_align_stack(x) (x) 430#define arch_align_stack(x) (x)
437 431
diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h
index e3cb3ce1d24a..b4a9f05a93d6 100644
--- a/include/asm-s390/system.h
+++ b/include/asm-s390/system.h
@@ -104,29 +104,18 @@ static inline void restore_access_regs(unsigned int *acrs)
104 prev = __switch_to(prev,next); \ 104 prev = __switch_to(prev,next); \
105} while (0) 105} while (0)
106 106
107#define prepare_arch_switch(rq, next) do { } while(0)
108#define task_running(rq, p) ((rq)->curr == (p))
109
110#ifdef CONFIG_VIRT_CPU_ACCOUNTING 107#ifdef CONFIG_VIRT_CPU_ACCOUNTING
111extern void account_user_vtime(struct task_struct *); 108extern void account_user_vtime(struct task_struct *);
112extern void account_system_vtime(struct task_struct *); 109extern void account_system_vtime(struct task_struct *);
113
114#define finish_arch_switch(rq, prev) do { \
115 set_fs(current->thread.mm_segment); \
116 spin_unlock(&(rq)->lock); \
117 account_system_vtime(prev); \
118 local_irq_enable(); \
119} while (0)
120
121#else 110#else
111#define account_system_vtime(prev) do { } while (0)
112#endif
122 113
123#define finish_arch_switch(rq, prev) do { \ 114#define finish_arch_switch(rq, prev) do { \
124 set_fs(current->thread.mm_segment); \ 115 set_fs(current->thread.mm_segment); \
125 spin_unlock_irq(&(rq)->lock); \ 116 account_system_vtime(prev); \
126} while (0) 117} while (0)
127 118
128#endif
129
130#define nop() __asm__ __volatile__ ("nop") 119#define nop() __asm__ __volatile__ ("nop")
131 120
132#define xchg(ptr,x) \ 121#define xchg(ptr,x) \
diff --git a/include/asm-sparc/system.h b/include/asm-sparc/system.h
index 80cf20cfaee1..898562ebe94c 100644
--- a/include/asm-sparc/system.h
+++ b/include/asm-sparc/system.h
@@ -101,7 +101,7 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
101 * SWITCH_ENTER and SWITH_DO_LAZY_FPU do not work yet (e.g. SMP does not work) 101 * SWITCH_ENTER and SWITH_DO_LAZY_FPU do not work yet (e.g. SMP does not work)
102 * XXX WTF is the above comment? Found in late teen 2.4.x. 102 * XXX WTF is the above comment? Found in late teen 2.4.x.
103 */ 103 */
104#define prepare_arch_switch(rq, next) do { \ 104#define prepare_arch_switch(next) do { \
105 __asm__ __volatile__( \ 105 __asm__ __volatile__( \
106 ".globl\tflush_patch_switch\nflush_patch_switch:\n\t" \ 106 ".globl\tflush_patch_switch\nflush_patch_switch:\n\t" \
107 "save %sp, -0x40, %sp; save %sp, -0x40, %sp; save %sp, -0x40, %sp\n\t" \ 107 "save %sp, -0x40, %sp; save %sp, -0x40, %sp; save %sp, -0x40, %sp\n\t" \
@@ -109,8 +109,6 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
109 "save %sp, -0x40, %sp\n\t" \ 109 "save %sp, -0x40, %sp\n\t" \
110 "restore; restore; restore; restore; restore; restore; restore"); \ 110 "restore; restore; restore; restore; restore; restore; restore"); \
111} while(0) 111} while(0)
112#define finish_arch_switch(rq, next) spin_unlock_irq(&(rq)->lock)
113#define task_running(rq, p) ((rq)->curr == (p))
114 112
115 /* Much care has gone into this code, do not touch it. 113 /* Much care has gone into this code, do not touch it.
116 * 114 *
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h
index fd12ca386f48..f9be2c5b4dc9 100644
--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -139,19 +139,13 @@ extern void __flushw_user(void);
139#define flush_user_windows flushw_user 139#define flush_user_windows flushw_user
140#define flush_register_windows flushw_all 140#define flush_register_windows flushw_all
141 141
142#define prepare_arch_switch(rq, next) \ 142/* Don't hold the runqueue lock over context switch */
143do { spin_lock(&(next)->switch_lock); \ 143#define __ARCH_WANT_UNLOCKED_CTXSW
144 spin_unlock(&(rq)->lock); \ 144#define prepare_arch_switch(next) \
145do { \
145 flushw_all(); \ 146 flushw_all(); \
146} while (0) 147} while (0)
147 148
148#define finish_arch_switch(rq, prev) \
149do { spin_unlock_irq(&(prev)->switch_lock); \
150} while (0)
151
152#define task_running(rq, p) \
153 ((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
154
155 /* See what happens when you design the chip correctly? 149 /* See what happens when you design the chip correctly?
156 * 150 *
157 * We tell gcc we clobber all non-fixed-usage registers except 151 * We tell gcc we clobber all non-fixed-usage registers except
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index a6a8c1a38d5e..03206a425d7a 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -108,7 +108,6 @@ extern struct group_info init_groups;
108 .blocked = {{0}}, \ 108 .blocked = {{0}}, \
109 .alloc_lock = SPIN_LOCK_UNLOCKED, \ 109 .alloc_lock = SPIN_LOCK_UNLOCKED, \
110 .proc_lock = SPIN_LOCK_UNLOCKED, \ 110 .proc_lock = SPIN_LOCK_UNLOCKED, \
111 .switch_lock = SPIN_LOCK_UNLOCKED, \
112 .journal_info = NULL, \ 111 .journal_info = NULL, \
113 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ 112 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
114} 113}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 36a10781c3f3..d27be9337425 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -368,6 +368,11 @@ struct signal_struct {
368#endif 368#endif
369}; 369};
370 370
371/* Context switch must be unlocked if interrupts are to be enabled */
372#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
373# define __ARCH_WANT_UNLOCKED_CTXSW
374#endif
375
371/* 376/*
372 * Bits in flags field of signal_struct. 377 * Bits in flags field of signal_struct.
373 */ 378 */
@@ -594,6 +599,9 @@ struct task_struct {
594 599
595 int lock_depth; /* BKL lock depth */ 600 int lock_depth; /* BKL lock depth */
596 601
602#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
603 int oncpu;
604#endif
597 int prio, static_prio; 605 int prio, static_prio;
598 struct list_head run_list; 606 struct list_head run_list;
599 prio_array_t *array; 607 prio_array_t *array;
@@ -716,8 +724,6 @@ struct task_struct {
716 spinlock_t alloc_lock; 724 spinlock_t alloc_lock;
717/* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */ 725/* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */
718 spinlock_t proc_lock; 726 spinlock_t proc_lock;
719/* context-switch lock */
720 spinlock_t switch_lock;
721 727
722/* journalling filesystem info */ 728/* journalling filesystem info */
723 void *journal_info; 729 void *journal_info;
diff --git a/kernel/sched.c b/kernel/sched.c
index 98bf1c091da5..b1410577f9a8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -268,14 +268,71 @@ static DEFINE_PER_CPU(struct runqueue, runqueues);
268#define task_rq(p) cpu_rq(task_cpu(p)) 268#define task_rq(p) cpu_rq(task_cpu(p))
269#define cpu_curr(cpu) (cpu_rq(cpu)->curr) 269#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
270 270
271/*
272 * Default context-switch locking:
273 */
274#ifndef prepare_arch_switch 271#ifndef prepare_arch_switch
275# define prepare_arch_switch(rq, next) do { } while (0) 272# define prepare_arch_switch(next) do { } while (0)
276# define finish_arch_switch(rq, next) spin_unlock_irq(&(rq)->lock) 273#endif
277# define task_running(rq, p) ((rq)->curr == (p)) 274#ifndef finish_arch_switch
275# define finish_arch_switch(prev) do { } while (0)
276#endif
277
278#ifndef __ARCH_WANT_UNLOCKED_CTXSW
279static inline int task_running(runqueue_t *rq, task_t *p)
280{
281 return rq->curr == p;
282}
283
284static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)
285{
286}
287
288static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
289{
290 spin_unlock_irq(&rq->lock);
291}
292
293#else /* __ARCH_WANT_UNLOCKED_CTXSW */
294static inline int task_running(runqueue_t *rq, task_t *p)
295{
296#ifdef CONFIG_SMP
297 return p->oncpu;
298#else
299 return rq->curr == p;
300#endif
301}
302
303static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)
304{
305#ifdef CONFIG_SMP
306 /*
307 * We can optimise this out completely for !SMP, because the
308 * SMP rebalancing from interrupt is the only thing that cares
309 * here.
310 */
311 next->oncpu = 1;
312#endif
313#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
314 spin_unlock_irq(&rq->lock);
315#else
316 spin_unlock(&rq->lock);
278#endif 317#endif
318}
319
320static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
321{
322#ifdef CONFIG_SMP
323 /*
324 * After ->oncpu is cleared, the task can be moved to a different CPU.
325 * We must ensure this doesn't happen until the switch is completely
326 * finished.
327 */
328 smp_wmb();
329 prev->oncpu = 0;
330#endif
331#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW
332 local_irq_enable();
333#endif
334}
335#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
279 336
280/* 337/*
281 * task_rq_lock - lock the runqueue a given task resides on and disable 338 * task_rq_lock - lock the runqueue a given task resides on and disable
@@ -1196,17 +1253,14 @@ void fastcall sched_fork(task_t *p)
1196 p->state = TASK_RUNNING; 1253 p->state = TASK_RUNNING;
1197 INIT_LIST_HEAD(&p->run_list); 1254 INIT_LIST_HEAD(&p->run_list);
1198 p->array = NULL; 1255 p->array = NULL;
1199 spin_lock_init(&p->switch_lock);
1200#ifdef CONFIG_SCHEDSTATS 1256#ifdef CONFIG_SCHEDSTATS
1201 memset(&p->sched_info, 0, sizeof(p->sched_info)); 1257 memset(&p->sched_info, 0, sizeof(p->sched_info));
1202#endif 1258#endif
1259#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
1260 p->oncpu = 0;
1261#endif
1203#ifdef CONFIG_PREEMPT 1262#ifdef CONFIG_PREEMPT
1204 /* 1263 /* Want to start with kernel preemption disabled. */
1205 * During context-switch we hold precisely one spinlock, which
1206 * schedule_tail drops. (in the common case it's this_rq()->lock,
1207 * but it also can be p->switch_lock.) So we compensate with a count
1208 * of 1. Also, we want to start with kernel preemption disabled.
1209 */
1210 p->thread_info->preempt_count = 1; 1264 p->thread_info->preempt_count = 1;
1211#endif 1265#endif
1212 /* 1266 /*
@@ -1388,22 +1442,40 @@ void fastcall sched_exit(task_t * p)
1388} 1442}
1389 1443
1390/** 1444/**
1445 * prepare_task_switch - prepare to switch tasks
1446 * @rq: the runqueue preparing to switch
1447 * @next: the task we are going to switch to.
1448 *
1449 * This is called with the rq lock held and interrupts off. It must
1450 * be paired with a subsequent finish_task_switch after the context
1451 * switch.
1452 *
1453 * prepare_task_switch sets up locking and calls architecture specific
1454 * hooks.
1455 */
1456static inline void prepare_task_switch(runqueue_t *rq, task_t *next)
1457{
1458 prepare_lock_switch(rq, next);
1459 prepare_arch_switch(next);
1460}
1461
1462/**
1391 * finish_task_switch - clean up after a task-switch 1463 * finish_task_switch - clean up after a task-switch
1392 * @prev: the thread we just switched away from. 1464 * @prev: the thread we just switched away from.
1393 * 1465 *
1394 * We enter this with the runqueue still locked, and finish_arch_switch() 1466 * finish_task_switch must be called after the context switch, paired
1395 * will unlock it along with doing any other architecture-specific cleanup 1467 * with a prepare_task_switch call before the context switch.
1396 * actions. 1468 * finish_task_switch will reconcile locking set up by prepare_task_switch,
1469 * and do any other architecture-specific cleanup actions.
1397 * 1470 *
1398 * Note that we may have delayed dropping an mm in context_switch(). If 1471 * Note that we may have delayed dropping an mm in context_switch(). If
1399 * so, we finish that here outside of the runqueue lock. (Doing it 1472 * so, we finish that here outside of the runqueue lock. (Doing it
1400 * with the lock held can cause deadlocks; see schedule() for 1473 * with the lock held can cause deadlocks; see schedule() for
1401 * details.) 1474 * details.)
1402 */ 1475 */
1403static inline void finish_task_switch(task_t *prev) 1476static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
1404 __releases(rq->lock) 1477 __releases(rq->lock)
1405{ 1478{
1406 runqueue_t *rq = this_rq();
1407 struct mm_struct *mm = rq->prev_mm; 1479 struct mm_struct *mm = rq->prev_mm;
1408 unsigned long prev_task_flags; 1480 unsigned long prev_task_flags;
1409 1481
@@ -1421,7 +1493,8 @@ static inline void finish_task_switch(task_t *prev)
1421 * Manfred Spraul <manfred@colorfullife.com> 1493 * Manfred Spraul <manfred@colorfullife.com>
1422 */ 1494 */
1423 prev_task_flags = prev->flags; 1495 prev_task_flags = prev->flags;
1424 finish_arch_switch(rq, prev); 1496 finish_arch_switch(prev);
1497 finish_lock_switch(rq, prev);
1425 if (mm) 1498 if (mm)
1426 mmdrop(mm); 1499 mmdrop(mm);
1427 if (unlikely(prev_task_flags & PF_DEAD)) 1500 if (unlikely(prev_task_flags & PF_DEAD))
@@ -1435,8 +1508,12 @@ static inline void finish_task_switch(task_t *prev)
1435asmlinkage void schedule_tail(task_t *prev) 1508asmlinkage void schedule_tail(task_t *prev)
1436 __releases(rq->lock) 1509 __releases(rq->lock)
1437{ 1510{
1438 finish_task_switch(prev); 1511 runqueue_t *rq = this_rq();
1439 1512 finish_task_switch(rq, prev);
1513#ifdef __ARCH_WANT_UNLOCKED_CTXSW
1514 /* In this case, finish_task_switch does not reenable preemption */
1515 preempt_enable();
1516#endif
1440 if (current->set_child_tid) 1517 if (current->set_child_tid)
1441 put_user(current->pid, current->set_child_tid); 1518 put_user(current->pid, current->set_child_tid);
1442} 1519}
@@ -2816,11 +2893,15 @@ switch_tasks:
2816 rq->curr = next; 2893 rq->curr = next;
2817 ++*switch_count; 2894 ++*switch_count;
2818 2895
2819 prepare_arch_switch(rq, next); 2896 prepare_task_switch(rq, next);
2820 prev = context_switch(rq, prev, next); 2897 prev = context_switch(rq, prev, next);
2821 barrier(); 2898 barrier();
2822 2899 /*
2823 finish_task_switch(prev); 2900 * this_rq must be evaluated again because prev may have moved
2901 * CPUs since it called schedule(), thus the 'rq' on its stack
2902 * frame will be invalid.
2903 */
2904 finish_task_switch(this_rq(), prev);
2824 } else 2905 } else
2825 spin_unlock_irq(&rq->lock); 2906 spin_unlock_irq(&rq->lock);
2826 2907
@@ -4085,6 +4166,9 @@ void __devinit init_idle(task_t *idle, int cpu)
4085 4166
4086 spin_lock_irqsave(&rq->lock, flags); 4167 spin_lock_irqsave(&rq->lock, flags);
4087 rq->curr = rq->idle = idle; 4168 rq->curr = rq->idle = idle;
4169#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
4170 idle->oncpu = 1;
4171#endif
4088 set_tsk_need_resched(idle); 4172 set_tsk_need_resched(idle);
4089 spin_unlock_irqrestore(&rq->lock, flags); 4173 spin_unlock_irqrestore(&rq->lock, flags);
4090 4174