aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc
diff options
context:
space:
mode:
authorPaul Mackerras <paulus@samba.org>2007-02-13 19:33:39 -0500
committerPaul Mackerras <paulus@samba.org>2007-02-13 19:33:39 -0500
commit944b380e0c3dead0e16a747dfd3ce2765afac16d (patch)
treed1016c273ebac0b4c02d851a854d9a789df632b5 /arch/powerpc
parentfff5f52808be01d16bb7c8b82580155ff19e16b0 (diff)
parentc7eb734766217b9ddac217cbccae3aedcfa67520 (diff)
Merge branch 'cell-merge' of git+ssh://master.kernel.org/pub/scm/linux/kernel/git/arnd/cell-2.6
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/oprofile/op_model_cell.c149
-rw-r--r--arch/powerpc/platforms/cell/pmu.c14
-rw-r--r--arch/powerpc/platforms/cell/spufs/context.c125
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c7
-rw-r--r--arch/powerpc/platforms/cell/spufs/run.c16
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c386
-rw-r--r--arch/powerpc/platforms/cell/spufs/spufs.h48
-rw-r--r--arch/powerpc/xmon/xmon.c1
8 files changed, 473 insertions, 273 deletions
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
index 2eb15f388103..e08e1d7b3dc5 100644
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -39,10 +39,17 @@
39#include "../platforms/cell/interrupt.h" 39#include "../platforms/cell/interrupt.h"
40 40
41#define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ 41#define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */
42#define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying
43 * PPU_CYCLES event
44 */
42#define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */ 45#define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */
43 46
44#define NUM_THREADS 2 47#define NUM_THREADS 2 /* number of physical threads in
45#define VIRT_CNTR_SW_TIME_NS 100000000 // 0.5 seconds 48 * physical processor
49 */
50#define NUM_TRACE_BUS_WORDS 4
51#define NUM_INPUT_BUS_WORDS 2
52
46 53
47struct pmc_cntrl_data { 54struct pmc_cntrl_data {
48 unsigned long vcntr; 55 unsigned long vcntr;
@@ -58,7 +65,7 @@ struct pmc_cntrl_data {
58struct pm_signal { 65struct pm_signal {
59 u16 cpu; /* Processor to modify */ 66 u16 cpu; /* Processor to modify */
60 u16 sub_unit; /* hw subunit this applies to (if applicable) */ 67 u16 sub_unit; /* hw subunit this applies to (if applicable) */
61 u16 signal_group; /* Signal Group to Enable/Disable */ 68 short int signal_group; /* Signal Group to Enable/Disable */
62 u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event 69 u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event
63 * Bus Word(s) (bitmask) 70 * Bus Word(s) (bitmask)
64 */ 71 */
@@ -93,7 +100,6 @@ static struct {
93 u32 pm07_cntrl[NR_PHYS_CTRS]; 100 u32 pm07_cntrl[NR_PHYS_CTRS];
94} pm_regs; 101} pm_regs;
95 102
96
97#define GET_SUB_UNIT(x) ((x & 0x0000f000) >> 12) 103#define GET_SUB_UNIT(x) ((x & 0x0000f000) >> 12)
98#define GET_BUS_WORD(x) ((x & 0x000000f0) >> 4) 104#define GET_BUS_WORD(x) ((x & 0x000000f0) >> 4)
99#define GET_BUS_TYPE(x) ((x & 0x00000300) >> 8) 105#define GET_BUS_TYPE(x) ((x & 0x00000300) >> 8)
@@ -101,7 +107,6 @@ static struct {
101#define GET_COUNT_CYCLES(x) (x & 0x00000001) 107#define GET_COUNT_CYCLES(x) (x & 0x00000001)
102#define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2) 108#define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2)
103 109
104
105static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values); 110static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);
106 111
107static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; 112static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
@@ -129,8 +134,8 @@ static spinlock_t virt_cntr_lock = SPIN_LOCK_UNLOCKED;
129 134
130static u32 ctr_enabled; 135static u32 ctr_enabled;
131 136
132static unsigned char trace_bus[4]; 137static unsigned char trace_bus[NUM_TRACE_BUS_WORDS];
133static unsigned char input_bus[2]; 138static unsigned char input_bus[NUM_INPUT_BUS_WORDS];
134 139
135/* 140/*
136 * Firmware interface functions 141 * Firmware interface functions
@@ -177,25 +182,40 @@ static void pm_rtas_reset_signals(u32 node)
177static void pm_rtas_activate_signals(u32 node, u32 count) 182static void pm_rtas_activate_signals(u32 node, u32 count)
178{ 183{
179 int ret; 184 int ret;
180 int j; 185 int i, j;
181 struct pm_signal pm_signal_local[NR_PHYS_CTRS]; 186 struct pm_signal pm_signal_local[NR_PHYS_CTRS];
182 187
188 /* There is no debug setup required for the cycles event.
189 * Note that only events in the same group can be used.
190 * Otherwise, there will be conflicts in correctly routing
191 * the signals on the debug bus. It is the responsiblity
192 * of the OProfile user tool to check the events are in
193 * the same group.
194 */
195 i = 0;
183 for (j = 0; j < count; j++) { 196 for (j = 0; j < count; j++) {
184 /* fw expects physical cpu # */ 197 if (pm_signal[j].signal_group != PPU_CYCLES_GRP_NUM) {
185 pm_signal_local[j].cpu = node; 198
186 pm_signal_local[j].signal_group = pm_signal[j].signal_group; 199 /* fw expects physical cpu # */
187 pm_signal_local[j].bus_word = pm_signal[j].bus_word; 200 pm_signal_local[i].cpu = node;
188 pm_signal_local[j].sub_unit = pm_signal[j].sub_unit; 201 pm_signal_local[i].signal_group
189 pm_signal_local[j].bit = pm_signal[j].bit; 202 = pm_signal[j].signal_group;
203 pm_signal_local[i].bus_word = pm_signal[j].bus_word;
204 pm_signal_local[i].sub_unit = pm_signal[j].sub_unit;
205 pm_signal_local[i].bit = pm_signal[j].bit;
206 i++;
207 }
190 } 208 }
191 209
192 ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, PASSTHRU_ENABLE, 210 if (i != 0) {
193 pm_signal_local, 211 ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, PASSTHRU_ENABLE,
194 count * sizeof(struct pm_signal)); 212 pm_signal_local,
213 i * sizeof(struct pm_signal));
195 214
196 if (ret) 215 if (ret)
197 printk(KERN_WARNING "%s: rtas returned: %d\n", 216 printk(KERN_WARNING "%s: rtas returned: %d\n",
198 __FUNCTION__, ret); 217 __FUNCTION__, ret);
218 }
199} 219}
200 220
201/* 221/*
@@ -212,7 +232,7 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
212 /* Special Event: Count all cpu cycles */ 232 /* Special Event: Count all cpu cycles */
213 pm_regs.pm07_cntrl[ctr] = CBE_COUNT_ALL_CYCLES; 233 pm_regs.pm07_cntrl[ctr] = CBE_COUNT_ALL_CYCLES;
214 p = &(pm_signal[ctr]); 234 p = &(pm_signal[ctr]);
215 p->signal_group = 21; 235 p->signal_group = PPU_CYCLES_GRP_NUM;
216 p->bus_word = 1; 236 p->bus_word = 1;
217 p->sub_unit = 0; 237 p->sub_unit = 0;
218 p->bit = 0; 238 p->bit = 0;
@@ -232,13 +252,21 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
232 252
233 p->signal_group = event / 100; 253 p->signal_group = event / 100;
234 p->bus_word = bus_word; 254 p->bus_word = bus_word;
235 p->sub_unit = unit_mask & 0x0000f000; 255 p->sub_unit = (unit_mask & 0x0000f000) >> 12;
236 256
237 pm_regs.pm07_cntrl[ctr] = 0; 257 pm_regs.pm07_cntrl[ctr] = 0;
238 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_COUNT_CYCLES(count_cycles); 258 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_COUNT_CYCLES(count_cycles);
239 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); 259 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity);
240 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); 260 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control);
241 261
262 /* Some of the islands signal selection is based on 64 bit words.
263 * The debug bus words are 32 bits, the input words to the performance
264 * counters are defined as 32 bits. Need to convert the 64 bit island
265 * specification to the appropriate 32 input bit and bus word for the
266 * performance counter event selection. See the CELL Performance
267 * monitoring signals manual and the Perf cntr hardware descriptions
268 * for the details.
269 */
242 if (input_control == 0) { 270 if (input_control == 0) {
243 if (signal_bit > 31) { 271 if (signal_bit > 31) {
244 signal_bit -= 32; 272 signal_bit -= 32;
@@ -259,12 +287,12 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
259 p->bit = signal_bit; 287 p->bit = signal_bit;
260 } 288 }
261 289
262 for (i = 0; i < 4; i++) { 290 for (i = 0; i < NUM_TRACE_BUS_WORDS; i++) {
263 if (bus_word & (1 << i)) { 291 if (bus_word & (1 << i)) {
264 pm_regs.debug_bus_control |= 292 pm_regs.debug_bus_control |=
265 (bus_type << (31 - (2 * i) + 1)); 293 (bus_type << (31 - (2 * i) + 1));
266 294
267 for (j = 0; j < 2; j++) { 295 for (j = 0; j < NUM_INPUT_BUS_WORDS; j++) {
268 if (input_bus[j] == 0xff) { 296 if (input_bus[j] == 0xff) {
269 input_bus[j] = i; 297 input_bus[j] = i;
270 pm_regs.group_control |= 298 pm_regs.group_control |=
@@ -278,52 +306,58 @@ out:
278 ; 306 ;
279} 307}
280 308
281static void write_pm_cntrl(int cpu, struct pm_cntrl *pm_cntrl) 309static void write_pm_cntrl(int cpu)
282{ 310{
283 /* Oprofile will use 32 bit counters, set bits 7:10 to 0 */ 311 /* Oprofile will use 32 bit counters, set bits 7:10 to 0
312 * pmregs.pm_cntrl is a global
313 */
314
284 u32 val = 0; 315 u32 val = 0;
285 if (pm_cntrl->enable == 1) 316 if (pm_regs.pm_cntrl.enable == 1)
286 val |= CBE_PM_ENABLE_PERF_MON; 317 val |= CBE_PM_ENABLE_PERF_MON;
287 318
288 if (pm_cntrl->stop_at_max == 1) 319 if (pm_regs.pm_cntrl.stop_at_max == 1)
289 val |= CBE_PM_STOP_AT_MAX; 320 val |= CBE_PM_STOP_AT_MAX;
290 321
291 if (pm_cntrl->trace_mode == 1) 322 if (pm_regs.pm_cntrl.trace_mode == 1)
292 val |= CBE_PM_TRACE_MODE_SET(pm_cntrl->trace_mode); 323 val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode);
293 324
294 if (pm_cntrl->freeze == 1) 325 if (pm_regs.pm_cntrl.freeze == 1)
295 val |= CBE_PM_FREEZE_ALL_CTRS; 326 val |= CBE_PM_FREEZE_ALL_CTRS;
296 327
297 /* Routine set_count_mode must be called previously to set 328 /* Routine set_count_mode must be called previously to set
298 * the count mode based on the user selection of user and kernel. 329 * the count mode based on the user selection of user and kernel.
299 */ 330 */
300 val |= CBE_PM_COUNT_MODE_SET(pm_cntrl->count_mode); 331 val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode);
301 cbe_write_pm(cpu, pm_control, val); 332 cbe_write_pm(cpu, pm_control, val);
302} 333}
303 334
304static inline void 335static inline void
305set_count_mode(u32 kernel, u32 user, struct pm_cntrl *pm_cntrl) 336set_count_mode(u32 kernel, u32 user)
306{ 337{
307 /* The user must specify user and kernel if they want them. If 338 /* The user must specify user and kernel if they want them. If
308 * neither is specified, OProfile will count in hypervisor mode 339 * neither is specified, OProfile will count in hypervisor mode.
340 * pm_regs.pm_cntrl is a global
309 */ 341 */
310 if (kernel) { 342 if (kernel) {
311 if (user) 343 if (user)
312 pm_cntrl->count_mode = CBE_COUNT_ALL_MODES; 344 pm_regs.pm_cntrl.count_mode = CBE_COUNT_ALL_MODES;
313 else 345 else
314 pm_cntrl->count_mode = CBE_COUNT_SUPERVISOR_MODE; 346 pm_regs.pm_cntrl.count_mode =
347 CBE_COUNT_SUPERVISOR_MODE;
315 } else { 348 } else {
316 if (user) 349 if (user)
317 pm_cntrl->count_mode = CBE_COUNT_PROBLEM_MODE; 350 pm_regs.pm_cntrl.count_mode = CBE_COUNT_PROBLEM_MODE;
318 else 351 else
319 pm_cntrl->count_mode = CBE_COUNT_HYPERVISOR_MODE; 352 pm_regs.pm_cntrl.count_mode =
353 CBE_COUNT_HYPERVISOR_MODE;
320 } 354 }
321} 355}
322 356
323static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl) 357static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl)
324{ 358{
325 359
326 pm07_cntrl[ctr] |= PM07_CTR_ENABLE(1); 360 pm07_cntrl[ctr] |= CBE_PM_CTR_ENABLE;
327 cbe_write_pm07_control(cpu, ctr, pm07_cntrl[ctr]); 361 cbe_write_pm07_control(cpu, ctr, pm07_cntrl[ctr]);
328} 362}
329 363
@@ -365,6 +399,14 @@ static void cell_virtual_cntr(unsigned long data)
365 hdw_thread = 1 ^ hdw_thread; 399 hdw_thread = 1 ^ hdw_thread;
366 next_hdw_thread = hdw_thread; 400 next_hdw_thread = hdw_thread;
367 401
402 for (i = 0; i < num_counters; i++)
403 /* There are some per thread events. Must do the
404 * set event, for the thread that is being started
405 */
406 set_pm_event(i,
407 pmc_cntrl[next_hdw_thread][i].evnts,
408 pmc_cntrl[next_hdw_thread][i].masks);
409
368 /* The following is done only once per each node, but 410 /* The following is done only once per each node, but
369 * we need cpu #, not node #, to pass to the cbe_xxx functions. 411 * we need cpu #, not node #, to pass to the cbe_xxx functions.
370 */ 412 */
@@ -385,12 +427,13 @@ static void cell_virtual_cntr(unsigned long data)
385 == 0xFFFFFFFF) 427 == 0xFFFFFFFF)
386 /* If the cntr value is 0xffffffff, we must 428 /* If the cntr value is 0xffffffff, we must
387 * reset that to 0xfffffff0 when the current 429 * reset that to 0xfffffff0 when the current
388 * thread is restarted. This will generate a new 430 * thread is restarted. This will generate a
389 * interrupt and make sure that we never restore 431 * new interrupt and make sure that we never
390 * the counters to the max value. If the counters 432 * restore the counters to the max value. If
391 * were restored to the max value, they do not 433 * the counters were restored to the max value,
392 * increment and no interrupts are generated. Hence 434 * they do not increment and no interrupts are
393 * no more samples will be collected on that cpu. 435 * generated. Hence no more samples will be
436 * collected on that cpu.
394 */ 437 */
395 cbe_write_ctr(cpu, i, 0xFFFFFFF0); 438 cbe_write_ctr(cpu, i, 0xFFFFFFF0);
396 else 439 else
@@ -410,9 +453,6 @@ static void cell_virtual_cntr(unsigned long data)
410 * Must do the set event, enable_cntr 453 * Must do the set event, enable_cntr
411 * for each cpu. 454 * for each cpu.
412 */ 455 */
413 set_pm_event(i,
414 pmc_cntrl[next_hdw_thread][i].evnts,
415 pmc_cntrl[next_hdw_thread][i].masks);
416 enable_ctr(cpu, i, 456 enable_ctr(cpu, i,
417 pm_regs.pm07_cntrl); 457 pm_regs.pm07_cntrl);
418 } else { 458 } else {
@@ -465,8 +505,7 @@ cell_reg_setup(struct op_counter_config *ctr,
465 pm_regs.pm_cntrl.trace_mode = 0; 505 pm_regs.pm_cntrl.trace_mode = 0;
466 pm_regs.pm_cntrl.freeze = 1; 506 pm_regs.pm_cntrl.freeze = 1;
467 507
468 set_count_mode(sys->enable_kernel, sys->enable_user, 508 set_count_mode(sys->enable_kernel, sys->enable_user);
469 &pm_regs.pm_cntrl);
470 509
471 /* Setup the thread 0 events */ 510 /* Setup the thread 0 events */
472 for (i = 0; i < num_ctrs; ++i) { 511 for (i = 0; i < num_ctrs; ++i) {
@@ -498,10 +537,10 @@ cell_reg_setup(struct op_counter_config *ctr,
498 pmc_cntrl[1][i].vcntr = i; 537 pmc_cntrl[1][i].vcntr = i;
499 } 538 }
500 539
501 for (i = 0; i < 4; i++) 540 for (i = 0; i < NUM_TRACE_BUS_WORDS; i++)
502 trace_bus[i] = 0xff; 541 trace_bus[i] = 0xff;
503 542
504 for (i = 0; i < 2; i++) 543 for (i = 0; i < NUM_INPUT_BUS_WORDS; i++)
505 input_bus[i] = 0xff; 544 input_bus[i] = 0xff;
506 545
507 /* Our counters count up, and "count" refers to 546 /* Our counters count up, and "count" refers to
@@ -560,7 +599,7 @@ static void cell_cpu_setup(struct op_counter_config *cntr)
560 cbe_write_pm(cpu, pm_start_stop, 0); 599 cbe_write_pm(cpu, pm_start_stop, 0);
561 cbe_write_pm(cpu, group_control, pm_regs.group_control); 600 cbe_write_pm(cpu, group_control, pm_regs.group_control);
562 cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control); 601 cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control);
563 write_pm_cntrl(cpu, &pm_regs.pm_cntrl); 602 write_pm_cntrl(cpu);
564 603
565 for (i = 0; i < num_counters; ++i) { 604 for (i = 0; i < num_counters; ++i) {
566 if (ctr_enabled & (1 << i)) { 605 if (ctr_enabled & (1 << i)) {
@@ -602,7 +641,7 @@ static void cell_global_start(struct op_counter_config *ctr)
602 } 641 }
603 } 642 }
604 643
605 cbe_clear_pm_interrupts(cpu); 644 cbe_get_and_clear_pm_interrupts(cpu);
606 cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask); 645 cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask);
607 cbe_enable_pm(cpu); 646 cbe_enable_pm(cpu);
608 } 647 }
@@ -672,7 +711,7 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
672 711
673 cbe_disable_pm(cpu); 712 cbe_disable_pm(cpu);
674 713
675 interrupt_mask = cbe_clear_pm_interrupts(cpu); 714 interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);
676 715
677 /* If the interrupt mask has been cleared, then the virt cntr 716 /* If the interrupt mask has been cleared, then the virt cntr
678 * has cleared the interrupt. When the thread that generated 717 * has cleared the interrupt. When the thread that generated
diff --git a/arch/powerpc/platforms/cell/pmu.c b/arch/powerpc/platforms/cell/pmu.c
index d04ae1671e6c..66ca4b5a1dbc 100644
--- a/arch/powerpc/platforms/cell/pmu.c
+++ b/arch/powerpc/platforms/cell/pmu.c
@@ -345,18 +345,12 @@ EXPORT_SYMBOL_GPL(cbe_read_trace_buffer);
345 * Enabling/disabling interrupts for the entire performance monitoring unit. 345 * Enabling/disabling interrupts for the entire performance monitoring unit.
346 */ 346 */
347 347
348u32 cbe_query_pm_interrupts(u32 cpu) 348u32 cbe_get_and_clear_pm_interrupts(u32 cpu)
349{
350 return cbe_read_pm(cpu, pm_status);
351}
352EXPORT_SYMBOL_GPL(cbe_query_pm_interrupts);
353
354u32 cbe_clear_pm_interrupts(u32 cpu)
355{ 349{
356 /* Reading pm_status clears the interrupt bits. */ 350 /* Reading pm_status clears the interrupt bits. */
357 return cbe_query_pm_interrupts(cpu); 351 return cbe_read_pm(cpu, pm_status);
358} 352}
359EXPORT_SYMBOL_GPL(cbe_clear_pm_interrupts); 353EXPORT_SYMBOL_GPL(cbe_get_and_clear_pm_interrupts);
360 354
361void cbe_enable_pm_interrupts(u32 cpu, u32 thread, u32 mask) 355void cbe_enable_pm_interrupts(u32 cpu, u32 thread, u32 mask)
362{ 356{
@@ -371,7 +365,7 @@ EXPORT_SYMBOL_GPL(cbe_enable_pm_interrupts);
371 365
372void cbe_disable_pm_interrupts(u32 cpu) 366void cbe_disable_pm_interrupts(u32 cpu)
373{ 367{
374 cbe_clear_pm_interrupts(cpu); 368 cbe_get_and_clear_pm_interrupts(cpu);
375 cbe_write_pm(cpu, pm_status, 0); 369 cbe_write_pm(cpu, pm_status, 0);
376} 370}
377EXPORT_SYMBOL_GPL(cbe_disable_pm_interrupts); 371EXPORT_SYMBOL_GPL(cbe_disable_pm_interrupts);
diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c
index 28c718ca3b51..04ad2e364e97 100644
--- a/arch/powerpc/platforms/cell/spufs/context.c
+++ b/arch/powerpc/platforms/cell/spufs/context.c
@@ -42,7 +42,7 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang)
42 } 42 }
43 spin_lock_init(&ctx->mmio_lock); 43 spin_lock_init(&ctx->mmio_lock);
44 kref_init(&ctx->kref); 44 kref_init(&ctx->kref);
45 init_rwsem(&ctx->state_sema); 45 mutex_init(&ctx->state_mutex);
46 init_MUTEX(&ctx->run_sema); 46 init_MUTEX(&ctx->run_sema);
47 init_waitqueue_head(&ctx->ibox_wq); 47 init_waitqueue_head(&ctx->ibox_wq);
48 init_waitqueue_head(&ctx->wbox_wq); 48 init_waitqueue_head(&ctx->wbox_wq);
@@ -53,6 +53,10 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang)
53 ctx->owner = get_task_mm(current); 53 ctx->owner = get_task_mm(current);
54 if (gang) 54 if (gang)
55 spu_gang_add_ctx(gang, ctx); 55 spu_gang_add_ctx(gang, ctx);
56 ctx->rt_priority = current->rt_priority;
57 ctx->policy = current->policy;
58 ctx->prio = current->prio;
59 INIT_DELAYED_WORK(&ctx->sched_work, spu_sched_tick);
56 goto out; 60 goto out;
57out_free: 61out_free:
58 kfree(ctx); 62 kfree(ctx);
@@ -65,9 +69,9 @@ void destroy_spu_context(struct kref *kref)
65{ 69{
66 struct spu_context *ctx; 70 struct spu_context *ctx;
67 ctx = container_of(kref, struct spu_context, kref); 71 ctx = container_of(kref, struct spu_context, kref);
68 down_write(&ctx->state_sema); 72 mutex_lock(&ctx->state_mutex);
69 spu_deactivate(ctx); 73 spu_deactivate(ctx);
70 up_write(&ctx->state_sema); 74 mutex_unlock(&ctx->state_mutex);
71 spu_fini_csa(&ctx->csa); 75 spu_fini_csa(&ctx->csa);
72 if (ctx->gang) 76 if (ctx->gang)
73 spu_gang_remove_ctx(ctx->gang, ctx); 77 spu_gang_remove_ctx(ctx->gang, ctx);
@@ -96,16 +100,6 @@ void spu_forget(struct spu_context *ctx)
96 spu_release(ctx); 100 spu_release(ctx);
97} 101}
98 102
99void spu_acquire(struct spu_context *ctx)
100{
101 down_read(&ctx->state_sema);
102}
103
104void spu_release(struct spu_context *ctx)
105{
106 up_read(&ctx->state_sema);
107}
108
109void spu_unmap_mappings(struct spu_context *ctx) 103void spu_unmap_mappings(struct spu_context *ctx)
110{ 104{
111 if (ctx->local_store) 105 if (ctx->local_store)
@@ -124,83 +118,84 @@ void spu_unmap_mappings(struct spu_context *ctx)
124 unmap_mapping_range(ctx->psmap, 0, 0x20000, 1); 118 unmap_mapping_range(ctx->psmap, 0, 0x20000, 1);
125} 119}
126 120
121/**
122 * spu_acquire_exclusive - lock spu contex and protect against userspace access
123 * @ctx: spu contex to lock
124 *
125 * Note:
126 * Returns 0 and with the context locked on success
127 * Returns negative error and with the context _unlocked_ on failure.
128 */
127int spu_acquire_exclusive(struct spu_context *ctx) 129int spu_acquire_exclusive(struct spu_context *ctx)
128{ 130{
129 int ret = 0; 131 int ret = -EINVAL;
130 132
131 down_write(&ctx->state_sema); 133 spu_acquire(ctx);
132 /* ctx is about to be freed, can't acquire any more */ 134 /*
133 if (!ctx->owner) { 135 * Context is about to be freed, so we can't acquire it anymore.
134 ret = -EINVAL; 136 */
135 goto out; 137 if (!ctx->owner)
136 } 138 goto out_unlock;
137 139
138 if (ctx->state == SPU_STATE_SAVED) { 140 if (ctx->state == SPU_STATE_SAVED) {
139 ret = spu_activate(ctx, 0); 141 ret = spu_activate(ctx, 0);
140 if (ret) 142 if (ret)
141 goto out; 143 goto out_unlock;
142 ctx->state = SPU_STATE_RUNNABLE;
143 } else { 144 } else {
144 /* We need to exclude userspace access to the context. */ 145 /*
146 * We need to exclude userspace access to the context.
147 *
148 * To protect against memory access we invalidate all ptes
149 * and make sure the pagefault handlers block on the mutex.
150 */
145 spu_unmap_mappings(ctx); 151 spu_unmap_mappings(ctx);
146 } 152 }
147 153
148out: 154 return 0;
149 if (ret) 155
150 up_write(&ctx->state_sema); 156 out_unlock:
157 spu_release(ctx);
151 return ret; 158 return ret;
152} 159}
153 160
154int spu_acquire_runnable(struct spu_context *ctx) 161/**
162 * spu_acquire_runnable - lock spu contex and make sure it is in runnable state
163 * @ctx: spu contex to lock
164 *
165 * Note:
166 * Returns 0 and with the context locked on success
167 * Returns negative error and with the context _unlocked_ on failure.
168 */
169int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags)
155{ 170{
156 int ret = 0; 171 int ret = -EINVAL;
157
158 down_read(&ctx->state_sema);
159 if (ctx->state == SPU_STATE_RUNNABLE) {
160 ctx->spu->prio = current->prio;
161 return 0;
162 }
163 up_read(&ctx->state_sema);
164
165 down_write(&ctx->state_sema);
166 /* ctx is about to be freed, can't acquire any more */
167 if (!ctx->owner) {
168 ret = -EINVAL;
169 goto out;
170 }
171 172
173 spu_acquire(ctx);
172 if (ctx->state == SPU_STATE_SAVED) { 174 if (ctx->state == SPU_STATE_SAVED) {
173 ret = spu_activate(ctx, 0); 175 /*
176 * Context is about to be freed, so we can't acquire it anymore.
177 */
178 if (!ctx->owner)
179 goto out_unlock;
180 ret = spu_activate(ctx, flags);
174 if (ret) 181 if (ret)
175 goto out; 182 goto out_unlock;
176 ctx->state = SPU_STATE_RUNNABLE;
177 } 183 }
178 184
179 downgrade_write(&ctx->state_sema); 185 return 0;
180 /* On success, we return holding the lock */
181
182 return ret;
183out:
184 /* Release here, to simplify calling code. */
185 up_write(&ctx->state_sema);
186 186
187 out_unlock:
188 spu_release(ctx);
187 return ret; 189 return ret;
188} 190}
189 191
192/**
193 * spu_acquire_saved - lock spu contex and make sure it is in saved state
194 * @ctx: spu contex to lock
195 */
190void spu_acquire_saved(struct spu_context *ctx) 196void spu_acquire_saved(struct spu_context *ctx)
191{ 197{
192 down_read(&ctx->state_sema); 198 spu_acquire(ctx);
193 199 if (ctx->state != SPU_STATE_SAVED)
194 if (ctx->state == SPU_STATE_SAVED)
195 return;
196
197 up_read(&ctx->state_sema);
198 down_write(&ctx->state_sema);
199
200 if (ctx->state == SPU_STATE_RUNNABLE) {
201 spu_deactivate(ctx); 200 spu_deactivate(ctx);
202 ctx->state = SPU_STATE_SAVED;
203 }
204
205 downgrade_write(&ctx->state_sema);
206} 201}
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index a528020baa18..b00653d69c01 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -103,6 +103,9 @@ static unsigned long spufs_mem_mmap_nopfn(struct vm_area_struct *vma,
103 103
104 offset += vma->vm_pgoff << PAGE_SHIFT; 104 offset += vma->vm_pgoff << PAGE_SHIFT;
105 105
106 if (offset >= LS_SIZE)
107 return NOPFN_SIGBUS;
108
106 spu_acquire(ctx); 109 spu_acquire(ctx);
107 110
108 if (ctx->state == SPU_STATE_SAVED) { 111 if (ctx->state == SPU_STATE_SAVED) {
@@ -164,7 +167,7 @@ static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma,
164 /* error here usually means a signal.. we might want to test 167 /* error here usually means a signal.. we might want to test
165 * the error code more precisely though 168 * the error code more precisely though
166 */ 169 */
167 ret = spu_acquire_runnable(ctx); 170 ret = spu_acquire_runnable(ctx, 0);
168 if (ret) 171 if (ret)
169 return NOPFN_REFAULT; 172 return NOPFN_REFAULT;
170 173
@@ -1306,7 +1309,7 @@ static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer,
1306 if (ret) 1309 if (ret)
1307 goto out; 1310 goto out;
1308 1311
1309 spu_acquire_runnable(ctx); 1312 spu_acquire_runnable(ctx, 0);
1310 if (file->f_flags & O_NONBLOCK) { 1313 if (file->f_flags & O_NONBLOCK) {
1311 ret = ctx->ops->send_mfc_command(ctx, &cmd); 1314 ret = ctx->ops->send_mfc_command(ctx, &cmd);
1312 } else { 1315 } else {
diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c
index 1acc2ffef8c8..353a8fa07ab8 100644
--- a/arch/powerpc/platforms/cell/spufs/run.c
+++ b/arch/powerpc/platforms/cell/spufs/run.c
@@ -133,7 +133,7 @@ out_drop_priv:
133 spu_mfc_sr1_set(ctx->spu, sr1); 133 spu_mfc_sr1_set(ctx->spu, sr1);
134 134
135out_unlock: 135out_unlock:
136 spu_release_exclusive(ctx); 136 spu_release(ctx);
137out: 137out:
138 return ret; 138 return ret;
139} 139}
@@ -143,7 +143,7 @@ static inline int spu_run_init(struct spu_context *ctx, u32 * npc)
143 int ret; 143 int ret;
144 unsigned long runcntl = SPU_RUNCNTL_RUNNABLE; 144 unsigned long runcntl = SPU_RUNCNTL_RUNNABLE;
145 145
146 ret = spu_acquire_runnable(ctx); 146 ret = spu_acquire_runnable(ctx, SPU_ACTIVATE_NOWAKE);
147 if (ret) 147 if (ret)
148 return ret; 148 return ret;
149 149
@@ -155,7 +155,7 @@ static inline int spu_run_init(struct spu_context *ctx, u32 * npc)
155 spu_release(ctx); 155 spu_release(ctx);
156 ret = spu_setup_isolated(ctx); 156 ret = spu_setup_isolated(ctx);
157 if (!ret) 157 if (!ret)
158 ret = spu_acquire_runnable(ctx); 158 ret = spu_acquire_runnable(ctx, SPU_ACTIVATE_NOWAKE);
159 } 159 }
160 160
161 /* if userspace has set the runcntrl register (eg, to issue an 161 /* if userspace has set the runcntrl register (eg, to issue an
@@ -164,8 +164,10 @@ static inline int spu_run_init(struct spu_context *ctx, u32 * npc)
164 (SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE); 164 (SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE);
165 if (runcntl == 0) 165 if (runcntl == 0)
166 runcntl = SPU_RUNCNTL_RUNNABLE; 166 runcntl = SPU_RUNCNTL_RUNNABLE;
167 } else 167 } else {
168 spu_start_tick(ctx);
168 ctx->ops->npc_write(ctx, *npc); 169 ctx->ops->npc_write(ctx, *npc);
170 }
169 171
170 ctx->ops->runcntl_write(ctx, runcntl); 172 ctx->ops->runcntl_write(ctx, runcntl);
171 return ret; 173 return ret;
@@ -176,6 +178,7 @@ static inline int spu_run_fini(struct spu_context *ctx, u32 * npc,
176{ 178{
177 int ret = 0; 179 int ret = 0;
178 180
181 spu_stop_tick(ctx);
179 *status = ctx->ops->status_read(ctx); 182 *status = ctx->ops->status_read(ctx);
180 *npc = ctx->ops->npc_read(ctx); 183 *npc = ctx->ops->npc_read(ctx);
181 spu_release(ctx); 184 spu_release(ctx);
@@ -329,8 +332,10 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx,
329 } 332 }
330 if (unlikely(ctx->state != SPU_STATE_RUNNABLE)) { 333 if (unlikely(ctx->state != SPU_STATE_RUNNABLE)) {
331 ret = spu_reacquire_runnable(ctx, npc, &status); 334 ret = spu_reacquire_runnable(ctx, npc, &status);
332 if (ret) 335 if (ret) {
336 spu_stop_tick(ctx);
333 goto out2; 337 goto out2;
338 }
334 continue; 339 continue;
335 } 340 }
336 ret = spu_process_events(ctx); 341 ret = spu_process_events(ctx);
@@ -361,4 +366,3 @@ out:
361 up(&ctx->run_sema); 366 up(&ctx->run_sema);
362 return ret; 367 return ret;
363} 368}
364
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index bd6fe4b7a84b..2f25e68b4bac 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -44,17 +44,18 @@
44#include <asm/spu_priv1.h> 44#include <asm/spu_priv1.h>
45#include "spufs.h" 45#include "spufs.h"
46 46
47#define SPU_MIN_TIMESLICE (100 * HZ / 1000) 47#define SPU_TIMESLICE (HZ)
48 48
49#define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1)
50struct spu_prio_array { 49struct spu_prio_array {
51 unsigned long bitmap[SPU_BITMAP_SIZE]; 50 DECLARE_BITMAP(bitmap, MAX_PRIO);
52 wait_queue_head_t waitq[MAX_PRIO]; 51 struct list_head runq[MAX_PRIO];
52 spinlock_t runq_lock;
53 struct list_head active_list[MAX_NUMNODES]; 53 struct list_head active_list[MAX_NUMNODES];
54 struct mutex active_mutex[MAX_NUMNODES]; 54 struct mutex active_mutex[MAX_NUMNODES];
55}; 55};
56 56
57static struct spu_prio_array *spu_prio; 57static struct spu_prio_array *spu_prio;
58static struct workqueue_struct *spu_sched_wq;
58 59
59static inline int node_allowed(int node) 60static inline int node_allowed(int node)
60{ 61{
@@ -68,6 +69,64 @@ static inline int node_allowed(int node)
68 return 1; 69 return 1;
69} 70}
70 71
72void spu_start_tick(struct spu_context *ctx)
73{
74 if (ctx->policy == SCHED_RR)
75 queue_delayed_work(spu_sched_wq, &ctx->sched_work, SPU_TIMESLICE);
76}
77
78void spu_stop_tick(struct spu_context *ctx)
79{
80 if (ctx->policy == SCHED_RR)
81 cancel_delayed_work(&ctx->sched_work);
82}
83
84void spu_sched_tick(struct work_struct *work)
85{
86 struct spu_context *ctx =
87 container_of(work, struct spu_context, sched_work.work);
88 struct spu *spu;
89 int rearm = 1;
90
91 mutex_lock(&ctx->state_mutex);
92 spu = ctx->spu;
93 if (spu) {
94 int best = sched_find_first_bit(spu_prio->bitmap);
95 if (best <= ctx->prio) {
96 spu_deactivate(ctx);
97 rearm = 0;
98 }
99 }
100 mutex_unlock(&ctx->state_mutex);
101
102 if (rearm)
103 spu_start_tick(ctx);
104}
105
106/**
107 * spu_add_to_active_list - add spu to active list
108 * @spu: spu to add to the active list
109 */
110static void spu_add_to_active_list(struct spu *spu)
111{
112 mutex_lock(&spu_prio->active_mutex[spu->node]);
113 list_add_tail(&spu->list, &spu_prio->active_list[spu->node]);
114 mutex_unlock(&spu_prio->active_mutex[spu->node]);
115}
116
117/**
118 * spu_remove_from_active_list - remove spu from active list
119 * @spu: spu to remove from the active list
120 */
121static void spu_remove_from_active_list(struct spu *spu)
122{
123 int node = spu->node;
124
125 mutex_lock(&spu_prio->active_mutex[node]);
126 list_del_init(&spu->list);
127 mutex_unlock(&spu_prio->active_mutex[node]);
128}
129
71static inline void mm_needs_global_tlbie(struct mm_struct *mm) 130static inline void mm_needs_global_tlbie(struct mm_struct *mm)
72{ 131{
73 int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1; 132 int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;
@@ -94,8 +153,12 @@ int spu_switch_event_unregister(struct notifier_block * n)
94 return blocking_notifier_chain_unregister(&spu_switch_notifier, n); 153 return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
95} 154}
96 155
97 156/**
98static inline void bind_context(struct spu *spu, struct spu_context *ctx) 157 * spu_bind_context - bind spu context to physical spu
158 * @spu: physical spu to bind to
159 * @ctx: context to bind
160 */
161static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
99{ 162{
100 pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid, 163 pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
101 spu->number, spu->node); 164 spu->number, spu->node);
@@ -104,7 +167,6 @@ static inline void bind_context(struct spu *spu, struct spu_context *ctx)
104 ctx->spu = spu; 167 ctx->spu = spu;
105 ctx->ops = &spu_hw_ops; 168 ctx->ops = &spu_hw_ops;
106 spu->pid = current->pid; 169 spu->pid = current->pid;
107 spu->prio = current->prio;
108 spu->mm = ctx->owner; 170 spu->mm = ctx->owner;
109 mm_needs_global_tlbie(spu->mm); 171 mm_needs_global_tlbie(spu->mm);
110 spu->ibox_callback = spufs_ibox_callback; 172 spu->ibox_callback = spufs_ibox_callback;
@@ -118,12 +180,21 @@ static inline void bind_context(struct spu *spu, struct spu_context *ctx)
118 spu->timestamp = jiffies; 180 spu->timestamp = jiffies;
119 spu_cpu_affinity_set(spu, raw_smp_processor_id()); 181 spu_cpu_affinity_set(spu, raw_smp_processor_id());
120 spu_switch_notify(spu, ctx); 182 spu_switch_notify(spu, ctx);
183 spu_add_to_active_list(spu);
184 ctx->state = SPU_STATE_RUNNABLE;
121} 185}
122 186
123static inline void unbind_context(struct spu *spu, struct spu_context *ctx) 187/**
188 * spu_unbind_context - unbind spu context from physical spu
189 * @spu: physical spu to unbind from
190 * @ctx: context to unbind
191 */
192static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
124{ 193{
125 pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__, 194 pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
126 spu->pid, spu->number, spu->node); 195 spu->pid, spu->number, spu->node);
196
197 spu_remove_from_active_list(spu);
127 spu_switch_notify(spu, NULL); 198 spu_switch_notify(spu, NULL);
128 spu_unmap_mappings(ctx); 199 spu_unmap_mappings(ctx);
129 spu_save(&ctx->csa, spu); 200 spu_save(&ctx->csa, spu);
@@ -136,95 +207,98 @@ static inline void unbind_context(struct spu *spu, struct spu_context *ctx)
136 spu->dma_callback = NULL; 207 spu->dma_callback = NULL;
137 spu->mm = NULL; 208 spu->mm = NULL;
138 spu->pid = 0; 209 spu->pid = 0;
139 spu->prio = MAX_PRIO;
140 ctx->ops = &spu_backing_ops; 210 ctx->ops = &spu_backing_ops;
141 ctx->spu = NULL; 211 ctx->spu = NULL;
142 spu->flags = 0; 212 spu->flags = 0;
143 spu->ctx = NULL; 213 spu->ctx = NULL;
144} 214}
145 215
146static inline void spu_add_wq(wait_queue_head_t * wq, wait_queue_t * wait, 216/**
147 int prio) 217 * spu_add_to_rq - add a context to the runqueue
218 * @ctx: context to add
219 */
220static void spu_add_to_rq(struct spu_context *ctx)
148{ 221{
149 prepare_to_wait_exclusive(wq, wait, TASK_INTERRUPTIBLE); 222 spin_lock(&spu_prio->runq_lock);
150 set_bit(prio, spu_prio->bitmap); 223 list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
224 set_bit(ctx->prio, spu_prio->bitmap);
225 spin_unlock(&spu_prio->runq_lock);
151} 226}
152 227
153static inline void spu_del_wq(wait_queue_head_t * wq, wait_queue_t * wait, 228/**
154 int prio) 229 * spu_del_from_rq - remove a context from the runqueue
230 * @ctx: context to remove
231 */
232static void spu_del_from_rq(struct spu_context *ctx)
155{ 233{
156 u64 flags; 234 spin_lock(&spu_prio->runq_lock);
157 235 list_del_init(&ctx->rq);
158 __set_current_state(TASK_RUNNING); 236 if (list_empty(&spu_prio->runq[ctx->prio]))
159 237 clear_bit(ctx->prio, spu_prio->bitmap);
160 spin_lock_irqsave(&wq->lock, flags); 238 spin_unlock(&spu_prio->runq_lock);
239}
161 240
162 remove_wait_queue_locked(wq, wait); 241/**
163 if (list_empty(&wq->task_list)) 242 * spu_grab_context - remove one context from the runqueue
164 clear_bit(prio, spu_prio->bitmap); 243 * @prio: priority of the context to be removed
244 *
245 * This function removes one context from the runqueue for priority @prio.
246 * If there is more than one context with the given priority the first
247 * task on the runqueue will be taken.
248 *
249 * Returns the spu_context it just removed.
250 *
251 * Must be called with spu_prio->runq_lock held.
252 */
253static struct spu_context *spu_grab_context(int prio)
254{
255 struct list_head *rq = &spu_prio->runq[prio];
165 256
166 spin_unlock_irqrestore(&wq->lock, flags); 257 if (list_empty(rq))
258 return NULL;
259 return list_entry(rq->next, struct spu_context, rq);
167} 260}
168 261
169static void spu_prio_wait(struct spu_context *ctx, u64 flags) 262static void spu_prio_wait(struct spu_context *ctx)
170{ 263{
171 int prio = current->prio;
172 wait_queue_head_t *wq = &spu_prio->waitq[prio];
173 DEFINE_WAIT(wait); 264 DEFINE_WAIT(wait);
174 265
175 if (ctx->spu) 266 set_bit(SPU_SCHED_WAKE, &ctx->sched_flags);
176 return; 267 prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
177
178 spu_add_wq(wq, &wait, prio);
179
180 if (!signal_pending(current)) { 268 if (!signal_pending(current)) {
181 up_write(&ctx->state_sema); 269 mutex_unlock(&ctx->state_mutex);
182 pr_debug("%s: pid=%d prio=%d\n", __FUNCTION__,
183 current->pid, current->prio);
184 schedule(); 270 schedule();
185 down_write(&ctx->state_sema); 271 mutex_lock(&ctx->state_mutex);
186 } 272 }
187 273 __set_current_state(TASK_RUNNING);
188 spu_del_wq(wq, &wait, prio); 274 remove_wait_queue(&ctx->stop_wq, &wait);
275 clear_bit(SPU_SCHED_WAKE, &ctx->sched_flags);
189} 276}
190 277
191static void spu_prio_wakeup(void) 278/**
279 * spu_reschedule - try to find a runnable context for a spu
280 * @spu: spu available
281 *
282 * This function is called whenever a spu becomes idle. It looks for the
283 * most suitable runnable spu context and schedules it for execution.
284 */
285static void spu_reschedule(struct spu *spu)
192{ 286{
193 int best = sched_find_first_bit(spu_prio->bitmap); 287 int best;
194 if (best < MAX_PRIO) {
195 wait_queue_head_t *wq = &spu_prio->waitq[best];
196 wake_up_interruptible_nr(wq, 1);
197 }
198}
199 288
200static int get_active_spu(struct spu *spu) 289 spu_free(spu);
201{
202 int node = spu->node;
203 struct spu *tmp;
204 int rc = 0;
205 290
206 mutex_lock(&spu_prio->active_mutex[node]); 291 spin_lock(&spu_prio->runq_lock);
207 list_for_each_entry(tmp, &spu_prio->active_list[node], list) { 292 best = sched_find_first_bit(spu_prio->bitmap);
208 if (tmp == spu) { 293 if (best < MAX_PRIO) {
209 list_del_init(&spu->list); 294 struct spu_context *ctx = spu_grab_context(best);
210 rc = 1; 295 if (ctx && test_bit(SPU_SCHED_WAKE, &ctx->sched_flags))
211 break; 296 wake_up(&ctx->stop_wq);
212 }
213 } 297 }
214 mutex_unlock(&spu_prio->active_mutex[node]); 298 spin_unlock(&spu_prio->runq_lock);
215 return rc;
216}
217
218static void put_active_spu(struct spu *spu)
219{
220 int node = spu->node;
221
222 mutex_lock(&spu_prio->active_mutex[node]);
223 list_add_tail(&spu->list, &spu_prio->active_list[node]);
224 mutex_unlock(&spu_prio->active_mutex[node]);
225} 299}
226 300
227static struct spu *spu_get_idle(struct spu_context *ctx, u64 flags) 301static struct spu *spu_get_idle(struct spu_context *ctx)
228{ 302{
229 struct spu *spu = NULL; 303 struct spu *spu = NULL;
230 int node = cpu_to_node(raw_smp_processor_id()); 304 int node = cpu_to_node(raw_smp_processor_id());
@@ -241,87 +315,154 @@ static struct spu *spu_get_idle(struct spu_context *ctx, u64 flags)
241 return spu; 315 return spu;
242} 316}
243 317
244static inline struct spu *spu_get(struct spu_context *ctx, u64 flags) 318/**
319 * find_victim - find a lower priority context to preempt
320 * @ctx: canidate context for running
321 *
322 * Returns the freed physical spu to run the new context on.
323 */
324static struct spu *find_victim(struct spu_context *ctx)
245{ 325{
246 /* Future: spu_get_idle() if possible, 326 struct spu_context *victim = NULL;
247 * otherwise try to preempt an active 327 struct spu *spu;
248 * context. 328 int node, n;
329
330 /*
331 * Look for a possible preemption candidate on the local node first.
332 * If there is no candidate look at the other nodes. This isn't
333 * exactly fair, but so far the whole spu schedule tries to keep
334 * a strong node affinity. We might want to fine-tune this in
335 * the future.
249 */ 336 */
250 return spu_get_idle(ctx, flags); 337 restart:
338 node = cpu_to_node(raw_smp_processor_id());
339 for (n = 0; n < MAX_NUMNODES; n++, node++) {
340 node = (node < MAX_NUMNODES) ? node : 0;
341 if (!node_allowed(node))
342 continue;
343
344 mutex_lock(&spu_prio->active_mutex[node]);
345 list_for_each_entry(spu, &spu_prio->active_list[node], list) {
346 struct spu_context *tmp = spu->ctx;
347
348 if (tmp->rt_priority < ctx->rt_priority &&
349 (!victim || tmp->rt_priority < victim->rt_priority))
350 victim = spu->ctx;
351 }
352 mutex_unlock(&spu_prio->active_mutex[node]);
353
354 if (victim) {
355 /*
356 * This nests ctx->state_mutex, but we always lock
357 * higher priority contexts before lower priority
358 * ones, so this is safe until we introduce
359 * priority inheritance schemes.
360 */
361 if (!mutex_trylock(&victim->state_mutex)) {
362 victim = NULL;
363 goto restart;
364 }
365
366 spu = victim->spu;
367 if (!spu) {
368 /*
369 * This race can happen because we've dropped
370 * the active list mutex. No a problem, just
371 * restart the search.
372 */
373 mutex_unlock(&victim->state_mutex);
374 victim = NULL;
375 goto restart;
376 }
377 spu_unbind_context(spu, victim);
378 mutex_unlock(&victim->state_mutex);
379 return spu;
380 }
381 }
382
383 return NULL;
251} 384}
252 385
253/* The three externally callable interfaces 386/**
254 * for the scheduler begin here. 387 * spu_activate - find a free spu for a context and execute it
388 * @ctx: spu context to schedule
389 * @flags: flags (currently ignored)
255 * 390 *
256 * spu_activate - bind a context to SPU, waiting as needed. 391 * Tries to find a free spu to run @ctx. If no free spu is availble
257 * spu_deactivate - unbind a context from its SPU. 392 * add the context to the runqueue so it gets woken up once an spu
258 * spu_yield - yield an SPU if others are waiting. 393 * is available.
259 */ 394 */
260 395int spu_activate(struct spu_context *ctx, unsigned long flags)
261int spu_activate(struct spu_context *ctx, u64 flags)
262{ 396{
263 struct spu *spu;
264 int ret = 0;
265 397
266 for (;;) { 398 if (ctx->spu)
267 if (ctx->spu) 399 return 0;
400
401 do {
402 struct spu *spu;
403
404 spu = spu_get_idle(ctx);
405 /*
406 * If this is a realtime thread we try to get it running by
407 * preempting a lower priority thread.
408 */
409 if (!spu && ctx->rt_priority)
410 spu = find_victim(ctx);
411 if (spu) {
412 spu_bind_context(spu, ctx);
268 return 0; 413 return 0;
269 spu = spu_get(ctx, flags);
270 if (spu != NULL) {
271 if (ctx->spu != NULL) {
272 spu_free(spu);
273 spu_prio_wakeup();
274 break;
275 }
276 bind_context(spu, ctx);
277 put_active_spu(spu);
278 break;
279 } 414 }
280 spu_prio_wait(ctx, flags); 415
281 if (signal_pending(current)) { 416 spu_add_to_rq(ctx);
282 ret = -ERESTARTSYS; 417 if (!(flags & SPU_ACTIVATE_NOWAKE))
283 spu_prio_wakeup(); 418 spu_prio_wait(ctx);
284 break; 419 spu_del_from_rq(ctx);
285 } 420 } while (!signal_pending(current));
286 } 421
287 return ret; 422 return -ERESTARTSYS;
288} 423}
289 424
425/**
426 * spu_deactivate - unbind a context from it's physical spu
427 * @ctx: spu context to unbind
428 *
429 * Unbind @ctx from the physical spu it is running on and schedule
430 * the highest priority context to run on the freed physical spu.
431 */
290void spu_deactivate(struct spu_context *ctx) 432void spu_deactivate(struct spu_context *ctx)
291{ 433{
292 struct spu *spu; 434 struct spu *spu = ctx->spu;
293 int needs_idle;
294 435
295 spu = ctx->spu; 436 if (spu) {
296 if (!spu) 437 spu_unbind_context(spu, ctx);
297 return; 438 spu_reschedule(spu);
298 needs_idle = get_active_spu(spu);
299 unbind_context(spu, ctx);
300 if (needs_idle) {
301 spu_free(spu);
302 spu_prio_wakeup();
303 } 439 }
304} 440}
305 441
442/**
443 * spu_yield - yield a physical spu if others are waiting
444 * @ctx: spu context to yield
445 *
446 * Check if there is a higher priority context waiting and if yes
447 * unbind @ctx from the physical spu and schedule the highest
448 * priority context to run on the freed physical spu instead.
449 */
306void spu_yield(struct spu_context *ctx) 450void spu_yield(struct spu_context *ctx)
307{ 451{
308 struct spu *spu; 452 struct spu *spu;
309 int need_yield = 0; 453 int need_yield = 0;
310 454
311 if (down_write_trylock(&ctx->state_sema)) { 455 if (mutex_trylock(&ctx->state_mutex)) {
312 if ((spu = ctx->spu) != NULL) { 456 if ((spu = ctx->spu) != NULL) {
313 int best = sched_find_first_bit(spu_prio->bitmap); 457 int best = sched_find_first_bit(spu_prio->bitmap);
314 if (best < MAX_PRIO) { 458 if (best < MAX_PRIO) {
315 pr_debug("%s: yielding SPU %d NODE %d\n", 459 pr_debug("%s: yielding SPU %d NODE %d\n",
316 __FUNCTION__, spu->number, spu->node); 460 __FUNCTION__, spu->number, spu->node);
317 spu_deactivate(ctx); 461 spu_deactivate(ctx);
318 ctx->state = SPU_STATE_SAVED;
319 need_yield = 1; 462 need_yield = 1;
320 } else {
321 spu->prio = MAX_PRIO;
322 } 463 }
323 } 464 }
324 up_write(&ctx->state_sema); 465 mutex_unlock(&ctx->state_mutex);
325 } 466 }
326 if (unlikely(need_yield)) 467 if (unlikely(need_yield))
327 yield(); 468 yield();
@@ -331,14 +472,19 @@ int __init spu_sched_init(void)
331{ 472{
332 int i; 473 int i;
333 474
475 spu_sched_wq = create_singlethread_workqueue("spusched");
476 if (!spu_sched_wq)
477 return 1;
478
334 spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL); 479 spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL);
335 if (!spu_prio) { 480 if (!spu_prio) {
336 printk(KERN_WARNING "%s: Unable to allocate priority queue.\n", 481 printk(KERN_WARNING "%s: Unable to allocate priority queue.\n",
337 __FUNCTION__); 482 __FUNCTION__);
483 destroy_workqueue(spu_sched_wq);
338 return 1; 484 return 1;
339 } 485 }
340 for (i = 0; i < MAX_PRIO; i++) { 486 for (i = 0; i < MAX_PRIO; i++) {
341 init_waitqueue_head(&spu_prio->waitq[i]); 487 INIT_LIST_HEAD(&spu_prio->runq[i]);
342 __clear_bit(i, spu_prio->bitmap); 488 __clear_bit(i, spu_prio->bitmap);
343 } 489 }
344 __set_bit(MAX_PRIO, spu_prio->bitmap); 490 __set_bit(MAX_PRIO, spu_prio->bitmap);
@@ -346,6 +492,7 @@ int __init spu_sched_init(void)
346 mutex_init(&spu_prio->active_mutex[i]); 492 mutex_init(&spu_prio->active_mutex[i]);
347 INIT_LIST_HEAD(&spu_prio->active_list[i]); 493 INIT_LIST_HEAD(&spu_prio->active_list[i]);
348 } 494 }
495 spin_lock_init(&spu_prio->runq_lock);
349 return 0; 496 return 0;
350} 497}
351 498
@@ -364,4 +511,5 @@ void __exit spu_sched_exit(void)
364 mutex_unlock(&spu_prio->active_mutex[node]); 511 mutex_unlock(&spu_prio->active_mutex[node]);
365 } 512 }
366 kfree(spu_prio); 513 kfree(spu_prio);
514 destroy_workqueue(spu_sched_wq);
367} 515}
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 0941c56df9b5..0c437891dfd5 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -23,7 +23,7 @@
23#define SPUFS_H 23#define SPUFS_H
24 24
25#include <linux/kref.h> 25#include <linux/kref.h>
26#include <linux/rwsem.h> 26#include <linux/mutex.h>
27#include <linux/spinlock.h> 27#include <linux/spinlock.h>
28#include <linux/fs.h> 28#include <linux/fs.h>
29 29
@@ -37,11 +37,13 @@ enum {
37}; 37};
38 38
39struct spu_context_ops; 39struct spu_context_ops;
40
41#define SPU_CONTEXT_PREEMPT 0UL
42
43struct spu_gang; 40struct spu_gang;
44 41
42/* ctx->sched_flags */
43enum {
44 SPU_SCHED_WAKE = 0,
45};
46
45struct spu_context { 47struct spu_context {
46 struct spu *spu; /* pointer to a physical SPU */ 48 struct spu *spu; /* pointer to a physical SPU */
47 struct spu_state csa; /* SPU context save area. */ 49 struct spu_state csa; /* SPU context save area. */
@@ -56,7 +58,7 @@ struct spu_context {
56 u64 object_id; /* user space pointer for oprofile */ 58 u64 object_id; /* user space pointer for oprofile */
57 59
58 enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state; 60 enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state;
59 struct rw_semaphore state_sema; 61 struct mutex state_mutex;
60 struct semaphore run_sema; 62 struct semaphore run_sema;
61 63
62 struct mm_struct *owner; 64 struct mm_struct *owner;
@@ -77,6 +79,14 @@ struct spu_context {
77 79
78 struct list_head gang_list; 80 struct list_head gang_list;
79 struct spu_gang *gang; 81 struct spu_gang *gang;
82
83 /* scheduler fields */
84 struct list_head rq;
85 struct delayed_work sched_work;
86 unsigned long sched_flags;
87 unsigned long rt_priority;
88 int policy;
89 int prio;
80}; 90};
81 91
82struct spu_gang { 92struct spu_gang {
@@ -161,6 +171,16 @@ void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx);
161void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx); 171void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
162 172
163/* context management */ 173/* context management */
174static inline void spu_acquire(struct spu_context *ctx)
175{
176 mutex_lock(&ctx->state_mutex);
177}
178
179static inline void spu_release(struct spu_context *ctx)
180{
181 mutex_unlock(&ctx->state_mutex);
182}
183
164struct spu_context * alloc_spu_context(struct spu_gang *gang); 184struct spu_context * alloc_spu_context(struct spu_gang *gang);
165void destroy_spu_context(struct kref *kref); 185void destroy_spu_context(struct kref *kref);
166struct spu_context * get_spu_context(struct spu_context *ctx); 186struct spu_context * get_spu_context(struct spu_context *ctx);
@@ -168,20 +188,18 @@ int put_spu_context(struct spu_context *ctx);
168void spu_unmap_mappings(struct spu_context *ctx); 188void spu_unmap_mappings(struct spu_context *ctx);
169 189
170void spu_forget(struct spu_context *ctx); 190void spu_forget(struct spu_context *ctx);
171void spu_acquire(struct spu_context *ctx); 191int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags);
172void spu_release(struct spu_context *ctx);
173int spu_acquire_runnable(struct spu_context *ctx);
174void spu_acquire_saved(struct spu_context *ctx); 192void spu_acquire_saved(struct spu_context *ctx);
175int spu_acquire_exclusive(struct spu_context *ctx); 193int spu_acquire_exclusive(struct spu_context *ctx);
176 194enum {
177static inline void spu_release_exclusive(struct spu_context *ctx) 195 SPU_ACTIVATE_NOWAKE = 1,
178{ 196};
179 up_write(&ctx->state_sema); 197int spu_activate(struct spu_context *ctx, unsigned long flags);
180}
181
182int spu_activate(struct spu_context *ctx, u64 flags);
183void spu_deactivate(struct spu_context *ctx); 198void spu_deactivate(struct spu_context *ctx);
184void spu_yield(struct spu_context *ctx); 199void spu_yield(struct spu_context *ctx);
200void spu_start_tick(struct spu_context *ctx);
201void spu_stop_tick(struct spu_context *ctx);
202void spu_sched_tick(struct work_struct *work);
185int __init spu_sched_init(void); 203int __init spu_sched_init(void);
186void __exit spu_sched_exit(void); 204void __exit spu_sched_exit(void);
187 205
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 77540a2f7704..0183e5fbaf46 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2811,7 +2811,6 @@ static void dump_spu_fields(struct spu *spu)
2811 DUMP_FIELD(spu, "0x%lx", irqs[2]); 2811 DUMP_FIELD(spu, "0x%lx", irqs[2]);
2812 DUMP_FIELD(spu, "0x%x", slb_replace); 2812 DUMP_FIELD(spu, "0x%x", slb_replace);
2813 DUMP_FIELD(spu, "%d", pid); 2813 DUMP_FIELD(spu, "%d", pid);
2814 DUMP_FIELD(spu, "%d", prio);
2815 DUMP_FIELD(spu, "0x%p", mm); 2814 DUMP_FIELD(spu, "0x%p", mm);
2816 DUMP_FIELD(spu, "0x%p", ctx); 2815 DUMP_FIELD(spu, "0x%p", ctx);
2817 DUMP_FIELD(spu, "0x%p", rq); 2816 DUMP_FIELD(spu, "0x%p", rq);