55 files changed, 4303 insertions, 914 deletions
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig
index 74f83f4a4e5e..d9ac24e8de16 100644
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -1455,7 +1455,8 @@ CONFIG_HAS_DMA=y
 # Instrumentation Support
 #
 CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
+CONFIG_OPROFILE=m
+CONFIG_OPROFILE_CELL=y
 # CONFIG_KPROBES is not set
 
 #
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index d3f2080d2eee..37658ea417fa 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -219,6 +219,72 @@ void crash_kexec_secondary(struct pt_regs *regs)
         cpus_in_sr = CPU_MASK_NONE;
 }
 #endif
+#ifdef CONFIG_SPU_BASE
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+
+struct crash_spu_info {
+        struct spu *spu;
+        u32 saved_spu_runcntl_RW;
+        u32 saved_spu_status_R;
+        u32 saved_spu_npc_RW;
+        u64 saved_mfc_sr1_RW;
+        u64 saved_mfc_dar;
+        u64 saved_mfc_dsisr;
+};
+
+#define CRASH_NUM_SPUS 16       /* Enough for current hardware */
+static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS];
+
+static void crash_kexec_stop_spus(void)
+{
+        struct spu *spu;
+        int i;
+        u64 tmp;
+
+        for (i = 0; i < CRASH_NUM_SPUS; i++) {
+                if (!crash_spu_info[i].spu)
+                        continue;
+
+                spu = crash_spu_info[i].spu;
+
+                crash_spu_info[i].saved_spu_runcntl_RW =
+                        in_be32(&spu->problem->spu_runcntl_RW);
+                crash_spu_info[i].saved_spu_status_R =
+                        in_be32(&spu->problem->spu_status_R);
+                crash_spu_info[i].saved_spu_npc_RW =
+                        in_be32(&spu->problem->spu_npc_RW);
+
+                crash_spu_info[i].saved_mfc_dar = spu_mfc_dar_get(spu);
+                crash_spu_info[i].saved_mfc_dsisr = spu_mfc_dsisr_get(spu);
+                tmp = spu_mfc_sr1_get(spu);
+                crash_spu_info[i].saved_mfc_sr1_RW = tmp;
+
+                tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+                spu_mfc_sr1_set(spu, tmp);
+
+                __delay(200);
+        }
+}
+
+void crash_register_spus(struct list_head *list)
+{
+        struct spu *spu;
+
+        list_for_each_entry(spu, list, full_list) {
+                if (WARN_ON(spu->number >= CRASH_NUM_SPUS))
+                        continue;
+
+                crash_spu_info[spu->number].spu = spu;
+        }
+}
+
+#else
+static inline void crash_kexec_stop_spus(void)
+{
+}
+#endif /* CONFIG_SPU_BASE */
 
 void default_machine_crash_shutdown(struct pt_regs *regs)
 {
@@ -254,6 +320,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
         crash_save_cpu(regs, crashing_cpu);
         crash_kexec_prepare_cpus(crashing_cpu);
         cpu_set(crashing_cpu, cpus_in_crash);
+        crash_kexec_stop_spus();
         if (ppc_md.kexec_cpu_down)
                 ppc_md.kexec_cpu_down(1, 0);
 }
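The stop path above halts each registered SPE by clearing the MFC master run control bit in state register 1, which overrides the problem-state run control register, so a wedged SPU cannot keep touching memory while the kdump kernel boots. For crash_kexec_stop_spus() to find anything, platform code must have registered the SPU list first; a minimal sketch of such a call site (the spu_full_list name and the init hook are assumptions for illustration, not taken from this hunk):

    /* Hypothetical registration at SPU discovery time; the real call
     * site lives in the Cell platform code elsewhere in this patch. */
    static int __init example_spu_crash_init(void)
    {
            crash_register_spus(&spu_full_list);    /* assumed list name */
            return 0;
    }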
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e5df167f7824..727a6699f2f4 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -122,6 +122,7 @@ extern struct timezone sys_tz;
 static long timezone_offset;
 
 unsigned long ppc_proc_freq;
+EXPORT_SYMBOL(ppc_proc_freq);
 unsigned long ppc_tb_freq;
 
 static u64 tb_last_jiffy __cacheline_aligned_in_smp;
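This export matters because the defconfig above now builds OProfile as a module (CONFIG_OPROFILE=m): the SPU profiler added below derives its default sampling interval from the processor frequency, and modular code can only link against exported symbols. A sketch of the consumer side, assuming only what this patch shows:

    extern unsigned long ppc_proc_freq;     /* Hz; exported above */

    /* Default kHz value fed to set_spu_profiling_frequency() when the
     * user supplies no explicit frequency (see spu_profiler.c below). */
    static unsigned int default_freq_khz(void)
    {
            return ppc_proc_freq / 1000;
    }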
diff --git a/arch/powerpc/oprofile/Kconfig b/arch/powerpc/oprofile/Kconfig
index eb2dece76a54..7089e79689b9 100644
--- a/arch/powerpc/oprofile/Kconfig
+++ b/arch/powerpc/oprofile/Kconfig
@@ -15,3 +15,10 @@ config OPROFILE
 
 	  If unsure, say N.
 
+config OPROFILE_CELL
+	bool "OProfile for Cell Broadband Engine"
+	depends on (SPU_FS = y && OPROFILE = m) || (SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m)
+	default y
+	help
+	  Profiling of Cell BE SPUs requires special support enabled
+	  by this option.
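The depends expression enumerates the tristate combinations under which the SPU profiling code can actually link against SPUFS: the OPROFILE_CELL code calls into SPUFS for context-switch notification, so OProfile must not be more "built-in" than SPU_FS. Spelled out:

    SPU_FS=y, OPROFILE=y   -> allowed
    SPU_FS=y, OPROFILE=m   -> allowed
    SPU_FS=m, OPROFILE=m   -> allowed
    SPU_FS=m, OPROFILE=y   -> excluded (built-in code cannot call into a module)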
diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile
index 4b5f9528218c..c5f64c3bd668 100644
--- a/arch/powerpc/oprofile/Makefile
+++ b/arch/powerpc/oprofile/Makefile
@@ -11,7 +11,9 @@ DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
 		timer_int.o )
 
 oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
-oprofile-$(CONFIG_PPC_CELL_NATIVE) += op_model_cell.o
+oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \
+					cell/spu_profiler.o cell/vma_map.o \
+					cell/spu_task_sync.o
 oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o
 oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o
 oprofile-$(CONFIG_6xx) += op_model_7450.o
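Note the gating also changes from CONFIG_PPC_CELL_NATIVE to the new CONFIG_OPROFILE_CELL: when that option is set, kbuild folds the three new cell/ objects into the composite oprofile.o alongside op_model_cell.o, and when it is unset they are dropped from the link entirely. Conceptually (illustrative expansion only) the conditional behaves like:

    # effective object list when CONFIG_OPROFILE_CELL=y
    oprofile-y += op_model_cell.o cell/spu_profiler.o \
                  cell/vma_map.o cell/spu_task_sync.o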
diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h
new file mode 100644
index 000000000000..e5704f00c8b4
--- /dev/null
+++ b/arch/powerpc/oprofile/cell/pr_util.h
@@ -0,0 +1,97 @@
+/*
+ * Cell Broadband Engine OProfile Support
+ *
+ * (C) Copyright IBM Corporation 2006
+ *
+ * Author: Maynard Johnson <maynardj@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef PR_UTIL_H
+#define PR_UTIL_H
+
+#include <linux/cpumask.h>
+#include <linux/oprofile.h>
+#include <asm/cell-pmu.h>
+#include <asm/spu.h>
+
+#include "../../platforms/cell/cbe_regs.h"
+
+/* Defines used for sync_start */
+#define SKIP_GENERIC_SYNC 0
+#define SYNC_START_ERROR -1
+#define DO_GENERIC_SYNC 1
+
+struct spu_overlay_info {       /* map of sections within an SPU overlay */
+        unsigned int vma;       /* SPU virtual memory address from elf */
+        unsigned int size;      /* size of section from elf */
+        unsigned int offset;    /* offset of section into elf file */
+        unsigned int buf;
+};
+
+struct vma_to_fileoffset_map {  /* map of sections within an SPU program */
+        struct vma_to_fileoffset_map *next;     /* list pointer */
+        unsigned int vma;       /* SPU virtual memory address from elf */
+        unsigned int size;      /* size of section from elf */
+        unsigned int offset;    /* offset of section into elf file */
+        unsigned int guard_ptr;
+        unsigned int guard_val;
+        /*
+         * The guard pointer is an entry in the _ovly_buf_table,
+         * computed using ovly.buf as the index into the table.  Since
+         * ovly.buf values begin at '1' to reference the first (or 0th)
+         * entry in the _ovly_buf_table, the computation subtracts 1
+         * from ovly.buf.
+         * The guard value is stored in the _ovly_buf_table entry and
+         * is an index (starting at 1) back to the _ovly_table entry
+         * that is pointing at this _ovly_buf_table entry.  So, for
+         * example, for an overlay scenario with one overlay segment
+         * and two overlay sections:
+         *      - Section 1 points to the first entry of the
+         *        _ovly_buf_table, which contains a guard value
+         *        of '1', referencing the first (index=0) entry of
+         *        _ovly_table.
+         *      - Section 2 points to the second entry of the
+         *        _ovly_buf_table, which contains a guard value
+         *        of '2', referencing the second (index=1) entry of
+         *        _ovly_table.
+         */
+
+};
+
+/* The three functions below are for maintaining and accessing
+ * the vma-to-fileoffset map.
+ */
+struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu,
+                                             u64 objectid);
+unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map,
+                            unsigned int vma, const struct spu *aSpu,
+                            int *grd_val);
+void vma_map_free(struct vma_to_fileoffset_map *map);
+
+/*
+ * Entry point for SPU profiling.
+ * cycles_reset is the SPU_CYCLES count value specified by the user.
+ */
+int start_spu_profiling(unsigned int cycles_reset);
+
+void stop_spu_profiling(void);
+
+
+/* add the necessary profiling hooks */
+int spu_sync_start(void);
+
+/* remove the hooks */
+int spu_sync_stop(void);
+
+/* Record SPU program counter samples to the oprofile event buffer. */
+void spu_sync_buffer(int spu_num, unsigned int *samples,
+                     int num_samples);
+
+void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset);
+
+#endif /* PR_UTIL_H */
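Taken together, these declarations form the contract between the Cell model driver and the SPU profiling core: configure the sampling interval, hook SPUFS task switches, then arm the sampler. A hedged sketch of the expected call order (the wrapper functions and the error mapping are assumptions; op_model_cell.c below is the real consumer):

    /* Sketch: driving the pr_util.h API for the SPU_CYCLES event. */
    static int example_spu_prof_begin(unsigned int cycles_reset)
    {
            /* 0 => derive the frequency from ppc_proc_freq */
            set_spu_profiling_frequency(0, cycles_reset);

            if (spu_sync_start() == SYNC_START_ERROR)
                    return -EFAULT;         /* assumed error mapping */

            return start_spu_profiling(cycles_reset);
    }

    /* Teardown mirrors setup: stop the hrtimer, then unhook SPUFS. */
    static int example_spu_prof_end(void)
    {
            stop_spu_profiling();
            return spu_sync_stop();
    }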
diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c
new file mode 100644
index 000000000000..380d7e217531
--- /dev/null
+++ b/arch/powerpc/oprofile/cell/spu_profiler.c
@@ -0,0 +1,221 @@
+/*
+ * Cell Broadband Engine OProfile Support
+ *
+ * (C) Copyright IBM Corporation 2006
+ *
+ * Authors: Maynard Johnson <maynardj@us.ibm.com>
+ *          Carl Love <carll@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/hrtimer.h>
+#include <linux/smp.h>
+#include <linux/slab.h>
+#include <asm/cell-pmu.h>
+#include "pr_util.h"
+
+#define TRACE_ARRAY_SIZE 1024
+#define SCALE_SHIFT 14
+
+static u32 *samples;
+
+static int spu_prof_running;
+static unsigned int profiling_interval;
+
+#define NUM_SPU_BITS_TRBUF 16
+#define SPUS_PER_TB_ENTRY   4
+#define SPUS_PER_NODE       8
+
+#define SPU_PC_MASK         0xFFFF
+
+static DEFINE_SPINLOCK(sample_array_lock);
+unsigned long sample_array_lock_flags;
+
+void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
+{
+        unsigned long ns_per_cyc;
+
+        if (!freq_khz)
+                freq_khz = ppc_proc_freq/1000;
+
+        /* To calculate a timeout in nanoseconds, the basic
+         * formula is ns = cycles_reset * (NSEC_PER_SEC / cpu frequency).
+         * To avoid floating point math, we use the scale math
+         * technique as described in linux/jiffies.h.  We use
+         * a scale factor of SCALE_SHIFT, which provides 4 decimal places
+         * of precision.  This is close enough for the purpose at hand.
+         *
+         * The value of the timeout should be small enough that the hw
+         * trace buffer will not get more than about 1/3 full for the
+         * maximum user specified (the LFSR value) hw sampling frequency.
+         * This is to ensure the trace buffer will never fill even if the
+         * kernel thread scheduling varies under a heavy system load.
+         */
+
+        ns_per_cyc = (USEC_PER_SEC << SCALE_SHIFT)/freq_khz;
+        profiling_interval = (ns_per_cyc * cycles_reset) >> SCALE_SHIFT;
+
+}
+
+/*
+ * Extract SPU PC from trace buffer entry
+ */
+static void spu_pc_extract(int cpu, int entry)
+{
+        /* the trace buffer is 128 bits */
+        u64 trace_buffer[2];
+        u64 spu_mask;
+        int spu;
+
+        spu_mask = SPU_PC_MASK;
+
+        /* Each SPU PC is 16 bits; hence, four spus in each of
+         * the two 64-bit buffer entries that make up the
+         * 128-bit trace_buffer entry.  Process two 64-bit values
+         * simultaneously.
+         * trace[0] SPU PC contents are: 0 1 2 3
+         * trace[1] SPU PC contents are: 4 5 6 7
+         */
+
+        cbe_read_trace_buffer(cpu, trace_buffer);
+
+        for (spu = SPUS_PER_TB_ENTRY-1; spu >= 0; spu--) {
+                /* spu PC trace entry is upper 16 bits of the
+                 * 18 bit SPU program counter
+                 */
+                samples[spu * TRACE_ARRAY_SIZE + entry]
+                        = (spu_mask & trace_buffer[0]) << 2;
+                samples[(spu + SPUS_PER_TB_ENTRY) * TRACE_ARRAY_SIZE + entry]
+                        = (spu_mask & trace_buffer[1]) << 2;
+
+                trace_buffer[0] = trace_buffer[0] >> NUM_SPU_BITS_TRBUF;
+                trace_buffer[1] = trace_buffer[1] >> NUM_SPU_BITS_TRBUF;
+        }
+}
+
+static int cell_spu_pc_collection(int cpu)
+{
+        u32 trace_addr;
+        int entry;
+
+        /* process the collected SPU PC for the node */
+
+        entry = 0;
+
+        trace_addr = cbe_read_pm(cpu, trace_address);
+        while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) {
+                /* there is data in the trace buffer to process */
+                spu_pc_extract(cpu, entry);
+
+                entry++;
+
+                if (entry >= TRACE_ARRAY_SIZE)
+                        /* spu_samples is full */
+                        break;
+
+                trace_addr = cbe_read_pm(cpu, trace_address);
+        }
+
+        return entry;
+}
+
+
+static enum hrtimer_restart profile_spus(struct hrtimer *timer)
+{
+        ktime_t kt;
+        int cpu, node, k, num_samples, spu_num;
+
+        if (!spu_prof_running)
+                goto stop;
+
+        for_each_online_cpu(cpu) {
+                if (cbe_get_hw_thread_id(cpu))
+                        continue;
+
+                node = cbe_cpu_to_node(cpu);
+
+                /* There should only be one kernel thread at a time processing
+                 * the samples.  In the very unlikely case that the processing
+                 * is taking a very long time, multiple kernel threads may be
+                 * started to process the samples.  Make sure only one kernel
+                 * thread is working on the samples array at a time.  The
+                 * sample array must be loaded and then processed for a given
+                 * cpu.  The sample array is not per cpu.
+                 */
+                spin_lock_irqsave(&sample_array_lock,
+                                  sample_array_lock_flags);
+                num_samples = cell_spu_pc_collection(cpu);
+
+                if (num_samples == 0) {
+                        spin_unlock_irqrestore(&sample_array_lock,
+                                               sample_array_lock_flags);
+                        continue;
+                }
+
+                for (k = 0; k < SPUS_PER_NODE; k++) {
+                        spu_num = k + (node * SPUS_PER_NODE);
+                        spu_sync_buffer(spu_num,
+                                        samples + (k * TRACE_ARRAY_SIZE),
+                                        num_samples);
+                }
+
+                spin_unlock_irqrestore(&sample_array_lock,
+                                       sample_array_lock_flags);
+
+        }
+        smp_wmb();      /* ensure spu event buffer updates are written */
+                        /* don't want events intermingled... */
+
+        kt = ktime_set(0, profiling_interval);
+        if (!spu_prof_running)
+                goto stop;
+        hrtimer_forward(timer, timer->base->get_time(), kt);
+        return HRTIMER_RESTART;
+
+ stop:
+        printk(KERN_INFO "SPU_PROF: spu-prof timer ending\n");
+        return HRTIMER_NORESTART;
+}
+
+static struct hrtimer timer;
+/*
+ * Entry point for SPU profiling.
+ * NOTE:  SPU profiling is done system-wide, not per-CPU.
+ *
+ * cycles_reset is the count value specified by the user when
+ * setting up OProfile to count SPU_CYCLES.
+ */
+int start_spu_profiling(unsigned int cycles_reset)
+{
+        ktime_t kt;
+
+        pr_debug("timer resolution: %lu\n", TICK_NSEC);
+        kt = ktime_set(0, profiling_interval);
+        hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+        timer.expires = kt;
+        timer.function = profile_spus;
+
+        /* Allocate arrays for collecting SPU PC samples */
+        samples = kzalloc(SPUS_PER_NODE *
+                          TRACE_ARRAY_SIZE * sizeof(u32), GFP_KERNEL);
+
+        if (!samples)
+                return -ENOMEM;
+
+        spu_prof_running = 1;
+        hrtimer_start(&timer, kt, HRTIMER_MODE_REL);
+
+        return 0;
+}
+
+void stop_spu_profiling(void)
+{
+        spu_prof_running = 0;
+        hrtimer_cancel(&timer);
+        kfree(samples);
+        pr_debug("SPU_PROF: stop_spu_profiling issued\n");
+}
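A worked instance of the SCALE_SHIFT fixed-point math in set_spu_profiling_frequency(), with assumed numbers (a 3.2 GHz Cell clock, so freq_khz = 3200000, and a user cycles_reset of 100000):

    /* Exact answer: 100000 cycles / 3.2e9 Hz = 31250 ns.
     * Fixed-point path used above (SCALE_SHIFT = 14):
     *   ns_per_cyc = (1000000 << 14) / 3200000 = 5120
     *   interval   = (5120 * 100000) >> 14     = 31250 ns
     * The scaling keeps the intermediates in integer range while
     * preserving roughly four decimal digits of precision.  Likewise,
     * spu_pc_extract() widens each 16-bit trace entry to the 18-bit
     * SPU program counter by shifting left two bits.
     */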
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
new file mode 100644
index 000000000000..133665754a75
--- /dev/null
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -0,0 +1,484 @@
+/*
+ * Cell Broadband Engine OProfile Support
+ *
+ * (C) Copyright IBM Corporation 2006
+ *
+ * Author: Maynard Johnson <maynardj@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/* The purpose of this file is to handle SPU event task switching
+ * and to record SPU context information into the OProfile
+ * event buffer.
+ *
+ * Additionally, the spu_sync_buffer function is provided as a helper
+ * for recording actual SPU program counter samples to the event buffer.
+ */
+#include <linux/dcookies.h>
+#include <linux/kref.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/numa.h>
+#include <linux/oprofile.h>
+#include <linux/spinlock.h>
+#include "pr_util.h"
+
+#define RELEASE_ALL 9999
+
+static DEFINE_SPINLOCK(buffer_lock);
+static DEFINE_SPINLOCK(cache_lock);
+static int num_spu_nodes;
+int spu_prof_num_nodes;
+int last_guard_val[MAX_NUMNODES * 8];
+
+/* Container for caching information about an active SPU task. */
+struct cached_info {
+        struct vma_to_fileoffset_map *map;
+        struct spu *the_spu;    /* needed to access pointer to local_store */
+        struct kref cache_ref;
+};
+
+static struct cached_info *spu_info[MAX_NUMNODES * 8];
+
+static void destroy_cached_info(struct kref *kref)
+{
+        struct cached_info *info;
+
+        info = container_of(kref, struct cached_info, cache_ref);
+        vma_map_free(info->map);
+        kfree(info);
+        module_put(THIS_MODULE);
+}
+
+/* Return the cached_info for the passed SPU number.
+ * ATTENTION:  Callers are responsible for obtaining the
+ * cache_lock if needed prior to invoking this function.
+ */
+static struct cached_info *get_cached_info(struct spu *the_spu, int spu_num)
+{
+        struct kref *ref;
+        struct cached_info *ret_info;
+
+        if (spu_num >= num_spu_nodes) {
+                printk(KERN_ERR "SPU_PROF: "
+                       "%s, line %d: Invalid index %d into spu info cache\n",
+                       __FUNCTION__, __LINE__, spu_num);
+                ret_info = NULL;
+                goto out;
+        }
+        if (!spu_info[spu_num] && the_spu) {
+                ref = spu_get_profile_private_kref(the_spu->ctx);
+                if (ref) {
+                        spu_info[spu_num] = container_of(ref, struct cached_info, cache_ref);
+                        kref_get(&spu_info[spu_num]->cache_ref);
+                }
+        }
+
+        ret_info = spu_info[spu_num];
+ out:
+        return ret_info;
+}
+
+
+/* Looks for cached info for the passed spu.  If not found, the
+ * cached info is created for the passed spu.
+ * Returns 0 for success; otherwise, -1 for error.
+ */
+static int
+prepare_cached_spu_info(struct spu *spu, unsigned long objectId)
+{
+        unsigned long flags;
+        struct vma_to_fileoffset_map *new_map;
+        int retval = 0;
+        struct cached_info *info;
+
+        /* We won't bother getting cache_lock here since
+         * we don't do anything with the cached_info that's returned.
+         */
+        info = get_cached_info(spu, spu->number);
+
+        if (info) {
+                pr_debug("Found cached SPU info.\n");
+                goto out;
+        }
+
+        /* Create cached_info and set spu_info[spu->number] to point to it.
+         * spu->number is a system-wide value, not a per-node value.
+         */
+        info = kzalloc(sizeof(struct cached_info), GFP_KERNEL);
+        if (!info) {
+                printk(KERN_ERR "SPU_PROF: "
+                       "%s, line %d: create vma_map failed\n",
+                       __FUNCTION__, __LINE__);
+                retval = -ENOMEM;
+                goto err_alloc;
+        }
+        new_map = create_vma_map(spu, objectId);
+        if (!new_map) {
+                printk(KERN_ERR "SPU_PROF: "
+                       "%s, line %d: create vma_map failed\n",
+                       __FUNCTION__, __LINE__);
+                retval = -ENOMEM;
+                goto err_alloc;
+        }
+
+        pr_debug("Created vma_map\n");
+        info->map = new_map;
+        info->the_spu = spu;
+        kref_init(&info->cache_ref);
+        spin_lock_irqsave(&cache_lock, flags);
+        spu_info[spu->number] = info;
+        /* Increment count before passing off ref to SPUFS. */
+        kref_get(&info->cache_ref);
+
+        /* We increment the module refcount here since SPUFS is
+         * responsible for the final destruction of the cached_info,
+         * and it must be able to access the destroy_cached_info()
+         * function defined in the OProfile module.  We decrement
+         * the module refcount in destroy_cached_info.
+         */
+        try_module_get(THIS_MODULE);
+        spu_set_profile_private_kref(spu->ctx, &info->cache_ref,
+                                     destroy_cached_info);
+        spin_unlock_irqrestore(&cache_lock, flags);
+        goto out;
+
+ err_alloc:
+        kfree(info);
+ out:
+        return retval;
+}
+
+/*
+ * NOTE:  The caller is responsible for locking the
+ *        cache_lock prior to calling this function.
+ */
+static int release_cached_info(int spu_index)
+{
+        int index, end;
+
+        if (spu_index == RELEASE_ALL) {
+                end = num_spu_nodes;
+                index = 0;
+        } else {
+                if (spu_index >= num_spu_nodes) {
+                        printk(KERN_ERR "SPU_PROF: "
+                               "%s, line %d: "
+                               "Invalid index %d into spu info cache\n",
+                               __FUNCTION__, __LINE__, spu_index);
+                        goto out;
+                }
+                end = spu_index + 1;
+                index = spu_index;
+        }
+        for (; index < end; index++) {
+                if (spu_info[index]) {
+                        kref_put(&spu_info[index]->cache_ref,
+                                 destroy_cached_info);
+                        spu_info[index] = NULL;
+                }
+        }
+
+ out:
+        return 0;
+}
+
+/* The source code for fast_get_dcookie was "borrowed"
+ * from drivers/oprofile/buffer_sync.c.
+ */
+
+/* Optimisation.  We can manage without taking the dcookie sem
+ * because we cannot reach this code without at least one
+ * dcookie user still being registered (namely, the reader
+ * of the event buffer).
+ */
+static inline unsigned long fast_get_dcookie(struct dentry *dentry,
+                                             struct vfsmount *vfsmnt)
+{
+        unsigned long cookie;
+
+        if (dentry->d_cookie)
+                return (unsigned long)dentry;
+        get_dcookie(dentry, vfsmnt, &cookie);
+        return cookie;
+}
+
+/* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
+ * which corresponds loosely to "application name".  Also, determine
+ * the offset for the SPU ELF object.  If computed offset is
+ * non-zero, it implies an embedded SPU object; otherwise, it's a
+ * separate SPU binary, in which case we retrieve its dcookie.
+ * For the embedded case, we must determine if SPU ELF is embedded
+ * in the executable application or another file (i.e., shared lib).
+ * If embedded in a shared lib, we must get the dcookie and return
+ * that to the caller.
+ */
+static unsigned long
+get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
+                            unsigned long *spu_bin_dcookie,
+                            unsigned long spu_ref)
+{
+        unsigned long app_cookie = 0;
+        unsigned int my_offset = 0;
+        struct file *app = NULL;
+        struct vm_area_struct *vma;
+        struct mm_struct *mm = spu->mm;
+
+        if (!mm)
+                goto out;
+
+        down_read(&mm->mmap_sem);
+
+        for (vma = mm->mmap; vma; vma = vma->vm_next) {
+                if (!vma->vm_file)
+                        continue;
+                if (!(vma->vm_flags & VM_EXECUTABLE))
+                        continue;
+                app_cookie = fast_get_dcookie(vma->vm_file->f_dentry,
+                                              vma->vm_file->f_vfsmnt);
+                pr_debug("got dcookie for %s\n",
+                         vma->vm_file->f_dentry->d_name.name);
+                app = vma->vm_file;
+                break;
+        }
+
+        for (vma = mm->mmap; vma; vma = vma->vm_next) {
+                if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref)
+                        continue;
+                my_offset = spu_ref - vma->vm_start;
+                if (!vma->vm_file)
+                        goto fail_no_image_cookie;
+
+                pr_debug("Found spu ELF at %X(object-id:%lx) for file %s\n",
+                         my_offset, spu_ref,
+                         vma->vm_file->f_dentry->d_name.name);
+                *offsetp = my_offset;
+                break;
+        }
+
+        *spu_bin_dcookie = fast_get_dcookie(vma->vm_file->f_dentry,
+                                            vma->vm_file->f_vfsmnt);
+        pr_debug("got dcookie for %s\n", vma->vm_file->f_dentry->d_name.name);
+
+        up_read(&mm->mmap_sem);
+
+ out:
+        return app_cookie;
+
+ fail_no_image_cookie:
+        up_read(&mm->mmap_sem);
+
+        printk(KERN_ERR "SPU_PROF: "
+               "%s, line %d: Cannot find dcookie for SPU binary\n",
+               __FUNCTION__, __LINE__);
+        goto out;
+}
+
+
+
+/* This function finds or creates cached context information for the
+ * passed SPU and records SPU context information into the OProfile
+ * event buffer.
+ */
+static int process_context_switch(struct spu *spu, unsigned long objectId)
+{
+        unsigned long flags;
+        int retval;
+        unsigned int offset = 0;
+        unsigned long spu_cookie = 0, app_dcookie;
+
+        retval = prepare_cached_spu_info(spu, objectId);
+        if (retval)
+                goto out;
+
+        /* Get dcookie first because a mutex_lock is taken in that
+         * code path, so interrupts must not be disabled.
+         */
+        app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, objectId);
+        if (!app_dcookie || !spu_cookie) {
+                retval = -ENOENT;
+                goto out;
+        }
+
+        /* Record context info in event buffer */
+        spin_lock_irqsave(&buffer_lock, flags);
+        add_event_entry(ESCAPE_CODE);
+        add_event_entry(SPU_CTX_SWITCH_CODE);
+        add_event_entry(spu->number);
+        add_event_entry(spu->pid);
+        add_event_entry(spu->tgid);
+        add_event_entry(app_dcookie);
+        add_event_entry(spu_cookie);
+        add_event_entry(offset);
+        spin_unlock_irqrestore(&buffer_lock, flags);
+        smp_wmb();      /* ensure spu event buffer updates are written */
+                        /* don't want entries intermingled... */
+ out:
+        return retval;
+}
+
+/*
+ * This function is invoked on either a bind_context or unbind_context.
+ * If called for an unbind_context, the val arg is 0; otherwise,
+ * it is the object-id value for the spu context.
+ * The data arg is of type 'struct spu *'.
+ */
+static int spu_active_notify(struct notifier_block *self, unsigned long val,
+                             void *data)
+{
+        int retval;
+        unsigned long flags;
+        struct spu *the_spu = data;
+
+        pr_debug("SPU event notification arrived\n");
+        if (!val) {
+                spin_lock_irqsave(&cache_lock, flags);
+                retval = release_cached_info(the_spu->number);
+                spin_unlock_irqrestore(&cache_lock, flags);
+        } else {
+                retval = process_context_switch(the_spu, val);
+        }
+        return retval;
+}
+
+static struct notifier_block spu_active = {
+        .notifier_call = spu_active_notify,
+};
+
+static int number_of_online_nodes(void)
+{
+        u32 cpu; u32 tmp;
+        int nodes = 0;
+        for_each_online_cpu(cpu) {
+                tmp = cbe_cpu_to_node(cpu) + 1;
+                if (tmp > nodes)
+                        nodes++;
+        }
+        return nodes;
+}
+
+/* The main purpose of this function is to synchronize
+ * OProfile with SPUFS by registering to be notified of
+ * SPU task switches.
+ *
+ * NOTE: When profiling SPUs, we must ensure that only
+ * spu_sync_start is invoked and not the generic sync_start
+ * in drivers/oprofile/oprof.c.  A return value of
+ * SKIP_GENERIC_SYNC or SYNC_START_ERROR will
+ * accomplish this.
+ */
+int spu_sync_start(void)
+{
+        int k;
+        int ret = SKIP_GENERIC_SYNC;
+        int register_ret;
+        unsigned long flags = 0;
+
+        spu_prof_num_nodes = number_of_online_nodes();
+        num_spu_nodes = spu_prof_num_nodes * 8;
+
+        spin_lock_irqsave(&buffer_lock, flags);
+        add_event_entry(ESCAPE_CODE);
+        add_event_entry(SPU_PROFILING_CODE);
+        add_event_entry(num_spu_nodes);
+        spin_unlock_irqrestore(&buffer_lock, flags);
+
+        /* Register for SPU events */
+        register_ret = spu_switch_event_register(&spu_active);
+        if (register_ret) {
+                ret = SYNC_START_ERROR;
+                goto out;
+        }
+
+        for (k = 0; k < (MAX_NUMNODES * 8); k++)
+                last_guard_val[k] = 0;
+        pr_debug("spu_sync_start -- running.\n");
+ out:
+        return ret;
+}
+
+/* Record SPU program counter samples to the oprofile event buffer. */
+void spu_sync_buffer(int spu_num, unsigned int *samples,
+                     int num_samples)
+{
+        unsigned long long file_offset;
+        unsigned long flags;
+        int i;
+        struct vma_to_fileoffset_map *map;
+        struct spu *the_spu;
+        unsigned long long spu_num_ll = spu_num;
+        unsigned long long spu_num_shifted = spu_num_ll << 32;
+        struct cached_info *c_info;
+
+        /* We need to obtain the cache_lock here because it's
+         * possible that after getting the cached_info, the SPU job
+         * corresponding to this cached_info may end, thus resulting
+         * in the destruction of the cached_info.
+         */
+        spin_lock_irqsave(&cache_lock, flags);
+        c_info = get_cached_info(NULL, spu_num);
+        if (!c_info) {
+                /* This legitimately happens when the SPU task ends before all
+                 * samples are recorded.
+                 * No big deal -- so we just drop a few samples.
+                 */
+                pr_debug("SPU_PROF: No cached SPU context "
+                         "for SPU #%d. Dropping samples.\n", spu_num);
+                goto out;
+        }
+
+        map = c_info->map;
+        the_spu = c_info->the_spu;
+        spin_lock(&buffer_lock);
+        for (i = 0; i < num_samples; i++) {
+                unsigned int sample = *(samples+i);
+                int grd_val = 0;
+                file_offset = 0;
+                if (sample == 0)
+                        continue;
+                file_offset = vma_map_lookup(map, sample, the_spu, &grd_val);
+
+                /* If overlays are used by this SPU application, the guard
+                 * value is non-zero, indicating which overlay section is in
+                 * use.  We need to discard samples taken during the time
+                 * period in which an overlay swap occurs (i.e., the guard
+                 * value changes).
+                 */
+                if (grd_val && grd_val != last_guard_val[spu_num]) {
+                        last_guard_val[spu_num] = grd_val;
+                        /* Drop the rest of the samples. */
+                        break;
+                }
+
+                add_event_entry(file_offset | spu_num_shifted);
+        }
+        spin_unlock(&buffer_lock);
+ out:
+        spin_unlock_irqrestore(&cache_lock, flags);
+}
+
+
+int spu_sync_stop(void)
+{
+        unsigned long flags = 0;
+        int ret = spu_switch_event_unregister(&spu_active);
+        if (ret) {
+                printk(KERN_ERR "SPU_PROF: "
+                       "%s, line %d: spu_switch_event_unregister returned %d\n",
+                       __FUNCTION__, __LINE__, ret);
+                goto out;
+        }
+
+        spin_lock_irqsave(&cache_lock, flags);
+        ret = release_cached_info(RELEASE_ALL);
+        spin_unlock_irqrestore(&cache_lock, flags);
+ out:
+        pr_debug("spu_sync_stop -- done.\n");
+        return ret;
+}
+
+
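The records spu_sync_buffer() emits pack the SPU number into the upper 32 bits of each event word, with the vma-to-file offset (or the 0x10000000-flagged raw address for dynamically generated code) in the lower half. A hedged sketch of how a post-processing consumer would take such a word apart (the helper is illustrative, not part of this patch):

    /* Decode one SPU sample word from the oprofile event buffer. */
    static void example_decode_sample(unsigned long long word,
                                      unsigned int *spu_num,
                                      unsigned int *file_offset)
    {
            *spu_num = word >> 32;                  /* upper half */
            *file_offset = word & 0xFFFFFFFFULL;    /* lower half */
    }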
diff --git a/arch/powerpc/oprofile/cell/vma_map.c b/arch/powerpc/oprofile/cell/vma_map.c
new file mode 100644
index 000000000000..76ec1d16aef7
--- /dev/null
+++ b/arch/powerpc/oprofile/cell/vma_map.c
@@ -0,0 +1,287 @@
+/*
+ * Cell Broadband Engine OProfile Support
+ *
+ * (C) Copyright IBM Corporation 2006
+ *
+ * Author: Maynard Johnson <maynardj@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/* The code in this source file is responsible for generating
+ * vma-to-fileOffset maps for both overlay and non-overlay SPU
+ * applications.
+ */
+
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <linux/elf.h>
+#include "pr_util.h"
+
+
+void vma_map_free(struct vma_to_fileoffset_map *map)
+{
+        while (map) {
+                struct vma_to_fileoffset_map *next = map->next;
+                kfree(map);
+                map = next;
+        }
+}
+
+unsigned int
+vma_map_lookup(struct vma_to_fileoffset_map *map, unsigned int vma,
+               const struct spu *aSpu, int *grd_val)
+{
+        /*
+         * Default the offset to the physical address + a flag value.
+         * Addresses of dynamically generated code can't be found in the vma
+         * map.  For those addresses the flagged value will be sent on to
+         * the user space tools so they can be reported rather than just
+         * thrown away.
+         */
+        u32 offset = 0x10000000 + vma;
+        u32 ovly_grd;
+
+        for (; map; map = map->next) {
+                if (vma < map->vma || vma >= map->vma + map->size)
+                        continue;
+
+                if (map->guard_ptr) {
+                        ovly_grd = *(u32 *)(aSpu->local_store + map->guard_ptr);
+                        if (ovly_grd != map->guard_val)
+                                continue;
+                        *grd_val = ovly_grd;
+                }
+                offset = vma - map->vma + map->offset;
+                break;
+        }
+
+        return offset;
+}
+
+static struct vma_to_fileoffset_map *
+vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma,
+            unsigned int size, unsigned int offset, unsigned int guard_ptr,
+            unsigned int guard_val)
+{
+        struct vma_to_fileoffset_map *new =
+                kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL);
+        if (!new) {
+                printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n",
+                       __FUNCTION__, __LINE__);
+                vma_map_free(map);
+                return NULL;
+        }
+
+        new->next = map;
+        new->vma = vma;
+        new->size = size;
+        new->offset = offset;
+        new->guard_ptr = guard_ptr;
+        new->guard_val = guard_val;
+
+        return new;
+}
+
+
+/* Parse SPE ELF header and generate a list of vma_maps.
+ * A pointer to the first vma_map in the generated list
+ * of vma_maps is returned.  */
+struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu,
+                                             unsigned long spu_elf_start)
+{
+        static const unsigned char expected[EI_PAD] = {
+                [EI_MAG0] = ELFMAG0,
+                [EI_MAG1] = ELFMAG1,
+                [EI_MAG2] = ELFMAG2,
+                [EI_MAG3] = ELFMAG3,
+                [EI_CLASS] = ELFCLASS32,
+                [EI_DATA] = ELFDATA2MSB,
+                [EI_VERSION] = EV_CURRENT,
+                [EI_OSABI] = ELFOSABI_NONE
+        };
+
+        int grd_val;
+        struct vma_to_fileoffset_map *map = NULL;
+        struct spu_overlay_info ovly;
+        unsigned int overlay_tbl_offset = -1;
+        unsigned long phdr_start, shdr_start;
+        Elf32_Ehdr ehdr;
+        Elf32_Phdr phdr;
+        Elf32_Shdr shdr, shdr_str;
+        Elf32_Sym sym;
+        int i, j;
+        char name[32];
+
+        unsigned int ovly_table_sym = 0;
+        unsigned int ovly_buf_table_sym = 0;
+        unsigned int ovly_table_end_sym = 0;
+        unsigned int ovly_buf_table_end_sym = 0;
+        unsigned long ovly_table;
+        unsigned int n_ovlys;
+
+        /* Get and validate ELF header. */
+
+        if (copy_from_user(&ehdr, (void *) spu_elf_start, sizeof (ehdr)))
+                goto fail;
+
+        if (memcmp(ehdr.e_ident, expected, EI_PAD) != 0) {
+                printk(KERN_ERR "SPU_PROF: "
+                       "%s, line %d: Unexpected e_ident parsing SPU ELF\n",
+                       __FUNCTION__, __LINE__);
+                goto fail;
+        }
+        if (ehdr.e_machine != EM_SPU) {
+                printk(KERN_ERR "SPU_PROF: "
+                       "%s, line %d: Unexpected e_machine parsing SPU ELF\n",
+                       __FUNCTION__, __LINE__);
+                goto fail;
+        }
+        if (ehdr.e_type != ET_EXEC) {
+                printk(KERN_ERR "SPU_PROF: "
+                       "%s, line %d: Unexpected e_type parsing SPU ELF\n",
+                       __FUNCTION__, __LINE__);
+                goto fail;
+        }
+        phdr_start = spu_elf_start + ehdr.e_phoff;
+        shdr_start = spu_elf_start + ehdr.e_shoff;
+
+        /* Traverse program headers. */
+        for (i = 0; i < ehdr.e_phnum; i++) {
+                if (copy_from_user(&phdr,
+                                   (void *) (phdr_start + i * sizeof(phdr)),
+                                   sizeof(phdr)))
+                        goto fail;
+
+                if (phdr.p_type != PT_LOAD)
+                        continue;
+                if (phdr.p_flags & (1 << 27))
+                        continue;
+
+                map = vma_map_add(map, phdr.p_vaddr, phdr.p_memsz,
+                                  phdr.p_offset, 0, 0);
+                if (!map)
+                        goto fail;
+        }
+
+        pr_debug("SPU_PROF: Created non-overlay maps\n");
+        /* Traverse section table and search for overlay-related symbols. */
+        for (i = 0; i < ehdr.e_shnum; i++) {
+                if (copy_from_user(&shdr,
+                                   (void *) (shdr_start + i * sizeof(shdr)),
+                                   sizeof(shdr)))
+                        goto fail;
+
+                if (shdr.sh_type != SHT_SYMTAB)
+                        continue;
+                if (shdr.sh_entsize != sizeof (sym))
+                        continue;
+
+                if (copy_from_user(&shdr_str,
+                                   (void *) (shdr_start + shdr.sh_link *
+                                             sizeof(shdr)),
+                                   sizeof(shdr)))
+                        goto fail;
+
+                if (shdr_str.sh_type != SHT_STRTAB)
+                        goto fail;
+
+                for (j = 0; j < shdr.sh_size / sizeof (sym); j++) {
+                        if (copy_from_user(&sym, (void *) (spu_elf_start +
+                                                           shdr.sh_offset + j *
+                                                           sizeof (sym)),
+                                           sizeof (sym)))
+                                goto fail;
+
+                        if (copy_from_user(name, (void *)
+                                           (spu_elf_start + shdr_str.sh_offset +
+                                            sym.st_name),
+                                           20))
+                                goto fail;
+
+                        if (memcmp(name, "_ovly_table", 12) == 0)
+                                ovly_table_sym = sym.st_value;
+                        if (memcmp(name, "_ovly_buf_table", 16) == 0)
+                                ovly_buf_table_sym = sym.st_value;
+                        if (memcmp(name, "_ovly_table_end", 16) == 0)
+                                ovly_table_end_sym = sym.st_value;
+                        if (memcmp(name, "_ovly_buf_table_end", 20) == 0)
+                                ovly_buf_table_end_sym = sym.st_value;
+                }
+        }
+
+        /* If we don't have overlays, we're done. */
+        if (ovly_table_sym == 0 || ovly_buf_table_sym == 0
+            || ovly_table_end_sym == 0 || ovly_buf_table_end_sym == 0) {
+                pr_debug("SPU_PROF: No overlay table found\n");
+                goto out;
+        } else {
+                pr_debug("SPU_PROF: Overlay table found\n");
+        }
+
+        /* The _ovly_table symbol represents a table with one entry
+         * per overlay section.  The _ovly_buf_table symbol represents
+         * a table with one entry per overlay region.
+         * The struct spu_overlay_info gives the structure of the _ovly_table
+         * entries.  The structure of _ovly_table_buf is simply one
+         * u32 word per entry.
+         */
+        overlay_tbl_offset = vma_map_lookup(map, ovly_table_sym,
+                                            aSpu, &grd_val);
+        if (overlay_tbl_offset < 0) {
+                printk(KERN_ERR "SPU_PROF: "
+                       "%s, line %d: Error finding SPU overlay table\n",
+                       __FUNCTION__, __LINE__);
+                goto fail;
+        }
+        ovly_table = spu_elf_start + overlay_tbl_offset;
+
+        n_ovlys = (ovly_table_end_sym -
+                   ovly_table_sym) / sizeof (ovly);
+
+        /* Traverse overlay table. */
+        for (i = 0; i < n_ovlys; i++) {
+                if (copy_from_user(&ovly, (void *)
+                                   (ovly_table + i * sizeof (ovly)),
+                                   sizeof (ovly)))
+                        goto fail;
+
+                /* The ovly.vma/size/offset arguments are analogous to the same
+                 * arguments used above for non-overlay maps.  The final two
+                 * args are referred to as the guard pointer and the guard
+                 * value.
+                 * The guard pointer is an entry in the _ovly_buf_table,
+                 * computed using ovly.buf as the index into the table.  Since
+                 * ovly.buf values begin at '1' to reference the first (or 0th)
+                 * entry in the _ovly_buf_table, the computation subtracts 1
+                 * from ovly.buf.
+                 * The guard value is stored in the _ovly_buf_table entry and
+                 * is an index (starting at 1) back to the _ovly_table entry
+                 * that is pointing at this _ovly_buf_table entry.  So, for
+                 * example, for an overlay scenario with one overlay segment
+                 * and two overlay sections:
+                 *      - Section 1 points to the first entry of the
+                 *        _ovly_buf_table, which contains a guard value
+                 *        of '1', referencing the first (index=0) entry of
+                 *        _ovly_table.
+                 *      - Section 2 points to the second entry of the
+                 *        _ovly_buf_table, which contains a guard value
+                 *        of '2', referencing the second (index=1) entry of
+                 *        _ovly_table.
+                 */
+                map = vma_map_add(map, ovly.vma, ovly.size, ovly.offset,
+                                  ovly_buf_table_sym + (ovly.buf-1) * 4, i+1);
+                if (!map)
+                        goto fail;
+        }
+        goto out;
+
+ fail:
+        map = NULL;
+ out:
+        return map;
+}
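A worked example of vma_map_lookup(), with assumed numbers: for a PT_LOAD entry with vma = 0x100, size = 0x80 and file offset = 0x400, a sampled PC of 0x130 falls inside the entry, and for an overlay entry the guard word in the SPU local store must also match guard_val, otherwise the section is currently paged out and the search continues:

    /* Assumed map entry: vma=0x100, size=0x80, offset=0x400
     *   lookup(0x130) -> 0x130 - 0x100 + 0x400 = 0x430
     * No matching entry (e.g. dynamically generated code at 0x3f000):
     *   lookup(0x3f000) -> 0x10000000 + 0x3f000 (flagged value, passed
     *   through so user space tools can still report the sample)
     */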
diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c
index 1a7ef7e246d2..a28cce1d6c24 100644
--- a/arch/powerpc/oprofile/common.c
+++ b/arch/powerpc/oprofile/common.c
@@ -29,6 +29,8 @@ static struct op_powerpc_model *model;
 static struct op_counter_config ctr[OP_MAX_COUNTER];
 static struct op_system_config sys;
 
+static int op_per_cpu_rc;
+
 static void op_handle_interrupt(struct pt_regs *regs)
 {
         model->handle_interrupt(regs, ctr);
@@ -36,25 +38,41 @@ static void op_handle_interrupt(struct pt_regs *regs)
 
 static void op_powerpc_cpu_setup(void *dummy)
 {
-        model->cpu_setup(ctr);
+        int ret;
+
+        ret = model->cpu_setup(ctr);
+
+        if (ret != 0)
+                op_per_cpu_rc = ret;
 }
 
 static int op_powerpc_setup(void)
 {
         int err;
 
+        op_per_cpu_rc = 0;
+
         /* Grab the hardware */
         err = reserve_pmc_hardware(op_handle_interrupt);
         if (err)
                 return err;
 
         /* Pre-compute the values to stuff in the hardware registers. */
-        model->reg_setup(ctr, &sys, model->num_counters);
+        op_per_cpu_rc = model->reg_setup(ctr, &sys, model->num_counters);
 
-        /* Configure the registers on all cpus. */
+        if (op_per_cpu_rc)
+                goto out;
+
+        /* Configure the registers on all cpus.  If an error occurs on one
+         * of the cpus, op_per_cpu_rc will be set to the error */
         on_each_cpu(op_powerpc_cpu_setup, NULL, 0, 1);
 
-        return 0;
+out:    if (op_per_cpu_rc) {
+                /* error on setup release the performance counter hardware */
+                release_pmc_hardware();
+        }
+
+        return op_per_cpu_rc;
 }
 
 static void op_powerpc_shutdown(void)
@@ -64,16 +82,29 @@ static void op_powerpc_shutdown(void)
 
 static void op_powerpc_cpu_start(void *dummy)
 {
-        model->start(ctr);
+        /* If any of the cpus have returned an error, set the
+         * global flag to the error so it can be returned
+         * to the generic OProfile caller.
+         */
+        int ret;
+
+        ret = model->start(ctr);
+        if (ret != 0)
+                op_per_cpu_rc = ret;
 }
 
 static int op_powerpc_start(void)
 {
+        op_per_cpu_rc = 0;
+
         if (model->global_start)
-                model->global_start(ctr);
-        if (model->start)
+                return model->global_start(ctr);
+        if (model->start) {
                 on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1);
-        return 0;
+                return op_per_cpu_rc;
+        }
+        return -EIO; /* No start function is defined for this
+                        power architecture */
 }
 
 static inline void op_powerpc_cpu_stop(void *dummy)
@@ -147,11 +178,13 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 
         switch (cur_cpu_spec->oprofile_type) {
 #ifdef CONFIG_PPC64
-#ifdef CONFIG_PPC_CELL_NATIVE
+#ifdef CONFIG_OPROFILE_CELL
         case PPC_OPROFILE_CELL:
                 if (firmware_has_feature(FW_FEATURE_LPAR))
                         return -ENODEV;
                 model = &op_model_cell;
+                ops->sync_start = model->sync_start;
+                ops->sync_stop = model->sync_stop;
                 break;
 #endif
         case PPC_OPROFILE_RS64:
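The recurring pattern in this file: on_each_cpu() callbacks return void, so per-CPU failures are funneled through the shared op_per_cpu_rc, which is cleared before the broadcast and inspected after it ("last error wins", and any non-zero value aborts the operation). The idiom in miniature, as a generic sketch with an assumed helper:

    static int shared_rc;                   /* written by all CPUs */

    static void per_cpu_setup(void *unused)
    {
            int ret = do_cpu_local_setup(); /* assumed helper */

            if (ret != 0)
                    shared_rc = ret;        /* void callback: stash it */
    }

    /* caller: shared_rc = 0; on_each_cpu(per_cpu_setup, NULL, 0, 1);
     * then inspect shared_rc. */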
diff --git a/arch/powerpc/oprofile/op_model_7450.c b/arch/powerpc/oprofile/op_model_7450.c
index 5d1bbaf35ccb..cc599eb8768b 100644
--- a/arch/powerpc/oprofile/op_model_7450.c
+++ b/arch/powerpc/oprofile/op_model_7450.c
@@ -81,7 +81,7 @@ static void pmc_stop_ctrs(void)
 
 /* Configures the counters on this CPU based on the global
  * settings */
-static void fsl7450_cpu_setup(struct op_counter_config *ctr)
+static int fsl7450_cpu_setup(struct op_counter_config *ctr)
 {
         /* freeze all counters */
         pmc_stop_ctrs();
@@ -89,12 +89,14 @@ static void fsl7450_cpu_setup(struct op_counter_config *ctr)
         mtspr(SPRN_MMCR0, mmcr0_val);
         mtspr(SPRN_MMCR1, mmcr1_val);
         mtspr(SPRN_MMCR2, mmcr2_val);
+
+        return 0;
 }
 
 #define NUM_CTRS 6
 
 /* Configures the global settings for the counters on all CPUs. */
-static void fsl7450_reg_setup(struct op_counter_config *ctr,
+static int fsl7450_reg_setup(struct op_counter_config *ctr,
                              struct op_system_config *sys,
                              int num_ctrs)
 {
@@ -126,10 +128,12 @@ static void fsl7450_reg_setup(struct op_counter_config *ctr,
                 | mmcr1_event6(ctr[5].event);
 
         mmcr2_val = 0;
+
+        return 0;
 }
 
 /* Sets the counters on this CPU to the chosen values, and starts them */
-static void fsl7450_start(struct op_counter_config *ctr)
+static int fsl7450_start(struct op_counter_config *ctr)
 {
         int i;
 
@@ -148,6 +152,8 @@ static void fsl7450_start(struct op_counter_config *ctr)
         pmc_start_ctrs();
 
         oprofile_running = 1;
+
+        return 0;
 }
 
 /* Stop the counters on this CPU */
@@ -193,7 +199,7 @@ static void fsl7450_handle_interrupt(struct pt_regs *regs,
         /* The freeze bit was set by the interrupt. */
         /* Clear the freeze bit, and reenable the interrupt.
          * The counters won't actually start until the rfi clears
-         * the PMM bit */
+         * the PM/M bit */
         pmc_start_ctrs();
 }
 
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c index c29293befba9..d928b54f3a0f 100644 --- a/arch/powerpc/oprofile/op_model_cell.c +++ b/arch/powerpc/oprofile/op_model_cell.c | |||
| @@ -5,8 +5,8 @@ | |||
| 5 | * | 5 | * |
| 6 | * Author: David Erb (djerb@us.ibm.com) | 6 | * Author: David Erb (djerb@us.ibm.com) |
| 7 | * Modifications: | 7 | * Modifications: |
| 8 | * Carl Love <carll@us.ibm.com> | 8 | * Carl Love <carll@us.ibm.com> |
| 9 | * Maynard Johnson <maynardj@us.ibm.com> | 9 | * Maynard Johnson <maynardj@us.ibm.com> |
| 10 | * | 10 | * |
| 11 | * This program is free software; you can redistribute it and/or | 11 | * This program is free software; you can redistribute it and/or |
| 12 | * modify it under the terms of the GNU General Public License | 12 | * modify it under the terms of the GNU General Public License |
| @@ -38,12 +38,25 @@ | |||
| 38 | 38 | ||
| 39 | #include "../platforms/cell/interrupt.h" | 39 | #include "../platforms/cell/interrupt.h" |
| 40 | #include "../platforms/cell/cbe_regs.h" | 40 | #include "../platforms/cell/cbe_regs.h" |
| 41 | #include "cell/pr_util.h" | ||
| 42 | |||
| 43 | static void cell_global_stop_spu(void); | ||
| 44 | |||
| 45 | /* | ||
| 46 | * spu_cycle_reset is the number of cycles between samples. | ||
| 47 | * This variable is used for SPU profiling and should ONLY be set | ||
| 48 | * at the beginning of cell_reg_setup; otherwise, it's read-only. | ||
| 49 | */ | ||
| 50 | static unsigned int spu_cycle_reset; | ||
| 51 | |||
| 52 | #define NUM_SPUS_PER_NODE 8 | ||
| 53 | #define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */ | ||
| 41 | 54 | ||
| 42 | #define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ | 55 | #define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ |
| 43 | #define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying | 56 | #define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying |
| 44 | * PPU_CYCLES event | 57 | * PPU_CYCLES event |
| 45 | */ | 58 | */ |
| 46 | #define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */ | 59 | #define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */ |
| 47 | 60 | ||
| 48 | #define NUM_THREADS 2 /* number of physical threads in | 61 | #define NUM_THREADS 2 /* number of physical threads in |
| 49 | * physical processor | 62 | * physical processor |
| @@ -51,6 +64,7 @@ | |||
| 51 | #define NUM_TRACE_BUS_WORDS 4 | 64 | #define NUM_TRACE_BUS_WORDS 4 |
| 52 | #define NUM_INPUT_BUS_WORDS 2 | 65 | #define NUM_INPUT_BUS_WORDS 2 |
| 53 | 66 | ||
| 67 | #define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */ | ||
| 54 | 68 | ||
| 55 | struct pmc_cntrl_data { | 69 | struct pmc_cntrl_data { |
| 56 | unsigned long vcntr; | 70 | unsigned long vcntr; |
| @@ -62,11 +76,10 @@ struct pmc_cntrl_data { | |||
| 62 | /* | 76 | /* |
| 63 | * ibm,cbe-perftools rtas parameters | 77 | * ibm,cbe-perftools rtas parameters |
| 64 | */ | 78 | */ |
| 65 | |||
| 66 | struct pm_signal { | 79 | struct pm_signal { |
| 67 | u16 cpu; /* Processor to modify */ | 80 | u16 cpu; /* Processor to modify */ |
| 68 | u16 sub_unit; /* hw subunit this applies to (if applicable) */ | 81 | u16 sub_unit; /* hw subunit this applies to (if applicable)*/ |
| 69 | short int signal_group; /* Signal Group to Enable/Disable */ | 82 | short int signal_group; /* Signal Group to Enable/Disable */ |
| 70 | u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event | 83 | u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event |
| 71 | * Bus Word(s) (bitmask) | 84 | * Bus Word(s) (bitmask) |
| 72 | */ | 85 | */ |
| @@ -112,21 +125,42 @@ static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values); | |||
| 112 | 125 | ||
| 113 | static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; | 126 | static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; |
| 114 | 127 | ||
| 115 | /* Interpretation of hdw_thread: | 128 | /* |
| 129 | * The CELL profiling code makes rtas calls to set up the debug bus to | ||
| 130 | * route the performance signals. Additionally, SPU profiling requires | ||
| 131 | * a second rtas call to set up the hardware to capture the SPU PCs. | ||
| 132 | * The EIO error value is returned if the token lookups or the rtas | ||
| 133 | * calls fail. EIO is the best choice among the existing error | ||
| 134 | * numbers. The probability of an rtas-related error is very low, but | ||
| 135 | * by returning EIO and printing additional information to dmesg the user | ||
| 136 | * will know that OProfile did not start and dmesg will tell them why. | ||
| 137 | * OProfile does not support returning errors on Stop. That is not a huge | ||
| 138 | * issue, since failure to reset the debug bus or to stop the SPU PC | ||
| 139 | * collection is not fatal. Chances are that if Stop failed, Start does | ||
| 140 | * not work either. | ||
| 141 | */ | ||
| 142 | |||
| 143 | /* | ||
| 144 | * Interpretation of hdw_thread: | ||
| 116 | * 0 - even virtual cpus 0, 2, 4,... | 145 | * 0 - even virtual cpus 0, 2, 4,... |
| 117 | * 1 - odd virtual cpus 1, 3, 5, ... | 146 | * 1 - odd virtual cpus 1, 3, 5, ... |
| 147 | * | ||
| 148 | * FIXME: this is strictly wrong, we need to clean this up in a number | ||
| 149 | * of places. It works for now. -arnd | ||
| 118 | */ | 150 | */ |
| 119 | static u32 hdw_thread; | 151 | static u32 hdw_thread; |
| 120 | 152 | ||
| 121 | static u32 virt_cntr_inter_mask; | 153 | static u32 virt_cntr_inter_mask; |
| 122 | static struct timer_list timer_virt_cntr; | 154 | static struct timer_list timer_virt_cntr; |
| 123 | 155 | ||
| 124 | /* pm_signal needs to be global since it is initialized in | 156 | /* |
| 157 | * pm_signal needs to be global since it is initialized in | ||
| 125 | * cell_reg_setup at the time when the necessary information | 158 | * cell_reg_setup at the time when the necessary information |
| 126 | * is available. | 159 | * is available. |
| 127 | */ | 160 | */ |
| 128 | static struct pm_signal pm_signal[NR_PHYS_CTRS]; | 161 | static struct pm_signal pm_signal[NR_PHYS_CTRS]; |
| 129 | static int pm_rtas_token; | 162 | static int pm_rtas_token; /* token for debug bus setup call */ |
| 163 | static int spu_rtas_token; /* token for SPU cycle profiling */ | ||
| 130 | 164 | ||
| 131 | static u32 reset_value[NR_PHYS_CTRS]; | 165 | static u32 reset_value[NR_PHYS_CTRS]; |
| 132 | static int num_counters; | 166 | static int num_counters; |
| @@ -147,8 +181,8 @@ rtas_ibm_cbe_perftools(int subfunc, int passthru, | |||
| 147 | { | 181 | { |
| 148 | u64 paddr = __pa(address); | 182 | u64 paddr = __pa(address); |
| 149 | 183 | ||
| 150 | return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, passthru, | 184 | return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, |
| 151 | paddr >> 32, paddr & 0xffffffff, length); | 185 | passthru, paddr >> 32, paddr & 0xffffffff, length); |
| 152 | } | 186 | } |
| 153 | 187 | ||
| 154 | static void pm_rtas_reset_signals(u32 node) | 188 | static void pm_rtas_reset_signals(u32 node) |
| @@ -156,12 +190,13 @@ static void pm_rtas_reset_signals(u32 node) | |||
| 156 | int ret; | 190 | int ret; |
| 157 | struct pm_signal pm_signal_local; | 191 | struct pm_signal pm_signal_local; |
| 158 | 192 | ||
| 159 | /* The debug bus is being set to the passthru disable state. | 193 | /* |
| 160 | * However, the FW still expects at least one legal signal routing | 194 | * The debug bus is being set to the passthru disable state. |
| 161 | * entry or it will return an error on the arguments. If we don't | 195 | * However, the FW still expects at least one legal signal routing |
| 162 | * supply a valid entry, we must ignore all return values. Ignoring | 196 | * entry or it will return an error on the arguments. If we don't |
| 163 | * all return values means we might miss an error we should be | 197 | * supply a valid entry, we must ignore all return values. Ignoring |
| 164 | * concerned about. | 198 | * all return values means we might miss an error we should be |
| 199 | * concerned about. | ||
| 165 | */ | 200 | */ |
| 166 | 201 | ||
| 167 | /* fw expects physical cpu #. */ | 202 | /* fw expects physical cpu #. */ |
| @@ -175,18 +210,24 @@ static void pm_rtas_reset_signals(u32 node) | |||
| 175 | &pm_signal_local, | 210 | &pm_signal_local, |
| 176 | sizeof(struct pm_signal)); | 211 | sizeof(struct pm_signal)); |
| 177 | 212 | ||
| 178 | if (ret) | 213 | if (unlikely(ret)) |
| 214 | /* | ||
| 215 | * Not a fatal error. For Oprofile stop, the oprofile | ||
| 216 | * functions do not support returning an error for | ||
| 217 | * failure to stop OProfile. | ||
| 218 | */ | ||
| 179 | printk(KERN_WARNING "%s: rtas returned: %d\n", | 219 | printk(KERN_WARNING "%s: rtas returned: %d\n", |
| 180 | __FUNCTION__, ret); | 220 | __FUNCTION__, ret); |
| 181 | } | 221 | } |
| 182 | 222 | ||
| 183 | static void pm_rtas_activate_signals(u32 node, u32 count) | 223 | static int pm_rtas_activate_signals(u32 node, u32 count) |
| 184 | { | 224 | { |
| 185 | int ret; | 225 | int ret; |
| 186 | int i, j; | 226 | int i, j; |
| 187 | struct pm_signal pm_signal_local[NR_PHYS_CTRS]; | 227 | struct pm_signal pm_signal_local[NR_PHYS_CTRS]; |
| 188 | 228 | ||
| 189 | /* There is no debug setup required for the cycles event. | 229 | /* |
| 230 | * There is no debug setup required for the cycles event. | ||
| 190 | * Note that only events in the same group can be used. | 231 | * Note that only events in the same group can be used. |
| 191 | * Otherwise, there will be conflicts in correctly routing | 232 | * Otherwise, there will be conflicts in correctly routing |
| 192 | * the signals on the debug bus. It is the responsibility | 233 | * the signals on the debug bus. It is the responsibility |
| @@ -213,10 +254,14 @@ static void pm_rtas_activate_signals(u32 node, u32 count) | |||
| 213 | pm_signal_local, | 254 | pm_signal_local, |
| 214 | i * sizeof(struct pm_signal)); | 255 | i * sizeof(struct pm_signal)); |
| 215 | 256 | ||
| 216 | if (ret) | 257 | if (unlikely(ret)) { |
| 217 | printk(KERN_WARNING "%s: rtas returned: %d\n", | 258 | printk(KERN_WARNING "%s: rtas returned: %d\n", |
| 218 | __FUNCTION__, ret); | 259 | __FUNCTION__, ret); |
| 260 | return -EIO; | ||
| 261 | } | ||
| 219 | } | 262 | } |
| 263 | |||
| 264 | return 0; | ||
| 220 | } | 265 | } |
| 221 | 266 | ||
| 222 | /* | 267 | /* |
| @@ -260,11 +305,12 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask) | |||
| 260 | pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); | 305 | pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); |
| 261 | pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); | 306 | pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); |
| 262 | 307 | ||
| 263 | /* Some of the islands' signal selection is based on 64 bit words. | 308 | /* |
| 309 | * Some of the islands' signal selection is based on 64 bit words. | ||
| 264 | * The debug bus words are 32 bits, the input words to the performance | 310 | * The debug bus words are 32 bits, the input words to the performance |
| 265 | * counters are defined as 32 bits. Need to convert the 64 bit island | 311 | * counters are defined as 32 bits. Need to convert the 64 bit island |
| 266 | * specification to the appropriate 32 input bit and bus word for the | 312 | * specification to the appropriate 32 input bit and bus word for the |
| 267 | * performance counter event selection. See the CELL Performance | 313 | * performance counter event selection. See the CELL Performance |
| 268 | * monitoring signals manual and the Perf cntr hardware descriptions | 314 | * monitoring signals manual and the Perf cntr hardware descriptions |
| 269 | * for the details. | 315 | * for the details. |
| 270 | */ | 316 | */ |
| @@ -298,6 +344,7 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask) | |||
| 298 | input_bus[j] = i; | 344 | input_bus[j] = i; |
| 299 | pm_regs.group_control |= | 345 | pm_regs.group_control |= |
| 300 | (i << (31 - i)); | 346 | (i << (31 - i)); |
| 347 | |||
| 301 | break; | 348 | break; |
| 302 | } | 349 | } |
| 303 | } | 350 | } |
| @@ -309,7 +356,8 @@ out: | |||
| 309 | 356 | ||
| 310 | static void write_pm_cntrl(int cpu) | 357 | static void write_pm_cntrl(int cpu) |
| 311 | { | 358 | { |
| 312 | /* Oprofile will use 32 bit counters, set bits 7:10 to 0 | 359 | /* |
| 360 | * Oprofile will use 32 bit counters, set bits 7:10 to 0 | ||
| 313 | * pm_regs.pm_cntrl is a global | 361 | * pm_regs.pm_cntrl is a global |
| 314 | */ | 362 | */ |
| 315 | 363 | ||
| @@ -326,7 +374,8 @@ static void write_pm_cntrl(int cpu) | |||
| 326 | if (pm_regs.pm_cntrl.freeze == 1) | 374 | if (pm_regs.pm_cntrl.freeze == 1) |
| 327 | val |= CBE_PM_FREEZE_ALL_CTRS; | 375 | val |= CBE_PM_FREEZE_ALL_CTRS; |
| 328 | 376 | ||
| 329 | /* Routine set_count_mode must be called previously to set | 377 | /* |
| 378 | * Routine set_count_mode must be called previously to set | ||
| 330 | * the count mode based on the user selection of user and kernel. | 379 | * the count mode based on the user selection of user and kernel. |
| 331 | */ | 380 | */ |
| 332 | val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode); | 381 | val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode); |
| @@ -336,7 +385,8 @@ static void write_pm_cntrl(int cpu) | |||
| 336 | static inline void | 385 | static inline void |
| 337 | set_count_mode(u32 kernel, u32 user) | 386 | set_count_mode(u32 kernel, u32 user) |
| 338 | { | 387 | { |
| 339 | /* The user must specify user and kernel if they want them. If | 388 | /* |
| 389 | * The user must specify user and kernel if they want them. If | ||
| 340 | * neither is specified, OProfile will count in hypervisor mode. | 390 | * neither is specified, OProfile will count in hypervisor mode. |
| 341 | * pm_regs.pm_cntrl is a global | 391 | * pm_regs.pm_cntrl is a global |
| 342 | */ | 392 | */ |
| @@ -364,7 +414,7 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl) | |||
| 364 | 414 | ||
| 365 | /* | 415 | /* |
| 366 | * Oprofile is expected to collect data on all CPUs simultaneously. | 416 | * Oprofile is expected to collect data on all CPUs simultaneously. |
| 367 | * However, there is one set of performance counters per node. There are | 417 | * However, there is one set of performance counters per node. There are |
| 368 | * two hardware threads or virtual CPUs on each node. Hence, OProfile must | 418 | * two hardware threads or virtual CPUs on each node. Hence, OProfile must |
| 369 | * multiplex in time the performance counter collection on the two virtual | 419 | * multiplex in time the performance counter collection on the two virtual |
| 370 | * CPUs. The multiplexing of the performance counters is done by this | 420 | * CPUs. The multiplexing of the performance counters is done by this |
| @@ -377,19 +427,19 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl) | |||
| 377 | * pair of per-cpu arrays is used for storing the previous and next | 427 | * pair of per-cpu arrays is used for storing the previous and next |
| 378 | * pmc values for a given node. | 428 | * pmc values for a given node. |
| 379 | * NOTE: We use the per-cpu variable to improve cache performance. | 429 | * NOTE: We use the per-cpu variable to improve cache performance. |
| 430 | * | ||
| 431 | * This routine will alternate loading the virtual counters for | ||
| 432 | * virtual CPUs | ||
| 380 | */ | 433 | */ |
| 381 | static void cell_virtual_cntr(unsigned long data) | 434 | static void cell_virtual_cntr(unsigned long data) |
| 382 | { | 435 | { |
| 383 | /* This routine will alternate loading the virtual counters for | ||
| 384 | * virtual CPUs | ||
| 385 | */ | ||
| 386 | int i, prev_hdw_thread, next_hdw_thread; | 436 | int i, prev_hdw_thread, next_hdw_thread; |
| 387 | u32 cpu; | 437 | u32 cpu; |
| 388 | unsigned long flags; | 438 | unsigned long flags; |
| 389 | 439 | ||
| 390 | /* Make sure that the interrupt_handler and | 440 | /* |
| 391 | * the virt counter are not both playing with | 441 | * Make sure that the interrupt_handler and the virt counter are |
| 392 | * the counters on the same node. | 442 | * not both playing with the counters on the same node. |
| 393 | */ | 443 | */ |
| 394 | 444 | ||
| 395 | spin_lock_irqsave(&virt_cntr_lock, flags); | 445 | spin_lock_irqsave(&virt_cntr_lock, flags); |
| @@ -400,22 +450,25 @@ static void cell_virtual_cntr(unsigned long data) | |||
| 400 | hdw_thread = 1 ^ hdw_thread; | 450 | hdw_thread = 1 ^ hdw_thread; |
| 401 | next_hdw_thread = hdw_thread; | 451 | next_hdw_thread = hdw_thread; |
| 402 | 452 | ||
| 403 | for (i = 0; i < num_counters; i++) | 453 | /* |
| 404 | /* There are some per thread events. Must do the | 454 | * There are some per thread events. Must do the |
| 405 | * set event, for the thread that is being started | 455 | * set event, for the thread that is being started |
| 406 | */ | 456 | */ |
| 457 | for (i = 0; i < num_counters; i++) | ||
| 407 | set_pm_event(i, | 458 | set_pm_event(i, |
| 408 | pmc_cntrl[next_hdw_thread][i].evnts, | 459 | pmc_cntrl[next_hdw_thread][i].evnts, |
| 409 | pmc_cntrl[next_hdw_thread][i].masks); | 460 | pmc_cntrl[next_hdw_thread][i].masks); |
| 410 | 461 | ||
| 411 | /* The following is done only once per each node, but | 462 | /* |
| 463 | * The following is done only once per each node, but | ||
| 412 | * we need cpu #, not node #, to pass to the cbe_xxx functions. | 464 | * we need cpu #, not node #, to pass to the cbe_xxx functions. |
| 413 | */ | 465 | */ |
| 414 | for_each_online_cpu(cpu) { | 466 | for_each_online_cpu(cpu) { |
| 415 | if (cbe_get_hw_thread_id(cpu)) | 467 | if (cbe_get_hw_thread_id(cpu)) |
| 416 | continue; | 468 | continue; |
| 417 | 469 | ||
| 418 | /* stop counters, save counter values, restore counts | 470 | /* |
| 471 | * stop counters, save counter values, restore counts | ||
| 419 | * for previous thread | 472 | * for previous thread |
| 420 | */ | 473 | */ |
| 421 | cbe_disable_pm(cpu); | 474 | cbe_disable_pm(cpu); |
| @@ -428,7 +481,7 @@ static void cell_virtual_cntr(unsigned long data) | |||
| 428 | == 0xFFFFFFFF) | 481 | == 0xFFFFFFFF) |
| 429 | /* If the cntr value is 0xffffffff, we must | 482 | /* If the cntr value is 0xffffffff, we must |
| 430 | * reset that to 0xfffffff0 when the current | 483 | * reset that to 0xfffffff0 when the current |
| 431 | * thread is restarted. This will generate a | 484 | * thread is restarted. This will generate a |
| 432 | * new interrupt and make sure that we never | 485 | * new interrupt and make sure that we never |
| 433 | * restore the counters to the max value. If | 486 | * restore the counters to the max value. If |
| 434 | * the counters were restored to the max value, | 487 | * the counters were restored to the max value, |
| @@ -444,13 +497,15 @@ static void cell_virtual_cntr(unsigned long data) | |||
| 444 | next_hdw_thread)[i]); | 497 | next_hdw_thread)[i]); |
| 445 | } | 498 | } |
| 446 | 499 | ||
| 447 | /* Switch to the other thread. Change the interrupt | 500 | /* |
| 501 | * Switch to the other thread. Change the interrupt | ||
| 448 | * and control regs to be scheduled on the CPU | 502 | * and control regs to be scheduled on the CPU |
| 449 | * corresponding to the thread to execute. | 503 | * corresponding to the thread to execute. |
| 450 | */ | 504 | */ |
| 451 | for (i = 0; i < num_counters; i++) { | 505 | for (i = 0; i < num_counters; i++) { |
| 452 | if (pmc_cntrl[next_hdw_thread][i].enabled) { | 506 | if (pmc_cntrl[next_hdw_thread][i].enabled) { |
| 453 | /* There are some per thread events. | 507 | /* |
| 508 | * There are some per thread events. | ||
| 454 | * Must do the set event, enable_cntr | 509 | * Must do the set event, enable_cntr |
| 455 | * for each cpu. | 510 | * for each cpu. |
| 456 | */ | 511 | */ |
| @@ -482,17 +537,42 @@ static void start_virt_cntrs(void) | |||
| 482 | } | 537 | } |
| 483 | 538 | ||
| 484 | /* This function is called once for all cpus combined */ | 539 | /* This function is called once for all cpus combined */ |
| 485 | static void | 540 | static int cell_reg_setup(struct op_counter_config *ctr, |
| 486 | cell_reg_setup(struct op_counter_config *ctr, | 541 | struct op_system_config *sys, int num_ctrs) |
| 487 | struct op_system_config *sys, int num_ctrs) | ||
| 488 | { | 542 | { |
| 489 | int i, j, cpu; | 543 | int i, j, cpu; |
| 544 | spu_cycle_reset = 0; | ||
| 545 | |||
| 546 | if (ctr[0].event == SPU_CYCLES_EVENT_NUM) { | ||
| 547 | spu_cycle_reset = ctr[0].count; | ||
| 548 | |||
| 549 | /* | ||
| 550 | * Each node will need to make the rtas call to start | ||
| 551 | * and stop SPU profiling. Get the token once and store it. | ||
| 552 | */ | ||
| 553 | spu_rtas_token = rtas_token("ibm,cbe-spu-perftools"); | ||
| 554 | |||
| 555 | if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) { | ||
| 556 | printk(KERN_ERR | ||
| 557 | "%s: rtas token ibm,cbe-spu-perftools unknown\n", | ||
| 558 | __FUNCTION__); | ||
| 559 | return -EIO; | ||
| 560 | } | ||
| 561 | } | ||
| 490 | 562 | ||
| 491 | pm_rtas_token = rtas_token("ibm,cbe-perftools"); | 563 | pm_rtas_token = rtas_token("ibm,cbe-perftools"); |
| 492 | if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) { | 564 | |
| 493 | printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n", | 565 | /* |
| 566 | * For all events except PPU CYCLES, each node will need to make | ||
| 567 | * the rtas cbe-perftools call to set up and reset the debug bus. | ||
| 568 | * Make the token lookup call once and store it in the global | ||
| 569 | * variable pm_rtas_token. | ||
| 570 | */ | ||
| 571 | if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { | ||
| 572 | printk(KERN_ERR | ||
| 573 | "%s: rtas token ibm,cbe-perftools unknown\n", | ||
| 494 | __FUNCTION__); | 574 | __FUNCTION__); |
| 495 | goto out; | 575 | return -EIO; |
| 496 | } | 576 | } |
| 497 | 577 | ||
| 498 | num_counters = num_ctrs; | 578 | num_counters = num_ctrs; |
| @@ -520,7 +600,8 @@ cell_reg_setup(struct op_counter_config *ctr, | |||
| 520 | per_cpu(pmc_values, j)[i] = 0; | 600 | per_cpu(pmc_values, j)[i] = 0; |
| 521 | } | 601 | } |
| 522 | 602 | ||
| 523 | /* Setup the thread 1 events, map the thread 0 event to the | 603 | /* |
| 604 | * Setup the thread 1 events, map the thread 0 event to the | ||
| 524 | * equivalent thread 1 event. | 605 | * equivalent thread 1 event. |
| 525 | */ | 606 | */ |
| 526 | for (i = 0; i < num_ctrs; ++i) { | 607 | for (i = 0; i < num_ctrs; ++i) { |
| @@ -544,9 +625,10 @@ cell_reg_setup(struct op_counter_config *ctr, | |||
| 544 | for (i = 0; i < NUM_INPUT_BUS_WORDS; i++) | 625 | for (i = 0; i < NUM_INPUT_BUS_WORDS; i++) |
| 545 | input_bus[i] = 0xff; | 626 | input_bus[i] = 0xff; |
| 546 | 627 | ||
| 547 | /* Our counters count up, and "count" refers to | 628 | /* |
| 629 | * Our counters count up, and "count" refers to | ||
| 548 | * how much before the next interrupt, and we interrupt | 630 | * how much before the next interrupt, and we interrupt |
| 549 | * on overflow. So we calculate the starting value | 631 | * on overflow. So we calculate the starting value |
| 550 | * which will give us "count" until overflow. | 632 | * which will give us "count" until overflow. |
| 551 | * Then we set the events on the enabled counters. | 633 | * Then we set the events on the enabled counters. |
| 552 | */ | 634 | */ |
| @@ -569,28 +651,27 @@ cell_reg_setup(struct op_counter_config *ctr, | |||
| 569 | for (i = 0; i < num_counters; ++i) { | 651 | for (i = 0; i < num_counters; ++i) { |
| 570 | per_cpu(pmc_values, cpu)[i] = reset_value[i]; | 652 | per_cpu(pmc_values, cpu)[i] = reset_value[i]; |
| 571 | } | 653 | } |
| 572 | out: | 654 | |
| 573 | ; | 655 | return 0; |
| 574 | } | 656 | } |
| 575 | 657 | ||
| 658 | |||
| 659 | |||
| 576 | /* This function is called once for each cpu */ | 660 | /* This function is called once for each cpu */ |
| 577 | static void cell_cpu_setup(struct op_counter_config *cntr) | 661 | static int cell_cpu_setup(struct op_counter_config *cntr) |
| 578 | { | 662 | { |
| 579 | u32 cpu = smp_processor_id(); | 663 | u32 cpu = smp_processor_id(); |
| 580 | u32 num_enabled = 0; | 664 | u32 num_enabled = 0; |
| 581 | int i; | 665 | int i; |
| 582 | 666 | ||
| 667 | if (spu_cycle_reset) | ||
| 668 | return 0; | ||
| 669 | |||
| 583 | /* There is one performance monitor per processor chip (i.e. node), | 670 | /* There is one performance monitor per processor chip (i.e. node), |
| 584 | * so we only need to perform this function once per node. | 671 | * so we only need to perform this function once per node. |
| 585 | */ | 672 | */ |
| 586 | if (cbe_get_hw_thread_id(cpu)) | 673 | if (cbe_get_hw_thread_id(cpu)) |
| 587 | goto out; | 674 | return 0; |
| 588 | |||
| 589 | if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) { | ||
| 590 | printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n", | ||
| 591 | __FUNCTION__); | ||
| 592 | goto out; | ||
| 593 | } | ||
| 594 | 675 | ||
| 595 | /* Stop all counters */ | 676 | /* Stop all counters */ |
| 596 | cbe_disable_pm(cpu); | 677 | cbe_disable_pm(cpu); |
| @@ -609,16 +690,286 @@ static void cell_cpu_setup(struct op_counter_config *cntr) | |||
| 609 | } | 690 | } |
| 610 | } | 691 | } |
| 611 | 692 | ||
| 612 | pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled); | 693 | /* |
| 694 | * The pm_rtas_activate_signals will return -EIO if the FW | ||
| 695 | * call failed. | ||
| 696 | */ | ||
| 697 | return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled); | ||
| 698 | } | ||
| 699 | |||
| 700 | #define ENTRIES 303 | ||
| 701 | #define MAXLFSR 0xFFFFFF | ||
| 702 | |||
| 703 | /* precomputed table of 24 bit LFSR values */ | ||
| 704 | static int initial_lfsr[] = { | ||
| 705 | 8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424, | ||
| 706 | 15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716, | ||
| 707 | 4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547, | ||
| 708 | 3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392, | ||
| 709 | 9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026, | ||
| 710 | 2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556, | ||
| 711 | 3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769, | ||
| 712 | 14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893, | ||
| 713 | 11914928, 4732944, 10695697, 12928164, 11980531, 4430912, 11939291, 2917017, | ||
| 714 | 6119256, 4172004, 9373765, 8410071, 14788383, 5047459, 5474428, 1737756, | ||
| 715 | 15967514, 13351758, 6691285, 8034329, 2856544, 14394753, 11310160, 12149558, | ||
| 716 | 7487528, 7542781, 15668898, 12525138, 12790975, 3707933, 9106617, 1965401, | ||
| 717 | 16219109, 12801644, 2443203, 4909502, 8762329, 3120803, 6360315, 9309720, | ||
| 718 | 15164599, 10844842, 4456529, 6667610, 14924259, 884312, 6234963, 3326042, | ||
| 719 | 15973422, 13919464, 5272099, 6414643, 3909029, 2764324, 5237926, 4774955, | ||
| 720 | 10445906, 4955302, 5203726, 10798229, 11443419, 2303395, 333836, 9646934, | ||
| 721 | 3464726, 4159182, 568492, 995747, 10318756, 13299332, 4836017, 8237783, | ||
| 722 | 3878992, 2581665, 11394667, 5672745, 14412947, 3159169, 9094251, 16467278, | ||
| 723 | 8671392, 15230076, 4843545, 7009238, 15504095, 1494895, 9627886, 14485051, | ||
| 724 | 8304291, 252817, 12421642, 16085736, 4774072, 2456177, 4160695, 15409741, | ||
| 725 | 4902868, 5793091, 13162925, 16039714, 782255, 11347835, 14884586, 366972, | ||
| 726 | 16308990, 11913488, 13390465, 2958444, 10340278, 1177858, 1319431, 10426302, | ||
| 727 | 2868597, 126119, 5784857, 5245324, 10903900, 16436004, 3389013, 1742384, | ||
| 728 | 14674502, 10279218, 8536112, 10364279, 6877778, 14051163, 1025130, 6072469, | ||
| 729 | 1988305, 8354440, 8216060, 16342977, 13112639, 3976679, 5913576, 8816697, | ||
| 730 | 6879995, 14043764, 3339515, 9364420, 15808858, 12261651, 2141560, 5636398, | ||
| 731 | 10345425, 10414756, 781725, 6155650, 4746914, 5078683, 7469001, 6799140, | ||
| 732 | 10156444, 9667150, 10116470, 4133858, 2121972, 1124204, 1003577, 1611214, | ||
| 733 | 14304602, 16221850, 13878465, 13577744, 3629235, 8772583, 10881308, 2410386, | ||
| 734 | 7300044, 5378855, 9301235, 12755149, 4977682, 8083074, 10327581, 6395087, | ||
| 735 | 9155434, 15501696, 7514362, 14520507, 15808945, 3244584, 4741962, 9658130, | ||
| 736 | 14336147, 8654727, 7969093, 15759799, 14029445, 5038459, 9894848, 8659300, | ||
| 737 | 13699287, 8834306, 10712885, 14753895, 10410465, 3373251, 309501, 9561475, | ||
| 738 | 5526688, 14647426, 14209836, 5339224, 207299, 14069911, 8722990, 2290950, | ||
| 739 | 3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003, | ||
| 740 | 6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375, | ||
| 741 | 7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426, | ||
| 742 | 6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607 | ||
| 743 | }; | ||
| 744 | |||
| 745 | /* | ||
| 746 | * The hardware uses an LFSR counting sequence to determine when to capture | ||
| 747 | * the SPU PCs. An LFSR sequence is like a pseudo-random number sequence | ||
| 748 | * where each number occurs once in the sequence but the sequence is not in | ||
| 749 | * numerical order. The SPU PC capture is done when the LFSR sequence reaches | ||
| 750 | * the last value in the sequence. Hence the user specified value N | ||
| 751 | * corresponds to the LFSR number that is N from the end of the sequence. | ||
| 752 | * | ||
| 753 | * To avoid the time to compute the LFSR, a lookup table is used. The 24 bit | ||
| 754 | * LFSR sequence is broken into four ranges. The spacing of the precomputed | ||
| 755 | * values is adjusted in each range so the error between the user specified | ||
| 756 | * number (N) of events between samples and the actual number of events based | ||
| 757 | * on the precomputed value will be less than about 6.2%. Note that if the user | ||
| 758 | * specifies N < 2^16, the LFSR value that is 2^16 from the end will be used. | ||
| 759 | * This is to prevent the loss of samples because the trace buffer is full. | ||
| 760 | * | ||
| 761 | * User specified N Step between Index in | ||
| 762 | * precomputed values precomputed | ||
| 763 | * table | ||
| 764 | * 0 to 2^16-1 ---- 0 | ||
| 765 | * 2^16 to 2^16+2^19-1 2^12 1 to 128 | ||
| 766 | * 2^16+2^19 to 2^16+2^19+2^22-1 2^15 129 to 256 | ||
| 767 | * 2^16+2^19+2^22 to 2^24-1 2^18 257 to 302 | ||
| 768 | * | ||
| 769 | * | ||
| 770 | * For example, the LFSR values in the second range are computed for 2^16, | ||
| 771 | * 2^16+2^12, ... , 2^19-2^16, 2^19 and stored in the table at indices | ||
| 772 | * 1, 2,..., 127, 128. | ||
| 773 | * | ||
| 774 | * The 24 bit LFSR value for the nth number in the sequence can be | ||
| 775 | * calculated using the following code: | ||
| 776 | * | ||
| 777 | * #define size 24 | ||
| 778 | * int calculate_lfsr(int n) | ||
| 779 | * { | ||
| 780 | * int i; | ||
| 781 | * unsigned int newlfsr0; | ||
| 782 | * unsigned int lfsr = 0xFFFFFF; | ||
| 783 | * unsigned int howmany = n; | ||
| 784 | * | ||
| 785 | * for (i = 2; i < howmany + 2; i++) { | ||
| 786 | * newlfsr0 = (((lfsr >> (size - 1 - 0)) & 1) ^ | ||
| 787 | * ((lfsr >> (size - 1 - 1)) & 1) ^ | ||
| 788 | * (((lfsr >> (size - 1 - 6)) & 1) ^ | ||
| 789 | * ((lfsr >> (size - 1 - 23)) & 1))); | ||
| 790 | * | ||
| 791 | * lfsr >>= 1; | ||
| 792 | * lfsr = lfsr | (newlfsr0 << (size - 1)); | ||
| 793 | * } | ||
| 794 | * return lfsr; | ||
| 795 | * } | ||
| 796 | */ | ||
| 797 | |||
| 798 | #define V2_16 (0x1 << 16) | ||
| 799 | #define V2_19 (0x1 << 19) | ||
| 800 | #define V2_22 (0x1 << 22) | ||
| 801 | |||
| 802 | static int calculate_lfsr(int n) | ||
| 803 | { | ||
| 804 | /* | ||
| 805 | * The ranges and steps are in powers of 2 so the calculations | ||
| 806 | * can be done using shifts rather than divides. | ||
| 807 | */ | ||
| 808 | int index; | ||
| 809 | |||
| 810 | if ((n >> 16) == 0) | ||
| 811 | index = 0; | ||
| 812 | else if (((n - V2_16) >> 19) == 0) | ||
| 813 | index = ((n - V2_16) >> 12) + 1; | ||
| 814 | else if (((n - V2_16 - V2_19) >> 22) == 0) | ||
| 815 | index = ((n - V2_16 - V2_19) >> 15 ) + 1 + 128; | ||
| 816 | else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0) | ||
| 817 | index = ((n - V2_16 - V2_19 - V2_22) >> 18 ) + 1 + 256; | ||
| 818 | else | ||
| 819 | index = ENTRIES-1; | ||
| 820 | |||
| 821 | /* make sure index is valid */ | ||
| 822 | if ((index > ENTRIES) || (index < 0)) | ||
| 823 | index = ENTRIES-1; | ||
| 824 | |||
| 825 | return initial_lfsr[index]; | ||
| 826 | } | ||
| 827 | |||
| 828 | static int pm_rtas_activate_spu_profiling(u32 node) | ||
| 829 | { | ||
| 830 | int ret, i; | ||
| 831 | struct pm_signal pm_signal_local[NR_PHYS_CTRS]; | ||
| 832 | |||
| 833 | /* | ||
| 834 | * Set up the rtas call to configure the debug bus to | ||
| 835 | * route the SPU PCs. Setup the pm_signal for each SPU | ||
| 836 | */ | ||
| 837 | for (i = 0; i < NUM_SPUS_PER_NODE; i++) { | ||
| 838 | pm_signal_local[i].cpu = node; | ||
| 839 | pm_signal_local[i].signal_group = 41; | ||
| 840 | /* spu i on word (i/2) */ | ||
| 841 | pm_signal_local[i].bus_word = 1 << i / 2; | ||
| 842 | /* spu i */ | ||
| 843 | pm_signal_local[i].sub_unit = i; | ||
| 844 | pm_signal_local[i].bit = 63; | ||
| 845 | } | ||
| 846 | |||
| 847 | ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, | ||
| 848 | PASSTHRU_ENABLE, pm_signal_local, | ||
| 849 | (NUM_SPUS_PER_NODE | ||
| 850 | * sizeof(struct pm_signal))); | ||
| 851 | |||
| 852 | if (unlikely(ret)) { | ||
| 853 | printk(KERN_WARNING "%s: rtas returned: %d\n", | ||
| 854 | __FUNCTION__, ret); | ||
| 855 | return -EIO; | ||
| 856 | } | ||
| 857 | |||
| 858 | return 0; | ||
| 859 | } | ||
| 860 | |||
| 861 | #ifdef CONFIG_CPU_FREQ | ||
| 862 | static int | ||
| 863 | oprof_cpufreq_notify(struct notifier_block *nb, unsigned long val, void *data) | ||
| 864 | { | ||
| 865 | int ret = 0; | ||
| 866 | struct cpufreq_freqs *frq = data; | ||
| 867 | if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) || | ||
| 868 | (val == CPUFREQ_POSTCHANGE && frq->old > frq->new) || | ||
| 869 | (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) | ||
| 870 | set_spu_profiling_frequency(frq->new, spu_cycle_reset); | ||
| 871 | return ret; | ||
| 872 | } | ||
| 873 | |||
| 874 | static struct notifier_block cpu_freq_notifier_block = { | ||
| 875 | .notifier_call = oprof_cpufreq_notify | ||
| 876 | }; | ||
| 877 | #endif | ||
| 878 | |||
| 879 | static int cell_global_start_spu(struct op_counter_config *ctr) | ||
| 880 | { | ||
| 881 | int subfunc; | ||
| 882 | unsigned int lfsr_value; | ||
| 883 | int cpu; | ||
| 884 | int ret; | ||
| 885 | int rtas_error; | ||
| 886 | unsigned int cpu_khzfreq = 0; | ||
| 887 | |||
| 888 | /* The SPU profiling uses time-based profiling based on | ||
| 889 | * cpu frequency, so if configured with the CPU_FREQ | ||
| 890 | * option, we should detect frequency changes and react | ||
| 891 | * accordingly. | ||
| 892 | */ | ||
| 893 | #ifdef CONFIG_CPU_FREQ | ||
| 894 | ret = cpufreq_register_notifier(&cpu_freq_notifier_block, | ||
| 895 | CPUFREQ_TRANSITION_NOTIFIER); | ||
| 896 | if (ret < 0) | ||
| 897 | /* this is not a fatal error */ | ||
| 898 | printk(KERN_ERR "CPU freq change registration failed: %d\n", | ||
| 899 | ret); | ||
| 900 | |||
| 901 | else | ||
| 902 | cpu_khzfreq = cpufreq_quick_get(smp_processor_id()); | ||
| 903 | #endif | ||
| 904 | |||
| 905 | set_spu_profiling_frequency(cpu_khzfreq, spu_cycle_reset); | ||
| 906 | |||
| 907 | for_each_online_cpu(cpu) { | ||
| 908 | if (cbe_get_hw_thread_id(cpu)) | ||
| 909 | continue; | ||
| 910 | |||
| 911 | /* | ||
| 912 | * Setup SPU cycle-based profiling. | ||
| 913 | * Set perf_mon_control bit 0 to a zero before | ||
| 914 | * enabling spu collection hardware. | ||
| 915 | */ | ||
| 916 | cbe_write_pm(cpu, pm_control, 0); | ||
| 917 | |||
| 918 | if (spu_cycle_reset > MAX_SPU_COUNT) | ||
| 919 | /* use largest possible value */ | ||
| 920 | lfsr_value = calculate_lfsr(MAX_SPU_COUNT-1); | ||
| 921 | else | ||
| 922 | lfsr_value = calculate_lfsr(spu_cycle_reset); | ||
| 923 | |||
| 924 | /* must use a non-zero value. Zero disables data collection. */ | ||
| 925 | if (lfsr_value == 0) | ||
| 926 | lfsr_value = calculate_lfsr(1); | ||
| 927 | |||
| 928 | lfsr_value = lfsr_value << 8; /* shift lfsr to correct | ||
| 929 | * register location | ||
| 930 | */ | ||
| 931 | |||
| 932 | /* debug bus setup */ | ||
| 933 | ret = pm_rtas_activate_spu_profiling(cbe_cpu_to_node(cpu)); | ||
| 934 | |||
| 935 | if (unlikely(ret)) { | ||
| 936 | rtas_error = ret; | ||
| 937 | goto out; | ||
| 938 | } | ||
| 939 | |||
| 940 | |||
| 941 | subfunc = 2; /* 2 - activate SPU tracing, 3 - deactivate */ | ||
| 942 | |||
| 943 | /* start profiling */ | ||
| 944 | ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc, | ||
| 945 | cbe_cpu_to_node(cpu), lfsr_value); | ||
| 946 | |||
| 947 | if (unlikely(ret != 0)) { | ||
| 948 | printk(KERN_ERR | ||
| 949 | "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n", | ||
| 950 | __FUNCTION__, ret); | ||
| 951 | rtas_error = -EIO; | ||
| 952 | goto out; | ||
| 953 | } | ||
| 954 | } | ||
| 955 | |||
| 956 | rtas_error = start_spu_profiling(spu_cycle_reset); | ||
| 957 | if (rtas_error) | ||
| 958 | goto out_stop; | ||
| 959 | |||
| 960 | oprofile_running = 1; | ||
| 961 | return 0; | ||
| 962 | |||
| 963 | out_stop: | ||
| 964 | cell_global_stop_spu(); /* clean up the PMU/debug bus */ | ||
| 613 | out: | 965 | out: |
| 614 | ; | 966 | return rtas_error; |
| 615 | } | 967 | } |
| 616 | 968 | ||
| 617 | static void cell_global_start(struct op_counter_config *ctr) | 969 | static int cell_global_start_ppu(struct op_counter_config *ctr) |
| 618 | { | 970 | { |
| 619 | u32 cpu; | 971 | u32 cpu, i; |
| 620 | u32 interrupt_mask = 0; | 972 | u32 interrupt_mask = 0; |
| 621 | u32 i; | ||
| 622 | 973 | ||
| 623 | /* This routine gets called once for the system. | 974 | /* This routine gets called once for the system. |
| 624 | * There is one performance monitor per node, so we | 975 | * There is one performance monitor per node, so we |
| @@ -651,19 +1002,79 @@ static void cell_global_start(struct op_counter_config *ctr) | |||
| 651 | oprofile_running = 1; | 1002 | oprofile_running = 1; |
| 652 | smp_wmb(); | 1003 | smp_wmb(); |
| 653 | 1004 | ||
| 654 | /* NOTE: start_virt_cntrs will result in cell_virtual_cntr() being | 1005 | /* |
| 655 | * executed which manipulates the PMU. We start the "virtual counter" | 1006 | * NOTE: start_virt_cntrs will result in cell_virtual_cntr() being |
| 1007 | * executed which manipulates the PMU. We start the "virtual counter" | ||
| 656 | * here so that we do not need to synchronize access to the PMU in | 1008 | * here so that we do not need to synchronize access to the PMU in |
| 657 | * the above for-loop. | 1009 | * the above for-loop. |
| 658 | */ | 1010 | */ |
| 659 | start_virt_cntrs(); | 1011 | start_virt_cntrs(); |
| 1012 | |||
| 1013 | return 0; | ||
| 660 | } | 1014 | } |
| 661 | 1015 | ||
| 662 | static void cell_global_stop(void) | 1016 | static int cell_global_start(struct op_counter_config *ctr) |
| 1017 | { | ||
| 1018 | if (spu_cycle_reset) | ||
| 1019 | return cell_global_start_spu(ctr); | ||
| 1020 | else | ||
| 1021 | return cell_global_start_ppu(ctr); | ||
| 1022 | } | ||
| 1023 | |||
| 1024 | /* | ||
| 1025 | * Note the generic OProfile stop calls do not support returning | ||
| 1026 | * an error on stop. Hence, we will not return an error if the FW | ||
| 1027 | * calls fail on stop. Failure to reset the debug bus is not an issue. | ||
| 1028 | * Failure to disable the SPU profiling is not an issue. The FW calls | ||
| 1029 | * to enable the performance counters and debug bus will work even if | ||
| 1030 | * the hardware was not cleanly reset. | ||
| 1031 | */ | ||
| 1032 | static void cell_global_stop_spu(void) | ||
| 1033 | { | ||
| 1034 | int subfunc, rtn_value; | ||
| 1035 | unsigned int lfsr_value; | ||
| 1036 | int cpu; | ||
| 1037 | |||
| 1038 | oprofile_running = 0; | ||
| 1039 | |||
| 1040 | #ifdef CONFIG_CPU_FREQ | ||
| 1041 | cpufreq_unregister_notifier(&cpu_freq_notifier_block, | ||
| 1042 | CPUFREQ_TRANSITION_NOTIFIER); | ||
| 1043 | #endif | ||
| 1044 | |||
| 1045 | for_each_online_cpu(cpu) { | ||
| 1046 | if (cbe_get_hw_thread_id(cpu)) | ||
| 1047 | continue; | ||
| 1048 | |||
| 1049 | subfunc = 3; /* | ||
| 1050 | * 2 - activate SPU tracing, | ||
| 1051 | * 3 - deactivate | ||
| 1052 | */ | ||
| 1053 | lfsr_value = 0x8f100000; | ||
| 1054 | |||
| 1055 | rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL, | ||
| 1056 | subfunc, cbe_cpu_to_node(cpu), | ||
| 1057 | lfsr_value); | ||
| 1058 | |||
| 1059 | if (unlikely(rtn_value != 0)) { | ||
| 1060 | printk(KERN_ERR | ||
| 1061 | "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n", | ||
| 1062 | __FUNCTION__, rtn_value); | ||
| 1063 | } | ||
| 1064 | |||
| 1065 | /* Deactivate the signals */ | ||
| 1066 | pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); | ||
| 1067 | } | ||
| 1068 | |||
| 1069 | stop_spu_profiling(); | ||
| 1070 | } | ||
| 1071 | |||
| 1072 | static void cell_global_stop_ppu(void) | ||
| 663 | { | 1073 | { |
| 664 | int cpu; | 1074 | int cpu; |
| 665 | 1075 | ||
| 666 | /* This routine will be called once for the system. | 1076 | /* |
| 1077 | * This routine will be called once for the system. | ||
| 667 | * There is one performance monitor per node, so we | 1078 | * There is one performance monitor per node, so we |
| 668 | * only need to perform this function once per node. | 1079 | * only need to perform this function once per node. |
| 669 | */ | 1080 | */ |
| @@ -687,8 +1098,16 @@ static void cell_global_stop(void) | |||
| 687 | } | 1098 | } |
| 688 | } | 1099 | } |
| 689 | 1100 | ||
| 690 | static void | 1101 | static void cell_global_stop(void) |
| 691 | cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) | 1102 | { |
| 1103 | if (spu_cycle_reset) | ||
| 1104 | cell_global_stop_spu(); | ||
| 1105 | else | ||
| 1106 | cell_global_stop_ppu(); | ||
| 1107 | } | ||
| 1108 | |||
| 1109 | static void cell_handle_interrupt(struct pt_regs *regs, | ||
| 1110 | struct op_counter_config *ctr) | ||
| 692 | { | 1111 | { |
| 693 | u32 cpu; | 1112 | u32 cpu; |
| 694 | u64 pc; | 1113 | u64 pc; |
| @@ -699,13 +1118,15 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) | |||
| 699 | 1118 | ||
| 700 | cpu = smp_processor_id(); | 1119 | cpu = smp_processor_id(); |
| 701 | 1120 | ||
| 702 | /* Need to make sure the interrupt handler and the virt counter | 1121 | /* |
| 1122 | * Need to make sure the interrupt handler and the virt counter | ||
| 703 | * routine are not running at the same time. See the | 1123 | * routine are not running at the same time. See the |
| 704 | * cell_virtual_cntr() routine for additional comments. | 1124 | * cell_virtual_cntr() routine for additional comments. |
| 705 | */ | 1125 | */ |
| 706 | spin_lock_irqsave(&virt_cntr_lock, flags); | 1126 | spin_lock_irqsave(&virt_cntr_lock, flags); |
| 707 | 1127 | ||
| 708 | /* Need to disable and reenable the performance counters | 1128 | /* |
| 1129 | * Need to disable and reenable the performance counters | ||
| 709 | * to get the desired behavior from the hardware. This | 1130 | * to get the desired behavior from the hardware. This |
| 710 | * is hardware specific. | 1131 | * is hardware specific. |
| 711 | */ | 1132 | */ |
| @@ -714,7 +1135,8 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) | |||
| 714 | 1135 | ||
| 715 | interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu); | 1136 | interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu); |
| 716 | 1137 | ||
| 717 | /* If the interrupt mask has been cleared, then the virt cntr | 1138 | /* |
| 1139 | * If the interrupt mask has been cleared, then the virt cntr | ||
| 718 | * has cleared the interrupt. When the thread that generated | 1140 | * has cleared the interrupt. When the thread that generated |
| 719 | * the interrupt is restored, the data count will be restored to | 1141 | * the interrupt is restored, the data count will be restored to |
| 720 | * 0xffffff0 to cause the interrupt to be regenerated. | 1142 | * 0xffffff0 to cause the interrupt to be regenerated. |
| @@ -732,18 +1154,20 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) | |||
| 732 | } | 1154 | } |
| 733 | } | 1155 | } |
| 734 | 1156 | ||
| 735 | /* The counters were frozen by the interrupt. | 1157 | /* |
| 1158 | * The counters were frozen by the interrupt. | ||
| 736 | * Reenable the interrupt and restart the counters. | 1159 | * Reenable the interrupt and restart the counters. |
| 737 | * If there was a race between the interrupt handler and | 1160 | * If there was a race between the interrupt handler and |
| 738 | * the virtual counter routine. The virutal counter | 1161 | * the virtual counter routine. The virutal counter |
| 739 | * routine may have cleared the interrupts. Hence must | 1162 | * routine may have cleared the interrupts. Hence must |
| 740 | * use the virt_cntr_inter_mask to re-enable the interrupts. | 1163 | * use the virt_cntr_inter_mask to re-enable the interrupts. |
| 741 | */ | 1164 | */ |
| 742 | cbe_enable_pm_interrupts(cpu, hdw_thread, | 1165 | cbe_enable_pm_interrupts(cpu, hdw_thread, |
| 743 | virt_cntr_inter_mask); | 1166 | virt_cntr_inter_mask); |
| 744 | 1167 | ||
| 745 | /* The writes to the various performance counters only write | 1168 | /* |
| 746 | * to a latch. The new values (interrupt setting bits, reset | 1169 | * The writes to the various performance counters only write |
| 1170 | * to a latch. The new values (interrupt setting bits, reset | ||
| 747 | * counter value etc.) are not copied to the actual registers | 1171 | * counter value etc.) are not copied to the actual registers |
| 748 | * until the performance monitor is enabled. In order to get | 1172 | * until the performance monitor is enabled. In order to get |
| 749 | * this to work as desired, the performance monitor needs to | 1173 | * this to work as desired, the performance monitor needs to |
| @@ -755,10 +1179,33 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) | |||
| 755 | spin_unlock_irqrestore(&virt_cntr_lock, flags); | 1179 | spin_unlock_irqrestore(&virt_cntr_lock, flags); |
| 756 | } | 1180 | } |
| 757 | 1181 | ||
| 1182 | /* | ||
| 1183 | * This function is called from the generic OProfile | ||
| 1184 | * driver. When profiling PPUs, we need to do the | ||
| 1185 | * generic sync start; otherwise, do spu_sync_start. | ||
| 1186 | */ | ||
| 1187 | static int cell_sync_start(void) | ||
| 1188 | { | ||
| 1189 | if (spu_cycle_reset) | ||
| 1190 | return spu_sync_start(); | ||
| 1191 | else | ||
| 1192 | return DO_GENERIC_SYNC; | ||
| 1193 | } | ||
| 1194 | |||
| 1195 | static int cell_sync_stop(void) | ||
| 1196 | { | ||
| 1197 | if (spu_cycle_reset) | ||
| 1198 | return spu_sync_stop(); | ||
| 1199 | else | ||
| 1200 | return 1; | ||
| 1201 | } | ||
| 1202 | |||
| 758 | struct op_powerpc_model op_model_cell = { | 1203 | struct op_powerpc_model op_model_cell = { |
| 759 | .reg_setup = cell_reg_setup, | 1204 | .reg_setup = cell_reg_setup, |
| 760 | .cpu_setup = cell_cpu_setup, | 1205 | .cpu_setup = cell_cpu_setup, |
| 761 | .global_start = cell_global_start, | 1206 | .global_start = cell_global_start, |
| 762 | .global_stop = cell_global_stop, | 1207 | .global_stop = cell_global_stop, |
| 1208 | .sync_start = cell_sync_start, | ||
| 1209 | .sync_stop = cell_sync_stop, | ||
| 763 | .handle_interrupt = cell_handle_interrupt, | 1210 | .handle_interrupt = cell_handle_interrupt, |
| 764 | }; | 1211 | }; |
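
The long comment added to op_model_cell.c documents the generator used to derive the initial_lfsr[] table. A standalone, runnable transcription of that commented code follows (the function name and the small main() harness are illustrative, not part of the patch):

    #include <stdio.h>

    #define SIZE 24

    /* Step the 24-bit LFSR n times from the all-ones state, exactly as
     * in the op_model_cell.c comment; the taps are bits 0, 1, 6 and 23
     * counted from the most significant end.
     */
    static unsigned int lfsr_after_n(int n)
    {
            unsigned int lfsr = 0xFFFFFF;
            unsigned int newlfsr0;
            int i;

            for (i = 2; i < n + 2; i++) {
                    newlfsr0 = (((lfsr >> (SIZE - 1 - 0)) & 1) ^
                                ((lfsr >> (SIZE - 1 - 1)) & 1) ^
                                (((lfsr >> (SIZE - 1 - 6)) & 1) ^
                                 ((lfsr >> (SIZE - 1 - 23)) & 1)));
                    lfsr >>= 1;
                    lfsr = lfsr | (newlfsr0 << (SIZE - 1));
            }
            return lfsr;
    }

    int main(void)
    {
            /* Print the values the precomputed table approximates at a
             * few sample periods a user might request.
             */
            int n;

            for (n = 1 << 16; n <= 1 << 20; n <<= 1)
                    printf("n = %7d  lfsr = 0x%06x\n", n, lfsr_after_n(n));
            return 0;
    }

The in-kernel calculate_lfsr() then only has to map the requested period onto the nearest precomputed entry using shifts, trading at most the documented ~6.2% period error for a constant-time lookup at profiling start.
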
diff --git a/arch/powerpc/oprofile/op_model_fsl_booke.c b/arch/powerpc/oprofile/op_model_fsl_booke.c index 2267eb8c661b..183a28bb1812 100644 --- a/arch/powerpc/oprofile/op_model_fsl_booke.c +++ b/arch/powerpc/oprofile/op_model_fsl_booke.c | |||
| @@ -244,7 +244,7 @@ static void dump_pmcs(void) | |||
| 244 | mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3)); | 244 | mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3)); |
| 245 | } | 245 | } |
| 246 | 246 | ||
| 247 | static void fsl_booke_cpu_setup(struct op_counter_config *ctr) | 247 | static int fsl_booke_cpu_setup(struct op_counter_config *ctr) |
| 248 | { | 248 | { |
| 249 | int i; | 249 | int i; |
| 250 | 250 | ||
| @@ -258,9 +258,11 @@ static void fsl_booke_cpu_setup(struct op_counter_config *ctr) | |||
| 258 | 258 | ||
| 259 | set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel); | 259 | set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel); |
| 260 | } | 260 | } |
| 261 | |||
| 262 | return 0; | ||
| 261 | } | 263 | } |
| 262 | 264 | ||
| 263 | static void fsl_booke_reg_setup(struct op_counter_config *ctr, | 265 | static int fsl_booke_reg_setup(struct op_counter_config *ctr, |
| 264 | struct op_system_config *sys, | 266 | struct op_system_config *sys, |
| 265 | int num_ctrs) | 267 | int num_ctrs) |
| 266 | { | 268 | { |
| @@ -276,9 +278,10 @@ static void fsl_booke_reg_setup(struct op_counter_config *ctr, | |||
| 276 | for (i = 0; i < num_counters; ++i) | 278 | for (i = 0; i < num_counters; ++i) |
| 277 | reset_value[i] = 0x80000000UL - ctr[i].count; | 279 | reset_value[i] = 0x80000000UL - ctr[i].count; |
| 278 | 280 | ||
| 281 | return 0; | ||
| 279 | } | 282 | } |
| 280 | 283 | ||
| 281 | static void fsl_booke_start(struct op_counter_config *ctr) | 284 | static int fsl_booke_start(struct op_counter_config *ctr) |
| 282 | { | 285 | { |
| 283 | int i; | 286 | int i; |
| 284 | 287 | ||
| @@ -308,6 +311,8 @@ static void fsl_booke_start(struct op_counter_config *ctr) | |||
| 308 | 311 | ||
| 309 | pr_debug("start on cpu %d, pmgc0 %x\n", smp_processor_id(), | 312 | pr_debug("start on cpu %d, pmgc0 %x\n", smp_processor_id(), |
| 310 | mfpmr(PMRN_PMGC0)); | 313 | mfpmr(PMRN_PMGC0)); |
| 314 | |||
| 315 | return 0; | ||
| 311 | } | 316 | } |
| 312 | 317 | ||
| 313 | static void fsl_booke_stop(void) | 318 | static void fsl_booke_stop(void) |
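
The reset_value arithmetic repeated across these models, reset_value[i] = 0x80000000UL - ctr[i].count, relies on the PMCs raising their overflow condition once bit 31 of the 32-bit counter becomes set. A small sketch of that arithmetic (the simulation loop and numbers are illustrative only, not kernel code):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t count = 100000;  /* user-requested sample period */
            /* Start the counter "count" events below the overflow bit. */
            uint32_t reset_value = 0x80000000UL - count;
            uint32_t pmc = reset_value;
            uint32_t events = 0;

            /* Count up until bit 31 is set, as the hardware would. */
            while (!(pmc & 0x80000000UL)) {
                    pmc++;      /* one hardware event counted */
                    events++;
            }
            /* Prints 100000: the PMU interrupt fires after exactly
             * "count" events, and the handler reloads reset_value.
             */
            printf("interrupt after %u events\n", events);
            return 0;
    }
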
diff --git a/arch/powerpc/oprofile/op_model_pa6t.c b/arch/powerpc/oprofile/op_model_pa6t.c index e8a56b0adadc..c40de461fd4e 100644 --- a/arch/powerpc/oprofile/op_model_pa6t.c +++ b/arch/powerpc/oprofile/op_model_pa6t.c | |||
| @@ -89,7 +89,7 @@ static inline void ctr_write(unsigned int i, u64 val) | |||
| 89 | 89 | ||
| 90 | 90 | ||
| 91 | /* precompute the values to stuff in the hardware registers */ | 91 | /* precompute the values to stuff in the hardware registers */ |
| 92 | static void pa6t_reg_setup(struct op_counter_config *ctr, | 92 | static int pa6t_reg_setup(struct op_counter_config *ctr, |
| 93 | struct op_system_config *sys, | 93 | struct op_system_config *sys, |
| 94 | int num_ctrs) | 94 | int num_ctrs) |
| 95 | { | 95 | { |
| @@ -135,10 +135,12 @@ static void pa6t_reg_setup(struct op_counter_config *ctr, | |||
| 135 | pr_debug("reset_value for pmc%u inited to 0x%lx\n", | 135 | pr_debug("reset_value for pmc%u inited to 0x%lx\n", |
| 136 | pmc, reset_value[pmc]); | 136 | pmc, reset_value[pmc]); |
| 137 | } | 137 | } |
| 138 | |||
| 139 | return 0; | ||
| 138 | } | 140 | } |
| 139 | 141 | ||
| 140 | /* configure registers on this cpu */ | 142 | /* configure registers on this cpu */ |
| 141 | static void pa6t_cpu_setup(struct op_counter_config *ctr) | 143 | static int pa6t_cpu_setup(struct op_counter_config *ctr) |
| 142 | { | 144 | { |
| 143 | u64 mmcr0 = mmcr0_val; | 145 | u64 mmcr0 = mmcr0_val; |
| 144 | u64 mmcr1 = mmcr1_val; | 146 | u64 mmcr1 = mmcr1_val; |
| @@ -154,9 +156,11 @@ static void pa6t_cpu_setup(struct op_counter_config *ctr) | |||
| 154 | mfspr(SPRN_PA6T_MMCR0)); | 156 | mfspr(SPRN_PA6T_MMCR0)); |
| 155 | pr_debug("setup on cpu %d, mmcr1 %016lx\n", smp_processor_id(), | 157 | pr_debug("setup on cpu %d, mmcr1 %016lx\n", smp_processor_id(), |
| 156 | mfspr(SPRN_PA6T_MMCR1)); | 158 | mfspr(SPRN_PA6T_MMCR1)); |
| 159 | |||
| 160 | return 0; | ||
| 157 | } | 161 | } |
| 158 | 162 | ||
| 159 | static void pa6t_start(struct op_counter_config *ctr) | 163 | static int pa6t_start(struct op_counter_config *ctr) |
| 160 | { | 164 | { |
| 161 | int i; | 165 | int i; |
| 162 | 166 | ||
| @@ -174,6 +178,8 @@ static void pa6t_start(struct op_counter_config *ctr) | |||
| 174 | oprofile_running = 1; | 178 | oprofile_running = 1; |
| 175 | 179 | ||
| 176 | pr_debug("start on cpu %d, mmcr0 %lx\n", smp_processor_id(), mmcr0); | 180 | pr_debug("start on cpu %d, mmcr0 %lx\n", smp_processor_id(), mmcr0); |
| 181 | |||
| 182 | return 0; | ||
| 177 | } | 183 | } |
| 178 | 184 | ||
| 179 | static void pa6t_stop(void) | 185 | static void pa6t_stop(void) |
diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c index a7c206b665af..cddc250a6a5c 100644 --- a/arch/powerpc/oprofile/op_model_power4.c +++ b/arch/powerpc/oprofile/op_model_power4.c | |||
| @@ -32,7 +32,7 @@ static u32 mmcr0_val; | |||
| 32 | static u64 mmcr1_val; | 32 | static u64 mmcr1_val; |
| 33 | static u64 mmcra_val; | 33 | static u64 mmcra_val; |
| 34 | 34 | ||
| 35 | static void power4_reg_setup(struct op_counter_config *ctr, | 35 | static int power4_reg_setup(struct op_counter_config *ctr, |
| 36 | struct op_system_config *sys, | 36 | struct op_system_config *sys, |
| 37 | int num_ctrs) | 37 | int num_ctrs) |
| 38 | { | 38 | { |
| @@ -60,6 +60,8 @@ static void power4_reg_setup(struct op_counter_config *ctr, | |||
| 60 | mmcr0_val &= ~MMCR0_PROBLEM_DISABLE; | 60 | mmcr0_val &= ~MMCR0_PROBLEM_DISABLE; |
| 61 | else | 61 | else |
| 62 | mmcr0_val |= MMCR0_PROBLEM_DISABLE; | 62 | mmcr0_val |= MMCR0_PROBLEM_DISABLE; |
| 63 | |||
| 64 | return 0; | ||
| 63 | } | 65 | } |
| 64 | 66 | ||
| 65 | extern void ppc64_enable_pmcs(void); | 67 | extern void ppc64_enable_pmcs(void); |
| @@ -84,7 +86,7 @@ static inline int mmcra_must_set_sample(void) | |||
| 84 | return 0; | 86 | return 0; |
| 85 | } | 87 | } |
| 86 | 88 | ||
| 87 | static void power4_cpu_setup(struct op_counter_config *ctr) | 89 | static int power4_cpu_setup(struct op_counter_config *ctr) |
| 88 | { | 90 | { |
| 89 | unsigned int mmcr0 = mmcr0_val; | 91 | unsigned int mmcr0 = mmcr0_val; |
| 90 | unsigned long mmcra = mmcra_val; | 92 | unsigned long mmcra = mmcra_val; |
| @@ -111,9 +113,11 @@ static void power4_cpu_setup(struct op_counter_config *ctr) | |||
| 111 | mfspr(SPRN_MMCR1)); | 113 | mfspr(SPRN_MMCR1)); |
| 112 | dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(), | 114 | dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(), |
| 113 | mfspr(SPRN_MMCRA)); | 115 | mfspr(SPRN_MMCRA)); |
| 116 | |||
| 117 | return 0; | ||
| 114 | } | 118 | } |
| 115 | 119 | ||
| 116 | static void power4_start(struct op_counter_config *ctr) | 120 | static int power4_start(struct op_counter_config *ctr) |
| 117 | { | 121 | { |
| 118 | int i; | 122 | int i; |
| 119 | unsigned int mmcr0; | 123 | unsigned int mmcr0; |
| @@ -148,6 +152,7 @@ static void power4_start(struct op_counter_config *ctr) | |||
| 148 | oprofile_running = 1; | 152 | oprofile_running = 1; |
| 149 | 153 | ||
| 150 | dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); | 154 | dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); |
| 155 | return 0; | ||
| 151 | } | 156 | } |
| 152 | 157 | ||
| 153 | static void power4_stop(void) | 158 | static void power4_stop(void) |
diff --git a/arch/powerpc/oprofile/op_model_rs64.c b/arch/powerpc/oprofile/op_model_rs64.c index c731acbfb2a5..a20afe45d936 100644 --- a/arch/powerpc/oprofile/op_model_rs64.c +++ b/arch/powerpc/oprofile/op_model_rs64.c | |||
| @@ -88,7 +88,7 @@ static unsigned long reset_value[OP_MAX_COUNTER]; | |||
| 88 | 88 | ||
| 89 | static int num_counters; | 89 | static int num_counters; |
| 90 | 90 | ||
| 91 | static void rs64_reg_setup(struct op_counter_config *ctr, | 91 | static int rs64_reg_setup(struct op_counter_config *ctr, |
| 92 | struct op_system_config *sys, | 92 | struct op_system_config *sys, |
| 93 | int num_ctrs) | 93 | int num_ctrs) |
| 94 | { | 94 | { |
| @@ -100,9 +100,10 @@ static void rs64_reg_setup(struct op_counter_config *ctr, | |||
| 100 | reset_value[i] = 0x80000000UL - ctr[i].count; | 100 | reset_value[i] = 0x80000000UL - ctr[i].count; |
| 101 | 101 | ||
| 102 | /* XXX setup user and kernel profiling */ | 102 | /* XXX setup user and kernel profiling */ |
| 103 | return 0; | ||
| 103 | } | 104 | } |
| 104 | 105 | ||
| 105 | static void rs64_cpu_setup(struct op_counter_config *ctr) | 106 | static int rs64_cpu_setup(struct op_counter_config *ctr) |
| 106 | { | 107 | { |
| 107 | unsigned int mmcr0; | 108 | unsigned int mmcr0; |
| 108 | 109 | ||
| @@ -125,9 +126,11 @@ static void rs64_cpu_setup(struct op_counter_config *ctr) | |||
| 125 | mfspr(SPRN_MMCR0)); | 126 | mfspr(SPRN_MMCR0)); |
| 126 | dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(), | 127 | dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(), |
| 127 | mfspr(SPRN_MMCR1)); | 128 | mfspr(SPRN_MMCR1)); |
| 129 | |||
| 130 | return 0; | ||
| 128 | } | 131 | } |
| 129 | 132 | ||
| 130 | static void rs64_start(struct op_counter_config *ctr) | 133 | static int rs64_start(struct op_counter_config *ctr) |
| 131 | { | 134 | { |
| 132 | int i; | 135 | int i; |
| 133 | unsigned int mmcr0; | 136 | unsigned int mmcr0; |
| @@ -155,6 +158,7 @@ static void rs64_start(struct op_counter_config *ctr) | |||
| 155 | mtspr(SPRN_MMCR0, mmcr0); | 158 | mtspr(SPRN_MMCR0, mmcr0); |
| 156 | 159 | ||
| 157 | dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); | 160 | dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); |
| 161 | return 0; | ||
| 158 | } | 162 | } |
| 159 | 163 | ||
| 160 | static void rs64_stop(void) | 164 | static void rs64_stop(void) |
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 33545d352e92..932538a93c2b 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig | |||
| @@ -272,4 +272,14 @@ config CPM2 | |||
| 272 | you wish to build a kernel for a machine with a CPM2 coprocessor | 272 | you wish to build a kernel for a machine with a CPM2 coprocessor |
| 273 | on it (826x, 827x, 8560). | 273 | on it (826x, 827x, 8560). |
| 274 | 274 | ||
| 275 | config AXON_RAM | ||
| 276 | tristate "Axon DDR2 memory device driver" | ||
| 277 | depends on PPC_IBM_CELL_BLADE | ||
| 278 | default m | ||
| 279 | help | ||
| 280 | This driver registers one block device per Axon DDR2 memory bank | ||
| 281 | found in the system. The block devices are named axonram?; their | ||
| 282 | major and minor numbers are available in /proc/devices, | ||
| 283 | /proc/partitions, or /sys/block/axonram?/dev. | ||
| 284 | |||
| 275 | endmenu | 285 | endmenu |
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index 9b2b386ccf48..ac8032034fb8 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig | |||
| @@ -73,4 +73,14 @@ config CBE_CPUFREQ | |||
| 73 | For details, take a look at <file:Documentation/cpu-freq/>. | 73 | For details, take a look at <file:Documentation/cpu-freq/>. |
| 74 | If you don't have such processor, say N | 74 | If you don't have such processor, say N |
| 75 | 75 | ||
| 76 | config CBE_CPUFREQ_PMI | ||
| 77 | tristate "CBE frequency scaling using PMI interface" | ||
| 78 | depends on CBE_CPUFREQ && PPC_PMI && EXPERIMENTAL | ||
| 79 | default n | ||
| 80 | help | ||
| 81 | Select this if you want to use the PMI interface | ||
| 82 | to switch frequencies. With PMI, the processor | ||
| 83 | can run not only at a lower speed but also at a | ||
| 84 | lower core voltage. | ||
| 85 | |||
| 76 | endmenu | 86 | endmenu |
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile index 869af89df6ff..f88a7c76f296 100644 --- a/arch/powerpc/platforms/cell/Makefile +++ b/arch/powerpc/platforms/cell/Makefile | |||
| @@ -4,7 +4,9 @@ obj-$(CONFIG_PPC_CELL_NATIVE) += interrupt.o iommu.o setup.o \ | |||
| 4 | obj-$(CONFIG_CBE_RAS) += ras.o | 4 | obj-$(CONFIG_CBE_RAS) += ras.o |
| 5 | 5 | ||
| 6 | obj-$(CONFIG_CBE_THERM) += cbe_thermal.o | 6 | obj-$(CONFIG_CBE_THERM) += cbe_thermal.o |
| 7 | obj-$(CONFIG_CBE_CPUFREQ) += cbe_cpufreq.o | 7 | obj-$(CONFIG_CBE_CPUFREQ_PMI) += cbe_cpufreq_pmi.o |
| 8 | obj-$(CONFIG_CBE_CPUFREQ) += cbe-cpufreq.o | ||
| 9 | cbe-cpufreq-y += cbe_cpufreq_pervasive.o cbe_cpufreq.o | ||
| 8 | 10 | ||
| 9 | ifeq ($(CONFIG_SMP),y) | 11 | ifeq ($(CONFIG_SMP),y) |
| 10 | obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o | 12 | obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o |
| @@ -23,3 +25,5 @@ obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \ | |||
| 23 | $(spu-priv1-y) \ | 25 | $(spu-priv1-y) \ |
| 24 | $(spu-manage-y) \ | 26 | $(spu-manage-y) \ |
| 25 | spufs/ | 27 | spufs/ |
| 28 | |||
| 29 | obj-$(CONFIG_PCI_MSI) += axon_msi.o | ||
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c new file mode 100644 index 000000000000..4c9ab5b70bae --- /dev/null +++ b/arch/powerpc/platforms/cell/axon_msi.c | |||
| @@ -0,0 +1,445 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2007, Michael Ellerman, IBM Corporation. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public License | ||
| 6 | * as published by the Free Software Foundation; either version | ||
| 7 | * 2 of the License, or (at your option) any later version. | ||
| 8 | */ | ||
| 9 | |||
| 10 | |||
| 11 | #include <linux/interrupt.h> | ||
| 12 | #include <linux/irq.h> | ||
| 13 | #include <linux/kernel.h> | ||
| 14 | #include <linux/pci.h> | ||
| 15 | #include <linux/msi.h> | ||
| 16 | #include <linux/reboot.h> | ||
| 17 | |||
| 18 | #include <asm/dcr.h> | ||
| 19 | #include <asm/machdep.h> | ||
| 20 | #include <asm/prom.h> | ||
| 21 | |||
| 22 | |||
| 23 | /* | ||
| 24 | * MSIC registers, specified as offsets from dcr_base | ||
| 25 | */ | ||
| 26 | #define MSIC_CTRL_REG 0x0 | ||
| 27 | |||
| 28 | /* Base Address registers specify FIFO location in BE memory */ | ||
| 29 | #define MSIC_BASE_ADDR_HI_REG 0x3 | ||
| 30 | #define MSIC_BASE_ADDR_LO_REG 0x4 | ||
| 31 | |||
| 32 | /* Hold the read/write offsets into the FIFO */ | ||
| 33 | #define MSIC_READ_OFFSET_REG 0x5 | ||
| 34 | #define MSIC_WRITE_OFFSET_REG 0x6 | ||
| 35 | |||
| 36 | |||
| 37 | /* MSIC control register flags */ | ||
| 38 | #define MSIC_CTRL_ENABLE 0x0001 | ||
| 39 | #define MSIC_CTRL_FIFO_FULL_ENABLE 0x0002 | ||
| 40 | #define MSIC_CTRL_IRQ_ENABLE 0x0008 | ||
| 41 | #define MSIC_CTRL_FULL_STOP_ENABLE 0x0010 | ||
| 42 | |||
| 43 | /* | ||
| 44 | * The MSIC can be configured to use a FIFO of 32KB, 64KB, 128KB or 256KB. | ||
| 45 | * Currently we're using a 64KB FIFO size. | ||
| 46 | */ | ||
| 47 | #define MSIC_FIFO_SIZE_SHIFT 16 | ||
| 48 | #define MSIC_FIFO_SIZE_BYTES (1 << MSIC_FIFO_SIZE_SHIFT) | ||
| 49 | |||
| 50 | /* | ||
| 51 | * To configure the FIFO size as (1 << n) bytes, we write (n - 15) into bits | ||
| 52 | * 8-9 of the MSIC control reg. | ||
| 53 | */ | ||
| 54 | #define MSIC_CTRL_FIFO_SIZE (((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300) | ||
| 55 | |||
| 56 | /* | ||
| 57 | * We need to mask the read/write offsets to make sure they stay within | ||
| 58 | * the bounds of the FIFO. Also they should always be 16-byte aligned. | ||
| 59 | */ | ||
| 60 | #define MSIC_FIFO_SIZE_MASK ((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu) | ||
| 61 | |||
| 62 | /* Each entry in the FIFO is 16 bytes, the first 4 bytes hold the irq # */ | ||
| 63 | #define MSIC_FIFO_ENTRY_SIZE 0x10 | ||
| 64 | |||
| 65 | |||
| 66 | struct axon_msic { | ||
| 67 | struct device_node *dn; | ||
| 68 | struct irq_host *irq_host; | ||
| 69 | __le32 *fifo; | ||
| 70 | dcr_host_t dcr_host; | ||
| 71 | struct list_head list; | ||
| 72 | u32 read_offset; | ||
| 73 | u32 dcr_base; | ||
| 74 | }; | ||
| 75 | |||
| 76 | static LIST_HEAD(axon_msic_list); | ||
| 77 | |||
| 78 | static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val) | ||
| 79 | { | ||
| 80 | pr_debug("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n); | ||
| 81 | |||
| 82 | dcr_write(msic->dcr_host, msic->dcr_base + dcr_n, val); | ||
| 83 | } | ||
| 84 | |||
| 85 | static u32 msic_dcr_read(struct axon_msic *msic, unsigned int dcr_n) | ||
| 86 | { | ||
| 87 | return dcr_read(msic->dcr_host, msic->dcr_base + dcr_n); | ||
| 88 | } | ||
| 89 | |||
| 90 | static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc) | ||
| 91 | { | ||
| 92 | struct axon_msic *msic = get_irq_data(irq); | ||
| 93 | u32 write_offset, msi; | ||
| 94 | int idx; | ||
| 95 | |||
| 96 | write_offset = msic_dcr_read(msic, MSIC_WRITE_OFFSET_REG); | ||
| 97 | pr_debug("axon_msi: original write_offset 0x%x\n", write_offset); | ||
| 98 | |||
| 99 | /* write_offset doesn't wrap properly, so we have to mask it */ | ||
| 100 | write_offset &= MSIC_FIFO_SIZE_MASK; | ||
| 101 | |||
| 102 | while (msic->read_offset != write_offset) { | ||
| 103 | idx = msic->read_offset / sizeof(__le32); | ||
| 104 | msi = le32_to_cpu(msic->fifo[idx]); | ||
| 105 | msi &= 0xFFFF; | ||
| 106 | |||
| 107 | pr_debug("axon_msi: woff %x roff %x msi %x\n", | ||
| 108 | write_offset, msic->read_offset, msi); | ||
| 109 | |||
| 110 | msic->read_offset += MSIC_FIFO_ENTRY_SIZE; | ||
| 111 | msic->read_offset &= MSIC_FIFO_SIZE_MASK; | ||
| 112 | |||
| 113 | if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host) | ||
| 114 | generic_handle_irq(msi); | ||
| 115 | else | ||
| 116 | pr_debug("axon_msi: invalid irq 0x%x!\n", msi); | ||
| 117 | } | ||
| 118 | |||
| 119 | desc->chip->eoi(irq); | ||
| 120 | } | ||
| 121 | |||
| 122 | static struct axon_msic *find_msi_translator(struct pci_dev *dev) | ||
| 123 | { | ||
| 124 | struct irq_host *irq_host; | ||
| 125 | struct device_node *dn, *tmp; | ||
| 126 | const phandle *ph; | ||
| 127 | struct axon_msic *msic = NULL; | ||
| 128 | |||
| 129 | dn = pci_device_to_OF_node(dev); | ||
| 130 | if (!dn) { | ||
| 131 | dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n"); | ||
| 132 | return NULL; | ||
| 133 | } | ||
| 134 | |||
| 135 | for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) { | ||
| 136 | ph = of_get_property(dn, "msi-translator", NULL); | ||
| 137 | if (ph) | ||
| 138 | break; | ||
| 139 | } | ||
| 140 | |||
| 141 | if (!ph) { | ||
| 142 | dev_dbg(&dev->dev, | ||
| 143 | "axon_msi: no msi-translator property found\n"); | ||
| 144 | goto out_error; | ||
| 145 | } | ||
| 146 | |||
| 147 | tmp = dn; | ||
| 148 | dn = of_find_node_by_phandle(*ph); | ||
| 149 | if (!dn) { | ||
| 150 | dev_dbg(&dev->dev, | ||
| 151 | "axon_msi: msi-translator doesn't point to a node\n"); | ||
| 152 | goto out_error; | ||
| 153 | } | ||
| 154 | |||
| 155 | irq_host = irq_find_host(dn); | ||
| 156 | if (!irq_host) { | ||
| 157 | dev_dbg(&dev->dev, "axon_msi: no irq_host found for node %s\n", | ||
| 158 | dn->full_name); | ||
| 159 | goto out_error; | ||
| 160 | } | ||
| 161 | |||
| 162 | msic = irq_host->host_data; | ||
| 163 | |||
| 164 | out_error: | ||
| 165 | of_node_put(dn); | ||
| 166 | of_node_put(tmp); | ||
| 167 | |||
| 168 | return msic; | ||
| 169 | } | ||
| 170 | |||
| 171 | static int axon_msi_check_device(struct pci_dev *dev, int nvec, int type) | ||
| 172 | { | ||
| 173 | if (!find_msi_translator(dev)) | ||
| 174 | return -ENODEV; | ||
| 175 | |||
| 176 | return 0; | ||
| 177 | } | ||
| 178 | |||
| 179 | static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg) | ||
| 180 | { | ||
| 181 | struct device_node *dn, *tmp; | ||
| 182 | struct msi_desc *entry; | ||
| 183 | int len; | ||
| 184 | const u32 *prop; | ||
| 185 | |||
| 186 | dn = pci_device_to_OF_node(dev); | ||
| 187 | if (!dn) { | ||
| 188 | dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n"); | ||
| 189 | return -ENODEV; | ||
| 190 | } | ||
| 191 | |||
| 192 | entry = list_first_entry(&dev->msi_list, struct msi_desc, list); | ||
| 193 | |||
| 194 | for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) { | ||
| 195 | if (entry->msi_attrib.is_64) { | ||
| 196 | prop = of_get_property(dn, "msi-address-64", &len); | ||
| 197 | if (prop) | ||
| 198 | break; | ||
| 199 | } | ||
| 200 | |||
| 201 | prop = of_get_property(dn, "msi-address-32", &len); | ||
| 202 | if (prop) | ||
| 203 | break; | ||
| 204 | } | ||
| 205 | |||
| 206 | if (!prop) { | ||
| 207 | dev_dbg(&dev->dev, | ||
| 208 | "axon_msi: no msi-address-(32|64) properties found\n"); | ||
| 209 | return -ENOENT; | ||
| 210 | } | ||
| 211 | |||
| 212 | switch (len) { | ||
| 213 | case 8: | ||
| 214 | msg->address_hi = prop[0]; | ||
| 215 | msg->address_lo = prop[1]; | ||
| 216 | break; | ||
| 217 | case 4: | ||
| 218 | msg->address_hi = 0; | ||
| 219 | msg->address_lo = prop[0]; | ||
| 220 | break; | ||
| 221 | default: | ||
| 222 | dev_dbg(&dev->dev, | ||
| 223 | "axon_msi: malformed msi-address-(32|64) property\n"); | ||
| 224 | of_node_put(dn); | ||
| 225 | return -EINVAL; | ||
| 226 | } | ||
| 227 | |||
| 228 | of_node_put(dn); | ||
| 229 | |||
| 230 | return 0; | ||
| 231 | } | ||
| 232 | |||
| 233 | static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | ||
| 234 | { | ||
| 235 | unsigned int virq, rc; | ||
| 236 | struct msi_desc *entry; | ||
| 237 | struct msi_msg msg; | ||
| 238 | struct axon_msic *msic; | ||
| 239 | |||
| 240 | msic = find_msi_translator(dev); | ||
| 241 | if (!msic) | ||
| 242 | return -ENODEV; | ||
| 243 | |||
| 244 | rc = setup_msi_msg_address(dev, &msg); | ||
| 245 | if (rc) | ||
| 246 | return rc; | ||
| 247 | |||
| 248 | /* We rely on being able to stash a virq in a u16 */ | ||
| 249 | BUILD_BUG_ON(NR_IRQS > 65536); | ||
| 250 | |||
| 251 | list_for_each_entry(entry, &dev->msi_list, list) { | ||
| 252 | virq = irq_create_direct_mapping(msic->irq_host); | ||
| 253 | if (virq == NO_IRQ) { | ||
| 254 | dev_warn(&dev->dev, | ||
| 255 | "axon_msi: virq allocation failed!\n"); | ||
| 256 | return -1; | ||
| 257 | } | ||
| 258 | dev_dbg(&dev->dev, "axon_msi: allocated virq 0x%x\n", virq); | ||
| 259 | |||
| 260 | set_irq_msi(virq, entry); | ||
| 261 | msg.data = virq; | ||
| 262 | write_msi_msg(virq, &msg); | ||
| 263 | } | ||
| 264 | |||
| 265 | return 0; | ||
| 266 | } | ||
| 267 | |||
| 268 | static void axon_msi_teardown_msi_irqs(struct pci_dev *dev) | ||
| 269 | { | ||
| 270 | struct msi_desc *entry; | ||
| 271 | |||
| 272 | dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n"); | ||
| 273 | |||
| 274 | list_for_each_entry(entry, &dev->msi_list, list) { | ||
| 275 | if (entry->irq == NO_IRQ) | ||
| 276 | continue; | ||
| 277 | |||
| 278 | set_irq_msi(entry->irq, NULL); | ||
| 279 | irq_dispose_mapping(entry->irq); | ||
| 280 | } | ||
| 281 | } | ||
| 282 | |||
| 283 | static struct irq_chip msic_irq_chip = { | ||
| 284 | .mask = mask_msi_irq, | ||
| 285 | .unmask = unmask_msi_irq, | ||
| 286 | .shutdown = unmask_msi_irq, | ||
| 287 | .typename = "AXON-MSI", | ||
| 288 | }; | ||
| 289 | |||
| 290 | static int msic_host_map(struct irq_host *h, unsigned int virq, | ||
| 291 | irq_hw_number_t hw) | ||
| 292 | { | ||
| 293 | set_irq_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq); | ||
| 294 | |||
| 295 | return 0; | ||
| 296 | } | ||
| 297 | |||
| 298 | static int msic_host_match(struct irq_host *host, struct device_node *dn) | ||
| 299 | { | ||
| 300 | struct axon_msic *msic = host->host_data; | ||
| 301 | |||
| 302 | return msic->dn == dn; | ||
| 303 | } | ||
| 304 | |||
| 305 | static struct irq_host_ops msic_host_ops = { | ||
| 306 | .match = msic_host_match, | ||
| 307 | .map = msic_host_map, | ||
| 308 | }; | ||
| 309 | |||
| 310 | static int axon_msi_notify_reboot(struct notifier_block *nb, | ||
| 311 | unsigned long code, void *data) | ||
| 312 | { | ||
| 313 | struct axon_msic *msic; | ||
| 314 | u32 tmp; | ||
| 315 | |||
| 316 | list_for_each_entry(msic, &axon_msic_list, list) { | ||
| 317 | pr_debug("axon_msi: disabling %s\n", msic->dn->full_name); | ||
| 318 | tmp = msic_dcr_read(msic, MSIC_CTRL_REG); | ||
| 319 | tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE; | ||
| 320 | msic_dcr_write(msic, MSIC_CTRL_REG, tmp); | ||
| 321 | } | ||
| 322 | |||
| 323 | return 0; | ||
| 324 | } | ||
| 325 | |||
| 326 | static struct notifier_block axon_msi_reboot_notifier = { | ||
| 327 | .notifier_call = axon_msi_notify_reboot | ||
| 328 | }; | ||
| 329 | |||
| 330 | static int axon_msi_setup_one(struct device_node *dn) | ||
| 331 | { | ||
| 332 | struct page *page; | ||
| 333 | struct axon_msic *msic; | ||
| 334 | unsigned int virq; | ||
| 335 | int dcr_len; | ||
| 336 | |||
| 337 | pr_debug("axon_msi: setting up dn %s\n", dn->full_name); | ||
| 338 | |||
| 339 | msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL); | ||
| 340 | if (!msic) { | ||
| 341 | printk(KERN_ERR "axon_msi: couldn't allocate msic for %s\n", | ||
| 342 | dn->full_name); | ||
| 343 | goto out; | ||
| 344 | } | ||
| 345 | |||
| 346 | msic->dcr_base = dcr_resource_start(dn, 0); | ||
| 347 | dcr_len = dcr_resource_len(dn, 0); | ||
| 348 | |||
| 349 | if (msic->dcr_base == 0 || dcr_len == 0) { | ||
| 350 | printk(KERN_ERR | ||
| 351 | "axon_msi: couldn't parse dcr properties on %s\n", | ||
| 352 | dn->full_name); | ||
| 353 | goto out; | ||
| 354 | } | ||
| 355 | |||
| 356 | msic->dcr_host = dcr_map(dn, msic->dcr_base, dcr_len); | ||
| 357 | if (!DCR_MAP_OK(msic->dcr_host)) { | ||
| 358 | printk(KERN_ERR "axon_msi: dcr_map failed for %s\n", | ||
| 359 | dn->full_name); | ||
| 360 | goto out_free_msic; | ||
| 361 | } | ||
| 362 | |||
| 363 | page = alloc_pages_node(of_node_to_nid(dn), GFP_KERNEL, | ||
| 364 | get_order(MSIC_FIFO_SIZE_BYTES)); | ||
| 365 | if (!page) { | ||
| 366 | printk(KERN_ERR "axon_msi: couldn't allocate fifo for %s\n", | ||
| 367 | dn->full_name); | ||
| 368 | goto out_free_msic; | ||
| 369 | } | ||
| 370 | |||
| 371 | msic->fifo = page_address(page); | ||
| 372 | |||
| 373 | msic->irq_host = irq_alloc_host(IRQ_HOST_MAP_NOMAP, NR_IRQS, | ||
| 374 | &msic_host_ops, 0); | ||
| 375 | if (!msic->irq_host) { | ||
| 376 | printk(KERN_ERR "axon_msi: couldn't allocate irq_host for %s\n", | ||
| 377 | dn->full_name); | ||
| 378 | goto out_free_fifo; | ||
| 379 | } | ||
| 380 | |||
| 381 | msic->irq_host->host_data = msic; | ||
| 382 | |||
| 383 | virq = irq_of_parse_and_map(dn, 0); | ||
| 384 | if (virq == NO_IRQ) { | ||
| 385 | printk(KERN_ERR "axon_msi: irq parse and map failed for %s\n", | ||
| 386 | dn->full_name); | ||
| 387 | goto out_free_host; | ||
| 388 | } | ||
| 389 | |||
| 390 | msic->dn = of_node_get(dn); | ||
| 391 | |||
| 392 | set_irq_data(virq, msic); | ||
| 393 | set_irq_chained_handler(virq, axon_msi_cascade); | ||
| 394 | pr_debug("axon_msi: irq 0x%x setup for axon_msi\n", virq); | ||
| 395 | |||
| 396 | /* Enable the MSIC hardware */ | ||
| 397 | msic_dcr_write(msic, MSIC_BASE_ADDR_HI_REG, (u64)msic->fifo >> 32); | ||
| 398 | msic_dcr_write(msic, MSIC_BASE_ADDR_LO_REG, | ||
| 399 | (u64)msic->fifo & 0xFFFFFFFF); | ||
| 400 | msic_dcr_write(msic, MSIC_CTRL_REG, | ||
| 401 | MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE | | ||
| 402 | MSIC_CTRL_FIFO_SIZE); | ||
| 403 | |||
| 404 | list_add(&msic->list, &axon_msic_list); | ||
| 405 | |||
| 406 | printk(KERN_DEBUG "axon_msi: setup MSIC on %s\n", dn->full_name); | ||
| 407 | |||
| 408 | return 0; | ||
| 409 | |||
| 410 | out_free_host: | ||
| 411 | kfree(msic->irq_host); | ||
| 412 | out_free_fifo: | ||
| 413 | __free_pages(virt_to_page(msic->fifo), get_order(MSIC_FIFO_SIZE_BYTES)); | ||
| 414 | out_free_msic: | ||
| 415 | kfree(msic); | ||
| 416 | out: | ||
| 417 | |||
| 418 | return -1; | ||
| 419 | } | ||
| 420 | |||
| 421 | static int axon_msi_init(void) | ||
| 422 | { | ||
| 423 | struct device_node *dn; | ||
| 424 | int found = 0; | ||
| 425 | |||
| 426 | pr_debug("axon_msi: initialising ...\n"); | ||
| 427 | |||
| 428 | for_each_compatible_node(dn, NULL, "ibm,axon-msic") { | ||
| 429 | if (axon_msi_setup_one(dn) == 0) | ||
| 430 | found++; | ||
| 431 | } | ||
| 432 | |||
| 433 | if (found) { | ||
| 434 | ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs; | ||
| 435 | ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs; | ||
| 436 | ppc_md.msi_check_device = axon_msi_check_device; | ||
| 437 | |||
| 438 | register_reboot_notifier(&axon_msi_reboot_notifier); | ||
| 439 | |||
| 440 | pr_debug("axon_msi: registered callbacks!\n"); | ||
| 441 | } | ||
| 442 | |||
| 443 | return 0; | ||
| 444 | } | ||
| 445 | arch_initcall(axon_msi_init); | ||
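[Editor's note] The FIFO macros in axon_msi.c encode the FIFO size as a power-of-two shift and derive both the control-register size field and the offset wrap mask from it: for a 64KB FIFO, n = 16, so the size field is (16 - 15) << 8 = 0x100, and the mask keeps offsets inside the FIFO while forcing 16-byte alignment. A standalone sketch of that arithmetic (macro values copied from the file above; the wrap demonstration is illustrative):

    #include <stdio.h>
    #include <stdint.h>

    #define MSIC_FIFO_SIZE_SHIFT  16        /* 64KB FIFO, as in the driver */
    #define MSIC_FIFO_SIZE_BYTES  (1u << MSIC_FIFO_SIZE_SHIFT)
    #define MSIC_CTRL_FIFO_SIZE   (((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300)
    #define MSIC_FIFO_SIZE_MASK   ((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu)
    #define MSIC_FIFO_ENTRY_SIZE  0x10

    int main(void)
    {
        /* Start at the last 16-byte entry in the FIFO. */
        uint32_t read_offset = MSIC_FIFO_SIZE_BYTES - MSIC_FIFO_ENTRY_SIZE;

        printf("fifo size: %u bytes, ctrl size field: 0x%x\n",
               MSIC_FIFO_SIZE_BYTES, (unsigned)MSIC_CTRL_FIFO_SIZE);

        /* Advancing past the last entry wraps to offset 0, which is
         * exactly the masking done in axon_msi_cascade() above. */
        read_offset += MSIC_FIFO_ENTRY_SIZE;
        read_offset &= MSIC_FIFO_SIZE_MASK;
        printf("wrapped read offset: 0x%x\n", (unsigned)read_offset);
        return 0;
    }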
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.c b/arch/powerpc/platforms/cell/cbe_cpufreq.c index ab511d5b65a4..0b6e8ee85ab1 100644 --- a/arch/powerpc/platforms/cell/cbe_cpufreq.c +++ b/arch/powerpc/platforms/cell/cbe_cpufreq.c | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * cpufreq driver for the cell processor | 2 | * cpufreq driver for the cell processor |
| 3 | * | 3 | * |
| 4 | * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 | 4 | * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 |
| 5 | * | 5 | * |
| 6 | * Author: Christian Krafft <krafft@de.ibm.com> | 6 | * Author: Christian Krafft <krafft@de.ibm.com> |
| 7 | * | 7 | * |
| @@ -21,18 +21,11 @@ | |||
| 21 | */ | 21 | */ |
| 22 | 22 | ||
| 23 | #include <linux/cpufreq.h> | 23 | #include <linux/cpufreq.h> |
| 24 | #include <linux/timer.h> | ||
| 25 | |||
| 26 | #include <asm/hw_irq.h> | ||
| 27 | #include <asm/io.h> | ||
| 28 | #include <asm/machdep.h> | 24 | #include <asm/machdep.h> |
| 29 | #include <asm/processor.h> | ||
| 30 | #include <asm/prom.h> | ||
| 31 | #include <asm/time.h> | ||
| 32 | #include <asm/pmi.h> | ||
| 33 | #include <asm/of_platform.h> | 25 | #include <asm/of_platform.h> |
| 34 | 26 | #include <asm/prom.h> | |
| 35 | #include "cbe_regs.h" | 27 | #include "cbe_regs.h" |
| 28 | #include "cbe_cpufreq.h" | ||
| 36 | 29 | ||
| 37 | static DEFINE_MUTEX(cbe_switch_mutex); | 30 | static DEFINE_MUTEX(cbe_switch_mutex); |
| 38 | 31 | ||
| @@ -50,159 +43,24 @@ static struct cpufreq_frequency_table cbe_freqs[] = { | |||
| 50 | {0, CPUFREQ_TABLE_END}, | 43 | {0, CPUFREQ_TABLE_END}, |
| 51 | }; | 44 | }; |
| 52 | 45 | ||
| 53 | /* to write to MIC register */ | ||
| 54 | static u64 MIC_Slow_Fast_Timer_table[] = { | ||
| 55 | [0 ... 7] = 0x007fc00000000000ull, | ||
| 56 | }; | ||
| 57 | |||
| 58 | /* more values for the MIC */ | ||
| 59 | static u64 MIC_Slow_Next_Timer_table[] = { | ||
| 60 | 0x0000240000000000ull, | ||
| 61 | 0x0000268000000000ull, | ||
| 62 | 0x000029C000000000ull, | ||
| 63 | 0x00002D0000000000ull, | ||
| 64 | 0x0000300000000000ull, | ||
| 65 | 0x0000334000000000ull, | ||
| 66 | 0x000039C000000000ull, | ||
| 67 | 0x00003FC000000000ull, | ||
| 68 | }; | ||
| 69 | |||
| 70 | static unsigned int pmi_frequency_limit = 0; | ||
| 71 | /* | 46 | /* |
| 72 | * hardware specific functions | 47 | * hardware specific functions |
| 73 | */ | 48 | */ |
| 74 | 49 | ||
| 75 | static struct of_device *pmi_dev; | 50 | static int set_pmode(unsigned int cpu, unsigned int slow_mode) |
| 76 | |||
| 77 | #ifdef CONFIG_PPC_PMI | ||
| 78 | static int set_pmode_pmi(int cpu, unsigned int pmode) | ||
| 79 | { | ||
| 80 | int ret; | ||
| 81 | pmi_message_t pmi_msg; | ||
| 82 | #ifdef DEBUG | ||
| 83 | u64 time; | ||
| 84 | #endif | ||
| 85 | |||
| 86 | pmi_msg.type = PMI_TYPE_FREQ_CHANGE; | ||
| 87 | pmi_msg.data1 = cbe_cpu_to_node(cpu); | ||
| 88 | pmi_msg.data2 = pmode; | ||
| 89 | |||
| 90 | #ifdef DEBUG | ||
| 91 | time = (u64) get_cycles(); | ||
| 92 | #endif | ||
| 93 | |||
| 94 | pmi_send_message(pmi_dev, pmi_msg); | ||
| 95 | ret = pmi_msg.data2; | ||
| 96 | |||
| 97 | pr_debug("PMI returned slow mode %d\n", ret); | ||
| 98 | |||
| 99 | #ifdef DEBUG | ||
| 100 | time = (u64) get_cycles() - time; /* actual cycles (not cpu cycles!) */ | ||
| 101 | time = 1000000000 * time / CLOCK_TICK_RATE; /* time in ns (10^-9) */ | ||
| 102 | pr_debug("had to wait %lu ns for a transition\n", time); | ||
| 103 | #endif | ||
| 104 | return ret; | ||
| 105 | } | ||
| 106 | #endif | ||
| 107 | |||
| 108 | static int get_pmode(int cpu) | ||
| 109 | { | 51 | { |
| 110 | int ret; | 52 | int rc; |
| 111 | struct cbe_pmd_regs __iomem *pmd_regs; | ||
| 112 | |||
| 113 | pmd_regs = cbe_get_cpu_pmd_regs(cpu); | ||
| 114 | ret = in_be64(&pmd_regs->pmsr) & 0x07; | ||
| 115 | |||
| 116 | return ret; | ||
| 117 | } | ||
| 118 | |||
| 119 | static int set_pmode_reg(int cpu, unsigned int pmode) | ||
| 120 | { | ||
| 121 | struct cbe_pmd_regs __iomem *pmd_regs; | ||
| 122 | struct cbe_mic_tm_regs __iomem *mic_tm_regs; | ||
| 123 | u64 flags; | ||
| 124 | u64 value; | ||
| 125 | |||
| 126 | local_irq_save(flags); | ||
| 127 | |||
| 128 | mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu); | ||
| 129 | pmd_regs = cbe_get_cpu_pmd_regs(cpu); | ||
| 130 | |||
| 131 | pr_debug("pm register is mapped at %p\n", &pmd_regs->pmcr); | ||
| 132 | pr_debug("mic register is mapped at %p\n", &mic_tm_regs->slow_fast_timer_0); | ||
| 133 | |||
| 134 | out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]); | ||
| 135 | out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]); | ||
| 136 | |||
| 137 | out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]); | ||
| 138 | out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]); | ||
| 139 | |||
| 140 | value = in_be64(&pmd_regs->pmcr); | ||
| 141 | /* set bits to zero */ | ||
| 142 | value &= 0xFFFFFFFFFFFFFFF8ull; | ||
| 143 | /* set bits to next pmode */ | ||
| 144 | value |= pmode; | ||
| 145 | |||
| 146 | out_be64(&pmd_regs->pmcr, value); | ||
| 147 | |||
| 148 | /* wait until new pmode appears in status register */ | ||
| 149 | value = in_be64(&pmd_regs->pmsr) & 0x07; | ||
| 150 | while(value != pmode) { | ||
| 151 | cpu_relax(); | ||
| 152 | value = in_be64(&pmd_regs->pmsr) & 0x07; | ||
| 153 | } | ||
| 154 | |||
| 155 | local_irq_restore(flags); | ||
| 156 | |||
| 157 | return 0; | ||
| 158 | } | ||
| 159 | 53 | ||
| 160 | static int set_pmode(int cpu, unsigned int slow_mode) { | 54 | if (cbe_cpufreq_has_pmi) |
| 161 | #ifdef CONFIG_PPC_PMI | 55 | rc = cbe_cpufreq_set_pmode_pmi(cpu, slow_mode); |
| 162 | if (pmi_dev) | ||
| 163 | return set_pmode_pmi(cpu, slow_mode); | ||
| 164 | else | 56 | else |
| 165 | #endif | 57 | rc = cbe_cpufreq_set_pmode(cpu, slow_mode); |
| 166 | return set_pmode_reg(cpu, slow_mode); | ||
| 167 | } | ||
| 168 | |||
| 169 | static void cbe_cpufreq_handle_pmi(struct of_device *dev, pmi_message_t pmi_msg) | ||
| 170 | { | ||
| 171 | u8 cpu; | ||
| 172 | u8 cbe_pmode_new; | ||
| 173 | |||
| 174 | BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE); | ||
| 175 | 58 | ||
| 176 | cpu = cbe_node_to_cpu(pmi_msg.data1); | 59 | pr_debug("register contains slow mode %d\n", cbe_cpufreq_get_pmode(cpu)); |
| 177 | cbe_pmode_new = pmi_msg.data2; | ||
| 178 | 60 | ||
| 179 | pmi_frequency_limit = cbe_freqs[cbe_pmode_new].frequency; | 61 | return rc; |
| 180 | |||
| 181 | pr_debug("cbe_handle_pmi: max freq=%d\n", pmi_frequency_limit); | ||
| 182 | } | ||
| 183 | |||
| 184 | static int pmi_notifier(struct notifier_block *nb, | ||
| 185 | unsigned long event, void *data) | ||
| 186 | { | ||
| 187 | struct cpufreq_policy *policy = data; | ||
| 188 | |||
| 189 | if (event != CPUFREQ_INCOMPATIBLE) | ||
| 190 | return 0; | ||
| 191 | |||
| 192 | cpufreq_verify_within_limits(policy, 0, pmi_frequency_limit); | ||
| 193 | return 0; | ||
| 194 | } | 62 | } |
| 195 | 63 | ||
| 196 | static struct notifier_block pmi_notifier_block = { | ||
| 197 | .notifier_call = pmi_notifier, | ||
| 198 | }; | ||
| 199 | |||
| 200 | static struct pmi_handler cbe_pmi_handler = { | ||
| 201 | .type = PMI_TYPE_FREQ_CHANGE, | ||
| 202 | .handle_pmi_message = cbe_cpufreq_handle_pmi, | ||
| 203 | }; | ||
| 204 | |||
| 205 | |||
| 206 | /* | 64 | /* |
| 207 | * cpufreq functions | 65 | * cpufreq functions |
| 208 | */ | 66 | */ |
| @@ -221,8 +79,19 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
| 221 | 79 | ||
| 222 | pr_debug("init cpufreq on CPU %d\n", policy->cpu); | 80 | pr_debug("init cpufreq on CPU %d\n", policy->cpu); |
| 223 | 81 | ||
| 82 | /* | ||
| 83 | * Let's check we can actually get to the CELL regs | ||
| 84 | */ | ||
| 85 | if (!cbe_get_cpu_pmd_regs(policy->cpu) || | ||
| 86 | !cbe_get_cpu_mic_tm_regs(policy->cpu)) { | ||
| 87 | pr_info("invalid CBE regs pointers for cpufreq\n"); | ||
| 88 | return -EINVAL; | ||
| 89 | } | ||
| 90 | |||
| 224 | max_freqp = of_get_property(cpu, "clock-frequency", NULL); | 91 | max_freqp = of_get_property(cpu, "clock-frequency", NULL); |
| 225 | 92 | ||
| 93 | of_node_put(cpu); | ||
| 94 | |||
| 226 | if (!max_freqp) | 95 | if (!max_freqp) |
| 227 | return -EINVAL; | 96 | return -EINVAL; |
| 228 | 97 | ||
| @@ -239,10 +108,12 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
| 239 | } | 108 | } |
| 240 | 109 | ||
| 241 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; | 110 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; |
| 242 | /* if DEBUG is enabled set_pmode() measures the correct latency of a transition */ | 111 | |
| 112 | /* if DEBUG is enabled set_pmode() measures the latency | ||
| 113 | * of a transition */ | ||
| 243 | policy->cpuinfo.transition_latency = 25000; | 114 | policy->cpuinfo.transition_latency = 25000; |
| 244 | 115 | ||
| 245 | cur_pmode = get_pmode(policy->cpu); | 116 | cur_pmode = cbe_cpufreq_get_pmode(policy->cpu); |
| 246 | pr_debug("current pmode is at %d\n",cur_pmode); | 117 | pr_debug("current pmode is at %d\n",cur_pmode); |
| 247 | 118 | ||
| 248 | policy->cur = cbe_freqs[cur_pmode].frequency; | 119 | policy->cur = cbe_freqs[cur_pmode].frequency; |
| @@ -253,21 +124,13 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
| 253 | 124 | ||
| 254 | cpufreq_frequency_table_get_attr(cbe_freqs, policy->cpu); | 125 | cpufreq_frequency_table_get_attr(cbe_freqs, policy->cpu); |
| 255 | 126 | ||
| 256 | if (pmi_dev) { | 127 | /* this ensures that policy->cpuinfo_min |
| 257 | /* frequency might get limited later, initialize limit with max_freq */ | 128 | * and policy->cpuinfo_max are set correctly */ |
| 258 | pmi_frequency_limit = max_freq; | ||
| 259 | cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); | ||
| 260 | } | ||
| 261 | |||
| 262 | /* this ensures that policy->cpuinfo_min and policy->cpuinfo_max are set correctly */ | ||
| 263 | return cpufreq_frequency_table_cpuinfo(policy, cbe_freqs); | 129 | return cpufreq_frequency_table_cpuinfo(policy, cbe_freqs); |
| 264 | } | 130 | } |
| 265 | 131 | ||
| 266 | static int cbe_cpufreq_cpu_exit(struct cpufreq_policy *policy) | 132 | static int cbe_cpufreq_cpu_exit(struct cpufreq_policy *policy) |
| 267 | { | 133 | { |
| 268 | if (pmi_dev) | ||
| 269 | cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); | ||
| 270 | |||
| 271 | cpufreq_frequency_table_put_attr(policy->cpu); | 134 | cpufreq_frequency_table_put_attr(policy->cpu); |
| 272 | return 0; | 135 | return 0; |
| 273 | } | 136 | } |
| @@ -277,13 +140,13 @@ static int cbe_cpufreq_verify(struct cpufreq_policy *policy) | |||
| 277 | return cpufreq_frequency_table_verify(policy, cbe_freqs); | 140 | return cpufreq_frequency_table_verify(policy, cbe_freqs); |
| 278 | } | 141 | } |
| 279 | 142 | ||
| 280 | 143 | static int cbe_cpufreq_target(struct cpufreq_policy *policy, | |
| 281 | static int cbe_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq, | 144 | unsigned int target_freq, |
| 282 | unsigned int relation) | 145 | unsigned int relation) |
| 283 | { | 146 | { |
| 284 | int rc; | 147 | int rc; |
| 285 | struct cpufreq_freqs freqs; | 148 | struct cpufreq_freqs freqs; |
| 286 | int cbe_pmode_new; | 149 | unsigned int cbe_pmode_new; |
| 287 | 150 | ||
| 288 | cpufreq_frequency_table_target(policy, | 151 | cpufreq_frequency_table_target(policy, |
| 289 | cbe_freqs, | 152 | cbe_freqs, |
| @@ -298,12 +161,14 @@ static int cbe_cpufreq_target(struct cpufreq_policy *policy, unsigned int target | |||
| 298 | mutex_lock(&cbe_switch_mutex); | 161 | mutex_lock(&cbe_switch_mutex); |
| 299 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 162 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
| 300 | 163 | ||
| 301 | pr_debug("setting frequency for cpu %d to %d kHz, 1/%d of max frequency\n", | 164 | pr_debug("setting frequency for cpu %d to %d kHz, " \ |
| 165 | "1/%d of max frequency\n", | ||
| 302 | policy->cpu, | 166 | policy->cpu, |
| 303 | cbe_freqs[cbe_pmode_new].frequency, | 167 | cbe_freqs[cbe_pmode_new].frequency, |
| 304 | cbe_freqs[cbe_pmode_new].index); | 168 | cbe_freqs[cbe_pmode_new].index); |
| 305 | 169 | ||
| 306 | rc = set_pmode(policy->cpu, cbe_pmode_new); | 170 | rc = set_pmode(policy->cpu, cbe_pmode_new); |
| 171 | |||
| 307 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 172 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
| 308 | mutex_unlock(&cbe_switch_mutex); | 173 | mutex_unlock(&cbe_switch_mutex); |
| 309 | 174 | ||
| @@ -326,28 +191,14 @@ static struct cpufreq_driver cbe_cpufreq_driver = { | |||
| 326 | 191 | ||
| 327 | static int __init cbe_cpufreq_init(void) | 192 | static int __init cbe_cpufreq_init(void) |
| 328 | { | 193 | { |
| 329 | #ifdef CONFIG_PPC_PMI | ||
| 330 | struct device_node *np; | ||
| 331 | #endif | ||
| 332 | if (!machine_is(cell)) | 194 | if (!machine_is(cell)) |
| 333 | return -ENODEV; | 195 | return -ENODEV; |
| 334 | #ifdef CONFIG_PPC_PMI | ||
| 335 | np = of_find_node_by_type(NULL, "ibm,pmi"); | ||
| 336 | |||
| 337 | pmi_dev = of_find_device_by_node(np); | ||
| 338 | 196 | ||
| 339 | if (pmi_dev) | ||
| 340 | pmi_register_handler(pmi_dev, &cbe_pmi_handler); | ||
| 341 | #endif | ||
| 342 | return cpufreq_register_driver(&cbe_cpufreq_driver); | 197 | return cpufreq_register_driver(&cbe_cpufreq_driver); |
| 343 | } | 198 | } |
| 344 | 199 | ||
| 345 | static void __exit cbe_cpufreq_exit(void) | 200 | static void __exit cbe_cpufreq_exit(void) |
| 346 | { | 201 | { |
| 347 | #ifdef CONFIG_PPC_PMI | ||
| 348 | if (pmi_dev) | ||
| 349 | pmi_unregister_handler(pmi_dev, &cbe_pmi_handler); | ||
| 350 | #endif | ||
| 351 | cpufreq_unregister_driver(&cbe_cpufreq_driver); | 202 | cpufreq_unregister_driver(&cbe_cpufreq_driver); |
| 352 | } | 203 | } |
| 353 | 204 | ||
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.h b/arch/powerpc/platforms/cell/cbe_cpufreq.h new file mode 100644 index 000000000000..c1d86bfa92ff --- /dev/null +++ b/arch/powerpc/platforms/cell/cbe_cpufreq.h | |||
| @@ -0,0 +1,24 @@ | |||
| 1 | /* | ||
| 2 | * cbe_cpufreq.h | ||
| 3 | * | ||
| 4 | * This file contains the definitions used by the cbe_cpufreq driver. | ||
| 5 | * | ||
| 6 | * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 | ||
| 7 | * | ||
| 8 | * Author: Christian Krafft <krafft@de.ibm.com> | ||
| 9 | * | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/cpufreq.h> | ||
| 13 | #include <linux/types.h> | ||
| 14 | |||
| 15 | int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode); | ||
| 16 | int cbe_cpufreq_get_pmode(int cpu); | ||
| 17 | |||
| 18 | int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode); | ||
| 19 | |||
| 20 | #if defined(CONFIG_CBE_CPUFREQ_PMI) || defined(CONFIG_CBE_CPUFREQ_PMI_MODULE) | ||
| 21 | extern bool cbe_cpufreq_has_pmi; | ||
| 22 | #else | ||
| 23 | #define cbe_cpufreq_has_pmi (0) | ||
| 24 | #endif | ||
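[Editor's note] cbe_cpufreq.h uses the standard kbuild idiom for tristate options: kbuild defines CONFIG_FOO when an option is built in and CONFIG_FOO_MODULE when it is built as a module, so checking both covers "enabled in any form", and the #else macro lets the compiler discard the PMI path entirely. A small sketch of the idiom, assuming a made-up option CONFIG_DEMO_PMI:

    #include <stdio.h>

    #if defined(CONFIG_DEMO_PMI) || defined(CONFIG_DEMO_PMI_MODULE)
    extern int demo_has_pmi;          /* real variable lives in the backend */
    #else
    #define demo_has_pmi (0)          /* compiles the PMI path away */
    #endif

    int main(void)
    {
        /* With the fallback macro, the dead branch is elided at
         * compile time and no external symbol is referenced. */
        if (demo_has_pmi)
            printf("PMI backend available\n");
        else
            printf("using pervasive-register backend\n");
        return 0;
    }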
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c new file mode 100644 index 000000000000..163263b3e1cd --- /dev/null +++ b/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c | |||
| @@ -0,0 +1,115 @@ | |||
| 1 | /* | ||
| 2 | * pervasive backend for the cbe_cpufreq driver | ||
| 3 | * | ||
| 4 | * This driver makes use of the pervasive unit to | ||
| 5 | * engage the desired frequency. | ||
| 6 | * | ||
| 7 | * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 | ||
| 8 | * | ||
| 9 | * Author: Christian Krafft <krafft@de.ibm.com> | ||
| 10 | * | ||
| 11 | * This program is free software; you can redistribute it and/or modify | ||
| 12 | * it under the terms of the GNU General Public License as published by | ||
| 13 | * the Free Software Foundation; either version 2, or (at your option) | ||
| 14 | * any later version. | ||
| 15 | * | ||
| 16 | * This program is distributed in the hope that it will be useful, | ||
| 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 19 | * GNU General Public License for more details. | ||
| 20 | * | ||
| 21 | * You should have received a copy of the GNU General Public License | ||
| 22 | * along with this program; if not, write to the Free Software | ||
| 23 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 24 | */ | ||
| 25 | |||
| 26 | #include <linux/io.h> | ||
| 27 | #include <linux/kernel.h> | ||
| 28 | #include <linux/time.h> | ||
| 29 | #include <asm/machdep.h> | ||
| 30 | #include <asm/hw_irq.h> | ||
| 31 | |||
| 32 | #include "cbe_regs.h" | ||
| 33 | #include "cbe_cpufreq.h" | ||
| 34 | |||
| 35 | /* to write to MIC register */ | ||
| 36 | static u64 MIC_Slow_Fast_Timer_table[] = { | ||
| 37 | [0 ... 7] = 0x007fc00000000000ull, | ||
| 38 | }; | ||
| 39 | |||
| 40 | /* more values for the MIC */ | ||
| 41 | static u64 MIC_Slow_Next_Timer_table[] = { | ||
| 42 | 0x0000240000000000ull, | ||
| 43 | 0x0000268000000000ull, | ||
| 44 | 0x000029C000000000ull, | ||
| 45 | 0x00002D0000000000ull, | ||
| 46 | 0x0000300000000000ull, | ||
| 47 | 0x0000334000000000ull, | ||
| 48 | 0x000039C000000000ull, | ||
| 49 | 0x00003FC000000000ull, | ||
| 50 | }; | ||
| 51 | |||
| 52 | |||
| 53 | int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode) | ||
| 54 | { | ||
| 55 | struct cbe_pmd_regs __iomem *pmd_regs; | ||
| 56 | struct cbe_mic_tm_regs __iomem *mic_tm_regs; | ||
| 57 | u64 flags; | ||
| 58 | u64 value; | ||
| 59 | #ifdef DEBUG | ||
| 60 | long time; | ||
| 61 | #endif | ||
| 62 | |||
| 63 | local_irq_save(flags); | ||
| 64 | |||
| 65 | mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu); | ||
| 66 | pmd_regs = cbe_get_cpu_pmd_regs(cpu); | ||
| 67 | |||
| 68 | #ifdef DEBUG | ||
| 69 | time = jiffies; | ||
| 70 | #endif | ||
| 71 | |||
| 72 | out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]); | ||
| 73 | out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]); | ||
| 74 | |||
| 75 | out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]); | ||
| 76 | out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]); | ||
| 77 | |||
| 78 | value = in_be64(&pmd_regs->pmcr); | ||
| 79 | /* set bits to zero */ | ||
| 80 | value &= 0xFFFFFFFFFFFFFFF8ull; | ||
| 81 | /* set bits to next pmode */ | ||
| 82 | value |= pmode; | ||
| 83 | |||
| 84 | out_be64(&pmd_regs->pmcr, value); | ||
| 85 | |||
| 86 | #ifdef DEBUG | ||
| 87 | /* wait until new pmode appears in status register */ | ||
| 88 | value = in_be64(&pmd_regs->pmsr) & 0x07; | ||
| 89 | while (value != pmode) { | ||
| 90 | cpu_relax(); | ||
| 91 | value = in_be64(&pmd_regs->pmsr) & 0x07; | ||
| 92 | } | ||
| 93 | |||
| 94 | time = jiffies - time; | ||
| 95 | time = jiffies_to_msecs(time); | ||
| 96 | pr_debug("had to wait %lu ms for a transition using " \ | ||
| 97 | "pervasive unit\n", time); | ||
| 98 | #endif | ||
| 99 | local_irq_restore(flags); | ||
| 100 | |||
| 101 | return 0; | ||
| 102 | } | ||
| 103 | |||
| 104 | |||
| 105 | int cbe_cpufreq_get_pmode(int cpu) | ||
| 106 | { | ||
| 107 | int ret; | ||
| 108 | struct cbe_pmd_regs __iomem *pmd_regs; | ||
| 109 | |||
| 110 | pmd_regs = cbe_get_cpu_pmd_regs(cpu); | ||
| 111 | ret = in_be64(&pmd_regs->pmsr) & 0x07; | ||
| 112 | |||
| 113 | return ret; | ||
| 114 | } | ||
| 115 | |||
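[Editor's note] cbe_cpufreq_set_pmode() above updates only the low three bits of pmcr with a read-modify-write, leaving the rest of the 64-bit register intact. A tiny standalone sketch of that field update, using plain memory in place of the memory-mapped register (the initial register value is made up):

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t fake_pmcr = 0xABCD000000000005ull;  /* pretend register */

    static void set_pmode_bits(uint64_t *reg, unsigned int pmode)
    {
        uint64_t value = *reg;

        value &= 0xFFFFFFFFFFFFFFF8ull;  /* clear the 3-bit pmode field */
        value |= (pmode & 0x7);          /* install the new pmode */
        *reg = value;
    }

    int main(void)
    {
        set_pmode_bits(&fake_pmcr, 3);
        printf("pmcr = 0x%016llx\n", (unsigned long long)fake_pmcr);
        return 0;
    }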
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c new file mode 100644 index 000000000000..fc6f38982ff4 --- /dev/null +++ b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c | |||
| @@ -0,0 +1,148 @@ | |||
| 1 | /* | ||
| 2 | * pmi backend for the cbe_cpufreq driver | ||
| 3 | * | ||
| 4 | * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 | ||
| 5 | * | ||
| 6 | * Author: Christian Krafft <krafft@de.ibm.com> | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or modify | ||
| 9 | * it under the terms of the GNU General Public License as published by | ||
| 10 | * the Free Software Foundation; either version 2, or (at your option) | ||
| 11 | * any later version. | ||
| 12 | * | ||
| 13 | * This program is distributed in the hope that it will be useful, | ||
| 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 16 | * GNU General Public License for more details. | ||
| 17 | * | ||
| 18 | * You should have received a copy of the GNU General Public License | ||
| 19 | * along with this program; if not, write to the Free Software | ||
| 20 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 21 | */ | ||
| 22 | |||
| 23 | #include <linux/kernel.h> | ||
| 24 | #include <linux/types.h> | ||
| 25 | #include <linux/timer.h> | ||
| 26 | #include <asm/of_platform.h> | ||
| 27 | #include <asm/processor.h> | ||
| 28 | #include <asm/prom.h> | ||
| 29 | #include <asm/pmi.h> | ||
| 30 | |||
| 31 | #ifdef DEBUG | ||
| 32 | #include <asm/time.h> | ||
| 33 | #endif | ||
| 34 | |||
| 35 | #include "cbe_regs.h" | ||
| 36 | #include "cbe_cpufreq.h" | ||
| 37 | |||
| 38 | static u8 pmi_slow_mode_limit[MAX_CBE]; | ||
| 39 | |||
| 40 | bool cbe_cpufreq_has_pmi = false; | ||
| 41 | EXPORT_SYMBOL_GPL(cbe_cpufreq_has_pmi); | ||
| 42 | |||
| 43 | /* | ||
| 44 | * hardware specific functions | ||
| 45 | */ | ||
| 46 | |||
| 47 | int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode) | ||
| 48 | { | ||
| 49 | int ret; | ||
| 50 | pmi_message_t pmi_msg; | ||
| 51 | #ifdef DEBUG | ||
| 52 | long time; | ||
| 53 | #endif | ||
| 54 | pmi_msg.type = PMI_TYPE_FREQ_CHANGE; | ||
| 55 | pmi_msg.data1 = cbe_cpu_to_node(cpu); | ||
| 56 | pmi_msg.data2 = pmode; | ||
| 57 | |||
| 58 | #ifdef DEBUG | ||
| 59 | time = jiffies; | ||
| 60 | #endif | ||
| 61 | pmi_send_message(pmi_msg); | ||
| 62 | |||
| 63 | #ifdef DEBUG | ||
| 64 | time = jiffies - time; | ||
| 65 | time = jiffies_to_msecs(time); | ||
| 66 | pr_debug("had to wait %lu ms for a transition using " \ | ||
| 67 | "PMI\n", time); | ||
| 68 | #endif | ||
| 69 | ret = pmi_msg.data2; | ||
| 70 | pr_debug("PMI returned slow mode %d\n", ret); | ||
| 71 | |||
| 72 | return ret; | ||
| 73 | } | ||
| 74 | EXPORT_SYMBOL_GPL(cbe_cpufreq_set_pmode_pmi); | ||
| 75 | |||
| 76 | |||
| 77 | static void cbe_cpufreq_handle_pmi(pmi_message_t pmi_msg) | ||
| 78 | { | ||
| 79 | u8 node, slow_mode; | ||
| 80 | |||
| 81 | BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE); | ||
| 82 | |||
| 83 | node = pmi_msg.data1; | ||
| 84 | slow_mode = pmi_msg.data2; | ||
| 85 | |||
| 86 | pmi_slow_mode_limit[node] = slow_mode; | ||
| 87 | |||
| 88 | pr_debug("cbe_handle_pmi: node: %d max_freq: %d\n", node, slow_mode); | ||
| 89 | } | ||
| 90 | |||
| 91 | static int pmi_notifier(struct notifier_block *nb, | ||
| 92 | unsigned long event, void *data) | ||
| 93 | { | ||
| 94 | struct cpufreq_policy *policy = data; | ||
| 95 | struct cpufreq_frequency_table *cbe_freqs; | ||
| 96 | u8 node; | ||
| 97 | |||
| 98 | cbe_freqs = cpufreq_frequency_get_table(policy->cpu); | ||
| 99 | node = cbe_cpu_to_node(policy->cpu); | ||
| 100 | |||
| 101 | pr_debug("got notified, event=%lu, node=%u\n", event, node); | ||
| 102 | |||
| 103 | if (pmi_slow_mode_limit[node] != 0) { | ||
| 104 | pr_debug("limiting node %d to slow mode %d\n", | ||
| 105 | node, pmi_slow_mode_limit[node]); | ||
| 106 | |||
| 107 | cpufreq_verify_within_limits(policy, 0, | ||
| 108 | |||
| 109 | cbe_freqs[pmi_slow_mode_limit[node]].frequency); | ||
| 110 | } | ||
| 111 | |||
| 112 | return 0; | ||
| 113 | } | ||
| 114 | |||
| 115 | static struct notifier_block pmi_notifier_block = { | ||
| 116 | .notifier_call = pmi_notifier, | ||
| 117 | }; | ||
| 118 | |||
| 119 | static struct pmi_handler cbe_pmi_handler = { | ||
| 120 | .type = PMI_TYPE_FREQ_CHANGE, | ||
| 121 | .handle_pmi_message = cbe_cpufreq_handle_pmi, | ||
| 122 | }; | ||
| 123 | |||
| 124 | |||
| 125 | |||
| 126 | static int __init cbe_cpufreq_pmi_init(void) | ||
| 127 | { | ||
| 128 | cbe_cpufreq_has_pmi = pmi_register_handler(&cbe_pmi_handler) == 0; | ||
| 129 | |||
| 130 | if (!cbe_cpufreq_has_pmi) | ||
| 131 | return -ENODEV; | ||
| 132 | |||
| 133 | cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); | ||
| 134 | |||
| 135 | return 0; | ||
| 136 | } | ||
| 137 | |||
| 138 | static void __exit cbe_cpufreq_pmi_exit(void) | ||
| 139 | { | ||
| 140 | cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); | ||
| 141 | pmi_unregister_handler(&cbe_pmi_handler); | ||
| 142 | } | ||
| 143 | |||
| 144 | module_init(cbe_cpufreq_pmi_init); | ||
| 145 | module_exit(cbe_cpufreq_pmi_exit); | ||
| 146 | |||
| 147 | MODULE_LICENSE("GPL"); | ||
| 148 | MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>"); | ||
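[Editor's note] In cbe_cpufreq_pmi.c, firmware announces a per-node slow-mode limit and the cpufreq policy notifier clamps the allowed maximum to the frequency at that table index. A sketch of the clamping logic, with illustrative table values (not the real CBE frequencies):

    #include <stdio.h>

    #define MAX_NODES 4

    static unsigned int freq_table[] = { 3200000, 1600000, 1066000, 800000 };
    static unsigned char slow_mode_limit[MAX_NODES];

    struct policy { unsigned int max_khz; };

    static void apply_pmi_limit(struct policy *p, int node)
    {
        /* Index 0 means "no limit announced yet", as in the driver. */
        if (slow_mode_limit[node] != 0 &&
            p->max_khz > freq_table[slow_mode_limit[node]])
            p->max_khz = freq_table[slow_mode_limit[node]];
    }

    int main(void)
    {
        struct policy p = { .max_khz = 3200000 };

        slow_mode_limit[1] = 2;          /* firmware limits node 1 */
        apply_pmi_limit(&p, 1);
        printf("clamped max = %u kHz\n", p.max_khz);
        return 0;
    }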
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c index 12c9674b4b1f..c8f7f0007422 100644 --- a/arch/powerpc/platforms/cell/cbe_regs.c +++ b/arch/powerpc/platforms/cell/cbe_regs.c | |||
| @@ -174,6 +174,13 @@ static struct device_node *cbe_get_be_node(int cpu_id) | |||
| 174 | 174 | ||
| 175 | cpu_handle = of_get_property(np, "cpus", &len); | 175 | cpu_handle = of_get_property(np, "cpus", &len); |
| 176 | 176 | ||
| 177 | /* | ||
| 178 | * the CAB SLOF tree is non-compliant, so we just assume | ||
| 179 | * there is only one node | ||
| 180 | */ | ||
| 181 | if (WARN_ON_ONCE(!cpu_handle)) | ||
| 182 | return np; | ||
| 183 | |||
| 177 | for (i=0; i<len; i++) | 184 | for (i=0; i<len; i++) |
| 178 | if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL)) | 185 | if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL)) |
| 179 | return np; | 186 | return np; |
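[Editor's note] The cbe_regs.c hunk works around a non-compliant CAB SLOF device tree by warning once and assuming the sole BE node matches, rather than failing hard. A sketch of that warn-once fallback, with a stubbed property lookup standing in for of_get_property():

    #include <stdio.h>

    /* Simulate a device tree where the "cpus" property is missing. */
    static const int *lookup_cpus_prop(int node)
    {
        (void)node;
        return NULL;
    }

    static int node_matches_cpu(int node, int cpu)
    {
        static int warned;
        const int *cpus = lookup_cpus_prop(node);

        if (!cpus) {
            /* Warn a single time, then assume the sole node matches,
             * mirroring the WARN_ON_ONCE() fallback above. */
            if (!warned) {
                warned = 1;
                fprintf(stderr, "WARN: node %d lacks a cpus property\n",
                        node);
            }
            return 1;
        }
        /* Real phandle matching elided. */
        return cpus[0] == cpu;
    }

    int main(void)
    {
        printf("match: %d\n", node_matches_cpu(0, 0));
        printf("match: %d\n", node_matches_cpu(1, 0));
        return 0;
    }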
diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c index f370f0fa6f4c..e4132f8f51b3 100644 --- a/arch/powerpc/platforms/cell/cbe_thermal.c +++ b/arch/powerpc/platforms/cell/cbe_thermal.c | |||
| @@ -292,7 +292,7 @@ static struct attribute_group ppe_attribute_group = { | |||
| 292 | /* | 292 | /* |
| 293 | * initialize throttling with default values | 293 | * initialize throttling with default values |
| 294 | */ | 294 | */ |
| 295 | static void __init init_default_values(void) | 295 | static int __init init_default_values(void) |
| 296 | { | 296 | { |
| 297 | int cpu; | 297 | int cpu; |
| 298 | struct cbe_pmd_regs __iomem *pmd_regs; | 298 | struct cbe_pmd_regs __iomem *pmd_regs; |
| @@ -339,25 +339,40 @@ static void __init init_default_values(void) | |||
| 339 | for_each_possible_cpu (cpu) { | 339 | for_each_possible_cpu (cpu) { |
| 340 | pr_debug("processing cpu %d\n", cpu); | 340 | pr_debug("processing cpu %d\n", cpu); |
| 341 | sysdev = get_cpu_sysdev(cpu); | 341 | sysdev = get_cpu_sysdev(cpu); |
| 342 | |||
| 343 | if (!sysdev) { | ||
| 344 | pr_info("invalid sysdev pointer for cbe_thermal\n"); | ||
| 345 | return -EINVAL; | ||
| 346 | } | ||
| 347 | |||
| 342 | pmd_regs = cbe_get_cpu_pmd_regs(sysdev->id); | 348 | pmd_regs = cbe_get_cpu_pmd_regs(sysdev->id); |
| 343 | 349 | ||
| 350 | if (!pmd_regs) { | ||
| 351 | pr_info("invalid CBE regs pointer for cbe_thermal\n"); | ||
| 352 | return -EINVAL; | ||
| 353 | } | ||
| 354 | |||
| 344 | out_be64(&pmd_regs->tm_str2, str2); | 355 | out_be64(&pmd_regs->tm_str2, str2); |
| 345 | out_be64(&pmd_regs->tm_str1.val, str1.val); | 356 | out_be64(&pmd_regs->tm_str1.val, str1.val); |
| 346 | out_be64(&pmd_regs->tm_tpr.val, tpr.val); | 357 | out_be64(&pmd_regs->tm_tpr.val, tpr.val); |
| 347 | out_be64(&pmd_regs->tm_cr1.val, cr1.val); | 358 | out_be64(&pmd_regs->tm_cr1.val, cr1.val); |
| 348 | out_be64(&pmd_regs->tm_cr2, cr2); | 359 | out_be64(&pmd_regs->tm_cr2, cr2); |
| 349 | } | 360 | } |
| 361 | |||
| 362 | return 0; | ||
| 350 | } | 363 | } |
| 351 | 364 | ||
| 352 | 365 | ||
| 353 | static int __init thermal_init(void) | 366 | static int __init thermal_init(void) |
| 354 | { | 367 | { |
| 355 | init_default_values(); | 368 | int rc = init_default_values(); |
| 356 | 369 | ||
| 357 | spu_add_sysdev_attr_group(&spu_attribute_group); | 370 | if (rc == 0) { |
| 358 | cpu_add_sysdev_attr_group(&ppe_attribute_group); | 371 | spu_add_sysdev_attr_group(&spu_attribute_group); |
| 372 | cpu_add_sysdev_attr_group(&ppe_attribute_group); | ||
| 373 | } | ||
| 359 | 374 | ||
| 360 | return 0; | 375 | return rc; |
| 361 | } | 376 | } |
| 362 | module_init(thermal_init); | 377 | module_init(thermal_init); |
| 363 | 378 | ||
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 96a8f609690c..90124228b8f4 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c | |||
| @@ -35,18 +35,37 @@ | |||
| 35 | #include <asm/spu.h> | 35 | #include <asm/spu.h> |
| 36 | #include <asm/spu_priv1.h> | 36 | #include <asm/spu_priv1.h> |
| 37 | #include <asm/xmon.h> | 37 | #include <asm/xmon.h> |
| 38 | #include <asm/prom.h> | ||
| 39 | #include "spu_priv1_mmio.h" | ||
| 38 | 40 | ||
| 39 | const struct spu_management_ops *spu_management_ops; | 41 | const struct spu_management_ops *spu_management_ops; |
| 40 | EXPORT_SYMBOL_GPL(spu_management_ops); | 42 | EXPORT_SYMBOL_GPL(spu_management_ops); |
| 41 | 43 | ||
| 42 | const struct spu_priv1_ops *spu_priv1_ops; | 44 | const struct spu_priv1_ops *spu_priv1_ops; |
| 45 | EXPORT_SYMBOL_GPL(spu_priv1_ops); | ||
| 43 | 46 | ||
| 44 | static struct list_head spu_list[MAX_NUMNODES]; | 47 | struct cbe_spu_info cbe_spu_info[MAX_NUMNODES]; |
| 45 | static LIST_HEAD(spu_full_list); | 48 | EXPORT_SYMBOL_GPL(cbe_spu_info); |
| 46 | static DEFINE_MUTEX(spu_mutex); | ||
| 47 | static DEFINE_SPINLOCK(spu_list_lock); | ||
| 48 | 49 | ||
| 49 | EXPORT_SYMBOL_GPL(spu_priv1_ops); | 50 | /* |
| 51 | * Protects cbe_spu_info and spu->number. | ||
| 52 | */ | ||
| 53 | static DEFINE_SPINLOCK(spu_lock); | ||
| 54 | |||
| 55 | /* | ||
| 56 | * List of all spus in the system. | ||
| 57 | * | ||
| 58 | * This list is iterated by callers from irq context and callers that | ||
| 59 | * want to sleep. Thus modifications need to be done with both | ||
| 60 | * spu_full_list_lock and spu_full_list_mutex held, while iterating | ||
| 61 | * through it requires either of these locks. | ||
| 62 | * | ||
| 63 | * In addition, spu_full_list_lock protects all assignments to | ||
| 64 | * spu->mm. | ||
| 65 | */ | ||
| 66 | static LIST_HEAD(spu_full_list); | ||
| 67 | static DEFINE_SPINLOCK(spu_full_list_lock); | ||
| 68 | static DEFINE_MUTEX(spu_full_list_mutex); | ||
| 50 | 69 | ||
| 51 | void spu_invalidate_slbs(struct spu *spu) | 70 | void spu_invalidate_slbs(struct spu *spu) |
| 52 | { | 71 | { |
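[Editor's note] The new comment block in this hunk spells out the spu_full_list locking rule: writers hold both the spinlock and the mutex, so a reader may hold either one alone; sleepable readers take the mutex, atomic-context readers take the spinlock. A user-space sketch of that rule, with pthread locks standing in for the kernel primitives (compile with -pthread):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t list_mutex = PTHREAD_MUTEX_INITIALIZER;
    static pthread_spinlock_t list_lock;
    static int list_len;                    /* stand-in for the list itself */

    /* Modification: hold BOTH locks, as create_spu() does. */
    static void list_add_entry(void)
    {
        pthread_mutex_lock(&list_mutex);
        pthread_spin_lock(&list_lock);
        list_len++;
        pthread_spin_unlock(&list_lock);
        pthread_mutex_unlock(&list_mutex);
    }

    /* Atomic-context style reader: the spinlock alone suffices. */
    static int read_fast(void)
    {
        int n;

        pthread_spin_lock(&list_lock);
        n = list_len;
        pthread_spin_unlock(&list_lock);
        return n;
    }

    /* Reader that may sleep: the mutex alone suffices. */
    static int read_sleepable(void)
    {
        int n;

        pthread_mutex_lock(&list_mutex);
        n = list_len;                       /* could block safely here */
        pthread_mutex_unlock(&list_mutex);
        return n;
    }

    int main(void)
    {
        pthread_spin_init(&list_lock, PTHREAD_PROCESS_PRIVATE);
        list_add_entry();
        printf("fast=%d sleepable=%d\n", read_fast(), read_sleepable());
        return 0;
    }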
| @@ -65,12 +84,12 @@ void spu_flush_all_slbs(struct mm_struct *mm) | |||
| 65 | struct spu *spu; | 84 | struct spu *spu; |
| 66 | unsigned long flags; | 85 | unsigned long flags; |
| 67 | 86 | ||
| 68 | spin_lock_irqsave(&spu_list_lock, flags); | 87 | spin_lock_irqsave(&spu_full_list_lock, flags); |
| 69 | list_for_each_entry(spu, &spu_full_list, full_list) { | 88 | list_for_each_entry(spu, &spu_full_list, full_list) { |
| 70 | if (spu->mm == mm) | 89 | if (spu->mm == mm) |
| 71 | spu_invalidate_slbs(spu); | 90 | spu_invalidate_slbs(spu); |
| 72 | } | 91 | } |
| 73 | spin_unlock_irqrestore(&spu_list_lock, flags); | 92 | spin_unlock_irqrestore(&spu_full_list_lock, flags); |
| 74 | } | 93 | } |
| 75 | 94 | ||
| 76 | /* The hack below stinks... try to do something better one of | 95 | /* The hack below stinks... try to do something better one of |
| @@ -88,9 +107,9 @@ void spu_associate_mm(struct spu *spu, struct mm_struct *mm) | |||
| 88 | { | 107 | { |
| 89 | unsigned long flags; | 108 | unsigned long flags; |
| 90 | 109 | ||
| 91 | spin_lock_irqsave(&spu_list_lock, flags); | 110 | spin_lock_irqsave(&spu_full_list_lock, flags); |
| 92 | spu->mm = mm; | 111 | spu->mm = mm; |
| 93 | spin_unlock_irqrestore(&spu_list_lock, flags); | 112 | spin_unlock_irqrestore(&spu_full_list_lock, flags); |
| 94 | if (mm) | 113 | if (mm) |
| 95 | mm_needs_global_tlbie(mm); | 114 | mm_needs_global_tlbie(mm); |
| 96 | } | 115 | } |
| @@ -390,7 +409,7 @@ static void spu_free_irqs(struct spu *spu) | |||
| 390 | free_irq(spu->irqs[2], spu); | 409 | free_irq(spu->irqs[2], spu); |
| 391 | } | 410 | } |
| 392 | 411 | ||
| 393 | static void spu_init_channels(struct spu *spu) | 412 | void spu_init_channels(struct spu *spu) |
| 394 | { | 413 | { |
| 395 | static const struct { | 414 | static const struct { |
| 396 | unsigned channel; | 415 | unsigned channel; |
| @@ -423,46 +442,7 @@ static void spu_init_channels(struct spu *spu) | |||
| 423 | out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count); | 442 | out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count); |
| 424 | } | 443 | } |
| 425 | } | 444 | } |
| 426 | 445 | EXPORT_SYMBOL_GPL(spu_init_channels); | |
| 427 | struct spu *spu_alloc_node(int node) | ||
| 428 | { | ||
| 429 | struct spu *spu = NULL; | ||
| 430 | |||
| 431 | mutex_lock(&spu_mutex); | ||
| 432 | if (!list_empty(&spu_list[node])) { | ||
| 433 | spu = list_entry(spu_list[node].next, struct spu, list); | ||
| 434 | list_del_init(&spu->list); | ||
| 435 | pr_debug("Got SPU %d %d\n", spu->number, spu->node); | ||
| 436 | } | ||
| 437 | mutex_unlock(&spu_mutex); | ||
| 438 | |||
| 439 | if (spu) | ||
| 440 | spu_init_channels(spu); | ||
| 441 | return spu; | ||
| 442 | } | ||
| 443 | EXPORT_SYMBOL_GPL(spu_alloc_node); | ||
| 444 | |||
| 445 | struct spu *spu_alloc(void) | ||
| 446 | { | ||
| 447 | struct spu *spu = NULL; | ||
| 448 | int node; | ||
| 449 | |||
| 450 | for (node = 0; node < MAX_NUMNODES; node++) { | ||
| 451 | spu = spu_alloc_node(node); | ||
| 452 | if (spu) | ||
| 453 | break; | ||
| 454 | } | ||
| 455 | |||
| 456 | return spu; | ||
| 457 | } | ||
| 458 | |||
| 459 | void spu_free(struct spu *spu) | ||
| 460 | { | ||
| 461 | mutex_lock(&spu_mutex); | ||
| 462 | list_add_tail(&spu->list, &spu_list[spu->node]); | ||
| 463 | mutex_unlock(&spu_mutex); | ||
| 464 | } | ||
| 465 | EXPORT_SYMBOL_GPL(spu_free); | ||
| 466 | 446 | ||
| 467 | static int spu_shutdown(struct sys_device *sysdev) | 447 | static int spu_shutdown(struct sys_device *sysdev) |
| 468 | { | 448 | { |
| @@ -481,12 +461,12 @@ struct sysdev_class spu_sysdev_class = { | |||
| 481 | int spu_add_sysdev_attr(struct sysdev_attribute *attr) | 461 | int spu_add_sysdev_attr(struct sysdev_attribute *attr) |
| 482 | { | 462 | { |
| 483 | struct spu *spu; | 463 | struct spu *spu; |
| 484 | mutex_lock(&spu_mutex); | ||
| 485 | 464 | ||
| 465 | mutex_lock(&spu_full_list_mutex); | ||
| 486 | list_for_each_entry(spu, &spu_full_list, full_list) | 466 | list_for_each_entry(spu, &spu_full_list, full_list) |
| 487 | sysdev_create_file(&spu->sysdev, attr); | 467 | sysdev_create_file(&spu->sysdev, attr); |
| 468 | mutex_unlock(&spu_full_list_mutex); | ||
| 488 | 469 | ||
| 489 | mutex_unlock(&spu_mutex); | ||
| 490 | return 0; | 470 | return 0; |
| 491 | } | 471 | } |
| 492 | EXPORT_SYMBOL_GPL(spu_add_sysdev_attr); | 472 | EXPORT_SYMBOL_GPL(spu_add_sysdev_attr); |
| @@ -494,12 +474,12 @@ EXPORT_SYMBOL_GPL(spu_add_sysdev_attr); | |||
| 494 | int spu_add_sysdev_attr_group(struct attribute_group *attrs) | 474 | int spu_add_sysdev_attr_group(struct attribute_group *attrs) |
| 495 | { | 475 | { |
| 496 | struct spu *spu; | 476 | struct spu *spu; |
| 497 | mutex_lock(&spu_mutex); | ||
| 498 | 477 | ||
| 478 | mutex_lock(&spu_full_list_mutex); | ||
| 499 | list_for_each_entry(spu, &spu_full_list, full_list) | 479 | list_for_each_entry(spu, &spu_full_list, full_list) |
| 500 | sysfs_create_group(&spu->sysdev.kobj, attrs); | 480 | sysfs_create_group(&spu->sysdev.kobj, attrs); |
| 481 | mutex_unlock(&spu_full_list_mutex); | ||
| 501 | 482 | ||
| 502 | mutex_unlock(&spu_mutex); | ||
| 503 | return 0; | 483 | return 0; |
| 504 | } | 484 | } |
| 505 | EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group); | 485 | EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group); |
| @@ -508,24 +488,22 @@ EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group); | |||
| 508 | void spu_remove_sysdev_attr(struct sysdev_attribute *attr) | 488 | void spu_remove_sysdev_attr(struct sysdev_attribute *attr) |
| 509 | { | 489 | { |
| 510 | struct spu *spu; | 490 | struct spu *spu; |
| 511 | mutex_lock(&spu_mutex); | ||
| 512 | 491 | ||
| 492 | mutex_lock(&spu_full_list_mutex); | ||
| 513 | list_for_each_entry(spu, &spu_full_list, full_list) | 493 | list_for_each_entry(spu, &spu_full_list, full_list) |
| 514 | sysdev_remove_file(&spu->sysdev, attr); | 494 | sysdev_remove_file(&spu->sysdev, attr); |
| 515 | 495 | mutex_unlock(&spu_full_list_mutex); | |
| 516 | mutex_unlock(&spu_mutex); | ||
| 517 | } | 496 | } |
| 518 | EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr); | 497 | EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr); |
| 519 | 498 | ||
| 520 | void spu_remove_sysdev_attr_group(struct attribute_group *attrs) | 499 | void spu_remove_sysdev_attr_group(struct attribute_group *attrs) |
| 521 | { | 500 | { |
| 522 | struct spu *spu; | 501 | struct spu *spu; |
| 523 | mutex_lock(&spu_mutex); | ||
| 524 | 502 | ||
| 503 | mutex_lock(&spu_full_list_mutex); | ||
| 525 | list_for_each_entry(spu, &spu_full_list, full_list) | 504 | list_for_each_entry(spu, &spu_full_list, full_list) |
| 526 | sysfs_remove_group(&spu->sysdev.kobj, attrs); | 505 | sysfs_remove_group(&spu->sysdev.kobj, attrs); |
| 527 | 506 | mutex_unlock(&spu_full_list_mutex); | |
| 528 | mutex_unlock(&spu_mutex); | ||
| 529 | } | 507 | } |
| 530 | EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr_group); | 508 | EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr_group); |
| 531 | 509 | ||
| @@ -553,16 +531,19 @@ static int __init create_spu(void *data) | |||
| 553 | int ret; | 531 | int ret; |
| 554 | static int number; | 532 | static int number; |
| 555 | unsigned long flags; | 533 | unsigned long flags; |
| 534 | struct timespec ts; | ||
| 556 | 535 | ||
| 557 | ret = -ENOMEM; | 536 | ret = -ENOMEM; |
| 558 | spu = kzalloc(sizeof (*spu), GFP_KERNEL); | 537 | spu = kzalloc(sizeof (*spu), GFP_KERNEL); |
| 559 | if (!spu) | 538 | if (!spu) |
| 560 | goto out; | 539 | goto out; |
| 561 | 540 | ||
| 541 | spu->alloc_state = SPU_FREE; | ||
| 542 | |||
| 562 | spin_lock_init(&spu->register_lock); | 543 | spin_lock_init(&spu->register_lock); |
| 563 | mutex_lock(&spu_mutex); | 544 | spin_lock(&spu_lock); |
| 564 | spu->number = number++; | 545 | spu->number = number++; |
| 565 | mutex_unlock(&spu_mutex); | 546 | spin_unlock(&spu_lock); |
| 566 | 547 | ||
| 567 | ret = spu_create_spu(spu, data); | 548 | ret = spu_create_spu(spu, data); |
| 568 | 549 | ||
| @@ -579,15 +560,22 @@ static int __init create_spu(void *data) | |||
| 579 | if (ret) | 560 | if (ret) |
| 580 | goto out_free_irqs; | 561 | goto out_free_irqs; |
| 581 | 562 | ||
| 582 | mutex_lock(&spu_mutex); | 563 | mutex_lock(&cbe_spu_info[spu->node].list_mutex); |
| 583 | spin_lock_irqsave(&spu_list_lock, flags); | 564 | list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus); |
| 584 | list_add(&spu->list, &spu_list[spu->node]); | 565 | cbe_spu_info[spu->node].n_spus++; |
| 566 | mutex_unlock(&cbe_spu_info[spu->node].list_mutex); | ||
| 567 | |||
| 568 | mutex_lock(&spu_full_list_mutex); | ||
| 569 | spin_lock_irqsave(&spu_full_list_lock, flags); | ||
| 585 | list_add(&spu->full_list, &spu_full_list); | 570 | list_add(&spu->full_list, &spu_full_list); |
| 586 | spin_unlock_irqrestore(&spu_list_lock, flags); | 571 | spin_unlock_irqrestore(&spu_full_list_lock, flags); |
| 587 | mutex_unlock(&spu_mutex); | 572 | mutex_unlock(&spu_full_list_mutex); |
| 573 | |||
| 574 | spu->stats.util_state = SPU_UTIL_IDLE_LOADED; | ||
| 575 | ktime_get_ts(&ts); | ||
| 576 | spu->stats.tstamp = timespec_to_ns(&ts); | ||
| 588 | 577 | ||
| 589 | spu->stats.utilization_state = SPU_UTIL_IDLE; | 578 | INIT_LIST_HEAD(&spu->aff_list); |
| 590 | spu->stats.tstamp = jiffies; | ||
| 591 | 579 | ||
| 592 | goto out; | 580 | goto out; |
| 593 | 581 | ||
| @@ -608,12 +596,20 @@ static const char *spu_state_names[] = { | |||
| 608 | static unsigned long long spu_acct_time(struct spu *spu, | 596 | static unsigned long long spu_acct_time(struct spu *spu, |
| 609 | enum spu_utilization_state state) | 597 | enum spu_utilization_state state) |
| 610 | { | 598 | { |
| 599 | struct timespec ts; | ||
| 611 | unsigned long long time = spu->stats.times[state]; | 600 | unsigned long long time = spu->stats.times[state]; |
| 612 | 601 | ||
| 613 | if (spu->stats.utilization_state == state) | 602 | /* |
| 614 | time += jiffies - spu->stats.tstamp; | 603 | * If the spu is idle or the context is stopped, utilization |
| 604 | * statistics are not updated. Apply the time delta from the | ||
| 605 | * last recorded state of the spu. | ||
| 606 | */ | ||
| 607 | if (spu->stats.util_state == state) { | ||
| 608 | ktime_get_ts(&ts); | ||
| 609 | time += timespec_to_ns(&ts) - spu->stats.tstamp; | ||
| 610 | } | ||
| 615 | 611 | ||
| 616 | return jiffies_to_msecs(time); | 612 | return time / NSEC_PER_MSEC; |
| 617 | } | 613 | } |
| 618 | 614 | ||
| 619 | 615 | ||
| @@ -623,11 +619,11 @@ static ssize_t spu_stat_show(struct sys_device *sysdev, char *buf) | |||
| 623 | 619 | ||
| 624 | return sprintf(buf, "%s %llu %llu %llu %llu " | 620 | return sprintf(buf, "%s %llu %llu %llu %llu " |
| 625 | "%llu %llu %llu %llu %llu %llu %llu %llu\n", | 621 | "%llu %llu %llu %llu %llu %llu %llu %llu\n", |
| 626 | spu_state_names[spu->stats.utilization_state], | 622 | spu_state_names[spu->stats.util_state], |
| 627 | spu_acct_time(spu, SPU_UTIL_USER), | 623 | spu_acct_time(spu, SPU_UTIL_USER), |
| 628 | spu_acct_time(spu, SPU_UTIL_SYSTEM), | 624 | spu_acct_time(spu, SPU_UTIL_SYSTEM), |
| 629 | spu_acct_time(spu, SPU_UTIL_IOWAIT), | 625 | spu_acct_time(spu, SPU_UTIL_IOWAIT), |
| 630 | spu_acct_time(spu, SPU_UTIL_IDLE), | 626 | spu_acct_time(spu, SPU_UTIL_IDLE_LOADED), |
| 631 | spu->stats.vol_ctx_switch, | 627 | spu->stats.vol_ctx_switch, |
| 632 | spu->stats.invol_ctx_switch, | 628 | spu->stats.invol_ctx_switch, |
| 633 | spu->stats.slb_flt, | 629 | spu->stats.slb_flt, |
| @@ -640,12 +636,146 @@ static ssize_t spu_stat_show(struct sys_device *sysdev, char *buf) | |||
| 640 | 636 | ||
| 641 | static SYSDEV_ATTR(stat, 0644, spu_stat_show, NULL); | 637 | static SYSDEV_ATTR(stat, 0644, spu_stat_show, NULL); |
| 642 | 638 | ||
| 639 | /* Hardcoded affinity idxs for QS20 */ | ||
| 640 | #define SPES_PER_BE 8 | ||
| 641 | static int QS20_reg_idxs[SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 }; | ||
| 642 | static int QS20_reg_memory[SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 }; | ||
| 643 | |||
| 644 | static struct spu *spu_lookup_reg(int node, u32 reg) | ||
| 645 | { | ||
| 646 | struct spu *spu; | ||
| 647 | |||
| 648 | list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { | ||
| 649 | if (*(u32 *)get_property(spu_devnode(spu), "reg", NULL) == reg) | ||
| 650 | return spu; | ||
| 651 | } | ||
| 652 | return NULL; | ||
| 653 | } | ||
| 654 | |||
| 655 | static void init_aff_QS20_hardcoded(void) | ||
| 656 | { | ||
| 657 | int node, i; | ||
| 658 | struct spu *last_spu, *spu; | ||
| 659 | u32 reg; | ||
| 660 | |||
| 661 | for (node = 0; node < MAX_NUMNODES; node++) { | ||
| 662 | last_spu = NULL; | ||
| 663 | for (i = 0; i < SPES_PER_BE; i++) { | ||
| 664 | reg = QS20_reg_idxs[i]; | ||
| 665 | spu = spu_lookup_reg(node, reg); | ||
| 666 | if (!spu) | ||
| 667 | continue; | ||
| 668 | spu->has_mem_affinity = QS20_reg_memory[reg]; | ||
| 669 | if (last_spu) | ||
| 670 | list_add_tail(&spu->aff_list, | ||
| 671 | &last_spu->aff_list); | ||
| 672 | last_spu = spu; | ||
| 673 | } | ||
| 674 | } | ||
| 675 | } | ||
| 676 | |||
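For reference, a standalone sketch (not part of the patch) that prints the neighbour chain the tables above produce; note that has_mem_affinity is indexed by the reg value, so regs 0 and 1, sitting at the first and last hops of the chain, are the memory-affine ones:

    #include <stdio.h>

    int main(void)
    {
        /* copies of QS20_reg_idxs[] and QS20_reg_memory[] above */
        static const int idxs[8] = { 0, 2, 4, 6, 7, 5, 3, 1 };
        static const int mem[8]  = { 1, 1, 0, 0, 0, 0, 0, 0 };
        int i;

        for (i = 0; i < 8; i++)
            printf("hop %d: SPE reg %d%s\n", i, idxs[i],
                   mem[idxs[i]] ? " (memory-affine)" : "");
        return 0;
    }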
| 677 | static int of_has_vicinity(void) | ||
| 678 | { | ||
| 679 | struct spu *spu; | ||
| 680 | |||
| 681 | spu = list_entry(cbe_spu_info[0].spus.next, struct spu, cbe_list); | ||
| 682 | return of_find_property(spu_devnode(spu), "vicinity", NULL) != NULL; | ||
| 683 | } | ||
| 684 | |||
| 685 | static struct spu *aff_devnode_spu(int cbe, struct device_node *dn) | ||
| 686 | { | ||
| 687 | struct spu *spu; | ||
| 688 | |||
| 689 | list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) | ||
| 690 | if (spu_devnode(spu) == dn) | ||
| 691 | return spu; | ||
| 692 | return NULL; | ||
| 693 | } | ||
| 694 | |||
| 695 | static struct spu * | ||
| 696 | aff_node_next_to(int cbe, struct device_node *target, struct device_node *avoid) | ||
| 697 | { | ||
| 698 | struct spu *spu; | ||
| 699 | const phandle *vic_handles; | ||
| 700 | int lenp, i; | ||
| 701 | |||
| 702 | list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) { | ||
| 703 | if (spu_devnode(spu) == avoid) | ||
| 704 | continue; | ||
| 705 | vic_handles = get_property(spu_devnode(spu), "vicinity", &lenp); | ||
| 706 | for (i = 0; i < (lenp / sizeof(phandle)); i++) { | ||
| 707 | if (vic_handles[i] == target->linux_phandle) | ||
| 708 | return spu; | ||
| 709 | } | ||
| 710 | } | ||
| 711 | return NULL; | ||
| 712 | } | ||
| 713 | |||
| 714 | static void init_aff_fw_vicinity_node(int cbe) | ||
| 715 | { | ||
| 716 | struct spu *spu, *last_spu; | ||
| 717 | struct device_node *vic_dn, *last_spu_dn; | ||
| 718 | phandle avoid_ph; | ||
| 719 | const phandle *vic_handles; | ||
| 720 | const char *name; | ||
| 721 | int lenp, i, added, mem_aff; | ||
| 722 | |||
| 723 | last_spu = list_entry(cbe_spu_info[cbe].spus.next, struct spu, cbe_list); | ||
| 724 | avoid_ph = 0; | ||
| 725 | for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) { | ||
| 726 | last_spu_dn = spu_devnode(last_spu); | ||
| 727 | vic_handles = get_property(last_spu_dn, "vicinity", &lenp); | ||
| 728 | |||
| 729 | for (i = 0; i < (lenp / sizeof(phandle)); i++) { | ||
| 730 | if (vic_handles[i] == avoid_ph) | ||
| 731 | continue; | ||
| 732 | |||
| 733 | vic_dn = of_find_node_by_phandle(vic_handles[i]); | ||
| 734 | if (!vic_dn) | ||
| 735 | continue; | ||
| 736 | |||
| 737 | name = get_property(vic_dn, "name", NULL); | ||
| 738 | if (strcmp(name, "spe") == 0) { | ||
| 739 | spu = aff_devnode_spu(cbe, vic_dn); | ||
| 740 | avoid_ph = last_spu_dn->linux_phandle; | ||
| 741 | } | ||
| 742 | else { | ||
| 743 | mem_aff = strcmp(name, "mic-tm") == 0; | ||
| 744 | spu = aff_node_next_to(cbe, vic_dn, last_spu_dn); | ||
| 745 | if (!spu) | ||
| 746 | continue; | ||
| 747 | if (mem_aff) { | ||
| 748 | last_spu->has_mem_affinity = 1; | ||
| 749 | spu->has_mem_affinity = 1; | ||
| 750 | } | ||
| 751 | avoid_ph = vic_dn->linux_phandle; | ||
| 752 | } | ||
| 753 | list_add_tail(&spu->aff_list, &last_spu->aff_list); | ||
| 754 | last_spu = spu; | ||
| 755 | break; | ||
| 756 | } | ||
| 757 | } | ||
| 758 | } | ||
| 759 | |||
| 760 | static void init_aff_fw_vicinity(void) | ||
| 761 | { | ||
| 762 | int cbe; | ||
| 763 | |||
| 764 | /* sets has_mem_affinity for each spu, as well as building the | ||
| 765 | * spu->aff_list list linking each spu to its neighbors | ||
| 766 | */ | ||
| 767 | for (cbe = 0; cbe < MAX_NUMNODES; cbe++) | ||
| 768 | init_aff_fw_vicinity_node(cbe); | ||
| 769 | } | ||
| 770 | |||
| 643 | static int __init init_spu_base(void) | 771 | static int __init init_spu_base(void) |
| 644 | { | 772 | { |
| 645 | int i, ret = 0; | 773 | int i, ret = 0; |
| 646 | 774 | ||
| 647 | for (i = 0; i < MAX_NUMNODES; i++) | 775 | for (i = 0; i < MAX_NUMNODES; i++) { |
| 648 | INIT_LIST_HEAD(&spu_list[i]); | 776 | mutex_init(&cbe_spu_info[i].list_mutex); |
| 777 | INIT_LIST_HEAD(&cbe_spu_info[i].spus); | ||
| 778 | } | ||
| 649 | 779 | ||
| 650 | if (!spu_management_ops) | 780 | if (!spu_management_ops) |
| 651 | goto out; | 781 | goto out; |
| @@ -675,16 +805,25 @@ static int __init init_spu_base(void) | |||
| 675 | fb_append_extra_logo(&logo_spe_clut224, ret); | 805 | fb_append_extra_logo(&logo_spe_clut224, ret); |
| 676 | } | 806 | } |
| 677 | 807 | ||
| 808 | mutex_lock(&spu_full_list_mutex); | ||
| 678 | xmon_register_spus(&spu_full_list); | 809 | xmon_register_spus(&spu_full_list); |
| 679 | 810 | crash_register_spus(&spu_full_list); | |
| 811 | mutex_unlock(&spu_full_list_mutex); | ||
| 680 | spu_add_sysdev_attr(&attr_stat); | 812 | spu_add_sysdev_attr(&attr_stat); |
| 681 | 813 | ||
| 814 | if (of_has_vicinity()) { | ||
| 815 | init_aff_fw_vicinity(); | ||
| 816 | } else { | ||
| 817 | long root = of_get_flat_dt_root(); | ||
| 818 | if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) | ||
| 819 | init_aff_QS20_hardcoded(); | ||
| 820 | } | ||
| 821 | |||
| 682 | return 0; | 822 | return 0; |
| 683 | 823 | ||
| 684 | out_unregister_sysdev_class: | 824 | out_unregister_sysdev_class: |
| 685 | sysdev_class_unregister(&spu_sysdev_class); | 825 | sysdev_class_unregister(&spu_sysdev_class); |
| 686 | out: | 826 | out: |
| 687 | |||
| 688 | return ret; | 827 | return ret; |
| 689 | } | 828 | } |
| 690 | module_init(init_spu_base); | 829 | module_init(init_spu_base); |
diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c index 261b507a901a..dd2c6688c8aa 100644 --- a/arch/powerpc/platforms/cell/spu_syscalls.c +++ b/arch/powerpc/platforms/cell/spu_syscalls.c | |||
| @@ -34,14 +34,27 @@ struct spufs_calls spufs_calls = { | |||
| 34 | * this file is not used and the syscalls directly enter the fs code */ | 34 | * this file is not used and the syscalls directly enter the fs code */ |
| 35 | 35 | ||
| 36 | asmlinkage long sys_spu_create(const char __user *name, | 36 | asmlinkage long sys_spu_create(const char __user *name, |
| 37 | unsigned int flags, mode_t mode) | 37 | unsigned int flags, mode_t mode, int neighbor_fd) |
| 38 | { | 38 | { |
| 39 | long ret; | 39 | long ret; |
| 40 | struct module *owner = spufs_calls.owner; | 40 | struct module *owner = spufs_calls.owner; |
| 41 | struct file *neighbor; | ||
| 42 | int fput_needed; | ||
| 41 | 43 | ||
| 42 | ret = -ENOSYS; | 44 | ret = -ENOSYS; |
| 43 | if (owner && try_module_get(owner)) { | 45 | if (owner && try_module_get(owner)) { |
| 44 | ret = spufs_calls.create_thread(name, flags, mode); | 46 | if (flags & SPU_CREATE_AFFINITY_SPU) { |
| 47 | neighbor = fget_light(neighbor_fd, &fput_needed); | ||
| 48 | if (neighbor) { | ||
| 49 | ret = spufs_calls.create_thread(name, flags, | ||
| 50 | mode, neighbor); | ||
| 51 | fput_light(neighbor, fput_needed); | ||
| 52 | } | ||
| 53 | } | ||
| 54 | else { | ||
| 55 | ret = spufs_calls.create_thread(name, flags, | ||
| 56 | mode, NULL); | ||
| 57 | } | ||
| 45 | module_put(owner); | 58 | module_put(owner); |
| 46 | } | 59 | } |
| 47 | return ret; | 60 | return ret; |
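From userspace the extended ABI looks roughly like the sketch below. The flag values, the /spu mount point and the gang layout are assumptions based on this tree rather than anything spelled out in the hunk above; __NR_spu_create only exists on powerpc:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/stat.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #define SPU_CREATE_GANG         0x0002  /* assumed flag values */
    #define SPU_CREATE_AFFINITY_SPU 0x0020

    static long spu_create(const char *name, unsigned int flags,
                           mode_t mode, int neighbor_fd)
    {
        return syscall(__NR_spu_create, name, flags, mode, neighbor_fd);
    }

    int main(void)
    {
        int ref;

        /* affinity only works inside a gang; /spu is the assumed mount */
        spu_create("/spu/mygang", SPU_CREATE_GANG, 0755, -1);
        spu_create("/spu/mygang/ref", 0, 0755, -1);
        ref = open("/spu/mygang/ref", O_RDONLY);

        /* place the new context adjacent to "ref" on the same Cell BE */
        if (spu_create("/spu/mygang/next", SPU_CREATE_AFFINITY_SPU,
                       0755, ref) < 0)
            perror("spu_create");
        close(ref);
        return 0;
    }

Passing the neighbor as an open file descriptor (taken and released with fget_light()/fput_light() above) keeps the neighbor context pinned for exactly the duration of the call.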
diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index 6d7bd60f5380..6694f86d7000 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | 22 | ||
| 23 | #include <linux/fs.h> | 23 | #include <linux/fs.h> |
| 24 | #include <linux/mm.h> | 24 | #include <linux/mm.h> |
| 25 | #include <linux/module.h> | ||
| 25 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
| 26 | #include <asm/atomic.h> | 27 | #include <asm/atomic.h> |
| 27 | #include <asm/spu.h> | 28 | #include <asm/spu.h> |
| @@ -55,12 +56,12 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang) | |||
| 55 | ctx->ops = &spu_backing_ops; | 56 | ctx->ops = &spu_backing_ops; |
| 56 | ctx->owner = get_task_mm(current); | 57 | ctx->owner = get_task_mm(current); |
| 57 | INIT_LIST_HEAD(&ctx->rq); | 58 | INIT_LIST_HEAD(&ctx->rq); |
| 59 | INIT_LIST_HEAD(&ctx->aff_list); | ||
| 58 | if (gang) | 60 | if (gang) |
| 59 | spu_gang_add_ctx(gang, ctx); | 61 | spu_gang_add_ctx(gang, ctx); |
| 60 | ctx->cpus_allowed = current->cpus_allowed; | 62 | ctx->cpus_allowed = current->cpus_allowed; |
| 61 | spu_set_timeslice(ctx); | 63 | spu_set_timeslice(ctx); |
| 62 | ctx->stats.execution_state = SPUCTX_UTIL_USER; | 64 | ctx->stats.util_state = SPU_UTIL_IDLE_LOADED; |
| 63 | ctx->stats.tstamp = jiffies; | ||
| 64 | 65 | ||
| 65 | atomic_inc(&nr_spu_contexts); | 66 | atomic_inc(&nr_spu_contexts); |
| 66 | goto out; | 67 | goto out; |
| @@ -81,6 +82,8 @@ void destroy_spu_context(struct kref *kref) | |||
| 81 | spu_fini_csa(&ctx->csa); | 82 | spu_fini_csa(&ctx->csa); |
| 82 | if (ctx->gang) | 83 | if (ctx->gang) |
| 83 | spu_gang_remove_ctx(ctx->gang, ctx); | 84 | spu_gang_remove_ctx(ctx->gang, ctx); |
| 85 | if (ctx->prof_priv_kref) | ||
| 86 | kref_put(ctx->prof_priv_kref, ctx->prof_priv_release); | ||
| 84 | BUG_ON(!list_empty(&ctx->rq)); | 87 | BUG_ON(!list_empty(&ctx->rq)); |
| 85 | atomic_dec(&nr_spu_contexts); | 88 | atomic_dec(&nr_spu_contexts); |
| 86 | kfree(ctx); | 89 | kfree(ctx); |
| @@ -166,6 +169,39 @@ int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags) | |||
| 166 | void spu_acquire_saved(struct spu_context *ctx) | 169 | void spu_acquire_saved(struct spu_context *ctx) |
| 167 | { | 170 | { |
| 168 | spu_acquire(ctx); | 171 | spu_acquire(ctx); |
| 169 | if (ctx->state != SPU_STATE_SAVED) | 172 | if (ctx->state != SPU_STATE_SAVED) { |
| 173 | set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags); | ||
| 170 | spu_deactivate(ctx); | 174 | spu_deactivate(ctx); |
| 175 | } | ||
| 176 | } | ||
| 177 | |||
| 178 | /** | ||
| 179 | * spu_release_saved - unlock spu context and return it to the runqueue | ||
| 180 | * @ctx: context to unlock | ||
| 181 | */ | ||
| 182 | void spu_release_saved(struct spu_context *ctx) | ||
| 183 | { | ||
| 184 | BUG_ON(ctx->state != SPU_STATE_SAVED); | ||
| 185 | |||
| 186 | if (test_and_clear_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags)) | ||
| 187 | spu_activate(ctx, 0); | ||
| 188 | |||
| 189 | spu_release(ctx); | ||
| 171 | } | 190 | } |
| 191 | |||
| 192 | void spu_set_profile_private_kref(struct spu_context *ctx, | ||
| 193 | struct kref *prof_info_kref, | ||
| 194 | void (*prof_info_release) (struct kref *kref)) | ||
| 195 | { | ||
| 196 | ctx->prof_priv_kref = prof_info_kref; | ||
| 197 | ctx->prof_priv_release = prof_info_release; | ||
| 198 | } | ||
| 199 | EXPORT_SYMBOL_GPL(spu_set_profile_private_kref); | ||
| 200 | |||
| 201 | void *spu_get_profile_private_kref(struct spu_context *ctx) | ||
| 202 | { | ||
| 203 | return ctx->prof_priv_kref; | ||
| 204 | } | ||
| 205 | EXPORT_SYMBOL_GPL(spu_get_profile_private_kref); | ||
| 206 | |||
| 207 | |||
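A rough sketch of how a profiler (OProfile's SPU support is the intended consumer) might hang per-context data off these hooks; the prof_* names are illustrative, and the spufs headers declaring struct spu_context and the hook are assumed to be in scope:

    #include <linux/errno.h>
    #include <linux/kref.h>
    #include <linux/slab.h>

    struct prof_priv {
        struct kref kref;
        u64 samples;
    };

    static void prof_priv_release(struct kref *kref)
    {
        kfree(container_of(kref, struct prof_priv, kref));
    }

    static int prof_attach(struct spu_context *ctx)
    {
        struct prof_priv *p = kzalloc(sizeof(*p), GFP_KERNEL);

        if (!p)
            return -ENOMEM;
        kref_init(&p->kref);
        /* the reference is dropped by destroy_spu_context() above */
        spu_set_profile_private_kref(ctx, &p->kref, prof_priv_release);
        return 0;
    }

Later samples recover the data with container_of() on the pointer returned by spu_get_profile_private_kref().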
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 5d9ad5a0307b..5e31799b1e3f 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c | |||
| @@ -226,7 +226,7 @@ static void spufs_arch_write_notes(struct file *file) | |||
| 226 | spu_acquire_saved(ctx_info->ctx); | 226 | spu_acquire_saved(ctx_info->ctx); |
| 227 | for (j = 0; j < spufs_coredump_num_notes; j++) | 227 | for (j = 0; j < spufs_coredump_num_notes; j++) |
| 228 | spufs_arch_write_note(ctx_info, j, file); | 228 | spufs_arch_write_note(ctx_info, j, file); |
| 229 | spu_release(ctx_info->ctx); | 229 | spu_release_saved(ctx_info->ctx); |
| 230 | list_del(&ctx_info->list); | 230 | list_del(&ctx_info->list); |
| 231 | kfree(ctx_info); | 231 | kfree(ctx_info); |
| 232 | } | 232 | } |
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index f53a07437472..917eab4be486 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c | |||
| @@ -179,16 +179,14 @@ int spufs_handle_class1(struct spu_context *ctx) | |||
| 179 | if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))) | 179 | if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))) |
| 180 | return 0; | 180 | return 0; |
| 181 | 181 | ||
| 182 | spuctx_switch_state(ctx, SPUCTX_UTIL_IOWAIT); | 182 | spuctx_switch_state(ctx, SPU_UTIL_IOWAIT); |
| 183 | 183 | ||
| 184 | pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea, | 184 | pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea, |
| 185 | dsisr, ctx->state); | 185 | dsisr, ctx->state); |
| 186 | 186 | ||
| 187 | ctx->stats.hash_flt++; | 187 | ctx->stats.hash_flt++; |
| 188 | if (ctx->state == SPU_STATE_RUNNABLE) { | 188 | if (ctx->state == SPU_STATE_RUNNABLE) |
| 189 | ctx->spu->stats.hash_flt++; | 189 | ctx->spu->stats.hash_flt++; |
| 190 | spu_switch_state(ctx->spu, SPU_UTIL_IOWAIT); | ||
| 191 | } | ||
| 192 | 190 | ||
| 193 | /* we must not hold the lock when entering spu_handle_mm_fault */ | 191 | /* we must not hold the lock when entering spu_handle_mm_fault */ |
| 194 | spu_release(ctx); | 192 | spu_release(ctx); |
| @@ -226,7 +224,7 @@ int spufs_handle_class1(struct spu_context *ctx) | |||
| 226 | } else | 224 | } else |
| 227 | spufs_handle_dma_error(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE); | 225 | spufs_handle_dma_error(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE); |
| 228 | 226 | ||
| 229 | spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM); | 227 | spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); |
| 230 | return ret; | 228 | return ret; |
| 231 | } | 229 | } |
| 232 | EXPORT_SYMBOL_GPL(spufs_handle_class1); | 230 | EXPORT_SYMBOL_GPL(spufs_handle_class1); |
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index c2814ea96af2..7de4e919687b 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c | |||
| @@ -370,7 +370,7 @@ spufs_regs_read(struct file *file, char __user *buffer, | |||
| 370 | 370 | ||
| 371 | spu_acquire_saved(ctx); | 371 | spu_acquire_saved(ctx); |
| 372 | ret = __spufs_regs_read(ctx, buffer, size, pos); | 372 | ret = __spufs_regs_read(ctx, buffer, size, pos); |
| 373 | spu_release(ctx); | 373 | spu_release_saved(ctx); |
| 374 | return ret; | 374 | return ret; |
| 375 | } | 375 | } |
| 376 | 376 | ||
| @@ -392,7 +392,7 @@ spufs_regs_write(struct file *file, const char __user *buffer, | |||
| 392 | ret = copy_from_user(lscsa->gprs + *pos - size, | 392 | ret = copy_from_user(lscsa->gprs + *pos - size, |
| 393 | buffer, size) ? -EFAULT : size; | 393 | buffer, size) ? -EFAULT : size; |
| 394 | 394 | ||
| 395 | spu_release(ctx); | 395 | spu_release_saved(ctx); |
| 396 | return ret; | 396 | return ret; |
| 397 | } | 397 | } |
| 398 | 398 | ||
| @@ -421,7 +421,7 @@ spufs_fpcr_read(struct file *file, char __user * buffer, | |||
| 421 | 421 | ||
| 422 | spu_acquire_saved(ctx); | 422 | spu_acquire_saved(ctx); |
| 423 | ret = __spufs_fpcr_read(ctx, buffer, size, pos); | 423 | ret = __spufs_fpcr_read(ctx, buffer, size, pos); |
| 424 | spu_release(ctx); | 424 | spu_release_saved(ctx); |
| 425 | return ret; | 425 | return ret; |
| 426 | } | 426 | } |
| 427 | 427 | ||
| @@ -443,7 +443,7 @@ spufs_fpcr_write(struct file *file, const char __user * buffer, | |||
| 443 | ret = copy_from_user((char *)&lscsa->fpcr + *pos - size, | 443 | ret = copy_from_user((char *)&lscsa->fpcr + *pos - size, |
| 444 | buffer, size) ? -EFAULT : size; | 444 | buffer, size) ? -EFAULT : size; |
| 445 | 445 | ||
| 446 | spu_release(ctx); | 446 | spu_release_saved(ctx); |
| 447 | return ret; | 447 | return ret; |
| 448 | } | 448 | } |
| 449 | 449 | ||
| @@ -868,7 +868,7 @@ static ssize_t spufs_signal1_read(struct file *file, char __user *buf, | |||
| 868 | 868 | ||
| 869 | spu_acquire_saved(ctx); | 869 | spu_acquire_saved(ctx); |
| 870 | ret = __spufs_signal1_read(ctx, buf, len, pos); | 870 | ret = __spufs_signal1_read(ctx, buf, len, pos); |
| 871 | spu_release(ctx); | 871 | spu_release_saved(ctx); |
| 872 | 872 | ||
| 873 | return ret; | 873 | return ret; |
| 874 | } | 874 | } |
| @@ -934,6 +934,13 @@ static const struct file_operations spufs_signal1_fops = { | |||
| 934 | .mmap = spufs_signal1_mmap, | 934 | .mmap = spufs_signal1_mmap, |
| 935 | }; | 935 | }; |
| 936 | 936 | ||
| 937 | static const struct file_operations spufs_signal1_nosched_fops = { | ||
| 938 | .open = spufs_signal1_open, | ||
| 939 | .release = spufs_signal1_release, | ||
| 940 | .write = spufs_signal1_write, | ||
| 941 | .mmap = spufs_signal1_mmap, | ||
| 942 | }; | ||
| 943 | |||
| 937 | static int spufs_signal2_open(struct inode *inode, struct file *file) | 944 | static int spufs_signal2_open(struct inode *inode, struct file *file) |
| 938 | { | 945 | { |
| 939 | struct spufs_inode_info *i = SPUFS_I(inode); | 946 | struct spufs_inode_info *i = SPUFS_I(inode); |
| @@ -992,7 +999,7 @@ static ssize_t spufs_signal2_read(struct file *file, char __user *buf, | |||
| 992 | 999 | ||
| 993 | spu_acquire_saved(ctx); | 1000 | spu_acquire_saved(ctx); |
| 994 | ret = __spufs_signal2_read(ctx, buf, len, pos); | 1001 | ret = __spufs_signal2_read(ctx, buf, len, pos); |
| 995 | spu_release(ctx); | 1002 | spu_release_saved(ctx); |
| 996 | 1003 | ||
| 997 | return ret; | 1004 | return ret; |
| 998 | } | 1005 | } |
| @@ -1062,6 +1069,13 @@ static const struct file_operations spufs_signal2_fops = { | |||
| 1062 | .mmap = spufs_signal2_mmap, | 1069 | .mmap = spufs_signal2_mmap, |
| 1063 | }; | 1070 | }; |
| 1064 | 1071 | ||
| 1072 | static const struct file_operations spufs_signal2_nosched_fops = { | ||
| 1073 | .open = spufs_signal2_open, | ||
| 1074 | .release = spufs_signal2_release, | ||
| 1075 | .write = spufs_signal2_write, | ||
| 1076 | .mmap = spufs_signal2_mmap, | ||
| 1077 | }; | ||
| 1078 | |||
| 1065 | static void spufs_signal1_type_set(void *data, u64 val) | 1079 | static void spufs_signal1_type_set(void *data, u64 val) |
| 1066 | { | 1080 | { |
| 1067 | struct spu_context *ctx = data; | 1081 | struct spu_context *ctx = data; |
| @@ -1612,7 +1626,7 @@ static void spufs_decr_set(void *data, u64 val) | |||
| 1612 | struct spu_lscsa *lscsa = ctx->csa.lscsa; | 1626 | struct spu_lscsa *lscsa = ctx->csa.lscsa; |
| 1613 | spu_acquire_saved(ctx); | 1627 | spu_acquire_saved(ctx); |
| 1614 | lscsa->decr.slot[0] = (u32) val; | 1628 | lscsa->decr.slot[0] = (u32) val; |
| 1615 | spu_release(ctx); | 1629 | spu_release_saved(ctx); |
| 1616 | } | 1630 | } |
| 1617 | 1631 | ||
| 1618 | static u64 __spufs_decr_get(void *data) | 1632 | static u64 __spufs_decr_get(void *data) |
| @@ -1628,7 +1642,7 @@ static u64 spufs_decr_get(void *data) | |||
| 1628 | u64 ret; | 1642 | u64 ret; |
| 1629 | spu_acquire_saved(ctx); | 1643 | spu_acquire_saved(ctx); |
| 1630 | ret = __spufs_decr_get(data); | 1644 | ret = __spufs_decr_get(data); |
| 1631 | spu_release(ctx); | 1645 | spu_release_saved(ctx); |
| 1632 | return ret; | 1646 | return ret; |
| 1633 | } | 1647 | } |
| 1634 | DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, | 1648 | DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, |
| @@ -1637,17 +1651,21 @@ DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, | |||
| 1637 | static void spufs_decr_status_set(void *data, u64 val) | 1651 | static void spufs_decr_status_set(void *data, u64 val) |
| 1638 | { | 1652 | { |
| 1639 | struct spu_context *ctx = data; | 1653 | struct spu_context *ctx = data; |
| 1640 | struct spu_lscsa *lscsa = ctx->csa.lscsa; | ||
| 1641 | spu_acquire_saved(ctx); | 1654 | spu_acquire_saved(ctx); |
| 1642 | lscsa->decr_status.slot[0] = (u32) val; | 1655 | if (val) |
| 1643 | spu_release(ctx); | 1656 | ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING; |
| 1657 | else | ||
| 1658 | ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING; | ||
| 1659 | spu_release_saved(ctx); | ||
| 1644 | } | 1660 | } |
| 1645 | 1661 | ||
| 1646 | static u64 __spufs_decr_status_get(void *data) | 1662 | static u64 __spufs_decr_status_get(void *data) |
| 1647 | { | 1663 | { |
| 1648 | struct spu_context *ctx = data; | 1664 | struct spu_context *ctx = data; |
| 1649 | struct spu_lscsa *lscsa = ctx->csa.lscsa; | 1665 | if (ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING) |
| 1650 | return lscsa->decr_status.slot[0]; | 1666 | return SPU_DECR_STATUS_RUNNING; |
| 1667 | else | ||
| 1668 | return 0; | ||
| 1651 | } | 1669 | } |
| 1652 | 1670 | ||
| 1653 | static u64 spufs_decr_status_get(void *data) | 1671 | static u64 spufs_decr_status_get(void *data) |
| @@ -1656,7 +1674,7 @@ static u64 spufs_decr_status_get(void *data) | |||
| 1656 | u64 ret; | 1674 | u64 ret; |
| 1657 | spu_acquire_saved(ctx); | 1675 | spu_acquire_saved(ctx); |
| 1658 | ret = __spufs_decr_status_get(data); | 1676 | ret = __spufs_decr_status_get(data); |
| 1659 | spu_release(ctx); | 1677 | spu_release_saved(ctx); |
| 1660 | return ret; | 1678 | return ret; |
| 1661 | } | 1679 | } |
| 1662 | DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get, | 1680 | DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get, |
| @@ -1668,7 +1686,7 @@ static void spufs_event_mask_set(void *data, u64 val) | |||
| 1668 | struct spu_lscsa *lscsa = ctx->csa.lscsa; | 1686 | struct spu_lscsa *lscsa = ctx->csa.lscsa; |
| 1669 | spu_acquire_saved(ctx); | 1687 | spu_acquire_saved(ctx); |
| 1670 | lscsa->event_mask.slot[0] = (u32) val; | 1688 | lscsa->event_mask.slot[0] = (u32) val; |
| 1671 | spu_release(ctx); | 1689 | spu_release_saved(ctx); |
| 1672 | } | 1690 | } |
| 1673 | 1691 | ||
| 1674 | static u64 __spufs_event_mask_get(void *data) | 1692 | static u64 __spufs_event_mask_get(void *data) |
| @@ -1684,7 +1702,7 @@ static u64 spufs_event_mask_get(void *data) | |||
| 1684 | u64 ret; | 1702 | u64 ret; |
| 1685 | spu_acquire_saved(ctx); | 1703 | spu_acquire_saved(ctx); |
| 1686 | ret = __spufs_event_mask_get(data); | 1704 | ret = __spufs_event_mask_get(data); |
| 1687 | spu_release(ctx); | 1705 | spu_release_saved(ctx); |
| 1688 | return ret; | 1706 | return ret; |
| 1689 | } | 1707 | } |
| 1690 | DEFINE_SIMPLE_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get, | 1708 | DEFINE_SIMPLE_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get, |
| @@ -1708,7 +1726,7 @@ static u64 spufs_event_status_get(void *data) | |||
| 1708 | 1726 | ||
| 1709 | spu_acquire_saved(ctx); | 1727 | spu_acquire_saved(ctx); |
| 1710 | ret = __spufs_event_status_get(data); | 1728 | ret = __spufs_event_status_get(data); |
| 1711 | spu_release(ctx); | 1729 | spu_release_saved(ctx); |
| 1712 | return ret; | 1730 | return ret; |
| 1713 | } | 1731 | } |
| 1714 | DEFINE_SIMPLE_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get, | 1732 | DEFINE_SIMPLE_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get, |
| @@ -1720,7 +1738,7 @@ static void spufs_srr0_set(void *data, u64 val) | |||
| 1720 | struct spu_lscsa *lscsa = ctx->csa.lscsa; | 1738 | struct spu_lscsa *lscsa = ctx->csa.lscsa; |
| 1721 | spu_acquire_saved(ctx); | 1739 | spu_acquire_saved(ctx); |
| 1722 | lscsa->srr0.slot[0] = (u32) val; | 1740 | lscsa->srr0.slot[0] = (u32) val; |
| 1723 | spu_release(ctx); | 1741 | spu_release_saved(ctx); |
| 1724 | } | 1742 | } |
| 1725 | 1743 | ||
| 1726 | static u64 spufs_srr0_get(void *data) | 1744 | static u64 spufs_srr0_get(void *data) |
| @@ -1730,7 +1748,7 @@ static u64 spufs_srr0_get(void *data) | |||
| 1730 | u64 ret; | 1748 | u64 ret; |
| 1731 | spu_acquire_saved(ctx); | 1749 | spu_acquire_saved(ctx); |
| 1732 | ret = lscsa->srr0.slot[0]; | 1750 | ret = lscsa->srr0.slot[0]; |
| 1733 | spu_release(ctx); | 1751 | spu_release_saved(ctx); |
| 1734 | return ret; | 1752 | return ret; |
| 1735 | } | 1753 | } |
| 1736 | DEFINE_SIMPLE_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set, | 1754 | DEFINE_SIMPLE_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set, |
| @@ -1786,7 +1804,7 @@ static u64 spufs_lslr_get(void *data) | |||
| 1786 | 1804 | ||
| 1787 | spu_acquire_saved(ctx); | 1805 | spu_acquire_saved(ctx); |
| 1788 | ret = __spufs_lslr_get(data); | 1806 | ret = __spufs_lslr_get(data); |
| 1789 | spu_release(ctx); | 1807 | spu_release_saved(ctx); |
| 1790 | 1808 | ||
| 1791 | return ret; | 1809 | return ret; |
| 1792 | } | 1810 | } |
| @@ -1850,7 +1868,7 @@ static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf, | |||
| 1850 | spin_lock(&ctx->csa.register_lock); | 1868 | spin_lock(&ctx->csa.register_lock); |
| 1851 | ret = __spufs_mbox_info_read(ctx, buf, len, pos); | 1869 | ret = __spufs_mbox_info_read(ctx, buf, len, pos); |
| 1852 | spin_unlock(&ctx->csa.register_lock); | 1870 | spin_unlock(&ctx->csa.register_lock); |
| 1853 | spu_release(ctx); | 1871 | spu_release_saved(ctx); |
| 1854 | 1872 | ||
| 1855 | return ret; | 1873 | return ret; |
| 1856 | } | 1874 | } |
| @@ -1888,7 +1906,7 @@ static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf, | |||
| 1888 | spin_lock(&ctx->csa.register_lock); | 1906 | spin_lock(&ctx->csa.register_lock); |
| 1889 | ret = __spufs_ibox_info_read(ctx, buf, len, pos); | 1907 | ret = __spufs_ibox_info_read(ctx, buf, len, pos); |
| 1890 | spin_unlock(&ctx->csa.register_lock); | 1908 | spin_unlock(&ctx->csa.register_lock); |
| 1891 | spu_release(ctx); | 1909 | spu_release_saved(ctx); |
| 1892 | 1910 | ||
| 1893 | return ret; | 1911 | return ret; |
| 1894 | } | 1912 | } |
| @@ -1929,7 +1947,7 @@ static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf, | |||
| 1929 | spin_lock(&ctx->csa.register_lock); | 1947 | spin_lock(&ctx->csa.register_lock); |
| 1930 | ret = __spufs_wbox_info_read(ctx, buf, len, pos); | 1948 | ret = __spufs_wbox_info_read(ctx, buf, len, pos); |
| 1931 | spin_unlock(&ctx->csa.register_lock); | 1949 | spin_unlock(&ctx->csa.register_lock); |
| 1932 | spu_release(ctx); | 1950 | spu_release_saved(ctx); |
| 1933 | 1951 | ||
| 1934 | return ret; | 1952 | return ret; |
| 1935 | } | 1953 | } |
| @@ -1979,7 +1997,7 @@ static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, | |||
| 1979 | spin_lock(&ctx->csa.register_lock); | 1997 | spin_lock(&ctx->csa.register_lock); |
| 1980 | ret = __spufs_dma_info_read(ctx, buf, len, pos); | 1998 | ret = __spufs_dma_info_read(ctx, buf, len, pos); |
| 1981 | spin_unlock(&ctx->csa.register_lock); | 1999 | spin_unlock(&ctx->csa.register_lock); |
| 1982 | spu_release(ctx); | 2000 | spu_release_saved(ctx); |
| 1983 | 2001 | ||
| 1984 | return ret; | 2002 | return ret; |
| 1985 | } | 2003 | } |
| @@ -2030,7 +2048,7 @@ static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf, | |||
| 2030 | spin_lock(&ctx->csa.register_lock); | 2048 | spin_lock(&ctx->csa.register_lock); |
| 2031 | ret = __spufs_proxydma_info_read(ctx, buf, len, pos); | 2049 | ret = __spufs_proxydma_info_read(ctx, buf, len, pos); |
| 2032 | spin_unlock(&ctx->csa.register_lock); | 2050 | spin_unlock(&ctx->csa.register_lock); |
| 2033 | spu_release(ctx); | 2051 | spu_release_saved(ctx); |
| 2034 | 2052 | ||
| 2035 | return ret; | 2053 | return ret; |
| 2036 | } | 2054 | } |
| @@ -2065,14 +2083,26 @@ static const char *ctx_state_names[] = { | |||
| 2065 | }; | 2083 | }; |
| 2066 | 2084 | ||
| 2067 | static unsigned long long spufs_acct_time(struct spu_context *ctx, | 2085 | static unsigned long long spufs_acct_time(struct spu_context *ctx, |
| 2068 | enum spuctx_execution_state state) | 2086 | enum spu_utilization_state state) |
| 2069 | { | 2087 | { |
| 2070 | unsigned long time = ctx->stats.times[state]; | 2088 | struct timespec ts; |
| 2089 | unsigned long long time = ctx->stats.times[state]; | ||
| 2071 | 2090 | ||
| 2072 | if (ctx->stats.execution_state == state) | 2091 | /* |
| 2073 | time += jiffies - ctx->stats.tstamp; | 2092 | * In general, utilization statistics are updated by the controlling |
| 2093 | * thread as the spu context moves through various well-defined | ||
| 2094 | * state transitions, but if the context is lazily loaded its | ||
| 2095 | * utilization statistics are not updated as the controlling thread | ||
| 2096 | * is not tightly coupled with the execution of the spu context. We | ||
| 2097 | * calculate and apply the time delta from the last recorded state | ||
| 2098 | * of the spu context. | ||
| 2099 | */ | ||
| 2100 | if (ctx->spu && ctx->stats.util_state == state) { | ||
| 2101 | ktime_get_ts(&ts); | ||
| 2102 | time += timespec_to_ns(&ts) - ctx->stats.tstamp; | ||
| 2103 | } | ||
| 2074 | 2104 | ||
| 2075 | return jiffies_to_msecs(time); | 2105 | return time / NSEC_PER_MSEC; |
| 2076 | } | 2106 | } |
| 2077 | 2107 | ||
| 2078 | static unsigned long long spufs_slb_flts(struct spu_context *ctx) | 2108 | static unsigned long long spufs_slb_flts(struct spu_context *ctx) |
| @@ -2107,11 +2137,11 @@ static int spufs_show_stat(struct seq_file *s, void *private) | |||
| 2107 | spu_acquire(ctx); | 2137 | spu_acquire(ctx); |
| 2108 | seq_printf(s, "%s %llu %llu %llu %llu " | 2138 | seq_printf(s, "%s %llu %llu %llu %llu " |
| 2109 | "%llu %llu %llu %llu %llu %llu %llu %llu\n", | 2139 | "%llu %llu %llu %llu %llu %llu %llu %llu\n", |
| 2110 | ctx_state_names[ctx->stats.execution_state], | 2140 | ctx_state_names[ctx->stats.util_state], |
| 2111 | spufs_acct_time(ctx, SPUCTX_UTIL_USER), | 2141 | spufs_acct_time(ctx, SPU_UTIL_USER), |
| 2112 | spufs_acct_time(ctx, SPUCTX_UTIL_SYSTEM), | 2142 | spufs_acct_time(ctx, SPU_UTIL_SYSTEM), |
| 2113 | spufs_acct_time(ctx, SPUCTX_UTIL_IOWAIT), | 2143 | spufs_acct_time(ctx, SPU_UTIL_IOWAIT), |
| 2114 | spufs_acct_time(ctx, SPUCTX_UTIL_LOADED), | 2144 | spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED), |
| 2115 | ctx->stats.vol_ctx_switch, | 2145 | ctx->stats.vol_ctx_switch, |
| 2116 | ctx->stats.invol_ctx_switch, | 2146 | ctx->stats.invol_ctx_switch, |
| 2117 | spufs_slb_flts(ctx), | 2147 | spufs_slb_flts(ctx), |
| @@ -2184,8 +2214,8 @@ struct tree_descr spufs_dir_nosched_contents[] = { | |||
| 2184 | { "mbox_stat", &spufs_mbox_stat_fops, 0444, }, | 2214 | { "mbox_stat", &spufs_mbox_stat_fops, 0444, }, |
| 2185 | { "ibox_stat", &spufs_ibox_stat_fops, 0444, }, | 2215 | { "ibox_stat", &spufs_ibox_stat_fops, 0444, }, |
| 2186 | { "wbox_stat", &spufs_wbox_stat_fops, 0444, }, | 2216 | { "wbox_stat", &spufs_wbox_stat_fops, 0444, }, |
| 2187 | { "signal1", &spufs_signal1_fops, 0666, }, | 2217 | { "signal1", &spufs_signal1_nosched_fops, 0222, }, |
| 2188 | { "signal2", &spufs_signal2_fops, 0666, }, | 2218 | { "signal2", &spufs_signal2_nosched_fops, 0222, }, |
| 2189 | { "signal1_type", &spufs_signal1_type, 0666, }, | 2219 | { "signal1_type", &spufs_signal1_type, 0666, }, |
| 2190 | { "signal2_type", &spufs_signal2_type, 0666, }, | 2220 | { "signal2_type", &spufs_signal2_type, 0666, }, |
| 2191 | { "mss", &spufs_mss_fops, 0666, }, | 2221 | { "mss", &spufs_mss_fops, 0666, }, |
diff --git a/arch/powerpc/platforms/cell/spufs/gang.c b/arch/powerpc/platforms/cell/spufs/gang.c index 212ea78f9051..71a443253021 100644 --- a/arch/powerpc/platforms/cell/spufs/gang.c +++ b/arch/powerpc/platforms/cell/spufs/gang.c | |||
| @@ -35,7 +35,9 @@ struct spu_gang *alloc_spu_gang(void) | |||
| 35 | 35 | ||
| 36 | kref_init(&gang->kref); | 36 | kref_init(&gang->kref); |
| 37 | mutex_init(&gang->mutex); | 37 | mutex_init(&gang->mutex); |
| 38 | mutex_init(&gang->aff_mutex); | ||
| 38 | INIT_LIST_HEAD(&gang->list); | 39 | INIT_LIST_HEAD(&gang->list); |
| 40 | INIT_LIST_HEAD(&gang->aff_list_head); | ||
| 39 | 41 | ||
| 40 | out: | 42 | out: |
| 41 | return gang; | 43 | return gang; |
| @@ -73,6 +75,10 @@ void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx) | |||
| 73 | { | 75 | { |
| 74 | mutex_lock(&gang->mutex); | 76 | mutex_lock(&gang->mutex); |
| 75 | WARN_ON(ctx->gang != gang); | 77 | WARN_ON(ctx->gang != gang); |
| 78 | if (!list_empty(&ctx->aff_list)) { | ||
| 79 | list_del_init(&ctx->aff_list); | ||
| 80 | gang->aff_flags &= ~AFF_OFFSETS_SET; | ||
| 81 | } | ||
| 76 | list_del_init(&ctx->gang_list); | 82 | list_del_init(&ctx->gang_list); |
| 77 | gang->contexts--; | 83 | gang->contexts--; |
| 78 | mutex_unlock(&gang->mutex); | 84 | mutex_unlock(&gang->mutex); |
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 7eb4d6cbcb74..b3d0dd118dd0 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c | |||
| @@ -316,11 +316,107 @@ out: | |||
| 316 | return ret; | 316 | return ret; |
| 317 | } | 317 | } |
| 318 | 318 | ||
| 319 | static int spufs_create_context(struct inode *inode, | 319 | static struct spu_context * |
| 320 | struct dentry *dentry, | 320 | spufs_assert_affinity(unsigned int flags, struct spu_gang *gang, |
| 321 | struct vfsmount *mnt, int flags, int mode) | 321 | struct file *filp) |
| 322 | { | ||
| 323 | struct spu_context *tmp, *neighbor; | ||
| 324 | int count, node; | ||
| 325 | int aff_supp; | ||
| 326 | |||
| 327 | aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next, | ||
| 328 | struct spu, cbe_list))->aff_list); | ||
| 329 | |||
| 330 | if (!aff_supp) | ||
| 331 | return ERR_PTR(-EINVAL); | ||
| 332 | |||
| 333 | if (flags & SPU_CREATE_GANG) | ||
| 334 | return ERR_PTR(-EINVAL); | ||
| 335 | |||
| 336 | if (flags & SPU_CREATE_AFFINITY_MEM && | ||
| 337 | gang->aff_ref_ctx && | ||
| 338 | gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM) | ||
| 339 | return ERR_PTR(-EEXIST); | ||
| 340 | |||
| 341 | if (gang->aff_flags & AFF_MERGED) | ||
| 342 | return ERR_PTR(-EBUSY); | ||
| 343 | |||
| 344 | neighbor = NULL; | ||
| 345 | if (flags & SPU_CREATE_AFFINITY_SPU) { | ||
| 346 | if (!filp || filp->f_op != &spufs_context_fops) | ||
| 347 | return ERR_PTR(-EINVAL); | ||
| 348 | |||
| 349 | neighbor = get_spu_context( | ||
| 350 | SPUFS_I(filp->f_dentry->d_inode)->i_ctx); | ||
| 351 | |||
| 352 | if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) && | ||
| 353 | !list_is_last(&neighbor->aff_list, &gang->aff_list_head) && | ||
| 354 | !list_entry(neighbor->aff_list.next, struct spu_context, | ||
| 355 | aff_list)->aff_head) | ||
| 356 | return ERR_PTR(-EEXIST); | ||
| 357 | |||
| 358 | if (gang != neighbor->gang) | ||
| 359 | return ERR_PTR(-EINVAL); | ||
| 360 | |||
| 361 | count = 1; | ||
| 362 | list_for_each_entry(tmp, &gang->aff_list_head, aff_list) | ||
| 363 | count++; | ||
| 364 | if (list_empty(&neighbor->aff_list)) | ||
| 365 | count++; | ||
| 366 | |||
| 367 | for (node = 0; node < MAX_NUMNODES; node++) { | ||
| 368 | if ((cbe_spu_info[node].n_spus - atomic_read( | ||
| 369 | &cbe_spu_info[node].reserved_spus)) >= count) | ||
| 370 | break; | ||
| 371 | } | ||
| 372 | |||
| 373 | if (node == MAX_NUMNODES) | ||
| 374 | return ERR_PTR(-EEXIST); | ||
| 375 | } | ||
| 376 | |||
| 377 | return neighbor; | ||
| 378 | } | ||
| 379 | |||
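The ERR_PTR values above reach the spu_create() caller as errno codes. A hedged userspace sketch of the mapping, read off the checks in spufs_assert_affinity():

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    static void explain_affinity_error(int err)
    {
        switch (err) {
        case EINVAL: /* no vicinity info, SPU_CREATE_GANG mixed with
                      * affinity flags, bad neighbor file, or neighbor
                      * in a different gang */
            puts("invalid affinity request");
            break;
        case EEXIST: /* memory affinity already claimed, neighbor already
                      * has two neighbors, or no node has enough
                      * unreserved SPEs */
            puts("no affinity slot available");
            break;
        case EBUSY:  /* the gang's affinity lists were already merged */
            puts("gang placement already fixed");
            break;
        default:
            printf("spu_create: %s\n", strerror(err));
        }
    }

    int main(void)
    {
        explain_affinity_error(EEXIST);
        return 0;
    }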
| 380 | static void | ||
| 381 | spufs_set_affinity(unsigned int flags, struct spu_context *ctx, | ||
| 382 | struct spu_context *neighbor) | ||
| 383 | { | ||
| 384 | if (flags & SPU_CREATE_AFFINITY_MEM) | ||
| 385 | ctx->gang->aff_ref_ctx = ctx; | ||
| 386 | |||
| 387 | if (flags & SPU_CREATE_AFFINITY_SPU) { | ||
| 388 | if (list_empty(&neighbor->aff_list)) { | ||
| 389 | list_add_tail(&neighbor->aff_list, | ||
| 390 | &ctx->gang->aff_list_head); | ||
| 391 | neighbor->aff_head = 1; | ||
| 392 | } | ||
| 393 | |||
| 394 | if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head) | ||
| 395 | || list_entry(neighbor->aff_list.next, struct spu_context, | ||
| 396 | aff_list)->aff_head) { | ||
| 397 | list_add(&ctx->aff_list, &neighbor->aff_list); | ||
| 398 | } else { | ||
| 399 | list_add_tail(&ctx->aff_list, &neighbor->aff_list); | ||
| 400 | if (neighbor->aff_head) { | ||
| 401 | neighbor->aff_head = 0; | ||
| 402 | ctx->aff_head = 1; | ||
| 403 | } | ||
| 404 | } | ||
| 405 | |||
| 406 | if (!ctx->gang->aff_ref_ctx) | ||
| 407 | ctx->gang->aff_ref_ctx = ctx; | ||
| 408 | } | ||
| 409 | } | ||
| 410 | |||
| 411 | static int | ||
| 412 | spufs_create_context(struct inode *inode, struct dentry *dentry, | ||
| 413 | struct vfsmount *mnt, int flags, int mode, | ||
| 414 | struct file *aff_filp) | ||
| 322 | { | 415 | { |
| 323 | int ret; | 416 | int ret; |
| 417 | int affinity; | ||
| 418 | struct spu_gang *gang; | ||
| 419 | struct spu_context *neighbor; | ||
| 324 | 420 | ||
| 325 | ret = -EPERM; | 421 | ret = -EPERM; |
| 326 | if ((flags & SPU_CREATE_NOSCHED) && | 422 | if ((flags & SPU_CREATE_NOSCHED) && |
| @@ -336,9 +432,29 @@ static int spufs_create_context(struct inode *inode, | |||
| 336 | if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader) | 432 | if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader) |
| 337 | goto out_unlock; | 433 | goto out_unlock; |
| 338 | 434 | ||
| 435 | gang = NULL; | ||
| 436 | neighbor = NULL; | ||
| 437 | affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU); | ||
| 438 | if (affinity) { | ||
| 439 | gang = SPUFS_I(inode)->i_gang; | ||
| 440 | ret = -EINVAL; | ||
| 441 | if (!gang) | ||
| 442 | goto out_unlock; | ||
| 443 | mutex_lock(&gang->aff_mutex); | ||
| 444 | neighbor = spufs_assert_affinity(flags, gang, aff_filp); | ||
| 445 | if (IS_ERR(neighbor)) { | ||
| 446 | ret = PTR_ERR(neighbor); | ||
| 447 | goto out_aff_unlock; | ||
| 448 | } | ||
| 449 | } | ||
| 450 | |||
| 339 | ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO); | 451 | ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO); |
| 340 | if (ret) | 452 | if (ret) |
| 341 | goto out_unlock; | 453 | goto out_aff_unlock; |
| 454 | |||
| 455 | if (affinity) | ||
| 456 | spufs_set_affinity(flags, SPUFS_I(dentry->d_inode)->i_ctx, | ||
| 457 | neighbor); | ||
| 342 | 458 | ||
| 343 | /* | 459 | /* |
| 344 | * get references for dget and mntget, will be released | 460 | * get references for dget and mntget, will be released |
| @@ -352,6 +468,9 @@ static int spufs_create_context(struct inode *inode, | |||
| 352 | goto out; | 468 | goto out; |
| 353 | } | 469 | } |
| 354 | 470 | ||
| 471 | out_aff_unlock: | ||
| 472 | if (affinity) | ||
| 473 | mutex_unlock(&gang->aff_mutex); | ||
| 355 | out_unlock: | 474 | out_unlock: |
| 356 | mutex_unlock(&inode->i_mutex); | 475 | mutex_unlock(&inode->i_mutex); |
| 357 | out: | 476 | out: |
| @@ -450,7 +569,8 @@ out: | |||
| 450 | 569 | ||
| 451 | static struct file_system_type spufs_type; | 570 | static struct file_system_type spufs_type; |
| 452 | 571 | ||
| 453 | long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode) | 572 | long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode, |
| 573 | struct file *filp) | ||
| 454 | { | 574 | { |
| 455 | struct dentry *dentry; | 575 | struct dentry *dentry; |
| 456 | int ret; | 576 | int ret; |
| @@ -487,7 +607,7 @@ long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode) | |||
| 487 | dentry, nd->mnt, mode); | 607 | dentry, nd->mnt, mode); |
| 488 | else | 608 | else |
| 489 | return spufs_create_context(nd->dentry->d_inode, | 609 | return spufs_create_context(nd->dentry->d_inode, |
| 490 | dentry, nd->mnt, flags, mode); | 610 | dentry, nd->mnt, flags, mode, filp); |
| 491 | 611 | ||
| 492 | out_dput: | 612 | out_dput: |
| 493 | dput(dentry); | 613 | dput(dentry); |
diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 58ae13b7de84..0b50fa5cb39d 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c | |||
| @@ -18,15 +18,17 @@ void spufs_stop_callback(struct spu *spu) | |||
| 18 | wake_up_all(&ctx->stop_wq); | 18 | wake_up_all(&ctx->stop_wq); |
| 19 | } | 19 | } |
| 20 | 20 | ||
| 21 | static inline int spu_stopped(struct spu_context *ctx, u32 * stat) | 21 | static inline int spu_stopped(struct spu_context *ctx, u32 *stat) |
| 22 | { | 22 | { |
| 23 | struct spu *spu; | 23 | struct spu *spu; |
| 24 | u64 pte_fault; | 24 | u64 pte_fault; |
| 25 | 25 | ||
| 26 | *stat = ctx->ops->status_read(ctx); | 26 | *stat = ctx->ops->status_read(ctx); |
| 27 | if (ctx->state != SPU_STATE_RUNNABLE) | 27 | |
| 28 | return 1; | ||
| 29 | spu = ctx->spu; | 28 | spu = ctx->spu; |
| 29 | if (ctx->state != SPU_STATE_RUNNABLE || | ||
| 30 | test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags)) | ||
| 31 | return 1; | ||
| 30 | pte_fault = spu->dsisr & | 32 | pte_fault = spu->dsisr & |
| 31 | (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); | 33 | (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); |
| 32 | return (!(*stat & SPU_STATUS_RUNNING) || pte_fault || spu->class_0_pending) ? | 34 | return (!(*stat & SPU_STATUS_RUNNING) || pte_fault || spu->class_0_pending) ? |
| @@ -124,8 +126,10 @@ out: | |||
| 124 | return ret; | 126 | return ret; |
| 125 | } | 127 | } |
| 126 | 128 | ||
| 127 | static int spu_run_init(struct spu_context *ctx, u32 * npc) | 129 | static int spu_run_init(struct spu_context *ctx, u32 *npc) |
| 128 | { | 130 | { |
| 131 | spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); | ||
| 132 | |||
| 129 | if (ctx->flags & SPU_CREATE_ISOLATE) { | 133 | if (ctx->flags & SPU_CREATE_ISOLATE) { |
| 130 | unsigned long runcntl; | 134 | unsigned long runcntl; |
| 131 | 135 | ||
| @@ -151,16 +155,20 @@ static int spu_run_init(struct spu_context *ctx, u32 * npc) | |||
| 151 | ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); | 155 | ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); |
| 152 | } | 156 | } |
| 153 | 157 | ||
| 158 | spuctx_switch_state(ctx, SPU_UTIL_USER); | ||
| 159 | |||
| 154 | return 0; | 160 | return 0; |
| 155 | } | 161 | } |
| 156 | 162 | ||
| 157 | static int spu_run_fini(struct spu_context *ctx, u32 * npc, | 163 | static int spu_run_fini(struct spu_context *ctx, u32 *npc, |
| 158 | u32 * status) | 164 | u32 *status) |
| 159 | { | 165 | { |
| 160 | int ret = 0; | 166 | int ret = 0; |
| 161 | 167 | ||
| 162 | *status = ctx->ops->status_read(ctx); | 168 | *status = ctx->ops->status_read(ctx); |
| 163 | *npc = ctx->ops->npc_read(ctx); | 169 | *npc = ctx->ops->npc_read(ctx); |
| 170 | |||
| 171 | spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); | ||
| 164 | spu_release(ctx); | 172 | spu_release(ctx); |
| 165 | 173 | ||
| 166 | if (signal_pending(current)) | 174 | if (signal_pending(current)) |
| @@ -289,10 +297,10 @@ static inline int spu_process_events(struct spu_context *ctx) | |||
| 289 | return ret; | 297 | return ret; |
| 290 | } | 298 | } |
| 291 | 299 | ||
| 292 | long spufs_run_spu(struct file *file, struct spu_context *ctx, | 300 | long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) |
| 293 | u32 *npc, u32 *event) | ||
| 294 | { | 301 | { |
| 295 | int ret; | 302 | int ret; |
| 303 | struct spu *spu; | ||
| 296 | u32 status; | 304 | u32 status; |
| 297 | 305 | ||
| 298 | if (mutex_lock_interruptible(&ctx->run_mutex)) | 306 | if (mutex_lock_interruptible(&ctx->run_mutex)) |
| @@ -328,6 +336,17 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, | |||
| 328 | ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status)); | 336 | ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status)); |
| 329 | if (unlikely(ret)) | 337 | if (unlikely(ret)) |
| 330 | break; | 338 | break; |
| 339 | spu = ctx->spu; | ||
| 340 | if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE, | ||
| 341 | &ctx->sched_flags))) { | ||
| 342 | if (!(status & SPU_STATUS_STOPPED_BY_STOP)) { | ||
| 343 | spu_switch_notify(spu, ctx); | ||
| 344 | continue; | ||
| 345 | } | ||
| 346 | } | ||
| 347 | |||
| 348 | spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); | ||
| 349 | |||
| 331 | if ((status & SPU_STATUS_STOPPED_BY_STOP) && | 350 | if ((status & SPU_STATUS_STOPPED_BY_STOP) && |
| 332 | (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) { | 351 | (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) { |
| 333 | ret = spu_process_callback(ctx); | 352 | ret = spu_process_callback(ctx); |
| @@ -356,6 +375,7 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, | |||
| 356 | (ctx->state == SPU_STATE_RUNNABLE)) | 375 | (ctx->state == SPU_STATE_RUNNABLE)) |
| 357 | ctx->stats.libassist++; | 376 | ctx->stats.libassist++; |
| 358 | 377 | ||
| 378 | |||
| 359 | ctx->ops->master_stop(ctx); | 379 | ctx->ops->master_stop(ctx); |
| 360 | ret = spu_run_fini(ctx, npc, &status); | 380 | ret = spu_run_fini(ctx, npc, &status); |
| 361 | spu_yield(ctx); | 381 | spu_yield(ctx); |
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index e5b4dd1db286..227968b4779d 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c | |||
| @@ -51,9 +51,6 @@ struct spu_prio_array { | |||
| 51 | DECLARE_BITMAP(bitmap, MAX_PRIO); | 51 | DECLARE_BITMAP(bitmap, MAX_PRIO); |
| 52 | struct list_head runq[MAX_PRIO]; | 52 | struct list_head runq[MAX_PRIO]; |
| 53 | spinlock_t runq_lock; | 53 | spinlock_t runq_lock; |
| 54 | struct list_head active_list[MAX_NUMNODES]; | ||
| 55 | struct mutex active_mutex[MAX_NUMNODES]; | ||
| 56 | int nr_active[MAX_NUMNODES]; | ||
| 57 | int nr_waiting; | 54 | int nr_waiting; |
| 58 | }; | 55 | }; |
| 59 | 56 | ||
| @@ -127,7 +124,7 @@ void __spu_update_sched_info(struct spu_context *ctx) | |||
| 127 | ctx->policy = current->policy; | 124 | ctx->policy = current->policy; |
| 128 | 125 | ||
| 129 | /* | 126 | /* |
| 130 | * A lot of places that don't hold active_mutex poke into | 127 | * A lot of places that don't hold list_mutex poke into |
| 131 | * cpus_allowed, including grab_runnable_context which | 128 | * cpus_allowed, including grab_runnable_context which |
| 132 | * already holds the runq_lock. So abuse runq_lock | 129 | * already holds the runq_lock. So abuse runq_lock |
| 133 | * to protect this field as well. | 130 | * to protect this field as well. |
| @@ -141,9 +138,9 @@ void spu_update_sched_info(struct spu_context *ctx) | |||
| 141 | { | 138 | { |
| 142 | int node = ctx->spu->node; | 139 | int node = ctx->spu->node; |
| 143 | 140 | ||
| 144 | mutex_lock(&spu_prio->active_mutex[node]); | 141 | mutex_lock(&cbe_spu_info[node].list_mutex); |
| 145 | __spu_update_sched_info(ctx); | 142 | __spu_update_sched_info(ctx); |
| 146 | mutex_unlock(&spu_prio->active_mutex[node]); | 143 | mutex_unlock(&cbe_spu_info[node].list_mutex); |
| 147 | } | 144 | } |
| 148 | 145 | ||
| 149 | static int __node_allowed(struct spu_context *ctx, int node) | 146 | static int __node_allowed(struct spu_context *ctx, int node) |
| @@ -169,56 +166,56 @@ static int node_allowed(struct spu_context *ctx, int node) | |||
| 169 | return rval; | 166 | return rval; |
| 170 | } | 167 | } |
| 171 | 168 | ||
| 172 | /** | 169 | static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); |
| 173 | * spu_add_to_active_list - add spu to active list | ||
| 174 | * @spu: spu to add to the active list | ||
| 175 | */ | ||
| 176 | static void spu_add_to_active_list(struct spu *spu) | ||
| 177 | { | ||
| 178 | int node = spu->node; | ||
| 179 | |||
| 180 | mutex_lock(&spu_prio->active_mutex[node]); | ||
| 181 | spu_prio->nr_active[node]++; | ||
| 182 | list_add_tail(&spu->list, &spu_prio->active_list[node]); | ||
| 183 | mutex_unlock(&spu_prio->active_mutex[node]); | ||
| 184 | } | ||
| 185 | 170 | ||
| 186 | static void __spu_remove_from_active_list(struct spu *spu) | 171 | void spu_switch_notify(struct spu *spu, struct spu_context *ctx) |
| 187 | { | 172 | { |
| 188 | list_del_init(&spu->list); | 173 | blocking_notifier_call_chain(&spu_switch_notifier, |
| 189 | spu_prio->nr_active[spu->node]--; | 174 | ctx ? ctx->object_id : 0, spu); |
| 190 | } | 175 | } |
| 191 | 176 | ||
| 192 | /** | 177 | static void notify_spus_active(void) |
| 193 | * spu_remove_from_active_list - remove spu from active list | ||
| 194 | * @spu: spu to remove from the active list | ||
| 195 | */ | ||
| 196 | static void spu_remove_from_active_list(struct spu *spu) | ||
| 197 | { | 178 | { |
| 198 | int node = spu->node; | 179 | int node; |
| 199 | |||
| 200 | mutex_lock(&spu_prio->active_mutex[node]); | ||
| 201 | __spu_remove_from_active_list(spu); | ||
| 202 | mutex_unlock(&spu_prio->active_mutex[node]); | ||
| 203 | } | ||
| 204 | 180 | ||
| 205 | static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); | 181 | /* |
| 182 | * Wake up the active spu_contexts. | ||
| 183 | * | ||
| 184 | * When the awakened processes see that their "notify_active" flag is set, | ||
| 185 | * they will call spu_switch_notify(). | ||
| 186 | */ | ||
| 187 | for_each_online_node(node) { | ||
| 188 | struct spu *spu; | ||
| 206 | 189 | ||
| 207 | static void spu_switch_notify(struct spu *spu, struct spu_context *ctx) | 190 | mutex_lock(&cbe_spu_info[node].list_mutex); |
| 208 | { | 191 | list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { |
| 209 | blocking_notifier_call_chain(&spu_switch_notifier, | 192 | if (spu->alloc_state != SPU_FREE) { |
| 210 | ctx ? ctx->object_id : 0, spu); | 193 | struct spu_context *ctx = spu->ctx; |
| 194 | set_bit(SPU_SCHED_NOTIFY_ACTIVE, | ||
| 195 | &ctx->sched_flags); | ||
| 196 | mb(); | ||
| 197 | wake_up_all(&ctx->stop_wq); | ||
| 198 | } | ||
| 199 | } | ||
| 200 | mutex_unlock(&cbe_spu_info[node].list_mutex); | ||
| 201 | } | ||
| 211 | } | 202 | } |
| 212 | 203 | ||
| 213 | int spu_switch_event_register(struct notifier_block * n) | 204 | int spu_switch_event_register(struct notifier_block * n) |
| 214 | { | 205 | { |
| 215 | return blocking_notifier_chain_register(&spu_switch_notifier, n); | 206 | int ret; |
| 207 | ret = blocking_notifier_chain_register(&spu_switch_notifier, n); | ||
| 208 | if (!ret) | ||
| 209 | notify_spus_active(); | ||
| 210 | return ret; | ||
| 216 | } | 211 | } |
| 212 | EXPORT_SYMBOL_GPL(spu_switch_event_register); | ||
| 217 | 213 | ||
| 218 | int spu_switch_event_unregister(struct notifier_block * n) | 214 | int spu_switch_event_unregister(struct notifier_block * n) |
| 219 | { | 215 | { |
| 220 | return blocking_notifier_chain_unregister(&spu_switch_notifier, n); | 216 | return blocking_notifier_chain_unregister(&spu_switch_notifier, n); |
| 221 | } | 217 | } |
| 218 | EXPORT_SYMBOL_GPL(spu_switch_event_unregister); | ||
| 222 | 219 | ||
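With the register/unregister pair now exported to modules, a consumer can subscribe roughly as below; because of notify_spus_active(), the callback also fires once for each context already running at registration time. A minimal sketch, assuming <asm/spu.h> declares the API:

    #include <linux/module.h>
    #include <linux/notifier.h>
    #include <asm/spu.h>

    static int my_switch_event(struct notifier_block *nb,
                               unsigned long object_id, void *data)
    {
        struct spu *spu = data;

        /* object_id is ctx->object_id, or 0 when the SPU goes idle */
        pr_debug("spu %d now runs object %lx\n", spu->number, object_id);
        return NOTIFY_OK;
    }

    static struct notifier_block my_nb = {
        .notifier_call = my_switch_event,
    };

    static int __init my_init(void)
    {
        return spu_switch_event_register(&my_nb);
    }

    static void __exit my_exit(void)
    {
        spu_switch_event_unregister(&my_nb);
    }

    module_init(my_init);
    module_exit(my_exit);
    MODULE_LICENSE("GPL");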
| 223 | /** | 220 | /** |
| 224 | * spu_bind_context - bind spu context to physical spu | 221 | * spu_bind_context - bind spu context to physical spu |
| @@ -229,6 +226,12 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) | |||
| 229 | { | 226 | { |
| 230 | pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid, | 227 | pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid, |
| 231 | spu->number, spu->node); | 228 | spu->number, spu->node); |
| 229 | spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); | ||
| 230 | |||
| 231 | if (ctx->flags & SPU_CREATE_NOSCHED) | ||
| 232 | atomic_inc(&cbe_spu_info[spu->node].reserved_spus); | ||
| 233 | if (!list_empty(&ctx->aff_list)) | ||
| 234 | atomic_inc(&ctx->gang->aff_sched_count); | ||
| 232 | 235 | ||
| 233 | ctx->stats.slb_flt_base = spu->stats.slb_flt; | 236 | ctx->stats.slb_flt_base = spu->stats.slb_flt; |
| 234 | ctx->stats.class2_intr_base = spu->stats.class2_intr; | 237 | ctx->stats.class2_intr_base = spu->stats.class2_intr; |
| @@ -238,6 +241,7 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) | |||
| 238 | ctx->spu = spu; | 241 | ctx->spu = spu; |
| 239 | ctx->ops = &spu_hw_ops; | 242 | ctx->ops = &spu_hw_ops; |
| 240 | spu->pid = current->pid; | 243 | spu->pid = current->pid; |
| 244 | spu->tgid = current->tgid; | ||
| 241 | spu_associate_mm(spu, ctx->owner); | 245 | spu_associate_mm(spu, ctx->owner); |
| 242 | spu->ibox_callback = spufs_ibox_callback; | 246 | spu->ibox_callback = spufs_ibox_callback; |
| 243 | spu->wbox_callback = spufs_wbox_callback; | 247 | spu->wbox_callback = spufs_wbox_callback; |
| @@ -251,7 +255,153 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) | |||
| 251 | spu_cpu_affinity_set(spu, raw_smp_processor_id()); | 255 | spu_cpu_affinity_set(spu, raw_smp_processor_id()); |
| 252 | spu_switch_notify(spu, ctx); | 256 | spu_switch_notify(spu, ctx); |
| 253 | ctx->state = SPU_STATE_RUNNABLE; | 257 | ctx->state = SPU_STATE_RUNNABLE; |
| 254 | spu_switch_state(spu, SPU_UTIL_SYSTEM); | 258 | |
| 259 | spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); | ||
| 260 | } | ||
| 261 | |||
| 262 | /* | ||
| 263 | * Must be used with the list_mutex held. | ||
| 264 | */ | ||
| 265 | static inline int sched_spu(struct spu *spu) | ||
| 266 | { | ||
| 267 | BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex)); | ||
| 268 | |||
| 269 | return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED)); | ||
| 270 | } | ||
| 271 | |||
| 272 | static void aff_merge_remaining_ctxs(struct spu_gang *gang) | ||
| 273 | { | ||
| 274 | struct spu_context *ctx; | ||
| 275 | |||
| 276 | list_for_each_entry(ctx, &gang->aff_list_head, aff_list) { | ||
| 277 | if (list_empty(&ctx->aff_list)) | ||
| 278 | list_add(&ctx->aff_list, &gang->aff_list_head); | ||
| 279 | } | ||
| 280 | gang->aff_flags |= AFF_MERGED; | ||
| 281 | } | ||
| 282 | |||
| 283 | static void aff_set_offsets(struct spu_gang *gang) | ||
| 284 | { | ||
| 285 | struct spu_context *ctx; | ||
| 286 | int offset; | ||
| 287 | |||
| 288 | offset = -1; | ||
| 289 | list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, | ||
| 290 | aff_list) { | ||
| 291 | if (&ctx->aff_list == &gang->aff_list_head) | ||
| 292 | break; | ||
| 293 | ctx->aff_offset = offset--; | ||
| 294 | } | ||
| 295 | |||
| 296 | offset = 0; | ||
| 297 | list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) { | ||
| 298 | if (&ctx->aff_list == &gang->aff_list_head) | ||
| 299 | break; | ||
| 300 | ctx->aff_offset = offset++; | ||
| 301 | } | ||
| 302 | |||
| 303 | gang->aff_flags |= AFF_OFFSETS_SET; | ||
| 304 | } | ||
| 305 | |||
| 306 | static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, | ||
| 307 | int group_size, int lowest_offset) | ||
| 308 | { | ||
| 309 | struct spu *spu; | ||
| 310 | int node, n; | ||
| 311 | |||
| 312 | /* | ||
| 313 | * TODO: A better algorithm could be used to find a good spu to be | ||
| 314 | * used as the reference location for the ctx chain. | ||
| 315 | */ | ||
| 316 | node = cpu_to_node(raw_smp_processor_id()); | ||
| 317 | for (n = 0; n < MAX_NUMNODES; n++, node++) { | ||
| 318 | node = (node < MAX_NUMNODES) ? node : 0; | ||
| 319 | if (!node_allowed(ctx, node)) | ||
| 320 | continue; | ||
| 321 | mutex_lock(&cbe_spu_info[node].list_mutex); | ||
| 322 | list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { | ||
| 323 | if ((!mem_aff || spu->has_mem_affinity) && | ||
| 324 | sched_spu(spu)) { | ||
| 325 | mutex_unlock(&cbe_spu_info[node].list_mutex); | ||
| 326 | return spu; | ||
| 327 | } | ||
| 328 | } | ||
| 329 | mutex_unlock(&cbe_spu_info[node].list_mutex); | ||
| 330 | } | ||
| 331 | return NULL; | ||
| 332 | } | ||
| 333 | |||
| 334 | static void aff_set_ref_point_location(struct spu_gang *gang) | ||
| 335 | { | ||
| 336 | int mem_aff, gs, lowest_offset; | ||
| 337 | struct spu_context *ctx; | ||
| 338 | struct spu *tmp; | ||
| 339 | |||
| 340 | mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM; | ||
| 341 | lowest_offset = 0; | ||
| 342 | gs = 0; | ||
| 343 | |||
| 344 | list_for_each_entry(tmp, &gang->aff_list_head, aff_list) | ||
| 345 | gs++; | ||
| 346 | |||
| 347 | list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, | ||
| 348 | aff_list) { | ||
| 349 | if (&ctx->aff_list == &gang->aff_list_head) | ||
| 350 | break; | ||
| 351 | lowest_offset = ctx->aff_offset; | ||
| 352 | } | ||
| 353 | |||
| 354 | gang->aff_ref_spu = aff_ref_location(ctx, mem_aff, gs, lowest_offset); | ||
| 355 | } | ||
| 356 | |||
| 357 | static struct spu *ctx_location(struct spu *ref, int offset, int node) | ||
| 358 | { | ||
| 359 | struct spu *spu; | ||
| 360 | |||
| 361 | spu = NULL; | ||
| 362 | if (offset >= 0) { | ||
| 363 | list_for_each_entry(spu, ref->aff_list.prev, aff_list) { | ||
| 364 | BUG_ON(spu->node != node); | ||
| 365 | if (offset == 0) | ||
| 366 | break; | ||
| 367 | if (sched_spu(spu)) | ||
| 368 | offset--; | ||
| 369 | } | ||
| 370 | } else { | ||
| 371 | list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) { | ||
| 372 | BUG_ON(spu->node != node); | ||
| 373 | if (offset == 0) | ||
| 374 | break; | ||
| 375 | if (sched_spu(spu)) | ||
| 376 | offset++; | ||
| 377 | } | ||
| 378 | } | ||
| 379 | |||
| 380 | return spu; | ||
| 381 | } | ||
| 382 | |||
| 383 | /* | ||
| 384 | * has_affinity is called each time a context is going to be scheduled. | ||
| 385 | * It returns true if the gang has an affinity reference spu to place by. | ||
| 386 | */ | ||
| 387 | static int has_affinity(struct spu_context *ctx) | ||
| 388 | { | ||
| 389 | struct spu_gang *gang = ctx->gang; | ||
| 390 | |||
| 391 | if (list_empty(&ctx->aff_list)) | ||
| 392 | return 0; | ||
| 393 | |||
| 394 | mutex_lock(&gang->aff_mutex); | ||
| 395 | if (!gang->aff_ref_spu) { | ||
| 396 | if (!(gang->aff_flags & AFF_MERGED)) | ||
| 397 | aff_merge_remaining_ctxs(gang); | ||
| 398 | if (!(gang->aff_flags & AFF_OFFSETS_SET)) | ||
| 399 | aff_set_offsets(gang); | ||
| 400 | aff_set_ref_point_location(gang); | ||
| 401 | } | ||
| 402 | mutex_unlock(&gang->aff_mutex); | ||
| 403 | |||
| 404 | return gang->aff_ref_spu != NULL; | ||
| 255 | } | 405 | } |
| 256 | 406 | ||
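As a rough model of the placement logic above (not the kernel list walk): each gang member's aff_offset is a signed distance from the reference context, and ctx_location() steps through the node's SPU chain in that direction, counting only schedulable SPUs. A simplified array-based sketch, with all names illustrative:

        struct toy_spu {
                int nosched;    /* models a NOSCHED context pinning the SPU */
        };

        static int toy_sched_ok(struct toy_spu *s)
        {
                return !s->nosched;     /* mirrors sched_spu() */
        }

        /* Walk 'offset' schedulable steps from the reference index. */
        struct toy_spu *toy_ctx_location(struct toy_spu *spus, int n,
                                         int ref, int offset)
        {
                int i = ref;
                int step = (offset > 0) ? 1 : -1;

                while (offset != 0) {
                        i += step;
                        if (i < 0 || i >= n)
                                return NULL;    /* fell off the node */
                        if (toy_sched_ok(&spus[i]))
                                offset -= step; /* counted one valid step */
                }
                return &spus[i];
        }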
| 257 | /** | 407 | /** |
| @@ -263,9 +413,13 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) | |||
| 263 | { | 413 | { |
| 264 | pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__, | 414 | pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__, |
| 265 | spu->pid, spu->number, spu->node); | 415 | spu->pid, spu->number, spu->node); |
| 416 | spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); | ||
| 266 | 417 | ||
| 267 | spu_switch_state(spu, SPU_UTIL_IDLE); | 418 | if (spu->ctx->flags & SPU_CREATE_NOSCHED) |
| 268 | 419 | atomic_dec(&cbe_spu_info[spu->node].reserved_spus); | |
| 420 | if (!list_empty(&ctx->aff_list)) | ||
| 421 | if (atomic_dec_and_test(&ctx->gang->aff_sched_count)) | ||
| 422 | ctx->gang->aff_ref_spu = NULL; | ||
| 269 | spu_switch_notify(spu, NULL); | 423 | spu_switch_notify(spu, NULL); |
| 270 | spu_unmap_mappings(ctx); | 424 | spu_unmap_mappings(ctx); |
| 271 | spu_save(&ctx->csa, spu); | 425 | spu_save(&ctx->csa, spu); |
| @@ -278,8 +432,8 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) | |||
| 278 | spu->dma_callback = NULL; | 432 | spu->dma_callback = NULL; |
| 279 | spu_associate_mm(spu, NULL); | 433 | spu_associate_mm(spu, NULL); |
| 280 | spu->pid = 0; | 434 | spu->pid = 0; |
| 435 | spu->tgid = 0; | ||
| 281 | ctx->ops = &spu_backing_ops; | 436 | ctx->ops = &spu_backing_ops; |
| 282 | ctx->spu = NULL; | ||
| 283 | spu->flags = 0; | 437 | spu->flags = 0; |
| 284 | spu->ctx = NULL; | 438 | spu->ctx = NULL; |
| 285 | 439 | ||
| @@ -287,6 +441,10 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) | |||
| 287 | (spu->stats.slb_flt - ctx->stats.slb_flt_base); | 441 | (spu->stats.slb_flt - ctx->stats.slb_flt_base); |
| 288 | ctx->stats.class2_intr += | 442 | ctx->stats.class2_intr += |
| 289 | (spu->stats.class2_intr - ctx->stats.class2_intr_base); | 443 | (spu->stats.class2_intr - ctx->stats.class2_intr_base); |
| 444 | |||
| 445 | /* This maps the underlying spu state to idle */ | ||
| 446 | spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); | ||
| 447 | ctx->spu = NULL; | ||
| 290 | } | 448 | } |
| 291 | 449 | ||
| 292 | /** | 450 | /** |
| @@ -352,18 +510,41 @@ static void spu_prio_wait(struct spu_context *ctx) | |||
| 352 | 510 | ||
| 353 | static struct spu *spu_get_idle(struct spu_context *ctx) | 511 | static struct spu *spu_get_idle(struct spu_context *ctx) |
| 354 | { | 512 | { |
| 355 | struct spu *spu = NULL; | 513 | struct spu *spu; |
| 356 | int node = cpu_to_node(raw_smp_processor_id()); | 514 | int node, n; |
| 357 | int n; | 515 | |
| 516 | if (has_affinity(ctx)) { | ||
| 517 | node = ctx->gang->aff_ref_spu->node; | ||
| 358 | 518 | ||
| 519 | mutex_lock(&cbe_spu_info[node].list_mutex); | ||
| 520 | spu = ctx_location(ctx->gang->aff_ref_spu, ctx->aff_offset, node); | ||
| 521 | if (spu && spu->alloc_state == SPU_FREE) | ||
| 522 | goto found; | ||
| 523 | mutex_unlock(&cbe_spu_info[node].list_mutex); | ||
| 524 | return NULL; | ||
| 525 | } | ||
| 526 | |||
| 527 | node = cpu_to_node(raw_smp_processor_id()); | ||
| 359 | for (n = 0; n < MAX_NUMNODES; n++, node++) { | 528 | for (n = 0; n < MAX_NUMNODES; n++, node++) { |
| 360 | node = (node < MAX_NUMNODES) ? node : 0; | 529 | node = (node < MAX_NUMNODES) ? node : 0; |
| 361 | if (!node_allowed(ctx, node)) | 530 | if (!node_allowed(ctx, node)) |
| 362 | continue; | 531 | continue; |
| 363 | spu = spu_alloc_node(node); | 532 | |
| 364 | if (spu) | 533 | mutex_lock(&cbe_spu_info[node].list_mutex); |
| 365 | break; | 534 | list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { |
| 535 | if (spu->alloc_state == SPU_FREE) | ||
| 536 | goto found; | ||
| 537 | } | ||
| 538 | mutex_unlock(&cbe_spu_info[node].list_mutex); | ||
| 366 | } | 539 | } |
| 540 | |||
| 541 | return NULL; | ||
| 542 | |||
| 543 | found: | ||
| 544 | spu->alloc_state = SPU_USED; | ||
| 545 | mutex_unlock(&cbe_spu_info[node].list_mutex); | ||
| 546 | pr_debug("Got SPU %d %d\n", spu->number, spu->node); | ||
| 547 | spu_init_channels(spu); | ||
| 367 | return spu; | 548 | return spu; |
| 368 | } | 549 | } |
| 369 | 550 | ||
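The rewritten spu_get_idle() above replaces the old free-list allocator with a scan-and-claim pattern: the per-node list is walked under list_mutex, and the winner's alloc_state flips from SPU_FREE to SPU_USED before the lock drops, so concurrent scanners cannot claim the same SPU. A generic, self-contained sketch of the pattern, with illustrative names:

        #include <linux/list.h>
        #include <linux/mutex.h>

        enum { ITEM_FREE, ITEM_USED };

        struct item {
                struct list_head list;
                int state;
        };

        /* Claim one free item; the state flip happens under the same lock
         * that protects the scan, which is what makes the claim exclusive. */
        static struct item *claim_free(struct mutex *lock, struct list_head *head)
        {
                struct item *it;

                mutex_lock(lock);
                list_for_each_entry(it, head, list) {
                        if (it->state == ITEM_FREE) {
                                it->state = ITEM_USED;
                                mutex_unlock(lock);
                                return it;
                        }
                }
                mutex_unlock(lock);
                return NULL;
        }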
| @@ -393,15 +574,15 @@ static struct spu *find_victim(struct spu_context *ctx) | |||
| 393 | if (!node_allowed(ctx, node)) | 574 | if (!node_allowed(ctx, node)) |
| 394 | continue; | 575 | continue; |
| 395 | 576 | ||
| 396 | mutex_lock(&spu_prio->active_mutex[node]); | 577 | mutex_lock(&cbe_spu_info[node].list_mutex); |
| 397 | list_for_each_entry(spu, &spu_prio->active_list[node], list) { | 578 | list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { |
| 398 | struct spu_context *tmp = spu->ctx; | 579 | struct spu_context *tmp = spu->ctx; |
| 399 | 580 | ||
| 400 | if (tmp->prio > ctx->prio && | 581 | if (tmp->prio > ctx->prio && |
| 401 | (!victim || tmp->prio > victim->prio)) | 582 | (!victim || tmp->prio > victim->prio)) |
| 402 | victim = spu->ctx; | 583 | victim = spu->ctx; |
| 403 | } | 584 | } |
| 404 | mutex_unlock(&spu_prio->active_mutex[node]); | 585 | mutex_unlock(&cbe_spu_info[node].list_mutex); |
| 405 | 586 | ||
| 406 | if (victim) { | 587 | if (victim) { |
| 407 | /* | 588 | /* |
| @@ -426,7 +607,11 @@ static struct spu *find_victim(struct spu_context *ctx) | |||
| 426 | victim = NULL; | 607 | victim = NULL; |
| 427 | goto restart; | 608 | goto restart; |
| 428 | } | 609 | } |
| 429 | spu_remove_from_active_list(spu); | 610 | |
| 611 | mutex_lock(&cbe_spu_info[node].list_mutex); | ||
| 612 | cbe_spu_info[node].nr_active--; | ||
| 613 | mutex_unlock(&cbe_spu_info[node].list_mutex); | ||
| 614 | |||
| 430 | spu_unbind_context(spu, victim); | 615 | spu_unbind_context(spu, victim); |
| 431 | victim->stats.invol_ctx_switch++; | 616 | victim->stats.invol_ctx_switch++; |
| 432 | spu->stats.invol_ctx_switch++; | 617 | spu->stats.invol_ctx_switch++; |
| @@ -455,8 +640,6 @@ static struct spu *find_victim(struct spu_context *ctx) | |||
| 455 | */ | 640 | */ |
| 456 | int spu_activate(struct spu_context *ctx, unsigned long flags) | 641 | int spu_activate(struct spu_context *ctx, unsigned long flags) |
| 457 | { | 642 | { |
| 458 | spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM); | ||
| 459 | |||
| 460 | do { | 643 | do { |
| 461 | struct spu *spu; | 644 | struct spu *spu; |
| 462 | 645 | ||
| @@ -477,8 +660,12 @@ int spu_activate(struct spu_context *ctx, unsigned long flags) | |||
| 477 | if (!spu && rt_prio(ctx->prio)) | 660 | if (!spu && rt_prio(ctx->prio)) |
| 478 | spu = find_victim(ctx); | 661 | spu = find_victim(ctx); |
| 479 | if (spu) { | 662 | if (spu) { |
| 663 | int node = spu->node; | ||
| 664 | |||
| 665 | mutex_lock(&cbe_spu_info[node].list_mutex); | ||
| 480 | spu_bind_context(spu, ctx); | 666 | spu_bind_context(spu, ctx); |
| 481 | spu_add_to_active_list(spu); | 667 | cbe_spu_info[node].nr_active++; |
| 668 | mutex_unlock(&cbe_spu_info[node].list_mutex); | ||
| 482 | return 0; | 669 | return 0; |
| 483 | } | 670 | } |
| 484 | 671 | ||
| @@ -500,7 +687,7 @@ static struct spu_context *grab_runnable_context(int prio, int node) | |||
| 500 | int best; | 687 | int best; |
| 501 | 688 | ||
| 502 | spin_lock(&spu_prio->runq_lock); | 689 | spin_lock(&spu_prio->runq_lock); |
| 503 | best = sched_find_first_bit(spu_prio->bitmap); | 690 | best = find_first_bit(spu_prio->bitmap, prio); |
| 504 | while (best < prio) { | 691 | while (best < prio) { |
| 505 | struct list_head *rq = &spu_prio->runq[best]; | 692 | struct list_head *rq = &spu_prio->runq[best]; |
| 506 | 693 | ||
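The switch from sched_find_first_bit() to find_first_bit(spu_prio->bitmap, prio) above changes the contract: the scan is bounded by the caller's priority, so when nothing better is queued it returns prio and the while (best < prio) loop never runs. This also removes the need for the MAX_PRIO sentinel bit, which spu_sched_init() stops setting later in this patch. A minimal sketch of the bounded scan, assuming only the standard bitops API:

        #include <linux/bitops.h>

        /* Returns the best queued priority below 'prio', or -1 if none.
         * find_first_bit() returns its size argument when no bit is set. */
        static int best_queued_below(const unsigned long *bitmap, int prio)
        {
                int best = find_first_bit(bitmap, prio);

                return (best < prio) ? best : -1;
        }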
| @@ -527,11 +714,17 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) | |||
| 527 | if (spu) { | 714 | if (spu) { |
| 528 | new = grab_runnable_context(max_prio, spu->node); | 715 | new = grab_runnable_context(max_prio, spu->node); |
| 529 | if (new || force) { | 716 | if (new || force) { |
| 530 | spu_remove_from_active_list(spu); | 717 | int node = spu->node; |
| 718 | |||
| 719 | mutex_lock(&cbe_spu_info[node].list_mutex); | ||
| 531 | spu_unbind_context(spu, ctx); | 720 | spu_unbind_context(spu, ctx); |
| 721 | spu->alloc_state = SPU_FREE; | ||
| 722 | cbe_spu_info[node].nr_active--; | ||
| 723 | mutex_unlock(&cbe_spu_info[node].list_mutex); | ||
| 724 | |||
| 532 | ctx->stats.vol_ctx_switch++; | 725 | ctx->stats.vol_ctx_switch++; |
| 533 | spu->stats.vol_ctx_switch++; | 726 | spu->stats.vol_ctx_switch++; |
| 534 | spu_free(spu); | 727 | |
| 535 | if (new) | 728 | if (new) |
| 536 | wake_up(&new->stop_wq); | 729 | wake_up(&new->stop_wq); |
| 537 | } | 730 | } |
| @@ -550,21 +743,11 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) | |||
| 550 | */ | 743 | */ |
| 551 | void spu_deactivate(struct spu_context *ctx) | 744 | void spu_deactivate(struct spu_context *ctx) |
| 552 | { | 745 | { |
| 553 | /* | ||
| 554 | * We must never reach this for a nosched context, | ||
| 555 | * but handle the case gracefully instead of panicking. | ||
| 556 | */ | ||
| 557 | if (ctx->flags & SPU_CREATE_NOSCHED) { | ||
| 558 | WARN_ON(1); | ||
| 559 | return; | ||
| 560 | } | ||
| 561 | |||
| 562 | __spu_deactivate(ctx, 1, MAX_PRIO); | 746 | __spu_deactivate(ctx, 1, MAX_PRIO); |
| 563 | spuctx_switch_state(ctx, SPUCTX_UTIL_USER); | ||
| 564 | } | 747 | } |
| 565 | 748 | ||
| 566 | /** | 749 | /** |
| 567 | * spu_yield - yield a physical spu if others are waiting | 750 | * spu_yield - yield a physical spu if others are waiting |
| 568 | * @ctx: spu context to yield | 751 | * @ctx: spu context to yield |
| 569 | * | 752 | * |
| 570 | * Check if there is a higher priority context waiting and if yes | 753 | * Check if there is a higher priority context waiting and if yes |
| @@ -575,17 +758,12 @@ void spu_yield(struct spu_context *ctx) | |||
| 575 | { | 758 | { |
| 576 | if (!(ctx->flags & SPU_CREATE_NOSCHED)) { | 759 | if (!(ctx->flags & SPU_CREATE_NOSCHED)) { |
| 577 | mutex_lock(&ctx->state_mutex); | 760 | mutex_lock(&ctx->state_mutex); |
| 578 | if (__spu_deactivate(ctx, 0, MAX_PRIO)) | 761 | __spu_deactivate(ctx, 0, MAX_PRIO); |
| 579 | spuctx_switch_state(ctx, SPUCTX_UTIL_USER); | ||
| 580 | else { | ||
| 581 | spuctx_switch_state(ctx, SPUCTX_UTIL_LOADED); | ||
| 582 | spu_switch_state(ctx->spu, SPU_UTIL_USER); | ||
| 583 | } | ||
| 584 | mutex_unlock(&ctx->state_mutex); | 762 | mutex_unlock(&ctx->state_mutex); |
| 585 | } | 763 | } |
| 586 | } | 764 | } |
| 587 | 765 | ||
| 588 | static void spusched_tick(struct spu_context *ctx) | 766 | static noinline void spusched_tick(struct spu_context *ctx) |
| 589 | { | 767 | { |
| 590 | if (ctx->flags & SPU_CREATE_NOSCHED) | 768 | if (ctx->flags & SPU_CREATE_NOSCHED) |
| 591 | return; | 769 | return; |
| @@ -596,7 +774,7 @@ static void spusched_tick(struct spu_context *ctx) | |||
| 596 | return; | 774 | return; |
| 597 | 775 | ||
| 598 | /* | 776 | /* |
| 599 | * Unfortunately active_mutex ranks outside of state_mutex, so | 777 | * Unfortunately list_mutex ranks outside of state_mutex, so |
| 600 | * we have to trylock here. If we fail give the context another | 778 | * we have to trylock here. If we fail give the context another |
| 601 | * tick and try again. | 779 | * tick and try again. |
| 602 | */ | 780 | */ |
| @@ -606,12 +784,11 @@ static void spusched_tick(struct spu_context *ctx) | |||
| 606 | 784 | ||
| 607 | new = grab_runnable_context(ctx->prio + 1, spu->node); | 785 | new = grab_runnable_context(ctx->prio + 1, spu->node); |
| 608 | if (new) { | 786 | if (new) { |
| 609 | |||
| 610 | __spu_remove_from_active_list(spu); | ||
| 611 | spu_unbind_context(spu, ctx); | 787 | spu_unbind_context(spu, ctx); |
| 612 | ctx->stats.invol_ctx_switch++; | 788 | ctx->stats.invol_ctx_switch++; |
| 613 | spu->stats.invol_ctx_switch++; | 789 | spu->stats.invol_ctx_switch++; |
| 614 | spu_free(spu); | 790 | spu->alloc_state = SPU_FREE; |
| 791 | cbe_spu_info[spu->node].nr_active--; | ||
| 615 | wake_up(&new->stop_wq); | 792 | wake_up(&new->stop_wq); |
| 616 | /* | 793 | /* |
| 617 | * We need to break out of the wait loop in | 794 | * We need to break out of the wait loop in |
| @@ -632,7 +809,7 @@ static void spusched_tick(struct spu_context *ctx) | |||
| 632 | * | 809 | * |
| 633 | * Return the number of tasks currently running or waiting to run. | 810 | * Return the number of tasks currently running or waiting to run. |
| 634 | * | 811 | * |
| 635 | * Note that we don't take runq_lock / active_mutex here. Reading | 812 | * Note that we don't take runq_lock / list_mutex here. Reading |
| 636 | * a single 32bit value is atomic on powerpc, and we don't care | 813 | * a single 32bit value is atomic on powerpc, and we don't care |
| 637 | * about memory ordering issues here. | 814 | * about memory ordering issues here. |
| 638 | */ | 815 | */ |
| @@ -641,7 +818,7 @@ static unsigned long count_active_contexts(void) | |||
| 641 | int nr_active = 0, node; | 818 | int nr_active = 0, node; |
| 642 | 819 | ||
| 643 | for (node = 0; node < MAX_NUMNODES; node++) | 820 | for (node = 0; node < MAX_NUMNODES; node++) |
| 644 | nr_active += spu_prio->nr_active[node]; | 821 | nr_active += cbe_spu_info[node].nr_active; |
| 645 | nr_active += spu_prio->nr_waiting; | 822 | nr_active += spu_prio->nr_waiting; |
| 646 | 823 | ||
| 647 | return nr_active; | 824 | return nr_active; |
| @@ -681,19 +858,18 @@ static void spusched_wake(unsigned long data) | |||
| 681 | 858 | ||
| 682 | static int spusched_thread(void *unused) | 859 | static int spusched_thread(void *unused) |
| 683 | { | 860 | { |
| 684 | struct spu *spu, *next; | 861 | struct spu *spu; |
| 685 | int node; | 862 | int node; |
| 686 | 863 | ||
| 687 | while (!kthread_should_stop()) { | 864 | while (!kthread_should_stop()) { |
| 688 | set_current_state(TASK_INTERRUPTIBLE); | 865 | set_current_state(TASK_INTERRUPTIBLE); |
| 689 | schedule(); | 866 | schedule(); |
| 690 | for (node = 0; node < MAX_NUMNODES; node++) { | 867 | for (node = 0; node < MAX_NUMNODES; node++) { |
| 691 | mutex_lock(&spu_prio->active_mutex[node]); | 868 | mutex_lock(&cbe_spu_info[node].list_mutex); |
| 692 | list_for_each_entry_safe(spu, next, | 869 | list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) |
| 693 | &spu_prio->active_list[node], | 870 | if (spu->ctx) |
| 694 | list) | 871 | spusched_tick(spu->ctx); |
| 695 | spusched_tick(spu->ctx); | 872 | mutex_unlock(&cbe_spu_info[node].list_mutex); |
| 696 | mutex_unlock(&spu_prio->active_mutex[node]); | ||
| 697 | } | 873 | } |
| 698 | } | 874 | } |
| 699 | 875 | ||
| @@ -751,10 +927,9 @@ int __init spu_sched_init(void) | |||
| 751 | INIT_LIST_HEAD(&spu_prio->runq[i]); | 927 | INIT_LIST_HEAD(&spu_prio->runq[i]); |
| 752 | __clear_bit(i, spu_prio->bitmap); | 928 | __clear_bit(i, spu_prio->bitmap); |
| 753 | } | 929 | } |
| 754 | __set_bit(MAX_PRIO, spu_prio->bitmap); | ||
| 755 | for (i = 0; i < MAX_NUMNODES; i++) { | 930 | for (i = 0; i < MAX_NUMNODES; i++) { |
| 756 | mutex_init(&spu_prio->active_mutex[i]); | 931 | mutex_init(&cbe_spu_info[i].list_mutex); |
| 757 | INIT_LIST_HEAD(&spu_prio->active_list[i]); | 932 | INIT_LIST_HEAD(&cbe_spu_info[i].spus); |
| 758 | } | 933 | } |
| 759 | spin_lock_init(&spu_prio->runq_lock); | 934 | spin_lock_init(&spu_prio->runq_lock); |
| 760 | 935 | ||
| @@ -783,9 +958,9 @@ int __init spu_sched_init(void) | |||
| 783 | return err; | 958 | return err; |
| 784 | } | 959 | } |
| 785 | 960 | ||
| 786 | void __exit spu_sched_exit(void) | 961 | void spu_sched_exit(void) |
| 787 | { | 962 | { |
| 788 | struct spu *spu, *tmp; | 963 | struct spu *spu; |
| 789 | int node; | 964 | int node; |
| 790 | 965 | ||
| 791 | remove_proc_entry("spu_loadavg", NULL); | 966 | remove_proc_entry("spu_loadavg", NULL); |
| @@ -794,13 +969,11 @@ void __exit spu_sched_exit(void) | |||
| 794 | kthread_stop(spusched_task); | 969 | kthread_stop(spusched_task); |
| 795 | 970 | ||
| 796 | for (node = 0; node < MAX_NUMNODES; node++) { | 971 | for (node = 0; node < MAX_NUMNODES; node++) { |
| 797 | mutex_lock(&spu_prio->active_mutex[node]); | 972 | mutex_lock(&cbe_spu_info[node].list_mutex); |
| 798 | list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node], | 973 | list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) |
| 799 | list) { | 974 | if (spu->alloc_state != SPU_FREE) |
| 800 | list_del_init(&spu->list); | 975 | spu->alloc_state = SPU_FREE; |
| 801 | spu_free(spu); | 976 | mutex_unlock(&cbe_spu_info[node].list_mutex); |
| 802 | } | ||
| 803 | mutex_unlock(&spu_prio->active_mutex[node]); | ||
| 804 | } | 977 | } |
| 805 | kfree(spu_prio); | 978 | kfree(spu_prio); |
| 806 | } | 979 | } |
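Taken together, the scheduler changes in this file retire the separate free/active lists: SPUs now live permanently on one per-node list and carry an alloc_state, with nr_active adjusted under the same mutex that protects the list, which is why spu_sched_exit() above only marks SPUs free instead of unlinking them. A conceptual model of the new per-node bookkeeping (illustrative types only, not the real cbe_spu_info definition):

        #include <linux/list.h>
        #include <linux/mutex.h>

        enum toy_alloc_state { TOY_SPU_FREE, TOY_SPU_USED };

        struct toy_node_info {
                struct mutex list_mutex;        /* guards spus and nr_active */
                struct list_head spus;          /* all SPUs on the node, fixed */
                int nr_active;                  /* SPUs with a bound context */
        };

        /* Membership never changes at runtime; the alloc_state transition and
         * the active count replace the old moves between free/active lists. */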
diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore.c b/arch/powerpc/platforms/cell/spufs/spu_restore.c index 4e19ed7a0756..21a9c952d88b 100644 --- a/arch/powerpc/platforms/cell/spufs/spu_restore.c +++ b/arch/powerpc/platforms/cell/spufs/spu_restore.c | |||
| @@ -84,13 +84,13 @@ static inline void restore_decr(void) | |||
| 84 | unsigned int decr_running; | 84 | unsigned int decr_running; |
| 85 | unsigned int decr; | 85 | unsigned int decr; |
| 86 | 86 | ||
| 87 | /* Restore, Step 6: | 87 | /* Restore, Step 6 (moved): |
| 88 | * If the LSCSA "decrementer running" flag is set | 88 | * If the LSCSA "decrementer running" flag is set |
| 89 | * then write the SPU_WrDec channel with the | 89 | * then write the SPU_WrDec channel with the |
| 90 | * decrementer value from LSCSA. | 90 | * decrementer value from LSCSA. |
| 91 | */ | 91 | */ |
| 92 | offset = LSCSA_QW_OFFSET(decr_status); | 92 | offset = LSCSA_QW_OFFSET(decr_status); |
| 93 | decr_running = regs_spill[offset].slot[0]; | 93 | decr_running = regs_spill[offset].slot[0] & SPU_DECR_STATUS_RUNNING; |
| 94 | if (decr_running) { | 94 | if (decr_running) { |
| 95 | offset = LSCSA_QW_OFFSET(decr); | 95 | offset = LSCSA_QW_OFFSET(decr); |
| 96 | decr = regs_spill[offset].slot[0]; | 96 | decr = regs_spill[offset].slot[0]; |
| @@ -318,10 +318,10 @@ int main() | |||
| 318 | build_dma_list(lscsa_ea); /* Step 3. */ | 318 | build_dma_list(lscsa_ea); /* Step 3. */ |
| 319 | restore_upper_240kb(lscsa_ea); /* Step 4. */ | 319 | restore_upper_240kb(lscsa_ea); /* Step 4. */ |
| 320 | /* Step 5: done by 'exit'. */ | 320 | /* Step 5: done by 'exit'. */ |
| 321 | restore_decr(); /* Step 6. */ | ||
| 322 | enqueue_putllc(lscsa_ea); /* Step 7. */ | 321 | enqueue_putllc(lscsa_ea); /* Step 7. */ |
| 323 | set_tag_update(); /* Step 8. */ | 322 | set_tag_update(); /* Step 8. */ |
| 324 | read_tag_status(); /* Step 9. */ | 323 | read_tag_status(); /* Step 9. */ |
| 324 | restore_decr(); /* moved Step 6. */ | ||
| 325 | read_llar_status(); /* Step 10. */ | 325 | read_llar_status(); /* Step 10. */ |
| 326 | write_ppu_mb(); /* Step 11. */ | 326 | write_ppu_mb(); /* Step 11. */ |
| 327 | write_ppuint_mb(); /* Step 12. */ | 327 | write_ppuint_mb(); /* Step 12. */ |
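The two spu_restore.c changes above assume decr_status is a bit field rather than a boolean, matching the SPU_DECR_STATUS_RUNNING/WRAPPED flags used in switch.c later in this patch, and they defer the decrementer write until after the tag status read, presumably so the decrementer restarts as late as possible in the restore sequence. A sketch of the masking; the flag values are assumed here, since their definitions are outside this diff, and write_decrementer() is a hypothetical helper:

        /* Assumed values; the real definitions live outside this diff. */
        #define SPU_DECR_STATUS_RUNNING 0x1
        #define SPU_DECR_STATUS_WRAPPED 0x2

        static void toy_restore_decr(unsigned int decr_status, unsigned int decr)
        {
                if (decr_status & SPU_DECR_STATUS_RUNNING)
                        write_decrementer(decr);  /* hypothetical channel write */
                /* WRAPPED is consumed separately, to raise the timer event. */
        }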
diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped index 15183d209b58..f383b027e8bf 100644 --- a/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped +++ b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped | |||
| @@ -10,7 +10,7 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { | |||
| 10 | 0x24fd8081, | 10 | 0x24fd8081, |
| 11 | 0x1cd80081, | 11 | 0x1cd80081, |
| 12 | 0x33001180, | 12 | 0x33001180, |
| 13 | 0x42030003, | 13 | 0x42034003, |
| 14 | 0x33800284, | 14 | 0x33800284, |
| 15 | 0x1c010204, | 15 | 0x1c010204, |
| 16 | 0x40200000, | 16 | 0x40200000, |
| @@ -24,22 +24,22 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { | |||
| 24 | 0x23fffd84, | 24 | 0x23fffd84, |
| 25 | 0x1c100183, | 25 | 0x1c100183, |
| 26 | 0x217ffa85, | 26 | 0x217ffa85, |
| 27 | 0x3080a000, | 27 | 0x3080b000, |
| 28 | 0x3080a201, | 28 | 0x3080b201, |
| 29 | 0x3080a402, | 29 | 0x3080b402, |
| 30 | 0x3080a603, | 30 | 0x3080b603, |
| 31 | 0x3080a804, | 31 | 0x3080b804, |
| 32 | 0x3080aa05, | 32 | 0x3080ba05, |
| 33 | 0x3080ac06, | 33 | 0x3080bc06, |
| 34 | 0x3080ae07, | 34 | 0x3080be07, |
| 35 | 0x3080b008, | 35 | 0x3080c008, |
| 36 | 0x3080b209, | 36 | 0x3080c209, |
| 37 | 0x3080b40a, | 37 | 0x3080c40a, |
| 38 | 0x3080b60b, | 38 | 0x3080c60b, |
| 39 | 0x3080b80c, | 39 | 0x3080c80c, |
| 40 | 0x3080ba0d, | 40 | 0x3080ca0d, |
| 41 | 0x3080bc0e, | 41 | 0x3080cc0e, |
| 42 | 0x3080be0f, | 42 | 0x3080ce0f, |
| 43 | 0x00003ffc, | 43 | 0x00003ffc, |
| 44 | 0x00000000, | 44 | 0x00000000, |
| 45 | 0x00000000, | 45 | 0x00000000, |
| @@ -48,19 +48,18 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { | |||
| 48 | 0x3ec00083, | 48 | 0x3ec00083, |
| 49 | 0xb0a14103, | 49 | 0xb0a14103, |
| 50 | 0x01a00204, | 50 | 0x01a00204, |
| 51 | 0x3ec10082, | 51 | 0x3ec10083, |
| 52 | 0x4202800e, | 52 | 0x4202c002, |
| 53 | 0x04000703, | 53 | 0xb0a14203, |
| 54 | 0xb0a14202, | 54 | 0x21a00802, |
| 55 | 0x21a00803, | 55 | 0x3fbf028a, |
| 56 | 0x3fbf028d, | 56 | 0x3f20050a, |
| 57 | 0x3f20068d, | 57 | 0x3fbe0502, |
| 58 | 0x3fbe0682, | ||
| 59 | 0x3fe30102, | 58 | 0x3fe30102, |
| 60 | 0x21a00882, | 59 | 0x21a00882, |
| 61 | 0x3f82028f, | 60 | 0x3f82028b, |
| 62 | 0x3fe3078f, | 61 | 0x3fe3058b, |
| 63 | 0x3fbf0784, | 62 | 0x3fbf0584, |
| 64 | 0x3f200204, | 63 | 0x3f200204, |
| 65 | 0x3fbe0204, | 64 | 0x3fbe0204, |
| 66 | 0x3fe30204, | 65 | 0x3fe30204, |
| @@ -75,252 +74,285 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { | |||
| 75 | 0x21a00083, | 74 | 0x21a00083, |
| 76 | 0x40800082, | 75 | 0x40800082, |
| 77 | 0x21a00b02, | 76 | 0x21a00b02, |
| 78 | 0x10002818, | 77 | 0x10002612, |
| 79 | 0x42a00002, | 78 | 0x42a00003, |
| 80 | 0x32800007, | 79 | 0x42074006, |
| 81 | 0x4207000c, | 80 | 0x1800c204, |
| 82 | 0x18008208, | 81 | 0x40a00008, |
| 83 | 0x40a0000b, | 82 | 0x40800789, |
| 84 | 0x4080020a, | 83 | 0x1c010305, |
| 85 | 0x40800709, | 84 | 0x34000302, |
| 86 | 0x00200000, | ||
| 87 | 0x42070002, | ||
| 88 | 0x3ac30384, | ||
| 89 | 0x1cffc489, | 85 | 0x1cffc489, |
| 90 | 0x00200000, | 86 | 0x3ec00303, |
| 91 | 0x18008383, | 87 | 0x3ec00287, |
| 92 | 0x38830382, | 88 | 0xb0408403, |
| 93 | 0x4cffc486, | 89 | 0x24000302, |
| 94 | 0x3ac28185, | 90 | 0x34000282, |
| 95 | 0xb0408584, | 91 | 0x1c020306, |
| 96 | 0x28830382, | 92 | 0xb0408207, |
| 97 | 0x1c020387, | 93 | 0x18020204, |
| 98 | 0x38828182, | 94 | 0x24000282, |
| 99 | 0xb0408405, | 95 | 0x217ffa09, |
| 100 | 0x1802c408, | 96 | 0x04000402, |
| 101 | 0x28828182, | 97 | 0x21a00802, |
| 102 | 0x217ff886, | 98 | 0x3fbe0504, |
| 103 | 0x04000583, | 99 | 0x3fe30204, |
| 104 | 0x21a00803, | 100 | 0x21a00884, |
| 105 | 0x3fbe0682, | 101 | 0x42074002, |
| 106 | 0x3fe30102, | 102 | 0x21a00902, |
| 107 | 0x04000106, | 103 | 0x40803c03, |
| 108 | 0x21a00886, | 104 | 0x21a00983, |
| 109 | 0x04000603, | 105 | 0x04000485, |
| 110 | 0x21a00903, | 106 | 0x21a00a05, |
| 111 | 0x40803c02, | ||
| 112 | 0x21a00982, | ||
| 113 | 0x40800003, | ||
| 114 | 0x04000184, | ||
| 115 | 0x21a00a04, | ||
| 116 | 0x40802202, | 107 | 0x40802202, |
| 117 | 0x21a00a82, | 108 | 0x21a00a82, |
| 118 | 0x42028005, | 109 | 0x21a00805, |
| 119 | 0x34208702, | 110 | 0x21a00884, |
| 120 | 0x21002282, | 111 | 0x3fbf0582, |
| 121 | 0x21a00804, | ||
| 122 | 0x21a00886, | ||
| 123 | 0x3fbf0782, | ||
| 124 | 0x3f200102, | 112 | 0x3f200102, |
| 125 | 0x3fbe0102, | 113 | 0x3fbe0102, |
| 126 | 0x3fe30102, | 114 | 0x3fe30102, |
| 127 | 0x21a00902, | 115 | 0x21a00902, |
| 128 | 0x40804003, | 116 | 0x40804003, |
| 129 | 0x21a00983, | 117 | 0x21a00983, |
| 130 | 0x21a00a04, | 118 | 0x21a00a05, |
| 131 | 0x40805a02, | 119 | 0x40805a02, |
| 132 | 0x21a00a82, | 120 | 0x21a00a82, |
| 133 | 0x40800083, | 121 | 0x40800083, |
| 134 | 0x21a00b83, | 122 | 0x21a00b83, |
| 135 | 0x01a00c02, | 123 | 0x01a00c02, |
| 136 | 0x01a00d83, | 124 | 0x30809c03, |
| 137 | 0x3420c282, | 125 | 0x34000182, |
| 126 | 0x14004102, | ||
| 127 | 0x21002082, | ||
| 128 | 0x01a00d82, | ||
| 129 | 0x3080a003, | ||
| 130 | 0x34000182, | ||
| 138 | 0x21a00e02, | 131 | 0x21a00e02, |
| 139 | 0x34210283, | 132 | 0x3080a203, |
| 140 | 0x21a00f03, | 133 | 0x34000182, |
| 141 | 0x34200284, | 134 | 0x21a00f02, |
| 142 | 0x77400200, | 135 | 0x3080a403, |
| 143 | 0x3421c282, | 136 | 0x34000182, |
| 137 | 0x77400100, | ||
| 138 | 0x3080a603, | ||
| 139 | 0x34000182, | ||
| 144 | 0x21a00702, | 140 | 0x21a00702, |
| 145 | 0x34218283, | 141 | 0x3080a803, |
| 146 | 0x21a00083, | 142 | 0x34000182, |
| 147 | 0x34214282, | 143 | 0x21a00082, |
| 144 | 0x3080aa03, | ||
| 145 | 0x34000182, | ||
| 148 | 0x21a00b02, | 146 | 0x21a00b02, |
| 149 | 0x4200480c, | 147 | 0x4020007f, |
| 150 | 0x00200000, | 148 | 0x3080ae02, |
| 151 | 0x1c010286, | 149 | 0x42004805, |
| 152 | 0x34220284, | 150 | 0x3080ac04, |
| 153 | 0x34220302, | 151 | 0x34000103, |
| 154 | 0x0f608203, | 152 | 0x34000202, |
| 155 | 0x5c024204, | 153 | 0x1cffc183, |
| 156 | 0x3b81810b, | 154 | 0x3b810106, |
| 157 | 0x42013c02, | 155 | 0x0f608184, |
| 158 | 0x00200000, | 156 | 0x42013802, |
| 159 | 0x18008185, | 157 | 0x5c020183, |
| 160 | 0x38808183, | 158 | 0x38810102, |
| 161 | 0x3b814182, | 159 | 0x3b810102, |
| 162 | 0x21004e84, | 160 | 0x21000e83, |
| 163 | 0x4020007f, | 161 | 0x4020007f, |
| 164 | 0x35000100, | 162 | 0x35000100, |
| 165 | 0x000004e0, | 163 | 0x00000470, |
| 166 | 0x000002a0, | 164 | 0x000002f8, |
| 167 | 0x000002e8, | 165 | 0x00000430, |
| 168 | 0x00000428, | ||
| 169 | 0x00000360, | 166 | 0x00000360, |
| 170 | 0x000002e8, | 167 | 0x000002f8, |
| 171 | 0x000004a0, | ||
| 172 | 0x00000468, | ||
| 173 | 0x000003c8, | 168 | 0x000003c8, |
| 169 | 0x000004a8, | ||
| 170 | 0x00000298, | ||
| 174 | 0x00000360, | 171 | 0x00000360, |
| 172 | 0x00200000, | ||
| 175 | 0x409ffe02, | 173 | 0x409ffe02, |
| 176 | 0x30801203, | 174 | 0x30801203, |
| 177 | 0x40800204, | 175 | 0x40800208, |
| 178 | 0x3ec40085, | 176 | 0x3ec40084, |
| 179 | 0x10009c09, | 177 | 0x40800407, |
| 180 | 0x3ac10606, | 178 | 0x3ac20289, |
| 181 | 0xb060c105, | 179 | 0xb060c104, |
| 182 | 0x4020007f, | 180 | 0x3ac1c284, |
| 183 | 0x4020007f, | ||
| 184 | 0x20801203, | 181 | 0x20801203, |
| 185 | 0x38810602, | 182 | 0x38820282, |
| 186 | 0xb0408586, | 183 | 0x41004003, |
| 187 | 0x28810602, | 184 | 0xb0408189, |
| 188 | 0x32004180, | 185 | 0x28820282, |
| 189 | 0x34204702, | 186 | 0x3881c282, |
| 187 | 0xb0408304, | ||
| 188 | 0x2881c282, | ||
| 189 | 0x00400000, | ||
| 190 | 0x40800003, | ||
| 191 | 0x35000000, | ||
| 192 | 0x30809e03, | ||
| 193 | 0x34000182, | ||
| 190 | 0x21a00382, | 194 | 0x21a00382, |
| 191 | 0x4020007f, | 195 | 0x4020007f, |
| 192 | 0x327fdc80, | 196 | 0x327fde00, |
| 193 | 0x409ffe02, | 197 | 0x409ffe02, |
| 194 | 0x30801203, | 198 | 0x30801203, |
| 195 | 0x40800204, | 199 | 0x40800206, |
| 196 | 0x3ec40087, | 200 | 0x3ec40084, |
| 197 | 0x40800405, | 201 | 0x40800407, |
| 198 | 0x00200000, | 202 | 0x40800608, |
| 199 | 0x40800606, | 203 | 0x3ac1828a, |
| 200 | 0x3ac10608, | 204 | 0x3ac20289, |
| 201 | 0x3ac14609, | 205 | 0xb060c104, |
| 202 | 0x3ac1860a, | 206 | 0x3ac1c284, |
| 203 | 0xb060c107, | ||
| 204 | 0x20801203, | 207 | 0x20801203, |
| 208 | 0x38818282, | ||
| 205 | 0x41004003, | 209 | 0x41004003, |
| 206 | 0x38810602, | 210 | 0xb040818a, |
| 207 | 0x4020007f, | 211 | 0x10005b0b, |
| 208 | 0xb0408188, | 212 | 0x41201003, |
| 209 | 0x4020007f, | 213 | 0x28818282, |
| 210 | 0x28810602, | 214 | 0x3881c282, |
| 211 | 0x41201002, | 215 | 0xb0408184, |
| 212 | 0x38814603, | ||
| 213 | 0x10009c09, | ||
| 214 | 0xb060c109, | ||
| 215 | 0x4020007f, | ||
| 216 | 0x28814603, | ||
| 217 | 0x41193f83, | 216 | 0x41193f83, |
| 218 | 0x38818602, | ||
| 219 | 0x60ffc003, | 217 | 0x60ffc003, |
| 220 | 0xb040818a, | 218 | 0x2881c282, |
| 221 | 0x28818602, | 219 | 0x38820282, |
| 222 | 0x32003080, | 220 | 0xb0408189, |
| 221 | 0x28820282, | ||
| 222 | 0x327fef80, | ||
| 223 | 0x409ffe02, | 223 | 0x409ffe02, |
| 224 | 0x30801203, | 224 | 0x30801203, |
| 225 | 0x40800204, | 225 | 0x40800207, |
| 226 | 0x3ec40087, | 226 | 0x3ec40086, |
| 227 | 0x41201008, | 227 | 0x4120100b, |
| 228 | 0x10009c14, | 228 | 0x10005b14, |
| 229 | 0x40800405, | 229 | 0x40800404, |
| 230 | 0x3ac10609, | 230 | 0x3ac1c289, |
| 231 | 0x40800606, | 231 | 0x40800608, |
| 232 | 0x3ac1460a, | 232 | 0xb060c106, |
| 233 | 0xb060c107, | 233 | 0x3ac10286, |
| 234 | 0x3ac1860b, | 234 | 0x3ac2028a, |
| 235 | 0x20801203, | 235 | 0x20801203, |
| 236 | 0x38810602, | 236 | 0x3881c282, |
| 237 | 0xb0408409, | ||
| 238 | 0x28810602, | ||
| 239 | 0x38814603, | ||
| 240 | 0xb060c40a, | ||
| 241 | 0x4020007f, | ||
| 242 | 0x28814603, | ||
| 243 | 0x41193f83, | 237 | 0x41193f83, |
| 244 | 0x38818602, | ||
| 245 | 0x60ffc003, | 238 | 0x60ffc003, |
| 246 | 0xb040818b, | 239 | 0xb0408589, |
| 247 | 0x28818602, | 240 | 0x2881c282, |
| 248 | 0x32002380, | 241 | 0x38810282, |
| 249 | 0x409ffe02, | 242 | 0xb0408586, |
| 250 | 0x30801204, | 243 | 0x28810282, |
| 251 | 0x40800205, | 244 | 0x38820282, |
| 252 | 0x3ec40083, | 245 | 0xb040818a, |
| 253 | 0x40800406, | 246 | 0x28820282, |
| 254 | 0x3ac14607, | ||
| 255 | 0x3ac18608, | ||
| 256 | 0xb0810103, | ||
| 257 | 0x41004002, | ||
| 258 | 0x20801204, | ||
| 259 | 0x4020007f, | ||
| 260 | 0x38814603, | ||
| 261 | 0x10009c0b, | ||
| 262 | 0xb060c107, | ||
| 263 | 0x4020007f, | ||
| 264 | 0x4020007f, | ||
| 265 | 0x28814603, | ||
| 266 | 0x38818602, | ||
| 267 | 0x4020007f, | ||
| 268 | 0x4020007f, | 247 | 0x4020007f, |
| 269 | 0xb0408588, | 248 | 0x327fe280, |
| 270 | 0x28818602, | 249 | 0x409ffe02, |
| 250 | 0x30801203, | ||
| 251 | 0x40800207, | ||
| 252 | 0x3ec40084, | ||
| 253 | 0x40800408, | ||
| 254 | 0x10005b14, | ||
| 255 | 0x40800609, | ||
| 256 | 0x3ac1c28a, | ||
| 257 | 0x3ac2028b, | ||
| 258 | 0xb060c104, | ||
| 259 | 0x3ac24284, | ||
| 260 | 0x20801203, | ||
| 261 | 0x41201003, | ||
| 262 | 0x3881c282, | ||
| 263 | 0xb040830a, | ||
| 264 | 0x2881c282, | ||
| 265 | 0x38820282, | ||
| 266 | 0xb040818b, | ||
| 267 | 0x41193f83, | ||
| 268 | 0x60ffc003, | ||
| 269 | 0x28820282, | ||
| 270 | 0x38824282, | ||
| 271 | 0xb0408184, | ||
| 272 | 0x28824282, | ||
| 271 | 0x4020007f, | 273 | 0x4020007f, |
| 272 | 0x32001780, | 274 | 0x327fd580, |
| 273 | 0x409ffe02, | 275 | 0x409ffe02, |
| 274 | 0x1000640e, | 276 | 0x1000658e, |
| 275 | 0x40800204, | 277 | 0x40800206, |
| 276 | 0x30801203, | 278 | 0x30801203, |
| 277 | 0x40800405, | 279 | 0x40800407, |
| 278 | 0x3ec40087, | 280 | 0x3ec40084, |
| 279 | 0x40800606, | 281 | 0x40800608, |
| 280 | 0x3ac10608, | 282 | 0x3ac1828a, |
| 281 | 0x3ac14609, | 283 | 0x3ac20289, |
| 282 | 0x3ac1860a, | 284 | 0xb060c104, |
| 283 | 0xb060c107, | 285 | 0x3ac1c284, |
| 284 | 0x20801203, | 286 | 0x20801203, |
| 285 | 0x413d8003, | 287 | 0x413d8003, |
| 286 | 0x38810602, | 288 | 0x38818282, |
| 287 | 0x4020007f, | 289 | 0x4020007f, |
| 288 | 0x327fd780, | 290 | 0x327fd800, |
| 289 | 0x409ffe02, | 291 | 0x409ffe03, |
| 290 | 0x10007f0c, | 292 | 0x30801202, |
| 291 | 0x40800205, | 293 | 0x40800207, |
| 292 | 0x30801204, | 294 | 0x3ec40084, |
| 293 | 0x40800406, | 295 | 0x10005b09, |
| 294 | 0x3ec40083, | 296 | 0x3ac1c288, |
| 295 | 0x3ac14607, | 297 | 0xb0408184, |
| 296 | 0x3ac18608, | ||
| 297 | 0xb0810103, | ||
| 298 | 0x413d8002, | ||
| 299 | 0x20801204, | ||
| 300 | 0x38814603, | ||
| 301 | 0x4020007f, | 298 | 0x4020007f, |
| 302 | 0x327feb80, | 299 | 0x4020007f, |
| 300 | 0x20801202, | ||
| 301 | 0x3881c282, | ||
| 302 | 0xb0408308, | ||
| 303 | 0x2881c282, | ||
| 304 | 0x327fc680, | ||
| 303 | 0x409ffe02, | 305 | 0x409ffe02, |
| 306 | 0x1000588b, | ||
| 307 | 0x40800208, | ||
| 304 | 0x30801203, | 308 | 0x30801203, |
| 305 | 0x40800204, | 309 | 0x40800407, |
| 306 | 0x3ec40087, | 310 | 0x3ec40084, |
| 307 | 0x40800405, | 311 | 0x3ac20289, |
| 308 | 0x1000650a, | 312 | 0xb060c104, |
| 309 | 0x40800606, | 313 | 0x3ac1c284, |
| 310 | 0x3ac10608, | ||
| 311 | 0x3ac14609, | ||
| 312 | 0x3ac1860a, | ||
| 313 | 0xb060c107, | ||
| 314 | 0x20801203, | 314 | 0x20801203, |
| 315 | 0x38810602, | 315 | 0x413d8003, |
| 316 | 0xb0408588, | 316 | 0x38820282, |
| 317 | 0x4020007f, | 317 | 0x327fbd80, |
| 318 | 0x327fc980, | 318 | 0x00200000, |
| 319 | 0x00400000, | 319 | 0x00000da0, |
| 320 | 0x40800003, | 320 | 0x00000000, |
| 321 | 0x4020007f, | 321 | 0x00000000, |
| 322 | 0x35000000, | 322 | 0x00000000, |
| 323 | 0x00000d90, | ||
| 324 | 0x00000000, | ||
| 325 | 0x00000000, | ||
| 326 | 0x00000000, | ||
| 327 | 0x00000db0, | ||
| 328 | 0x00000000, | ||
| 329 | 0x00000000, | ||
| 330 | 0x00000000, | ||
| 331 | 0x00000dc0, | ||
| 332 | 0x00000000, | ||
| 333 | 0x00000000, | ||
| 334 | 0x00000000, | ||
| 335 | 0x00000d80, | ||
| 336 | 0x00000000, | ||
| 337 | 0x00000000, | ||
| 338 | 0x00000000, | ||
| 339 | 0x00000df0, | ||
| 340 | 0x00000000, | ||
| 341 | 0x00000000, | ||
| 342 | 0x00000000, | ||
| 343 | 0x00000de0, | ||
| 344 | 0x00000000, | ||
| 345 | 0x00000000, | ||
| 346 | 0x00000000, | ||
| 347 | 0x00000dd0, | ||
| 348 | 0x00000000, | ||
| 349 | 0x00000000, | ||
| 350 | 0x00000000, | ||
| 351 | 0x00000e04, | ||
| 352 | 0x00000000, | ||
| 353 | 0x00000000, | ||
| 323 | 0x00000000, | 354 | 0x00000000, |
| 355 | 0x00000e00, | ||
| 324 | 0x00000000, | 356 | 0x00000000, |
| 325 | 0x00000000, | 357 | 0x00000000, |
| 326 | 0x00000000, | 358 | 0x00000000, |
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 08b3530288ac..8b20c0c1556f 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h | |||
| @@ -40,17 +40,13 @@ enum { | |||
| 40 | struct spu_context_ops; | 40 | struct spu_context_ops; |
| 41 | struct spu_gang; | 41 | struct spu_gang; |
| 42 | 42 | ||
| 43 | /* | 43 | enum { |
| 44 | * This is the state for spu utilization reporting to userspace. | 44 | SPU_SCHED_WAS_ACTIVE, /* was active upon spu_acquire_saved() */ |
| 45 | * Because this state is visible to userspace it must never change and needs | 45 | }; |
| 46 | * to be kept strictly separate from any internal state kept by the kernel. | 46 | |
| 47 | */ | 47 | /* ctx->sched_flags */ |
| 48 | enum spuctx_execution_state { | 48 | enum { |
| 49 | SPUCTX_UTIL_USER = 0, | 49 | SPU_SCHED_NOTIFY_ACTIVE, |
| 50 | SPUCTX_UTIL_SYSTEM, | ||
| 51 | SPUCTX_UTIL_IOWAIT, | ||
| 52 | SPUCTX_UTIL_LOADED, | ||
| 53 | SPUCTX_UTIL_MAX | ||
| 54 | }; | 50 | }; |
| 55 | 51 | ||
| 56 | struct spu_context { | 52 | struct spu_context { |
| @@ -89,6 +85,8 @@ struct spu_context { | |||
| 89 | 85 | ||
| 90 | struct list_head gang_list; | 86 | struct list_head gang_list; |
| 91 | struct spu_gang *gang; | 87 | struct spu_gang *gang; |
| 88 | struct kref *prof_priv_kref; | ||
| 89 | void (*prof_priv_release)(struct kref *kref); | ||
| 92 | 90 | ||
| 93 | /* owner thread */ | 91 | /* owner thread */ |
| 94 | pid_t tid; | 92 | pid_t tid; |
| @@ -104,9 +102,9 @@ struct spu_context { | |||
| 104 | /* statistics */ | 102 | /* statistics */ |
| 105 | struct { | 103 | struct { |
| 106 | /* updates protected by ctx->state_mutex */ | 104 | /* updates protected by ctx->state_mutex */ |
| 107 | enum spuctx_execution_state execution_state; | 105 | enum spu_utilization_state util_state; |
| 108 | unsigned long tstamp; /* time of last ctx switch */ | 106 | unsigned long long tstamp; /* time of last state switch */ |
| 109 | unsigned long times[SPUCTX_UTIL_MAX]; | 107 | unsigned long long times[SPU_UTIL_MAX]; |
| 110 | unsigned long long vol_ctx_switch; | 108 | unsigned long long vol_ctx_switch; |
| 111 | unsigned long long invol_ctx_switch; | 109 | unsigned long long invol_ctx_switch; |
| 112 | unsigned long long min_flt; | 110 | unsigned long long min_flt; |
| @@ -118,6 +116,10 @@ struct spu_context { | |||
| 118 | unsigned long long class2_intr_base; /* # at last ctx switch */ | 116 | unsigned long long class2_intr_base; /* # at last ctx switch */ |
| 119 | unsigned long long libassist; | 117 | unsigned long long libassist; |
| 120 | } stats; | 118 | } stats; |
| 119 | |||
| 120 | struct list_head aff_list; | ||
| 121 | int aff_head; | ||
| 122 | int aff_offset; | ||
| 121 | }; | 123 | }; |
| 122 | 124 | ||
| 123 | struct spu_gang { | 125 | struct spu_gang { |
| @@ -125,8 +127,19 @@ struct spu_gang { | |||
| 125 | struct mutex mutex; | 127 | struct mutex mutex; |
| 126 | struct kref kref; | 128 | struct kref kref; |
| 127 | int contexts; | 129 | int contexts; |
| 130 | |||
| 131 | struct spu_context *aff_ref_ctx; | ||
| 132 | struct list_head aff_list_head; | ||
| 133 | struct mutex aff_mutex; | ||
| 134 | int aff_flags; | ||
| 135 | struct spu *aff_ref_spu; | ||
| 136 | atomic_t aff_sched_count; | ||
| 128 | }; | 137 | }; |
| 129 | 138 | ||
| 139 | /* Flag bits for spu_gang aff_flags */ | ||
| 140 | #define AFF_OFFSETS_SET 1 | ||
| 141 | #define AFF_MERGED 2 | ||
| 142 | |||
| 130 | struct mfc_dma_command { | 143 | struct mfc_dma_command { |
| 131 | int32_t pad; /* reserved */ | 144 | int32_t pad; /* reserved */ |
| 132 | uint32_t lsa; /* local storage address */ | 145 | uint32_t lsa; /* local storage address */ |
| @@ -190,10 +203,9 @@ extern struct tree_descr spufs_dir_contents[]; | |||
| 190 | extern struct tree_descr spufs_dir_nosched_contents[]; | 203 | extern struct tree_descr spufs_dir_nosched_contents[]; |
| 191 | 204 | ||
| 192 | /* system call implementation */ | 205 | /* system call implementation */ |
| 193 | long spufs_run_spu(struct file *file, | 206 | long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status); |
| 194 | struct spu_context *ctx, u32 *npc, u32 *status); | 207 | long spufs_create(struct nameidata *nd, unsigned int flags, |
| 195 | long spufs_create(struct nameidata *nd, | 208 | mode_t mode, struct file *filp); |
| 196 | unsigned int flags, mode_t mode); | ||
| 197 | extern const struct file_operations spufs_context_fops; | 209 | extern const struct file_operations spufs_context_fops; |
| 198 | 210 | ||
| 199 | /* gang management */ | 211 | /* gang management */ |
| @@ -206,6 +218,9 @@ void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx); | |||
| 206 | /* fault handling */ | 218 | /* fault handling */ |
| 207 | int spufs_handle_class1(struct spu_context *ctx); | 219 | int spufs_handle_class1(struct spu_context *ctx); |
| 208 | 220 | ||
| 221 | /* affinity */ | ||
| 222 | struct spu *affinity_check(struct spu_context *ctx); | ||
| 223 | |||
| 209 | /* context management */ | 224 | /* context management */ |
| 210 | extern atomic_t nr_spu_contexts; | 225 | extern atomic_t nr_spu_contexts; |
| 211 | static inline void spu_acquire(struct spu_context *ctx) | 226 | static inline void spu_acquire(struct spu_context *ctx) |
| @@ -227,15 +242,17 @@ void spu_unmap_mappings(struct spu_context *ctx); | |||
| 227 | void spu_forget(struct spu_context *ctx); | 242 | void spu_forget(struct spu_context *ctx); |
| 228 | int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags); | 243 | int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags); |
| 229 | void spu_acquire_saved(struct spu_context *ctx); | 244 | void spu_acquire_saved(struct spu_context *ctx); |
| 245 | void spu_release_saved(struct spu_context *ctx); | ||
| 230 | 246 | ||
| 231 | int spu_activate(struct spu_context *ctx, unsigned long flags); | 247 | int spu_activate(struct spu_context *ctx, unsigned long flags); |
| 232 | void spu_deactivate(struct spu_context *ctx); | 248 | void spu_deactivate(struct spu_context *ctx); |
| 233 | void spu_yield(struct spu_context *ctx); | 249 | void spu_yield(struct spu_context *ctx); |
| 250 | void spu_switch_notify(struct spu *spu, struct spu_context *ctx); | ||
| 234 | void spu_set_timeslice(struct spu_context *ctx); | 251 | void spu_set_timeslice(struct spu_context *ctx); |
| 235 | void spu_update_sched_info(struct spu_context *ctx); | 252 | void spu_update_sched_info(struct spu_context *ctx); |
| 236 | void __spu_update_sched_info(struct spu_context *ctx); | 253 | void __spu_update_sched_info(struct spu_context *ctx); |
| 237 | int __init spu_sched_init(void); | 254 | int __init spu_sched_init(void); |
| 238 | void __exit spu_sched_exit(void); | 255 | void spu_sched_exit(void); |
| 239 | 256 | ||
| 240 | extern char *isolated_loader; | 257 | extern char *isolated_loader; |
| 241 | 258 | ||
| @@ -293,30 +310,34 @@ extern int spufs_coredump_num_notes; | |||
| 293 | * line. | 310 | * line. |
| 294 | */ | 311 | */ |
| 295 | static inline void spuctx_switch_state(struct spu_context *ctx, | 312 | static inline void spuctx_switch_state(struct spu_context *ctx, |
| 296 | enum spuctx_execution_state new_state) | 313 | enum spu_utilization_state new_state) |
| 297 | { | 314 | { |
| 298 | WARN_ON(!mutex_is_locked(&ctx->state_mutex)); | 315 | unsigned long long curtime; |
| 299 | 316 | signed long long delta; | |
| 300 | if (ctx->stats.execution_state != new_state) { | 317 | struct timespec ts; |
| 301 | unsigned long curtime = jiffies; | 318 | struct spu *spu; |
| 302 | 319 | enum spu_utilization_state old_state; | |
| 303 | ctx->stats.times[ctx->stats.execution_state] += | ||
| 304 | curtime - ctx->stats.tstamp; | ||
| 305 | ctx->stats.tstamp = curtime; | ||
| 306 | ctx->stats.execution_state = new_state; | ||
| 307 | } | ||
| 308 | } | ||
| 309 | 320 | ||
| 310 | static inline void spu_switch_state(struct spu *spu, | 321 | ktime_get_ts(&ts); |
| 311 | enum spuctx_execution_state new_state) | 322 | curtime = timespec_to_ns(&ts); |
| 312 | { | 323 | delta = curtime - ctx->stats.tstamp; |
| 313 | if (spu->stats.utilization_state != new_state) { | ||
| 314 | unsigned long curtime = jiffies; | ||
| 315 | 324 | ||
| 316 | spu->stats.times[spu->stats.utilization_state] += | 325 | WARN_ON(!mutex_is_locked(&ctx->state_mutex)); |
| 317 | curtime - spu->stats.tstamp; | 326 | WARN_ON(delta < 0); |
| 327 | |||
| 328 | spu = ctx->spu; | ||
| 329 | old_state = ctx->stats.util_state; | ||
| 330 | ctx->stats.util_state = new_state; | ||
| 331 | ctx->stats.tstamp = curtime; | ||
| 332 | |||
| 333 | /* | ||
| 334 | * Update the physical SPU utilization statistics. | ||
| 335 | */ | ||
| 336 | if (spu) { | ||
| 337 | ctx->stats.times[old_state] += delta; | ||
| 338 | spu->stats.times[old_state] += delta; | ||
| 339 | spu->stats.util_state = new_state; | ||
| 318 | spu->stats.tstamp = curtime; | 340 | spu->stats.tstamp = curtime; |
| 319 | spu->stats.utilization_state = new_state; | ||
| 320 | } | 341 | } |
| 321 | } | 342 | } |
| 322 | 343 | ||
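The rewritten spuctx_switch_state() above moves the utilization clock from jiffies to the monotonic clock in nanoseconds and charges each delta to both the context and, when one is loaded, the physical SPU. A minimal sketch of the timestamp source it relies on, assuming the timekeeping API of this kernel era:

        #include <linux/time.h>

        static unsigned long long now_ns(void)
        {
                struct timespec ts;

                ktime_get_ts(&ts);              /* monotonic clock */
                return timespec_to_ns(&ts);
        }

The usage pattern in the helper is: delta = now_ns() - stats.tstamp; charge delta to the old state, then stamp the new state. Keeping delta signed lets WARN_ON(delta < 0) catch timestamp misuse.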
diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index 9c506ba08cdc..27ffdae98e5a 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c | |||
| @@ -180,7 +180,7 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu) | |||
| 180 | case MFC_CNTL_SUSPEND_COMPLETE: | 180 | case MFC_CNTL_SUSPEND_COMPLETE: |
| 181 | if (csa) { | 181 | if (csa) { |
| 182 | csa->priv2.mfc_control_RW = | 182 | csa->priv2.mfc_control_RW = |
| 183 | in_be64(&priv2->mfc_control_RW) | | 183 | MFC_CNTL_SUSPEND_MASK | |
| 184 | MFC_CNTL_SUSPEND_DMA_QUEUE; | 184 | MFC_CNTL_SUSPEND_DMA_QUEUE; |
| 185 | } | 185 | } |
| 186 | break; | 186 | break; |
| @@ -190,9 +190,7 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu) | |||
| 190 | MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == | 190 | MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == |
| 191 | MFC_CNTL_SUSPEND_COMPLETE); | 191 | MFC_CNTL_SUSPEND_COMPLETE); |
| 192 | if (csa) { | 192 | if (csa) { |
| 193 | csa->priv2.mfc_control_RW = | 193 | csa->priv2.mfc_control_RW = 0; |
| 194 | in_be64(&priv2->mfc_control_RW) & | ||
| 195 | ~MFC_CNTL_SUSPEND_DMA_QUEUE; | ||
| 196 | } | 194 | } |
| 197 | break; | 195 | break; |
| 198 | } | 196 | } |
| @@ -251,16 +249,8 @@ static inline void save_mfc_decr(struct spu_state *csa, struct spu *spu) | |||
| 251 | * Read MFC_CNTL[Ds]. Update saved copy of | 249 | * Read MFC_CNTL[Ds]. Update saved copy of |
| 252 | * CSA.MFC_CNTL[Ds]. | 250 | * CSA.MFC_CNTL[Ds]. |
| 253 | */ | 251 | */ |
| 254 | if (in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING) { | 252 | csa->priv2.mfc_control_RW |= |
| 255 | csa->priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING; | 253 | in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING; |
| 256 | csa->suspend_time = get_cycles(); | ||
| 257 | out_be64(&priv2->spu_chnlcntptr_RW, 7ULL); | ||
| 258 | eieio(); | ||
| 259 | csa->spu_chnldata_RW[7] = in_be64(&priv2->spu_chnldata_RW); | ||
| 260 | eieio(); | ||
| 261 | } else { | ||
| 262 | csa->priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING; | ||
| 263 | } | ||
| 264 | } | 254 | } |
| 265 | 255 | ||
| 266 | static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu) | 256 | static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu) |
| @@ -271,7 +261,8 @@ static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu) | |||
| 271 | * Write MFC_CNTL[Dh] set to a '1' to halt | 261 | * Write MFC_CNTL[Dh] set to a '1' to halt |
| 272 | * the decrementer. | 262 | * the decrementer. |
| 273 | */ | 263 | */ |
| 274 | out_be64(&priv2->mfc_control_RW, MFC_CNTL_DECREMENTER_HALTED); | 264 | out_be64(&priv2->mfc_control_RW, |
| 265 | MFC_CNTL_DECREMENTER_HALTED | MFC_CNTL_SUSPEND_MASK); | ||
| 275 | eieio(); | 266 | eieio(); |
| 276 | } | 267 | } |
| 277 | 268 | ||
| @@ -615,7 +606,7 @@ static inline void save_ppuint_mb(struct spu_state *csa, struct spu *spu) | |||
| 615 | static inline void save_ch_part1(struct spu_state *csa, struct spu *spu) | 606 | static inline void save_ch_part1(struct spu_state *csa, struct spu *spu) |
| 616 | { | 607 | { |
| 617 | struct spu_priv2 __iomem *priv2 = spu->priv2; | 608 | struct spu_priv2 __iomem *priv2 = spu->priv2; |
| 618 | u64 idx, ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; | 609 | u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; |
| 619 | int i; | 610 | int i; |
| 620 | 611 | ||
| 621 | /* Save, Step 42: | 612 | /* Save, Step 42: |
| @@ -626,7 +617,7 @@ static inline void save_ch_part1(struct spu_state *csa, struct spu *spu) | |||
| 626 | csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW); | 617 | csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW); |
| 627 | 618 | ||
| 628 | /* Save the following CH: [0,3,4,24,25,27] */ | 619 | /* Save the following CH: [0,3,4,24,25,27] */ |
| 629 | for (i = 0; i < 7; i++) { | 620 | for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { |
| 630 | idx = ch_indices[i]; | 621 | idx = ch_indices[i]; |
| 631 | out_be64(&priv2->spu_chnlcntptr_RW, idx); | 622 | out_be64(&priv2->spu_chnlcntptr_RW, idx); |
| 632 | eieio(); | 623 | eieio(); |
| @@ -983,13 +974,13 @@ static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu) | |||
| 983 | */ | 974 | */ |
| 984 | } | 975 | } |
| 985 | 976 | ||
| 986 | static inline void suspend_mfc(struct spu_state *csa, struct spu *spu) | 977 | static inline void suspend_mfc_and_halt_decr(struct spu_state *csa, |
| 978 | struct spu *spu) | ||
| 987 | { | 979 | { |
| 988 | struct spu_priv2 __iomem *priv2 = spu->priv2; | 980 | struct spu_priv2 __iomem *priv2 = spu->priv2; |
| 989 | 981 | ||
| 990 | /* Restore, Step 7: | 982 | /* Restore, Step 7: |
| 991 | * Restore, Step 47. | 983 | * Write MFC_Cntl[Dh,Sc,Sm]='1','1','0' to suspend |
| 992 | * Write MFC_Cntl[Dh,Sc]='1','1' to suspend | ||
| 993 | * the queue and halt the decrementer. | 984 | * the queue and halt the decrementer. |
| 994 | */ | 985 | */ |
| 995 | out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE | | 986 | out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE | |
| @@ -1090,7 +1081,7 @@ static inline void clear_spu_status(struct spu_state *csa, struct spu *spu) | |||
| 1090 | static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu) | 1081 | static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu) |
| 1091 | { | 1082 | { |
| 1092 | struct spu_priv2 __iomem *priv2 = spu->priv2; | 1083 | struct spu_priv2 __iomem *priv2 = spu->priv2; |
| 1093 | u64 ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; | 1084 | u64 ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; |
| 1094 | u64 idx; | 1085 | u64 idx; |
| 1095 | int i; | 1086 | int i; |
| 1096 | 1087 | ||
| @@ -1102,7 +1093,7 @@ static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu) | |||
| 1102 | out_be64(&priv2->spu_chnldata_RW, 0UL); | 1093 | out_be64(&priv2->spu_chnldata_RW, 0UL); |
| 1103 | 1094 | ||
| 1104 | /* Reset the following CH: [0,3,4,24,25,27] */ | 1095 | /* Reset the following CH: [0,3,4,24,25,27] */ |
| 1105 | for (i = 0; i < 7; i++) { | 1096 | for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { |
| 1106 | idx = ch_indices[i]; | 1097 | idx = ch_indices[i]; |
| 1107 | out_be64(&priv2->spu_chnlcntptr_RW, idx); | 1098 | out_be64(&priv2->spu_chnlcntptr_RW, idx); |
| 1108 | eieio(); | 1099 | eieio(); |
| @@ -1289,7 +1280,15 @@ static inline void setup_decr(struct spu_state *csa, struct spu *spu) | |||
| 1289 | cycles_t resume_time = get_cycles(); | 1280 | cycles_t resume_time = get_cycles(); |
| 1290 | cycles_t delta_time = resume_time - csa->suspend_time; | 1281 | cycles_t delta_time = resume_time - csa->suspend_time; |
| 1291 | 1282 | ||
| 1283 | csa->lscsa->decr_status.slot[0] = SPU_DECR_STATUS_RUNNING; | ||
| 1284 | if (csa->lscsa->decr.slot[0] < delta_time) { | ||
| 1285 | csa->lscsa->decr_status.slot[0] |= | ||
| 1286 | SPU_DECR_STATUS_WRAPPED; | ||
| 1287 | } | ||
| 1288 | |||
| 1292 | csa->lscsa->decr.slot[0] -= delta_time; | 1289 | csa->lscsa->decr.slot[0] -= delta_time; |
| 1290 | } else { | ||
| 1291 | csa->lscsa->decr_status.slot[0] = 0; | ||
| 1293 | } | 1292 | } |
| 1294 | } | 1293 | } |
| 1295 | 1294 | ||
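The setup_decr() hunk above adds wrap detection: if more cycles elapsed while the context was saved than remained on the decrementer, the hardware would have underflowed, so the WRAPPED flag is recorded before the saved value is adjusted. A toy model of the check (flag and function names follow the patch; everything else is illustrative):

        static unsigned int toy_setup_decr(unsigned int decr,
                                           cycles_t suspend_time,
                                           unsigned int *status)
        {
                cycles_t elapsed = get_cycles() - suspend_time;

                *status = SPU_DECR_STATUS_RUNNING;
                if (decr < elapsed)
                        *status |= SPU_DECR_STATUS_WRAPPED; /* underflowed */

                return decr - elapsed;  /* may wrap, as the hardware would */
        }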
| @@ -1398,6 +1397,18 @@ static inline void restore_ls_16kb(struct spu_state *csa, struct spu *spu) | |||
| 1398 | send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); | 1397 | send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); |
| 1399 | } | 1398 | } |
| 1400 | 1399 | ||
| 1400 | static inline void suspend_mfc(struct spu_state *csa, struct spu *spu) | ||
| 1401 | { | ||
| 1402 | struct spu_priv2 __iomem *priv2 = spu->priv2; | ||
| 1403 | |||
| 1404 | /* Restore, Step 47. | ||
| 1405 | * Write MFC_Cntl[Sc,Sm]='1','0' to suspend | ||
| 1406 | * the queue. | ||
| 1407 | */ | ||
| 1408 | out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE); | ||
| 1409 | eieio(); | ||
| 1410 | } | ||
| 1411 | |||
| 1401 | static inline void clear_interrupts(struct spu_state *csa, struct spu *spu) | 1412 | static inline void clear_interrupts(struct spu_state *csa, struct spu *spu) |
| 1402 | { | 1413 | { |
| 1403 | /* Restore, Step 49: | 1414 | /* Restore, Step 49: |
| @@ -1548,10 +1559,10 @@ static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu) | |||
| 1548 | * "wrapped" flag is set, OR in a '1' to | 1559 | * "wrapped" flag is set, OR in a '1' to |
| 1549 | * CSA.SPU_Event_Status[Tm]. | 1560 | * CSA.SPU_Event_Status[Tm]. |
| 1550 | */ | 1561 | */ |
| 1551 | if (csa->lscsa->decr_status.slot[0] == 1) { | 1562 | if (csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED) { |
| 1552 | csa->spu_chnldata_RW[0] |= 0x20; | 1563 | csa->spu_chnldata_RW[0] |= 0x20; |
| 1553 | } | 1564 | } |
| 1554 | if ((csa->lscsa->decr_status.slot[0] == 1) && | 1565 | if ((csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED) && |
| 1555 | (csa->spu_chnlcnt_RW[0] == 0 && | 1566 | (csa->spu_chnlcnt_RW[0] == 0 && |
| 1556 | ((csa->spu_chnldata_RW[2] & 0x20) == 0x0) && | 1567 | ((csa->spu_chnldata_RW[2] & 0x20) == 0x0) && |
| 1557 | ((csa->spu_chnldata_RW[0] & 0x20) != 0x1))) { | 1568 | ((csa->spu_chnldata_RW[0] & 0x20) != 0x1))) { |
| @@ -1562,18 +1573,13 @@ static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu) | |||
| 1562 | static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu) | 1573 | static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu) |
| 1563 | { | 1574 | { |
| 1564 | struct spu_priv2 __iomem *priv2 = spu->priv2; | 1575 | struct spu_priv2 __iomem *priv2 = spu->priv2; |
| 1565 | u64 idx, ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; | 1576 | u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; |
| 1566 | int i; | 1577 | int i; |
| 1567 | 1578 | ||
| 1568 | /* Restore, Step 59: | 1579 | /* Restore, Step 59: |
| 1580 | * Restore the following CH: [0,3,4,24,25,27] | ||
| 1569 | */ | 1581 | */ |
| 1570 | 1582 | for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { | |
| 1571 | /* Restore CH 1 without count */ | ||
| 1572 | out_be64(&priv2->spu_chnlcntptr_RW, 1); | ||
| 1573 | out_be64(&priv2->spu_chnldata_RW, csa->spu_chnldata_RW[1]); | ||
| 1574 | |||
| 1575 | /* Restore the following CH: [0,3,4,24,25,27] */ | ||
| 1576 | for (i = 0; i < 7; i++) { | ||
| 1577 | idx = ch_indices[i]; | 1583 | idx = ch_indices[i]; |
| 1578 | out_be64(&priv2->spu_chnlcntptr_RW, idx); | 1584 | out_be64(&priv2->spu_chnlcntptr_RW, idx); |
| 1579 | eieio(); | 1585 | eieio(); |
| @@ -1932,7 +1938,7 @@ static void harvest(struct spu_state *prev, struct spu *spu) | |||
| 1932 | set_switch_pending(prev, spu); /* Step 5. */ | 1938 | set_switch_pending(prev, spu); /* Step 5. */ |
| 1933 | stop_spu_isolate(spu); /* NEW. */ | 1939 | stop_spu_isolate(spu); /* NEW. */ |
| 1934 | remove_other_spu_access(prev, spu); /* Step 6. */ | 1940 | remove_other_spu_access(prev, spu); /* Step 6. */ |
| 1935 | suspend_mfc(prev, spu); /* Step 7. */ | 1941 | suspend_mfc_and_halt_decr(prev, spu); /* Step 7. */ |
| 1936 | wait_suspend_mfc_complete(prev, spu); /* Step 8. */ | 1942 | wait_suspend_mfc_complete(prev, spu); /* Step 8. */ |
| 1937 | if (!suspend_spe(prev, spu)) /* Step 9. */ | 1943 | if (!suspend_spe(prev, spu)) /* Step 9. */ |
| 1938 | clear_spu_status(prev, spu); /* Step 10. */ | 1944 | clear_spu_status(prev, spu); /* Step 10. */ |
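
Taken together, the decrementer hunks above replace the old boolean decr_status with a small flag word. A minimal sketch of the resulting accounting, assuming only the two flags the patch defines (this is a distillation, not the literal switch.c code):

        #define SPU_DECR_STATUS_RUNNING 0x1
        #define SPU_DECR_STATUS_WRAPPED 0x2

        /* On save: record that the decrementer was running and whether the
         * time spent suspended made it wrap; on restore, the wrapped flag
         * becomes the Tm event bit (0x20) in channel 0 data.
         */
        static void sketch_account_decr(unsigned int *decr,
                                        unsigned int *decr_status,
                                        unsigned int delta)
        {
                *decr_status = SPU_DECR_STATUS_RUNNING;
                if (*decr < delta)              /* wrapped while suspended */
                        *decr_status |= SPU_DECR_STATUS_WRAPPED;
                *decr -= delta;
        }
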
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c index 8e37bdf4dfda..43f0fb88abbc 100644 --- a/arch/powerpc/platforms/cell/spufs/syscalls.c +++ b/arch/powerpc/platforms/cell/spufs/syscalls.c | |||
| @@ -47,7 +47,7 @@ static long do_spu_run(struct file *filp, | |||
| 47 | goto out; | 47 | goto out; |
| 48 | 48 | ||
| 49 | i = SPUFS_I(filp->f_path.dentry->d_inode); | 49 | i = SPUFS_I(filp->f_path.dentry->d_inode); |
| 50 | ret = spufs_run_spu(filp, i->i_ctx, &npc, &status); | 50 | ret = spufs_run_spu(i->i_ctx, &npc, &status); |
| 51 | 51 | ||
| 52 | if (put_user(npc, unpc)) | 52 | if (put_user(npc, unpc)) |
| 53 | ret = -EFAULT; | 53 | ret = -EFAULT; |
| @@ -76,8 +76,8 @@ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) | |||
| 76 | } | 76 | } |
| 77 | #endif | 77 | #endif |
| 78 | 78 | ||
| 79 | asmlinkage long sys_spu_create(const char __user *pathname, | 79 | asmlinkage long do_spu_create(const char __user *pathname, unsigned int flags, |
| 80 | unsigned int flags, mode_t mode) | 80 | mode_t mode, struct file *neighbor) |
| 81 | { | 81 | { |
| 82 | char *tmp; | 82 | char *tmp; |
| 83 | int ret; | 83 | int ret; |
| @@ -90,7 +90,7 @@ asmlinkage long sys_spu_create(const char __user *pathname, | |||
| 90 | ret = path_lookup(tmp, LOOKUP_PARENT| | 90 | ret = path_lookup(tmp, LOOKUP_PARENT| |
| 91 | LOOKUP_OPEN|LOOKUP_CREATE, &nd); | 91 | LOOKUP_OPEN|LOOKUP_CREATE, &nd); |
| 92 | if (!ret) { | 92 | if (!ret) { |
| 93 | ret = spufs_create(&nd, flags, mode); | 93 | ret = spufs_create(&nd, flags, mode, neighbor); |
| 94 | path_release(&nd); | 94 | path_release(&nd); |
| 95 | } | 95 | } |
| 96 | putname(tmp); | 96 | putname(tmp); |
| @@ -99,8 +99,32 @@ asmlinkage long sys_spu_create(const char __user *pathname, | |||
| 99 | return ret; | 99 | return ret; |
| 100 | } | 100 | } |
| 101 | 101 | ||
| 102 | #ifndef MODULE | ||
| 103 | asmlinkage long sys_spu_create(const char __user *pathname, unsigned int flags, | ||
| 104 | mode_t mode, int neighbor_fd) | ||
| 105 | { | ||
| 106 | int fput_needed; | ||
| 107 | struct file *neighbor; | ||
| 108 | long ret; | ||
| 109 | |||
| 110 | if (flags & SPU_CREATE_AFFINITY_SPU) { | ||
| 111 | ret = -EBADF; | ||
| 112 | neighbor = fget_light(neighbor_fd, &fput_needed); | ||
| 113 | if (neighbor) { | ||
| 114 | ret = do_spu_create(pathname, flags, mode, neighbor); | ||
| 115 | fput_light(neighbor, fput_needed); | ||
| 116 | } | ||
| 117 | } else { | ||
| 119 | ret = do_spu_create(pathname, flags, mode, NULL); | ||
| 120 | } | ||
| 121 | |||
| 122 | return ret; | ||
| 123 | } | ||
| 124 | #endif | ||
| 125 | |||
| 102 | struct spufs_calls spufs_calls = { | 126 | struct spufs_calls spufs_calls = { |
| 103 | .create_thread = sys_spu_create, | 127 | .create_thread = do_spu_create, |
| 104 | .spu_run = do_spu_run, | 128 | .spu_run = do_spu_run, |
| 105 | .owner = THIS_MODULE, | 129 | .owner = THIS_MODULE, |
| 106 | }; | 130 | }; |
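
With the extra fd argument, spu_create gains an SPU-affinity hint. A hypothetical userspace call, assuming a powerpc libc that exposes SYS_spu_create (the flag value is copied from the spu.h hunk later in this series; the wrapper name is illustrative):

        #include <sys/syscall.h>
        #include <unistd.h>

        #define SPU_CREATE_AFFINITY_SPU 0x0010

        /* The fourth argument is only consulted when
         * SPU_CREATE_AFFINITY_SPU is set; it names the existing SPU
         * context this new one should be placed next to.
         */
        long spu_create_near(const char *path, int neighbor_fd)
        {
                return syscall(SYS_spu_create, path,
                               SPU_CREATE_AFFINITY_SPU, 0700, neighbor_fd);
        }
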
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index f65078c3d3b3..484eb4e0e9db 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile | |||
| @@ -17,6 +17,7 @@ obj-$(CONFIG_QUICC_ENGINE) += qe_lib/ | |||
| 17 | mv64x60-$(CONFIG_PCI) += mv64x60_pci.o | 17 | mv64x60-$(CONFIG_PCI) += mv64x60_pci.o |
| 18 | obj-$(CONFIG_MV64X60) += $(mv64x60-y) mv64x60_pic.o mv64x60_dev.o | 18 | obj-$(CONFIG_MV64X60) += $(mv64x60-y) mv64x60_pic.o mv64x60_dev.o |
| 19 | obj-$(CONFIG_RTC_DRV_CMOS) += rtc_cmos_setup.o | 19 | obj-$(CONFIG_RTC_DRV_CMOS) += rtc_cmos_setup.o |
| 20 | obj-$(CONFIG_AXON_RAM) += axonram.o | ||
| 20 | 21 | ||
| 21 | # contains only the suspend handler for time | 22 | # contains only the suspend handler for time |
| 22 | ifeq ($(CONFIG_RTC_CLASS),) | 23 | ifeq ($(CONFIG_RTC_CLASS),) |
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c new file mode 100644 index 000000000000..2326d5dc5752 --- /dev/null +++ b/arch/powerpc/sysdev/axonram.c | |||
| @@ -0,0 +1,381 @@ | |||
| 1 | /* | ||
| 2 | * (C) Copyright IBM Deutschland Entwicklung GmbH 2006 | ||
| 3 | * | ||
| 4 | * Author: Maxim Shchetynin <maxim@de.ibm.com> | ||
| 5 | * | ||
| 6 | * Axon DDR2 device driver. | ||
| 7 | * It registers one block device per Axon's DDR2 memory bank found on a system. | ||
| 8 | * Block devices are called axonram?, their major and minor numbers are | ||
| 9 | * available in /proc/devices, /proc/partitions or in /sys/block/axonram?/dev. | ||
| 10 | * | ||
| 11 | * This program is free software; you can redistribute it and/or modify | ||
| 12 | * it under the terms of the GNU General Public License as published by | ||
| 13 | * the Free Software Foundation; either version 2, or (at your option) | ||
| 14 | * any later version. | ||
| 15 | * | ||
| 16 | * This program is distributed in the hope that it will be useful, | ||
| 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 19 | * GNU General Public License for more details. | ||
| 20 | * | ||
| 21 | * You should have received a copy of the GNU General Public License | ||
| 22 | * along with this program; if not, write to the Free Software | ||
| 23 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 24 | */ | ||
| 25 | |||
| 26 | #include <linux/bio.h> | ||
| 27 | #include <linux/blkdev.h> | ||
| 28 | #include <linux/buffer_head.h> | ||
| 29 | #include <linux/device.h> | ||
| 30 | #include <linux/errno.h> | ||
| 31 | #include <linux/fs.h> | ||
| 32 | #include <linux/genhd.h> | ||
| 33 | #include <linux/interrupt.h> | ||
| 34 | #include <linux/io.h> | ||
| 35 | #include <linux/ioport.h> | ||
| 36 | #include <linux/irq.h> | ||
| 37 | #include <linux/irqreturn.h> | ||
| 38 | #include <linux/kernel.h> | ||
| 39 | #include <linux/mm.h> | ||
| 40 | #include <linux/mod_devicetable.h> | ||
| 41 | #include <linux/module.h> | ||
| 42 | #include <linux/slab.h> | ||
| 43 | #include <linux/string.h> | ||
| 44 | #include <linux/types.h> | ||
| 45 | #include <asm/of_device.h> | ||
| 46 | #include <asm/of_platform.h> | ||
| 47 | #include <asm/page.h> | ||
| 48 | #include <asm/prom.h> | ||
| 49 | |||
| 50 | #define AXON_RAM_MODULE_NAME "axonram" | ||
| 51 | #define AXON_RAM_DEVICE_NAME "axonram" | ||
| 52 | #define AXON_RAM_MINORS_PER_DISK 16 | ||
| 53 | #define AXON_RAM_BLOCK_SHIFT PAGE_SHIFT | ||
| 54 | #define AXON_RAM_BLOCK_SIZE (1 << AXON_RAM_BLOCK_SHIFT) | ||
| 55 | #define AXON_RAM_SECTOR_SHIFT 9 | ||
| 56 | #define AXON_RAM_SECTOR_SIZE (1 << AXON_RAM_SECTOR_SHIFT) | ||
| 57 | #define AXON_RAM_IRQ_FLAGS (IRQF_SHARED | IRQF_TRIGGER_RISING) | ||
| 58 | |||
| 59 | struct axon_ram_bank { | ||
| 60 | struct of_device *device; | ||
| 61 | struct gendisk *disk; | ||
| 62 | unsigned int irq_correctable; | ||
| 63 | unsigned int irq_uncorrectable; | ||
| 64 | unsigned long ph_addr; | ||
| 65 | unsigned long io_addr; | ||
| 66 | unsigned long size; | ||
| 67 | unsigned long ecc_counter; | ||
| 68 | }; | ||
| 69 | |||
| 70 | static ssize_t | ||
| 71 | axon_ram_sysfs_ecc(struct device *dev, struct device_attribute *attr, char *buf) | ||
| 72 | { | ||
| 73 | struct of_device *device = to_of_device(dev); | ||
| 74 | struct axon_ram_bank *bank = device->dev.platform_data; | ||
| 75 | |||
| 76 | BUG_ON(!bank); | ||
| 77 | |||
| 78 | return sprintf(buf, "%lu\n", bank->ecc_counter); | ||
| 79 | } | ||
| 80 | |||
| 81 | static DEVICE_ATTR(ecc, S_IRUGO, axon_ram_sysfs_ecc, NULL); | ||
| 82 | |||
| 83 | /** | ||
| 84 | * axon_ram_irq_handler - interrupt handler for Axon RAM ECC | ||
| 85 | * @irq: interrupt ID | ||
| 86 | * @dev: pointer to of_device | ||
| 87 | */ | ||
| 88 | static irqreturn_t | ||
| 89 | axon_ram_irq_handler(int irq, void *dev) | ||
| 90 | { | ||
| 91 | struct of_device *device = dev; | ||
| 92 | struct axon_ram_bank *bank = device->dev.platform_data; | ||
| 93 | |||
| 94 | BUG_ON(!bank); | ||
| 95 | |||
| 96 | if (irq == bank->irq_correctable) { | ||
| 97 | dev_err(&device->dev, "Correctable memory error occured\n"); | ||
| 98 | bank->ecc_counter++; | ||
| 99 | return IRQ_HANDLED; | ||
| 100 | } else if (irq == bank->irq_uncorrectable) { | ||
| 101 | dev_err(&device->dev, "Uncorrectable memory error occurred\n"); | ||
| 102 | panic("Critical ECC error on %s", device->node->full_name); | ||
| 103 | } | ||
| 104 | |||
| 105 | return IRQ_NONE; | ||
| 106 | } | ||
| 107 | |||
| 108 | /** | ||
| 109 | * axon_ram_make_request - make_request() method for block device | ||
| 110 | * @queue, @bio: see blk_queue_make_request() | ||
| 111 | */ | ||
| 112 | static int | ||
| 113 | axon_ram_make_request(struct request_queue *queue, struct bio *bio) | ||
| 114 | { | ||
| 115 | struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data; | ||
| 116 | unsigned long phys_mem, phys_end; | ||
| 117 | void *user_mem; | ||
| 118 | struct bio_vec *vec; | ||
| 119 | unsigned int transferred; | ||
| 120 | unsigned short idx; | ||
| 121 | int rc = 0; | ||
| 122 | |||
| 123 | phys_mem = bank->io_addr + (bio->bi_sector << AXON_RAM_SECTOR_SHIFT); | ||
| 124 | phys_end = bank->io_addr + bank->size; | ||
| 125 | transferred = 0; | ||
| 126 | bio_for_each_segment(vec, bio, idx) { | ||
| 127 | if (unlikely(phys_mem + vec->bv_len > phys_end)) { | ||
| 128 | bio_io_error(bio, bio->bi_size); | ||
| 129 | rc = -ERANGE; | ||
| 130 | break; | ||
| 131 | } | ||
| 132 | |||
| 133 | user_mem = page_address(vec->bv_page) + vec->bv_offset; | ||
| 134 | if (bio_data_dir(bio) == READ) | ||
| 135 | memcpy(user_mem, (void *) phys_mem, vec->bv_len); | ||
| 136 | else | ||
| 137 | memcpy((void *) phys_mem, user_mem, vec->bv_len); | ||
| 138 | |||
| 139 | phys_mem += vec->bv_len; | ||
| 140 | transferred += vec->bv_len; | ||
| 141 | } | ||
| 142 | bio_endio(bio, transferred, 0); | ||
| 143 | |||
| 144 | return rc; | ||
| 145 | } | ||
| 146 | |||
| 147 | /** | ||
| 148 | * axon_ram_direct_access - direct_access() method for block device | ||
| 149 | * @device, @sector, @data: see block_device_operations method | ||
| 150 | */ | ||
| 151 | static int | ||
| 152 | axon_ram_direct_access(struct block_device *device, sector_t sector, | ||
| 153 | unsigned long *data) | ||
| 154 | { | ||
| 155 | struct axon_ram_bank *bank = device->bd_disk->private_data; | ||
| 156 | loff_t offset; | ||
| 157 | |||
| 158 | offset = sector << AXON_RAM_SECTOR_SHIFT; | ||
| 159 | if (offset >= bank->size) { | ||
| 160 | dev_err(&bank->device->dev, "Access outside of address space\n"); | ||
| 161 | return -ERANGE; | ||
| 162 | } | ||
| 163 | |||
| 164 | *data = bank->ph_addr + offset; | ||
| 165 | |||
| 166 | return 0; | ||
| 167 | } | ||
| 168 | |||
| 169 | static struct block_device_operations axon_ram_devops = { | ||
| 170 | .owner = THIS_MODULE, | ||
| 171 | .direct_access = axon_ram_direct_access | ||
| 172 | }; | ||
| 173 | |||
| 174 | /** | ||
| 175 | * axon_ram_probe - probe() method for platform driver | ||
| 176 | * @device, @device_id: see of_platform_driver method | ||
| 177 | */ | ||
| 178 | static int | ||
| 179 | axon_ram_probe(struct of_device *device, const struct of_device_id *device_id) | ||
| 180 | { | ||
| 181 | static int axon_ram_bank_id = -1; | ||
| 182 | struct axon_ram_bank *bank; | ||
| 183 | struct resource resource; | ||
| 184 | int rc = 0; | ||
| 185 | |||
| 186 | axon_ram_bank_id++; | ||
| 187 | |||
| 188 | dev_info(&device->dev, "Found memory controller on %s\n", | ||
| 189 | device->node->full_name); | ||
| 190 | |||
| 191 | bank = kzalloc(sizeof(struct axon_ram_bank), GFP_KERNEL); | ||
| 192 | if (bank == NULL) { | ||
| 193 | dev_err(&device->dev, "Out of memory\n"); | ||
| 194 | rc = -ENOMEM; | ||
| 195 | goto failed; | ||
| 196 | } | ||
| 197 | |||
| 198 | device->dev.platform_data = bank; | ||
| 199 | |||
| 200 | bank->device = device; | ||
| 201 | |||
| 202 | if (of_address_to_resource(device->node, 0, &resource) != 0) { | ||
| 203 | dev_err(&device->dev, "Cannot access device tree\n"); | ||
| 204 | rc = -EFAULT; | ||
| 205 | goto failed; | ||
| 206 | } | ||
| 207 | |||
| 208 | bank->size = resource.end - resource.start + 1; | ||
| 209 | |||
| 210 | if (bank->size == 0) { | ||
| 211 | dev_err(&device->dev, "No DDR2 memory found for %s%d\n", | ||
| 212 | AXON_RAM_DEVICE_NAME, axon_ram_bank_id); | ||
| 213 | rc = -ENODEV; | ||
| 214 | goto failed; | ||
| 215 | } | ||
| 216 | |||
| 217 | dev_info(&device->dev, "Register DDR2 memory device %s%d with %luMB\n", | ||
| 218 | AXON_RAM_DEVICE_NAME, axon_ram_bank_id, bank->size >> 20); | ||
| 219 | |||
| 220 | bank->ph_addr = resource.start; | ||
| 221 | bank->io_addr = (unsigned long) ioremap_flags( | ||
| 222 | bank->ph_addr, bank->size, _PAGE_NO_CACHE); | ||
| 223 | if (bank->io_addr == 0) { | ||
| 224 | dev_err(&device->dev, "ioremap() failed\n"); | ||
| 225 | rc = -EFAULT; | ||
| 226 | goto failed; | ||
| 227 | } | ||
| 228 | |||
| 229 | bank->disk = alloc_disk(AXON_RAM_MINORS_PER_DISK); | ||
| 230 | if (bank->disk == NULL) { | ||
| 231 | dev_err(&device->dev, "Cannot register disk\n"); | ||
| 232 | rc = -EFAULT; | ||
| 233 | goto failed; | ||
| 234 | } | ||
| 235 | |||
| 236 | bank->disk->first_minor = 0; | ||
| 237 | bank->disk->fops = &axon_ram_devops; | ||
| 238 | bank->disk->private_data = bank; | ||
| 239 | bank->disk->driverfs_dev = &device->dev; | ||
| 240 | |||
| 241 | sprintf(bank->disk->disk_name, "%s%d", | ||
| 242 | AXON_RAM_DEVICE_NAME, axon_ram_bank_id); | ||
| 243 | bank->disk->major = register_blkdev(0, bank->disk->disk_name); | ||
| 244 | if (bank->disk->major < 0) { | ||
| 245 | dev_err(&device->dev, "Cannot register block device\n"); | ||
| 246 | rc = -EFAULT; | ||
| 247 | goto failed; | ||
| 248 | } | ||
| 249 | |||
| 250 | bank->disk->queue = blk_alloc_queue(GFP_KERNEL); | ||
| 251 | if (bank->disk->queue == NULL) { | ||
| 252 | dev_err(&device->dev, "Cannot register disk queue\n"); | ||
| 253 | rc = -EFAULT; | ||
| 254 | goto failed; | ||
| 255 | } | ||
| 256 | |||
| 257 | set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT); | ||
| 258 | blk_queue_make_request(bank->disk->queue, axon_ram_make_request); | ||
| 259 | blk_queue_hardsect_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE); | ||
| 260 | add_disk(bank->disk); | ||
| 261 | |||
| 262 | bank->irq_correctable = irq_of_parse_and_map(device->node, 0); | ||
| 263 | bank->irq_uncorrectable = irq_of_parse_and_map(device->node, 1); | ||
| 264 | if (bank->irq_correctable == 0 || bank->irq_uncorrectable == 0) { | ||
| 265 | dev_err(&device->dev, "Cannot access ECC interrupt ID\n"); | ||
| 266 | rc = -EFAULT; | ||
| 267 | goto failed; | ||
| 268 | } | ||
| 269 | |||
| 270 | rc = request_irq(bank->irq_correctable, axon_ram_irq_handler, | ||
| 271 | AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device); | ||
| 272 | if (rc != 0) { | ||
| 273 | dev_err(&device->dev, "Cannot register ECC interrupt handler\n"); | ||
| 274 | bank->irq_correctable = bank->irq_uncorrectable = 0; | ||
| 275 | rc = -EFAULT; | ||
| 276 | goto failed; | ||
| 277 | } | ||
| 278 | |||
| 279 | rc = request_irq(bank->irq_uncorrectable, axon_ram_irq_handler, | ||
| 280 | AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device); | ||
| 281 | if (rc != 0) { | ||
| 282 | dev_err(&device->dev, "Cannot register ECC interrupt handler\n"); | ||
| 283 | bank->irq_uncorrectable = 0; | ||
| 284 | rc = -EFAULT; | ||
| 285 | goto failed; | ||
| 286 | } | ||
| 287 | |||
| 288 | rc = device_create_file(&device->dev, &dev_attr_ecc); | ||
| 289 | if (rc != 0) { | ||
| 290 | dev_err(&device->dev, "Cannot create sysfs file\n"); | ||
| 291 | rc = -EFAULT; | ||
| 292 | goto failed; | ||
| 293 | } | ||
| 294 | |||
| 295 | return 0; | ||
| 296 | |||
| 297 | failed: | ||
| 298 | if (bank != NULL) { | ||
| 299 | if (bank->irq_uncorrectable > 0) | ||
| 300 | free_irq(bank->irq_uncorrectable, device); | ||
| 301 | if (bank->irq_correctable > 0) | ||
| 302 | free_irq(bank->irq_correctable, device); | ||
| 303 | if (bank->disk != NULL) { | ||
| 304 | if (bank->disk->queue != NULL) | ||
| 305 | blk_cleanup_queue(bank->disk->queue); | ||
| 306 | if (bank->disk->major > 0) | ||
| 307 | unregister_blkdev(bank->disk->major, | ||
| 308 | bank->disk->disk_name); | ||
| 309 | del_gendisk(bank->disk); | ||
| 310 | } | ||
| 311 | device->dev.platform_data = NULL; | ||
| 312 | if (bank->io_addr != 0) | ||
| 313 | iounmap((void __iomem *) bank->io_addr); | ||
| 314 | kfree(bank); | ||
| 315 | } | ||
| 316 | |||
| 317 | return rc; | ||
| 318 | } | ||
| 319 | |||
| 320 | /** | ||
| 321 | * axon_ram_remove - remove() method for platform driver | ||
| 322 | * @device: see of_platform_driver method | ||
| 323 | */ | ||
| 324 | static int | ||
| 325 | axon_ram_remove(struct of_device *device) | ||
| 326 | { | ||
| 327 | struct axon_ram_bank *bank = device->dev.platform_data; | ||
| 328 | |||
| 329 | BUG_ON(!bank || !bank->disk); | ||
| 330 | |||
| 331 | device_remove_file(&device->dev, &dev_attr_ecc); | ||
| 332 | free_irq(bank->irq_uncorrectable, device); | ||
| 333 | free_irq(bank->irq_correctable, device); | ||
| 334 | blk_cleanup_queue(bank->disk->queue); | ||
| 335 | unregister_blkdev(bank->disk->major, bank->disk->disk_name); | ||
| 336 | del_gendisk(bank->disk); | ||
| 337 | iounmap((void __iomem *) bank->io_addr); | ||
| 338 | kfree(bank); | ||
| 339 | |||
| 340 | return 0; | ||
| 341 | } | ||
| 342 | |||
| 343 | static struct of_device_id axon_ram_device_id[] = { | ||
| 344 | { | ||
| 345 | .type = "dma-memory" | ||
| 346 | }, | ||
| 347 | {} | ||
| 348 | }; | ||
| 349 | |||
| 350 | static struct of_platform_driver axon_ram_driver = { | ||
| 351 | .owner = THIS_MODULE, | ||
| 352 | .name = AXON_RAM_MODULE_NAME, | ||
| 353 | .match_table = axon_ram_device_id, | ||
| 354 | .probe = axon_ram_probe, | ||
| 355 | .remove = axon_ram_remove | ||
| 356 | }; | ||
| 357 | |||
| 358 | /** | ||
| 359 | * axon_ram_init | ||
| 360 | */ | ||
| 361 | static int __init | ||
| 362 | axon_ram_init(void) | ||
| 363 | { | ||
| 364 | return of_register_platform_driver(&axon_ram_driver); | ||
| 365 | } | ||
| 366 | |||
| 367 | /** | ||
| 368 | * axon_ram_exit | ||
| 369 | */ | ||
| 370 | static void __exit | ||
| 371 | axon_ram_exit(void) | ||
| 372 | { | ||
| 373 | of_unregister_platform_driver(&axon_ram_driver); | ||
| 374 | } | ||
| 375 | |||
| 376 | module_init(axon_ram_init); | ||
| 377 | module_exit(axon_ram_exit); | ||
| 378 | |||
| 379 | MODULE_LICENSE("GPL"); | ||
| 380 | MODULE_AUTHOR("Maxim Shchetynin <maxim@de.ibm.com>"); | ||
| 381 | MODULE_DESCRIPTION("Axon DDR2 RAM device driver for IBM Cell BE"); | ||
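
The driver services reads and writes with plain memcpy() in axon_ram_make_request(), and additionally exposes a direct_access() hook. A sketch of a hypothetical caller of that hook, e.g. an XIP-style filesystem (not part of this patch); on success the out-parameter holds the bank's physical address for that sector, as returned by axon_ram_direct_access() above:

        /* Resolve a sector to the address the driver reports; returns 0
         * and fills *addr on success, or the driver's error code.
         */
        static int axon_sector_address(struct block_device *bdev,
                                       sector_t sector, unsigned long *addr)
        {
                return bdev->bd_disk->fops->direct_access(bdev, sector, addr);
        }
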
diff --git a/arch/powerpc/sysdev/pmi.c b/arch/powerpc/sysdev/pmi.c index 85a7c99c1003..2f91b55b7754 100644 --- a/arch/powerpc/sysdev/pmi.c +++ b/arch/powerpc/sysdev/pmi.c | |||
| @@ -48,15 +48,13 @@ struct pmi_data { | |||
| 48 | struct work_struct work; | 48 | struct work_struct work; |
| 49 | }; | 49 | }; |
| 50 | 50 | ||
| 51 | static struct pmi_data *data; | ||
| 51 | 52 | ||
| 52 | static int pmi_irq_handler(int irq, void *dev_id) | 53 | static int pmi_irq_handler(int irq, void *dev_id) |
| 53 | { | 54 | { |
| 54 | struct pmi_data *data; | ||
| 55 | u8 type; | 55 | u8 type; |
| 56 | int rc; | 56 | int rc; |
| 57 | 57 | ||
| 58 | data = dev_id; | ||
| 59 | |||
| 60 | spin_lock(&data->pmi_spinlock); | 58 | spin_lock(&data->pmi_spinlock); |
| 61 | 59 | ||
| 62 | type = ioread8(data->pmi_reg + PMI_READ_TYPE); | 60 | type = ioread8(data->pmi_reg + PMI_READ_TYPE); |
| @@ -111,16 +109,13 @@ MODULE_DEVICE_TABLE(of, pmi_match); | |||
| 111 | 109 | ||
| 112 | static void pmi_notify_handlers(struct work_struct *work) | 110 | static void pmi_notify_handlers(struct work_struct *work) |
| 113 | { | 111 | { |
| 114 | struct pmi_data *data; | ||
| 115 | struct pmi_handler *handler; | 112 | struct pmi_handler *handler; |
| 116 | 113 | ||
| 117 | data = container_of(work, struct pmi_data, work); | ||
| 118 | |||
| 119 | spin_lock(&data->handler_spinlock); | 114 | spin_lock(&data->handler_spinlock); |
| 120 | list_for_each_entry(handler, &data->handler, node) { | 115 | list_for_each_entry(handler, &data->handler, node) { |
| 121 | pr_debug("pmi: notifying handler %p\n", handler); | 116 | pr_debug("pmi: notifying handler %p\n", handler); |
| 122 | if (handler->type == data->msg.type) | 117 | if (handler->type == data->msg.type) |
| 123 | handler->handle_pmi_message(data->dev, data->msg); | 118 | handler->handle_pmi_message(data->msg); |
| 124 | } | 119 | } |
| 125 | spin_unlock(&data->handler_spinlock); | 120 | spin_unlock(&data->handler_spinlock); |
| 126 | } | 121 | } |
| @@ -129,9 +124,14 @@ static int pmi_of_probe(struct of_device *dev, | |||
| 129 | const struct of_device_id *match) | 124 | const struct of_device_id *match) |
| 130 | { | 125 | { |
| 131 | struct device_node *np = dev->node; | 126 | struct device_node *np = dev->node; |
| 132 | struct pmi_data *data; | ||
| 133 | int rc; | 127 | int rc; |
| 134 | 128 | ||
| 129 | if (data) { | ||
| 130 | printk(KERN_ERR "pmi: driver has already been initialized.\n"); | ||
| 131 | rc = -EBUSY; | ||
| 132 | goto out; | ||
| 133 | } | ||
| 134 | |||
| 135 | data = kzalloc(sizeof(struct pmi_data), GFP_KERNEL); | 135 | data = kzalloc(sizeof(struct pmi_data), GFP_KERNEL); |
| 136 | if (!data) { | 136 | if (!data) { |
| 137 | printk(KERN_ERR "pmi: could not allocate memory.\n"); | 137 | printk(KERN_ERR "pmi: could not allocate memory.\n"); |
| @@ -154,7 +154,6 @@ static int pmi_of_probe(struct of_device *dev, | |||
| 154 | 154 | ||
| 155 | INIT_WORK(&data->work, pmi_notify_handlers); | 155 | INIT_WORK(&data->work, pmi_notify_handlers); |
| 156 | 156 | ||
| 157 | dev->dev.driver_data = data; | ||
| 158 | data->dev = dev; | 157 | data->dev = dev; |
| 159 | 158 | ||
| 160 | data->irq = irq_of_parse_and_map(np, 0); | 159 | data->irq = irq_of_parse_and_map(np, 0); |
| @@ -164,7 +163,7 @@ static int pmi_of_probe(struct of_device *dev, | |||
| 164 | goto error_cleanup_iomap; | 163 | goto error_cleanup_iomap; |
| 165 | } | 164 | } |
| 166 | 165 | ||
| 167 | rc = request_irq(data->irq, pmi_irq_handler, 0, "pmi", data); | 166 | rc = request_irq(data->irq, pmi_irq_handler, 0, "pmi", NULL); |
| 168 | if (rc) { | 167 | if (rc) { |
| 169 | printk(KERN_ERR "pmi: can't request IRQ %d: returned %d\n", | 168 | printk(KERN_ERR "pmi: can't request IRQ %d: returned %d\n", |
| 170 | data->irq, rc); | 169 | data->irq, rc); |
| @@ -187,12 +186,9 @@ out: | |||
| 187 | 186 | ||
| 188 | static int pmi_of_remove(struct of_device *dev) | 187 | static int pmi_of_remove(struct of_device *dev) |
| 189 | { | 188 | { |
| 190 | struct pmi_data *data; | ||
| 191 | struct pmi_handler *handler, *tmp; | 189 | struct pmi_handler *handler, *tmp; |
| 192 | 190 | ||
| 193 | data = dev->dev.driver_data; | 191 | free_irq(data->irq, NULL); |
| 194 | |||
| 195 | free_irq(data->irq, data); | ||
| 196 | iounmap(data->pmi_reg); | 192 | iounmap(data->pmi_reg); |
| 197 | 193 | ||
| 198 | spin_lock(&data->handler_spinlock); | 194 | spin_lock(&data->handler_spinlock); |
| @@ -202,7 +198,8 @@ static int pmi_of_remove(struct of_device *dev) | |||
| 202 | 198 | ||
| 203 | spin_unlock(&data->handler_spinlock); | 199 | spin_unlock(&data->handler_spinlock); |
| 204 | 200 | ||
| 205 | kfree(dev->dev.driver_data); | 201 | kfree(data); |
| 202 | data = NULL; | ||
| 206 | 203 | ||
| 207 | return 0; | 204 | return 0; |
| 208 | } | 205 | } |
| @@ -226,13 +223,13 @@ static void __exit pmi_module_exit(void) | |||
| 226 | } | 223 | } |
| 227 | module_exit(pmi_module_exit); | 224 | module_exit(pmi_module_exit); |
| 228 | 225 | ||
| 229 | void pmi_send_message(struct of_device *device, pmi_message_t msg) | 226 | int pmi_send_message(pmi_message_t msg) |
| 230 | { | 227 | { |
| 231 | struct pmi_data *data; | ||
| 232 | unsigned long flags; | 228 | unsigned long flags; |
| 233 | DECLARE_COMPLETION_ONSTACK(completion); | 229 | DECLARE_COMPLETION_ONSTACK(completion); |
| 234 | 230 | ||
| 235 | data = device->dev.driver_data; | 231 | if (!data) |
| 232 | return -ENODEV; | ||
| 236 | 233 | ||
| 237 | mutex_lock(&data->msg_mutex); | 234 | mutex_lock(&data->msg_mutex); |
| 238 | 235 | ||
| @@ -256,30 +253,26 @@ void pmi_send_message(struct of_device *device, pmi_message_t msg) | |||
| 256 | data->completion = NULL; | 253 | data->completion = NULL; |
| 257 | 254 | ||
| 258 | mutex_unlock(&data->msg_mutex); | 255 | mutex_unlock(&data->msg_mutex); |
| 256 | |||
| 257 | return 0; | ||
| 259 | } | 258 | } |
| 260 | EXPORT_SYMBOL_GPL(pmi_send_message); | 259 | EXPORT_SYMBOL_GPL(pmi_send_message); |
| 261 | 260 | ||
| 262 | void pmi_register_handler(struct of_device *device, | 261 | int pmi_register_handler(struct pmi_handler *handler) |
| 263 | struct pmi_handler *handler) | ||
| 264 | { | 262 | { |
| 265 | struct pmi_data *data; | ||
| 266 | data = device->dev.driver_data; | ||
| 267 | |||
| 268 | if (!data) | 263 | if (!data) |
| 269 | return; | 264 | return -ENODEV; |
| 270 | 265 | ||
| 271 | spin_lock(&data->handler_spinlock); | 266 | spin_lock(&data->handler_spinlock); |
| 272 | list_add_tail(&handler->node, &data->handler); | 267 | list_add_tail(&handler->node, &data->handler); |
| 273 | spin_unlock(&data->handler_spinlock); | 268 | spin_unlock(&data->handler_spinlock); |
| 269 | |||
| 270 | return 0; | ||
| 274 | } | 271 | } |
| 275 | EXPORT_SYMBOL_GPL(pmi_register_handler); | 272 | EXPORT_SYMBOL_GPL(pmi_register_handler); |
| 276 | 273 | ||
| 277 | void pmi_unregister_handler(struct of_device *device, | 274 | void pmi_unregister_handler(struct pmi_handler *handler) |
| 278 | struct pmi_handler *handler) | ||
| 279 | { | 275 | { |
| 280 | struct pmi_data *data; | ||
| 281 | data = device->dev.driver_data; | ||
| 282 | |||
| 283 | if (!data) | 276 | if (!data) |
| 284 | return; | 277 | return; |
| 285 | 278 | ||
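
Under the reworked PMI interface a client no longer needs an of_device handle. A minimal sketch of handler registration against the new signatures; the message type value is assumed for illustration:

        #include <asm/pmi.h>

        /* Runs from the PMI workqueue via pmi_notify_handlers(). */
        static void my_handle_pmi(pmi_message_t msg)
        {
                /* inspect msg and react */
        }

        static struct pmi_handler my_pmi_handler = {
                .type                   = 3,    /* assumed message type */
                .handle_pmi_message     = my_handle_pmi,
        };

        /* Registration can now fail with -ENODEV if the PMI driver has
         * not been probed yet, so the return value must be checked.
         */
        static int __init my_client_init(void)
        {
                return pmi_register_handler(&my_pmi_handler);
        }
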
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index edd6de995726..8134c7e198a5 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c | |||
| @@ -26,8 +26,9 @@ | |||
| 26 | #include <linux/profile.h> | 26 | #include <linux/profile.h> |
| 27 | #include <linux/module.h> | 27 | #include <linux/module.h> |
| 28 | #include <linux/fs.h> | 28 | #include <linux/fs.h> |
| 29 | #include <linux/oprofile.h> | ||
| 29 | #include <linux/sched.h> | 30 | #include <linux/sched.h> |
| 30 | 31 | ||
| 31 | #include "oprofile_stats.h" | 32 | #include "oprofile_stats.h" |
| 32 | #include "event_buffer.h" | 33 | #include "event_buffer.h" |
| 33 | #include "cpu_buffer.h" | 34 | #include "cpu_buffer.h" |
diff --git a/drivers/oprofile/event_buffer.h b/drivers/oprofile/event_buffer.h index 9b6a4ebd03e3..5076ed1ebd8f 100644 --- a/drivers/oprofile/event_buffer.h +++ b/drivers/oprofile/event_buffer.h | |||
| @@ -19,28 +19,10 @@ void free_event_buffer(void); | |||
| 19 | 19 | ||
| 20 | /* wake up the process sleeping on the event file */ | 20 | /* wake up the process sleeping on the event file */ |
| 21 | void wake_up_buffer_waiter(void); | 21 | void wake_up_buffer_waiter(void); |
| 22 | 22 | ||
| 23 | /* Each escaped entry is prefixed by ESCAPE_CODE | ||
| 24 | * then one of the following codes, then the | ||
| 25 | * relevant data. | ||
| 26 | */ | ||
| 27 | #define ESCAPE_CODE ~0UL | ||
| 28 | #define CTX_SWITCH_CODE 1 | ||
| 29 | #define CPU_SWITCH_CODE 2 | ||
| 30 | #define COOKIE_SWITCH_CODE 3 | ||
| 31 | #define KERNEL_ENTER_SWITCH_CODE 4 | ||
| 32 | #define KERNEL_EXIT_SWITCH_CODE 5 | ||
| 33 | #define MODULE_LOADED_CODE 6 | ||
| 34 | #define CTX_TGID_CODE 7 | ||
| 35 | #define TRACE_BEGIN_CODE 8 | ||
| 36 | #define TRACE_END_CODE 9 | ||
| 37 | |||
| 38 | #define INVALID_COOKIE ~0UL | 23 | #define INVALID_COOKIE ~0UL |
| 39 | #define NO_COOKIE 0UL | 24 | #define NO_COOKIE 0UL |
| 40 | 25 | ||
| 41 | /* add data to the event buffer */ | ||
| 42 | void add_event_entry(unsigned long data); | ||
| 43 | |||
| 44 | extern const struct file_operations event_buffer_fops; | 26 | extern const struct file_operations event_buffer_fops; |
| 45 | 27 | ||
| 46 | /* mutex between sync_cpu_buffers() and the | 28 | /* mutex between sync_cpu_buffers() and the |
diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index e5162a64018b..2c645170f06e 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c | |||
| @@ -53,9 +53,24 @@ int oprofile_setup(void) | |||
| 53 | * us missing task deaths and eventually oopsing | 53 | * us missing task deaths and eventually oopsing |
| 54 | * when trying to process the event buffer. | 54 | * when trying to process the event buffer. |
| 55 | */ | 55 | */ |
| 56 | if (oprofile_ops.sync_start) { | ||
| 57 | int sync_ret = oprofile_ops.sync_start(); | ||
| 58 | switch (sync_ret) { | ||
| 59 | case 0: | ||
| 60 | goto post_sync; | ||
| 61 | case 1: | ||
| 62 | goto do_generic; | ||
| 63 | case -1: | ||
| 64 | goto out3; | ||
| 65 | default: | ||
| 66 | goto out3; | ||
| 67 | } | ||
| 68 | } | ||
| 69 | do_generic: | ||
| 56 | if ((err = sync_start())) | 70 | if ((err = sync_start())) |
| 57 | goto out3; | 71 | goto out3; |
| 58 | 72 | ||
| 73 | post_sync: | ||
| 59 | is_setup = 1; | 74 | is_setup = 1; |
| 60 | mutex_unlock(&start_mutex); | 75 | mutex_unlock(&start_mutex); |
| 61 | return 0; | 76 | return 0; |
| @@ -118,7 +133,20 @@ out: | |||
| 118 | void oprofile_shutdown(void) | 133 | void oprofile_shutdown(void) |
| 119 | { | 134 | { |
| 120 | mutex_lock(&start_mutex); | 135 | mutex_lock(&start_mutex); |
| 136 | if (oprofile_ops.sync_stop) { | ||
| 137 | int sync_ret = oprofile_ops.sync_stop(); | ||
| 138 | switch (sync_ret) { | ||
| 139 | case 0: | ||
| 140 | goto post_sync; | ||
| 141 | case 1: | ||
| 142 | goto do_generic; | ||
| 143 | default: | ||
| 144 | goto post_sync; | ||
| 145 | } | ||
| 146 | } | ||
| 147 | do_generic: | ||
| 121 | sync_stop(); | 148 | sync_stop(); |
| 149 | post_sync: | ||
| 122 | if (oprofile_ops.shutdown) | 150 | if (oprofile_ops.shutdown) |
| 123 | oprofile_ops.shutdown(); | 151 | oprofile_ops.shutdown(); |
| 124 | is_setup = 0; | 152 | is_setup = 0; |
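
The new sync_start/sync_stop hooks let an architecture take over event-buffer syncing. A sketch of an arch-side hook obeying the 0/1/-1 contract used by oprofile_setup() and oprofile_shutdown() above; the helper names are assumptions, not the actual Cell implementation:

        static int arch_sync_start(void)
        {
                if (!arch_wants_private_buffer_sync())  /* assumed predicate */
                        return 1;       /* fall through to generic sync_start() */

                if (start_arch_buffer_sync() < 0)       /* assumed helper */
                        return -1;      /* abort oprofile_setup() */

                return 0;               /* arch-specific sync path is active */
        }
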
diff --git a/include/asm-powerpc/oprofile_impl.h b/include/asm-powerpc/oprofile_impl.h index 8d6b47f7b300..938fefb4c4bc 100644 --- a/include/asm-powerpc/oprofile_impl.h +++ b/include/asm-powerpc/oprofile_impl.h | |||
| @@ -39,14 +39,16 @@ struct op_system_config { | |||
| 39 | 39 | ||
| 40 | /* Per-arch configuration */ | 40 | /* Per-arch configuration */ |
| 41 | struct op_powerpc_model { | 41 | struct op_powerpc_model { |
| 42 | void (*reg_setup) (struct op_counter_config *, | 42 | int (*reg_setup) (struct op_counter_config *, |
| 43 | struct op_system_config *, | 43 | struct op_system_config *, |
| 44 | int num_counters); | 44 | int num_counters); |
| 45 | void (*cpu_setup) (struct op_counter_config *); | 45 | int (*cpu_setup) (struct op_counter_config *); |
| 46 | void (*start) (struct op_counter_config *); | 46 | int (*start) (struct op_counter_config *); |
| 47 | void (*global_start) (struct op_counter_config *); | 47 | int (*global_start) (struct op_counter_config *); |
| 48 | void (*stop) (void); | 48 | void (*stop) (void); |
| 49 | void (*global_stop) (void); | 49 | void (*global_stop) (void); |
| 50 | int (*sync_start)(void); | ||
| 51 | int (*sync_stop)(void); | ||
| 50 | void (*handle_interrupt) (struct pt_regs *, | 52 | void (*handle_interrupt) (struct pt_regs *, |
| 51 | struct op_counter_config *); | 53 | struct op_counter_config *); |
| 52 | int num_counters; | 54 | int num_counters; |
diff --git a/include/asm-powerpc/pmi.h b/include/asm-powerpc/pmi.h index cb0f8aa43088..2259d4ce3846 100644 --- a/include/asm-powerpc/pmi.h +++ b/include/asm-powerpc/pmi.h | |||
| @@ -55,13 +55,13 @@ typedef struct { | |||
| 55 | struct pmi_handler { | 55 | struct pmi_handler { |
| 56 | struct list_head node; | 56 | struct list_head node; |
| 57 | u8 type; | 57 | u8 type; |
| 58 | void (*handle_pmi_message) (struct of_device *, pmi_message_t); | 58 | void (*handle_pmi_message) (pmi_message_t); |
| 59 | }; | 59 | }; |
| 60 | 60 | ||
| 61 | void pmi_register_handler(struct of_device *, struct pmi_handler *); | 61 | int pmi_register_handler(struct pmi_handler *); |
| 62 | void pmi_unregister_handler(struct of_device *, struct pmi_handler *); | 62 | void pmi_unregister_handler(struct pmi_handler *); |
| 63 | 63 | ||
| 64 | void pmi_send_message(struct of_device *, pmi_message_t); | 64 | int pmi_send_message(pmi_message_t); |
| 65 | 65 | ||
| 66 | #endif /* __KERNEL__ */ | 66 | #endif /* __KERNEL__ */ |
| 67 | #endif /* _POWERPC_PMI_H */ | 67 | #endif /* _POWERPC_PMI_H */ |
diff --git a/include/asm-powerpc/spu.h b/include/asm-powerpc/spu.h index eedc828cef2d..8836c0f1f2f7 100644 --- a/include/asm-powerpc/spu.h +++ b/include/asm-powerpc/spu.h | |||
| @@ -107,10 +107,10 @@ struct spu_runqueue; | |||
| 107 | struct device_node; | 107 | struct device_node; |
| 108 | 108 | ||
| 109 | enum spu_utilization_state { | 109 | enum spu_utilization_state { |
| 110 | SPU_UTIL_SYSTEM, | ||
| 111 | SPU_UTIL_USER, | 110 | SPU_UTIL_USER, |
| 111 | SPU_UTIL_SYSTEM, | ||
| 112 | SPU_UTIL_IOWAIT, | 112 | SPU_UTIL_IOWAIT, |
| 113 | SPU_UTIL_IDLE, | 113 | SPU_UTIL_IDLE_LOADED, |
| 114 | SPU_UTIL_MAX | 114 | SPU_UTIL_MAX |
| 115 | }; | 115 | }; |
| 116 | 116 | ||
| @@ -121,9 +121,9 @@ struct spu { | |||
| 121 | unsigned long problem_phys; | 121 | unsigned long problem_phys; |
| 122 | struct spu_problem __iomem *problem; | 122 | struct spu_problem __iomem *problem; |
| 123 | struct spu_priv2 __iomem *priv2; | 123 | struct spu_priv2 __iomem *priv2; |
| 124 | struct list_head list; | 124 | struct list_head cbe_list; |
| 125 | struct list_head sched_list; | ||
| 126 | struct list_head full_list; | 125 | struct list_head full_list; |
| 126 | enum { SPU_FREE, SPU_USED } alloc_state; | ||
| 127 | int number; | 127 | int number; |
| 128 | unsigned int irqs[3]; | 128 | unsigned int irqs[3]; |
| 129 | u32 node; | 129 | u32 node; |
| @@ -137,6 +137,7 @@ struct spu { | |||
| 137 | struct spu_runqueue *rq; | 137 | struct spu_runqueue *rq; |
| 138 | unsigned long long timestamp; | 138 | unsigned long long timestamp; |
| 139 | pid_t pid; | 139 | pid_t pid; |
| 140 | pid_t tgid; | ||
| 140 | int class_0_pending; | 141 | int class_0_pending; |
| 141 | spinlock_t register_lock; | 142 | spinlock_t register_lock; |
| 142 | 143 | ||
| @@ -165,11 +166,14 @@ struct spu { | |||
| 165 | 166 | ||
| 166 | struct sys_device sysdev; | 167 | struct sys_device sysdev; |
| 167 | 168 | ||
| 169 | int has_mem_affinity; | ||
| 170 | struct list_head aff_list; | ||
| 171 | |||
| 168 | struct { | 172 | struct { |
| 169 | /* protected by interrupt reentrancy */ | 173 | /* protected by interrupt reentrancy */ |
| 170 | enum spu_utilization_state utilization_state; | 174 | enum spu_utilization_state util_state; |
| 171 | unsigned long tstamp; /* time of last ctx switch */ | 175 | unsigned long long tstamp; |
| 172 | unsigned long times[SPU_UTIL_MAX]; | 176 | unsigned long long times[SPU_UTIL_MAX]; |
| 173 | unsigned long long vol_ctx_switch; | 177 | unsigned long long vol_ctx_switch; |
| 174 | unsigned long long invol_ctx_switch; | 178 | unsigned long long invol_ctx_switch; |
| 175 | unsigned long long min_flt; | 179 | unsigned long long min_flt; |
| @@ -181,13 +185,29 @@ struct spu { | |||
| 181 | } stats; | 185 | } stats; |
| 182 | }; | 186 | }; |
| 183 | 187 | ||
| 184 | struct spu *spu_alloc(void); | 188 | struct cbe_spu_info { |
| 185 | struct spu *spu_alloc_node(int node); | 189 | struct mutex list_mutex; |
| 186 | void spu_free(struct spu *spu); | 190 | struct list_head spus; |
| 191 | int n_spus; | ||
| 192 | int nr_active; | ||
| 193 | atomic_t reserved_spus; | ||
| 194 | }; | ||
| 195 | |||
| 196 | extern struct cbe_spu_info cbe_spu_info[]; | ||
| 197 | |||
| 198 | void spu_init_channels(struct spu *spu); | ||
| 187 | int spu_irq_class_0_bottom(struct spu *spu); | 199 | int spu_irq_class_0_bottom(struct spu *spu); |
| 188 | int spu_irq_class_1_bottom(struct spu *spu); | 200 | int spu_irq_class_1_bottom(struct spu *spu); |
| 189 | void spu_irq_setaffinity(struct spu *spu, int cpu); | 201 | void spu_irq_setaffinity(struct spu *spu, int cpu); |
| 190 | 202 | ||
| 203 | #ifdef CONFIG_KEXEC | ||
| 204 | void crash_register_spus(struct list_head *list); | ||
| 205 | #else | ||
| 206 | static inline void crash_register_spus(struct list_head *list) | ||
| 207 | { | ||
| 208 | } | ||
| 209 | #endif | ||
| 210 | |||
| 191 | extern void spu_invalidate_slbs(struct spu *spu); | 211 | extern void spu_invalidate_slbs(struct spu *spu); |
| 192 | extern void spu_associate_mm(struct spu *spu, struct mm_struct *mm); | 212 | extern void spu_associate_mm(struct spu *spu, struct mm_struct *mm); |
| 193 | 213 | ||
| @@ -195,6 +215,20 @@ extern void spu_associate_mm(struct spu *spu, struct mm_struct *mm); | |||
| 195 | struct mm_struct; | 215 | struct mm_struct; |
| 196 | extern void spu_flush_all_slbs(struct mm_struct *mm); | 216 | extern void spu_flush_all_slbs(struct mm_struct *mm); |
| 197 | 217 | ||
| 218 | /* This interface allows a profiler (e.g., OProfile) to store a ref | ||
| 219 | * to spu context information that it creates. This caching technique | ||
| 220 | * avoids the need to recreate this information after a save/restore operation. | ||
| 221 | * | ||
| 222 | * Assumes the caller has already incremented the ref count to | ||
| 223 | * profile_info; then spu_context_destroy must call kref_put | ||
| 224 | * on prof_info_kref. | ||
| 225 | */ | ||
| 226 | void spu_set_profile_private_kref(struct spu_context *ctx, | ||
| 227 | struct kref *prof_info_kref, | ||
| 228 | void (*prof_info_release) (struct kref *kref)); | ||
| 229 | |||
| 230 | void *spu_get_profile_private_kref(struct spu_context *ctx); | ||
| 231 | |||
| 198 | /* system callbacks from the SPU */ | 232 | /* system callbacks from the SPU */ |
| 199 | struct spu_syscall_block { | 233 | struct spu_syscall_block { |
| 200 | u64 nr_ret; | 234 | u64 nr_ret; |
| @@ -206,7 +240,8 @@ extern long spu_sys_callback(struct spu_syscall_block *s); | |||
| 206 | struct file; | 240 | struct file; |
| 207 | extern struct spufs_calls { | 241 | extern struct spufs_calls { |
| 208 | asmlinkage long (*create_thread)(const char __user *name, | 242 | asmlinkage long (*create_thread)(const char __user *name, |
| 209 | unsigned int flags, mode_t mode); | 243 | unsigned int flags, mode_t mode, |
| 244 | struct file *neighbor); | ||
| 210 | asmlinkage long (*spu_run)(struct file *filp, __u32 __user *unpc, | 245 | asmlinkage long (*spu_run)(struct file *filp, __u32 __user *unpc, |
| 211 | __u32 __user *ustatus); | 246 | __u32 __user *ustatus); |
| 212 | struct module *owner; | 247 | struct module *owner; |
| @@ -233,8 +268,10 @@ struct spu_coredump_calls { | |||
| 233 | #define SPU_CREATE_GANG 0x0002 | 268 | #define SPU_CREATE_GANG 0x0002 |
| 234 | #define SPU_CREATE_NOSCHED 0x0004 | 269 | #define SPU_CREATE_NOSCHED 0x0004 |
| 235 | #define SPU_CREATE_ISOLATE 0x0008 | 270 | #define SPU_CREATE_ISOLATE 0x0008 |
| 271 | #define SPU_CREATE_AFFINITY_SPU 0x0010 | ||
| 272 | #define SPU_CREATE_AFFINITY_MEM 0x0020 | ||
| 236 | 273 | ||
| 237 | #define SPU_CREATE_FLAG_ALL 0x000f /* mask of all valid flags */ | 274 | #define SPU_CREATE_FLAG_ALL 0x003f /* mask of all valid flags */ |
| 238 | 275 | ||
| 239 | 276 | ||
| 240 | #ifdef CONFIG_SPU_FS_MODULE | 277 | #ifdef CONFIG_SPU_FS_MODULE |
| @@ -403,6 +440,7 @@ struct spu_priv2 { | |||
| 403 | #define MFC_CNTL_RESUME_DMA_QUEUE (0ull << 0) | 440 | #define MFC_CNTL_RESUME_DMA_QUEUE (0ull << 0) |
| 404 | #define MFC_CNTL_SUSPEND_DMA_QUEUE (1ull << 0) | 441 | #define MFC_CNTL_SUSPEND_DMA_QUEUE (1ull << 0) |
| 405 | #define MFC_CNTL_SUSPEND_DMA_QUEUE_MASK (1ull << 0) | 442 | #define MFC_CNTL_SUSPEND_DMA_QUEUE_MASK (1ull << 0) |
| 443 | #define MFC_CNTL_SUSPEND_MASK (1ull << 4) | ||
| 406 | #define MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION (0ull << 8) | 444 | #define MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION (0ull << 8) |
| 407 | #define MFC_CNTL_SUSPEND_IN_PROGRESS (1ull << 8) | 445 | #define MFC_CNTL_SUSPEND_IN_PROGRESS (1ull << 8) |
| 408 | #define MFC_CNTL_SUSPEND_COMPLETE (3ull << 8) | 446 | #define MFC_CNTL_SUSPEND_COMPLETE (3ull << 8) |
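
A sketch of how the new per-node bookkeeping in struct cbe_spu_info is meant to be traversed, based only on the fields added above (assumed usage, not code from the patch): cbe_list entries hang off cbe_spu_info[node].spus and are walked under list_mutex, with alloc_state marking free SPUs.

        static int count_free_spus(int node)
        {
                struct spu *spu;
                int free = 0;

                mutex_lock(&cbe_spu_info[node].list_mutex);
                list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
                        if (spu->alloc_state == SPU_FREE)
                                free++;
                mutex_unlock(&cbe_spu_info[node].list_mutex);

                return free;
        }
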
diff --git a/include/asm-powerpc/spu_csa.h b/include/asm-powerpc/spu_csa.h index c48ae185c874..e87794d5d4ea 100644 --- a/include/asm-powerpc/spu_csa.h +++ b/include/asm-powerpc/spu_csa.h | |||
| @@ -50,6 +50,12 @@ | |||
| 50 | #define SPU_STOPPED_STATUS_P_I 8 | 50 | #define SPU_STOPPED_STATUS_P_I 8 |
| 51 | #define SPU_STOPPED_STATUS_R 9 | 51 | #define SPU_STOPPED_STATUS_R 9 |
| 52 | 52 | ||
| 53 | /* | ||
| 54 | * Definitions for software decrementer status flag. | ||
| 55 | */ | ||
| 56 | #define SPU_DECR_STATUS_RUNNING 0x1 | ||
| 57 | #define SPU_DECR_STATUS_WRAPPED 0x2 | ||
| 58 | |||
| 53 | #ifndef __ASSEMBLY__ | 59 | #ifndef __ASSEMBLY__ |
| 54 | /** | 60 | /** |
| 55 | * spu_reg128 - generic 128-bit register definition. | 61 | * spu_reg128 - generic 128-bit register definition. |
| @@ -63,7 +69,7 @@ struct spu_reg128 { | |||
| 63 | * @gprs: Array of saved registers. | 69 | * @gprs: Array of saved registers. |
| 64 | * @fpcr: Saved floating point status control register. | 70 | * @fpcr: Saved floating point status control register. |
| 65 | * @decr: Saved decrementer value. | 71 | * @decr: Saved decrementer value. |
| 66 | * @decr_status: Indicates decrementer run status. | 72 | * @decr_status: Indicates software decrementer status flags. |
| 67 | * @ppu_mb: Saved PPU mailbox data. | 73 | * @ppu_mb: Saved PPU mailbox data. |
| 68 | * @ppuint_mb: Saved PPU interrupting mailbox data. | 74 | * @ppuint_mb: Saved PPU interrupting mailbox data. |
| 69 | * @tag_mask: Saved tag group mask. | 75 | * @tag_mask: Saved tag group mask. |
diff --git a/include/linux/dcookies.h b/include/linux/dcookies.h index 0fe7cdf326f7..98c69ab80c84 100644 --- a/include/linux/dcookies.h +++ b/include/linux/dcookies.h | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | 12 | ||
| 13 | #ifdef CONFIG_PROFILING | 13 | #ifdef CONFIG_PROFILING |
| 14 | 14 | ||
| 15 | #include <linux/dcache.h> | ||
| 15 | #include <linux/types.h> | 16 | #include <linux/types.h> |
| 16 | 17 | ||
| 17 | struct dcookie_user; | 18 | struct dcookie_user; |
diff --git a/include/linux/elf-em.h b/include/linux/elf-em.h index 0311bad838b1..5834e843a946 100644 --- a/include/linux/elf-em.h +++ b/include/linux/elf-em.h | |||
| @@ -20,7 +20,8 @@ | |||
| 20 | #define EM_PARISC 15 /* HPPA */ | 20 | #define EM_PARISC 15 /* HPPA */ |
| 21 | #define EM_SPARC32PLUS 18 /* Sun's "v8plus" */ | 21 | #define EM_SPARC32PLUS 18 /* Sun's "v8plus" */ |
| 22 | #define EM_PPC 20 /* PowerPC */ | 22 | #define EM_PPC 20 /* PowerPC */ |
| 23 | #define EM_PPC64 21 /* PowerPC64 */ | 23 | #define EM_PPC64 21 /* PowerPC64 */ |
| 24 | #define EM_SPU 23 /* Cell BE SPU */ | ||
| 24 | #define EM_SH 42 /* SuperH */ | 25 | #define EM_SH 42 /* SuperH */ |
| 25 | #define EM_SPARCV9 43 /* SPARC v9 64-bit */ | 26 | #define EM_SPARCV9 43 /* SPARC v9 64-bit */ |
| 26 | #define EM_IA_64 50 /* HP/Intel IA-64 */ | 27 | #define EM_IA_64 50 /* HP/Intel IA-64 */ |
diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 0d514b252454..041bb31100f4 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h | |||
| @@ -17,6 +17,26 @@ | |||
| 17 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
| 18 | #include <asm/atomic.h> | 18 | #include <asm/atomic.h> |
| 19 | 19 | ||
| 20 | /* Each escaped entry is prefixed by ESCAPE_CODE | ||
| 21 | * then one of the following codes, then the | ||
| 22 | * relevant data. | ||
| 23 | * These #defines live in this file so that arch-specific | ||
| 24 | * buffer sync'ing code can access them. | ||
| 25 | */ | ||
| 26 | #define ESCAPE_CODE ~0UL | ||
| 27 | #define CTX_SWITCH_CODE 1 | ||
| 28 | #define CPU_SWITCH_CODE 2 | ||
| 29 | #define COOKIE_SWITCH_CODE 3 | ||
| 30 | #define KERNEL_ENTER_SWITCH_CODE 4 | ||
| 31 | #define KERNEL_EXIT_SWITCH_CODE 5 | ||
| 32 | #define MODULE_LOADED_CODE 6 | ||
| 33 | #define CTX_TGID_CODE 7 | ||
| 34 | #define TRACE_BEGIN_CODE 8 | ||
| 35 | #define TRACE_END_CODE 9 | ||
| 36 | #define XEN_ENTER_SWITCH_CODE 10 | ||
| 37 | #define SPU_PROFILING_CODE 11 | ||
| 38 | #define SPU_CTX_SWITCH_CODE 12 | ||
| 39 | |||
| 20 | struct super_block; | 40 | struct super_block; |
| 21 | struct dentry; | 41 | struct dentry; |
| 22 | struct file_operations; | 42 | struct file_operations; |
| @@ -35,6 +55,14 @@ struct oprofile_operations { | |||
| 35 | int (*start)(void); | 55 | int (*start)(void); |
| 36 | /* Stop delivering interrupts. */ | 56 | /* Stop delivering interrupts. */ |
| 37 | void (*stop)(void); | 57 | void (*stop)(void); |
| 58 | /* Arch-specific buffer sync functions. | ||
| 59 | * Return value = 0: Success | ||
| 60 | * Return value = -1: Failure | ||
| 61 | * Return value = 1: Run generic sync function | ||
| 62 | */ | ||
| 63 | int (*sync_start)(void); | ||
| 64 | int (*sync_stop)(void); | ||
| 65 | |||
| 38 | /* Initiate a stack backtrace. Optional. */ | 66 | /* Initiate a stack backtrace. Optional. */ |
| 39 | void (*backtrace)(struct pt_regs * const regs, unsigned int depth); | 67 | void (*backtrace)(struct pt_regs * const regs, unsigned int depth); |
| 40 | /* CPU identification string. */ | 68 | /* CPU identification string. */ |
| @@ -56,6 +84,13 @@ int oprofile_arch_init(struct oprofile_operations * ops); | |||
| 56 | void oprofile_arch_exit(void); | 84 | void oprofile_arch_exit(void); |
| 57 | 85 | ||
| 58 | /** | 86 | /** |
| 87 | * Add data to the event buffer. | ||
| 88 | * The data passed is free-form, but typically consists of | ||
| 89 | * file offsets, dcookies, context information, and ESCAPE codes. | ||
| 90 | */ | ||
| 91 | void add_event_entry(unsigned long data); | ||
| 92 | |||
| 93 | /** | ||
| 59 | * Add a sample. This may be called from any context. Pass | 94 | * Add a sample. This may be called from any context. Pass |
| 60 | * smp_processor_id() as cpu. | 95 | * smp_processor_id() as cpu. |
| 61 | */ | 96 | */ |
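
With add_event_entry() and the escape codes now exported from this header, an arch buffer-sync implementation can emit its own records. An illustrative emission sequence; the exact field layout of an SPU context-switch record is an assumption:

        static void emit_spu_ctx_switch(unsigned long spu_num,
                                        unsigned long app_cookie)
        {
                add_event_entry(ESCAPE_CODE);           /* mark escaped entry */
                add_event_entry(SPU_CTX_SWITCH_CODE);   /* record type */
                add_event_entry(spu_num);
                add_event_entry(app_cookie);
        }
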
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 7a8b1e3322e0..61def7c8fbb3 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h | |||
| @@ -549,7 +549,7 @@ asmlinkage long sys_inotify_rm_watch(int fd, u32 wd); | |||
| 549 | asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, | 549 | asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, |
| 550 | __u32 __user *ustatus); | 550 | __u32 __user *ustatus); |
| 551 | asmlinkage long sys_spu_create(const char __user *name, | 551 | asmlinkage long sys_spu_create(const char __user *name, |
| 552 | unsigned int flags, mode_t mode); | 552 | unsigned int flags, mode_t mode, int fd); |
| 553 | 553 | ||
| 554 | asmlinkage long sys_mknodat(int dfd, const char __user * filename, int mode, | 554 | asmlinkage long sys_mknodat(int dfd, const char __user * filename, int mode, |
| 555 | unsigned dev); | 555 | unsigned dev); |
