 arch/s390/include/asm/perf_event.h |   4 +
 arch/s390/kernel/perf_cpum_sf.c    | 527 ++++++++++++++++++++++++++++--------
 2 files changed, 411 insertions(+), 120 deletions(-)
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 23d2dfa8201d..99d7f4e333c2 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -49,6 +49,10 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define PERF_CPUM_SF_MAX_CTR		1
 #define PERF_EVENT_CPUM_SF		0xB0000UL	/* Raw event ID */
 
+#define REG_NONE		0
+#define REG_OVERFLOW		1
+#define OVERFLOW_REG(hwc)	((hwc)->extra_reg.config)
+#define SFB_ALLOC_REG(hwc)	((hwc)->extra_reg.alloc)
 #define TEAR_REG(hwc)		((hwc)->last_tag)
 #define SAMPL_RATE(hwc)		((hwc)->event_base)
 
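Note: none of the four new macros introduce state of their own. Like TEAR_REG() and SAMPL_RATE() before them, they repurpose storage that perf already carries in every event, here the `extra_reg` member of `struct hw_perf_event` (normally used for extra constraint registers). A rough sketch of the fields involved, for orientation only; the authoritative definition lives in include/linux/perf_event.h:

```c
/* Orientation sketch, not the authoritative definition: perf embeds one
 * of these in struct hw_perf_event as hwc->extra_reg, and the s390
 * sampling code reuses it as per-event scratch state.
 */
struct hw_perf_event_extra {
	u64          config; /* OVERFLOW_REG(): average lost samples per SDB */
	unsigned int reg;    /* set to REG_OVERFLOW to mark the reuse */
	int          alloc;  /* SFB_ALLOC_REG(): SDBs accounted for allocation */
	int          idx;    /* not used by the sampling code */
};
```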
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index ae5e0192160d..ea1656073dac 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -17,6 +17,8 @@
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/moduleparam.h>
 #include <asm/cpu_mf.h>
 #include <asm/irq.h>
 #include <asm/debug.h>
@@ -26,34 +28,54 @@
  * At least one table is required for the sampling buffer structure.
  * A single table contains up to 511 pointers to sample-data-blocks.
  */
 #define CPUM_SF_MIN_SDBT	1
 
-/* Minimum number of sample-data-blocks:
- * The minimum designates a single page for sample-data-block, i.e.,
- * up to 126 sample-data-blocks with a size of 32 bytes (bsdes).
+/* Number of sample-data-blocks per sample-data-block-table (SDBT):
+ * The table contains SDB origin (8 bytes) and one SDBT origin that
+ * points to the next table.
  */
-#define CPUM_SF_MIN_SDB		126
+#define CPUM_SF_SDB_PER_TABLE	((PAGE_SIZE - 8) / 8)
 
-/* Maximum number of sample-data-blocks:
- * The maximum number designates approx. 256K per CPU including
- * the given number of sample-data-blocks and taking the number
- * of sample-data-block tables into account.
+/* Maximum page offset for an SDBT table-link entry:
+ * If this page offset is reached, a table-link entry to the next SDBT
+ * must be added.
+ */
+#define CPUM_SF_SDBT_TL_OFFSET	(CPUM_SF_SDB_PER_TABLE * 8)
+static inline int require_table_link(const void *sdbt)
+{
+	return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
+}
+
+/* Minimum and maximum sampling buffer sizes:
+ *
+ * This number represents the maximum size of the sampling buffer
+ * taking the number of sample-data-block-tables into account.
  *
- * Later, this number can be increased for extending the sampling
- * buffer, for example, by factor 2 (512K) or 4 (1M).
+ *	Sampling buffer size	 Buffer characteristics
+ *	---------------------------------------------------
+ *	     64KB	    ==	  16 pages (4KB per page)
+ *				   1 page  for SDB-tables
+ *				  15 pages for SDBs
+ *
+ *	    32MB	    ==	8192 pages (4KB per page)
+ *				  16 pages for SDB-tables
+ *				8176 pages for SDBs
  */
-#define CPUM_SF_MAX_SDB		6471
+static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
+static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;
 
 struct sf_buffer {
-	unsigned long	sdbt;	    /* Sample-data-block-table origin */
+	unsigned long	 *sdbt;	    /* Sample-data-block-table origin */
 	/* buffer characteristics (required for buffer increments) */
 	unsigned long num_sdb;	    /* Number of sample-data-blocks */
-	unsigned long tail;	    /* last sample-data-block-table */
+	unsigned long num_sdbt;	    /* Number of sample-data-block-tables */
+	unsigned long	 *tail;	    /* last sample-data-block-table */
 };
 
 struct cpu_hw_sf {
 	/* CPU-measurement sampling information block */
 	struct hws_qsi_info_block qsi;
+	/* CPU-measurement sampling control block */
 	struct hws_lsctl_request_block lsctl;
 	struct sf_buffer sfb;	    /* Sampling buffer */
 	unsigned int flags;	    /* Status flags */
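The geometry behind these definitions, spelled out: an SDBT is one 4KB page of 8-byte entries, so 511 entries hold SDB origins and the last slot, at byte offset 4088, holds the table-link entry that require_table_link() tests for. A standalone sanity sketch (assumes the 4KB s390 page size; plain userspace C, not kernel code):

```c
#include <assert.h>

#define PAGE_SIZE		4096UL
#define CPUM_SF_SDB_PER_TABLE	((PAGE_SIZE - 8) / 8)
#define CPUM_SF_SDBT_TL_OFFSET	(CPUM_SF_SDB_PER_TABLE * 8)

int main(void)
{
	assert(CPUM_SF_SDB_PER_TABLE == 511);	/* SDB pointers per table */
	assert(CPUM_SF_SDBT_TL_OFFSET == 4088);	/* offset of the link entry */
	/* Hence the table in the comment above: a 64KB buffer is 16 pages,
	 * split into 1 SDBT page plus 15 SDB pages; a 32MB buffer is 8192
	 * pages, split into 16 SDBT pages plus 16 * 511 = 8176 SDB pages.
	 */
	return 0;
}
```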
@@ -65,11 +87,22 @@ static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
 static debug_info_t *sfdbg;
 
 /*
+ * sf_disable() - Switch off sampling facility
+ */
+static int sf_disable(void)
+{
+	struct hws_lsctl_request_block sreq;
+
+	memset(&sreq, 0, sizeof(sreq));
+	return lsctl(&sreq);
+}
+
+/*
  * sf_buffer_available() - Check for an allocated sampling buffer
  */
 static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
 {
-	return (cpuhw->sfb.sdbt) ? 1 : 0;
+	return !!cpuhw->sfb.sdbt;
 }
 
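A note on sf_disable(), which this patch moves up so the allocation helpers can use it: lsctl() loads the sampling controls from the request block into the hardware, so loading an all-zero block clears the enable controls and stops sampling. The buffer-resizing code added below relies on exactly this; it calls sf_disable() before rewriting the SDB chain, and the previously saved controls are reloaded when the PMU is re-enabled.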
 /*
@@ -77,32 +110,32 @@ static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
  */
 static void free_sampling_buffer(struct sf_buffer *sfb)
 {
-	unsigned long sdbt, *curr;
+	unsigned long *sdbt, *curr;
 
 	if (!sfb->sdbt)
 		return;
 
 	sdbt = sfb->sdbt;
-	curr = (unsigned long *) sdbt;
+	curr = sdbt;
 
-	/* we'll free the SDBT after all SDBs are processed... */
+	/* Free the SDBT after all SDBs are processed... */
 	while (1) {
 		if (!*curr || !sdbt)
 			break;
 
-		/* watch for link entry reset if found */
+		/* Process table-link entries */
 		if (is_link_entry(curr)) {
 			curr = get_next_sdbt(curr);
 			if (sdbt)
-				free_page(sdbt);
+				free_page((unsigned long) sdbt);
 
-			/* we are done if we reach the origin */
-			if ((unsigned long) curr == sfb->sdbt)
+			/* If the origin is reached, sampling buffer is freed */
+			if (curr == sfb->sdbt)
 				break;
 			else
-				sdbt = (unsigned long) curr;
+				sdbt = curr;
 		} else {
-			/* process SDB pointer */
+			/* Process SDB pointer */
 			if (*curr) {
 				free_page(*curr);
 				curr++;
@@ -111,10 +144,106 @@ static void free_sampling_buffer(struct sf_buffer *sfb)
 	}
 
 	debug_sprintf_event(sfdbg, 5,
-		    "free_sampling_buffer: freed sdbt=%0lx\n", sfb->sdbt);
+		    "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
 	memset(sfb, 0, sizeof(*sfb));
 }
 
+static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
+{
+	unsigned long sdb, *trailer;
+
+	/* Allocate and initialize sample-data-block */
+	sdb = get_zeroed_page(gfp_flags);
+	if (!sdb)
+		return -ENOMEM;
+	trailer = trailer_entry_ptr(sdb);
+	*trailer = SDB_TE_ALERT_REQ_MASK;
+
+	/* Link SDB into the sample-data-block-table */
+	*sdbt = sdb;
+
+	return 0;
+}
+
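Each SDB is a zeroed page whose last bytes form a hardware-defined trailer entry; setting SDB_TE_ALERT_REQ_MASK there asks the sampling facility to raise a measurement-alert interrupt once the block fills up, which is what eventually drives hw_perf_event_update() further down. A sketch of the placement; the real trailer_entry_ptr() and struct hws_trailer_entry live in asm/cpu_mf.h, and the definition below is an assumption for illustration only:

```c
/* Assumed layout of a 4KB sample-data-block: sample entries grow from
 * the page start; the trailer occupies the very end of the page.
 *
 *   sdb                                          sdb + PAGE_SIZE
 *    |<---------- sample entries ---------->|<---- trailer ---->|
 *
 * A plausible stand-in for the asm/cpu_mf.h helper (assumption):
 */
#define trailer_entry_ptr(sdb) \
	((unsigned long *) ((sdb) + PAGE_SIZE - sizeof(struct hws_trailer_entry)))
```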
+/*
+ * realloc_sampling_buffer() - extend sampler memory
+ *
+ * Allocates new sample-data-blocks and adds them to the specified sampling
+ * buffer memory.
+ *
+ * Important: This modifies the sampling buffer and must be called when the
+ * sampling facility is disabled.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int realloc_sampling_buffer(struct sf_buffer *sfb,
+				   unsigned long num_sdb, gfp_t gfp_flags)
+{
+	int i, rc;
+	unsigned long *new, *tail;
+
+	if (!sfb->sdbt || !sfb->tail)
+		return -EINVAL;
+
+	if (!is_link_entry(sfb->tail))
+		return -EINVAL;
+
+	/* Append to the existing sampling buffer, overwriting the table-link
+	 * entry.
+	 * The tail variable always points to the "tail" (last and table-link)
+	 * entry in an SDB-table.
+	 */
+	tail = sfb->tail;
+
+	/* Do a sanity check whether the table-link entry points to
+	 * the sampling buffer origin.
+	 */
+	if (sfb->sdbt != get_next_sdbt(tail)) {
+		debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
+				    "sampling buffer is not linked: origin=%p "
+				    "tail=%p\n",
+				    (void *) sfb->sdbt, (void *) tail);
+		return -EINVAL;
+	}
+
+	/* Allocate remaining SDBs */
+	rc = 0;
+	for (i = 0; i < num_sdb; i++) {
+		/* Allocate a new SDB-table if it is full. */
+		if (require_table_link(tail)) {
+			new = (unsigned long *) get_zeroed_page(gfp_flags);
+			if (!new) {
+				rc = -ENOMEM;
+				break;
+			}
+			sfb->num_sdbt++;
+			/* Link current page to tail of chain */
+			*tail = (unsigned long)(void *) new + 1;
+			tail = new;
+		}
+
+		/* Allocate a new sample-data-block.
+		 * If there is not enough memory, stop the realloc process
+		 * and simply use what was allocated. If this is a temporary
+		 * issue, a new realloc call (if required) might succeed.
+		 */
+		rc = alloc_sample_data_block(tail, gfp_flags);
+		if (rc)
+			break;
+		sfb->num_sdb++;
+		tail++;
+	}
+
+	/* Link sampling buffer to its origin */
+	*tail = (unsigned long) sfb->sdbt + 1;
+	sfb->tail = tail;
+
+	debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
+			    " settings: sdbt=%lu sdb=%lu\n",
+			    sfb->num_sdbt, sfb->num_sdb);
+	return rc;
+}
+
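The `+ 1` stored with each table link is doing real work: SDBT pages are page-aligned, so bit 0 of an entry can never be part of the address, and the code uses it to tag table-link entries. is_link_entry() and get_next_sdbt() from asm/cpu_mf.h consume that tag; assumed equivalents, for illustration:

```c
/* Assumed equivalents of the asm/cpu_mf.h helpers (illustration only):
 * bit 0 of an SDBT entry distinguishes a table link from an SDB origin.
 */
static inline int is_link_entry(unsigned long *s)
{
	return *s & 0x1ul;			/* link bit set? */
}

static inline unsigned long *get_next_sdbt(unsigned long *s)
{
	return (unsigned long *) (*s & ~0x1ul);	/* strip the link bit */
}
```

The closing `*tail = (unsigned long) sfb->sdbt + 1` makes the chain circular, which is how free_sampling_buffer() knows its walk has returned to the origin.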
 /*
  * allocate_sampling_buffer() - allocate sampler memory
  *
@@ -128,75 +257,74 @@ static void free_sampling_buffer(struct sf_buffer *sfb)
  */
 static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
 {
-	int j, k, rc;
-	unsigned long *sdbt, *tail, *trailer;
-	unsigned long sdb;
-	unsigned long num_sdbt, sdb_per_table;
+	int rc;
 
 	if (sfb->sdbt)
 		return -EINVAL;
+
+	/* Allocate the sample-data-block-table origin */
+	sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+	if (!sfb->sdbt)
+		return -ENOMEM;
 	sfb->num_sdb = 0;
+	sfb->num_sdbt = 1;
 
-	/* Compute the number of required sample-data-block-tables (SDBT) */
-	num_sdbt = num_sdb / ((PAGE_SIZE - 8) / 8);
-	if (num_sdbt < CPUM_SF_MIN_SDBT)
-		num_sdbt = CPUM_SF_MIN_SDBT;
-	sdb_per_table = (PAGE_SIZE - 8) / 8;
-
-	debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: num_sdbt=%lu "
-			    "num_sdb=%lu sdb_per_table=%lu\n",
-			    num_sdbt, num_sdb, sdb_per_table);
-	sdbt = NULL;
-	tail = sdbt;
-
-	for (j = 0; j < num_sdbt; j++) {
-		sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
-		if (!sdbt) {
-			rc = -ENOMEM;
-			goto allocate_sdbt_error;
-		}
+	/* Link the table origin to point to itself to prepare for
+	 * realloc_sampling_buffer() invocation.
+	 */
+	sfb->tail = sfb->sdbt;
+	*sfb->tail = (unsigned long)(void *) sfb->sdbt + 1;
 
-		/* save origin of sample-data-block-table */
-		if (!sfb->sdbt)
-			sfb->sdbt = (unsigned long) sdbt;
+	/* Allocate requested number of sample-data-blocks */
+	rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
+	if (rc) {
+		free_sampling_buffer(sfb);
+		debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
+			"realloc_sampling_buffer failed with rc=%i\n", rc);
+	} else
+		debug_sprintf_event(sfdbg, 4,
+			"alloc_sampling_buffer: tear=%p dear=%p\n",
+			sfb->sdbt, (void *) *sfb->sdbt);
+	return rc;
+}
 
-		/* link current page to tail of chain */
-		if (tail)
-			*tail = (unsigned long)(void *) sdbt + 1;
+static void sfb_set_limits(unsigned long min, unsigned long max)
+{
+	CPUM_SF_MIN_SDB = min;
+	CPUM_SF_MAX_SDB = max;
+}
 
-		for (k = 0; k < num_sdb && k < sdb_per_table; k++) {
-			/* get and set SDB page */
-			sdb = get_zeroed_page(GFP_KERNEL);
-			if (!sdb) {
-				rc = -ENOMEM;
-				goto allocate_sdbt_error;
-			}
-			*sdbt = sdb;
-			trailer = trailer_entry_ptr(*sdbt);
-			*trailer = SDB_TE_ALERT_REQ_MASK;
-			sdbt++;
-		}
-		num_sdb -= k;
-		sfb->num_sdb += k;	/* count allocated sdb's */
-		tail = sdbt;
-	}
+static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
+					struct hw_perf_event *hwc)
+{
+	if (!sfb->sdbt)
+		return SFB_ALLOC_REG(hwc);
+	if (SFB_ALLOC_REG(hwc) > sfb->num_sdb)
+		return SFB_ALLOC_REG(hwc) - sfb->num_sdb;
+	return 0;
+}
 
-	rc = 0;
-	if (tail)
-		*tail = sfb->sdbt + 1;
-	sfb->tail = (unsigned long) (void *) tail;
+static int sfb_has_pending_allocs(struct sf_buffer *sfb,
+				  struct hw_perf_event *hwc)
+{
+	return sfb_pending_allocs(sfb, hwc) > 0;
+}
 
-allocate_sdbt_error:
-	if (rc)
-		free_sampling_buffer(sfb);
-	else
-		debug_sprintf_event(sfdbg, 4,
-			"alloc_sampling_buffer: tear=%0lx dear=%0lx\n",
-			sfb->sdbt, *(unsigned long *) sfb->sdbt);
-	return rc;
+static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+	/* Limit the number of SDBs to not exceed the maximum */
+	num = min_t(unsigned long, num, CPUM_SF_MAX_SDB - SFB_ALLOC_REG(hwc));
+	if (num)
+		SFB_ALLOC_REG(hwc) += num;
 }
 
-static int allocate_sdbt(struct cpu_hw_sf *cpuhw, const struct hw_perf_event *hwc)
+static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+	SFB_ALLOC_REG(hwc) = 0;
+	sfb_account_allocs(num, hwc);
+}
+
+static int allocate_sdbt(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
 {
 	unsigned long n_sdb, freq;
 	unsigned long factor;
@@ -225,39 +353,133 @@ static int allocate_sdbt(struct cpu_hw_sf *cpuhw, const struct hw_perf_event *hw
 	if (n_sdb < CPUM_SF_MIN_SDB)
 		n_sdb = CPUM_SF_MIN_SDB;
 
-	/* Return if there is already a sampling buffer allocated.
-	 * XXX Remove this later and check number of available and
-	 * required sdb's and, if necessary, increase the sampling buffer.
+	/* If there is already a sampling buffer allocated, it is very likely
+	 * that the sampling facility is enabled too. If the event to be
+	 * initialized requires a greater sampling buffer, the allocation must
+	 * be postponed. Changing the sampling buffer requires the sampling
+	 * facility to be in the disabled state. So, account the number of
+	 * required SDBs and let cpumsf_pmu_enable() resize the buffer just
+	 * before the event is started.
 	 */
+	sfb_init_allocs(n_sdb, hwc);
 	if (sf_buffer_available(cpuhw))
 		return 0;
 
 	debug_sprintf_event(sfdbg, 3,
-		    "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%i cpuhw=%p\n",
+		    "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%lu cpuhw=%p\n",
 		    SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw);
 
 	return alloc_sampling_buffer(&cpuhw->sfb,
-			min_t(unsigned long, n_sdb, CPUM_SF_MAX_SDB));
+			sfb_pending_allocs(&cpuhw->sfb, hwc));
 }
 
+static unsigned long min_percent(unsigned int percent, unsigned long base,
+				 unsigned long min)
+{
+	return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100));
+}
 
-/* Number of perf events counting hardware events */
-static atomic_t num_events;
-/* Used to avoid races in calling reserve/release_cpumf_hardware */
-static DEFINE_MUTEX(pmc_reserve_mutex);
+static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
+{
+	/* Use a percentage-based approach to extend the sampling facility
+	 * buffer. Accept up to 5% sample data loss.
+	 * Vary the extents between 1% to 5% of the current number of
+	 * sample-data-blocks.
+	 */
+	if (ratio <= 5)
+		return 0;
+	if (ratio <= 25)
+		return min_percent(1, base, 1);
+	if (ratio <= 50)
+		return min_percent(1, base, 1);
+	if (ratio <= 75)
+		return min_percent(2, base, 2);
+	if (ratio <= 100)
+		return min_percent(3, base, 3);
+	if (ratio <= 250)
+		return min_percent(4, base, 4);
+
+	return min_percent(5, base, 8);
+}
 
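Two things about this ladder are easy to misread. First, despite its name, min_percent() returns the smaller of its `min` argument and percent * base / 100, so the third argument is effectively a cap on the extent and growth stays bounded however large the buffer already is. Second, the ratio <= 25 and ratio <= 50 rungs return the same extent. A standalone rerun of the arithmetic (the helper is copied from the patch; DIV_ROUND_UP and min_t are re-created here so the sketch compiles on its own):

```c
#include <assert.h>

/* Local stand-ins for the kernel macros, just to make this compile: */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define min_t(type, a, b)	((type)(a) < (type)(b) ? (type)(a) : (type)(b))

static unsigned long min_percent(unsigned int percent, unsigned long base,
				 unsigned long min)
{
	return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100));
}

int main(void)
{
	/* 80% loss ratio on a 1000-SDB buffer: 3% would be 30 SDBs, but
	 * the cap argument limits the extent to 3 SDBs per enable cycle.
	 */
	assert(min_percent(3, 1000, 3) == 3);
	/* Small buffer, same ratio: ceil(3% of 40) = 2, below the cap. */
	assert(min_percent(3, 40, 3) == 2);
	return 0;
}
```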
-/*
- * sf_disable() - Switch off sampling facility
+static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
+				  struct hw_perf_event *hwc)
+{
+	unsigned long ratio, num;
+
+	if (!OVERFLOW_REG(hwc))
+		return;
+
+	/* The sample_overflow contains the average number of sample data
+	 * that has been lost because sample-data-blocks were full.
+	 *
+	 * Calculate the total number of sample data entries that have been
+	 * discarded. Then calculate the ratio of lost samples to total samples
+	 * per second in percent.
+	 */
+	ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb,
+			     sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)));
+
+	/* Compute number of sample-data-blocks */
+	num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb);
+	if (num)
+		sfb_account_allocs(num, hwc);
+
+	debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
+			    " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
+	OVERFLOW_REG(hwc) = 0;
+}
+
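To make the ratio concrete: suppose hw_perf_event_update() left OVERFLOW_REG(hwc) at 20 lost samples per SDB on average, the buffer holds num_sdb = 50 SDBs, and the configured rate works out to 10000 samples per second. Then ratio = ceil(100 * 20 * 50 / 10000) = 10 percent, and compute_sfb_extent(10, 50) takes the ratio <= 25 rung: min_percent(1, 50, 1) = min(1, ceil(50 / 100)) = 1, so one extra SDB is accounted for the next enable cycle.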
+/* extend_sampling_buffer() - Extend sampling buffer
+ * @sfb:	Sampling buffer structure (for local CPU)
+ * @hwc:	Perf event hardware structure
+ *
+ * Use this function to extend the sampling buffer based on the overflow counter
+ * and postponed allocation extents stored in the specified Perf event hardware.
+ *
+ * Important: This function disables the sampling facility in order to safely
+ *	      change the sampling buffer structure. Do not call this function
+ *	      when the PMU is active.
  */
-static int sf_disable(void)
+static void extend_sampling_buffer(struct sf_buffer *sfb,
+				   struct hw_perf_event *hwc)
 {
-	struct hws_lsctl_request_block sreq;
+	unsigned long num, num_old;
+	int rc;
 
-	memset(&sreq, 0, sizeof(sreq));
-	return lsctl(&sreq);
+	num = sfb_pending_allocs(sfb, hwc);
+	if (!num)
+		return;
+	num_old = sfb->num_sdb;
+
+	/* Disable the sampling facility to reset any states and also
+	 * clear pending measurement alerts.
+	 */
+	sf_disable();
+
+	/* Extend the sampling buffer.
+	 * This memory allocation typically happens in an atomic context when
+	 * called by perf. Because this is a reallocation, it is fine if the
+	 * new SDB-request cannot be satisfied immediately.
+	 */
+	rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
+	if (rc)
+		debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
+				    "failed with rc=%i\n", rc);
+
+	if (sfb_has_pending_allocs(sfb, hwc))
+		debug_sprintf_event(sfdbg, 5, "sfb: extend: "
+				    "req=%lu alloc=%lu remaining=%lu\n",
+				    num, sfb->num_sdb - num_old,
+				    sfb_pending_allocs(sfb, hwc));
 }
 
 
+/* Number of perf events counting hardware events */
+static atomic_t num_events;
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
 #define PMC_INIT      0
 #define PMC_RELEASE   1
 #define PMC_FAILURE   2
@@ -345,19 +567,17 @@ static void hw_init_period(struct hw_perf_event *hwc, u64 period)
 }
 
 static void hw_reset_registers(struct hw_perf_event *hwc,
-			       unsigned long sdbt_origin)
+			       unsigned long *sdbt_origin)
 {
-	TEAR_REG(hwc) = sdbt_origin;	/* (re)set to first sdb table */
+	/* (Re)set to first sample-data-block-table */
+	TEAR_REG(hwc) = (unsigned long) sdbt_origin;
 }
 
 static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
 				   unsigned long rate)
 {
-	if (rate < si->min_sampl_rate)
-		return si->min_sampl_rate;
-	if (rate > si->max_sampl_rate)
-		return si->max_sampl_rate;
-	return rate;
+	return clamp_t(unsigned long, rate,
+		       si->min_sampl_rate, si->max_sampl_rate);
 }
 
 static int __hw_perf_event_init(struct perf_event *event)
@@ -448,6 +668,10 @@ static int __hw_perf_event_init(struct perf_event *event)
 	SAMPL_RATE(hwc) = rate;
 	hw_init_period(hwc, SAMPL_RATE(hwc));
 
+	/* Initialize sample data overflow accounting */
+	hwc->extra_reg.reg = REG_OVERFLOW;
+	OVERFLOW_REG(hwc) = 0;
+
 	/* Allocate the per-CPU sampling buffer using the CPU information
 	 * from the event. If the event is not pinned to a particular
 	 * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
@@ -513,6 +737,7 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
 static void cpumsf_pmu_enable(struct pmu *pmu)
 {
 	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+	struct hw_perf_event *hwc;
 	int err;
 
 	if (cpuhw->flags & PMU_F_ENABLED)
@@ -521,6 +746,26 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
 	if (cpuhw->flags & PMU_F_ERR_MASK)
 		return;
 
+	/* Check whether to extend the sampling buffer.
+	 *
+	 * Two conditions trigger an increase of the sampling buffer for a
+	 * perf event:
+	 *    1. Postponed buffer allocations from the event initialization.
+	 *    2. Sampling overflows that contribute to pending allocations.
+	 *
+	 * Note that the extend_sampling_buffer() function disables the sampling
+	 * facility, but it can be fully re-enabled using sampling controls that
+	 * have been saved in cpumsf_pmu_disable().
+	 */
+	if (cpuhw->event) {
+		hwc = &cpuhw->event->hw;
+		/* Account number of overflow-designated buffer extents */
+		sfb_account_overflows(cpuhw, hwc);
+		if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
+			extend_sampling_buffer(&cpuhw->sfb, hwc);
+	}
+
+	/* (Re)enable the PMU and sampling facility */
 	cpuhw->flags |= PMU_F_ENABLED;
 	barrier();
 
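This is where the deferred work pays off: the facility is known to be stopped here, cpumsf_pmu_disable() having saved the active sampling controls beforehand, so extend_sampling_buffer() can safely sf_disable(), reallocate under GFP_ATOMIC, and relink the chain. The lsctl() call later in this function (outside the hunk) then reloads the saved controls with the enlarged buffer already in place.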
@@ -632,8 +877,6 @@ static int perf_push_sample(struct perf_event *event,
 	if (perf_event_overflow(event, &data, &regs)) {
 		overflow = 1;
 		event->pmu->stop(event, 0);
-		debug_sprintf_event(sfdbg, 4, "perf_push_sample: PMU stopped"
-				    " because of an event overflow\n");
 	}
 	perf_event_update_userpage(event);
 
@@ -710,11 +953,11 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 	struct hw_perf_event *hwc = &event->hw;
 	struct hws_trailer_entry *te;
 	unsigned long *sdbt;
-	unsigned long long event_overflow, sampl_overflow;
+	unsigned long long event_overflow, sampl_overflow, num_sdb;
 	int done;
 
 	sdbt = (unsigned long *) TEAR_REG(hwc);
-	done = event_overflow = sampl_overflow = 0;
+	done = event_overflow = sampl_overflow = num_sdb = 0;
 	while (!done) {
 		/* Get the trailer entry of the sample-data-block */
 		te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
@@ -726,17 +969,13 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 			break;
 		}
 
-		/* Check sample overflow count */
-		if (te->overflow) {
-			/* Increment sample overflow counter */
-			sampl_overflow += te->overflow;
-
-			/* XXX: If an sample overflow occurs, increase the
-			 * sampling buffer. Set a "realloc" flag because
-			 * the sampler must be re-enabled for changing
-			 * the sample-data-block-table content.
+		/* Check the sample overflow count */
+		if (te->overflow)
+			/* Account sample overflows and, if a particular limit
+			 * is reached, extend the sampling buffer.
+			 * For details, see sfb_account_overflows().
 			 */
-		}
+			sampl_overflow += te->overflow;
 
 		/* Timestamps are valid for full sample-data-blocks only */
 		debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
@@ -749,6 +988,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 		 * is stopped and remaining samples will be discarded.
 		 */
 		hw_collect_samples(event, sdbt, &event_overflow);
+		num_sdb++;
 
 		/* Reset trailer */
 		xchg(&te->overflow, 0);
@@ -775,6 +1015,10 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 		flush_all = 1;
 	}
 
+	/* Account sample overflows in the event hardware structure */
+	if (sampl_overflow)
+		OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
+						 sampl_overflow, 1 + num_sdb);
 	if (sampl_overflow || event_overflow)
 		debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
 				    "overflow stats: sample=%llu event=%llu\n",
@@ -849,7 +1093,7 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
 	 */
 	cpuhw->lsctl.s = 0;
 	cpuhw->lsctl.h = 1;
-	cpuhw->lsctl.tear = cpuhw->sfb.sdbt;
+	cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
 	cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
 	cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
 	hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
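For orientation: in the sampling-control block, `tear` and `dear` seed the hardware's table-entry-address register (TEAR) and data-entry-address register (DEAR), so the facility starts its walk at the SDBT origin with the first SDB (the table's first entry, hence the dereference) as the initial data block. The cast is the only change here; it follows from sfb.sdbt becoming `unsigned long *`.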
@@ -1018,6 +1262,48 @@ static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
+static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
+{
+	if (!cpum_sf_avail())
+		return -ENODEV;
+	return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+}
+
+static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
+{
+	int rc;
+	unsigned long min, max;
+
+	if (!cpum_sf_avail())
+		return -ENODEV;
+	if (!val || !strlen(val))
+		return -EINVAL;
+
+	/* Valid parameter values: "min,max" or "max" */
+	min = CPUM_SF_MIN_SDB;
+	max = CPUM_SF_MAX_SDB;
+	if (strchr(val, ','))
+		rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL;
+	else
+		rc = kstrtoul(val, 10, &max);
+
+	if (min < 2 || min >= max || max > get_num_physpages())
+		rc = -EINVAL;
+	if (rc)
+		return rc;
+
+	sfb_set_limits(min, max);
+	pr_info("Changed sampling buffer settings: min=%lu max=%lu\n",
+		CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+	return 0;
+}
+
+#define param_check_sfb_size(name, p) __param_check(name, p, void)
+static struct kernel_param_ops param_ops_sfb_size = {
+	.set = param_set_sfb_size,
+	.get = param_get_sfb_size,
+};
+
 static int __init init_cpum_sampling_pmu(void)
 {
 	int err;
@@ -1047,3 +1333,4 @@ out:
 	return err;
 }
 arch_initcall(init_cpum_sampling_pmu);
+core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640);
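Usage note: core_param() registers `cpum_sfb_size` as a kernel (not module) parameter, so the limits can be set at boot, e.g. `cpum_sfb_size=64,2048` for a 64-SDB minimum and 2048-SDB maximum, or just `cpum_sfb_size=2048` to raise only the maximum. Per param_set_sfb_size(), values must satisfy min >= 2, min < max, and max no larger than the number of physical pages. With permissions 0640 the parameter should also be readable and writable at runtime, presumably under /sys/module/kernel/parameters/cpum_sfb_size, since core_param entries are grouped under the "kernel" pseudo-module.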