aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/s390/include/asm/perf_event.h4
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c527
2 files changed, 411 insertions, 120 deletions
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 23d2dfa8201d..99d7f4e333c2 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -49,6 +49,10 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
49#define PERF_CPUM_SF_MAX_CTR 1 49#define PERF_CPUM_SF_MAX_CTR 1
50#define PERF_EVENT_CPUM_SF 0xB0000UL /* Raw event ID */ 50#define PERF_EVENT_CPUM_SF 0xB0000UL /* Raw event ID */
51 51
52#define REG_NONE 0
53#define REG_OVERFLOW 1
54#define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config)
55#define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc)
52#define TEAR_REG(hwc) ((hwc)->last_tag) 56#define TEAR_REG(hwc) ((hwc)->last_tag)
53#define SAMPL_RATE(hwc) ((hwc)->event_base) 57#define SAMPL_RATE(hwc) ((hwc)->event_base)
54 58
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index ae5e0192160d..ea1656073dac 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -17,6 +17,8 @@
17#include <linux/percpu.h> 17#include <linux/percpu.h>
18#include <linux/notifier.h> 18#include <linux/notifier.h>
19#include <linux/export.h> 19#include <linux/export.h>
20#include <linux/mm.h>
21#include <linux/moduleparam.h>
20#include <asm/cpu_mf.h> 22#include <asm/cpu_mf.h>
21#include <asm/irq.h> 23#include <asm/irq.h>
22#include <asm/debug.h> 24#include <asm/debug.h>
@@ -26,34 +28,54 @@
26 * At least one table is required for the sampling buffer structure. 28 * At least one table is required for the sampling buffer structure.
27 * A single table contains up to 511 pointers to sample-data-blocks. 29 * A single table contains up to 511 pointers to sample-data-blocks.
28 */ 30 */
29#define CPUM_SF_MIN_SDBT 1 31#define CPUM_SF_MIN_SDBT 1
30 32
31/* Minimum number of sample-data-blocks: 33/* Number of sample-data-blocks per sample-data-block-table (SDBT):
32 * The minimum designates a single page for sample-data-block, i.e., 34 * The table contains SDB origin (8 bytes) and one SDBT origin that
33 * up to 126 sample-data-blocks with a size of 32 bytes (bsdes). 35 * points to the next table.
34 */ 36 */
35#define CPUM_SF_MIN_SDB 126 37#define CPUM_SF_SDB_PER_TABLE ((PAGE_SIZE - 8) / 8)
36 38
37/* Maximum number of sample-data-blocks: 39/* Maximum page offset for an SDBT table-link entry:
38 * The maximum number designates approx. 256K per CPU including 40 * If this page offset is reached, a table-link entry to the next SDBT
39 * the given number of sample-data-blocks and taking the number 41 * must be added.
40 * of sample-data-block tables into account. 42 */
43#define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8)
44static inline int require_table_link(const void *sdbt)
45{
46 return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
47}
48
49/* Minimum and maximum sampling buffer sizes:
50 *
51 * This number represents the maximum size of the sampling buffer
52 * taking the number of sample-data-block-tables into account.
41 * 53 *
42 * Later, this number can be increased for extending the sampling 54 * Sampling buffer size Buffer characteristics
43 * buffer, for example, by factor 2 (512K) or 4 (1M). 55 * ---------------------------------------------------
56 * 64KB == 16 pages (4KB per page)
57 * 1 page for SDB-tables
58 * 15 pages for SDBs
59 *
60 * 32MB == 8192 pages (4KB per page)
61 * 16 pages for SDB-tables
62 * 8176 pages for SDBs
44 */ 63 */
45#define CPUM_SF_MAX_SDB 6471 64static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
65static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;
46 66
47struct sf_buffer { 67struct sf_buffer {
48 unsigned long sdbt; /* Sample-data-block-table origin */ 68 unsigned long *sdbt; /* Sample-data-block-table origin */
49 /* buffer characteristics (required for buffer increments) */ 69 /* buffer characteristics (required for buffer increments) */
50 unsigned long num_sdb; /* Number of sample-data-blocks */ 70 unsigned long num_sdb; /* Number of sample-data-blocks */
51 unsigned long tail; /* last sample-data-block-table */ 71 unsigned long num_sdbt; /* Number of sample-data-block-tables */
72 unsigned long *tail; /* last sample-data-block-table */
52}; 73};
53 74
54struct cpu_hw_sf { 75struct cpu_hw_sf {
55 /* CPU-measurement sampling information block */ 76 /* CPU-measurement sampling information block */
56 struct hws_qsi_info_block qsi; 77 struct hws_qsi_info_block qsi;
78 /* CPU-measurement sampling control block */
57 struct hws_lsctl_request_block lsctl; 79 struct hws_lsctl_request_block lsctl;
58 struct sf_buffer sfb; /* Sampling buffer */ 80 struct sf_buffer sfb; /* Sampling buffer */
59 unsigned int flags; /* Status flags */ 81 unsigned int flags; /* Status flags */
@@ -65,11 +87,22 @@ static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
65static debug_info_t *sfdbg; 87static debug_info_t *sfdbg;
66 88
67/* 89/*
90 * sf_disable() - Switch off sampling facility
91 */
92static int sf_disable(void)
93{
94 struct hws_lsctl_request_block sreq;
95
96 memset(&sreq, 0, sizeof(sreq));
97 return lsctl(&sreq);
98}
99
100/*
68 * sf_buffer_available() - Check for an allocated sampling buffer 101 * sf_buffer_available() - Check for an allocated sampling buffer
69 */ 102 */
70static int sf_buffer_available(struct cpu_hw_sf *cpuhw) 103static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
71{ 104{
72 return (cpuhw->sfb.sdbt) ? 1 : 0; 105 return !!cpuhw->sfb.sdbt;
73} 106}
74 107
75/* 108/*
@@ -77,32 +110,32 @@ static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
77 */ 110 */
78static void free_sampling_buffer(struct sf_buffer *sfb) 111static void free_sampling_buffer(struct sf_buffer *sfb)
79{ 112{
80 unsigned long sdbt, *curr; 113 unsigned long *sdbt, *curr;
81 114
82 if (!sfb->sdbt) 115 if (!sfb->sdbt)
83 return; 116 return;
84 117
85 sdbt = sfb->sdbt; 118 sdbt = sfb->sdbt;
86 curr = (unsigned long *) sdbt; 119 curr = sdbt;
87 120
88 /* we'll free the SDBT after all SDBs are processed... */ 121 /* Free the SDBT after all SDBs are processed... */
89 while (1) { 122 while (1) {
90 if (!*curr || !sdbt) 123 if (!*curr || !sdbt)
91 break; 124 break;
92 125
93 /* watch for link entry reset if found */ 126 /* Process table-link entries */
94 if (is_link_entry(curr)) { 127 if (is_link_entry(curr)) {
95 curr = get_next_sdbt(curr); 128 curr = get_next_sdbt(curr);
96 if (sdbt) 129 if (sdbt)
97 free_page(sdbt); 130 free_page((unsigned long) sdbt);
98 131
99 /* we are done if we reach the origin */ 132 /* If the origin is reached, sampling buffer is freed */
100 if ((unsigned long) curr == sfb->sdbt) 133 if (curr == sfb->sdbt)
101 break; 134 break;
102 else 135 else
103 sdbt = (unsigned long) curr; 136 sdbt = curr;
104 } else { 137 } else {
105 /* process SDB pointer */ 138 /* Process SDB pointer */
106 if (*curr) { 139 if (*curr) {
107 free_page(*curr); 140 free_page(*curr);
108 curr++; 141 curr++;
@@ -111,10 +144,106 @@ static void free_sampling_buffer(struct sf_buffer *sfb)
111 } 144 }
112 145
113 debug_sprintf_event(sfdbg, 5, 146 debug_sprintf_event(sfdbg, 5,
114 "free_sampling_buffer: freed sdbt=%0lx\n", sfb->sdbt); 147 "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
115 memset(sfb, 0, sizeof(*sfb)); 148 memset(sfb, 0, sizeof(*sfb));
116} 149}
117 150
151static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
152{
153 unsigned long sdb, *trailer;
154
155 /* Allocate and initialize sample-data-block */
156 sdb = get_zeroed_page(gfp_flags);
157 if (!sdb)
158 return -ENOMEM;
159 trailer = trailer_entry_ptr(sdb);
160 *trailer = SDB_TE_ALERT_REQ_MASK;
161
162 /* Link SDB into the sample-data-block-table */
163 *sdbt = sdb;
164
165 return 0;
166}
167
168/*
169 * realloc_sampling_buffer() - extend sampler memory
170 *
171 * Allocates new sample-data-blocks and adds them to the specified sampling
172 * buffer memory.
173 *
174 * Important: This modifies the sampling buffer and must be called when the
175 * sampling facility is disabled.
176 *
177 * Returns zero on success, non-zero otherwise.
178 */
179static int realloc_sampling_buffer(struct sf_buffer *sfb,
180 unsigned long num_sdb, gfp_t gfp_flags)
181{
182 int i, rc;
183 unsigned long *new, *tail;
184
185 if (!sfb->sdbt || !sfb->tail)
186 return -EINVAL;
187
188 if (!is_link_entry(sfb->tail))
189 return -EINVAL;
190
191 /* Append to the existing sampling buffer, overwriting the table-link
192 * entry.
193 * The tail variable always points to the "tail" (last and table-link)
194 * entry in an SDB-table.
195 */
196 tail = sfb->tail;
197
198 /* Do a sanity check whether the table-link entry points to
199 * the sampling buffer origin.
200 */
201 if (sfb->sdbt != get_next_sdbt(tail)) {
202 debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
203 "sampling buffer is not linked: origin=%p"
204 "tail=%p\n",
205 (void *) sfb->sdbt, (void *) tail);
206 return -EINVAL;
207 }
208
209 /* Allocate remaining SDBs */
210 rc = 0;
211 for (i = 0; i < num_sdb; i++) {
212 /* Allocate a new SDB-table if it is full. */
213 if (require_table_link(tail)) {
214 new = (unsigned long *) get_zeroed_page(gfp_flags);
215 if (!new) {
216 rc = -ENOMEM;
217 break;
218 }
219 sfb->num_sdbt++;
220 /* Link current page to tail of chain */
221 *tail = (unsigned long)(void *) new + 1;
222 tail = new;
223 }
224
225 /* Allocate a new sample-data-block.
226 * If there is not enough memory, stop the realloc process
227 * and simply use what was allocated. If this is a temporary
228 * issue, a new realloc call (if required) might succeed.
229 */
230 rc = alloc_sample_data_block(tail, gfp_flags);
231 if (rc)
232 break;
233 sfb->num_sdb++;
234 tail++;
235 }
236
237 /* Link sampling buffer to its origin */
238 *tail = (unsigned long) sfb->sdbt + 1;
239 sfb->tail = tail;
240
241 debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
242 " settings: sdbt=%lu sdb=%lu\n",
243 sfb->num_sdbt, sfb->num_sdb);
244 return rc;
245}
246
118/* 247/*
119 * allocate_sampling_buffer() - allocate sampler memory 248 * allocate_sampling_buffer() - allocate sampler memory
120 * 249 *
@@ -128,75 +257,74 @@ static void free_sampling_buffer(struct sf_buffer *sfb)
128 */ 257 */
129static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) 258static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
130{ 259{
131 int j, k, rc; 260 int rc;
132 unsigned long *sdbt, *tail, *trailer;
133 unsigned long sdb;
134 unsigned long num_sdbt, sdb_per_table;
135 261
136 if (sfb->sdbt) 262 if (sfb->sdbt)
137 return -EINVAL; 263 return -EINVAL;
264
265 /* Allocate the sample-data-block-table origin */
266 sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
267 if (!sfb->sdbt)
268 return -ENOMEM;
138 sfb->num_sdb = 0; 269 sfb->num_sdb = 0;
270 sfb->num_sdbt = 1;
139 271
140 /* Compute the number of required sample-data-block-tables (SDBT) */ 272 /* Link the table origin to point to itself to prepare for
141 num_sdbt = num_sdb / ((PAGE_SIZE - 8) / 8); 273 * realloc_sampling_buffer() invocation.
142 if (num_sdbt < CPUM_SF_MIN_SDBT) 274 */
143 num_sdbt = CPUM_SF_MIN_SDBT; 275 sfb->tail = sfb->sdbt;
144 sdb_per_table = (PAGE_SIZE - 8) / 8; 276 *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1;
145
146 debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: num_sdbt=%lu "
147 "num_sdb=%lu sdb_per_table=%lu\n",
148 num_sdbt, num_sdb, sdb_per_table);
149 sdbt = NULL;
150 tail = sdbt;
151
152 for (j = 0; j < num_sdbt; j++) {
153 sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
154 if (!sdbt) {
155 rc = -ENOMEM;
156 goto allocate_sdbt_error;
157 }
158 277
159 /* save origin of sample-data-block-table */ 278 /* Allocate requested number of sample-data-blocks */
160 if (!sfb->sdbt) 279 rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
161 sfb->sdbt = (unsigned long) sdbt; 280 if (rc) {
281 free_sampling_buffer(sfb);
282 debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
283 "realloc_sampling_buffer failed with rc=%i\n", rc);
284 } else
285 debug_sprintf_event(sfdbg, 4,
286 "alloc_sampling_buffer: tear=%p dear=%p\n",
287 sfb->sdbt, (void *) *sfb->sdbt);
288 return rc;
289}
162 290
163 /* link current page to tail of chain */ 291static void sfb_set_limits(unsigned long min, unsigned long max)
164 if (tail) 292{
165 *tail = (unsigned long)(void *) sdbt + 1; 293 CPUM_SF_MIN_SDB = min;
294 CPUM_SF_MAX_SDB = max;
295}
166 296
167 for (k = 0; k < num_sdb && k < sdb_per_table; k++) { 297static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
168 /* get and set SDB page */ 298 struct hw_perf_event *hwc)
169 sdb = get_zeroed_page(GFP_KERNEL); 299{
170 if (!sdb) { 300 if (!sfb->sdbt)
171 rc = -ENOMEM; 301 return SFB_ALLOC_REG(hwc);
172 goto allocate_sdbt_error; 302 if (SFB_ALLOC_REG(hwc) > sfb->num_sdb)
173 } 303 return SFB_ALLOC_REG(hwc) - sfb->num_sdb;
174 *sdbt = sdb; 304 return 0;
175 trailer = trailer_entry_ptr(*sdbt); 305}
176 *trailer = SDB_TE_ALERT_REQ_MASK;
177 sdbt++;
178 }
179 num_sdb -= k;
180 sfb->num_sdb += k; /* count allocated sdb's */
181 tail = sdbt;
182 }
183 306
184 rc = 0; 307static int sfb_has_pending_allocs(struct sf_buffer *sfb,
185 if (tail) 308 struct hw_perf_event *hwc)
186 *tail = sfb->sdbt + 1; 309{
187 sfb->tail = (unsigned long) (void *)tail; 310 return sfb_pending_allocs(sfb, hwc) > 0;
311}
188 312
189allocate_sdbt_error: 313static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
190 if (rc) 314{
191 free_sampling_buffer(sfb); 315 /* Limit the number of SDBs to not exceed the maximum */
192 else 316 num = min_t(unsigned long, num, CPUM_SF_MAX_SDB - SFB_ALLOC_REG(hwc));
193 debug_sprintf_event(sfdbg, 4, 317 if (num)
194 "alloc_sampling_buffer: tear=%0lx dear=%0lx\n", 318 SFB_ALLOC_REG(hwc) += num;
195 sfb->sdbt, *(unsigned long *) sfb->sdbt);
196 return rc;
197} 319}
198 320
199static int allocate_sdbt(struct cpu_hw_sf *cpuhw, const struct hw_perf_event *hwc) 321static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
322{
323 SFB_ALLOC_REG(hwc) = 0;
324 sfb_account_allocs(num, hwc);
325}
326
327static int allocate_sdbt(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
200{ 328{
201 unsigned long n_sdb, freq; 329 unsigned long n_sdb, freq;
202 unsigned long factor; 330 unsigned long factor;
@@ -225,39 +353,133 @@ static int allocate_sdbt(struct cpu_hw_sf *cpuhw, const struct hw_perf_event *hw
225 if (n_sdb < CPUM_SF_MIN_SDB) 353 if (n_sdb < CPUM_SF_MIN_SDB)
226 n_sdb = CPUM_SF_MIN_SDB; 354 n_sdb = CPUM_SF_MIN_SDB;
227 355
228 /* Return if there is already a sampling buffer allocated. 356 /* If there is already a sampling buffer allocated, it is very likely
229 * XXX Remove this later and check number of available and 357 * that the sampling facility is enabled too. If the event to be
230 * required sdb's and, if necessary, increase the sampling buffer. 358 * initialized requires a greater sampling buffer, the allocation must
359 * be postponed. Changing the sampling buffer requires the sampling
360 * facility to be in the disabled state. So, account the number of
361 * required SDBs and let cpumsf_pmu_enable() resize the buffer just
362 * before the event is started.
231 */ 363 */
364 sfb_init_allocs(n_sdb, hwc);
232 if (sf_buffer_available(cpuhw)) 365 if (sf_buffer_available(cpuhw))
233 return 0; 366 return 0;
234 367
235 debug_sprintf_event(sfdbg, 3, 368 debug_sprintf_event(sfdbg, 3,
236 "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%i cpuhw=%p\n", 369 "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%lu cpuhw=%p\n",
237 SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw); 370 SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw);
238 371
239 return alloc_sampling_buffer(&cpuhw->sfb, 372 return alloc_sampling_buffer(&cpuhw->sfb,
240 min_t(unsigned long, n_sdb, CPUM_SF_MAX_SDB)); 373 sfb_pending_allocs(&cpuhw->sfb, hwc));
241} 374}
242 375
376static unsigned long min_percent(unsigned int percent, unsigned long base,
377 unsigned long min)
378{
379 return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100));
380}
243 381
244/* Number of perf events counting hardware events */ 382static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
245static atomic_t num_events; 383{
246/* Used to avoid races in calling reserve/release_cpumf_hardware */ 384 /* Use a percentage-based approach to extend the sampling facility
247static DEFINE_MUTEX(pmc_reserve_mutex); 385 * buffer. Accept up to 5% sample data loss.
386 * Vary the extents between 1% to 5% of the current number of
387 * sample-data-blocks.
388 */
389 if (ratio <= 5)
390 return 0;
391 if (ratio <= 25)
392 return min_percent(1, base, 1);
393 if (ratio <= 50)
394 return min_percent(1, base, 1);
395 if (ratio <= 75)
396 return min_percent(2, base, 2);
397 if (ratio <= 100)
398 return min_percent(3, base, 3);
399 if (ratio <= 250)
400 return min_percent(4, base, 4);
401
402 return min_percent(5, base, 8);
403}
248 404
249/* 405static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
250 * sf_disable() - Switch off sampling facility 406 struct hw_perf_event *hwc)
407{
408 unsigned long ratio, num;
409
410 if (!OVERFLOW_REG(hwc))
411 return;
412
413 /* The sample_overflow contains the average number of sample data
414 * entries that have been lost because sample-data-blocks were full.
415 *
416 * Calculate the total number of sample data entries that have been
417 * discarded. Then calculate the ratio of lost samples to total samples
418 * per second in percent.
419 */
420 ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb,
421 sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)));
422
423 /* Compute number of sample-data-blocks */
424 num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb);
425 if (num)
426 sfb_account_allocs(num, hwc);
427
428 debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
429 " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
430 OVERFLOW_REG(hwc) = 0;
431}
432
433/* extend_sampling_buffer() - Extend sampling buffer
434 * @sfb: Sampling buffer structure (for local CPU)
435 * @hwc: Perf event hardware structure
436 *
437 * Use this function to extend the sampling buffer based on the overflow counter
438 * and postponed allocation extents stored in the specified Perf event hardware.
439 *
440 * Important: This function disables the sampling facility in order to safely
441 * change the sampling buffer structure. Do not call this function
442 * when the PMU is active.
251 */ 443 */
252static int sf_disable(void) 444static void extend_sampling_buffer(struct sf_buffer *sfb,
445 struct hw_perf_event *hwc)
253{ 446{
254 struct hws_lsctl_request_block sreq; 447 unsigned long num, num_old;
448 int rc;
255 449
256 memset(&sreq, 0, sizeof(sreq)); 450 num = sfb_pending_allocs(sfb, hwc);
257 return lsctl(&sreq); 451 if (!num)
452 return;
453 num_old = sfb->num_sdb;
454
455 /* Disable the sampling facility to reset any states and also
456 * clear pending measurement alerts.
457 */
458 sf_disable();
459
460 /* Extend the sampling buffer.
461 * This memory allocation typically happens in an atomic context when
462 * called by perf. Because this is a reallocation, it is fine if the
463 * new SDB-request cannot be satisfied immediately.
464 */
465 rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
466 if (rc)
467 debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
468 "failed with rc=%i\n", rc);
469
470 if (sfb_has_pending_allocs(sfb, hwc))
471 debug_sprintf_event(sfdbg, 5, "sfb: extend: "
472 "req=%lu alloc=%lu remaining=%lu\n",
473 num, sfb->num_sdb - num_old,
474 sfb_pending_allocs(sfb, hwc));
258} 475}
259 476
260 477
478/* Number of perf events counting hardware events */
479static atomic_t num_events;
480/* Used to avoid races in calling reserve/release_cpumf_hardware */
481static DEFINE_MUTEX(pmc_reserve_mutex);
482
261#define PMC_INIT 0 483#define PMC_INIT 0
262#define PMC_RELEASE 1 484#define PMC_RELEASE 1
263#define PMC_FAILURE 2 485#define PMC_FAILURE 2
@@ -345,19 +567,17 @@ static void hw_init_period(struct hw_perf_event *hwc, u64 period)
345} 567}
346 568
347static void hw_reset_registers(struct hw_perf_event *hwc, 569static void hw_reset_registers(struct hw_perf_event *hwc,
348 unsigned long sdbt_origin) 570 unsigned long *sdbt_origin)
349{ 571{
350 TEAR_REG(hwc) = sdbt_origin; /* (re)set to first sdb table */ 572 /* (Re)set to first sample-data-block-table */
573 TEAR_REG(hwc) = (unsigned long) sdbt_origin;
351} 574}
352 575
353static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, 576static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
354 unsigned long rate) 577 unsigned long rate)
355{ 578{
356 if (rate < si->min_sampl_rate) 579 return clamp_t(unsigned long, rate,
357 return si->min_sampl_rate; 580 si->min_sampl_rate, si->max_sampl_rate);
358 if (rate > si->max_sampl_rate)
359 return si->max_sampl_rate;
360 return rate;
361} 581}
362 582
363static int __hw_perf_event_init(struct perf_event *event) 583static int __hw_perf_event_init(struct perf_event *event)
@@ -448,6 +668,10 @@ static int __hw_perf_event_init(struct perf_event *event)
448 SAMPL_RATE(hwc) = rate; 668 SAMPL_RATE(hwc) = rate;
449 hw_init_period(hwc, SAMPL_RATE(hwc)); 669 hw_init_period(hwc, SAMPL_RATE(hwc));
450 670
671 /* Initialize sample data overflow accounting */
672 hwc->extra_reg.reg = REG_OVERFLOW;
673 OVERFLOW_REG(hwc) = 0;
674
451 /* Allocate the per-CPU sampling buffer using the CPU information 675 /* Allocate the per-CPU sampling buffer using the CPU information
452 * from the event. If the event is not pinned to a particular 676 * from the event. If the event is not pinned to a particular
453 * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling 677 * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
@@ -513,6 +737,7 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
513static void cpumsf_pmu_enable(struct pmu *pmu) 737static void cpumsf_pmu_enable(struct pmu *pmu)
514{ 738{
515 struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); 739 struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
740 struct hw_perf_event *hwc;
516 int err; 741 int err;
517 742
518 if (cpuhw->flags & PMU_F_ENABLED) 743 if (cpuhw->flags & PMU_F_ENABLED)
@@ -521,6 +746,26 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
521 if (cpuhw->flags & PMU_F_ERR_MASK) 746 if (cpuhw->flags & PMU_F_ERR_MASK)
522 return; 747 return;
523 748
749 /* Check whether to extend the sampling buffer.
750 *
751 * Two conditions trigger an increase of the sampling buffer for a
752 * perf event:
753 * 1. Postponed buffer allocations from the event initialization.
754 * 2. Sampling overflows that contribute to pending allocations.
755 *
756 * Note that the extend_sampling_buffer() function disables the sampling
757 * facility, but it can be fully re-enabled using sampling controls that
758 * have been saved in cpumsf_pmu_disable().
759 */
760 if (cpuhw->event) {
761 hwc = &cpuhw->event->hw;
762 /* Account number of overflow-designated buffer extents */
763 sfb_account_overflows(cpuhw, hwc);
764 if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
765 extend_sampling_buffer(&cpuhw->sfb, hwc);
766 }
767
768 /* (Re)enable the PMU and sampling facility */
524 cpuhw->flags |= PMU_F_ENABLED; 769 cpuhw->flags |= PMU_F_ENABLED;
525 barrier(); 770 barrier();
526 771
@@ -632,8 +877,6 @@ static int perf_push_sample(struct perf_event *event,
632 if (perf_event_overflow(event, &data, &regs)) { 877 if (perf_event_overflow(event, &data, &regs)) {
633 overflow = 1; 878 overflow = 1;
634 event->pmu->stop(event, 0); 879 event->pmu->stop(event, 0);
635 debug_sprintf_event(sfdbg, 4, "perf_push_sample: PMU stopped"
636 " because of an event overflow\n");
637 } 880 }
638 perf_event_update_userpage(event); 881 perf_event_update_userpage(event);
639 882
@@ -710,11 +953,11 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
710 struct hw_perf_event *hwc = &event->hw; 953 struct hw_perf_event *hwc = &event->hw;
711 struct hws_trailer_entry *te; 954 struct hws_trailer_entry *te;
712 unsigned long *sdbt; 955 unsigned long *sdbt;
713 unsigned long long event_overflow, sampl_overflow; 956 unsigned long long event_overflow, sampl_overflow, num_sdb;
714 int done; 957 int done;
715 958
716 sdbt = (unsigned long *) TEAR_REG(hwc); 959 sdbt = (unsigned long *) TEAR_REG(hwc);
717 done = event_overflow = sampl_overflow = 0; 960 done = event_overflow = sampl_overflow = num_sdb = 0;
718 while (!done) { 961 while (!done) {
719 /* Get the trailer entry of the sample-data-block */ 962 /* Get the trailer entry of the sample-data-block */
720 te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); 963 te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
@@ -726,17 +969,13 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
726 break; 969 break;
727 } 970 }
728 971
729 /* Check sample overflow count */ 972 /* Check the sample overflow count */
730 if (te->overflow) { 973 if (te->overflow)
731 /* Increment sample overflow counter */ 974 /* Account sample overflows and, if a particular limit
732 sampl_overflow += te->overflow; 975 * is reached, extend the sampling buffer.
733 976 * For details, see sfb_account_overflows().
734 /* XXX: If an sample overflow occurs, increase the
735 * sampling buffer. Set a "realloc" flag because
736 * the sampler must be re-enabled for changing
737 * the sample-data-block-table content.
738 */ 977 */
739 } 978 sampl_overflow += te->overflow;
740 979
741 /* Timestamps are valid for full sample-data-blocks only */ 980 /* Timestamps are valid for full sample-data-blocks only */
742 debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p " 981 debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
@@ -749,6 +988,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
749 * is stopped and remaining samples will be discarded. 988 * is stopped and remaining samples will be discarded.
750 */ 989 */
751 hw_collect_samples(event, sdbt, &event_overflow); 990 hw_collect_samples(event, sdbt, &event_overflow);
991 num_sdb++;
752 992
753 /* Reset trailer */ 993 /* Reset trailer */
754 xchg(&te->overflow, 0); 994 xchg(&te->overflow, 0);
@@ -775,6 +1015,10 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
775 flush_all = 1; 1015 flush_all = 1;
776 } 1016 }
777 1017
1018 /* Account sample overflows in the event hardware structure */
1019 if (sampl_overflow)
1020 OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
1021 sampl_overflow, 1 + num_sdb);
778 if (sampl_overflow || event_overflow) 1022 if (sampl_overflow || event_overflow)
779 debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " 1023 debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
780 "overflow stats: sample=%llu event=%llu\n", 1024 "overflow stats: sample=%llu event=%llu\n",
@@ -849,7 +1093,7 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
849 */ 1093 */
850 cpuhw->lsctl.s = 0; 1094 cpuhw->lsctl.s = 0;
851 cpuhw->lsctl.h = 1; 1095 cpuhw->lsctl.h = 1;
852 cpuhw->lsctl.tear = cpuhw->sfb.sdbt; 1096 cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
853 cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt; 1097 cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
854 cpuhw->lsctl.interval = SAMPL_RATE(&event->hw); 1098 cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
855 hw_reset_registers(&event->hw, cpuhw->sfb.sdbt); 1099 hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
@@ -1018,6 +1262,48 @@ static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self,
1018 return NOTIFY_OK; 1262 return NOTIFY_OK;
1019} 1263}
1020 1264
1265static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
1266{
1267 if (!cpum_sf_avail())
1268 return -ENODEV;
1269 return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
1270}
1271
1272static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
1273{
1274 int rc;
1275 unsigned long min, max;
1276
1277 if (!cpum_sf_avail())
1278 return -ENODEV;
1279 if (!val || !strlen(val))
1280 return -EINVAL;
1281
1282 /* Valid parameter values: "min,max" or "max" */
1283 min = CPUM_SF_MIN_SDB;
1284 max = CPUM_SF_MAX_SDB;
1285 if (strchr(val, ','))
1286 rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL;
1287 else
1288 rc = kstrtoul(val, 10, &max);
1289
1290 if (min < 2 || min >= max || max > get_num_physpages())
1291 rc = -EINVAL;
1292 if (rc)
1293 return rc;
1294
1295 sfb_set_limits(min, max);
1296 pr_info("Changed sampling buffer settings: min=%lu max=%lu\n",
1297 CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
1298 return 0;
1299}
1300
1301#define param_check_sfb_size(name, p) __param_check(name, p, void)
1302static struct kernel_param_ops param_ops_sfb_size = {
1303 .set = param_set_sfb_size,
1304 .get = param_get_sfb_size,
1305};
1306
1021static int __init init_cpum_sampling_pmu(void) 1307static int __init init_cpum_sampling_pmu(void)
1022{ 1308{
1023 int err; 1309 int err;
@@ -1047,3 +1333,4 @@ out:
1047 return err; 1333 return err;
1048} 1334}
1049arch_initcall(init_cpum_sampling_pmu); 1335arch_initcall(init_cpum_sampling_pmu);
1336core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640);