Diffstat (limited to 'kernel/events/ring_buffer.c')
-rw-r--r--  kernel/events/ring_buffer.c | 327
1 file changed, 323 insertions(+), 4 deletions(-)
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index eadb95ce7aac..232f00f273cb 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -243,14 +243,317 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 	spin_lock_init(&rb->event_lock);
 }
 
+/*
+ * This is called before hardware starts writing to the AUX area to
+ * obtain an output handle and make sure there's room in the buffer.
+ * When the capture completes, call perf_aux_output_end() to commit
+ * the recorded data to the buffer.
+ *
+ * The ordering is similar to that of perf_output_{begin,end}, with
+ * the exception of (B), which should be taken care of by the pmu
+ * driver, since ordering rules will differ depending on hardware.
+ */
+void *perf_aux_output_begin(struct perf_output_handle *handle,
+			    struct perf_event *event)
+{
+	struct perf_event *output_event = event;
+	unsigned long aux_head, aux_tail;
+	struct ring_buffer *rb;
+
+	if (output_event->parent)
+		output_event = output_event->parent;
+
+	/*
+	 * Since this will typically be open across pmu::add/pmu::del, we
+	 * grab ring_buffer's refcount instead of holding rcu read lock
+	 * to make sure it doesn't disappear under us.
+	 */
+	rb = ring_buffer_get(output_event);
+	if (!rb)
+		return NULL;
+
+	if (!rb_has_aux(rb) || !atomic_inc_not_zero(&rb->aux_refcount))
+		goto err;
+
+	/*
+	 * Nesting is not supported for AUX area, make sure nested
+	 * writers are caught early
+	 */
+	if (WARN_ON_ONCE(local_xchg(&rb->aux_nest, 1)))
+		goto err_put;
+
+	aux_head = local_read(&rb->aux_head);
+
+	handle->rb = rb;
+	handle->event = event;
+	handle->head = aux_head;
+	handle->size = 0;
+
+	/*
+	 * In overwrite mode, AUX data stores do not depend on aux_tail,
+	 * therefore (A) control dependency barrier does not exist. The
+	 * (B) <-> (C) ordering is still observed by the pmu driver.
+	 */
+	if (!rb->aux_overwrite) {
+		aux_tail = ACCESS_ONCE(rb->user_page->aux_tail);
+		handle->wakeup = local_read(&rb->aux_wakeup) + rb->aux_watermark;
+		if (aux_head - aux_tail < perf_aux_size(rb))
+			handle->size = CIRC_SPACE(aux_head, aux_tail, perf_aux_size(rb));
+
+		/*
+		 * handle->size computation depends on aux_tail load; this forms a
+		 * control dependency barrier separating aux_tail load from aux data
+		 * store that will be enabled on successful return
+		 */
+		if (!handle->size) { /* A, matches D */
+			event->pending_disable = 1;
+			perf_output_wakeup(handle);
+			local_set(&rb->aux_nest, 0);
+			goto err_put;
+		}
+	}
+
+	return handle->rb->aux_priv;
+
+err_put:
+	rb_free_aux(rb);
+
+err:
+	ring_buffer_put(rb);
+	handle->event = NULL;
+
+	return NULL;
+}
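For illustration, the free-space computation in the non-overwrite path above can be reproduced in isolation. The snippet below is a stand-alone user-space sketch: the 64 KiB size and the head/tail offsets are made-up values, and the macro is the expanded equivalent of the CIRC_SPACE() definition from include/linux/circ_buf.h.

#include <stdio.h>

/* expanded equivalent of CIRC_SPACE() from include/linux/circ_buf.h;
 * size must be a power of two */
#define CIRC_SPACE(head, tail, size) (((tail) - ((head) + 1)) & ((size) - 1))

int main(void)
{
	unsigned long size     = 64 * 1024;	/* example AUX area size */
	unsigned long aux_head = 60 * 1024;	/* producer (hardware) offset */
	unsigned long aux_tail =  8 * 1024;	/* consumer (user space) offset */

	/* same check as perf_aux_output_begin(): hand out space only if
	 * the producer has not lapped the consumer */
	if (aux_head - aux_tail < size)
		printf("handle->size = %lu bytes\n",
		       CIRC_SPACE(aux_head, aux_tail, size));
	return 0;
}

With these values the reserved size comes out to 12287 bytes: the 4 KiB up to the end of the buffer plus the 8 KiB before aux_tail, minus the one byte CIRC_SPACE() always keeps free.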
+
+/*
+ * Commit the data written by hardware into the ring buffer by adjusting
+ * aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the
+ * pmu driver's responsibility to observe ordering rules of the hardware,
+ * so that all the data is externally visible before this is called.
+ */
+void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
+			 bool truncated)
+{
+	struct ring_buffer *rb = handle->rb;
+	unsigned long aux_head;
+	u64 flags = 0;
+
+	if (truncated)
+		flags |= PERF_AUX_FLAG_TRUNCATED;
+
+	/* in overwrite mode, driver provides aux_head via handle */
+	if (rb->aux_overwrite) {
+		flags |= PERF_AUX_FLAG_OVERWRITE;
+
+		aux_head = handle->head;
+		local_set(&rb->aux_head, aux_head);
+	} else {
+		aux_head = local_read(&rb->aux_head);
+		local_add(size, &rb->aux_head);
+	}
+
+	if (size || flags) {
+		/*
+		 * Only send RECORD_AUX if we have something useful to communicate
+		 */
+
+		perf_event_aux_event(handle->event, aux_head, size, flags);
+	}
+
+	aux_head = rb->user_page->aux_head = local_read(&rb->aux_head);
+
+	if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) {
+		perf_output_wakeup(handle);
+		local_add(rb->aux_watermark, &rb->aux_wakeup);
+	}
+	handle->event = NULL;
+
+	local_set(&rb->aux_nest, 0);
+	rb_free_aux(rb);
+	ring_buffer_put(rb);
+}
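Taken together, perf_aux_output_begin() and perf_aux_output_end() give a PMU driver the call pattern sketched below. This is only an outline under stated assumptions: my_pmu_start(), my_pmu_stop(), my_pmu_handle and the hw_program_buffer()/hw_stop_and_read_head() helpers are hypothetical stand-ins, not code from this patch or from any real driver.

/* hypothetical driver state; a real driver keeps the handle per CPU */
static struct perf_output_handle my_pmu_handle;

static void my_pmu_start(struct perf_event *event)
{
	void *base;

	/* reserve space; NULL means no AUX buffer, nesting, or no room */
	base = perf_aux_output_begin(&my_pmu_handle, event);
	if (!base)
		return;

	/* point the hardware at base + handle->head, limited to
	 * handle->size bytes (hypothetical helper) */
	hw_program_buffer(base, my_pmu_handle.head, my_pmu_handle.size);
}

static void my_pmu_stop(struct perf_event *event)
{
	/* stop the hardware, make its stores globally visible (the "(B)"
	 * ordering step from the comments above), and read the number of
	 * bytes it produced (hypothetical helper) */
	unsigned long written = hw_stop_and_read_head();

	/* commit: advances aux_head, emits PERF_RECORD_AUX and wakes the
	 * consumer once the watermark is crossed */
	perf_aux_output_end(&my_pmu_handle, written, false);
}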
+
+/*
+ * Skip over a given number of bytes in the AUX buffer, due to, for example,
+ * hardware's alignment constraints.
+ */
+int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size)
+{
+	struct ring_buffer *rb = handle->rb;
+	unsigned long aux_head;
+
+	if (size > handle->size)
+		return -ENOSPC;
+
+	local_add(size, &rb->aux_head);
+
+	aux_head = rb->user_page->aux_head = local_read(&rb->aux_head);
+	if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) {
+		perf_output_wakeup(handle);
+		local_add(rb->aux_watermark, &rb->aux_wakeup);
+		handle->wakeup = local_read(&rb->aux_wakeup) +
+				 rb->aux_watermark;
+	}
+
+	handle->head = aux_head;
+	handle->size -= size;
+
+	return 0;
+}
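As a hypothetical use of perf_aux_output_skip(), a driver whose hardware can only start writing at an aligned offset could pad the AUX head before programming it. The helper below is illustrative only; the alignment requirement is an assumption, not something this patch imposes.

/* illustrative: advance the AUX head to the next 'align'-byte boundary
 * ('align' must be a power of two); returns -ENOSPC if the remaining
 * space cannot hold the padding */
static int my_pmu_align_head(struct perf_output_handle *handle,
			     unsigned long align)
{
	unsigned long pad = (-handle->head) & (align - 1);

	return pad ? perf_aux_output_skip(handle, pad) : 0;
}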
+
+void *perf_get_aux(struct perf_output_handle *handle)
+{
+	/* this is only valid between perf_aux_output_begin and *_end */
+	if (!handle->event)
+		return NULL;
+
+	return handle->rb->aux_priv;
+}
+
+#define PERF_AUX_GFP	(GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY)
+
+static struct page *rb_alloc_aux_page(int node, int order)
+{
+	struct page *page;
+
+	if (order > MAX_ORDER)
+		order = MAX_ORDER;
+
+	do {
+		page = alloc_pages_node(node, PERF_AUX_GFP, order);
+	} while (!page && order--);
+
+	if (page && order) {
+		/*
+		 * Communicate the allocation size to the driver
+		 */
+		split_page(page, order);
+		SetPagePrivate(page);
+		set_page_private(page, order);
+	}
+
+	return page;
+}
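The PagePrivate()/page_private() hint left by rb_alloc_aux_page() is what lets a PMU's setup_aux() callback rediscover which stretches of the aux_pages[] array are physically contiguous. One possible way to walk them is sketched below; my_pmu_scan_chunks() is a hypothetical helper, not part of this patch.

/* illustrative: walk the pages[] array passed to pmu->setup_aux() and
 * recover each physically contiguous chunk from the order stored in
 * page_private() by rb_alloc_aux_page() */
static void my_pmu_scan_chunks(void **pages, int nr_pages)
{
	int i;

	for (i = 0; i < nr_pages;) {
		struct page *page = virt_to_page(pages[i]);
		int order = PagePrivate(page) ? page_private(page) : 0;

		/* a chunk of (1 << order) contiguous pages starts at pages[i] */
		i += 1 << order;
	}
}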
+
+static void rb_free_aux_page(struct ring_buffer *rb, int idx)
+{
+	struct page *page = virt_to_page(rb->aux_pages[idx]);
+
+	ClearPagePrivate(page);
+	page->mapping = NULL;
+	__free_page(page);
+}
+
+int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
+		 pgoff_t pgoff, int nr_pages, long watermark, int flags)
+{
+	bool overwrite = !(flags & RING_BUFFER_WRITABLE);
+	int node = (event->cpu == -1) ? -1 : cpu_to_node(event->cpu);
+	int ret = -ENOMEM, max_order = 0;
+
+	if (!has_aux(event))
+		return -ENOTSUPP;
+
+	if (event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) {
+		/*
+		 * We need to start with the max_order that fits in nr_pages,
+		 * not the other way around, hence ilog2() and not get_order.
+		 */
+		max_order = ilog2(nr_pages);
+
+		/*
+		 * PMU requests more than one contiguous chunks of memory
+		 * for SW double buffering
+		 */
+		if ((event->pmu->capabilities & PERF_PMU_CAP_AUX_SW_DOUBLEBUF) &&
+		    !overwrite) {
+			if (!max_order)
+				return -EINVAL;
+
+			max_order--;
+		}
+	}
+
+	rb->aux_pages = kzalloc_node(nr_pages * sizeof(void *), GFP_KERNEL, node);
+	if (!rb->aux_pages)
+		return -ENOMEM;
+
+	rb->free_aux = event->pmu->free_aux;
+	for (rb->aux_nr_pages = 0; rb->aux_nr_pages < nr_pages;) {
+		struct page *page;
+		int last, order;
+
+		order = min(max_order, ilog2(nr_pages - rb->aux_nr_pages));
+		page = rb_alloc_aux_page(node, order);
+		if (!page)
+			goto out;
+
+		for (last = rb->aux_nr_pages + (1 << page_private(page));
+		     last > rb->aux_nr_pages; rb->aux_nr_pages++)
+			rb->aux_pages[rb->aux_nr_pages] = page_address(page++);
+	}
+
+	rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages,
+					     overwrite);
+	if (!rb->aux_priv)
+		goto out;
+
+	ret = 0;
+
+	/*
+	 * aux_pages (and pmu driver's private data, aux_priv) will be
+	 * referenced in both producer's and consumer's contexts, thus
+	 * we keep a refcount here to make sure either of the two can
+	 * reference them safely.
+	 */
+	atomic_set(&rb->aux_refcount, 1);
+
+	rb->aux_overwrite = overwrite;
+	rb->aux_watermark = watermark;
+
+	if (!rb->aux_watermark && !rb->aux_overwrite)
+		rb->aux_watermark = nr_pages << (PAGE_SHIFT - 1);
+
+out:
+	if (!ret)
+		rb->aux_pgoff = pgoff;
+	else
+		rb_free_aux(rb);
+
+	return ret;
+}
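As a worked example of the default watermark (assuming the common 4 KiB pages, i.e. PAGE_SHIFT = 12): a 64-page AUX area gets aux_watermark = 64 << 11 = 128 KiB, half of its 256 KiB size, so in non-overwrite mode the consumer is woken roughly twice per trip around the buffer.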
+
+static void __rb_free_aux(struct ring_buffer *rb)
+{
+	int pg;
+
+	if (rb->aux_priv) {
+		rb->free_aux(rb->aux_priv);
+		rb->free_aux = NULL;
+		rb->aux_priv = NULL;
+	}
+
+	for (pg = 0; pg < rb->aux_nr_pages; pg++)
+		rb_free_aux_page(rb, pg);
+
+	kfree(rb->aux_pages);
+	rb->aux_nr_pages = 0;
+}
+
+void rb_free_aux(struct ring_buffer *rb)
+{
+	if (atomic_dec_and_test(&rb->aux_refcount))
+		__rb_free_aux(rb);
+}
+
 #ifndef CONFIG_PERF_USE_VMALLOC
 
 /*
  * Back perf_mmap() with regular GFP_KERNEL-0 pages.
  */
 
-struct page *
-perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
+static struct page *
+__perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
 {
 	if (pgoff > rb->nr_pages)
 		return NULL;
@@ -340,8 +643,8 @@ static int data_page_nr(struct ring_buffer *rb)
 	return rb->nr_pages << page_order(rb);
 }
 
-struct page *
-perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
+static struct page *
+__perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
 {
 	/* The '>' counts in the user page. */
 	if (pgoff > data_page_nr(rb))
@@ -416,3 +719,19 @@ fail:
 }
 
 #endif
+
+struct page *
+perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
+{
+	if (rb->aux_nr_pages) {
+		/* above AUX space */
+		if (pgoff > rb->aux_pgoff + rb->aux_nr_pages)
+			return NULL;
+
+		/* AUX space */
+		if (pgoff >= rb->aux_pgoff)
+			return virt_to_page(rb->aux_pages[pgoff - rb->aux_pgoff]);
+	}
+
+	return __perf_mmap_to_page(rb, pgoff);
+}
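The new perf_mmap_to_page() reflects the split mmap layout: pgoff 0 is the user page, the data pages follow it, and the AUX area lives at a separate, higher offset (rb->aux_pgoff) chosen by user space when it maps the AUX region. As an illustrative example with made-up numbers: with one user page, 16 data pages and a 64-page AUX area mapped at aux_pgoff = 17, a fault at pgoff 0..16 falls through to __perf_mmap_to_page(), pgoff 17..80 resolves to rb->aux_pages[pgoff - 17], and offsets beyond the AUX area return NULL.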
