diff options
-rw-r--r-- | include/linux/perf_counter.h | 10 | ||||
-rw-r--r-- | kernel/perf_counter.c | 32 |
2 files changed, 27 insertions, 15 deletions
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 972f90d7a32f..6c1ef72ea501 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h | |||
@@ -199,10 +199,14 @@ struct perf_counter_attr { | |||
199 | inherit_stat : 1, /* per task counts */ | 199 | inherit_stat : 1, /* per task counts */ |
200 | enable_on_exec : 1, /* next exec enables */ | 200 | enable_on_exec : 1, /* next exec enables */ |
201 | task : 1, /* trace fork/exit */ | 201 | task : 1, /* trace fork/exit */ |
202 | watermark : 1, /* wakeup_watermark */ | ||
202 | 203 | ||
203 | __reserved_1 : 50; | 204 | __reserved_1 : 49; |
204 | 205 | ||
205 | __u32 wakeup_events; /* wakeup every n events */ | 206 | union { |
207 | __u32 wakeup_events; /* wakeup every n events */ | ||
208 | __u32 wakeup_watermark; /* bytes before wakeup */ | ||
209 | }; | ||
206 | __u32 __reserved_2; | 210 | __u32 __reserved_2; |
207 | 211 | ||
208 | __u64 __reserved_3; | 212 | __u64 __reserved_3; |
@@ -521,6 +525,8 @@ struct perf_mmap_data { | |||
521 | atomic_t wakeup; /* needs a wakeup */ | 525 | atomic_t wakeup; /* needs a wakeup */ |
522 | atomic_t lost; /* nr records lost */ | 526 | atomic_t lost; /* nr records lost */ |
523 | 527 | ||
528 | long watermark; /* wakeup watermark */ | ||
529 | |||
524 | struct perf_counter_mmap_page *user_page; | 530 | struct perf_counter_mmap_page *user_page; |
525 | void *data_pages[0]; | 531 | void *data_pages[0]; |
526 | }; | 532 | }; |
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index fe0d1adde804..29b73b6e8146 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c | |||
@@ -2176,6 +2176,13 @@ static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages) | |||
2176 | data->nr_pages = nr_pages; | 2176 | data->nr_pages = nr_pages; |
2177 | atomic_set(&data->lock, -1); | 2177 | atomic_set(&data->lock, -1); |
2178 | 2178 | ||
2179 | if (counter->attr.watermark) { | ||
2180 | data->watermark = min_t(long, PAGE_SIZE * nr_pages, | ||
2181 | counter->attr.wakeup_watermark); | ||
2182 | } | ||
2183 | if (!data->watermark) | ||
2184 | data->watermark = max(PAGE_SIZE, PAGE_SIZE * nr_pages / 4); | ||
2185 | |||
2179 | rcu_assign_pointer(counter->data, data); | 2186 | rcu_assign_pointer(counter->data, data); |
2180 | 2187 | ||
2181 | return 0; | 2188 | return 0; |
@@ -2517,23 +2524,15 @@ struct perf_output_handle { | |||
2517 | unsigned long flags; | 2524 | unsigned long flags; |
2518 | }; | 2525 | }; |
2519 | 2526 | ||
2520 | static bool perf_output_space(struct perf_mmap_data *data, | 2527 | static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail, |
2521 | unsigned int offset, unsigned int head) | 2528 | unsigned long offset, unsigned long head) |
2522 | { | 2529 | { |
2523 | unsigned long tail; | ||
2524 | unsigned long mask; | 2530 | unsigned long mask; |
2525 | 2531 | ||
2526 | if (!data->writable) | 2532 | if (!data->writable) |
2527 | return true; | 2533 | return true; |
2528 | 2534 | ||
2529 | mask = (data->nr_pages << PAGE_SHIFT) - 1; | 2535 | mask = (data->nr_pages << PAGE_SHIFT) - 1; |
2530 | /* | ||
2531 | * Userspace could choose to issue a mb() before updating the tail | ||
2532 | * pointer. So that all reads will be completed before the write is | ||
2533 | * issued. | ||
2534 | */ | ||
2535 | tail = ACCESS_ONCE(data->user_page->data_tail); | ||
2536 | smp_rmb(); | ||
2537 | 2536 | ||
2538 | offset = (offset - tail) & mask; | 2537 | offset = (offset - tail) & mask; |
2539 | head = (head - tail) & mask; | 2538 | head = (head - tail) & mask; |
@@ -2679,7 +2678,7 @@ static int perf_output_begin(struct perf_output_handle *handle, | |||
2679 | { | 2678 | { |
2680 | struct perf_counter *output_counter; | 2679 | struct perf_counter *output_counter; |
2681 | struct perf_mmap_data *data; | 2680 | struct perf_mmap_data *data; |
2682 | unsigned int offset, head; | 2681 | unsigned long tail, offset, head; |
2683 | int have_lost; | 2682 | int have_lost; |
2684 | struct { | 2683 | struct { |
2685 | struct perf_event_header header; | 2684 | struct perf_event_header header; |
@@ -2717,16 +2716,23 @@ static int perf_output_begin(struct perf_output_handle *handle, | |||
2717 | perf_output_lock(handle); | 2716 | perf_output_lock(handle); |
2718 | 2717 | ||
2719 | do { | 2718 | do { |
2719 | /* | ||
2720 | * Userspace could choose to issue a mb() before updating the | ||
2721 | * tail pointer. So that all reads will be completed before the | ||
2722 | * write is issued. | ||
2723 | */ | ||
2724 | tail = ACCESS_ONCE(data->user_page->data_tail); | ||
2725 | smp_rmb(); | ||
2720 | offset = head = atomic_long_read(&data->head); | 2726 | offset = head = atomic_long_read(&data->head); |
2721 | head += size; | 2727 | head += size; |
2722 | if (unlikely(!perf_output_space(data, offset, head))) | 2728 | if (unlikely(!perf_output_space(data, tail, offset, head))) |
2723 | goto fail; | 2729 | goto fail; |
2724 | } while (atomic_long_cmpxchg(&data->head, offset, head) != offset); | 2730 | } while (atomic_long_cmpxchg(&data->head, offset, head) != offset); |
2725 | 2731 | ||
2726 | handle->offset = offset; | 2732 | handle->offset = offset; |
2727 | handle->head = head; | 2733 | handle->head = head; |
2728 | 2734 | ||
2729 | if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT)) | 2735 | if (head - tail > data->watermark) |
2730 | atomic_set(&data->wakeup, 1); | 2736 | atomic_set(&data->wakeup, 1); |
2731 | 2737 | ||
2732 | if (have_lost) { | 2738 | if (have_lost) { |