diff options
Diffstat (limited to 'tools/include/linux')
| -rw-r--r-- | tools/include/linux/ring_buffer.h | 73 |
1 files changed, 73 insertions, 0 deletions
diff --git a/tools/include/linux/ring_buffer.h b/tools/include/linux/ring_buffer.h new file mode 100644 index 000000000000..9a083ae60473 --- /dev/null +++ b/tools/include/linux/ring_buffer.h | |||
| @@ -0,0 +1,73 @@ | |||
| 1 | #ifndef _TOOLS_LINUX_RING_BUFFER_H_ | ||
| 2 | #define _TOOLS_LINUX_RING_BUFFER_H_ | ||
| 3 | |||
| 4 | #include <asm/barrier.h> | ||
| 5 | |||
| 6 | /* | ||
| 7 | * Contract with kernel for walking the perf ring buffer from | ||
| 8 | * user space requires the following barrier pairing (quote | ||
| 9 | * from kernel/events/ring_buffer.c): | ||
| 10 | * | ||
| 11 | * Since the mmap() consumer (userspace) can run on a | ||
| 12 | * different CPU: | ||
| 13 | * | ||
| 14 | * kernel user | ||
| 15 | * | ||
| 16 | * if (LOAD ->data_tail) { LOAD ->data_head | ||
| 17 | * (A) smp_rmb() (C) | ||
| 18 | * STORE $data LOAD $data | ||
| 19 | * smp_wmb() (B) smp_mb() (D) | ||
| 20 | * STORE ->data_head STORE ->data_tail | ||
| 21 | * } | ||
| 22 | * | ||
| 23 | * Where A pairs with D, and B pairs with C. | ||
| 24 | * | ||
| 25 | * In our case A is a control dependency that separates the | ||
| 26 | * load of the ->data_tail and the stores of $data. In case | ||
| 27 | * ->data_tail indicates there is no room in the buffer to | ||
| 28 | * store $data we do not. | ||
| 29 | * | ||
| 30 | * D needs to be a full barrier since it separates the data | ||
| 31 | * READ from the tail WRITE. | ||
| 32 | * | ||
| 33 | * For B a WMB is sufficient since it separates two WRITEs, | ||
| 34 | * and for C an RMB is sufficient since it separates two READs. | ||
| 35 | * | ||
| 36 | * Note, instead of B, C, D we could also use smp_store_release() | ||
| 37 | * in B and D as well as smp_load_acquire() in C. | ||
| 38 | * | ||
| 39 | * However, this optimization does not make sense for all kernel | ||
| 40 | * supported architectures since for a fair number it would | ||
| 41 | * resolve into READ_ONCE() + smp_mb() pair for smp_load_acquire(), | ||
| 42 | * and smp_mb() + WRITE_ONCE() pair for smp_store_release(). | ||
| 43 | * | ||
| 44 | * Thus, for those architectures, smp_wmb() in B and smp_rmb() in C | ||
| 45 | * would still be less expensive. For the case of D, smp_store_release() | ||
| 46 | * has either the same cost or is less expensive; for example, due to | ||
| 47 | * TSO, x86 can avoid the CPU barrier entirely. | ||
| 48 | */ | ||
| 49 | |||
| 50 | static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base) | ||
| 51 | { | ||
| 52 | /* | ||
| 53 | * Architectures where smp_load_acquire() does not fallback to | ||
| 54 | * READ_ONCE() + smp_mb() pair. | ||
| 55 | */ | ||
| 56 | #if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \ | ||
| 57 | defined(__ia64__) || defined(__sparc__) && defined(__arch64__) | ||
| 58 | return smp_load_acquire(&base->data_head); | ||
| 59 | #else | ||
| 60 | u64 head = READ_ONCE(base->data_head); | ||
| 61 | |||
| 62 | smp_rmb(); | ||
| 63 | return head; | ||
| 64 | #endif | ||
| 65 | } | ||
| 66 | |||
| 67 | static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base, | ||
| 68 | u64 tail) | ||
| 69 | { | ||
| 70 | smp_store_release(&base->data_tail, tail); | ||
| 71 | } | ||
| 72 | |||
| 73 | #endif /* _TOOLS_LINUX_RING_BUFFER_H_ */ | ||
