Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2018-10-21

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Implement two new kinds of BPF maps, that is, queue and stack map along with new peek, push and pop operations, from Mauricio.

2) Add support for MSG_PEEK flag when redirecting into an ingress psock sk_msg queue, and add a new helper bpf_msg_push_data() for inserting data into the message, from John.

3) Allow for BPF programs of type BPF_PROG_TYPE_CGROUP_SKB to use direct packet access for __sk_buff, from Song.

4) Use more lightweight barriers for walking perf ring buffer for libbpf and perf tool as well. Also, various fixes and improvements from verifier side, from Daniel.

5) Add per-symbol visibility for DSO in libbpf and hide by default global symbols such as netlink related functions, from Andrey.

6) Two improvements to nfp's BPF offload to check vNIC capabilities in case prog is shared with multiple vNICs and to protect against mis-initializing atomic counters, from Jakub.

7) Fix for bpftool to use 4 context mode for the nfp disassembler, also from Jakub.

8) Fix a return value comparison in test_libbpf.sh and add several bpftool improvements in bash completion, documentation of bpf fs restrictions and batch mode summary print, from Quentin.

9) Fix a file resource leak in BPF selftest's load_kallsyms() helper, from Peng.

10) Fix an unused variable warning in map_lookup_and_delete_elem(), from Alexei.

11) Fix bpf_skb_adjust_room() signature in BPF UAPI helper doc, from Nicolas.

12) Add missing executables to .gitignore in BPF selftests, from Anders.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
author: David S. Miller <davem@davemloft.net> 2018-10-22 00:11:46 -0400
committer: David S. Miller <davem@davemloft.net> 2018-10-22 00:11:46 -0400
commit: a19c59cc10a5ebc6b5a542e56bfd9f427ce01d74 (patch)
tree: cd04c1af4e800eef175cbc51ffb6e78040d7ee27 /tools/include/linux/ring_buffer.h
parent: 92303c86b7e9b7d3895ccafb441a0354143e2a18 (diff)
parent: fe8ecccc10b3adc071de05ca7af728ca1a4ac9aa (diff)
1 files changed, 73 insertions, 0 deletions
diff --git a/tools/include/linux/ring_buffer.h b/tools/include/linux/ring_buffer.h
new file mode 100644
index 000000000000..9a083ae60473
--- /dev/null
+++ b/tools/include/linux/ring_buffer.h
@@ -0,0 +1,73 @@
+#ifndef _TOOLS_LINUX_RING_BUFFER_H_
+#define _TOOLS_LINUX_RING_BUFFER_H_
+#include <asm/barrier.h>
+/*
+ * Contract with kernel for walking the perf ring buffer from
+ * user space requires the following barrier pairing (quote
+ * from kernel/events/ring_buffer.c):
+ *
+ *   Since the mmap() consumer (userspace) can run on a
+ *   different CPU:
+ *
+ *   kernel                             user
+ *
+ *   if (LOAD ->data_tail) {            LOAD ->data_head
+ *                      (A)             smp_rmb()       (C)
+ *      STORE $data                     LOAD $data
+ *      smp_wmb()       (B)             smp_mb()        (D)
+ *      STORE ->data_head               STORE ->data_tail
+ *   }
+ *
+ *   Where A pairs with D, and B pairs with C.
+ *
+ *   In our case A is a control dependency that separates the
+ *   load of the ->data_tail and the stores of $data. In case
+ *   ->data_tail indicates there is no room in the buffer to
+ *   store $data we do not.
+ *
+ *   D needs to be a full barrier since it separates the data
+ *   READ from the tail WRITE.
+ *
+ *   For B a WMB is sufficient since it separates two WRITEs,
+ *   and for C an RMB is sufficient since it separates two READs.
+ *
+ * Note, instead of B, C, D we could also use smp_store_release()
+ * in B and D as well as smp_load_acquire() in C.
+ *
+ * However, this optimization does not make sense for all kernel
+ * supported architectures since for a fair number it would
+ * resolve into READ_ONCE() + smp_mb() pair for smp_load_acquire(),
+ * and smp_mb() + WRITE_ONCE() pair for smp_store_release().
+ *
+ * Thus for those, smp_wmb() in B and smp_rmb() in C would still
+ * be less expensive. For the case of D this has either the same
+ * cost or is less expensive; for example, due to TSO, x86 can
+ * avoid the CPU barrier entirely.
+ */
+static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base)
+{
+/*
+ * Load base->data_head and return it, providing the user-space side
+ * of barrier (C) from the contract described above.
+ *
+ * Architectures where smp_load_acquire() does not fallback to
+ * READ_ONCE() + smp_mb() pair use smp_load_acquire() directly.
+ * All other architectures take the #else branch: a READ_ONCE() of
+ * data_head followed by smp_rmb(), which the contract above notes
+ * is less expensive than the smp_mb() based fallback.
+ *
+ * Note on the condition below: && binds tighter than ||, so the
+ * final clause is evaluated as (__sparc__ && __arch64__).
+ */
+#if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \
+    defined(__ia64__) || defined(__sparc__) && defined(__arch64__)
+        return smp_load_acquire(&base->data_head);
+#else
+        u64 head = READ_ONCE(base->data_head);
+        smp_rmb();
+        return head;
+#endif
+}
+static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base,
+                                          u64 tail)
+{
+        smp_store_release(&base->data_tail, tail); /* user-side barrier (D) of the contract above */
+}
+#endif /* _TOOLS_LINUX_RING_BUFFER_H_ */
author	David S. Miller <davem@davemloft.net>	2018-10-22 00:11:46 -0400
committer	David S. Miller <davem@davemloft.net>	2018-10-22 00:11:46 -0400
commit	a19c59cc10a5ebc6b5a542e56bfd9f427ce01d74 (patch)
tree	cd04c1af4e800eef175cbc51ffb6e78040d7ee27 /tools/include/linux/ring_buffer.h
parent	92303c86b7e9b7d3895ccafb441a0354143e2a18 (diff)
parent	fe8ecccc10b3adc071de05ca7af728ca1a4ac9aa (diff)

diff --git a/tools/include/linux/ring_buffer.h b/tools/include/linux/ring_buffer.h
new file mode 100644
index 000000000000..9a083ae60473
--- /dev/null
+++ b/tools/include/linux/ring_buffer.h
@@ -0,0 +1,73 @@
	1	#ifndef _TOOLS_LINUX_RING_BUFFER_H_
	2	#define _TOOLS_LINUX_RING_BUFFER_H_
	3
	4	#include <asm/barrier.h>
	5
	6	/*
	7	* Contract with kernel for walking the perf ring buffer from
	8	* user space requires the following barrier pairing (quote
	9	* from kernel/events/ring_buffer.c):
	10	*
	11	* Since the mmap() consumer (userspace) can run on a
	12	* different CPU:
	13	*
	14	*   kernel                             user
	15	*
	16	*   if (LOAD ->data_tail) {            LOAD ->data_head
	17	*                      (A)             smp_rmb()       (C)
	18	*      STORE $data                     LOAD $data
	19	*      smp_wmb()       (B)             smp_mb()        (D)
	20	*      STORE ->data_head               STORE ->data_tail
	21	*   }
	22	*
	23	* Where A pairs with D, and B pairs with C.
	24	*
	25	* In our case A is a control dependency that separates the
	26	* load of the ->data_tail and the stores of $data. In case
	27	* ->data_tail indicates there is no room in the buffer to
	28	* store $data we do not.
	29	*
	30	* D needs to be a full barrier since it separates the data
	31	* READ from the tail WRITE.
	32	*
	33	* For B a WMB is sufficient since it separates two WRITEs,
	34	* and for C an RMB is sufficient since it separates two READs.
	35	*
	36	* Note, instead of B, C, D we could also use smp_store_release()
	37	* in B and D as well as smp_load_acquire() in C.
	38	*
	39	* However, this optimization does not make sense for all kernel
	40	* supported architectures since for a fair number it would
	41	* resolve into READ_ONCE() + smp_mb() pair for smp_load_acquire(),
	42	* and smp_mb() + WRITE_ONCE() pair for smp_store_release().
	43	*
	44	* Thus for those smp_wmb() in B and smp_rmb() in C would still
	45	* be less expensive. For the case of D this has either the same
	46	* cost or is less expensive, for example, due to TSO x86 can
	47	* avoid the CPU barrier entirely.
	48	*/
	49
	50	static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base)
	51	{
	52	/*
	53	* Architectures where smp_load_acquire() does not fallback to
	54	* READ_ONCE() + smp_mb() pair.
	55	*/
	56	#if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \
	57	defined(__ia64__) || defined(__sparc__) && defined(__arch64__)
	58	return smp_load_acquire(&base->data_head);
	59	#else
	60	u64 head = READ_ONCE(base->data_head);
	61
	62	smp_rmb();
	63	return head;
	64	#endif
	65	}
	66
	67	static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base,
	68	u64 tail)
	69	{
	70	smp_store_release(&base->data_tail, tail);
	71	}
	72
	73	#endif /* _TOOLS_LINUX_RING_BUFFER_H_ */