author | David S. Miller <davem@davemloft.net> | 2018-05-07 23:35:08 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-05-07 23:35:08 -0400 |
commit | 01adc4851a8090b46c7a5ed9cfc4b97e65abfbf4 (patch) | |
tree | 2ae02593d7139962648dff203f3f9701e34ccbc3 | |
parent | 18b338f5f9539512e76fd9ebd4c6ca1a0e159e2b (diff) | |
parent | e94fa1d93117e7f1eb783dc9cae6c70650944449 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Minor conflict, a CHECK was placed into an if() statement
in net-next, whilst a newline was added to that CHECK
call in 'net'. Thanks to Daniel for the merge resolution.
Signed-off-by: David S. Miller <davem@davemloft.net>
107 files changed, 8852 insertions, 2713 deletions
diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst new file mode 100644 index 000000000000..91928d9ee4bf --- /dev/null +++ b/Documentation/networking/af_xdp.rst | |||
@@ -0,0 +1,297 @@ | |||
1 | .. SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | ====== | ||
4 | AF_XDP | ||
5 | ====== | ||
6 | |||
7 | Overview | ||
8 | ======== | ||
9 | |||
10 | AF_XDP is an address family that is optimized for high performance | ||
11 | packet processing. | ||
12 | |||
13 | This document assumes that the reader is familiar with BPF and XDP. If | ||
14 | not, the Cilium project has an excellent reference guide at | ||
15 | http://cilium.readthedocs.io/en/doc-1.0/bpf/. | ||
16 | |||
17 | Using the XDP_REDIRECT action, an XDP program can redirect ingress | ||
18 | frames to other XDP-enabled netdevs via the bpf_redirect_map() | ||
19 | function. AF_XDP sockets make it possible for XDP programs to | ||
20 | redirect frames to a memory buffer in a user-space application | ||
21 | instead. | ||
22 | |||
23 | An AF_XDP socket (XSK) is created with the normal socket() | ||
24 | syscall. Associated with each XSK are two rings: the RX ring and the | ||
25 | TX ring. A socket can receive packets on the RX ring and it can send | ||
26 | packets on the TX ring. These rings are registered and sized with the | ||
27 | setsockopts XDP_RX_RING and XDP_TX_RING, respectively. It is mandatory | ||
28 | to have at least one of these rings for each socket. An RX or TX | ||
29 | descriptor ring points to a data buffer in a memory area called a | ||
30 | UMEM. RX and TX can share the same UMEM so that a packet does not have | ||
31 | to be copied between RX and TX. Moreover, if a packet needs to be kept | ||
32 | for a while due to a possible retransmit, the descriptor that points | ||
33 | to that packet can be changed to point to another and reused right | ||
34 | away. This again avoids copying data. | ||
35 | |||
36 | The UMEM consists of a number of equally sized frames and each frame | ||
37 | has a unique frame id. A descriptor in one of the rings references a | ||
38 | frame by referencing its frame id. The user space allocates memory for | ||
39 | this UMEM using whatever means it feels is most appropriate (malloc, | ||
40 | mmap, huge pages, etc). This memory area is then registered with the | ||
41 | kernel using the new setsockopt XDP_UMEM_REG. The UMEM also has two | ||
42 | rings: the FILL ring and the COMPLETION ring. The fill ring is used by | ||
43 | the application to send down frame ids for the kernel to fill in with | ||
44 | RX packet data. References to these frames will then appear in the RX | ||
45 | ring once each packet has been received. The completion ring, on the | ||
46 | other hand, contains frame ids that the kernel has transmitted | ||
47 | completely and can now be used again by user space, for either TX or | ||
48 | RX. Thus, the frame ids appearing in the completion ring are ids that | ||
49 | were previously transmitted using the TX ring. In summary, the RX and | ||
50 | FILL rings are used for the RX path and the TX and COMPLETION rings | ||
51 | are used for the TX path. | ||
52 | |||
53 | The socket is then finally bound with a bind() call to a device and a | ||
54 | specific queue id on that device, and it is not until bind is | ||
55 | completed that traffic starts to flow. | ||
56 | |||
57 | The UMEM can be shared between processes, if desired. If a process | ||
58 | wants to do this, it simply skips the registration of the UMEM and its | ||
59 | corresponding two rings, sets the XDP_SHARED_UMEM flag in the bind | ||
60 | call and submits the XSK of the process it would like to share UMEM | ||
61 | with as well as its own newly created XSK socket. The new process will | ||
62 | then receive frame id references in its own RX ring that point to this | ||
63 | shared UMEM. Note that since the ring structures are single-consumer / | ||
64 | single-producer (for performance reasons), the new process has to | ||
65 | create its own socket with associated RX and TX rings, since it cannot | ||
66 | share this with the other process. This is also the reason that there | ||
67 | is only one set of FILL and COMPLETION rings per UMEM. It is the | ||
68 | responsibility of a single process to handle the UMEM. | ||
69 | |||
70 | How are packets then distributed from an XDP program to the XSKs? There | ||
71 | is a BPF map called XSKMAP (or BPF_MAP_TYPE_XSKMAP in full). The | ||
72 | user-space application can place an XSK at an arbitrary place in this | ||
73 | map. The XDP program can then redirect a packet to a specific index in | ||
74 | this map and at this point XDP validates that the XSK in that map was | ||
75 | indeed bound to that device and ring number. If not, the packet is | ||
76 | dropped. If the map is empty at that index, the packet is also | ||
77 | dropped. This also means that it is currently mandatory to have an XDP | ||
78 | program loaded (and one XSK in the XSKMAP) to be able to get any | ||
79 | traffic to user space through the XSK. | ||
80 | |||
81 | AF_XDP can operate in two different modes: XDP_SKB and XDP_DRV. If | ||
82 | the driver does not have support for XDP, or if XDP_SKB is explicitly | ||
83 | chosen when loading the XDP program, XDP_SKB mode is employed. It | ||
84 | uses SKBs together with the generic XDP support and copies out the | ||
85 | data to user space; it is a fallback mode that works for any network | ||
86 | device. On the other hand, if the driver has support for XDP, it will | ||
87 | be used by the AF_XDP code to provide better performance, but there | ||
88 | is still a copy of the data into user space. | ||
89 | |||
90 | Concepts | ||
91 | ======== | ||
92 | |||
93 | In order to use an AF_XDP socket, a number of associated objects need | ||
94 | to be set up. | ||
95 | |||
96 | Jonathan Corbet has also written an excellent article on LWN, | ||
97 | "Accelerating networking with AF_XDP". It can be found at | ||
98 | https://lwn.net/Articles/750845/. | ||
99 | |||
100 | UMEM | ||
101 | ---- | ||
102 | |||
103 | UMEM is a region of contiguous virtual memory, divided into | ||
104 | equal-sized frames. A UMEM is associated with a netdev and a specific | ||
105 | queue id of that netdev. It is created and configured (frame size, | ||
106 | frame headroom, start address and size) by using the XDP_UMEM_REG | ||
107 | setsockopt system call. A UMEM is bound to a netdev and queue id via | ||
108 | the bind() system call. | ||
109 | |||
110 | An AF_XDP socket is linked to a single UMEM, but one UMEM can have | ||
111 | multiple AF_XDP sockets. To share a UMEM created via one socket A, | ||
112 | a second socket B sets the XDP_SHARED_UMEM flag in the struct | ||
113 | sockaddr_xdp member sxdp_flags, and passes the file descriptor | ||
114 | of A in the struct sockaddr_xdp member sxdp_shared_umem_fd. | ||
115 | |||
116 | The UMEM has two single-producer/single-consumer rings that are used | ||
117 | to transfer ownership of UMEM frames between the kernel and the | ||
118 | user-space application. | ||
119 | |||
120 | Rings | ||
121 | ----- | ||
122 | |||
123 | There are four different kinds of rings: Fill, Completion, RX and | ||
124 | TX. All rings are single-producer/single-consumer, so the user-space | ||
125 | application needs explicit synchronization if multiple | ||
126 | processes/threads are reading/writing to them. | ||
127 | |||
128 | The UMEM uses two rings: Fill and Completion. Each socket associated | ||
129 | with the UMEM must have an RX queue, a TX queue, or both. Say that there | ||
130 | is a setup with four sockets (all doing TX and RX). Then there will be | ||
131 | one Fill ring, one Completion ring, four TX rings and four RX rings. | ||
132 | |||
133 | The rings are head(producer)/tail(consumer) based rings. A producer | ||
134 | writes the data ring at the index pointed out by the struct xdp_ring | ||
135 | producer member, and then increments the producer index. A consumer | ||
136 | reads the data ring at the index pointed out by the struct xdp_ring | ||
137 | consumer member, and then increments the consumer index. | ||
138 | |||
139 | The rings are configured and created via the _RING setsockopt system | ||
140 | calls and mmapped to user-space using the appropriate offset to mmap() | ||
141 | (XDP_PGOFF_RX_RING, XDP_PGOFF_TX_RING, XDP_UMEM_PGOFF_FILL_RING and | ||
142 | XDP_UMEM_PGOFF_COMPLETION_RING). | ||
143 | |||
144 | The size of each ring needs to be a power of two. | ||
145 | |||
146 | UMEM Fill Ring | ||
147 | ~~~~~~~~~~~~~~ | ||
148 | |||
149 | The Fill ring is used to transfer ownership of UMEM frames from | ||
150 | user-space to kernel-space. The UMEM indices are passed in the | ||
151 | ring. As an example, if the UMEM is 64k and each frame is 4k, then the | ||
152 | UMEM has 16 frames and can pass indices between 0 and 15. | ||
153 | |||
154 | Frames passed to the kernel are used for the ingress path (RX rings). | ||
155 | |||
156 | The user application produces UMEM indices to this ring. | ||
157 | |||
158 | UMEM Completion Ring | ||
159 | ~~~~~~~~~~~~~~~~~~~~ | ||
160 | |||
161 | The Completion Ring is used to transfer ownership of UMEM frames from | ||
162 | kernel-space to user-space. Just like the Fill ring, UMEM indices are | ||
163 | used. | ||
164 | |||
165 | Frames passed from the kernel to user-space are frames that have been | ||
166 | sent (TX ring) and can be used by user-space again. | ||
167 | |||
168 | The user application consumes UMEM indices from this ring. | ||
169 | |||
170 | |||
171 | RX Ring | ||
172 | ~~~~~~~ | ||
173 | |||
174 | The RX ring is the receiving side of a socket. Each entry in the ring | ||
175 | is a struct xdp_desc descriptor. The descriptor contains the UMEM | ||
176 | index (idx), the length of the data (len) and the offset into the | ||
177 | frame (offset). | ||
178 | |||
179 | If no frames have been passed to the kernel via the Fill ring, no | ||
180 | descriptors will (or can) appear on the RX ring. | ||
181 | |||
182 | The user application consumes struct xdp_desc descriptors from this | ||
183 | ring. | ||
184 | |||
185 | TX Ring | ||
186 | ~~~~~~~ | ||
187 | |||
188 | The TX ring is used to send frames. The struct xdp_desc descriptor is | ||
189 | filled (index, length and offset) and passed into the ring. | ||
190 | |||
191 | To start the transfer, a sendmsg() system call is required. This might | ||
192 | be relaxed in the future. | ||
193 | |||
194 | The user application produces struct xdp_desc descriptors to this | ||
195 | ring. | ||
196 | |||
197 | XSKMAP / BPF_MAP_TYPE_XSKMAP | ||
198 | ---------------------------- | ||
199 | |||
200 | On the XDP side, there is a BPF map type BPF_MAP_TYPE_XSKMAP (XSKMAP) that | ||
201 | is used in conjunction with bpf_redirect_map() to pass the ingress | ||
202 | frame to a socket. | ||
203 | |||
204 | The user application inserts the socket into the map, via the bpf() | ||
205 | system call. | ||
206 | |||
207 | Note that if an XDP program tries to redirect to a socket that does | ||
208 | not match the queue configuration and netdev, the frame will be | ||
209 | dropped. E.g., if an AF_XDP socket is bound to netdev eth0 and | ||
210 | queue 17, only the XDP program executing for eth0 and queue 17 will | ||
211 | successfully pass data to the socket. Please refer to the sample | ||
212 | application (samples/bpf/) for an example. | ||
213 | |||
214 | Usage | ||
215 | ===== | ||
216 | |||
217 | In order to use AF_XDP sockets, two parts are needed: the | ||
218 | user-space application and the XDP program. For a complete setup and | ||
219 | usage example, please refer to the sample application. The user-space | ||
220 | side is xdpsock_user.c and the XDP side xdpsock_kern.c. | ||
221 | |||
222 | Naive ring dequeue and enqueue could look like this:: | ||
223 | |||
224 | // typedef struct xdp_rxtx_ring RING; | ||
225 | // typedef struct xdp_umem_ring RING; | ||
226 | |||
227 | // typedef struct xdp_desc RING_TYPE; | ||
228 | // typedef __u32 RING_TYPE; | ||
229 | |||
230 | int dequeue_one(RING *ring, RING_TYPE *item) | ||
231 | { | ||
232 | __u32 entries = ring->ptrs.producer - ring->ptrs.consumer; | ||
233 | |||
234 | if (entries == 0) | ||
235 | return -1; | ||
236 | |||
237 | // read-barrier! | ||
238 | |||
239 | *item = ring->desc[ring->ptrs.consumer & (RING_SIZE - 1)]; | ||
240 | ring->ptrs.consumer++; | ||
241 | return 0; | ||
242 | } | ||
243 | |||
244 | int enqueue_one(RING *ring, const RING_TYPE *item) | ||
245 | { | ||
246 | __u32 free_entries = RING_SIZE - (ring->ptrs.producer - ring->ptrs.consumer); | ||
247 | |||
248 | if (free_entries == 0) | ||
249 | return -1; | ||
250 | |||
251 | ring->desc[ring->ptrs.producer & (RING_SIZE - 1)] = *item; | ||
252 | |||
253 | // write-barrier! | ||
254 | |||
255 | ring->ptrs.producer++; | ||
256 | return 0; | ||
257 | } | ||
258 | |||
259 | |||
260 | For a more optimized version, please refer to the sample application. | ||
261 | |||
262 | Sample application | ||
263 | ================== | ||
264 | |||
265 | There is an xdpsock benchmarking/test application included that | ||
266 | demonstrates how to use AF_XDP sockets with both private and shared | ||
267 | UMEMs. Say that you would like your UDP traffic from port 4242 to end | ||
268 | up in queue 16, on which we will enable AF_XDP. Here, we use ethtool | ||
269 | for this:: | ||
270 | |||
271 | ethtool -N p3p2 rx-flow-hash udp4 fn | ||
272 | ethtool -N p3p2 flow-type udp4 src-port 4242 dst-port 4242 \ | ||
273 | action 16 | ||
274 | |||
275 | Running the rxdrop benchmark in XDP_DRV mode can then be done | ||
276 | using:: | ||
277 | |||
278 | samples/bpf/xdpsock -i p3p2 -q 16 -r -N | ||
279 | |||
280 | For XDP_SKB mode, use the switch "-S" instead of "-N" and all options | ||
281 | can be displayed with "-h", as usual. | ||
282 | |||
283 | Credits | ||
284 | ======= | ||
285 | |||
286 | - Björn Töpel (AF_XDP core) | ||
287 | - Magnus Karlsson (AF_XDP core) | ||
288 | - Alexander Duyck | ||
289 | - Alexei Starovoitov | ||
290 | - Daniel Borkmann | ||
291 | - Jesper Dangaard Brouer | ||
292 | - John Fastabend | ||
293 | - Jonathan Corbet (LWN coverage) | ||
294 | - Michael S. Tsirkin | ||
295 | - Qi Z Zhang | ||
296 | - Willem de Bruijn | ||
297 | |||
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index fd55c7de9991..5032e1263bc9 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt | |||
@@ -483,6 +483,12 @@ Example output from dmesg: | |||
483 | [ 3389.935851] JIT code: 00000030: 00 e8 28 94 ff e0 83 f8 01 75 07 b8 ff ff 00 00 | 483 | [ 3389.935851] JIT code: 00000030: 00 e8 28 94 ff e0 83 f8 01 75 07 b8 ff ff 00 00 |
484 | [ 3389.935852] JIT code: 00000040: eb 02 31 c0 c9 c3 | 484 | [ 3389.935852] JIT code: 00000040: eb 02 31 c0 c9 c3 |
485 | 485 | ||
486 | When CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1 and | ||
487 | setting it to any other value will fail. This is even the case for | ||
488 | setting bpf_jit_enable to 2, since dumping the final JIT image into the kernel log | ||
489 | is discouraged and introspection through bpftool (under tools/bpf/bpftool/) is the | ||
490 | generally recommended approach instead. | ||
491 | |||
486 | In the kernel source tree under tools/bpf/, there's bpf_jit_disasm for | 492 | In the kernel source tree under tools/bpf/, there's bpf_jit_disasm for |
487 | generating disassembly out of the kernel log's hexdump: | 493 | generating disassembly out of the kernel log's hexdump: |
488 | 494 | ||
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index f204eaff657d..cbd9bdd4a79e 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst | |||
@@ -6,6 +6,7 @@ Contents: | |||
6 | .. toctree:: | 6 | .. toctree:: |
7 | :maxdepth: 2 | 7 | :maxdepth: 2 |
8 | 8 | ||
9 | af_xdp | ||
9 | batman-adv | 10 | batman-adv |
10 | can | 11 | can |
11 | dpaa2/index | 12 | dpaa2/index |
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index 5992602469d8..9ecde517728c 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt | |||
@@ -45,6 +45,7 @@ through bpf(2) and passing a verifier in the kernel, a JIT will then | |||
45 | translate these BPF proglets into native CPU instructions. There are | 45 | translate these BPF proglets into native CPU instructions. There are |
46 | two flavors of JITs, the newer eBPF JIT currently supported on: | 46 | two flavors of JITs, the newer eBPF JIT currently supported on: |
47 | - x86_64 | 47 | - x86_64 |
48 | - x86_32 | ||
48 | - arm64 | 49 | - arm64 |
49 | - arm32 | 50 | - arm32 |
50 | - ppc64 | 51 | - ppc64 |
diff --git a/MAINTAINERS b/MAINTAINERS index ebe0b9ed7805..b22be10d5916 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -2729,7 +2729,6 @@ F: Documentation/networking/filter.txt | |||
2729 | F: Documentation/bpf/ | 2729 | F: Documentation/bpf/ |
2730 | F: include/linux/bpf* | 2730 | F: include/linux/bpf* |
2731 | F: include/linux/filter.h | 2731 | F: include/linux/filter.h |
2732 | F: include/trace/events/bpf.h | ||
2733 | F: include/trace/events/xdp.h | 2732 | F: include/trace/events/xdp.h |
2734 | F: include/uapi/linux/bpf* | 2733 | F: include/uapi/linux/bpf* |
2735 | F: include/uapi/linux/filter.h | 2734 | F: include/uapi/linux/filter.h |
@@ -15408,6 +15407,14 @@ T: git git://linuxtv.org/media_tree.git | |||
15408 | S: Maintained | 15407 | S: Maintained |
15409 | F: drivers/media/tuners/tuner-xc2028.* | 15408 | F: drivers/media/tuners/tuner-xc2028.* |
15410 | 15409 | ||
15410 | XDP SOCKETS (AF_XDP) | ||
15411 | M: Björn Töpel <bjorn.topel@intel.com> | ||
15412 | M: Magnus Karlsson <magnus.karlsson@intel.com> | ||
15413 | L: netdev@vger.kernel.org | ||
15414 | S: Maintained | ||
15415 | F: kernel/bpf/xskmap.c | ||
15416 | F: net/xdp/ | ||
15417 | |||
15411 | XEN BLOCK SUBSYSTEM | 15418 | XEN BLOCK SUBSYSTEM |
15412 | M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 15419 | M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> |
15413 | M: Roger Pau Monné <roger.pau@citrix.com> | 15420 | M: Roger Pau Monné <roger.pau@citrix.com> |
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index b5030e1a41d8..82689b999257 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c | |||
@@ -1452,83 +1452,6 @@ exit: | |||
1452 | emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); | 1452 | emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); |
1453 | emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code)); | 1453 | emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code)); |
1454 | break; | 1454 | break; |
1455 | /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */ | ||
1456 | case BPF_LD | BPF_ABS | BPF_W: | ||
1457 | case BPF_LD | BPF_ABS | BPF_H: | ||
1458 | case BPF_LD | BPF_ABS | BPF_B: | ||
1459 | /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */ | ||
1460 | case BPF_LD | BPF_IND | BPF_W: | ||
1461 | case BPF_LD | BPF_IND | BPF_H: | ||
1462 | case BPF_LD | BPF_IND | BPF_B: | ||
1463 | { | ||
1464 | const u8 r4 = bpf2a32[BPF_REG_6][1]; /* r4 = ptr to sk_buff */ | ||
1465 | const u8 r0 = bpf2a32[BPF_REG_0][1]; /*r0: struct sk_buff *skb*/ | ||
1466 | /* rtn value */ | ||
1467 | const u8 r1 = bpf2a32[BPF_REG_0][0]; /* r1: int k */ | ||
1468 | const u8 r2 = bpf2a32[BPF_REG_1][1]; /* r2: unsigned int size */ | ||
1469 | const u8 r3 = bpf2a32[BPF_REG_1][0]; /* r3: void *buffer */ | ||
1470 | const u8 r6 = bpf2a32[TMP_REG_1][1]; /* r6: void *(*func)(..) */ | ||
1471 | int size; | ||
1472 | |||
1473 | /* Setting up first argument */ | ||
1474 | emit(ARM_MOV_R(r0, r4), ctx); | ||
1475 | |||
1476 | /* Setting up second argument */ | ||
1477 | emit_a32_mov_i(r1, imm, false, ctx); | ||
1478 | if (BPF_MODE(code) == BPF_IND) | ||
1479 | emit_a32_alu_r(r1, src_lo, false, sstk, ctx, | ||
1480 | false, false, BPF_ADD); | ||
1481 | |||
1482 | /* Setting up third argument */ | ||
1483 | switch (BPF_SIZE(code)) { | ||
1484 | case BPF_W: | ||
1485 | size = 4; | ||
1486 | break; | ||
1487 | case BPF_H: | ||
1488 | size = 2; | ||
1489 | break; | ||
1490 | case BPF_B: | ||
1491 | size = 1; | ||
1492 | break; | ||
1493 | default: | ||
1494 | return -EINVAL; | ||
1495 | } | ||
1496 | emit_a32_mov_i(r2, size, false, ctx); | ||
1497 | |||
1498 | /* Setting up fourth argument */ | ||
1499 | emit(ARM_ADD_I(r3, ARM_SP, imm8m(SKB_BUFFER)), ctx); | ||
1500 | |||
1501 | /* Setting up function pointer to call */ | ||
1502 | emit_a32_mov_i(r6, (unsigned int)bpf_load_pointer, false, ctx); | ||
1503 | emit_blx_r(r6, ctx); | ||
1504 | |||
1505 | emit(ARM_EOR_R(r1, r1, r1), ctx); | ||
1506 | /* Check if return address is NULL or not. | ||
1507 | * if NULL then jump to epilogue | ||
1508 | * else continue to load the value from retn address | ||
1509 | */ | ||
1510 | emit(ARM_CMP_I(r0, 0), ctx); | ||
1511 | jmp_offset = epilogue_offset(ctx); | ||
1512 | check_imm24(jmp_offset); | ||
1513 | _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); | ||
1514 | |||
1515 | /* Load value from the address */ | ||
1516 | switch (BPF_SIZE(code)) { | ||
1517 | case BPF_W: | ||
1518 | emit(ARM_LDR_I(r0, r0, 0), ctx); | ||
1519 | emit_rev32(r0, r0, ctx); | ||
1520 | break; | ||
1521 | case BPF_H: | ||
1522 | emit(ARM_LDRH_I(r0, r0, 0), ctx); | ||
1523 | emit_rev16(r0, r0, ctx); | ||
1524 | break; | ||
1525 | case BPF_B: | ||
1526 | emit(ARM_LDRB_I(r0, r0, 0), ctx); | ||
1527 | /* No need to reverse */ | ||
1528 | break; | ||
1529 | } | ||
1530 | break; | ||
1531 | } | ||
1532 | /* ST: *(size *)(dst + off) = imm */ | 1455 | /* ST: *(size *)(dst + off) = imm */ |
1533 | case BPF_ST | BPF_MEM | BPF_W: | 1456 | case BPF_ST | BPF_MEM | BPF_W: |
1534 | case BPF_ST | BPF_MEM | BPF_H: | 1457 | case BPF_ST | BPF_MEM | BPF_H: |
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index a93350451e8e..0b40c8fb0706 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c | |||
@@ -723,71 +723,6 @@ emit_cond_jmp: | |||
723 | emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); | 723 | emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); |
724 | break; | 724 | break; |
725 | 725 | ||
726 | /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */ | ||
727 | case BPF_LD | BPF_ABS | BPF_W: | ||
728 | case BPF_LD | BPF_ABS | BPF_H: | ||
729 | case BPF_LD | BPF_ABS | BPF_B: | ||
730 | /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */ | ||
731 | case BPF_LD | BPF_IND | BPF_W: | ||
732 | case BPF_LD | BPF_IND | BPF_H: | ||
733 | case BPF_LD | BPF_IND | BPF_B: | ||
734 | { | ||
735 | const u8 r0 = bpf2a64[BPF_REG_0]; /* r0 = return value */ | ||
736 | const u8 r6 = bpf2a64[BPF_REG_6]; /* r6 = pointer to sk_buff */ | ||
737 | const u8 fp = bpf2a64[BPF_REG_FP]; | ||
738 | const u8 r1 = bpf2a64[BPF_REG_1]; /* r1: struct sk_buff *skb */ | ||
739 | const u8 r2 = bpf2a64[BPF_REG_2]; /* r2: int k */ | ||
740 | const u8 r3 = bpf2a64[BPF_REG_3]; /* r3: unsigned int size */ | ||
741 | const u8 r4 = bpf2a64[BPF_REG_4]; /* r4: void *buffer */ | ||
742 | const u8 r5 = bpf2a64[BPF_REG_5]; /* r5: void *(*func)(...) */ | ||
743 | int size; | ||
744 | |||
745 | emit(A64_MOV(1, r1, r6), ctx); | ||
746 | emit_a64_mov_i(0, r2, imm, ctx); | ||
747 | if (BPF_MODE(code) == BPF_IND) | ||
748 | emit(A64_ADD(0, r2, r2, src), ctx); | ||
749 | switch (BPF_SIZE(code)) { | ||
750 | case BPF_W: | ||
751 | size = 4; | ||
752 | break; | ||
753 | case BPF_H: | ||
754 | size = 2; | ||
755 | break; | ||
756 | case BPF_B: | ||
757 | size = 1; | ||
758 | break; | ||
759 | default: | ||
760 | return -EINVAL; | ||
761 | } | ||
762 | emit_a64_mov_i64(r3, size, ctx); | ||
763 | emit(A64_SUB_I(1, r4, fp, ctx->stack_size), ctx); | ||
764 | emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx); | ||
765 | emit(A64_BLR(r5), ctx); | ||
766 | emit(A64_MOV(1, r0, A64_R(0)), ctx); | ||
767 | |||
768 | jmp_offset = epilogue_offset(ctx); | ||
769 | check_imm19(jmp_offset); | ||
770 | emit(A64_CBZ(1, r0, jmp_offset), ctx); | ||
771 | emit(A64_MOV(1, r5, r0), ctx); | ||
772 | switch (BPF_SIZE(code)) { | ||
773 | case BPF_W: | ||
774 | emit(A64_LDR32(r0, r5, A64_ZR), ctx); | ||
775 | #ifndef CONFIG_CPU_BIG_ENDIAN | ||
776 | emit(A64_REV32(0, r0, r0), ctx); | ||
777 | #endif | ||
778 | break; | ||
779 | case BPF_H: | ||
780 | emit(A64_LDRH(r0, r5, A64_ZR), ctx); | ||
781 | #ifndef CONFIG_CPU_BIG_ENDIAN | ||
782 | emit(A64_REV16(0, r0, r0), ctx); | ||
783 | #endif | ||
784 | break; | ||
785 | case BPF_B: | ||
786 | emit(A64_LDRB(r0, r5, A64_ZR), ctx); | ||
787 | break; | ||
788 | } | ||
789 | break; | ||
790 | } | ||
791 | default: | 726 | default: |
792 | pr_err_once("unknown opcode %02x\n", code); | 727 | pr_err_once("unknown opcode %02x\n", code); |
793 | return -EINVAL; | 728 | return -EINVAL; |
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 3e2798bfea4f..7ba7df9c28fc 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c | |||
@@ -1267,110 +1267,6 @@ jeq_common: | |||
1267 | return -EINVAL; | 1267 | return -EINVAL; |
1268 | break; | 1268 | break; |
1269 | 1269 | ||
1270 | case BPF_LD | BPF_B | BPF_ABS: | ||
1271 | case BPF_LD | BPF_H | BPF_ABS: | ||
1272 | case BPF_LD | BPF_W | BPF_ABS: | ||
1273 | case BPF_LD | BPF_DW | BPF_ABS: | ||
1274 | ctx->flags |= EBPF_SAVE_RA; | ||
1275 | |||
1276 | gen_imm_to_reg(insn, MIPS_R_A1, ctx); | ||
1277 | emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn)); | ||
1278 | |||
1279 | if (insn->imm < 0) { | ||
1280 | emit_const_to_reg(ctx, MIPS_R_T9, (u64)bpf_internal_load_pointer_neg_helper); | ||
1281 | } else { | ||
1282 | emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer); | ||
1283 | emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset); | ||
1284 | } | ||
1285 | goto ld_skb_common; | ||
1286 | |||
1287 | case BPF_LD | BPF_B | BPF_IND: | ||
1288 | case BPF_LD | BPF_H | BPF_IND: | ||
1289 | case BPF_LD | BPF_W | BPF_IND: | ||
1290 | case BPF_LD | BPF_DW | BPF_IND: | ||
1291 | ctx->flags |= EBPF_SAVE_RA; | ||
1292 | src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); | ||
1293 | if (src < 0) | ||
1294 | return src; | ||
1295 | ts = get_reg_val_type(ctx, this_idx, insn->src_reg); | ||
1296 | if (ts == REG_32BIT_ZERO_EX) { | ||
1297 | /* sign extend */ | ||
1298 | emit_instr(ctx, sll, MIPS_R_A1, src, 0); | ||
1299 | src = MIPS_R_A1; | ||
1300 | } | ||
1301 | if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) { | ||
1302 | emit_instr(ctx, daddiu, MIPS_R_A1, src, insn->imm); | ||
1303 | } else { | ||
1304 | gen_imm_to_reg(insn, MIPS_R_AT, ctx); | ||
1305 | emit_instr(ctx, daddu, MIPS_R_A1, MIPS_R_AT, src); | ||
1306 | } | ||
1307 | /* truncate to 32-bit int */ | ||
1308 | emit_instr(ctx, sll, MIPS_R_A1, MIPS_R_A1, 0); | ||
1309 | emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset); | ||
1310 | emit_instr(ctx, slt, MIPS_R_AT, MIPS_R_A1, MIPS_R_ZERO); | ||
1311 | |||
1312 | emit_const_to_reg(ctx, MIPS_R_T8, (u64)bpf_internal_load_pointer_neg_helper); | ||
1313 | emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer); | ||
1314 | emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn)); | ||
1315 | emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_T8, MIPS_R_AT); | ||
1316 | |||
1317 | ld_skb_common: | ||
1318 | emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9); | ||
1319 | /* delay slot move */ | ||
1320 | emit_instr(ctx, daddu, MIPS_R_A0, MIPS_R_S0, MIPS_R_ZERO); | ||
1321 | |||
1322 | /* Check the error value */ | ||
1323 | b_off = b_imm(exit_idx, ctx); | ||
1324 | if (is_bad_offset(b_off)) { | ||
1325 | target = j_target(ctx, exit_idx); | ||
1326 | if (target == (unsigned int)-1) | ||
1327 | return -E2BIG; | ||
1328 | |||
1329 | if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) { | ||
1330 | ctx->offsets[this_idx] |= OFFSETS_B_CONV; | ||
1331 | ctx->long_b_conversion = 1; | ||
1332 | } | ||
1333 | emit_instr(ctx, bne, MIPS_R_V0, MIPS_R_ZERO, 4 * 3); | ||
1334 | emit_instr(ctx, nop); | ||
1335 | emit_instr(ctx, j, target); | ||
1336 | emit_instr(ctx, nop); | ||
1337 | } else { | ||
1338 | emit_instr(ctx, beq, MIPS_R_V0, MIPS_R_ZERO, b_off); | ||
1339 | emit_instr(ctx, nop); | ||
1340 | } | ||
1341 | |||
1342 | #ifdef __BIG_ENDIAN | ||
1343 | need_swap = false; | ||
1344 | #else | ||
1345 | need_swap = true; | ||
1346 | #endif | ||
1347 | dst = MIPS_R_V0; | ||
1348 | switch (BPF_SIZE(insn->code)) { | ||
1349 | case BPF_B: | ||
1350 | emit_instr(ctx, lbu, dst, 0, MIPS_R_V0); | ||
1351 | break; | ||
1352 | case BPF_H: | ||
1353 | emit_instr(ctx, lhu, dst, 0, MIPS_R_V0); | ||
1354 | if (need_swap) | ||
1355 | emit_instr(ctx, wsbh, dst, dst); | ||
1356 | break; | ||
1357 | case BPF_W: | ||
1358 | emit_instr(ctx, lw, dst, 0, MIPS_R_V0); | ||
1359 | if (need_swap) { | ||
1360 | emit_instr(ctx, wsbh, dst, dst); | ||
1361 | emit_instr(ctx, rotr, dst, dst, 16); | ||
1362 | } | ||
1363 | break; | ||
1364 | case BPF_DW: | ||
1365 | emit_instr(ctx, ld, dst, 0, MIPS_R_V0); | ||
1366 | if (need_swap) { | ||
1367 | emit_instr(ctx, dsbh, dst, dst); | ||
1368 | emit_instr(ctx, dshd, dst, dst); | ||
1369 | } | ||
1370 | break; | ||
1371 | } | ||
1372 | |||
1373 | break; | ||
1374 | case BPF_ALU | BPF_END | BPF_FROM_BE: | 1270 | case BPF_ALU | BPF_END | BPF_FROM_BE: |
1375 | case BPF_ALU | BPF_END | BPF_FROM_LE: | 1271 | case BPF_ALU | BPF_END | BPF_FROM_LE: |
1376 | dst = ebpf_to_mips_reg(ctx, insn, dst_reg); | 1272 | dst = ebpf_to_mips_reg(ctx, insn, dst_reg); |
diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile index 02d369ca6a53..809f019d3cba 100644 --- a/arch/powerpc/net/Makefile +++ b/arch/powerpc/net/Makefile | |||
@@ -3,7 +3,7 @@ | |||
3 | # Arch-specific network modules | 3 | # Arch-specific network modules |
4 | # | 4 | # |
5 | ifeq ($(CONFIG_PPC64),y) | 5 | ifeq ($(CONFIG_PPC64),y) |
6 | obj-$(CONFIG_BPF_JIT) += bpf_jit_asm64.o bpf_jit_comp64.o | 6 | obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o |
7 | else | 7 | else |
8 | obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o | 8 | obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o |
9 | endif | 9 | endif |
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index 8bdef7ed28a8..3609be4692b3 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h | |||
@@ -20,7 +20,7 @@ | |||
20 | * with our redzone usage. | 20 | * with our redzone usage. |
21 | * | 21 | * |
22 | * [ prev sp ] <------------- | 22 | * [ prev sp ] <------------- |
23 | * [ nv gpr save area ] 8*8 | | 23 | * [ nv gpr save area ] 6*8 | |
24 | * [ tail_call_cnt ] 8 | | 24 | * [ tail_call_cnt ] 8 | |
25 | * [ local_tmp_var ] 8 | | 25 | * [ local_tmp_var ] 8 | |
26 | * fp (r31) --> [ ebpf stack space ] upto 512 | | 26 | * fp (r31) --> [ ebpf stack space ] upto 512 | |
@@ -28,8 +28,8 @@ | |||
28 | * sp (r1) ---> [ stack pointer ] -------------- | 28 | * sp (r1) ---> [ stack pointer ] -------------- |
29 | */ | 29 | */ |
30 | 30 | ||
31 | /* for gpr non volatile registers BPG_REG_6 to 10, plus skb cache registers */ | 31 | /* for gpr non volatile registers BPG_REG_6 to 10 */ |
32 | #define BPF_PPC_STACK_SAVE (8*8) | 32 | #define BPF_PPC_STACK_SAVE (6*8) |
33 | /* for bpf JIT code internal usage */ | 33 | /* for bpf JIT code internal usage */ |
34 | #define BPF_PPC_STACK_LOCALS 16 | 34 | #define BPF_PPC_STACK_LOCALS 16 |
35 | /* stack frame excluding BPF stack, ensure this is quadword aligned */ | 35 | /* stack frame excluding BPF stack, ensure this is quadword aligned */ |
@@ -39,10 +39,8 @@ | |||
39 | #ifndef __ASSEMBLY__ | 39 | #ifndef __ASSEMBLY__ |
40 | 40 | ||
41 | /* BPF register usage */ | 41 | /* BPF register usage */ |
42 | #define SKB_HLEN_REG (MAX_BPF_JIT_REG + 0) | 42 | #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) |
43 | #define SKB_DATA_REG (MAX_BPF_JIT_REG + 1) | 43 | #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) |
44 | #define TMP_REG_1 (MAX_BPF_JIT_REG + 2) | ||
45 | #define TMP_REG_2 (MAX_BPF_JIT_REG + 3) | ||
46 | 44 | ||
47 | /* BPF to ppc register mappings */ | 45 | /* BPF to ppc register mappings */ |
48 | static const int b2p[] = { | 46 | static const int b2p[] = { |
@@ -63,40 +61,23 @@ static const int b2p[] = { | |||
63 | [BPF_REG_FP] = 31, | 61 | [BPF_REG_FP] = 31, |
64 | /* eBPF jit internal registers */ | 62 | /* eBPF jit internal registers */ |
65 | [BPF_REG_AX] = 2, | 63 | [BPF_REG_AX] = 2, |
66 | [SKB_HLEN_REG] = 25, | ||
67 | [SKB_DATA_REG] = 26, | ||
68 | [TMP_REG_1] = 9, | 64 | [TMP_REG_1] = 9, |
69 | [TMP_REG_2] = 10 | 65 | [TMP_REG_2] = 10 |
70 | }; | 66 | }; |
71 | 67 | ||
72 | /* PPC NVR range -- update this if we ever use NVRs below r24 */ | 68 | /* PPC NVR range -- update this if we ever use NVRs below r27 */ |
73 | #define BPF_PPC_NVR_MIN 24 | 69 | #define BPF_PPC_NVR_MIN 27 |
74 | |||
75 | /* Assembly helpers */ | ||
76 | #define DECLARE_LOAD_FUNC(func) u64 func(u64 r3, u64 r4); \ | ||
77 | u64 func##_negative_offset(u64 r3, u64 r4); \ | ||
78 | u64 func##_positive_offset(u64 r3, u64 r4); | ||
79 | |||
80 | DECLARE_LOAD_FUNC(sk_load_word); | ||
81 | DECLARE_LOAD_FUNC(sk_load_half); | ||
82 | DECLARE_LOAD_FUNC(sk_load_byte); | ||
83 | |||
84 | #define CHOOSE_LOAD_FUNC(imm, func) \ | ||
85 | (imm < 0 ? \ | ||
86 | (imm >= SKF_LL_OFF ? func##_negative_offset : func) : \ | ||
87 | func##_positive_offset) | ||
88 | 70 | ||
89 | #define SEEN_FUNC 0x1000 /* might call external helpers */ | 71 | #define SEEN_FUNC 0x1000 /* might call external helpers */ |
90 | #define SEEN_STACK 0x2000 /* uses BPF stack */ | 72 | #define SEEN_STACK 0x2000 /* uses BPF stack */ |
91 | #define SEEN_SKB 0x4000 /* uses sk_buff */ | 73 | #define SEEN_TAILCALL 0x4000 /* uses tail calls */ |
92 | #define SEEN_TAILCALL 0x8000 /* uses tail calls */ | ||
93 | 74 | ||
94 | struct codegen_context { | 75 | struct codegen_context { |
95 | /* | 76 | /* |
96 | * This is used to track register usage as well | 77 | * This is used to track register usage as well |
97 | * as calls to external helpers. | 78 | * as calls to external helpers. |
98 | * - register usage is tracked with corresponding | 79 | * - register usage is tracked with corresponding |
99 | * bits (r3-r10 and r25-r31) | 80 | * bits (r3-r10 and r27-r31) |
100 | * - rest of the bits can be used to track other | 81 | * - rest of the bits can be used to track other |
101 | * things -- for now, we use bits 16 to 23 | 82 | * things -- for now, we use bits 16 to 23 |
102 | * encoded in SEEN_* macros above | 83 | * encoded in SEEN_* macros above |
diff --git a/arch/powerpc/net/bpf_jit_asm64.S b/arch/powerpc/net/bpf_jit_asm64.S deleted file mode 100644 index 7e4c51430b84..000000000000 --- a/arch/powerpc/net/bpf_jit_asm64.S +++ /dev/null | |||
@@ -1,180 +0,0 @@ | |||
1 | /* | ||
2 | * bpf_jit_asm64.S: Packet/header access helper functions | ||
3 | * for PPC64 BPF compiler. | ||
4 | * | ||
5 | * Copyright 2016, Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> | ||
6 | * IBM Corporation | ||
7 | * | ||
8 | * Based on bpf_jit_asm.S by Matt Evans | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; version 2 | ||
13 | * of the License. | ||
14 | */ | ||
15 | |||
16 | #include <asm/ppc_asm.h> | ||
17 | #include <asm/ptrace.h> | ||
18 | #include "bpf_jit64.h" | ||
19 | |||
20 | /* | ||
21 | * All of these routines are called directly from generated code, | ||
22 | * with the below register usage: | ||
23 | * r27 skb pointer (ctx) | ||
24 | * r25 skb header length | ||
25 | * r26 skb->data pointer | ||
26 | * r4 offset | ||
27 | * | ||
28 | * Result is passed back in: | ||
29 | * r8 data read in host endian format (accumulator) | ||
30 | * | ||
31 | * r9 is used as a temporary register | ||
32 | */ | ||
33 | |||
34 | #define r_skb r27 | ||
35 | #define r_hlen r25 | ||
36 | #define r_data r26 | ||
37 | #define r_off r4 | ||
38 | #define r_val r8 | ||
39 | #define r_tmp r9 | ||
40 | |||
41 | _GLOBAL_TOC(sk_load_word) | ||
42 | cmpdi r_off, 0 | ||
43 | blt bpf_slow_path_word_neg | ||
44 | b sk_load_word_positive_offset | ||
45 | |||
46 | _GLOBAL_TOC(sk_load_word_positive_offset) | ||
47 | /* Are we accessing past headlen? */ | ||
48 | subi r_tmp, r_hlen, 4 | ||
49 | cmpd r_tmp, r_off | ||
50 | blt bpf_slow_path_word | ||
51 | /* Nope, just hitting the header. cr0 here is eq or gt! */ | ||
52 | LWZX_BE r_val, r_data, r_off | ||
53 | blr /* Return success, cr0 != LT */ | ||
54 | |||
55 | _GLOBAL_TOC(sk_load_half) | ||
56 | cmpdi r_off, 0 | ||
57 | blt bpf_slow_path_half_neg | ||
58 | b sk_load_half_positive_offset | ||
59 | |||
60 | _GLOBAL_TOC(sk_load_half_positive_offset) | ||
61 | subi r_tmp, r_hlen, 2 | ||
62 | cmpd r_tmp, r_off | ||
63 | blt bpf_slow_path_half | ||
64 | LHZX_BE r_val, r_data, r_off | ||
65 | blr | ||
66 | |||
67 | _GLOBAL_TOC(sk_load_byte) | ||
68 | cmpdi r_off, 0 | ||
69 | blt bpf_slow_path_byte_neg | ||
70 | b sk_load_byte_positive_offset | ||
71 | |||
72 | _GLOBAL_TOC(sk_load_byte_positive_offset) | ||
73 | cmpd r_hlen, r_off | ||
74 | ble bpf_slow_path_byte | ||
75 | lbzx r_val, r_data, r_off | ||
76 | blr | ||
77 | |||
78 | /* | ||
79 | * Call out to skb_copy_bits: | ||
80 | * Allocate a new stack frame here to remain ABI-compliant in | ||
81 | * stashing LR. | ||
82 | */ | ||
83 | #define bpf_slow_path_common(SIZE) \ | ||
84 | mflr r0; \ | ||
85 | std r0, PPC_LR_STKOFF(r1); \ | ||
86 | stdu r1, -(STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS)(r1); \ | ||
87 | mr r3, r_skb; \ | ||
88 | /* r4 = r_off as passed */ \ | ||
89 | addi r5, r1, STACK_FRAME_MIN_SIZE; \ | ||
90 | li r6, SIZE; \ | ||
91 | bl skb_copy_bits; \ | ||
92 | nop; \ | ||
93 | /* save r5 */ \ | ||
94 | addi r5, r1, STACK_FRAME_MIN_SIZE; \ | ||
95 | /* r3 = 0 on success */ \ | ||
96 | addi r1, r1, STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS; \ | ||
97 | ld r0, PPC_LR_STKOFF(r1); \ | ||
98 | mtlr r0; \ | ||
99 | cmpdi r3, 0; \ | ||
100 | blt bpf_error; /* cr0 = LT */ | ||
101 | |||
102 | bpf_slow_path_word: | ||
103 | bpf_slow_path_common(4) | ||
104 | /* Data value is on stack, and cr0 != LT */ | ||
105 | LWZX_BE r_val, 0, r5 | ||
106 | blr | ||
107 | |||
108 | bpf_slow_path_half: | ||
109 | bpf_slow_path_common(2) | ||
110 | LHZX_BE r_val, 0, r5 | ||
111 | blr | ||
112 | |||
113 | bpf_slow_path_byte: | ||
114 | bpf_slow_path_common(1) | ||
115 | lbzx r_val, 0, r5 | ||
116 | blr | ||
117 | |||
118 | /* | ||
119 | * Call out to bpf_internal_load_pointer_neg_helper | ||
120 | */ | ||
121 | #define sk_negative_common(SIZE) \ | ||
122 | mflr r0; \ | ||
123 | std r0, PPC_LR_STKOFF(r1); \ | ||
124 | stdu r1, -STACK_FRAME_MIN_SIZE(r1); \ | ||
125 | mr r3, r_skb; \ | ||
126 | /* r4 = r_off, as passed */ \ | ||
127 | li r5, SIZE; \ | ||
128 | bl bpf_internal_load_pointer_neg_helper; \ | ||
129 | nop; \ | ||
130 | addi r1, r1, STACK_FRAME_MIN_SIZE; \ | ||
131 | ld r0, PPC_LR_STKOFF(r1); \ | ||
132 | mtlr r0; \ | ||
133 | /* R3 != 0 on success */ \ | ||
134 | cmpldi r3, 0; \ | ||
135 | beq bpf_error_slow; /* cr0 = EQ */ | ||
136 | |||
137 | bpf_slow_path_word_neg: | ||
138 | lis r_tmp, -32 /* SKF_LL_OFF */ | ||
139 | cmpd r_off, r_tmp /* addr < SKF_* */ | ||
140 | blt bpf_error /* cr0 = LT */ | ||
141 | b sk_load_word_negative_offset | ||
142 | |||
143 | _GLOBAL_TOC(sk_load_word_negative_offset) | ||
144 | sk_negative_common(4) | ||
145 | LWZX_BE r_val, 0, r3 | ||
146 | blr | ||
147 | |||
148 | bpf_slow_path_half_neg: | ||
149 | lis r_tmp, -32 /* SKF_LL_OFF */ | ||
150 | cmpd r_off, r_tmp /* addr < SKF_* */ | ||
151 | blt bpf_error /* cr0 = LT */ | ||
152 | b sk_load_half_negative_offset | ||
153 | |||
154 | _GLOBAL_TOC(sk_load_half_negative_offset) | ||
155 | sk_negative_common(2) | ||
156 | LHZX_BE r_val, 0, r3 | ||
157 | blr | ||
158 | |||
159 | bpf_slow_path_byte_neg: | ||
160 | lis r_tmp, -32 /* SKF_LL_OFF */ | ||
161 | cmpd r_off, r_tmp /* addr < SKF_* */ | ||
162 | blt bpf_error /* cr0 = LT */ | ||
163 | b sk_load_byte_negative_offset | ||
164 | |||
165 | _GLOBAL_TOC(sk_load_byte_negative_offset) | ||
166 | sk_negative_common(1) | ||
167 | lbzx r_val, 0, r3 | ||
168 | blr | ||
169 | |||
170 | bpf_error_slow: | ||
171 | /* fabricate a cr0 = lt */ | ||
172 | li r_tmp, -1 | ||
173 | cmpdi r_tmp, 0 | ||
174 | bpf_error: | ||
175 | /* | ||
176 | * Entered with cr0 = lt | ||
177 | * Generated code will 'blt epilogue', returning 0. | ||
178 | */ | ||
179 | li r_val, 0 | ||
180 | blr | ||
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 0ef3d9580e98..1bdb1aff0619 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c | |||
@@ -59,7 +59,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) | |||
59 | * [ prev sp ] <------------- | 59 | * [ prev sp ] <------------- |
60 | * [ ... ] | | 60 | * [ ... ] | |
61 | * sp (r1) ---> [ stack pointer ] -------------- | 61 | * sp (r1) ---> [ stack pointer ] -------------- |
62 | * [ nv gpr save area ] 8*8 | 62 | * [ nv gpr save area ] 6*8 |
63 | * [ tail_call_cnt ] 8 | 63 | * [ tail_call_cnt ] 8 |
64 | * [ local_tmp_var ] 8 | 64 | * [ local_tmp_var ] 8 |
65 | * [ unused red zone ] 208 bytes protected | 65 | * [ unused red zone ] 208 bytes protected |
@@ -88,21 +88,6 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) | |||
88 | BUG(); | 88 | BUG(); |
89 | } | 89 | } |
90 | 90 | ||
91 | static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx) | ||
92 | { | ||
93 | /* | ||
94 | * Load skb->len and skb->data_len | ||
95 | * r3 points to skb | ||
96 | */ | ||
97 | PPC_LWZ(b2p[SKB_HLEN_REG], 3, offsetof(struct sk_buff, len)); | ||
98 | PPC_LWZ(b2p[TMP_REG_1], 3, offsetof(struct sk_buff, data_len)); | ||
99 | /* header_len = len - data_len */ | ||
100 | PPC_SUB(b2p[SKB_HLEN_REG], b2p[SKB_HLEN_REG], b2p[TMP_REG_1]); | ||
101 | |||
102 | /* skb->data pointer */ | ||
103 | PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data)); | ||
104 | } | ||
105 | |||
106 | static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) | 91 | static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) |
107 | { | 92 | { |
108 | int i; | 93 | int i; |
@@ -145,18 +130,6 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) | |||
145 | if (bpf_is_seen_register(ctx, i)) | 130 | if (bpf_is_seen_register(ctx, i)) |
146 | PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); | 131 | PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); |
147 | 132 | ||
148 | /* | ||
149 | * Save additional non-volatile regs if we cache skb | ||
150 | * Also, setup skb data | ||
151 | */ | ||
152 | if (ctx->seen & SEEN_SKB) { | ||
153 | PPC_BPF_STL(b2p[SKB_HLEN_REG], 1, | ||
154 | bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG])); | ||
155 | PPC_BPF_STL(b2p[SKB_DATA_REG], 1, | ||
156 | bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG])); | ||
157 | bpf_jit_emit_skb_loads(image, ctx); | ||
158 | } | ||
159 | |||
160 | /* Setup frame pointer to point to the bpf stack area */ | 133 | /* Setup frame pointer to point to the bpf stack area */ |
161 | if (bpf_is_seen_register(ctx, BPF_REG_FP)) | 134 | if (bpf_is_seen_register(ctx, BPF_REG_FP)) |
162 | PPC_ADDI(b2p[BPF_REG_FP], 1, | 135 | PPC_ADDI(b2p[BPF_REG_FP], 1, |
@@ -172,14 +145,6 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx | |||
172 | if (bpf_is_seen_register(ctx, i)) | 145 | if (bpf_is_seen_register(ctx, i)) |
173 | PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); | 146 | PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); |
174 | 147 | ||
175 | /* Restore non-volatile registers used for skb cache */ | ||
176 | if (ctx->seen & SEEN_SKB) { | ||
177 | PPC_BPF_LL(b2p[SKB_HLEN_REG], 1, | ||
178 | bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG])); | ||
179 | PPC_BPF_LL(b2p[SKB_DATA_REG], 1, | ||
180 | bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG])); | ||
181 | } | ||
182 | |||
183 | /* Tear down our stack frame */ | 148 | /* Tear down our stack frame */ |
184 | if (bpf_has_stack_frame(ctx)) { | 149 | if (bpf_has_stack_frame(ctx)) { |
185 | PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size); | 150 | PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size); |
@@ -753,23 +718,10 @@ emit_clear: | |||
753 | ctx->seen |= SEEN_FUNC; | 718 | ctx->seen |= SEEN_FUNC; |
754 | func = (u8 *) __bpf_call_base + imm; | 719 | func = (u8 *) __bpf_call_base + imm; |
755 | 720 | ||
756 | /* Save skb pointer if we need to re-cache skb data */ | ||
757 | if ((ctx->seen & SEEN_SKB) && | ||
758 | bpf_helper_changes_pkt_data(func)) | ||
759 | PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx)); | ||
760 | |||
761 | bpf_jit_emit_func_call(image, ctx, (u64)func); | 721 | bpf_jit_emit_func_call(image, ctx, (u64)func); |
762 | 722 | ||
763 | /* move return value from r3 to BPF_REG_0 */ | 723 | /* move return value from r3 to BPF_REG_0 */ |
764 | PPC_MR(b2p[BPF_REG_0], 3); | 724 | PPC_MR(b2p[BPF_REG_0], 3); |
765 | |||
766 | /* refresh skb cache */ | ||
767 | if ((ctx->seen & SEEN_SKB) && | ||
768 | bpf_helper_changes_pkt_data(func)) { | ||
769 | /* reload skb pointer to r3 */ | ||
770 | PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx)); | ||
771 | bpf_jit_emit_skb_loads(image, ctx); | ||
772 | } | ||
773 | break; | 725 | break; |
774 | 726 | ||
775 | /* | 727 | /* |
@@ -887,65 +839,6 @@ cond_branch: | |||
887 | break; | 839 | break; |
888 | 840 | ||
889 | /* | 841 | /* |
890 | * Loads from packet header/data | ||
891 | * Assume 32-bit input value in imm and X (src_reg) | ||
892 | */ | ||
893 | |||
894 | /* Absolute loads */ | ||
895 | case BPF_LD | BPF_W | BPF_ABS: | ||
896 | func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_word); | ||
897 | goto common_load_abs; | ||
898 | case BPF_LD | BPF_H | BPF_ABS: | ||
899 | func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_half); | ||
900 | goto common_load_abs; | ||
901 | case BPF_LD | BPF_B | BPF_ABS: | ||
902 | func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_byte); | ||
903 | common_load_abs: | ||
904 | /* | ||
905 | * Load from [imm] | ||
906 | * Load into r4, which can just be passed onto | ||
907 | * skb load helpers as the second parameter | ||
908 | */ | ||
909 | PPC_LI32(4, imm); | ||
910 | goto common_load; | ||
911 | |||
912 | /* Indirect loads */ | ||
913 | case BPF_LD | BPF_W | BPF_IND: | ||
914 | func = (u8 *)sk_load_word; | ||
915 | goto common_load_ind; | ||
916 | case BPF_LD | BPF_H | BPF_IND: | ||
917 | func = (u8 *)sk_load_half; | ||
918 | goto common_load_ind; | ||
919 | case BPF_LD | BPF_B | BPF_IND: | ||
920 | func = (u8 *)sk_load_byte; | ||
921 | common_load_ind: | ||
922 | /* | ||
923 | * Load from [src_reg + imm] | ||
924 | * Treat src_reg as a 32-bit value | ||
925 | */ | ||
926 | PPC_EXTSW(4, src_reg); | ||
927 | if (imm) { | ||
928 | if (imm >= -32768 && imm < 32768) | ||
929 | PPC_ADDI(4, 4, IMM_L(imm)); | ||
930 | else { | ||
931 | PPC_LI32(b2p[TMP_REG_1], imm); | ||
932 | PPC_ADD(4, 4, b2p[TMP_REG_1]); | ||
933 | } | ||
934 | } | ||
935 | |||
936 | common_load: | ||
937 | ctx->seen |= SEEN_SKB; | ||
938 | ctx->seen |= SEEN_FUNC; | ||
939 | bpf_jit_emit_func_call(image, ctx, (u64)func); | ||
940 | |||
941 | /* | ||
942 | * Helper returns 'lt' condition on error, and an | ||
943 | * appropriate return value in BPF_REG_0 | ||
944 | */ | ||
945 | PPC_BCC(COND_LT, exit_addr); | ||
946 | break; | ||
947 | |||
948 | /* | ||
949 | * Tail call | 842 | * Tail call |
950 | */ | 843 | */ |
951 | case BPF_JMP | BPF_TAIL_CALL: | 844 | case BPF_JMP | BPF_TAIL_CALL: |
diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile index e0d5f245e42b..d4663b4bf509 100644 --- a/arch/s390/net/Makefile +++ b/arch/s390/net/Makefile | |||
@@ -2,4 +2,4 @@ | |||
2 | # | 2 | # |
3 | # Arch-specific network modules | 3 | # Arch-specific network modules |
4 | # | 4 | # |
5 | obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o | 5 | obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o |
diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S deleted file mode 100644 index 25bb4643c4f4..000000000000 --- a/arch/s390/net/bpf_jit.S +++ /dev/null | |||
@@ -1,116 +0,0 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * BPF Jit compiler for s390, help functions. | ||
4 | * | ||
5 | * Copyright IBM Corp. 2012,2015 | ||
6 | * | ||
7 | * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> | ||
8 | * Michael Holzheu <holzheu@linux.vnet.ibm.com> | ||
9 | */ | ||
10 | |||
11 | #include <linux/linkage.h> | ||
12 | #include "bpf_jit.h" | ||
13 | |||
14 | /* | ||
15 | * Calling convention: | ||
16 | * registers %r7-%r10, %r11,%r13, and %r15 are call saved | ||
17 | * | ||
18 | * Input (64 bit): | ||
19 | * %r3 (%b2) = offset into skb data | ||
20 | * %r6 (%b5) = return address | ||
21 | * %r7 (%b6) = skb pointer | ||
22 | * %r12 = skb data pointer | ||
23 | * | ||
24 | * Output: | ||
25 | * %r14= %b0 = return value (read skb value) | ||
26 | * | ||
27 | * Work registers: %r2,%r4,%r5,%r14 | ||
28 | * | ||
29 | * skb_copy_bits takes 4 parameters: | ||
30 | * %r2 = skb pointer | ||
31 | * %r3 = offset into skb data | ||
32 | * %r4 = pointer to temp buffer | ||
33 | * %r5 = length to copy | ||
34 | * Return value in %r2: 0 = ok | ||
35 | * | ||
36 | * bpf_internal_load_pointer_neg_helper takes 3 parameters: | ||
37 | * %r2 = skb pointer | ||
38 | * %r3 = offset into data | ||
39 | * %r4 = length to copy | ||
40 | * Return value in %r2: Pointer to data | ||
41 | */ | ||
42 | |||
43 | #define SKF_MAX_NEG_OFF -0x200000 /* SKF_LL_OFF from filter.h */ | ||
44 | |||
45 | /* | ||
46 | * Load SIZE bytes from SKB | ||
47 | */ | ||
48 | #define sk_load_common(NAME, SIZE, LOAD) \ | ||
49 | ENTRY(sk_load_##NAME); \ | ||
50 | ltgr %r3,%r3; /* Is offset negative? */ \ | ||
51 | jl sk_load_##NAME##_slow_neg; \ | ||
52 | ENTRY(sk_load_##NAME##_pos); \ | ||
53 | aghi %r3,SIZE; /* Offset + SIZE */ \ | ||
54 | clg %r3,STK_OFF_HLEN(%r15); /* Offset + SIZE > hlen? */ \ | ||
55 | jh sk_load_##NAME##_slow; \ | ||
56 | LOAD %r14,-SIZE(%r3,%r12); /* Get data from skb */ \ | ||
57 | b OFF_OK(%r6); /* Return */ \ | ||
58 | \ | ||
59 | sk_load_##NAME##_slow:; \ | ||
60 | lgr %r2,%r7; /* Arg1 = skb pointer */ \ | ||
61 | aghi %r3,-SIZE; /* Arg2 = offset */ \ | ||
62 | la %r4,STK_OFF_TMP(%r15); /* Arg3 = temp bufffer */ \ | ||
63 | lghi %r5,SIZE; /* Arg4 = size */ \ | ||
64 | brasl %r14,skb_copy_bits; /* Get data from skb */ \ | ||
65 | LOAD %r14,STK_OFF_TMP(%r15); /* Load from temp bufffer */ \ | ||
66 | ltgr %r2,%r2; /* Set cc to (%r2 != 0) */ \ | ||
67 | br %r6; /* Return */ | ||
68 | |||
69 | sk_load_common(word, 4, llgf) /* r14 = *(u32 *) (skb->data+offset) */ | ||
70 | sk_load_common(half, 2, llgh) /* r14 = *(u16 *) (skb->data+offset) */ | ||
71 | |||
72 | /* | ||
73 | * Load 1 byte from SKB (optimized version) | ||
74 | */ | ||
75 | /* r14 = *(u8 *) (skb->data+offset) */ | ||
76 | ENTRY(sk_load_byte) | ||
77 | ltgr %r3,%r3 # Is offset negative? | ||
78 | jl sk_load_byte_slow_neg | ||
79 | ENTRY(sk_load_byte_pos) | ||
80 | clg %r3,STK_OFF_HLEN(%r15) # Offset >= hlen? | ||
81 | jnl sk_load_byte_slow | ||
82 | llgc %r14,0(%r3,%r12) # Get byte from skb | ||
83 | b OFF_OK(%r6) # Return OK | ||
84 | |||
85 | sk_load_byte_slow: | ||
86 | lgr %r2,%r7 # Arg1 = skb pointer | ||
87 | # Arg2 = offset | ||
88 | la %r4,STK_OFF_TMP(%r15) # Arg3 = pointer to temp buffer | ||
89 | lghi %r5,1 # Arg4 = size (1 byte) | ||
90 | brasl %r14,skb_copy_bits # Get data from skb | ||
91 | llgc %r14,STK_OFF_TMP(%r15) # Load result from temp buffer | ||
92 | ltgr %r2,%r2 # Set cc to (%r2 != 0) | ||
93 | br %r6 # Return cc | ||
94 | |||
95 | #define sk_negative_common(NAME, SIZE, LOAD) \ | ||
96 | sk_load_##NAME##_slow_neg:; \ | ||
97 | cgfi %r3,SKF_MAX_NEG_OFF; \ | ||
98 | jl bpf_error; \ | ||
99 | lgr %r2,%r7; /* Arg1 = skb pointer */ \ | ||
100 | /* Arg2 = offset */ \ | ||
101 | lghi %r4,SIZE; /* Arg3 = size */ \ | ||
102 | brasl %r14,bpf_internal_load_pointer_neg_helper; \ | ||
103 | ltgr %r2,%r2; \ | ||
104 | jz bpf_error; \ | ||
105 | LOAD %r14,0(%r2); /* Get data from pointer */ \ | ||
106 | xr %r3,%r3; /* Set cc to zero */ \ | ||
107 | br %r6; /* Return cc */ | ||
108 | |||
109 | sk_negative_common(word, 4, llgf) | ||
110 | sk_negative_common(half, 2, llgh) | ||
111 | sk_negative_common(byte, 1, llgc) | ||
112 | |||
113 | bpf_error: | ||
114 | # force a return 0 from jit handler | ||
115 | ltgr %r15,%r15 # Set condition code | ||
116 | br %r6 | ||
diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h index 5e1e5133132d..7822ea92e54a 100644 --- a/arch/s390/net/bpf_jit.h +++ b/arch/s390/net/bpf_jit.h | |||
@@ -16,9 +16,6 @@ | |||
16 | #include <linux/filter.h> | 16 | #include <linux/filter.h> |
17 | #include <linux/types.h> | 17 | #include <linux/types.h> |
18 | 18 | ||
19 | extern u8 sk_load_word_pos[], sk_load_half_pos[], sk_load_byte_pos[]; | ||
20 | extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; | ||
21 | |||
22 | #endif /* __ASSEMBLY__ */ | 19 | #endif /* __ASSEMBLY__ */ |
23 | 20 | ||
24 | /* | 21 | /* |
@@ -36,15 +33,6 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; | |||
36 | * | | | | 33 | * | | | |
37 | * | BPF stack | | | 34 | * | BPF stack | | |
38 | * | | | | 35 | * | | | |
39 | * +---------------+ | | ||
40 | * | 8 byte skbp | | | ||
41 | * R15+176 -> +---------------+ | | ||
42 | * | 8 byte hlen | | | ||
43 | * R15+168 -> +---------------+ | | ||
44 | * | 4 byte align | | | ||
45 | * +---------------+ | | ||
46 | * | 4 byte temp | | | ||
47 | * | for bpf_jit.S | | | ||
48 | * R15+160 -> +---------------+ | | 36 | * R15+160 -> +---------------+ | |
49 | * | new backchain | | | 37 | * | new backchain | | |
50 | * R15+152 -> +---------------+ | | 38 | * R15+152 -> +---------------+ | |
@@ -57,17 +45,11 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; | |||
57 | * The stack size used by the BPF program ("BPF stack" above) is passed | 45 | * The stack size used by the BPF program ("BPF stack" above) is passed |
58 | * via "aux->stack_depth". | 46 | * via "aux->stack_depth". |
59 | */ | 47 | */ |
60 | #define STK_SPACE_ADD (8 + 8 + 4 + 4 + 160) | 48 | #define STK_SPACE_ADD (160) |
61 | #define STK_160_UNUSED (160 - 12 * 8) | 49 | #define STK_160_UNUSED (160 - 12 * 8) |
62 | #define STK_OFF (STK_SPACE_ADD - STK_160_UNUSED) | 50 | #define STK_OFF (STK_SPACE_ADD - STK_160_UNUSED) |
63 | #define STK_OFF_TMP 160 /* Offset of tmp buffer on stack */ | ||
64 | #define STK_OFF_HLEN 168 /* Offset of SKB header length on stack */ | ||
65 | #define STK_OFF_SKBP 176 /* Offset of SKB pointer on stack */ | ||
66 | 51 | ||
67 | #define STK_OFF_R6 (160 - 11 * 8) /* Offset of r6 on stack */ | 52 | #define STK_OFF_R6 (160 - 11 * 8) /* Offset of r6 on stack */ |
68 | #define STK_OFF_TCCNT (160 - 12 * 8) /* Offset of tail_call_cnt on stack */ | 53 | #define STK_OFF_TCCNT (160 - 12 * 8) /* Offset of tail_call_cnt on stack */ |
69 | 54 | ||
70 | /* Offset to skip condition code check */ | ||
71 | #define OFF_OK 4 | ||
72 | |||
73 | #endif /* __ARCH_S390_NET_BPF_JIT_H */ | 55 | #endif /* __ARCH_S390_NET_BPF_JIT_H */ |
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 78a19c93b380..b020bea040b7 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c | |||
@@ -47,23 +47,21 @@ struct bpf_jit { | |||
47 | 47 | ||
48 | #define BPF_SIZE_MAX 0xffff /* Max size for program (16 bit branches) */ | 48 | #define BPF_SIZE_MAX 0xffff /* Max size for program (16 bit branches) */ |
49 | 49 | ||
50 | #define SEEN_SKB 1 /* skb access */ | 50 | #define SEEN_MEM (1 << 0) /* use mem[] for temporary storage */ |
51 | #define SEEN_MEM 2 /* use mem[] for temporary storage */ | 51 | #define SEEN_RET0 (1 << 1) /* ret0_ip points to a valid return 0 */ |
52 | #define SEEN_RET0 4 /* ret0_ip points to a valid return 0 */ | 52 | #define SEEN_LITERAL (1 << 2) /* code uses literals */ |
53 | #define SEEN_LITERAL 8 /* code uses literals */ | 53 | #define SEEN_FUNC (1 << 3) /* calls C functions */ |
54 | #define SEEN_FUNC 16 /* calls C functions */ | 54 | #define SEEN_TAIL_CALL (1 << 4) /* code uses tail calls */ |
55 | #define SEEN_TAIL_CALL 32 /* code uses tail calls */ | 55 | #define SEEN_REG_AX (1 << 5) /* code uses constant blinding */ |
56 | #define SEEN_REG_AX 64 /* code uses constant blinding */ | 56 | #define SEEN_STACK (SEEN_FUNC | SEEN_MEM) |
57 | #define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB) | ||
58 | 57 | ||
59 | /* | 58 | /* |
60 | * s390 registers | 59 | * s390 registers |
61 | */ | 60 | */ |
62 | #define REG_W0 (MAX_BPF_JIT_REG + 0) /* Work register 1 (even) */ | 61 | #define REG_W0 (MAX_BPF_JIT_REG + 0) /* Work register 1 (even) */ |
63 | #define REG_W1 (MAX_BPF_JIT_REG + 1) /* Work register 2 (odd) */ | 62 | #define REG_W1 (MAX_BPF_JIT_REG + 1) /* Work register 2 (odd) */ |
64 | #define REG_SKB_DATA (MAX_BPF_JIT_REG + 2) /* SKB data register */ | 63 | #define REG_L (MAX_BPF_JIT_REG + 2) /* Literal pool register */ |
65 | #define REG_L (MAX_BPF_JIT_REG + 3) /* Literal pool register */ | 64 | #define REG_15 (MAX_BPF_JIT_REG + 3) /* Register 15 */ |
66 | #define REG_15 (MAX_BPF_JIT_REG + 4) /* Register 15 */ | ||
67 | #define REG_0 REG_W0 /* Register 0 */ | 65 | #define REG_0 REG_W0 /* Register 0 */ |
68 | #define REG_1 REG_W1 /* Register 1 */ | 66 | #define REG_1 REG_W1 /* Register 1 */ |
69 | #define REG_2 BPF_REG_1 /* Register 2 */ | 67 | #define REG_2 BPF_REG_1 /* Register 2 */ |
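The switch to (1 << n) notation makes the flag layout explicit, and SEEN_STACK no longer pulls in the retired SEEN_SKB bit. A minimal sketch of the accumulate-and-test pattern these flags serve (flag names from the hunk above; the surrounding logic is assumed, not taken from the patch):

	unsigned int seen = 0;

	seen |= SEEN_FUNC;		/* program calls a C helper */
	seen |= SEEN_MEM;		/* program spills to mem[] */

	if (seen & SEEN_STACK)		/* now just SEEN_FUNC | SEEN_MEM */
		allocate_stack_frame();	/* hypothetical prologue step */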
@@ -88,10 +86,8 @@ static const int reg2hex[] = { | |||
88 | [BPF_REG_9] = 10, | 86 | [BPF_REG_9] = 10, |
89 | /* BPF stack pointer */ | 87 | /* BPF stack pointer */ |
90 | [BPF_REG_FP] = 13, | 88 | [BPF_REG_FP] = 13, |
91 | /* Register for blinding (shared with REG_SKB_DATA) */ | 89 | /* Register for blinding */ |
92 | [BPF_REG_AX] = 12, | 90 | [BPF_REG_AX] = 12, |
93 | /* SKB data pointer */ | ||
94 | [REG_SKB_DATA] = 12, | ||
95 | /* Work registers for s390x backend */ | 91 | /* Work registers for s390x backend */ |
96 | [REG_W0] = 0, | 92 | [REG_W0] = 0, |
97 | [REG_W1] = 1, | 93 | [REG_W1] = 1, |
@@ -385,27 +381,6 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth) | |||
385 | } | 381 | } |
386 | 382 | ||
387 | /* | 383 | /* |
388 | * For SKB access %b1 contains the SKB pointer. For "bpf_jit.S" | ||
389 | * we store the SKB header length on the stack and the SKB data | ||
390 | * pointer in REG_SKB_DATA if BPF_REG_AX is not used. | ||
391 | */ | ||
392 | static void emit_load_skb_data_hlen(struct bpf_jit *jit) | ||
393 | { | ||
394 | /* Header length: llgf %w1,<len>(%b1) */ | ||
395 | EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_1, | ||
396 | offsetof(struct sk_buff, len)); | ||
397 | /* s %w1,<data_len>(%b1) */ | ||
398 | EMIT4_DISP(0x5b000000, REG_W1, BPF_REG_1, | ||
399 | offsetof(struct sk_buff, data_len)); | ||
400 | /* stg %w1,ST_OFF_HLEN(%r0,%r15) */ | ||
401 | EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_HLEN); | ||
402 | if (!(jit->seen & SEEN_REG_AX)) | ||
403 | /* lg %skb_data,data_off(%b1) */ | ||
404 | EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0, | ||
405 | BPF_REG_1, offsetof(struct sk_buff, data)); | ||
406 | } | ||
407 | |||
408 | /* | ||
409 | * Emit function prologue | 384 | * Emit function prologue |
410 | * | 385 | * |
411 | * Save registers and create stack frame if necessary. | 386 | * Save registers and create stack frame if necessary. |
@@ -445,12 +420,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth) | |||
445 | EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, | 420 | EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, |
446 | REG_15, 152); | 421 | REG_15, 152); |
447 | } | 422 | } |
448 | if (jit->seen & SEEN_SKB) { | ||
449 | emit_load_skb_data_hlen(jit); | ||
450 | /* stg %b1,ST_OFF_SKBP(%r0,%r15) */ | ||
451 | EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15, | ||
452 | STK_OFF_SKBP); | ||
453 | } | ||
454 | } | 423 | } |
455 | 424 | ||
456 | /* | 425 | /* |
@@ -483,12 +452,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i | |||
483 | { | 452 | { |
484 | struct bpf_insn *insn = &fp->insnsi[i]; | 453 | struct bpf_insn *insn = &fp->insnsi[i]; |
485 | int jmp_off, last, insn_count = 1; | 454 | int jmp_off, last, insn_count = 1; |
486 | unsigned int func_addr, mask; | ||
487 | u32 dst_reg = insn->dst_reg; | 455 | u32 dst_reg = insn->dst_reg; |
488 | u32 src_reg = insn->src_reg; | 456 | u32 src_reg = insn->src_reg; |
489 | u32 *addrs = jit->addrs; | 457 | u32 *addrs = jit->addrs; |
490 | s32 imm = insn->imm; | 458 | s32 imm = insn->imm; |
491 | s16 off = insn->off; | 459 | s16 off = insn->off; |
460 | unsigned int mask; | ||
492 | 461 | ||
493 | if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX) | 462 | if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX) |
494 | jit->seen |= SEEN_REG_AX; | 463 | jit->seen |= SEEN_REG_AX; |
@@ -970,13 +939,6 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i | |||
970 | EMIT2(0x0d00, REG_14, REG_W1); | 939 | EMIT2(0x0d00, REG_14, REG_W1); |
971 | /* lgr %b0,%r2: load return value into %b0 */ | 940 | /* lgr %b0,%r2: load return value into %b0 */ |
972 | EMIT4(0xb9040000, BPF_REG_0, REG_2); | 941 | EMIT4(0xb9040000, BPF_REG_0, REG_2); |
973 | if ((jit->seen & SEEN_SKB) && | ||
974 | bpf_helper_changes_pkt_data((void *)func)) { | ||
975 | /* lg %b1,ST_OFF_SKBP(%r15) */ | ||
976 | EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0, | ||
977 | REG_15, STK_OFF_SKBP); | ||
978 | emit_load_skb_data_hlen(jit); | ||
979 | } | ||
980 | break; | 942 | break; |
981 | } | 943 | } |
982 | case BPF_JMP | BPF_TAIL_CALL: | 944 | case BPF_JMP | BPF_TAIL_CALL: |
@@ -1176,73 +1138,6 @@ branch_oc: | |||
1176 | jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4); | 1138 | jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4); |
1177 | EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off); | 1139 | EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off); |
1178 | break; | 1140 | break; |
1179 | /* | ||
1180 | * BPF_LD | ||
1181 | */ | ||
1182 | case BPF_LD | BPF_ABS | BPF_B: /* b0 = *(u8 *) (skb->data+imm) */ | ||
1183 | case BPF_LD | BPF_IND | BPF_B: /* b0 = *(u8 *) (skb->data+imm+src) */ | ||
1184 | if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0)) | ||
1185 | func_addr = __pa(sk_load_byte_pos); | ||
1186 | else | ||
1187 | func_addr = __pa(sk_load_byte); | ||
1188 | goto call_fn; | ||
1189 | case BPF_LD | BPF_ABS | BPF_H: /* b0 = *(u16 *) (skb->data+imm) */ | ||
1190 | case BPF_LD | BPF_IND | BPF_H: /* b0 = *(u16 *) (skb->data+imm+src) */ | ||
1191 | if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0)) | ||
1192 | func_addr = __pa(sk_load_half_pos); | ||
1193 | else | ||
1194 | func_addr = __pa(sk_load_half); | ||
1195 | goto call_fn; | ||
1196 | case BPF_LD | BPF_ABS | BPF_W: /* b0 = *(u32 *) (skb->data+imm) */ | ||
1197 | case BPF_LD | BPF_IND | BPF_W: /* b0 = *(u32 *) (skb->data+imm+src) */ | ||
1198 | if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0)) | ||
1199 | func_addr = __pa(sk_load_word_pos); | ||
1200 | else | ||
1201 | func_addr = __pa(sk_load_word); | ||
1202 | goto call_fn; | ||
1203 | call_fn: | ||
1204 | jit->seen |= SEEN_SKB | SEEN_RET0 | SEEN_FUNC; | ||
1205 | REG_SET_SEEN(REG_14); /* Return address of possible func call */ | ||
1206 | |||
1207 | /* | ||
1208 | * Implicit input: | ||
1209 | * BPF_REG_6 (R7) : skb pointer | ||
1210 | * REG_SKB_DATA (R12): skb data pointer (if no BPF_REG_AX) | ||
1211 | * | ||
1212 | * Calculated input: | ||
1213 | * BPF_REG_2 (R3) : offset of byte(s) to fetch in skb | ||
1214 | * BPF_REG_5 (R6) : return address | ||
1215 | * | ||
1216 | * Output: | ||
1217 | * BPF_REG_0 (R14): data read from skb | ||
1218 | * | ||
1219 | * Scratch registers (BPF_REG_1-5) | ||
1220 | */ | ||
1221 | |||
1222 | /* Call function: llilf %w1,func_addr */ | ||
1223 | EMIT6_IMM(0xc00f0000, REG_W1, func_addr); | ||
1224 | |||
1225 | /* Offset: lgfi %b2,imm */ | ||
1226 | EMIT6_IMM(0xc0010000, BPF_REG_2, imm); | ||
1227 | if (BPF_MODE(insn->code) == BPF_IND) | ||
1228 | /* agfr %b2,%src (%src is s32 here) */ | ||
1229 | EMIT4(0xb9180000, BPF_REG_2, src_reg); | ||
1230 | |||
1231 | /* Reload REG_SKB_DATA if BPF_REG_AX is used */ | ||
1232 | if (jit->seen & SEEN_REG_AX) | ||
1233 | /* lg %skb_data,data_off(%b6) */ | ||
1234 | EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0, | ||
1235 | BPF_REG_6, offsetof(struct sk_buff, data)); | ||
1236 | /* basr %b5,%w1 (%b5 is call saved) */ | ||
1237 | EMIT2(0x0d00, BPF_REG_5, REG_W1); | ||
1238 | |||
1239 | /* | ||
1240 | * Note: For fast access we jump directly after the | ||
1241 | * jnz instruction from bpf_jit.S | ||
1242 | */ | ||
1243 | /* jnz <ret0> */ | ||
1244 | EMIT4_PCREL(0xa7740000, jit->ret0_ip - jit->prg); | ||
1245 | break; | ||
1246 | default: /* too complex, give up */ | 1141 | default: /* too complex, give up */ |
1247 | pr_err("Unknown opcode %02x\n", insn->code); | 1142 | pr_err("Unknown opcode %02x\n", insn->code); |
1248 | return -1; | 1143 | return -1; |
diff --git a/arch/sparc/net/Makefile b/arch/sparc/net/Makefile index 76fa8e95b721..d32aac3a25b8 100644 --- a/arch/sparc/net/Makefile +++ b/arch/sparc/net/Makefile | |||
@@ -1,4 +1,7 @@ | |||
1 | # | 1 | # |
2 | # Arch-specific network modules | 2 | # Arch-specific network modules |
3 | # | 3 | # |
4 | obj-$(CONFIG_BPF_JIT) += bpf_jit_asm_$(BITS).o bpf_jit_comp_$(BITS).o | 4 | obj-$(CONFIG_BPF_JIT) += bpf_jit_comp_$(BITS).o |
5 | ifeq ($(BITS),32) | ||
6 | obj-$(CONFIG_BPF_JIT) += bpf_jit_asm_32.o | ||
7 | endif | ||
diff --git a/arch/sparc/net/bpf_jit_64.h b/arch/sparc/net/bpf_jit_64.h index 428f7fd19175..fbc836f1c51c 100644 --- a/arch/sparc/net/bpf_jit_64.h +++ b/arch/sparc/net/bpf_jit_64.h | |||
@@ -33,35 +33,6 @@ | |||
33 | #define I5 0x1d | 33 | #define I5 0x1d |
34 | #define FP 0x1e | 34 | #define FP 0x1e |
35 | #define I7 0x1f | 35 | #define I7 0x1f |
36 | |||
37 | #define r_SKB L0 | ||
38 | #define r_HEADLEN L4 | ||
39 | #define r_SKB_DATA L5 | ||
40 | #define r_TMP G1 | ||
41 | #define r_TMP2 G3 | ||
42 | |||
43 | /* assembly code in arch/sparc/net/bpf_jit_asm_64.S */ | ||
44 | extern u32 bpf_jit_load_word[]; | ||
45 | extern u32 bpf_jit_load_half[]; | ||
46 | extern u32 bpf_jit_load_byte[]; | ||
47 | extern u32 bpf_jit_load_byte_msh[]; | ||
48 | extern u32 bpf_jit_load_word_positive_offset[]; | ||
49 | extern u32 bpf_jit_load_half_positive_offset[]; | ||
50 | extern u32 bpf_jit_load_byte_positive_offset[]; | ||
51 | extern u32 bpf_jit_load_byte_msh_positive_offset[]; | ||
52 | extern u32 bpf_jit_load_word_negative_offset[]; | ||
53 | extern u32 bpf_jit_load_half_negative_offset[]; | ||
54 | extern u32 bpf_jit_load_byte_negative_offset[]; | ||
55 | extern u32 bpf_jit_load_byte_msh_negative_offset[]; | ||
56 | |||
57 | #else | ||
58 | #define r_RESULT %o0 | ||
59 | #define r_SKB %o0 | ||
60 | #define r_OFF %o1 | ||
61 | #define r_HEADLEN %l4 | ||
62 | #define r_SKB_DATA %l5 | ||
63 | #define r_TMP %g1 | ||
64 | #define r_TMP2 %g3 | ||
65 | #endif | 36 | #endif |
66 | 37 | ||
67 | #endif /* _BPF_JIT_H */ | 38 | #endif /* _BPF_JIT_H */ |
diff --git a/arch/sparc/net/bpf_jit_asm_64.S b/arch/sparc/net/bpf_jit_asm_64.S deleted file mode 100644 index 7177867052a1..000000000000 --- a/arch/sparc/net/bpf_jit_asm_64.S +++ /dev/null | |||
@@ -1,162 +0,0 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | #include <asm/ptrace.h> | ||
3 | |||
4 | #include "bpf_jit_64.h" | ||
5 | |||
6 | #define SAVE_SZ 176 | ||
7 | #define SCRATCH_OFF STACK_BIAS + 128 | ||
8 | #define BE_PTR(label) be,pn %xcc, label | ||
9 | #define SIGN_EXTEND(reg) sra reg, 0, reg | ||
10 | |||
11 | #define SKF_MAX_NEG_OFF (-0x200000) /* SKF_LL_OFF from filter.h */ | ||
12 | |||
13 | .text | ||
14 | .globl bpf_jit_load_word | ||
15 | bpf_jit_load_word: | ||
16 | cmp r_OFF, 0 | ||
17 | bl bpf_slow_path_word_neg | ||
18 | nop | ||
19 | .globl bpf_jit_load_word_positive_offset | ||
20 | bpf_jit_load_word_positive_offset: | ||
21 | sub r_HEADLEN, r_OFF, r_TMP | ||
22 | cmp r_TMP, 3 | ||
23 | ble bpf_slow_path_word | ||
24 | add r_SKB_DATA, r_OFF, r_TMP | ||
25 | andcc r_TMP, 3, %g0 | ||
26 | bne load_word_unaligned | ||
27 | nop | ||
28 | retl | ||
29 | ld [r_TMP], r_RESULT | ||
30 | load_word_unaligned: | ||
31 | ldub [r_TMP + 0x0], r_OFF | ||
32 | ldub [r_TMP + 0x1], r_TMP2 | ||
33 | sll r_OFF, 8, r_OFF | ||
34 | or r_OFF, r_TMP2, r_OFF | ||
35 | ldub [r_TMP + 0x2], r_TMP2 | ||
36 | sll r_OFF, 8, r_OFF | ||
37 | or r_OFF, r_TMP2, r_OFF | ||
38 | ldub [r_TMP + 0x3], r_TMP2 | ||
39 | sll r_OFF, 8, r_OFF | ||
40 | retl | ||
41 | or r_OFF, r_TMP2, r_RESULT | ||
42 | |||
43 | .globl bpf_jit_load_half | ||
44 | bpf_jit_load_half: | ||
45 | cmp r_OFF, 0 | ||
46 | bl bpf_slow_path_half_neg | ||
47 | nop | ||
48 | .globl bpf_jit_load_half_positive_offset | ||
49 | bpf_jit_load_half_positive_offset: | ||
50 | sub r_HEADLEN, r_OFF, r_TMP | ||
51 | cmp r_TMP, 1 | ||
52 | ble bpf_slow_path_half | ||
53 | add r_SKB_DATA, r_OFF, r_TMP | ||
54 | andcc r_TMP, 1, %g0 | ||
55 | bne load_half_unaligned | ||
56 | nop | ||
57 | retl | ||
58 | lduh [r_TMP], r_RESULT | ||
59 | load_half_unaligned: | ||
60 | ldub [r_TMP + 0x0], r_OFF | ||
61 | ldub [r_TMP + 0x1], r_TMP2 | ||
62 | sll r_OFF, 8, r_OFF | ||
63 | retl | ||
64 | or r_OFF, r_TMP2, r_RESULT | ||
65 | |||
66 | .globl bpf_jit_load_byte | ||
67 | bpf_jit_load_byte: | ||
68 | cmp r_OFF, 0 | ||
69 | bl bpf_slow_path_byte_neg | ||
70 | nop | ||
71 | .globl bpf_jit_load_byte_positive_offset | ||
72 | bpf_jit_load_byte_positive_offset: | ||
73 | cmp r_OFF, r_HEADLEN | ||
74 | bge bpf_slow_path_byte | ||
75 | nop | ||
76 | retl | ||
77 | ldub [r_SKB_DATA + r_OFF], r_RESULT | ||
78 | |||
79 | #define bpf_slow_path_common(LEN) \ | ||
80 | save %sp, -SAVE_SZ, %sp; \ | ||
81 | mov %i0, %o0; \ | ||
82 | mov %i1, %o1; \ | ||
83 | add %fp, SCRATCH_OFF, %o2; \ | ||
84 | call skb_copy_bits; \ | ||
85 | mov (LEN), %o3; \ | ||
86 | cmp %o0, 0; \ | ||
87 | restore; | ||
88 | |||
89 | bpf_slow_path_word: | ||
90 | bpf_slow_path_common(4) | ||
91 | bl bpf_error | ||
92 | ld [%sp + SCRATCH_OFF], r_RESULT | ||
93 | retl | ||
94 | nop | ||
95 | bpf_slow_path_half: | ||
96 | bpf_slow_path_common(2) | ||
97 | bl bpf_error | ||
98 | lduh [%sp + SCRATCH_OFF], r_RESULT | ||
99 | retl | ||
100 | nop | ||
101 | bpf_slow_path_byte: | ||
102 | bpf_slow_path_common(1) | ||
103 | bl bpf_error | ||
104 | ldub [%sp + SCRATCH_OFF], r_RESULT | ||
105 | retl | ||
106 | nop | ||
107 | |||
108 | #define bpf_negative_common(LEN) \ | ||
109 | save %sp, -SAVE_SZ, %sp; \ | ||
110 | mov %i0, %o0; \ | ||
111 | mov %i1, %o1; \ | ||
112 | SIGN_EXTEND(%o1); \ | ||
113 | call bpf_internal_load_pointer_neg_helper; \ | ||
114 | mov (LEN), %o2; \ | ||
115 | mov %o0, r_TMP; \ | ||
116 | cmp %o0, 0; \ | ||
117 | BE_PTR(bpf_error); \ | ||
118 | restore; | ||
119 | |||
120 | bpf_slow_path_word_neg: | ||
121 | sethi %hi(SKF_MAX_NEG_OFF), r_TMP | ||
122 | cmp r_OFF, r_TMP | ||
123 | bl bpf_error | ||
124 | nop | ||
125 | .globl bpf_jit_load_word_negative_offset | ||
126 | bpf_jit_load_word_negative_offset: | ||
127 | bpf_negative_common(4) | ||
128 | andcc r_TMP, 3, %g0 | ||
129 | bne load_word_unaligned | ||
130 | nop | ||
131 | retl | ||
132 | ld [r_TMP], r_RESULT | ||
133 | |||
134 | bpf_slow_path_half_neg: | ||
135 | sethi %hi(SKF_MAX_NEG_OFF), r_TMP | ||
136 | cmp r_OFF, r_TMP | ||
137 | bl bpf_error | ||
138 | nop | ||
139 | .globl bpf_jit_load_half_negative_offset | ||
140 | bpf_jit_load_half_negative_offset: | ||
141 | bpf_negative_common(2) | ||
142 | andcc r_TMP, 1, %g0 | ||
143 | bne load_half_unaligned | ||
144 | nop | ||
145 | retl | ||
146 | lduh [r_TMP], r_RESULT | ||
147 | |||
148 | bpf_slow_path_byte_neg: | ||
149 | sethi %hi(SKF_MAX_NEG_OFF), r_TMP | ||
150 | cmp r_OFF, r_TMP | ||
151 | bl bpf_error | ||
152 | nop | ||
153 | .globl bpf_jit_load_byte_negative_offset | ||
154 | bpf_jit_load_byte_negative_offset: | ||
155 | bpf_negative_common(1) | ||
156 | retl | ||
157 | ldub [r_TMP], r_RESULT | ||
158 | |||
159 | bpf_error: | ||
160 | /* Make the JIT program itself return zero. */ | ||
161 | ret | ||
162 | restore %g0, %g0, %o0 | ||
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 48a25869349b..9f5918e0693a 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c | |||
@@ -48,10 +48,6 @@ static void bpf_flush_icache(void *start_, void *end_) | |||
48 | } | 48 | } |
49 | } | 49 | } |
50 | 50 | ||
51 | #define SEEN_DATAREF 1 /* might call external helpers */ | ||
52 | #define SEEN_XREG 2 /* ebx is used */ | ||
53 | #define SEEN_MEM 4 /* use mem[] for temporary storage */ | ||
54 | |||
55 | #define S13(X) ((X) & 0x1fff) | 51 | #define S13(X) ((X) & 0x1fff) |
56 | #define S5(X) ((X) & 0x1f) | 52 | #define S5(X) ((X) & 0x1f) |
57 | #define IMMED 0x00002000 | 53 | #define IMMED 0x00002000 |
@@ -198,7 +194,6 @@ struct jit_ctx { | |||
198 | bool tmp_1_used; | 194 | bool tmp_1_used; |
199 | bool tmp_2_used; | 195 | bool tmp_2_used; |
200 | bool tmp_3_used; | 196 | bool tmp_3_used; |
201 | bool saw_ld_abs_ind; | ||
202 | bool saw_frame_pointer; | 197 | bool saw_frame_pointer; |
203 | bool saw_call; | 198 | bool saw_call; |
204 | bool saw_tail_call; | 199 | bool saw_tail_call; |
@@ -207,9 +202,7 @@ struct jit_ctx { | |||
207 | 202 | ||
208 | #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) | 203 | #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) |
209 | #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) | 204 | #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) |
210 | #define SKB_HLEN_REG (MAX_BPF_JIT_REG + 2) | 205 | #define TMP_REG_3 (MAX_BPF_JIT_REG + 2) |
211 | #define SKB_DATA_REG (MAX_BPF_JIT_REG + 3) | ||
212 | #define TMP_REG_3 (MAX_BPF_JIT_REG + 4) | ||
213 | 206 | ||
214 | /* Map BPF registers to SPARC registers */ | 207 | /* Map BPF registers to SPARC registers */ |
215 | static const int bpf2sparc[] = { | 208 | static const int bpf2sparc[] = { |
@@ -238,9 +231,6 @@ static const int bpf2sparc[] = { | |||
238 | [TMP_REG_1] = G1, | 231 | [TMP_REG_1] = G1, |
239 | [TMP_REG_2] = G2, | 232 | [TMP_REG_2] = G2, |
240 | [TMP_REG_3] = G3, | 233 | [TMP_REG_3] = G3, |
241 | |||
242 | [SKB_HLEN_REG] = L4, | ||
243 | [SKB_DATA_REG] = L5, | ||
244 | }; | 234 | }; |
245 | 235 | ||
246 | static void emit(const u32 insn, struct jit_ctx *ctx) | 236 | static void emit(const u32 insn, struct jit_ctx *ctx) |
@@ -800,25 +790,6 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src, | |||
800 | return 0; | 790 | return 0; |
801 | } | 791 | } |
802 | 792 | ||
803 | static void load_skb_regs(struct jit_ctx *ctx, u8 r_skb) | ||
804 | { | ||
805 | const u8 r_headlen = bpf2sparc[SKB_HLEN_REG]; | ||
806 | const u8 r_data = bpf2sparc[SKB_DATA_REG]; | ||
807 | const u8 r_tmp = bpf2sparc[TMP_REG_1]; | ||
808 | unsigned int off; | ||
809 | |||
810 | off = offsetof(struct sk_buff, len); | ||
811 | emit(LD32I | RS1(r_skb) | S13(off) | RD(r_headlen), ctx); | ||
812 | |||
813 | off = offsetof(struct sk_buff, data_len); | ||
814 | emit(LD32I | RS1(r_skb) | S13(off) | RD(r_tmp), ctx); | ||
815 | |||
816 | emit(SUB | RS1(r_headlen) | RS2(r_tmp) | RD(r_headlen), ctx); | ||
817 | |||
818 | off = offsetof(struct sk_buff, data); | ||
819 | emit(LDPTRI | RS1(r_skb) | S13(off) | RD(r_data), ctx); | ||
820 | } | ||
821 | |||
822 | /* Just skip the save instruction and the ctx register move. */ | 793 | /* Just skip the save instruction and the ctx register move. */ |
823 | #define BPF_TAILCALL_PROLOGUE_SKIP 16 | 794 | #define BPF_TAILCALL_PROLOGUE_SKIP 16 |
824 | #define BPF_TAILCALL_CNT_SP_OFF (STACK_BIAS + 128) | 795 | #define BPF_TAILCALL_CNT_SP_OFF (STACK_BIAS + 128) |
@@ -857,9 +828,6 @@ static void build_prologue(struct jit_ctx *ctx) | |||
857 | 828 | ||
858 | emit_reg_move(I0, O0, ctx); | 829 | emit_reg_move(I0, O0, ctx); |
859 | /* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */ | 830 | /* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */ |
860 | |||
861 | if (ctx->saw_ld_abs_ind) | ||
862 | load_skb_regs(ctx, bpf2sparc[BPF_REG_1]); | ||
863 | } | 831 | } |
864 | 832 | ||
865 | static void build_epilogue(struct jit_ctx *ctx) | 833 | static void build_epilogue(struct jit_ctx *ctx) |
@@ -1225,16 +1193,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) | |||
1225 | u8 *func = ((u8 *)__bpf_call_base) + imm; | 1193 | u8 *func = ((u8 *)__bpf_call_base) + imm; |
1226 | 1194 | ||
1227 | ctx->saw_call = true; | 1195 | ctx->saw_call = true; |
1228 | if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func)) | ||
1229 | emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx); | ||
1230 | 1196 | ||
1231 | emit_call((u32 *)func, ctx); | 1197 | emit_call((u32 *)func, ctx); |
1232 | emit_nop(ctx); | 1198 | emit_nop(ctx); |
1233 | 1199 | ||
1234 | emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx); | 1200 | emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx); |
1235 | |||
1236 | if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func)) | ||
1237 | load_skb_regs(ctx, L7); | ||
1238 | break; | 1201 | break; |
1239 | } | 1202 | } |
1240 | 1203 | ||
@@ -1412,43 +1375,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) | |||
1412 | emit_nop(ctx); | 1375 | emit_nop(ctx); |
1413 | break; | 1376 | break; |
1414 | } | 1377 | } |
1415 | #define CHOOSE_LOAD_FUNC(K, func) \ | ||
1416 | ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) | ||
1417 | |||
1418 | /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */ | ||
1419 | case BPF_LD | BPF_ABS | BPF_W: | ||
1420 | func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_word); | ||
1421 | goto common_load; | ||
1422 | case BPF_LD | BPF_ABS | BPF_H: | ||
1423 | func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_half); | ||
1424 | goto common_load; | ||
1425 | case BPF_LD | BPF_ABS | BPF_B: | ||
1426 | func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_byte); | ||
1427 | goto common_load; | ||
1428 | /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */ | ||
1429 | case BPF_LD | BPF_IND | BPF_W: | ||
1430 | func = bpf_jit_load_word; | ||
1431 | goto common_load; | ||
1432 | case BPF_LD | BPF_IND | BPF_H: | ||
1433 | func = bpf_jit_load_half; | ||
1434 | goto common_load; | ||
1435 | |||
1436 | case BPF_LD | BPF_IND | BPF_B: | ||
1437 | func = bpf_jit_load_byte; | ||
1438 | common_load: | ||
1439 | ctx->saw_ld_abs_ind = true; | ||
1440 | |||
1441 | emit_reg_move(bpf2sparc[BPF_REG_6], O0, ctx); | ||
1442 | emit_loadimm(imm, O1, ctx); | ||
1443 | |||
1444 | if (BPF_MODE(code) == BPF_IND) | ||
1445 | emit_alu(ADD, src, O1, ctx); | ||
1446 | |||
1447 | emit_call(func, ctx); | ||
1448 | emit_alu_K(SRA, O1, 0, ctx); | ||
1449 | |||
1450 | emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx); | ||
1451 | break; | ||
1452 | 1378 | ||
1453 | default: | 1379 | default: |
1454 | pr_err_once("unknown opcode %02x\n", code); | 1380 | pr_err_once("unknown opcode %02x\n", code); |
@@ -1583,12 +1509,11 @@ skip_init_ctx: | |||
1583 | build_epilogue(&ctx); | 1509 | build_epilogue(&ctx); |
1584 | 1510 | ||
1585 | if (bpf_jit_enable > 1) | 1511 | if (bpf_jit_enable > 1) |
1586 | pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c%c]\n", pass, | 1512 | pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c]\n", pass, |
1587 | image_size - (ctx.idx * 4), | 1513 | image_size - (ctx.idx * 4), |
1588 | ctx.tmp_1_used ? '1' : ' ', | 1514 | ctx.tmp_1_used ? '1' : ' ', |
1589 | ctx.tmp_2_used ? '2' : ' ', | 1515 | ctx.tmp_2_used ? '2' : ' ', |
1590 | ctx.tmp_3_used ? '3' : ' ', | 1516 | ctx.tmp_3_used ? '3' : ' ', |
1591 | ctx.saw_ld_abs_ind ? 'L' : ' ', | ||
1592 | ctx.saw_frame_pointer ? 'F' : ' ', | 1517 | ctx.saw_frame_pointer ? 'F' : ' ', |
1593 | ctx.saw_call ? 'C' : ' ', | 1518 | ctx.saw_call ? 'C' : ' ', |
1594 | ctx.saw_tail_call ? 'T' : ' '); | 1519 | ctx.saw_tail_call ? 'T' : ' '); |
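The pass summary loses its 'L' column along with saw_ld_abs_ind. It still prints only when bpf_jit_enable is above 1 (e.g. via /proc/sys/net/core/bpf_jit_enable); a made-up line consistent with the new six-column format string would be:

	Pass 2: shrink = 0, seen = [12 FC ]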
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c07f492b871a..d51a71dcbac2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -138,7 +138,7 @@ config X86 | |||
138 | select HAVE_DMA_CONTIGUOUS | 138 | select HAVE_DMA_CONTIGUOUS |
139 | select HAVE_DYNAMIC_FTRACE | 139 | select HAVE_DYNAMIC_FTRACE |
140 | select HAVE_DYNAMIC_FTRACE_WITH_REGS | 140 | select HAVE_DYNAMIC_FTRACE_WITH_REGS |
141 | select HAVE_EBPF_JIT if X86_64 | 141 | select HAVE_EBPF_JIT |
142 | select HAVE_EFFICIENT_UNALIGNED_ACCESS | 142 | select HAVE_EFFICIENT_UNALIGNED_ACCESS |
143 | select HAVE_EXIT_THREAD | 143 | select HAVE_EXIT_THREAD |
144 | select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE | 144 | select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE |
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index f928ad9b143f..2cd344d1a6e5 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h | |||
@@ -291,16 +291,20 @@ do { \ | |||
291 | * lfence | 291 | * lfence |
292 | * jmp spec_trap | 292 | * jmp spec_trap |
293 | * do_rop: | 293 | * do_rop: |
294 | * mov %rax,(%rsp) | 294 | * mov %rax,(%rsp) for x86_64 |
295 | * mov %edx,(%esp) for x86_32 | ||
295 | * retq | 296 | * retq |
296 | * | 297 | * |
297 | * Without retpolines configured: | 298 | * Without retpolines configured: |
298 | * | 299 | * |
299 | * jmp *%rax | 300 | * jmp *%rax for x86_64 |
301 | * jmp *%edx for x86_32 | ||
300 | */ | 302 | */ |
301 | #ifdef CONFIG_RETPOLINE | 303 | #ifdef CONFIG_RETPOLINE |
304 | #ifdef CONFIG_X86_64 | ||
302 | # define RETPOLINE_RAX_BPF_JIT_SIZE 17 | 305 | # define RETPOLINE_RAX_BPF_JIT_SIZE 17 |
303 | # define RETPOLINE_RAX_BPF_JIT() \ | 306 | # define RETPOLINE_RAX_BPF_JIT() \ |
307 | do { \ | ||
304 | EMIT1_off32(0xE8, 7); /* callq do_rop */ \ | 308 | EMIT1_off32(0xE8, 7); /* callq do_rop */ \ |
305 | /* spec_trap: */ \ | 309 | /* spec_trap: */ \ |
306 | EMIT2(0xF3, 0x90); /* pause */ \ | 310 | EMIT2(0xF3, 0x90); /* pause */ \ |
@@ -308,11 +312,31 @@ do { \ | |||
308 | EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ | 312 | EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ |
309 | /* do_rop: */ \ | 313 | /* do_rop: */ \ |
310 | EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \ | 314 | EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \ |
311 | EMIT1(0xC3); /* retq */ | 315 | EMIT1(0xC3); /* retq */ \ |
316 | } while (0) | ||
312 | #else | 317 | #else |
318 | # define RETPOLINE_EDX_BPF_JIT() \ | ||
319 | do { \ | ||
320 | EMIT1_off32(0xE8, 7); /* call do_rop */ \ | ||
321 | /* spec_trap: */ \ | ||
322 | EMIT2(0xF3, 0x90); /* pause */ \ | ||
323 | EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ | ||
324 | EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ | ||
325 | /* do_rop: */ \ | ||
326 | EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \ | ||
327 | EMIT1(0xC3); /* ret */ \ | ||
328 | } while (0) | ||
329 | #endif | ||
330 | #else /* !CONFIG_RETPOLINE */ | ||
331 | |||
332 | #ifdef CONFIG_X86_64 | ||
313 | # define RETPOLINE_RAX_BPF_JIT_SIZE 2 | 333 | # define RETPOLINE_RAX_BPF_JIT_SIZE 2 |
314 | # define RETPOLINE_RAX_BPF_JIT() \ | 334 | # define RETPOLINE_RAX_BPF_JIT() \ |
315 | EMIT2(0xFF, 0xE0); /* jmp *%rax */ | 335 | EMIT2(0xFF, 0xE0); /* jmp *%rax */ |
336 | #else | ||
337 | # define RETPOLINE_EDX_BPF_JIT() \ | ||
338 | EMIT2(0xFF, 0xE2) /* jmp *%edx */ | ||
339 | #endif | ||
316 | #endif | 340 | #endif |
317 | 341 | ||
318 | #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ | 342 | #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ |
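Both variants are meant to be dropped into the JIT's emitter functions once the branch target sits in the designated register. A sketch of how a caller might use them (the function name is hypothetical; 'prog' and 'cnt' follow the EMIT*() convention in bpf_jit_comp.c):

	static void emit_indirect_jump(u8 **pprog)
	{
		u8 *prog = *pprog;
		int cnt = 0;

		/* Target already loaded into %rax (or %edx on x86_32). With
		 * CONFIG_RETPOLINE this expands to the call/pause/lfence/mov/ret
		 * trampoline sketched above; without it, to a plain indirect jmp.
		 */
	#ifdef CONFIG_X86_64
		RETPOLINE_RAX_BPF_JIT();
	#else
		RETPOLINE_EDX_BPF_JIT();
	#endif

		*pprog = prog;
	}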
diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile index fefb4b619598..59e123da580c 100644 --- a/arch/x86/net/Makefile +++ b/arch/x86/net/Makefile | |||
@@ -1,6 +1,9 @@ | |||
1 | # | 1 | # |
2 | # Arch-specific network modules | 2 | # Arch-specific network modules |
3 | # | 3 | # |
4 | OBJECT_FILES_NON_STANDARD_bpf_jit.o += y | ||
5 | 4 | ||
6 | obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o | 5 | ifeq ($(CONFIG_X86_32),y) |
6 | obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o | ||
7 | else | ||
8 | obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o | ||
9 | endif | ||
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S deleted file mode 100644 index b33093f84528..000000000000 --- a/arch/x86/net/bpf_jit.S +++ /dev/null | |||
@@ -1,154 +0,0 @@ | |||
1 | /* bpf_jit.S : BPF JIT helper functions | ||
2 | * | ||
3 | * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com) | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation; version 2 | ||
8 | * of the License. | ||
9 | */ | ||
10 | #include <linux/linkage.h> | ||
11 | #include <asm/frame.h> | ||
12 | |||
13 | /* | ||
14 | * Calling convention : | ||
15 | * rbx : skb pointer (callee saved) | ||
16 | * esi : offset of byte(s) to fetch in skb (can be scratched) | ||
17 | * r10 : copy of skb->data | ||
18 | * r9d : hlen = skb->len - skb->data_len | ||
19 | */ | ||
20 | #define SKBDATA %r10 | ||
21 | #define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */ | ||
22 | |||
23 | #define FUNC(name) \ | ||
24 | .globl name; \ | ||
25 | .type name, @function; \ | ||
26 | name: | ||
27 | |||
28 | FUNC(sk_load_word) | ||
29 | test %esi,%esi | ||
30 | js bpf_slow_path_word_neg | ||
31 | |||
32 | FUNC(sk_load_word_positive_offset) | ||
33 | mov %r9d,%eax # hlen | ||
34 | sub %esi,%eax # hlen - offset | ||
35 | cmp $3,%eax | ||
36 | jle bpf_slow_path_word | ||
37 | mov (SKBDATA,%rsi),%eax | ||
38 | bswap %eax /* ntohl() */ | ||
39 | ret | ||
40 | |||
41 | FUNC(sk_load_half) | ||
42 | test %esi,%esi | ||
43 | js bpf_slow_path_half_neg | ||
44 | |||
45 | FUNC(sk_load_half_positive_offset) | ||
46 | mov %r9d,%eax | ||
47 | sub %esi,%eax # hlen - offset | ||
48 | cmp $1,%eax | ||
49 | jle bpf_slow_path_half | ||
50 | movzwl (SKBDATA,%rsi),%eax | ||
51 | rol $8,%ax # ntohs() | ||
52 | ret | ||
53 | |||
54 | FUNC(sk_load_byte) | ||
55 | test %esi,%esi | ||
56 | js bpf_slow_path_byte_neg | ||
57 | |||
58 | FUNC(sk_load_byte_positive_offset) | ||
59 | cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */ | ||
60 | jle bpf_slow_path_byte | ||
61 | movzbl (SKBDATA,%rsi),%eax | ||
62 | ret | ||
63 | |||
64 | /* rsi contains offset and can be scratched */ | ||
65 | #define bpf_slow_path_common(LEN) \ | ||
66 | lea 32(%rbp), %rdx;\ | ||
67 | FRAME_BEGIN; \ | ||
68 | mov %rbx, %rdi; /* arg1 == skb */ \ | ||
69 | push %r9; \ | ||
70 | push SKBDATA; \ | ||
71 | /* rsi already has offset */ \ | ||
72 | mov $LEN,%ecx; /* len */ \ | ||
73 | call skb_copy_bits; \ | ||
74 | test %eax,%eax; \ | ||
75 | pop SKBDATA; \ | ||
76 | pop %r9; \ | ||
77 | FRAME_END | ||
78 | |||
79 | |||
80 | bpf_slow_path_word: | ||
81 | bpf_slow_path_common(4) | ||
82 | js bpf_error | ||
83 | mov 32(%rbp),%eax | ||
84 | bswap %eax | ||
85 | ret | ||
86 | |||
87 | bpf_slow_path_half: | ||
88 | bpf_slow_path_common(2) | ||
89 | js bpf_error | ||
90 | mov 32(%rbp),%ax | ||
91 | rol $8,%ax | ||
92 | movzwl %ax,%eax | ||
93 | ret | ||
94 | |||
95 | bpf_slow_path_byte: | ||
96 | bpf_slow_path_common(1) | ||
97 | js bpf_error | ||
98 | movzbl 32(%rbp),%eax | ||
99 | ret | ||
100 | |||
101 | #define sk_negative_common(SIZE) \ | ||
102 | FRAME_BEGIN; \ | ||
103 | mov %rbx, %rdi; /* arg1 == skb */ \ | ||
104 | push %r9; \ | ||
105 | push SKBDATA; \ | ||
106 | /* rsi already has offset */ \ | ||
107 | mov $SIZE,%edx; /* size */ \ | ||
108 | call bpf_internal_load_pointer_neg_helper; \ | ||
109 | test %rax,%rax; \ | ||
110 | pop SKBDATA; \ | ||
111 | pop %r9; \ | ||
112 | FRAME_END; \ | ||
113 | jz bpf_error | ||
114 | |||
115 | bpf_slow_path_word_neg: | ||
116 | cmp SKF_MAX_NEG_OFF, %esi /* test range */ | ||
117 | jl bpf_error /* offset lower -> error */ | ||
118 | |||
119 | FUNC(sk_load_word_negative_offset) | ||
120 | sk_negative_common(4) | ||
121 | mov (%rax), %eax | ||
122 | bswap %eax | ||
123 | ret | ||
124 | |||
125 | bpf_slow_path_half_neg: | ||
126 | cmp SKF_MAX_NEG_OFF, %esi | ||
127 | jl bpf_error | ||
128 | |||
129 | FUNC(sk_load_half_negative_offset) | ||
130 | sk_negative_common(2) | ||
131 | mov (%rax),%ax | ||
132 | rol $8,%ax | ||
133 | movzwl %ax,%eax | ||
134 | ret | ||
135 | |||
136 | bpf_slow_path_byte_neg: | ||
137 | cmp SKF_MAX_NEG_OFF, %esi | ||
138 | jl bpf_error | ||
139 | |||
140 | FUNC(sk_load_byte_negative_offset) | ||
141 | sk_negative_common(1) | ||
142 | movzbl (%rax), %eax | ||
143 | ret | ||
144 | |||
145 | bpf_error: | ||
146 | # force a return 0 from jit handler | ||
147 | xor %eax,%eax | ||
148 | mov (%rbp),%rbx | ||
149 | mov 8(%rbp),%r13 | ||
150 | mov 16(%rbp),%r14 | ||
151 | mov 24(%rbp),%r15 | ||
152 | add $40, %rbp | ||
153 | leaveq | ||
154 | ret | ||
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 263c8453815e..8fca446aaef6 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c | |||
@@ -1,4 +1,5 @@ | |||
1 | /* bpf_jit_comp.c : BPF JIT compiler | 1 | /* |
2 | * bpf_jit_comp.c: BPF JIT compiler | ||
2 | * | 3 | * |
3 | * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) | 4 | * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) |
4 | * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com | 5 | * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com |
@@ -16,15 +17,6 @@ | |||
16 | #include <asm/set_memory.h> | 17 | #include <asm/set_memory.h> |
17 | #include <asm/nospec-branch.h> | 18 | #include <asm/nospec-branch.h> |
18 | 19 | ||
19 | /* | ||
20 | * assembly code in arch/x86/net/bpf_jit.S | ||
21 | */ | ||
22 | extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; | ||
23 | extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[]; | ||
24 | extern u8 sk_load_byte_positive_offset[]; | ||
25 | extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[]; | ||
26 | extern u8 sk_load_byte_negative_offset[]; | ||
27 | |||
28 | static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) | 20 | static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) |
29 | { | 21 | { |
30 | if (len == 1) | 22 | if (len == 1) |
@@ -45,14 +37,15 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) | |||
45 | #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) | 37 | #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) |
46 | #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) | 38 | #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) |
47 | #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) | 39 | #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) |
40 | |||
48 | #define EMIT1_off32(b1, off) \ | 41 | #define EMIT1_off32(b1, off) \ |
49 | do {EMIT1(b1); EMIT(off, 4); } while (0) | 42 | do { EMIT1(b1); EMIT(off, 4); } while (0) |
50 | #define EMIT2_off32(b1, b2, off) \ | 43 | #define EMIT2_off32(b1, b2, off) \ |
51 | do {EMIT2(b1, b2); EMIT(off, 4); } while (0) | 44 | do { EMIT2(b1, b2); EMIT(off, 4); } while (0) |
52 | #define EMIT3_off32(b1, b2, b3, off) \ | 45 | #define EMIT3_off32(b1, b2, b3, off) \ |
53 | do {EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) | 46 | do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) |
54 | #define EMIT4_off32(b1, b2, b3, b4, off) \ | 47 | #define EMIT4_off32(b1, b2, b3, b4, off) \ |
55 | do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) | 48 | do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) |
56 | 49 | ||
57 | static bool is_imm8(int value) | 50 | static bool is_imm8(int value) |
58 | { | 51 | { |
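Whitespace fixes aside, all of these macros funnel into emit_code(), which packs up to four opcode bytes into a u32, low byte first, and stores them. A stand-alone rendition, assuming a little-endian host as on x86:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	static uint8_t *emit_code(uint8_t *ptr, uint32_t bytes, unsigned int len)
	{
		memcpy(ptr, &bytes, len);		/* low byte lands first on LE */
		return ptr + len;
	}

	int main(void)
	{
		uint8_t buf[4], *p = buf;

		/* EMIT2(0xFF, 0xE0) == 'jmp *%rax': packed as (b1) + (b2 << 8) */
		p = emit_code(p, 0xFF + (0xE0 << 8), 2);
		printf("%02x %02x\n", buf[0], buf[1]);	/* ff e0 */
		return 0;
	}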
@@ -70,9 +63,10 @@ static bool is_uimm32(u64 value) | |||
70 | } | 63 | } |
71 | 64 | ||
72 | /* mov dst, src */ | 65 | /* mov dst, src */ |
73 | #define EMIT_mov(DST, SRC) \ | 66 | #define EMIT_mov(DST, SRC) \ |
74 | do {if (DST != SRC) \ | 67 | do { \ |
75 | EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \ | 68 | if (DST != SRC) \ |
69 | EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \ | ||
76 | } while (0) | 70 | } while (0) |
77 | 71 | ||
78 | static int bpf_size_to_x86_bytes(int bpf_size) | 72 | static int bpf_size_to_x86_bytes(int bpf_size) |
@@ -89,7 +83,8 @@ static int bpf_size_to_x86_bytes(int bpf_size) | |||
89 | return 0; | 83 | return 0; |
90 | } | 84 | } |
91 | 85 | ||
92 | /* list of x86 cond jumps opcodes (. + s8) | 86 | /* |
87 | * List of x86 cond jumps opcodes (. + s8) | ||
93 | * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) | 88 | * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) |
94 | */ | 89 | */ |
95 | #define X86_JB 0x72 | 90 | #define X86_JB 0x72 |
@@ -103,38 +98,37 @@ static int bpf_size_to_x86_bytes(int bpf_size) | |||
103 | #define X86_JLE 0x7E | 98 | #define X86_JLE 0x7E |
104 | #define X86_JG 0x7F | 99 | #define X86_JG 0x7F |
105 | 100 | ||
106 | #define CHOOSE_LOAD_FUNC(K, func) \ | 101 | /* Pick a register outside of BPF range for JIT internal work */ |
107 | ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) | ||
108 | |||
109 | /* pick a register outside of BPF range for JIT internal work */ | ||
110 | #define AUX_REG (MAX_BPF_JIT_REG + 1) | 102 | #define AUX_REG (MAX_BPF_JIT_REG + 1) |
111 | 103 | ||
112 | /* The following table maps BPF registers to x64 registers. | 104 | /* |
105 | * The following table maps BPF registers to x86-64 registers. | ||
113 | * | 106 | * |
114 | * x64 register r12 is unused, since if used as base address | 107 | * x86-64 register R12 is unused, since if used as base address |
115 | * register in load/store instructions, it always needs an | 108 | * register in load/store instructions, it always needs an |
116 | * extra byte of encoding and is callee saved. | 109 | * extra byte of encoding and is callee saved. |
117 | * | 110 | * |
118 | * r9 caches skb->len - skb->data_len | 111 | * Also x86-64 register R9 is unused. x86-64 register R10 is |
119 | * r10 caches skb->data, and used for blinding (if enabled) | 112 | * used for blinding (if enabled). |
120 | */ | 113 | */ |
121 | static const int reg2hex[] = { | 114 | static const int reg2hex[] = { |
122 | [BPF_REG_0] = 0, /* rax */ | 115 | [BPF_REG_0] = 0, /* RAX */ |
123 | [BPF_REG_1] = 7, /* rdi */ | 116 | [BPF_REG_1] = 7, /* RDI */ |
124 | [BPF_REG_2] = 6, /* rsi */ | 117 | [BPF_REG_2] = 6, /* RSI */ |
125 | [BPF_REG_3] = 2, /* rdx */ | 118 | [BPF_REG_3] = 2, /* RDX */ |
126 | [BPF_REG_4] = 1, /* rcx */ | 119 | [BPF_REG_4] = 1, /* RCX */ |
127 | [BPF_REG_5] = 0, /* r8 */ | 120 | [BPF_REG_5] = 0, /* R8 */ |
128 | [BPF_REG_6] = 3, /* rbx callee saved */ | 121 | [BPF_REG_6] = 3, /* RBX callee saved */ |
129 | [BPF_REG_7] = 5, /* r13 callee saved */ | 122 | [BPF_REG_7] = 5, /* R13 callee saved */ |
130 | [BPF_REG_8] = 6, /* r14 callee saved */ | 123 | [BPF_REG_8] = 6, /* R14 callee saved */ |
131 | [BPF_REG_9] = 7, /* r15 callee saved */ | 124 | [BPF_REG_9] = 7, /* R15 callee saved */ |
132 | [BPF_REG_FP] = 5, /* rbp readonly */ | 125 | [BPF_REG_FP] = 5, /* RBP readonly */ |
133 | [BPF_REG_AX] = 2, /* r10 temp register */ | 126 | [BPF_REG_AX] = 2, /* R10 temp register */ |
134 | [AUX_REG] = 3, /* r11 temp register */ | 127 | [AUX_REG] = 3, /* R11 temp register */ |
135 | }; | 128 | }; |
136 | 129 | ||
137 | /* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15 | 130 | /* |
131 | * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15 | ||
138 | * which need extra byte of encoding. | 132 | * which need extra byte of encoding. |
139 | * rax,rcx,...,rbp have simpler encoding | 133 | * rax,rcx,...,rbp have simpler encoding |
140 | */ | 134 | */ |
@@ -153,7 +147,7 @@ static bool is_axreg(u32 reg) | |||
153 | return reg == BPF_REG_0; | 147 | return reg == BPF_REG_0; |
154 | } | 148 | } |
155 | 149 | ||
156 | /* add modifiers if 'reg' maps to x64 registers r8..r15 */ | 150 | /* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */ |
157 | static u8 add_1mod(u8 byte, u32 reg) | 151 | static u8 add_1mod(u8 byte, u32 reg) |
158 | { | 152 | { |
159 | if (is_ereg(reg)) | 153 | if (is_ereg(reg)) |
@@ -170,13 +164,13 @@ static u8 add_2mod(u8 byte, u32 r1, u32 r2) | |||
170 | return byte; | 164 | return byte; |
171 | } | 165 | } |
172 | 166 | ||
173 | /* encode 'dst_reg' register into x64 opcode 'byte' */ | 167 | /* Encode 'dst_reg' register into x86-64 opcode 'byte' */ |
174 | static u8 add_1reg(u8 byte, u32 dst_reg) | 168 | static u8 add_1reg(u8 byte, u32 dst_reg) |
175 | { | 169 | { |
176 | return byte + reg2hex[dst_reg]; | 170 | return byte + reg2hex[dst_reg]; |
177 | } | 171 | } |
178 | 172 | ||
179 | /* encode 'dst_reg' and 'src_reg' registers into x64 opcode 'byte' */ | 173 | /* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */ |
180 | static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) | 174 | static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) |
181 | { | 175 | { |
182 | return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); | 176 | return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); |
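A worked example of the helper just above, using the reg2hex values from this file, for encoding 'mov %edx,%eax' (opcode 0x89 plus a register-to-register ModRM byte, base 0xC0):

	/* reg2hex[BPF_REG_0] == 0 (rax), reg2hex[BPF_REG_3] == 2 (rdx) */
	u8 modrm = add_2reg(0xC0, BPF_REG_0, BPF_REG_3);
	/* 0xC0 + 0 + (2 << 3) == 0xD0, so the bytes 0x89 0xD0 decode as
	 * 'mov %edx,%eax' (mod=11, reg=rdx, r/m=rax).
	 */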
@@ -184,27 +178,24 @@ static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) | |||
184 | 178 | ||
185 | static void jit_fill_hole(void *area, unsigned int size) | 179 | static void jit_fill_hole(void *area, unsigned int size) |
186 | { | 180 | { |
187 | /* fill whole space with int3 instructions */ | 181 | /* Fill whole space with INT3 instructions */ |
188 | memset(area, 0xcc, size); | 182 | memset(area, 0xcc, size); |
189 | } | 183 | } |
190 | 184 | ||
191 | struct jit_context { | 185 | struct jit_context { |
192 | int cleanup_addr; /* epilogue code offset */ | 186 | int cleanup_addr; /* Epilogue code offset */ |
193 | bool seen_ld_abs; | ||
194 | bool seen_ax_reg; | ||
195 | }; | 187 | }; |
196 | 188 | ||
197 | /* maximum number of bytes emitted while JITing one eBPF insn */ | 189 | /* Maximum number of bytes emitted while JITing one eBPF insn */ |
198 | #define BPF_MAX_INSN_SIZE 128 | 190 | #define BPF_MAX_INSN_SIZE 128 |
199 | #define BPF_INSN_SAFETY 64 | 191 | #define BPF_INSN_SAFETY 64 |
200 | 192 | ||
201 | #define AUX_STACK_SPACE \ | 193 | #define AUX_STACK_SPACE 40 /* Space for RBX, R13, R14, R15, tailcnt */ |
202 | (32 /* space for rbx, r13, r14, r15 */ + \ | ||
203 | 8 /* space for skb_copy_bits() buffer */) | ||
204 | 194 | ||
205 | #define PROLOGUE_SIZE 37 | 195 | #define PROLOGUE_SIZE 37 |
206 | 196 | ||
207 | /* emit x64 prologue code for BPF program and check it's size. | 197 | /* |
198 | * Emit x86-64 prologue code for BPF program and check its size. | ||
208 | * bpf_tail_call helper will skip it while jumping into another program | 199 | * bpf_tail_call helper will skip it while jumping into another program |
209 | */ | 200 | */ |
210 | static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | 201 | static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) |
@@ -212,8 +203,11 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | |||
212 | u8 *prog = *pprog; | 203 | u8 *prog = *pprog; |
213 | int cnt = 0; | 204 | int cnt = 0; |
214 | 205 | ||
215 | EMIT1(0x55); /* push rbp */ | 206 | /* push rbp */ |
216 | EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */ | 207 | EMIT1(0x55); |
208 | |||
209 | /* mov rbp,rsp */ | ||
210 | EMIT3(0x48, 0x89, 0xE5); | ||
217 | 211 | ||
218 | /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */ | 212 | /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */ |
219 | EMIT3_off32(0x48, 0x81, 0xEC, | 213 | EMIT3_off32(0x48, 0x81, 0xEC, |
@@ -222,19 +216,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | |||
222 | /* sub rbp, AUX_STACK_SPACE */ | 216 | /* sub rbp, AUX_STACK_SPACE */ |
223 | EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE); | 217 | EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE); |
224 | 218 | ||
225 | /* all classic BPF filters use R6(rbx) save it */ | ||
226 | |||
227 | /* mov qword ptr [rbp+0],rbx */ | 219 | /* mov qword ptr [rbp+0],rbx */ |
228 | EMIT4(0x48, 0x89, 0x5D, 0); | 220 | EMIT4(0x48, 0x89, 0x5D, 0); |
229 | |||
230 | /* bpf_convert_filter() maps classic BPF register X to R7 and uses R8 | ||
231 | * as temporary, so all tcpdump filters need to spill/fill R7(r13) and | ||
232 | * R8(r14). R9(r15) spill could be made conditional, but there is only | ||
233 | * one 'bpf_error' return path out of helper functions inside bpf_jit.S | ||
234 | * The overhead of extra spill is negligible for any filter other | ||
235 | * than synthetic ones. Therefore not worth adding complexity. | ||
236 | */ | ||
237 | |||
238 | /* mov qword ptr [rbp+8],r13 */ | 221 | /* mov qword ptr [rbp+8],r13 */ |
239 | EMIT4(0x4C, 0x89, 0x6D, 8); | 222 | EMIT4(0x4C, 0x89, 0x6D, 8); |
240 | /* mov qword ptr [rbp+16],r14 */ | 223 | /* mov qword ptr [rbp+16],r14 */ |
@@ -243,9 +226,10 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | |||
243 | EMIT4(0x4C, 0x89, 0x7D, 24); | 226 | EMIT4(0x4C, 0x89, 0x7D, 24); |
244 | 227 | ||
245 | if (!ebpf_from_cbpf) { | 228 | if (!ebpf_from_cbpf) { |
246 | /* Clear the tail call counter (tail_call_cnt): for eBPF tail | 229 | /* |
230 | * Clear the tail call counter (tail_call_cnt): for eBPF tail | ||
247 | * calls we need to reset the counter to 0. It's done in two | 231 | * calls we need to reset the counter to 0. It's done in two |
248 | * instructions, resetting rax register to 0, and moving it | 232 | * instructions, resetting RAX register to 0, and moving it |
249 | * to the counter location. | 233 | * to the counter location. |
250 | */ | 234 | */ |
251 | 235 | ||
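Taken together, the stores above leave this frame below the adjusted %rbp (a sketch assembled from offsets visible in this file; AUX_STACK_SPACE stays 40 bytes, with the old skb_copy_bits() scratch slot repurposed for the tail-call counter):

	/*
	 * rbp +  0: saved rbx
	 * rbp +  8: saved r13
	 * rbp + 16: saved r14
	 * rbp + 24: saved r15
	 * rbp + 32: tail_call_cnt (read as a dword at rbp + 36 in
	 *           emit_bpf_tail_call() below)
	 */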
@@ -260,7 +244,9 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | |||
260 | *pprog = prog; | 244 | *pprog = prog; |
261 | } | 245 | } |
262 | 246 | ||
263 | /* generate the following code: | 247 | /* |
248 | * Generate the following code: | ||
249 | * | ||
264 | * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... | 250 | * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... |
265 | * if (index >= array->map.max_entries) | 251 | * if (index >= array->map.max_entries) |
266 | * goto out; | 252 | * goto out; |
@@ -278,23 +264,26 @@ static void emit_bpf_tail_call(u8 **pprog) | |||
278 | int label1, label2, label3; | 264 | int label1, label2, label3; |
279 | int cnt = 0; | 265 | int cnt = 0; |
280 | 266 | ||
281 | /* rdi - pointer to ctx | 267 | /* |
268 | * rdi - pointer to ctx | ||
282 | * rsi - pointer to bpf_array | 269 | * rsi - pointer to bpf_array |
283 | * rdx - index in bpf_array | 270 | * rdx - index in bpf_array |
284 | */ | 271 | */ |
285 | 272 | ||
286 | /* if (index >= array->map.max_entries) | 273 | /* |
287 | * goto out; | 274 | * if (index >= array->map.max_entries) |
275 | * goto out; | ||
288 | */ | 276 | */ |
289 | EMIT2(0x89, 0xD2); /* mov edx, edx */ | 277 | EMIT2(0x89, 0xD2); /* mov edx, edx */ |
290 | EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ | 278 | EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ |
291 | offsetof(struct bpf_array, map.max_entries)); | 279 | offsetof(struct bpf_array, map.max_entries)); |
292 | #define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */ | 280 | #define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */ |
293 | EMIT2(X86_JBE, OFFSET1); /* jbe out */ | 281 | EMIT2(X86_JBE, OFFSET1); /* jbe out */ |
294 | label1 = cnt; | 282 | label1 = cnt; |
295 | 283 | ||
296 | /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) | 284 | /* |
297 | * goto out; | 285 | * if (tail_call_cnt > MAX_TAIL_CALL_CNT) |
286 | * goto out; | ||
298 | */ | 287 | */ |
299 | EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ | 288 | EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ |
300 | EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ | 289 | EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ |
@@ -308,8 +297,9 @@ static void emit_bpf_tail_call(u8 **pprog) | |||
308 | EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ | 297 | EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ |
309 | offsetof(struct bpf_array, ptrs)); | 298 | offsetof(struct bpf_array, ptrs)); |
310 | 299 | ||
311 | /* if (prog == NULL) | 300 | /* |
312 | * goto out; | 301 | * if (prog == NULL) |
302 | * goto out; | ||
313 | */ | 303 | */ |
314 | EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ | 304 | EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ |
315 | #define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) | 305 | #define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) |
@@ -321,7 +311,8 @@ static void emit_bpf_tail_call(u8 **pprog) | |||
321 | offsetof(struct bpf_prog, bpf_func)); | 311 | offsetof(struct bpf_prog, bpf_func)); |
322 | EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */ | 312 | EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */ |
323 | 313 | ||
324 | /* now we're ready to jump into next BPF program | 314 | /* |
315 | * Now we're ready to jump into next BPF program | ||
325 | * rdi == ctx (1st arg) | 316 | * rdi == ctx (1st arg) |
326 | * rax == prog->bpf_func + prologue_size | 317 | * rax == prog->bpf_func + prologue_size |
327 | */ | 318 | */ |
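Reading the emitted sequence back into C, the tail call behaves roughly like the sketch below (simplified and assumed; in the real JIT the "jump" is the retpoline-guarded indirect branch above, not a return value):

	static void *tail_call_target(void *ctx, struct bpf_array *array, u64 index,
				      u32 *tail_call_cnt)
	{
		struct bpf_prog *prog;

		if (index >= array->map.max_entries)
			return NULL;				/* out */
		if (*tail_call_cnt > MAX_TAIL_CALL_CNT)
			return NULL;				/* out */
		(*tail_call_cnt)++;
		prog = array->ptrs[index];
		if (!prog)
			return NULL;				/* out */
		/* Land just past the target's prologue so it reuses our frame */
		return (u8 *)prog->bpf_func + PROLOGUE_SIZE;
	}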
@@ -334,26 +325,6 @@ static void emit_bpf_tail_call(u8 **pprog) | |||
334 | *pprog = prog; | 325 | *pprog = prog; |
335 | } | 326 | } |
336 | 327 | ||
337 | |||
338 | static void emit_load_skb_data_hlen(u8 **pprog) | ||
339 | { | ||
340 | u8 *prog = *pprog; | ||
341 | int cnt = 0; | ||
342 | |||
343 | /* r9d = skb->len - skb->data_len (headlen) | ||
344 | * r10 = skb->data | ||
345 | */ | ||
346 | /* mov %r9d, off32(%rdi) */ | ||
347 | EMIT3_off32(0x44, 0x8b, 0x8f, offsetof(struct sk_buff, len)); | ||
348 | |||
349 | /* sub %r9d, off32(%rdi) */ | ||
350 | EMIT3_off32(0x44, 0x2b, 0x8f, offsetof(struct sk_buff, data_len)); | ||
351 | |||
352 | /* mov %r10, off32(%rdi) */ | ||
353 | EMIT3_off32(0x4c, 0x8b, 0x97, offsetof(struct sk_buff, data)); | ||
354 | *pprog = prog; | ||
355 | } | ||
356 | |||
357 | static void emit_mov_imm32(u8 **pprog, bool sign_propagate, | 328 | static void emit_mov_imm32(u8 **pprog, bool sign_propagate, |
358 | u32 dst_reg, const u32 imm32) | 329 | u32 dst_reg, const u32 imm32) |
359 | { | 330 | { |
@@ -361,7 +332,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate, | |||
361 | u8 b1, b2, b3; | 332 | u8 b1, b2, b3; |
362 | int cnt = 0; | 333 | int cnt = 0; |
363 | 334 | ||
364 | /* optimization: if imm32 is positive, use 'mov %eax, imm32' | 335 | /* |
336 | * Optimization: if imm32 is positive, use 'mov %eax, imm32' | ||
365 | * (which zero-extends imm32) to save 2 bytes. | 337 | * (which zero-extends imm32) to save 2 bytes. |
366 | */ | 338 | */ |
367 | if (sign_propagate && (s32)imm32 < 0) { | 339 | if (sign_propagate && (s32)imm32 < 0) { |
@@ -373,7 +345,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate, | |||
373 | goto done; | 345 | goto done; |
374 | } | 346 | } |
375 | 347 | ||
376 | /* optimization: if imm32 is zero, use 'xor %eax, %eax' | 348 | /* |
349 | * Optimization: if imm32 is zero, use 'xor %eax, %eax' | ||
377 | * to save 3 bytes. | 350 | * to save 3 bytes. |
378 | */ | 351 | */ |
379 | if (imm32 == 0) { | 352 | if (imm32 == 0) { |
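In byte terms, the two optimizations in this function compare as follows (standard x86-64 encodings):

	/* default:         48 C7 C0 imm32   mov rax, imm32  (7 bytes, sign-extends)
	 * positive imm32:  B8 imm32         mov eax, imm32  (5 bytes, zero-extends) - saves 2
	 * imm32 == 0:      31 C0            xor eax, eax    (2 bytes)               - saves 3
	 */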
@@ -400,7 +373,8 @@ static void emit_mov_imm64(u8 **pprog, u32 dst_reg, | |||
400 | int cnt = 0; | 373 | int cnt = 0; |
401 | 374 | ||
402 | if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) { | 375 | if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) { |
403 | /* For emitting plain u32, where sign bit must not be | 376 | /* |
377 | * For emitting plain u32, where sign bit must not be | ||
404 | * propagated LLVM tends to load imm64 over mov32 | 378 | * propagated LLVM tends to load imm64 over mov32 |
405 | * directly, so save couple of bytes by just doing | 379 | * directly, so save couple of bytes by just doing |
406 | * 'mov %eax, imm32' instead. | 380 | * 'mov %eax, imm32' instead. |
@@ -439,8 +413,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
439 | { | 413 | { |
440 | struct bpf_insn *insn = bpf_prog->insnsi; | 414 | struct bpf_insn *insn = bpf_prog->insnsi; |
441 | int insn_cnt = bpf_prog->len; | 415 | int insn_cnt = bpf_prog->len; |
442 | bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0); | ||
443 | bool seen_ax_reg = ctx->seen_ax_reg | (oldproglen == 0); | ||
444 | bool seen_exit = false; | 416 | bool seen_exit = false; |
445 | u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; | 417 | u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; |
446 | int i, cnt = 0; | 418 | int i, cnt = 0; |
@@ -450,9 +422,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
450 | emit_prologue(&prog, bpf_prog->aux->stack_depth, | 422 | emit_prologue(&prog, bpf_prog->aux->stack_depth, |
451 | bpf_prog_was_classic(bpf_prog)); | 423 | bpf_prog_was_classic(bpf_prog)); |
452 | 424 | ||
453 | if (seen_ld_abs) | ||
454 | emit_load_skb_data_hlen(&prog); | ||
455 | |||
456 | for (i = 0; i < insn_cnt; i++, insn++) { | 425 | for (i = 0; i < insn_cnt; i++, insn++) { |
457 | const s32 imm32 = insn->imm; | 426 | const s32 imm32 = insn->imm; |
458 | u32 dst_reg = insn->dst_reg; | 427 | u32 dst_reg = insn->dst_reg; |
@@ -460,13 +429,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
460 | u8 b2 = 0, b3 = 0; | 429 | u8 b2 = 0, b3 = 0; |
461 | s64 jmp_offset; | 430 | s64 jmp_offset; |
462 | u8 jmp_cond; | 431 | u8 jmp_cond; |
463 | bool reload_skb_data; | ||
464 | int ilen; | 432 | int ilen; |
465 | u8 *func; | 433 | u8 *func; |
466 | 434 | ||
467 | if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX) | ||
468 | ctx->seen_ax_reg = seen_ax_reg = true; | ||
469 | |||
470 | switch (insn->code) { | 435 | switch (insn->code) { |
471 | /* ALU */ | 436 | /* ALU */ |
472 | case BPF_ALU | BPF_ADD | BPF_X: | 437 | case BPF_ALU | BPF_ADD | BPF_X: |
@@ -525,7 +490,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
525 | else if (is_ereg(dst_reg)) | 490 | else if (is_ereg(dst_reg)) |
526 | EMIT1(add_1mod(0x40, dst_reg)); | 491 | EMIT1(add_1mod(0x40, dst_reg)); |
527 | 492 | ||
528 | /* b3 holds 'normal' opcode, b2 short form only valid | 493 | /* |
494 | * b3 holds 'normal' opcode, b2 short form only valid | ||
529 | * in case dst is eax/rax. | 495 | * in case dst is eax/rax. |
530 | */ | 496 | */ |
531 | switch (BPF_OP(insn->code)) { | 497 | switch (BPF_OP(insn->code)) { |
@@ -593,7 +559,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
593 | /* mov rax, dst_reg */ | 559 | /* mov rax, dst_reg */ |
594 | EMIT_mov(BPF_REG_0, dst_reg); | 560 | EMIT_mov(BPF_REG_0, dst_reg); |
595 | 561 | ||
596 | /* xor edx, edx | 562 | /* |
563 | * xor edx, edx | ||
597 | * equivalent to 'xor rdx, rdx', but one byte less | 564 | * equivalent to 'xor rdx, rdx', but one byte less |
598 | */ | 565 | */ |
599 | EMIT2(0x31, 0xd2); | 566 | EMIT2(0x31, 0xd2); |
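The one byte saved here is the dropped REX.W prefix:

	/* 31 D2      xor %edx,%edx  (2 bytes) - 32-bit writes zero-extend into rdx
	 * 48 31 D2   xor %rdx,%rdx  (3 bytes) - same architectural effect
	 */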
@@ -655,7 +622,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
655 | } | 622 | } |
656 | break; | 623 | break; |
657 | } | 624 | } |
658 | /* shifts */ | 625 | /* Shifts */ |
659 | case BPF_ALU | BPF_LSH | BPF_K: | 626 | case BPF_ALU | BPF_LSH | BPF_K: |
660 | case BPF_ALU | BPF_RSH | BPF_K: | 627 | case BPF_ALU | BPF_RSH | BPF_K: |
661 | case BPF_ALU | BPF_ARSH | BPF_K: | 628 | case BPF_ALU | BPF_ARSH | BPF_K: |
@@ -686,7 +653,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
686 | case BPF_ALU64 | BPF_RSH | BPF_X: | 653 | case BPF_ALU64 | BPF_RSH | BPF_X: |
687 | case BPF_ALU64 | BPF_ARSH | BPF_X: | 654 | case BPF_ALU64 | BPF_ARSH | BPF_X: |
688 | 655 | ||
689 | /* check for bad case when dst_reg == rcx */ | 656 | /* Check for bad case when dst_reg == rcx */ |
690 | if (dst_reg == BPF_REG_4) { | 657 | if (dst_reg == BPF_REG_4) { |
691 | /* mov r11, dst_reg */ | 658 | /* mov r11, dst_reg */ |
692 | EMIT_mov(AUX_REG, dst_reg); | 659 | EMIT_mov(AUX_REG, dst_reg); |
@@ -724,13 +691,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
724 | case BPF_ALU | BPF_END | BPF_FROM_BE: | 691 | case BPF_ALU | BPF_END | BPF_FROM_BE: |
725 | switch (imm32) { | 692 | switch (imm32) { |
726 | case 16: | 693 | case 16: |
727 | /* emit 'ror %ax, 8' to swap lower 2 bytes */ | 694 | /* Emit 'ror %ax, 8' to swap lower 2 bytes */ |
728 | EMIT1(0x66); | 695 | EMIT1(0x66); |
729 | if (is_ereg(dst_reg)) | 696 | if (is_ereg(dst_reg)) |
730 | EMIT1(0x41); | 697 | EMIT1(0x41); |
731 | EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); | 698 | EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); |
732 | 699 | ||
733 | /* emit 'movzwl eax, ax' */ | 700 | /* Emit 'movzwl eax, ax' */ |
734 | if (is_ereg(dst_reg)) | 701 | if (is_ereg(dst_reg)) |
735 | EMIT3(0x45, 0x0F, 0xB7); | 702 | EMIT3(0x45, 0x0F, 0xB7); |
736 | else | 703 | else |
@@ -738,7 +705,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
738 | EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); | 705 | EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); |
739 | break; | 706 | break; |
740 | case 32: | 707 | case 32: |
741 | /* emit 'bswap eax' to swap lower 4 bytes */ | 708 | /* Emit 'bswap eax' to swap lower 4 bytes */ |
742 | if (is_ereg(dst_reg)) | 709 | if (is_ereg(dst_reg)) |
743 | EMIT2(0x41, 0x0F); | 710 | EMIT2(0x41, 0x0F); |
744 | else | 711 | else |
@@ -746,7 +713,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
746 | EMIT1(add_1reg(0xC8, dst_reg)); | 713 | EMIT1(add_1reg(0xC8, dst_reg)); |
747 | break; | 714 | break; |
748 | case 64: | 715 | case 64: |
749 | /* emit 'bswap rax' to swap 8 bytes */ | 716 | /* Emit 'bswap rax' to swap 8 bytes */ |
750 | EMIT3(add_1mod(0x48, dst_reg), 0x0F, | 717 | EMIT3(add_1mod(0x48, dst_reg), 0x0F, |
751 | add_1reg(0xC8, dst_reg)); | 718 | add_1reg(0xC8, dst_reg)); |
752 | break; | 719 | break; |
@@ -756,7 +723,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
756 | case BPF_ALU | BPF_END | BPF_FROM_LE: | 723 | case BPF_ALU | BPF_END | BPF_FROM_LE: |
757 | switch (imm32) { | 724 | switch (imm32) { |
758 | case 16: | 725 | case 16: |
759 | /* emit 'movzwl eax, ax' to zero extend 16-bit | 726 | /* |
727 | * Emit 'movzwl eax, ax' to zero extend 16-bit | ||
760 | * into 64 bit | 728 | * into 64 bit |
761 | */ | 729 | */ |
762 | if (is_ereg(dst_reg)) | 730 | if (is_ereg(dst_reg)) |
@@ -766,7 +734,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
766 | EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); | 734 | EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); |
767 | break; | 735 | break; |
768 | case 32: | 736 | case 32: |
769 | /* emit 'mov eax, eax' to clear upper 32-bits */ | 737 | /* Emit 'mov eax, eax' to clear upper 32-bits */ |
770 | if (is_ereg(dst_reg)) | 738 | if (is_ereg(dst_reg)) |
771 | EMIT1(0x45); | 739 | EMIT1(0x45); |
772 | EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg)); | 740 | EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg)); |
@@ -809,9 +777,9 @@ st: if (is_imm8(insn->off)) | |||
809 | 777 | ||
810 | /* STX: *(u8*)(dst_reg + off) = src_reg */ | 778 | /* STX: *(u8*)(dst_reg + off) = src_reg */ |
811 | case BPF_STX | BPF_MEM | BPF_B: | 779 | case BPF_STX | BPF_MEM | BPF_B: |
812 | /* emit 'mov byte ptr [rax + off], al' */ | 780 | /* Emit 'mov byte ptr [rax + off], al' */ |
813 | if (is_ereg(dst_reg) || is_ereg(src_reg) || | 781 | if (is_ereg(dst_reg) || is_ereg(src_reg) || |
814 | /* have to add extra byte for x86 SIL, DIL regs */ | 782 | /* We have to add extra byte for x86 SIL, DIL regs */ |
815 | src_reg == BPF_REG_1 || src_reg == BPF_REG_2) | 783 | src_reg == BPF_REG_1 || src_reg == BPF_REG_2) |
816 | EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); | 784 | EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); |
817 | else | 785 | else |
@@ -840,25 +808,26 @@ stx: if (is_imm8(insn->off)) | |||
840 | 808 | ||
841 | /* LDX: dst_reg = *(u8*)(src_reg + off) */ | 809 | /* LDX: dst_reg = *(u8*)(src_reg + off) */ |
842 | case BPF_LDX | BPF_MEM | BPF_B: | 810 | case BPF_LDX | BPF_MEM | BPF_B: |
843 | /* emit 'movzx rax, byte ptr [rax + off]' */ | 811 | /* Emit 'movzx rax, byte ptr [rax + off]' */ |
844 | EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); | 812 | EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); |
845 | goto ldx; | 813 | goto ldx; |
846 | case BPF_LDX | BPF_MEM | BPF_H: | 814 | case BPF_LDX | BPF_MEM | BPF_H: |
847 | /* emit 'movzx rax, word ptr [rax + off]' */ | 815 | /* Emit 'movzx rax, word ptr [rax + off]' */ |
848 | EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); | 816 | EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); |
849 | goto ldx; | 817 | goto ldx; |
850 | case BPF_LDX | BPF_MEM | BPF_W: | 818 | case BPF_LDX | BPF_MEM | BPF_W: |
851 | /* emit 'mov eax, dword ptr [rax+0x14]' */ | 819 | /* Emit 'mov eax, dword ptr [rax+0x14]' */ |
852 | if (is_ereg(dst_reg) || is_ereg(src_reg)) | 820 | if (is_ereg(dst_reg) || is_ereg(src_reg)) |
853 | EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); | 821 | EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); |
854 | else | 822 | else |
855 | EMIT1(0x8B); | 823 | EMIT1(0x8B); |
856 | goto ldx; | 824 | goto ldx; |
857 | case BPF_LDX | BPF_MEM | BPF_DW: | 825 | case BPF_LDX | BPF_MEM | BPF_DW: |
858 | /* emit 'mov rax, qword ptr [rax+0x14]' */ | 826 | /* Emit 'mov rax, qword ptr [rax+0x14]' */ |
859 | EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); | 827 | EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); |
860 | ldx: /* if insn->off == 0 we can save one extra byte, but | 828 | ldx: /* |
861 | * special case of x86 r13 which always needs an offset | 829 | * If insn->off == 0 we can save one extra byte, but |
830 | * special case of x86 R13 which always needs an offset | ||
862 | * is not worth the hassle | 831 | * is not worth the hassle |
863 | */ | 832 | */ |
864 | if (is_imm8(insn->off)) | 833 | if (is_imm8(insn->off)) |
@@ -870,7 +839,7 @@ ldx: /* if insn->off == 0 we can save one extra byte, but | |||
870 | 839 | ||
871 | /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */ | 840 | /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */ |
872 | case BPF_STX | BPF_XADD | BPF_W: | 841 | case BPF_STX | BPF_XADD | BPF_W: |
873 | /* emit 'lock add dword ptr [rax + off], eax' */ | 842 | /* Emit 'lock add dword ptr [rax + off], eax' */ |
874 | if (is_ereg(dst_reg) || is_ereg(src_reg)) | 843 | if (is_ereg(dst_reg) || is_ereg(src_reg)) |
875 | EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01); | 844 | EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01); |
876 | else | 845 | else |
@@ -889,35 +858,12 @@ xadd: if (is_imm8(insn->off)) | |||
889 | case BPF_JMP | BPF_CALL: | 858 | case BPF_JMP | BPF_CALL: |
890 | func = (u8 *) __bpf_call_base + imm32; | 859 | func = (u8 *) __bpf_call_base + imm32; |
891 | jmp_offset = func - (image + addrs[i]); | 860 | jmp_offset = func - (image + addrs[i]); |
892 | if (seen_ld_abs) { | ||
893 | reload_skb_data = bpf_helper_changes_pkt_data(func); | ||
894 | if (reload_skb_data) { | ||
895 | EMIT1(0x57); /* push %rdi */ | ||
896 | jmp_offset += 22; /* pop, mov, sub, mov */ | ||
897 | } else { | ||
898 | EMIT2(0x41, 0x52); /* push %r10 */ | ||
899 | EMIT2(0x41, 0x51); /* push %r9 */ | ||
900 | /* need to adjust jmp offset, since | ||
901 | * pop %r9, pop %r10 take 4 bytes after call insn | ||
902 | */ | ||
903 | jmp_offset += 4; | ||
904 | } | ||
905 | } | ||
906 | if (!imm32 || !is_simm32(jmp_offset)) { | 861 | if (!imm32 || !is_simm32(jmp_offset)) { |
907 | pr_err("unsupported bpf func %d addr %p image %p\n", | 862 | pr_err("unsupported BPF func %d addr %p image %p\n", |
908 | imm32, func, image); | 863 | imm32, func, image); |
909 | return -EINVAL; | 864 | return -EINVAL; |
910 | } | 865 | } |
911 | EMIT1_off32(0xE8, jmp_offset); | 866 | EMIT1_off32(0xE8, jmp_offset); |
912 | if (seen_ld_abs) { | ||
913 | if (reload_skb_data) { | ||
914 | EMIT1(0x5F); /* pop %rdi */ | ||
915 | emit_load_skb_data_hlen(&prog); | ||
916 | } else { | ||
917 | EMIT2(0x41, 0x59); /* pop %r9 */ | ||
918 | EMIT2(0x41, 0x5A); /* pop %r10 */ | ||
919 | } | ||
920 | } | ||
921 | break; | 867 | break; |
922 | 868 | ||
923 | case BPF_JMP | BPF_TAIL_CALL: | 869 | case BPF_JMP | BPF_TAIL_CALL: |
@@ -970,7 +916,7 @@ xadd: if (is_imm8(insn->off)) | |||
970 | else | 916 | else |
971 | EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32); | 917 | EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32); |
972 | 918 | ||
973 | emit_cond_jmp: /* convert BPF opcode to x86 */ | 919 | emit_cond_jmp: /* Convert BPF opcode to x86 */ |
974 | switch (BPF_OP(insn->code)) { | 920 | switch (BPF_OP(insn->code)) { |
975 | case BPF_JEQ: | 921 | case BPF_JEQ: |
976 | jmp_cond = X86_JE; | 922 | jmp_cond = X86_JE; |
@@ -996,22 +942,22 @@ emit_cond_jmp: /* convert BPF opcode to x86 */ | |||
996 | jmp_cond = X86_JBE; | 942 | jmp_cond = X86_JBE; |
997 | break; | 943 | break; |
998 | case BPF_JSGT: | 944 | case BPF_JSGT: |
999 | /* signed '>', GT in x86 */ | 945 | /* Signed '>', GT in x86 */ |
1000 | jmp_cond = X86_JG; | 946 | jmp_cond = X86_JG; |
1001 | break; | 947 | break; |
1002 | case BPF_JSLT: | 948 | case BPF_JSLT: |
1003 | /* signed '<', LT in x86 */ | 949 | /* Signed '<', LT in x86 */ |
1004 | jmp_cond = X86_JL; | 950 | jmp_cond = X86_JL; |
1005 | break; | 951 | break; |
1006 | case BPF_JSGE: | 952 | case BPF_JSGE: |
1007 | /* signed '>=', GE in x86 */ | 953 | /* Signed '>=', GE in x86 */ |
1008 | jmp_cond = X86_JGE; | 954 | jmp_cond = X86_JGE; |
1009 | break; | 955 | break; |
1010 | case BPF_JSLE: | 956 | case BPF_JSLE: |
1011 | /* signed '<=', LE in x86 */ | 957 | /* Signed '<=', LE in x86 */ |
1012 | jmp_cond = X86_JLE; | 958 | jmp_cond = X86_JLE; |
1013 | break; | 959 | break; |
1014 | default: /* to silence gcc warning */ | 960 | default: /* to silence GCC warning */ |
1015 | return -EFAULT; | 961 | return -EFAULT; |
1016 | } | 962 | } |
1017 | jmp_offset = addrs[i + insn->off] - addrs[i]; | 963 | jmp_offset = addrs[i + insn->off] - addrs[i]; |
@@ -1039,7 +985,7 @@ emit_cond_jmp: /* convert BPF opcode to x86 */ | |||
1039 | jmp_offset = addrs[i + insn->off] - addrs[i]; | 985 | jmp_offset = addrs[i + insn->off] - addrs[i]; |
1040 | 986 | ||
1041 | if (!jmp_offset) | 987 | if (!jmp_offset) |
1042 | /* optimize out nop jumps */ | 988 | /* Optimize out nop jumps */ |
1043 | break; | 989 | break; |
1044 | emit_jmp: | 990 | emit_jmp: |
1045 | if (is_imm8(jmp_offset)) { | 991 | if (is_imm8(jmp_offset)) { |
@@ -1052,66 +998,13 @@ emit_jmp: | |||
1052 | } | 998 | } |
1053 | break; | 999 | break; |
1054 | 1000 | ||
1055 | case BPF_LD | BPF_IND | BPF_W: | ||
1056 | func = sk_load_word; | ||
1057 | goto common_load; | ||
1058 | case BPF_LD | BPF_ABS | BPF_W: | ||
1059 | func = CHOOSE_LOAD_FUNC(imm32, sk_load_word); | ||
1060 | common_load: | ||
1061 | ctx->seen_ld_abs = seen_ld_abs = true; | ||
1062 | jmp_offset = func - (image + addrs[i]); | ||
1063 | if (!func || !is_simm32(jmp_offset)) { | ||
1064 | pr_err("unsupported bpf func %d addr %p image %p\n", | ||
1065 | imm32, func, image); | ||
1066 | return -EINVAL; | ||
1067 | } | ||
1068 | if (BPF_MODE(insn->code) == BPF_ABS) { | ||
1069 | /* mov %esi, imm32 */ | ||
1070 | EMIT1_off32(0xBE, imm32); | ||
1071 | } else { | ||
1072 | /* mov %rsi, src_reg */ | ||
1073 | EMIT_mov(BPF_REG_2, src_reg); | ||
1074 | if (imm32) { | ||
1075 | if (is_imm8(imm32)) | ||
1076 | /* add %esi, imm8 */ | ||
1077 | EMIT3(0x83, 0xC6, imm32); | ||
1078 | else | ||
1079 | /* add %esi, imm32 */ | ||
1080 | EMIT2_off32(0x81, 0xC6, imm32); | ||
1081 | } | ||
1082 | } | ||
1083 | /* skb pointer is in R6 (%rbx), it will be copied into | ||
1084 | * %rdi if skb_copy_bits() call is necessary. | ||
1085 | * sk_load_* helpers also use %r10 and %r9d. | ||
1086 | * See bpf_jit.S | ||
1087 | */ | ||
1088 | if (seen_ax_reg) | ||
1089 | /* r10 = skb->data, mov %r10, off32(%rbx) */ | ||
1090 | EMIT3_off32(0x4c, 0x8b, 0x93, | ||
1091 | offsetof(struct sk_buff, data)); | ||
1092 | EMIT1_off32(0xE8, jmp_offset); /* call */ | ||
1093 | break; | ||
1094 | |||
1095 | case BPF_LD | BPF_IND | BPF_H: | ||
1096 | func = sk_load_half; | ||
1097 | goto common_load; | ||
1098 | case BPF_LD | BPF_ABS | BPF_H: | ||
1099 | func = CHOOSE_LOAD_FUNC(imm32, sk_load_half); | ||
1100 | goto common_load; | ||
1101 | case BPF_LD | BPF_IND | BPF_B: | ||
1102 | func = sk_load_byte; | ||
1103 | goto common_load; | ||
1104 | case BPF_LD | BPF_ABS | BPF_B: | ||
1105 | func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte); | ||
1106 | goto common_load; | ||
1107 | |||
1108 | case BPF_JMP | BPF_EXIT: | 1001 | case BPF_JMP | BPF_EXIT: |
1109 | if (seen_exit) { | 1002 | if (seen_exit) { |
1110 | jmp_offset = ctx->cleanup_addr - addrs[i]; | 1003 | jmp_offset = ctx->cleanup_addr - addrs[i]; |
1111 | goto emit_jmp; | 1004 | goto emit_jmp; |
1112 | } | 1005 | } |
1113 | seen_exit = true; | 1006 | seen_exit = true; |
1114 | /* update cleanup_addr */ | 1007 | /* Update cleanup_addr */ |
1115 | ctx->cleanup_addr = proglen; | 1008 | ctx->cleanup_addr = proglen; |
1116 | /* mov rbx, qword ptr [rbp+0] */ | 1009 | /* mov rbx, qword ptr [rbp+0] */ |
1117 | EMIT4(0x48, 0x8B, 0x5D, 0); | 1010 | EMIT4(0x48, 0x8B, 0x5D, 0); |
@@ -1129,10 +1022,11 @@ common_load: | |||
1129 | break; | 1022 | break; |
1130 | 1023 | ||
1131 | default: | 1024 | default: |
1132 | /* By design x64 JIT should support all BPF instructions | 1025 | /* |
1026 | * By design x86-64 JIT should support all BPF instructions. | ||
1133 | * This error will be seen if new instruction was added | 1027 | * This error will be seen if new instruction was added |
1134 | * to interpreter, but not to JIT | 1028 | * to the interpreter, but not to the JIT, or if there is |
1135 | * or if there is junk in bpf_prog | 1029 | * junk in bpf_prog. |
1136 | */ | 1030 | */ |
1137 | pr_err("bpf_jit: unknown opcode %02x\n", insn->code); | 1031 | pr_err("bpf_jit: unknown opcode %02x\n", insn->code); |
1138 | return -EINVAL; | 1032 | return -EINVAL; |
@@ -1184,7 +1078,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) | |||
1184 | return orig_prog; | 1078 | return orig_prog; |
1185 | 1079 | ||
1186 | tmp = bpf_jit_blind_constants(prog); | 1080 | tmp = bpf_jit_blind_constants(prog); |
1187 | /* If blinding was requested and we failed during blinding, | 1081 | /* |
1082 | * If blinding was requested and we failed during blinding, | ||
1188 | * we must fall back to the interpreter. | 1083 | * we must fall back to the interpreter. |
1189 | */ | 1084 | */ |
1190 | if (IS_ERR(tmp)) | 1085 | if (IS_ERR(tmp)) |
@@ -1218,8 +1113,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) | |||
1218 | goto out_addrs; | 1113 | goto out_addrs; |
1219 | } | 1114 | } |
1220 | 1115 | ||
1221 | /* Before first pass, make a rough estimation of addrs[] | 1116 | /* |
1222 | * each bpf instruction is translated to less than 64 bytes | 1117 | * Before first pass, make a rough estimation of addrs[] |
1118 | * each BPF instruction is translated to less than 64 bytes | ||
1223 | */ | 1119 | */ |
1224 | for (proglen = 0, i = 0; i < prog->len; i++) { | 1120 | for (proglen = 0, i = 0; i < prog->len; i++) { |
1225 | proglen += 64; | 1121 | proglen += 64; |
@@ -1228,10 +1124,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) | |||
1228 | ctx.cleanup_addr = proglen; | 1124 | ctx.cleanup_addr = proglen; |
1229 | skip_init_addrs: | 1125 | skip_init_addrs: |
1230 | 1126 | ||
1231 | /* JITed image shrinks with every pass and the loop iterates | 1127 | /* |
1232 | * until the image stops shrinking. Very large bpf programs | 1128 | * JITed image shrinks with every pass and the loop iterates |
1129 | * until the image stops shrinking. Very large BPF programs | ||
1233 | * may converge on the last pass. In such case do one more | 1130 | * may converge on the last pass. In such case do one more |
1234 | * pass to emit the final image | 1131 | * pass to emit the final image. |
1235 | */ | 1132 | */ |
1236 | for (pass = 0; pass < 20 || image; pass++) { | 1133 | for (pass = 0; pass < 20 || image; pass++) { |
1237 | proglen = do_jit(prog, addrs, image, oldproglen, &ctx); | 1134 | proglen = do_jit(prog, addrs, image, oldproglen, &ctx); |
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c new file mode 100644 index 000000000000..0cc04e30adc1 --- /dev/null +++ b/arch/x86/net/bpf_jit_comp32.c | |||
@@ -0,0 +1,2419 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Just-In-Time compiler for eBPF filters on IA32 (32bit x86) | ||
4 | * | ||
5 | * Author: Wang YanQing (udknight@gmail.com) | ||
6 | * The code based on code and ideas from: | ||
7 | * Eric Dumazet (eric.dumazet@gmail.com) | ||
8 | * and from: | ||
9 | * Shubham Bansal <illusionist.neo@gmail.com> | ||
10 | */ | ||
11 | |||
12 | #include <linux/netdevice.h> | ||
13 | #include <linux/filter.h> | ||
14 | #include <linux/if_vlan.h> | ||
15 | #include <asm/cacheflush.h> | ||
16 | #include <asm/set_memory.h> | ||
17 | #include <asm/nospec-branch.h> | ||
18 | #include <linux/bpf.h> | ||
19 | |||
20 | /* | ||
21 | * eBPF prog stack layout: | ||
22 | * | ||
23 | * high | ||
24 | * original ESP => +-----+ | ||
25 | * | | callee saved registers | ||
26 | * +-----+ | ||
27 | * | ... | eBPF JIT scratch space | ||
28 | * BPF_FP,IA32_EBP => +-----+ | ||
29 | * | ... | eBPF prog stack | ||
30 | * +-----+ | ||
31 | * |RSVD | JIT scratchpad | ||
32 | * current ESP => +-----+ | ||
33 | * | | | ||
34 | * | ... | Function call stack | ||
35 | * | | | ||
36 | * +-----+ | ||
37 | * low | ||
38 | * | ||
39 | * The callee saved registers: | ||
40 | * | ||
41 | * high | ||
42 | * original ESP => +------------------+ \ | ||
43 | * | ebp | | | ||
44 | * current EBP => +------------------+ } callee saved registers | ||
45 | * | ebx,esi,edi | | | ||
46 | * +------------------+ / | ||
47 | * low | ||
48 | */ | ||
49 | |||
50 | static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) | ||
51 | { | ||
52 | if (len == 1) | ||
53 | *ptr = bytes; | ||
54 | else if (len == 2) | ||
55 | *(u16 *)ptr = bytes; | ||
56 | else { | ||
57 | *(u32 *)ptr = bytes; | ||
58 | barrier(); | ||
59 | } | ||
60 | return ptr + len; | ||
61 | } | ||
62 | |||
63 | #define EMIT(bytes, len) \ | ||
64 | do { prog = emit_code(prog, bytes, len); cnt += len; } while (0) | ||
65 | |||
66 | #define EMIT1(b1) EMIT(b1, 1) | ||
67 | #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) | ||
68 | #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) | ||
69 | #define EMIT4(b1, b2, b3, b4) \ | ||
70 | EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) | ||
71 | |||
72 | #define EMIT1_off32(b1, off) \ | ||
73 | do { EMIT1(b1); EMIT(off, 4); } while (0) | ||
74 | #define EMIT2_off32(b1, b2, off) \ | ||
75 | do { EMIT2(b1, b2); EMIT(off, 4); } while (0) | ||
76 | #define EMIT3_off32(b1, b2, b3, off) \ | ||
77 | do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) | ||
78 | #define EMIT4_off32(b1, b2, b3, b4, off) \ | ||
79 | do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) | ||
80 | |||
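These macros pack up to four opcode bytes into one u32 and append them to the JIT image; emit_code() stores them low byte first. A small user-space sketch of the same packing (hypothetical, outside the kernel; assumes a little-endian host as on x86):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	static uint8_t *emit_code(uint8_t *ptr, uint32_t bytes, unsigned int len)
	{
		memcpy(ptr, &bytes, len);	/* low byte first on little-endian */
		return ptr + len;
	}

	int main(void)
	{
		uint8_t image[8], *prog = image;

		/* EMIT2(0x89, 0xC8) packs 0x89 + (0xC8 << 8) and emits
		 * the bytes 89 c8, i.e. 'mov eax,ecx'.
		 */
		prog = emit_code(prog, 0x89 + (0xC8 << 8), 2);
		for (uint8_t *p = image; p < prog; p++)
			printf("%02x ", *p);
		printf("\n");
		return 0;
	}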
81 | #define jmp_label(label, jmp_insn_len) (label - cnt - jmp_insn_len) | ||
82 | |||
83 | static bool is_imm8(int value) | ||
84 | { | ||
85 | return value <= 127 && value >= -128; | ||
86 | } | ||
87 | |||
88 | static bool is_simm32(s64 value) | ||
89 | { | ||
90 | return value == (s64) (s32) value; | ||
91 | } | ||
92 | |||
93 | #define STACK_OFFSET(k) (k) | ||
94 | #define TCALL_CNT (MAX_BPF_JIT_REG + 0) /* Tail Call Count */ | ||
95 | |||
96 | #define IA32_EAX (0x0) | ||
97 | #define IA32_EBX (0x3) | ||
98 | #define IA32_ECX (0x1) | ||
99 | #define IA32_EDX (0x2) | ||
100 | #define IA32_ESI (0x6) | ||
101 | #define IA32_EDI (0x7) | ||
102 | #define IA32_EBP (0x5) | ||
103 | #define IA32_ESP (0x4) | ||
104 | |||
105 | /* | ||
106 | * List of x86 conditional jump opcodes (. + s8) | ||
107 | * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) | ||
108 | */ | ||
109 | #define IA32_JB 0x72 | ||
110 | #define IA32_JAE 0x73 | ||
111 | #define IA32_JE 0x74 | ||
112 | #define IA32_JNE 0x75 | ||
113 | #define IA32_JBE 0x76 | ||
114 | #define IA32_JA 0x77 | ||
115 | #define IA32_JL 0x7C | ||
116 | #define IA32_JGE 0x7D | ||
117 | #define IA32_JLE 0x7E | ||
118 | #define IA32_JG 0x7F | ||
119 | |||
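For example, IA32_JE (0x74) takes an 8-bit displacement; adding 0x10 and prefixing 0x0F gives the near form 0x0F 0x84 with a 32-bit displacement. The shift helpers below select between the two forms exactly this way; a hypothetical helper in the file's own style (emit_je() is not part of the patch):

	static void emit_je(u8 **pprog, int offset)
	{
		u8 *prog = *pprog;
		int cnt = 0;

		if (is_imm8(offset))
			EMIT2(IA32_JE, offset);			   /* 74 rel8 */
		else
			EMIT2_off32(0x0F, IA32_JE + 0x10, offset); /* 0F 84 rel32 */
		*pprog = prog;
	}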
120 | /* | ||
121 | * Map eBPF registers to IA32 32bit registers or stack scratch space. | ||
122 | * | ||
123 | * 1. All the registers, R0-R10, are mapped to scratch space on stack. | ||
124 | * 2. We need two 64 bit temp registers to do complex operations on eBPF | ||
125 | * registers. | ||
126 | * 3. For performance reasons, BPF_REG_AX, used for constant blinding, is | ||
127 | * mapped to a real hardware register pair, IA32_ESI and IA32_EDI. | ||
128 | * | ||
129 | * As the eBPF registers are all 64 bit and IA32 has only 32 bit | ||
130 | * registers, we map each eBPF register to two IA32 32 bit registers | ||
131 | * or stack scratch slots and build the 64 bit eBPF register from those. | ||
132 | * | ||
133 | * We use IA32_EAX, IA32_EDX, IA32_ECX, IA32_EBX as temporary registers. | ||
134 | */ | ||
135 | static const u8 bpf2ia32[][2] = { | ||
136 | /* Return value from in-kernel function, and exit value from eBPF */ | ||
137 | [BPF_REG_0] = {STACK_OFFSET(0), STACK_OFFSET(4)}, | ||
138 | |||
139 | /* The arguments from eBPF program to in-kernel function */ | ||
140 | /* Stored on stack scratch space */ | ||
141 | [BPF_REG_1] = {STACK_OFFSET(8), STACK_OFFSET(12)}, | ||
142 | [BPF_REG_2] = {STACK_OFFSET(16), STACK_OFFSET(20)}, | ||
143 | [BPF_REG_3] = {STACK_OFFSET(24), STACK_OFFSET(28)}, | ||
144 | [BPF_REG_4] = {STACK_OFFSET(32), STACK_OFFSET(36)}, | ||
145 | [BPF_REG_5] = {STACK_OFFSET(40), STACK_OFFSET(44)}, | ||
146 | |||
147 | /* Callee saved registers that in-kernel function will preserve */ | ||
148 | /* Stored on stack scratch space */ | ||
149 | [BPF_REG_6] = {STACK_OFFSET(48), STACK_OFFSET(52)}, | ||
150 | [BPF_REG_7] = {STACK_OFFSET(56), STACK_OFFSET(60)}, | ||
151 | [BPF_REG_8] = {STACK_OFFSET(64), STACK_OFFSET(68)}, | ||
152 | [BPF_REG_9] = {STACK_OFFSET(72), STACK_OFFSET(76)}, | ||
153 | |||
154 | /* Read only Frame Pointer to access Stack */ | ||
155 | [BPF_REG_FP] = {STACK_OFFSET(80), STACK_OFFSET(84)}, | ||
156 | |||
157 | /* Temporary register for blinding constants. */ | ||
158 | [BPF_REG_AX] = {IA32_ESI, IA32_EDI}, | ||
159 | |||
160 | /* Tail call count. Stored on stack scratch space. */ | ||
161 | [TCALL_CNT] = {STACK_OFFSET(88), STACK_OFFSET(92)}, | ||
162 | }; | ||
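Reading the table concretely: BPF_REG_1 occupies [ebp+8] (low word) and [ebp+12] (high word). A hypothetical fragment in the file's own style (with prog/cnt in scope as in the helpers below), loading R1 into an EAX:EDX pair:

	const u8 *r1 = bpf2ia32[BPF_REG_1];

	/* mov eax,dword ptr [ebp+8]   ; low 32 bits of R1  */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
	/* mov edx,dword ptr [ebp+12]  ; high 32 bits of R1 */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r1[1]));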
163 | |||
164 | #define dst_lo dst[0] | ||
165 | #define dst_hi dst[1] | ||
166 | #define src_lo src[0] | ||
167 | #define src_hi src[1] | ||
168 | |||
169 | #define STACK_ALIGNMENT 8 | ||
170 | /* | ||
171 | * Stack space for BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, | ||
172 | * BPF_REG_4, BPF_REG_5, BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9, | ||
173 | * BPF_REG_FP, BPF_REG_AX and the tail call count. | ||
174 | */ | ||
175 | #define SCRATCH_SIZE 96 | ||
176 | |||
177 | /* Total stack size used in JITed code */ | ||
178 | #define _STACK_SIZE (stack_depth + SCRATCH_SIZE) | ||
179 | |||
180 | #define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT) | ||
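Worked example with hypothetical numbers: a program with stack_depth = 12 gets

	/* _STACK_SIZE = 12 + SCRATCH_SIZE(96) = 108
	 * STACK_SIZE  = ALIGN(108, 8)         = 112
	 */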
181 | |||
182 | /* Get the offset of eBPF registers stored in the scratch space. */ | ||
183 | #define STACK_VAR(off) (off) | ||
184 | |||
185 | /* Encode 'dst_reg' register into IA32 opcode 'byte' */ | ||
186 | static u8 add_1reg(u8 byte, u32 dst_reg) | ||
187 | { | ||
188 | return byte + dst_reg; | ||
189 | } | ||
190 | |||
191 | /* Encode 'dst_reg' and 'src_reg' registers into IA32 opcode 'byte' */ | ||
192 | static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) | ||
193 | { | ||
194 | return byte + dst_reg + (src_reg << 3); | ||
195 | } | ||
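Both helpers fill the register fields of a ModRM byte: r/m in bits 0-2, reg in bits 3-5. A worked example (not part of the patch):

	/* add_2reg(0xC0, IA32_ECX, IA32_EAX) = 0xC0 + 1 + (0 << 3) = 0xC1,
	 * so EMIT2(0x89, 0xC1) encodes 'mov ecx,eax'
	 * (0x89 = MOV r/m32,r32 with r/m = ecx, reg = eax).
	 */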
196 | |||
197 | static void jit_fill_hole(void *area, unsigned int size) | ||
198 | { | ||
199 | /* Fill whole space with int3 instructions */ | ||
200 | memset(area, 0xcc, size); | ||
201 | } | ||
202 | |||
203 | static inline void emit_ia32_mov_i(const u8 dst, const u32 val, bool dstk, | ||
204 | u8 **pprog) | ||
205 | { | ||
206 | u8 *prog = *pprog; | ||
207 | int cnt = 0; | ||
208 | |||
209 | if (dstk) { | ||
210 | if (val == 0) { | ||
211 | /* xor eax,eax */ | ||
212 | EMIT2(0x33, add_2reg(0xC0, IA32_EAX, IA32_EAX)); | ||
213 | /* mov dword ptr [ebp+off],eax */ | ||
214 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
215 | STACK_VAR(dst)); | ||
216 | } else { | ||
217 | EMIT3_off32(0xC7, add_1reg(0x40, IA32_EBP), | ||
218 | STACK_VAR(dst), val); | ||
219 | } | ||
220 | } else { | ||
221 | if (val == 0) | ||
222 | EMIT2(0x33, add_2reg(0xC0, dst, dst)); | ||
223 | else | ||
224 | EMIT2_off32(0xC7, add_1reg(0xC0, dst), | ||
225 | val); | ||
226 | } | ||
227 | *pprog = prog; | ||
228 | } | ||
229 | |||
230 | /* dst = src (4 bytes) */ | ||
231 | static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk, | ||
232 | bool sstk, u8 **pprog) | ||
233 | { | ||
234 | u8 *prog = *pprog; | ||
235 | int cnt = 0; | ||
236 | u8 sreg = sstk ? IA32_EAX : src; | ||
237 | |||
238 | if (sstk) | ||
239 | /* mov eax,dword ptr [ebp+off] */ | ||
240 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src)); | ||
241 | if (dstk) | ||
242 | /* mov dword ptr [ebp+off],sreg */ | ||
243 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, sreg), STACK_VAR(dst)); | ||
244 | else | ||
245 | /* mov dst,sreg */ | ||
246 | EMIT2(0x89, add_2reg(0xC0, dst, sreg)); | ||
247 | |||
248 | *pprog = prog; | ||
249 | } | ||
250 | |||
251 | /* dst = src */ | ||
252 | static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[], | ||
253 | const u8 src[], bool dstk, | ||
254 | bool sstk, u8 **pprog) | ||
255 | { | ||
256 | emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog); | ||
257 | if (is64) | ||
258 | /* complete 8 byte move */ | ||
259 | emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog); | ||
260 | else | ||
261 | /* zero out high 4 bytes */ | ||
262 | emit_ia32_mov_i(dst_hi, 0, dstk, pprog); | ||
263 | } | ||
264 | |||
265 | /* Sign extended move */ | ||
266 | static inline void emit_ia32_mov_i64(const bool is64, const u8 dst[], | ||
267 | const u32 val, bool dstk, u8 **pprog) | ||
268 | { | ||
269 | u32 hi = 0; | ||
270 | |||
271 | if (is64 && (val & (1<<31))) | ||
272 | hi = (u32)~0; | ||
273 | emit_ia32_mov_i(dst_lo, val, dstk, pprog); | ||
274 | emit_ia32_mov_i(dst_hi, hi, dstk, pprog); | ||
275 | } | ||
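Worked example: a hypothetical 64-bit move of -2 has val = 0xFFFFFFFE, so bit 31 is set and the high word becomes all ones:

	/* val = 0xFFFFFFFE (-2): bit 31 set -> hi = 0xFFFFFFFF,
	 * pair = 0xFFFFFFFF:0xFFFFFFFE = -2 as a 64-bit value.
	 */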
276 | |||
277 | /* | ||
278 | * ALU operation (32 bit) | ||
279 | * dst = dst * src | ||
280 | */ | ||
281 | static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk, | ||
282 | bool sstk, u8 **pprog) | ||
283 | { | ||
284 | u8 *prog = *pprog; | ||
285 | int cnt = 0; | ||
286 | u8 sreg = sstk ? IA32_ECX : src; | ||
287 | |||
288 | if (sstk) | ||
289 | /* mov ecx,dword ptr [ebp+off] */ | ||
290 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src)); | ||
291 | |||
292 | if (dstk) | ||
293 | /* mov eax,dword ptr [ebp+off] */ | ||
294 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst)); | ||
295 | else | ||
296 | /* mov eax,dst */ | ||
297 | EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX)); | ||
298 | |||
299 | |||
300 | EMIT2(0xF7, add_1reg(0xE0, sreg)); | ||
301 | |||
302 | if (dstk) | ||
303 | /* mov dword ptr [ebp+off],eax */ | ||
304 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
305 | STACK_VAR(dst)); | ||
306 | else | ||
307 | /* mov dst,eax */ | ||
308 | EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX)); | ||
309 | |||
310 | *pprog = prog; | ||
311 | } | ||
312 | |||
313 | static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val, | ||
314 | bool dstk, u8 **pprog) | ||
315 | { | ||
316 | u8 *prog = *pprog; | ||
317 | int cnt = 0; | ||
318 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
319 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
320 | |||
321 | if (dstk && val != 64) { | ||
322 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
323 | STACK_VAR(dst_lo)); | ||
324 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
325 | STACK_VAR(dst_hi)); | ||
326 | } | ||
327 | switch (val) { | ||
328 | case 16: | ||
329 | /* | ||
330 | * Emit 'movzwl eax,ax' to zero extend 16-bit | ||
331 | * into 64 bit | ||
332 | */ | ||
333 | EMIT2(0x0F, 0xB7); | ||
334 | EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
335 | /* xor dreg_hi,dreg_hi */ | ||
336 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
337 | break; | ||
338 | case 32: | ||
339 | /* xor dreg_hi,dreg_hi */ | ||
340 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
341 | break; | ||
342 | case 64: | ||
343 | /* nop */ | ||
344 | break; | ||
345 | } | ||
346 | |||
347 | if (dstk && val != 64) { | ||
348 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
349 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
350 | STACK_VAR(dst_lo)); | ||
351 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
352 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
353 | STACK_VAR(dst_hi)); | ||
354 | } | ||
355 | *pprog = prog; | ||
356 | } | ||
357 | |||
358 | static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val, | ||
359 | bool dstk, u8 **pprog) | ||
360 | { | ||
361 | u8 *prog = *pprog; | ||
362 | int cnt = 0; | ||
363 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
364 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
365 | |||
366 | if (dstk) { | ||
367 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
368 | STACK_VAR(dst_lo)); | ||
369 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
370 | STACK_VAR(dst_hi)); | ||
371 | } | ||
372 | switch (val) { | ||
373 | case 16: | ||
374 | /* Emit 'ror %ax, 8' to swap lower 2 bytes */ | ||
375 | EMIT1(0x66); | ||
376 | EMIT3(0xC1, add_1reg(0xC8, dreg_lo), 8); | ||
377 | |||
378 | EMIT2(0x0F, 0xB7); | ||
379 | EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
380 | |||
381 | /* xor dreg_hi,dreg_hi */ | ||
382 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
383 | break; | ||
384 | case 32: | ||
385 | /* Emit 'bswap eax' to swap lower 4 bytes */ | ||
386 | EMIT1(0x0F); | ||
387 | EMIT1(add_1reg(0xC8, dreg_lo)); | ||
388 | |||
389 | /* xor dreg_hi,dreg_hi */ | ||
390 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
391 | break; | ||
392 | case 64: | ||
393 | /* Emit 'bswap eax' to swap lower 4 bytes */ | ||
394 | EMIT1(0x0F); | ||
395 | EMIT1(add_1reg(0xC8, dreg_lo)); | ||
396 | |||
397 | /* Emit 'bswap edx' to swap lower 4 bytes */ | ||
398 | EMIT1(0x0F); | ||
399 | EMIT1(add_1reg(0xC8, dreg_hi)); | ||
400 | |||
401 | /* mov ecx,dreg_hi */ | ||
402 | EMIT2(0x89, add_2reg(0xC0, IA32_ECX, dreg_hi)); | ||
403 | /* mov dreg_hi,dreg_lo */ | ||
404 | EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo)); | ||
405 | /* mov dreg_lo,ecx */ | ||
406 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX)); | ||
407 | |||
408 | break; | ||
409 | } | ||
410 | if (dstk) { | ||
411 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
412 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
413 | STACK_VAR(dst_lo)); | ||
414 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
415 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
416 | STACK_VAR(dst_hi)); | ||
417 | } | ||
418 | *pprog = prog; | ||
419 | } | ||
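The 64-bit case byte-swaps each half and then exchanges the halves. A user-space model of the same transformation (illustrative; uses the GCC/Clang __builtin_bswap32):

	#include <stdint.h>

	static uint64_t bswap64_model(uint32_t lo, uint32_t hi)
	{
		uint32_t l = __builtin_bswap32(lo);	/* bswap dreg_lo */
		uint32_t h = __builtin_bswap32(hi);	/* bswap dreg_hi */

		/* the three-mov exchange: the swapped low half becomes
		 * the new high half and vice versa
		 */
		return ((uint64_t)l << 32) | h;
	}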
420 | |||
421 | /* | ||
422 | * ALU operation (32 bit) | ||
423 | * dst = dst (div|mod) src | ||
424 | */ | ||
425 | static inline void emit_ia32_div_mod_r(const u8 op, const u8 dst, const u8 src, | ||
426 | bool dstk, bool sstk, u8 **pprog) | ||
427 | { | ||
428 | u8 *prog = *pprog; | ||
429 | int cnt = 0; | ||
430 | |||
431 | if (sstk) | ||
432 | /* mov ecx,dword ptr [ebp+off] */ | ||
433 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
434 | STACK_VAR(src)); | ||
435 | else if (src != IA32_ECX) | ||
436 | /* mov ecx,src */ | ||
437 | EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX)); | ||
438 | |||
439 | if (dstk) | ||
440 | /* mov eax,dword ptr [ebp+off] */ | ||
441 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
442 | STACK_VAR(dst)); | ||
443 | else | ||
444 | /* mov eax,dst */ | ||
445 | EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX)); | ||
446 | |||
447 | /* xor edx,edx */ | ||
448 | EMIT2(0x31, add_2reg(0xC0, IA32_EDX, IA32_EDX)); | ||
449 | /* div ecx */ | ||
450 | EMIT2(0xF7, add_1reg(0xF0, IA32_ECX)); | ||
451 | |||
452 | if (op == BPF_MOD) { | ||
453 | if (dstk) | ||
454 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
455 | STACK_VAR(dst)); | ||
456 | else | ||
457 | EMIT2(0x89, add_2reg(0xC0, dst, IA32_EDX)); | ||
458 | } else { | ||
459 | if (dstk) | ||
460 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
461 | STACK_VAR(dst)); | ||
462 | else | ||
463 | EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX)); | ||
464 | } | ||
465 | *pprog = prog; | ||
466 | } | ||
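Because edx is zeroed first, 'div ecx' computes eax = dst / src and edx = dst % src in plain unsigned 32-bit arithmetic; BPF_MOD stores edx back, everything else stores eax. A one-function user-space model (illustrative):

	#include <stdint.h>

	static uint32_t divmod_model(uint32_t dst, uint32_t src, int is_mod)
	{
		return is_mod ? dst % src : dst / src;	/* edx : eax of 'div' */
	}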
467 | |||
468 | /* | ||
469 | * ALU operation (32 bit) | ||
470 | * dst = dst (shift) src | ||
471 | */ | ||
472 | static inline void emit_ia32_shift_r(const u8 op, const u8 dst, const u8 src, | ||
473 | bool dstk, bool sstk, u8 **pprog) | ||
474 | { | ||
475 | u8 *prog = *pprog; | ||
476 | int cnt = 0; | ||
477 | u8 dreg = dstk ? IA32_EAX : dst; | ||
478 | u8 b2; | ||
479 | |||
480 | if (dstk) | ||
481 | /* mov eax,dword ptr [ebp+off] */ | ||
482 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst)); | ||
483 | |||
484 | if (sstk) | ||
485 | /* mov ecx,dword ptr [ebp+off] */ | ||
486 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src)); | ||
487 | else if (src != IA32_ECX) | ||
488 | /* mov ecx,src */ | ||
489 | EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX)); | ||
490 | |||
491 | switch (op) { | ||
492 | case BPF_LSH: | ||
493 | b2 = 0xE0; break; | ||
494 | case BPF_RSH: | ||
495 | b2 = 0xE8; break; | ||
496 | case BPF_ARSH: | ||
497 | b2 = 0xF8; break; | ||
498 | default: | ||
499 | return; | ||
500 | } | ||
501 | EMIT2(0xD3, add_1reg(b2, dreg)); | ||
502 | |||
503 | if (dstk) | ||
504 | /* mov dword ptr [ebp+off],dreg */ | ||
505 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), STACK_VAR(dst)); | ||
506 | *pprog = prog; | ||
507 | } | ||
508 | |||
509 | /* | ||
510 | * ALU operation (32 bit) | ||
511 | * dst = dst (op) src | ||
512 | */ | ||
513 | static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op, | ||
514 | const u8 dst, const u8 src, bool dstk, | ||
515 | bool sstk, u8 **pprog) | ||
516 | { | ||
517 | u8 *prog = *pprog; | ||
518 | int cnt = 0; | ||
519 | u8 sreg = sstk ? IA32_EAX : src; | ||
520 | u8 dreg = dstk ? IA32_EDX : dst; | ||
521 | |||
522 | if (sstk) | ||
523 | /* mov eax,dword ptr [ebp+off] */ | ||
524 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src)); | ||
525 | |||
526 | if (dstk) | ||
527 | /* mov edx,dword ptr [ebp+off] */ | ||
528 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst)); | ||
529 | |||
530 | switch (BPF_OP(op)) { | ||
531 | /* dst = dst + src */ | ||
532 | case BPF_ADD: | ||
533 | if (hi && is64) | ||
534 | EMIT2(0x11, add_2reg(0xC0, dreg, sreg)); | ||
535 | else | ||
536 | EMIT2(0x01, add_2reg(0xC0, dreg, sreg)); | ||
537 | break; | ||
538 | /* dst = dst - src */ | ||
539 | case BPF_SUB: | ||
540 | if (hi && is64) | ||
541 | EMIT2(0x19, add_2reg(0xC0, dreg, sreg)); | ||
542 | else | ||
543 | EMIT2(0x29, add_2reg(0xC0, dreg, sreg)); | ||
544 | break; | ||
545 | /* dst = dst | src */ | ||
546 | case BPF_OR: | ||
547 | EMIT2(0x09, add_2reg(0xC0, dreg, sreg)); | ||
548 | break; | ||
549 | /* dst = dst & src */ | ||
550 | case BPF_AND: | ||
551 | EMIT2(0x21, add_2reg(0xC0, dreg, sreg)); | ||
552 | break; | ||
553 | /* dst = dst ^ src */ | ||
554 | case BPF_XOR: | ||
555 | EMIT2(0x31, add_2reg(0xC0, dreg, sreg)); | ||
556 | break; | ||
557 | } | ||
558 | |||
559 | if (dstk) | ||
560 | /* mov dword ptr [ebp+off],dreg */ | ||
561 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), | ||
562 | STACK_VAR(dst)); | ||
563 | *pprog = prog; | ||
564 | } | ||
565 | |||
566 | /* ALU operation (64 bit) */ | ||
567 | static inline void emit_ia32_alu_r64(const bool is64, const u8 op, | ||
568 | const u8 dst[], const u8 src[], | ||
569 | bool dstk, bool sstk, | ||
570 | u8 **pprog) | ||
571 | { | ||
572 | u8 *prog = *pprog; | ||
573 | |||
574 | emit_ia32_alu_r(is64, false, op, dst_lo, src_lo, dstk, sstk, &prog); | ||
575 | if (is64) | ||
576 | emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk, | ||
577 | &prog); | ||
578 | else | ||
579 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
580 | *pprog = prog; | ||
581 | } | ||
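For 64-bit BPF_ADD/BPF_SUB the high word uses the carrying opcodes (0x11 is adc, 0x19 is sbb), so the carry or borrow from the low word propagates. A user-space model of the BPF_ADD pairing (illustrative):

	#include <stdint.h>

	static void add64_model(uint32_t *dlo, uint32_t *dhi,
				uint32_t slo, uint32_t shi)
	{
		uint32_t old_lo = *dlo;

		*dlo += slo;			/* add dreg_lo,sreg_lo */
		*dhi += shi + (*dlo < old_lo);	/* adc dreg_hi,sreg_hi */
	}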
582 | |||
583 | /* | ||
584 | * ALU operation (32 bit) | ||
585 | * dst = dst (op) val | ||
586 | */ | ||
587 | static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op, | ||
588 | const u8 dst, const s32 val, bool dstk, | ||
589 | u8 **pprog) | ||
590 | { | ||
591 | u8 *prog = *pprog; | ||
592 | int cnt = 0; | ||
593 | u8 dreg = dstk ? IA32_EAX : dst; | ||
594 | u8 sreg = IA32_EDX; | ||
595 | |||
596 | if (dstk) | ||
597 | /* mov eax,dword ptr [ebp+off] */ | ||
598 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst)); | ||
599 | |||
600 | if (!is_imm8(val)) | ||
601 | /* mov edx,imm32 */ | ||
602 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EDX), val); | ||
603 | |||
604 | switch (op) { | ||
605 | /* dst = dst + val */ | ||
606 | case BPF_ADD: | ||
607 | if (hi && is64) { | ||
608 | if (is_imm8(val)) | ||
609 | EMIT3(0x83, add_1reg(0xD0, dreg), val); | ||
610 | else | ||
611 | EMIT2(0x11, add_2reg(0xC0, dreg, sreg)); | ||
612 | } else { | ||
613 | if (is_imm8(val)) | ||
614 | EMIT3(0x83, add_1reg(0xC0, dreg), val); | ||
615 | else | ||
616 | EMIT2(0x01, add_2reg(0xC0, dreg, sreg)); | ||
617 | } | ||
618 | break; | ||
619 | /* dst = dst - val */ | ||
620 | case BPF_SUB: | ||
621 | if (hi && is64) { | ||
622 | if (is_imm8(val)) | ||
623 | EMIT3(0x83, add_1reg(0xD8, dreg), val); | ||
624 | else | ||
625 | EMIT2(0x19, add_2reg(0xC0, dreg, sreg)); | ||
626 | } else { | ||
627 | if (is_imm8(val)) | ||
628 | EMIT3(0x83, add_1reg(0xE8, dreg), val); | ||
629 | else | ||
630 | EMIT2(0x29, add_2reg(0xC0, dreg, sreg)); | ||
631 | } | ||
632 | break; | ||
633 | /* dst = dst | val */ | ||
634 | case BPF_OR: | ||
635 | if (is_imm8(val)) | ||
636 | EMIT3(0x83, add_1reg(0xC8, dreg), val); | ||
637 | else | ||
638 | EMIT2(0x09, add_2reg(0xC0, dreg, sreg)); | ||
639 | break; | ||
640 | /* dst = dst & val */ | ||
641 | case BPF_AND: | ||
642 | if (is_imm8(val)) | ||
643 | EMIT3(0x83, add_1reg(0xE0, dreg), val); | ||
644 | else | ||
645 | EMIT2(0x21, add_2reg(0xC0, dreg, sreg)); | ||
646 | break; | ||
647 | /* dst = dst ^ val */ | ||
648 | case BPF_XOR: | ||
649 | if (is_imm8(val)) | ||
650 | EMIT3(0x83, add_1reg(0xF0, dreg), val); | ||
651 | else | ||
652 | EMIT2(0x31, add_2reg(0xC0, dreg, sreg)); | ||
653 | break; | ||
654 | case BPF_NEG: | ||
655 | EMIT2(0xF7, add_1reg(0xD8, dreg)); | ||
656 | break; | ||
657 | } | ||
658 | |||
659 | if (dstk) | ||
660 | /* mov dword ptr [ebp+off],dreg */ | ||
661 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), | ||
662 | STACK_VAR(dst)); | ||
663 | *pprog = prog; | ||
664 | } | ||
665 | |||
666 | /* ALU operation (64 bit) */ | ||
667 | static inline void emit_ia32_alu_i64(const bool is64, const u8 op, | ||
668 | const u8 dst[], const u32 val, | ||
669 | bool dstk, u8 **pprog) | ||
670 | { | ||
671 | u8 *prog = *pprog; | ||
672 | u32 hi = 0; | ||
673 | |||
674 | if (is64 && (val & (1<<31))) | ||
675 | hi = (u32)~0; | ||
676 | |||
677 | emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog); | ||
678 | if (is64) | ||
679 | emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog); | ||
680 | else | ||
681 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
682 | |||
683 | *pprog = prog; | ||
684 | } | ||
685 | |||
686 | /* dst = ~dst (64 bit) */ | ||
687 | static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog) | ||
688 | { | ||
689 | u8 *prog = *pprog; | ||
690 | int cnt = 0; | ||
691 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
692 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
693 | |||
694 | if (dstk) { | ||
695 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
696 | STACK_VAR(dst_lo)); | ||
697 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
698 | STACK_VAR(dst_hi)); | ||
699 | } | ||
700 | |||
701 | /* xor ecx,ecx */ | ||
702 | EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX)); | ||
703 | /* sub dreg_lo,ecx */ | ||
704 | EMIT2(0x2B, add_2reg(0xC0, dreg_lo, IA32_ECX)); | ||
705 | /* mov dreg_lo,ecx */ | ||
706 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX)); | ||
707 | |||
708 | /* xor ecx,ecx */ | ||
709 | EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX)); | ||
710 | /* sbb dreg_hi,ecx */ | ||
711 | EMIT2(0x19, add_2reg(0xC0, dreg_hi, IA32_ECX)); | ||
712 | /* mov dreg_hi,ecx */ | ||
713 | EMIT2(0x89, add_2reg(0xC0, dreg_hi, IA32_ECX)); | ||
714 | |||
715 | if (dstk) { | ||
716 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
717 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
718 | STACK_VAR(dst_lo)); | ||
719 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
720 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
721 | STACK_VAR(dst_hi)); | ||
722 | } | ||
723 | *pprog = prog; | ||
724 | } | ||
725 | |||
726 | /* dst = dst << src */ | ||
727 | static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[], | ||
728 | bool dstk, bool sstk, u8 **pprog) | ||
729 | { | ||
730 | u8 *prog = *pprog; | ||
731 | int cnt = 0; | ||
732 | static int jmp_label1 = -1; | ||
733 | static int jmp_label2 = -1; | ||
734 | static int jmp_label3 = -1; | ||
735 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
736 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
737 | |||
738 | if (dstk) { | ||
739 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
740 | STACK_VAR(dst_lo)); | ||
741 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
742 | STACK_VAR(dst_hi)); | ||
743 | } | ||
744 | |||
745 | if (sstk) | ||
746 | /* mov ecx,dword ptr [ebp+off] */ | ||
747 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
748 | STACK_VAR(src_lo)); | ||
749 | else | ||
750 | /* mov ecx,src_lo */ | ||
751 | EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); | ||
752 | |||
753 | /* cmp ecx,32 */ | ||
754 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); | ||
755 | /* Jumps when >= 32 */ | ||
756 | if (is_imm8(jmp_label(jmp_label1, 2))) | ||
757 | EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); | ||
758 | else | ||
759 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); | ||
760 | |||
761 | /* < 32 */ | ||
762 | /* shl dreg_hi,cl */ | ||
763 | EMIT2(0xD3, add_1reg(0xE0, dreg_hi)); | ||
764 | /* mov ebx,dreg_lo */ | ||
765 | EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
766 | /* shl dreg_lo,cl */ | ||
767 | EMIT2(0xD3, add_1reg(0xE0, dreg_lo)); | ||
768 | |||
769 | /* IA32_ECX = -IA32_ECX + 32 */ | ||
770 | /* neg ecx */ | ||
771 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
772 | /* add ecx,32 */ | ||
773 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
774 | |||
775 | /* shr ebx,cl */ | ||
776 | EMIT2(0xD3, add_1reg(0xE8, IA32_EBX)); | ||
777 | /* or dreg_hi,ebx */ | ||
778 | EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
779 | |||
780 | /* goto out; */ | ||
781 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
782 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
783 | else | ||
784 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
785 | |||
786 | /* >= 32 */ | ||
787 | if (jmp_label1 == -1) | ||
788 | jmp_label1 = cnt; | ||
789 | |||
790 | /* cmp ecx,64 */ | ||
791 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); | ||
792 | /* Jumps when >= 64 */ | ||
793 | if (is_imm8(jmp_label(jmp_label2, 2))) | ||
794 | EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); | ||
795 | else | ||
796 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); | ||
797 | |||
798 | /* >= 32 && < 64 */ | ||
799 | /* sub ecx,32 */ | ||
800 | EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); | ||
801 | /* shl dreg_lo,cl */ | ||
802 | EMIT2(0xD3, add_1reg(0xE0, dreg_lo)); | ||
803 | /* mov dreg_hi,dreg_lo */ | ||
804 | EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo)); | ||
805 | |||
806 | /* xor dreg_lo,dreg_lo */ | ||
807 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
808 | |||
809 | /* goto out; */ | ||
810 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
811 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
812 | else | ||
813 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
814 | |||
815 | /* >= 64 */ | ||
816 | if (jmp_label2 == -1) | ||
817 | jmp_label2 = cnt; | ||
818 | /* xor dreg_lo,dreg_lo */ | ||
819 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
820 | /* xor dreg_hi,dreg_hi */ | ||
821 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
822 | |||
823 | if (jmp_label3 == -1) | ||
824 | jmp_label3 = cnt; | ||
825 | |||
826 | if (dstk) { | ||
827 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
828 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
829 | STACK_VAR(dst_lo)); | ||
830 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
831 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
832 | STACK_VAR(dst_hi)); | ||
833 | } | ||
834 | /* out: */ | ||
835 | *pprog = prog; | ||
836 | } | ||
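The three branches implement a 64-bit left shift on a 32-bit pair: for n < 32 the bits shifted out of the low word are OR-ed into the high word, for 32 <= n < 64 the low word moves up, and for n >= 64 both words clear. A user-space model (illustrative; the n == 0 guard is explicit here because x86 masks 32-bit shift counts to 5 bits):

	#include <stdint.h>

	static void lsh64_model(uint32_t *lo, uint32_t *hi, uint32_t n)
	{
		if (n < 32) {
			*hi = (*hi << n) | (n ? *lo >> (32 - n) : 0);
			*lo <<= n;
		} else if (n < 64) {
			*hi = *lo << (n - 32);
			*lo = 0;
		} else {
			*lo = 0;
			*hi = 0;
		}
	}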
837 | |||
838 | /* dst = dst >> src (signed) */ | ||
839 | static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[], | ||
840 | bool dstk, bool sstk, u8 **pprog) | ||
841 | { | ||
842 | u8 *prog = *pprog; | ||
843 | int cnt = 0; | ||
844 | static int jmp_label1 = -1; | ||
845 | static int jmp_label2 = -1; | ||
846 | static int jmp_label3 = -1; | ||
847 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
848 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
849 | |||
850 | if (dstk) { | ||
851 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
852 | STACK_VAR(dst_lo)); | ||
853 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
854 | STACK_VAR(dst_hi)); | ||
855 | } | ||
856 | |||
857 | if (sstk) | ||
858 | /* mov ecx,dword ptr [ebp+off] */ | ||
859 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
860 | STACK_VAR(src_lo)); | ||
861 | else | ||
862 | /* mov ecx,src_lo */ | ||
863 | EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); | ||
864 | |||
865 | /* cmp ecx,32 */ | ||
866 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); | ||
867 | /* Jumps when >= 32 */ | ||
868 | if (is_imm8(jmp_label(jmp_label1, 2))) | ||
869 | EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); | ||
870 | else | ||
871 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); | ||
872 | |||
873 | /* < 32 */ | ||
874 | /* shr dreg_lo,cl */ | ||
875 | EMIT2(0xD3, add_1reg(0xE8, dreg_lo)); | ||
876 | /* mov ebx,dreg_hi */ | ||
877 | EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
878 | /* ashr dreg_hi,cl */ | ||
879 | EMIT2(0xD3, add_1reg(0xF8, dreg_hi)); | ||
880 | |||
881 | /* IA32_ECX = -IA32_ECX + 32 */ | ||
882 | /* neg ecx */ | ||
883 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
884 | /* add ecx,32 */ | ||
885 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
886 | |||
887 | /* shl ebx,cl */ | ||
888 | EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); | ||
889 | /* or dreg_lo,ebx */ | ||
890 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
891 | |||
892 | /* goto out; */ | ||
893 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
894 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
895 | else | ||
896 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
897 | |||
898 | /* >= 32 */ | ||
899 | if (jmp_label1 == -1) | ||
900 | jmp_label1 = cnt; | ||
901 | |||
902 | /* cmp ecx,64 */ | ||
903 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); | ||
904 | /* Jumps when >= 64 */ | ||
905 | if (is_imm8(jmp_label(jmp_label2, 2))) | ||
906 | EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); | ||
907 | else | ||
908 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); | ||
909 | |||
910 | /* >= 32 && < 64 */ | ||
911 | /* sub ecx,32 */ | ||
912 | EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); | ||
913 | /* ashr dreg_hi,cl */ | ||
914 | EMIT2(0xD3, add_1reg(0xF8, dreg_hi)); | ||
915 | /* mov dreg_lo,dreg_hi */ | ||
916 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
917 | |||
918 | /* ashr dreg_hi,imm8 */ | ||
919 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); | ||
920 | |||
921 | /* goto out; */ | ||
922 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
923 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
924 | else | ||
925 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
926 | |||
927 | /* >= 64 */ | ||
928 | if (jmp_label2 == -1) | ||
929 | jmp_label2 = cnt; | ||
930 | /* ashr dreg_hi,imm8 */ | ||
931 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); | ||
932 | /* mov dreg_lo,dreg_hi */ | ||
933 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
934 | |||
935 | if (jmp_label3 == -1) | ||
936 | jmp_label3 = cnt; | ||
937 | |||
938 | if (dstk) { | ||
939 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
940 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
941 | STACK_VAR(dst_lo)); | ||
942 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
943 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
944 | STACK_VAR(dst_hi)); | ||
945 | } | ||
946 | /* out: */ | ||
947 | *pprog = prog; | ||
948 | } | ||
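The arithmetic variant mirrors this with 'sar', so the sign bit is replicated; for n >= 64 both words end up filled with the sign. A matching user-space model (illustrative; assumes arithmetic >> on signed values, as GCC/Clang provide):

	#include <stdint.h>

	static void arsh64_model(uint32_t *lo, uint32_t *hi, uint32_t n)
	{
		int32_t shi = (int32_t)*hi;

		if (n < 32) {
			*lo = (*lo >> n) | (n ? *hi << (32 - n) : 0);
			*hi = (uint32_t)(shi >> n);
		} else if (n < 64) {
			*lo = (uint32_t)(shi >> (n - 32));
			*hi = (uint32_t)(shi >> 31);
		} else {
			*lo = (uint32_t)(shi >> 31);
			*hi = (uint32_t)(shi >> 31);
		}
	}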
949 | |||
950 | /* dst = dst >> src */ | ||
951 | static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk, | ||
952 | bool sstk, u8 **pprog) | ||
953 | { | ||
954 | u8 *prog = *pprog; | ||
955 | int cnt = 0; | ||
956 | static int jmp_label1 = -1; | ||
957 | static int jmp_label2 = -1; | ||
958 | static int jmp_label3 = -1; | ||
959 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
960 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
961 | |||
962 | if (dstk) { | ||
963 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
964 | STACK_VAR(dst_lo)); | ||
965 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
966 | STACK_VAR(dst_hi)); | ||
967 | } | ||
968 | |||
969 | if (sstk) | ||
970 | /* mov ecx,dword ptr [ebp+off] */ | ||
971 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
972 | STACK_VAR(src_lo)); | ||
973 | else | ||
974 | /* mov ecx,src_lo */ | ||
975 | EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); | ||
976 | |||
977 | /* cmp ecx,32 */ | ||
978 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); | ||
979 | /* Jumps when >= 32 */ | ||
980 | if (is_imm8(jmp_label(jmp_label1, 2))) | ||
981 | EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); | ||
982 | else | ||
983 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); | ||
984 | |||
985 | /* < 32 */ | ||
986 | /* shr dreg_lo,cl */ | ||
987 | EMIT2(0xD3, add_1reg(0xE8, dreg_lo)); | ||
988 | /* mov ebx,dreg_hi */ | ||
989 | EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
990 | /* shr dreg_hi,cl */ | ||
991 | EMIT2(0xD3, add_1reg(0xE8, dreg_hi)); | ||
992 | |||
993 | /* IA32_ECX = -IA32_ECX + 32 */ | ||
994 | /* neg ecx */ | ||
995 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
996 | /* add ecx,32 */ | ||
997 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
998 | |||
999 | /* shl ebx,cl */ | ||
1000 | EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); | ||
1001 | /* or dreg_lo,ebx */ | ||
1002 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
1003 | |||
1004 | /* goto out; */ | ||
1005 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
1006 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
1007 | else | ||
1008 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
1009 | |||
1010 | /* >= 32 */ | ||
1011 | if (jmp_label1 == -1) | ||
1012 | jmp_label1 = cnt; | ||
1013 | /* cmp ecx,64 */ | ||
1014 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); | ||
1015 | /* Jumps when >= 64 */ | ||
1016 | if (is_imm8(jmp_label(jmp_label2, 2))) | ||
1017 | EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); | ||
1018 | else | ||
1019 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); | ||
1020 | |||
1021 | /* >= 32 && < 64 */ | ||
1022 | /* sub ecx,32 */ | ||
1023 | EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); | ||
1024 | /* shr dreg_hi,cl */ | ||
1025 | EMIT2(0xD3, add_1reg(0xE8, dreg_hi)); | ||
1026 | /* mov dreg_lo,dreg_hi */ | ||
1027 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
1028 | /* xor dreg_hi,dreg_hi */ | ||
1029 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
1030 | |||
1031 | /* goto out; */ | ||
1032 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
1033 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
1034 | else | ||
1035 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
1036 | |||
1037 | /* >= 64 */ | ||
1038 | if (jmp_label2 == -1) | ||
1039 | jmp_label2 = cnt; | ||
1040 | /* xor dreg_lo,dreg_lo */ | ||
1041 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
1042 | /* xor dreg_hi,dreg_hi */ | ||
1043 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
1044 | |||
1045 | if (jmp_label3 == -1) | ||
1046 | jmp_label3 = cnt; | ||
1047 | |||
1048 | if (dstk) { | ||
1049 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
1050 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
1051 | STACK_VAR(dst_lo)); | ||
1052 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
1053 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
1054 | STACK_VAR(dst_hi)); | ||
1055 | } | ||
1056 | /* out: */ | ||
1057 | *pprog = prog; | ||
1058 | } | ||
1059 | |||
1060 | /* dst = dst << val */ | ||
1061 | static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val, | ||
1062 | bool dstk, u8 **pprog) | ||
1063 | { | ||
1064 | u8 *prog = *pprog; | ||
1065 | int cnt = 0; | ||
1066 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
1067 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
1068 | |||
1069 | if (dstk) { | ||
1070 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
1071 | STACK_VAR(dst_lo)); | ||
1072 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
1073 | STACK_VAR(dst_hi)); | ||
1074 | } | ||
1075 | /* Do LSH operation */ | ||
1076 | if (val < 32) { | ||
1077 | /* shl dreg_hi,imm8 */ | ||
1078 | EMIT3(0xC1, add_1reg(0xE0, dreg_hi), val); | ||
1079 | /* mov ebx,dreg_lo */ | ||
1080 | EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
1081 | /* shl dreg_lo,imm8 */ | ||
1082 | EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val); | ||
1083 | |||
1084 | /* IA32_ECX = 32 - val */ | ||
1085 | /* mov cl,val */ | ||
1086 | EMIT2(0xB1, val); | ||
1087 | /* movzx ecx,cl */ | ||
1088 | EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX)); | ||
1089 | /* neg ecx */ | ||
1090 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
1091 | /* add ecx,32 */ | ||
1092 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
1093 | |||
1094 | /* shr ebx,cl */ | ||
1095 | EMIT2(0xD3, add_1reg(0xE8, IA32_EBX)); | ||
1096 | /* or dreg_hi,ebx */ | ||
1097 | EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
1098 | } else if (val >= 32 && val < 64) { | ||
1099 | u32 value = val - 32; | ||
1100 | |||
1101 | /* shl dreg_lo,imm8 */ | ||
1102 | EMIT3(0xC1, add_1reg(0xE0, dreg_lo), value); | ||
1103 | /* mov dreg_hi,dreg_lo */ | ||
1104 | EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo)); | ||
1105 | /* xor dreg_lo,dreg_lo */ | ||
1106 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
1107 | } else { | ||
1108 | /* xor dreg_lo,dreg_lo */ | ||
1109 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
1110 | /* xor dreg_hi,dreg_hi */ | ||
1111 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
1112 | } | ||
1113 | |||
1114 | if (dstk) { | ||
1115 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
1116 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
1117 | STACK_VAR(dst_lo)); | ||
1118 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
1119 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
1120 | STACK_VAR(dst_hi)); | ||
1121 | } | ||
1122 | *pprog = prog; | ||
1123 | } | ||
1124 | |||
1125 | /* dst = dst >> val */ | ||
1126 | static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val, | ||
1127 | bool dstk, u8 **pprog) | ||
1128 | { | ||
1129 | u8 *prog = *pprog; | ||
1130 | int cnt = 0; | ||
1131 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
1132 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
1133 | |||
1134 | if (dstk) { | ||
1135 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
1136 | STACK_VAR(dst_lo)); | ||
1137 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
1138 | STACK_VAR(dst_hi)); | ||
1139 | } | ||
1140 | |||
1141 | /* Do RSH operation */ | ||
1142 | if (val < 32) { | ||
1143 | /* shr dreg_lo,imm8 */ | ||
1144 | EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val); | ||
1145 | /* mov ebx,dreg_hi */ | ||
1146 | EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
1147 | /* shr dreg_hi,imm8 */ | ||
1148 | EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val); | ||
1149 | |||
1150 | /* IA32_ECX = 32 - val */ | ||
1151 | /* mov cl,val */ | ||
1152 | EMIT2(0xB1, val); | ||
1153 | /* movzx ecx,cl */ | ||
1154 | EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX)); | ||
1155 | /* neg ecx */ | ||
1156 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
1157 | /* add ecx,32 */ | ||
1158 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
1159 | |||
1160 | /* shl ebx,cl */ | ||
1161 | EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); | ||
1162 | /* or dreg_lo,ebx */ | ||
1163 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
1164 | } else if (val >= 32 && val < 64) { | ||
1165 | u32 value = val - 32; | ||
1166 | |||
1167 | /* shr dreg_hi,imm8 */ | ||
1168 | EMIT3(0xC1, add_1reg(0xE8, dreg_hi), value); | ||
1169 | /* mov dreg_lo,dreg_hi */ | ||
1170 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
1171 | /* xor dreg_hi,dreg_hi */ | ||
1172 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
1173 | } else { | ||
1174 | /* xor dreg_lo,dreg_lo */ | ||
1175 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
1176 | /* xor dreg_hi,dreg_hi */ | ||
1177 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
1178 | } | ||
1179 | |||
1180 | if (dstk) { | ||
1181 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
1182 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
1183 | STACK_VAR(dst_lo)); | ||
1184 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
1185 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
1186 | STACK_VAR(dst_hi)); | ||
1187 | } | ||
1188 | *pprog = prog; | ||
1189 | } | ||
1190 | |||
1191 | /* dst = dst >> val (signed) */ | ||
1192 | static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val, | ||
1193 | bool dstk, u8 **pprog) | ||
1194 | { | ||
1195 | u8 *prog = *pprog; | ||
1196 | int cnt = 0; | ||
1197 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
1198 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
1199 | |||
1200 | if (dstk) { | ||
1201 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
1202 | STACK_VAR(dst_lo)); | ||
1203 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
1204 | STACK_VAR(dst_hi)); | ||
1205 | } | ||
1206 | /* Do RSH operation */ | ||
1207 | if (val < 32) { | ||
1208 | /* shr dreg_lo,imm8 */ | ||
1209 | EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val); | ||
1210 | /* mov ebx,dreg_hi */ | ||
1211 | EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
1212 | /* ashr dreg_hi,imm8 */ | ||
1213 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val); | ||
1214 | |||
1215 | /* IA32_ECX = 32 - val */ | ||
1217 | /* mov cl,val */ | ||
1217 | EMIT2(0xB1, val); | ||
1219 | /* movzx ecx,cl */ | ||
1219 | EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX)); | ||
1220 | /* neg ecx */ | ||
1221 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
1222 | /* add ecx,32 */ | ||
1223 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
1224 | |||
1225 | /* shl ebx,cl */ | ||
1226 | EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); | ||
1227 | /* or dreg_lo,ebx */ | ||
1228 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
1229 | } else if (val >= 32 && val < 64) { | ||
1230 | u32 value = val - 32; | ||
1231 | |||
1232 | /* ashr dreg_hi,imm8 */ | ||
1233 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), value); | ||
1234 | /* mov dreg_lo,dreg_hi */ | ||
1235 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
1236 | |||
1237 | /* ashr dreg_hi,imm8 */ | ||
1238 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); | ||
1239 | } else { | ||
1240 | /* ashr dreg_hi,imm8 */ | ||
1241 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); | ||
1242 | /* mov dreg_lo,dreg_hi */ | ||
1243 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
1244 | } | ||
1245 | |||
1246 | if (dstk) { | ||
1247 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
1248 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
1249 | STACK_VAR(dst_lo)); | ||
1250 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
1251 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
1252 | STACK_VAR(dst_hi)); | ||
1253 | } | ||
1254 | *pprog = prog; | ||
1255 | } | ||
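
The arithmetic variant differs only in what the high word shifts in: sign bits instead of zeros. The same decomposition in C (sketch, same assumptions as above):

	/* 64-bit arithmetic right shift built from 32-bit halves, 0 < val < 32 */
	static u64 arsh64_model(u32 lo, u32 hi, unsigned int val)
	{
		u32 new_lo = (lo >> val) | (hi << (32 - val));
		u32 new_hi = (u32)((s32)hi >> val);	/* sar: sign-extending shift */

		return ((u64)new_hi << 32) | new_lo;
	}

For val >= 32 the low word becomes (s32)hi >> (val - 32) and the high word is pure replicated sign, (s32)hi >> 31, which is what the last two branches emit.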
1256 | |||
1257 | static inline void emit_ia32_mul_r64(const u8 dst[], const u8 src[], bool dstk, | ||
1258 | bool sstk, u8 **pprog) | ||
1259 | { | ||
1260 | u8 *prog = *pprog; | ||
1261 | int cnt = 0; | ||
1262 | |||
1263 | if (dstk) | ||
1264 | /* mov eax,dword ptr [ebp+off] */ | ||
1265 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
1266 | STACK_VAR(dst_hi)); | ||
1267 | else | ||
1268 | /* mov eax,dst_hi */ | ||
1269 | EMIT2(0x8B, add_2reg(0xC0, dst_hi, IA32_EAX)); | ||
1270 | |||
1271 | if (sstk) | ||
1272 | /* mul dword ptr [ebp+off] */ | ||
1273 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo)); | ||
1274 | else | ||
1275 | /* mul src_lo */ | ||
1276 | EMIT2(0xF7, add_1reg(0xE0, src_lo)); | ||
1277 | |||
1278 | /* mov ecx,eax */ | ||
1279 | EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX)); | ||
1280 | |||
1281 | if (dstk) | ||
1282 | /* mov eax,dword ptr [ebp+off] */ | ||
1283 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
1284 | STACK_VAR(dst_lo)); | ||
1285 | else | ||
1286 | /* mov eax,dst_lo */ | ||
1287 | EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
1288 | |||
1289 | if (sstk) | ||
1290 | /* mul dword ptr [ebp+off] */ | ||
1291 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_hi)); | ||
1292 | else | ||
1293 | /* mul src_hi */ | ||
1294 | EMIT2(0xF7, add_1reg(0xE0, src_hi)); | ||
1295 | |||
1296 | /* add ecx,eax */ | ||
1297 | EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX)); | ||
1298 | |||
1299 | if (dstk) | ||
1300 | /* mov eax,dword ptr [ebp+off] */ | ||
1301 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
1302 | STACK_VAR(dst_lo)); | ||
1303 | else | ||
1304 | /* mov eax,dst_lo */ | ||
1305 | EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
1306 | |||
1307 | if (sstk) | ||
1308 | /* mul dword ptr [ebp+off] */ | ||
1309 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo)); | ||
1310 | else | ||
1311 | /* mul src_lo */ | ||
1312 | EMIT2(0xF7, add_1reg(0xE0, src_lo)); | ||
1313 | |||
1314 | /* add ecx,edx */ | ||
1315 | EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX)); | ||
1316 | |||
1317 | if (dstk) { | ||
1318 | /* mov dword ptr [ebp+off],eax */ | ||
1319 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
1320 | STACK_VAR(dst_lo)); | ||
1321 | /* mov dword ptr [ebp+off],ecx */ | ||
1322 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
1323 | STACK_VAR(dst_hi)); | ||
1324 | } else { | ||
1325 | /* mov dst_lo,eax */ | ||
1326 | EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
1327 | /* mov dst_hi,ecx */ | ||
1328 | EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX)); | ||
1329 | } | ||
1330 | |||
1331 | *pprog = prog; | ||
1332 | } | ||
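
The sequence above is the schoolbook 64x64 -> low-64 multiply: only the cross products contribute to the upper word, so both may be truncated to 32 bits, while a single full 32x32 -> 64 multiply (x86 mul) covers the low words. A C model (illustrative sketch only):

	/* 64x64 -> low 64 bits from three 32-bit multiplies */
	static u64 mul64_model(u32 a_lo, u32 a_hi, u32 b_lo, u32 b_hi)
	{
		u64 low   = (u64)a_lo * b_lo;		/* full product, mul's edx:eax */
		u32 cross = a_hi * b_lo + a_lo * b_hi;	/* truncation to 32 bits is fine */

		return low + ((u64)cross << 32);
	}

emit_ia32_mul_i64() below applies the same scheme with the second operand replaced by the sign-extended immediate (hi:val).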
1333 | |||
1334 | static inline void emit_ia32_mul_i64(const u8 dst[], const u32 val, | ||
1335 | bool dstk, u8 **pprog) | ||
1336 | { | ||
1337 | u8 *prog = *pprog; | ||
1338 | int cnt = 0; | ||
1339 | u32 hi; | ||
1340 | |||
1341 | hi = val & (1<<31) ? (u32)~0 : 0; | ||
1342 | /* movl eax,imm32 */ | ||
1343 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val); | ||
1344 | if (dstk) | ||
1345 | /* mul dword ptr [ebp+off] */ | ||
1346 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_hi)); | ||
1347 | else | ||
1348 | /* mul dst_hi */ | ||
1349 | EMIT2(0xF7, add_1reg(0xE0, dst_hi)); | ||
1350 | |||
1351 | /* mov ecx,eax */ | ||
1352 | EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX)); | ||
1353 | |||
1354 | /* movl eax,imm32 */ | ||
1355 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), hi); | ||
1356 | if (dstk) | ||
1357 | /* mul dword ptr [ebp+off] */ | ||
1358 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo)); | ||
1359 | else | ||
1360 | /* mul dst_lo */ | ||
1361 | EMIT2(0xF7, add_1reg(0xE0, dst_lo)); | ||
1362 | /* add ecx,eax */ | ||
1363 | EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX)); | ||
1364 | |||
1365 | /* movl eax,imm32 */ | ||
1366 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val); | ||
1367 | if (dstk) | ||
1368 | /* mul dword ptr [ebp+off] */ | ||
1369 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo)); | ||
1370 | else | ||
1371 | /* mul dst_lo */ | ||
1372 | EMIT2(0xF7, add_1reg(0xE0, dst_lo)); | ||
1373 | |||
1374 | /* add ecx,edx */ | ||
1375 | EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX)); | ||
1376 | |||
1377 | if (dstk) { | ||
1378 | /* mov dword ptr [ebp+off],eax */ | ||
1379 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
1380 | STACK_VAR(dst_lo)); | ||
1381 | /* mov dword ptr [ebp+off],ecx */ | ||
1382 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
1383 | STACK_VAR(dst_hi)); | ||
1384 | } else { | ||
1385 | /* mov dst_lo,eax */ | ||
1386 | EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
1387 | /* mov dst_hi,ecx */ | ||
1388 | EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX)); | ||
1389 | } | ||
1390 | |||
1391 | *pprog = prog; | ||
1392 | } | ||
1393 | |||
1394 | static int bpf_size_to_x86_bytes(int bpf_size) | ||
1395 | { | ||
1396 | if (bpf_size == BPF_W) | ||
1397 | return 4; | ||
1398 | else if (bpf_size == BPF_H) | ||
1399 | return 2; | ||
1400 | else if (bpf_size == BPF_B) | ||
1401 | return 1; | ||
1402 | else if (bpf_size == BPF_DW) | ||
1403 | return 4; /* imm32 */ | ||
1404 | else | ||
1405 | return 0; | ||
1406 | } | ||
1407 | |||
1408 | struct jit_context { | ||
1409 | int cleanup_addr; /* Epilogue code offset */ | ||
1410 | }; | ||
1411 | |||
1412 | /* Maximum number of bytes emitted while JITing one eBPF insn */ | ||
1413 | #define BPF_MAX_INSN_SIZE 128 | ||
1414 | #define BPF_INSN_SAFETY 64 | ||
1415 | |||
1416 | #define PROLOGUE_SIZE 35 | ||
1417 | |||
1418 | /* | ||
1419 | * Emit prologue code for BPF program and check its size. | ||
1420 | * bpf_tail_call helper will skip it while jumping into another program. | ||
1421 | */ | ||
1422 | static void emit_prologue(u8 **pprog, u32 stack_depth) | ||
1423 | { | ||
1424 | u8 *prog = *pprog; | ||
1425 | int cnt = 0; | ||
1426 | const u8 *r1 = bpf2ia32[BPF_REG_1]; | ||
1427 | const u8 fplo = bpf2ia32[BPF_REG_FP][0]; | ||
1428 | const u8 fphi = bpf2ia32[BPF_REG_FP][1]; | ||
1429 | const u8 *tcc = bpf2ia32[TCALL_CNT]; | ||
1430 | |||
1431 | /* push ebp */ | ||
1432 | EMIT1(0x55); | ||
1433 | /* mov ebp,esp */ | ||
1434 | EMIT2(0x89, 0xE5); | ||
1435 | /* push edi */ | ||
1436 | EMIT1(0x57); | ||
1437 | /* push esi */ | ||
1438 | EMIT1(0x56); | ||
1439 | /* push ebx */ | ||
1440 | EMIT1(0x53); | ||
1441 | |||
1442 | /* sub esp,STACK_SIZE */ | ||
1443 | EMIT2_off32(0x81, 0xEC, STACK_SIZE); | ||
1444 | /* sub ebp,SCRATCH_SIZE+4+12 */ | ||
1445 | EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 16); | ||
1446 | /* xor ebx,ebx */ | ||
1447 | EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX)); | ||
1448 | |||
1449 | /* Set up BPF prog stack base register */ | ||
1450 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBP), STACK_VAR(fplo)); | ||
1451 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(fphi)); | ||
1452 | |||
1453 | /* Move BPF_CTX (EAX) to BPF_REG_R1 */ | ||
1454 | /* mov dword ptr [ebp+off],eax */ | ||
1455 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0])); | ||
1456 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(r1[1])); | ||
1457 | |||
1458 | /* Initialize Tail Count */ | ||
1459 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[0])); | ||
1460 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1])); | ||
1461 | |||
1462 | BUILD_BUG_ON(cnt != PROLOGUE_SIZE); | ||
1463 | *pprog = prog; | ||
1464 | } | ||
1465 | |||
1466 | /* Emit epilogue code for BPF program */ | ||
1467 | static void emit_epilogue(u8 **pprog, u32 stack_depth) | ||
1468 | { | ||
1469 | u8 *prog = *pprog; | ||
1470 | const u8 *r0 = bpf2ia32[BPF_REG_0]; | ||
1471 | int cnt = 0; | ||
1472 | |||
1473 | /* mov eax,dword ptr [ebp+off] */ | ||
1474 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r0[0])); | ||
1475 | /* mov edx,dword ptr [ebp+off] */ | ||
1476 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1])); | ||
1477 | |||
1478 | /* add ebp,SCRATCH_SIZE+4+12 */ | ||
1479 | EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 16); | ||
1480 | |||
1481 | /* mov ebx,dword ptr [ebp-12] */ | ||
1482 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12); | ||
1483 | /* mov esi,dword ptr [ebp-8] */ | ||
1484 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ESI), -8); | ||
1485 | /* mov edi,dword ptr [ebp-4] */ | ||
1486 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDI), -4); | ||
1487 | |||
1488 | EMIT1(0xC9); /* leave */ | ||
1489 | EMIT1(0xC3); /* ret */ | ||
1490 | *pprog = prog; | ||
1491 | } | ||
1492 | |||
1493 | /* | ||
1494 | * Generate the following code: | ||
1495 | * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... | ||
1496 | * if (index >= array->map.max_entries) | ||
1497 | * goto out; | ||
1498 | * if (++tail_call_cnt > MAX_TAIL_CALL_CNT) | ||
1499 | * goto out; | ||
1500 | * prog = array->ptrs[index]; | ||
1501 | * if (prog == NULL) | ||
1502 | * goto out; | ||
1503 | * goto *(prog->bpf_func + prologue_size); | ||
1504 | * out: | ||
1505 | */ | ||
1506 | static void emit_bpf_tail_call(u8 **pprog) | ||
1507 | { | ||
1508 | u8 *prog = *pprog; | ||
1509 | int cnt = 0; | ||
1510 | const u8 *r1 = bpf2ia32[BPF_REG_1]; | ||
1511 | const u8 *r2 = bpf2ia32[BPF_REG_2]; | ||
1512 | const u8 *r3 = bpf2ia32[BPF_REG_3]; | ||
1513 | const u8 *tcc = bpf2ia32[TCALL_CNT]; | ||
1514 | u32 lo, hi; | ||
1515 | static int jmp_label1 = -1; | ||
1516 | |||
1517 | /* | ||
1518 | * if (index >= array->map.max_entries) | ||
1519 | * goto out; | ||
1520 | */ | ||
1521 | /* mov eax,dword ptr [ebp+off] */ | ||
1522 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r2[0])); | ||
1523 | /* mov edx,dword ptr [ebp+off] */ | ||
1524 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r3[0])); | ||
1525 | |||
1526 | /* cmp dword ptr [eax+off],edx */ | ||
1527 | EMIT3(0x39, add_2reg(0x40, IA32_EAX, IA32_EDX), | ||
1528 | offsetof(struct bpf_array, map.max_entries)); | ||
1529 | /* jbe out */ | ||
1530 | EMIT2(IA32_JBE, jmp_label(jmp_label1, 2)); | ||
1531 | |||
1532 | /* | ||
1533 | * if (tail_call_cnt > MAX_TAIL_CALL_CNT) | ||
1534 | * goto out; | ||
1535 | */ | ||
1536 | lo = (u32)MAX_TAIL_CALL_CNT; | ||
1537 | hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32); | ||
1538 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0])); | ||
1539 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1])); | ||
1540 | |||
1541 | /* cmp ebx,hi */ | ||
1542 | EMIT3(0x83, add_1reg(0xF8, IA32_EBX), hi); | ||
1543 | EMIT2(IA32_JNE, 3); | ||
1544 | /* cmp ecx,lo */ | ||
1545 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), lo); | ||
1546 | |||
1547 | /* jae out */ | ||
1548 | EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); | ||
1549 | |||
1550 | /* add ecx,0x1 */ | ||
1551 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 0x01); | ||
1552 | /* adc ebx,0x0 */ | ||
1553 | EMIT3(0x83, add_1reg(0xD0, IA32_EBX), 0x00); | ||
1554 | |||
1555 | /* mov dword ptr [ebp+off],ecx */ | ||
1556 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0])); | ||
1557 | /* mov dword ptr [ebp+off],ebx */ | ||
1558 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1])); | ||
1559 | |||
1560 | /* prog = array->ptrs[index]; */ | ||
1561 | /* mov edx, [eax + edx * 4 + offsetof(...)] */ | ||
1562 | EMIT3_off32(0x8B, 0x94, 0x90, offsetof(struct bpf_array, ptrs)); | ||
1563 | |||
1564 | /* | ||
1565 | * if (prog == NULL) | ||
1566 | * goto out; | ||
1567 | */ | ||
1568 | /* test edx,edx */ | ||
1569 | EMIT2(0x85, add_2reg(0xC0, IA32_EDX, IA32_EDX)); | ||
1570 | /* je out */ | ||
1571 | EMIT2(IA32_JE, jmp_label(jmp_label1, 2)); | ||
1572 | |||
1573 | /* goto *(prog->bpf_func + prologue_size); */ | ||
1574 | /* mov edx, dword ptr [edx + offsetof(bpf_prog, bpf_func)] */ | ||
1575 | EMIT3(0x8B, add_2reg(0x40, IA32_EDX, IA32_EDX), | ||
1576 | offsetof(struct bpf_prog, bpf_func)); | ||
1577 | /* add edx,prologue_size */ | ||
1578 | EMIT3(0x83, add_1reg(0xC0, IA32_EDX), PROLOGUE_SIZE); | ||
1579 | |||
1580 | /* mov eax,dword ptr [ebp+off] */ | ||
1581 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0])); | ||
1582 | |||
1583 | /* | ||
1584 | * Now we're ready to jump into next BPF program: | ||
1585 | * eax == ctx (1st arg) | ||
1586 | * edx == prog->bpf_func + prologue_size | ||
1587 | */ | ||
1588 | RETPOLINE_EDX_BPF_JIT(); | ||
1589 | |||
1590 | if (jmp_label1 == -1) | ||
1591 | jmp_label1 = cnt; | ||
1592 | |||
1593 | /* out: */ | ||
1594 | *pprog = prog; | ||
1595 | } | ||
1596 | |||
1597 | /* Push the scratch stack register on top of the stack. */ | ||
1598 | static inline void emit_push_r64(const u8 src[], u8 **pprog) | ||
1599 | { | ||
1600 | u8 *prog = *pprog; | ||
1601 | int cnt = 0; | ||
1602 | |||
1603 | /* mov ecx,dword ptr [ebp+off] */ | ||
1604 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_hi)); | ||
1605 | /* push ecx */ | ||
1606 | EMIT1(0x51); | ||
1607 | |||
1608 | /* mov ecx,dword ptr [ebp+off] */ | ||
1609 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo)); | ||
1610 | /* push ecx */ | ||
1611 | EMIT1(0x51); | ||
1612 | |||
1613 | *pprog = prog; | ||
1614 | } | ||
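
emit_push_r64() above feeds the C calling convention: BPF helper calls are compiled down to plain C function calls. Schematically (hypothetical signature, for illustration only):

	u64 helper_fn(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

The kernel builds 32-bit x86 with -mregparm=3, so the first 64-bit argument occupies eax (low) and edx (high) while r2..r5 go on the stack right to left. That is why the call emitter in do_jit() below loads r1 into eax/edx, pushes r5 first, and pops 32 bytes (4 arguments x 8 bytes) once the result comes back in edx:eax.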
1615 | |||
1616 | static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | ||
1617 | int oldproglen, struct jit_context *ctx) | ||
1618 | { | ||
1619 | struct bpf_insn *insn = bpf_prog->insnsi; | ||
1620 | int insn_cnt = bpf_prog->len; | ||
1621 | bool seen_exit = false; | ||
1622 | u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; | ||
1623 | int i, cnt = 0; | ||
1624 | int proglen = 0; | ||
1625 | u8 *prog = temp; | ||
1626 | |||
1627 | emit_prologue(&prog, bpf_prog->aux->stack_depth); | ||
1628 | |||
1629 | for (i = 0; i < insn_cnt; i++, insn++) { | ||
1630 | const s32 imm32 = insn->imm; | ||
1631 | const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; | ||
1632 | const bool dstk = insn->dst_reg == BPF_REG_AX ? false : true; | ||
1633 | const bool sstk = insn->src_reg == BPF_REG_AX ? false : true; | ||
1634 | const u8 code = insn->code; | ||
1635 | const u8 *dst = bpf2ia32[insn->dst_reg]; | ||
1636 | const u8 *src = bpf2ia32[insn->src_reg]; | ||
1637 | const u8 *r0 = bpf2ia32[BPF_REG_0]; | ||
1638 | s64 jmp_offset; | ||
1639 | u8 jmp_cond; | ||
1640 | int ilen; | ||
1641 | u8 *func; | ||
1642 | |||
1643 | switch (code) { | ||
1644 | /* ALU operations */ | ||
1645 | /* dst = src */ | ||
1646 | case BPF_ALU | BPF_MOV | BPF_K: | ||
1647 | case BPF_ALU | BPF_MOV | BPF_X: | ||
1648 | case BPF_ALU64 | BPF_MOV | BPF_K: | ||
1649 | case BPF_ALU64 | BPF_MOV | BPF_X: | ||
1650 | switch (BPF_SRC(code)) { | ||
1651 | case BPF_X: | ||
1652 | emit_ia32_mov_r64(is64, dst, src, dstk, | ||
1653 | sstk, &prog); | ||
1654 | break; | ||
1655 | case BPF_K: | ||
1656 | /* Sign-extend immediate value to dst reg */ | ||
1657 | emit_ia32_mov_i64(is64, dst, imm32, | ||
1658 | dstk, &prog); | ||
1659 | break; | ||
1660 | } | ||
1661 | break; | ||
1662 | /* dst = dst + src/imm */ | ||
1663 | /* dst = dst - src/imm */ | ||
1664 | /* dst = dst | src/imm */ | ||
1665 | /* dst = dst & src/imm */ | ||
1666 | /* dst = dst ^ src/imm */ | ||
1667 | /* dst = dst * src/imm */ | ||
1668 | /* dst = dst << src */ | ||
1669 | /* dst = dst >> src */ | ||
1670 | case BPF_ALU | BPF_ADD | BPF_K: | ||
1671 | case BPF_ALU | BPF_ADD | BPF_X: | ||
1672 | case BPF_ALU | BPF_SUB | BPF_K: | ||
1673 | case BPF_ALU | BPF_SUB | BPF_X: | ||
1674 | case BPF_ALU | BPF_OR | BPF_K: | ||
1675 | case BPF_ALU | BPF_OR | BPF_X: | ||
1676 | case BPF_ALU | BPF_AND | BPF_K: | ||
1677 | case BPF_ALU | BPF_AND | BPF_X: | ||
1678 | case BPF_ALU | BPF_XOR | BPF_K: | ||
1679 | case BPF_ALU | BPF_XOR | BPF_X: | ||
1680 | case BPF_ALU64 | BPF_ADD | BPF_K: | ||
1681 | case BPF_ALU64 | BPF_ADD | BPF_X: | ||
1682 | case BPF_ALU64 | BPF_SUB | BPF_K: | ||
1683 | case BPF_ALU64 | BPF_SUB | BPF_X: | ||
1684 | case BPF_ALU64 | BPF_OR | BPF_K: | ||
1685 | case BPF_ALU64 | BPF_OR | BPF_X: | ||
1686 | case BPF_ALU64 | BPF_AND | BPF_K: | ||
1687 | case BPF_ALU64 | BPF_AND | BPF_X: | ||
1688 | case BPF_ALU64 | BPF_XOR | BPF_K: | ||
1689 | case BPF_ALU64 | BPF_XOR | BPF_X: | ||
1690 | switch (BPF_SRC(code)) { | ||
1691 | case BPF_X: | ||
1692 | emit_ia32_alu_r64(is64, BPF_OP(code), dst, | ||
1693 | src, dstk, sstk, &prog); | ||
1694 | break; | ||
1695 | case BPF_K: | ||
1696 | emit_ia32_alu_i64(is64, BPF_OP(code), dst, | ||
1697 | imm32, dstk, &prog); | ||
1698 | break; | ||
1699 | } | ||
1700 | break; | ||
1701 | case BPF_ALU | BPF_MUL | BPF_K: | ||
1702 | case BPF_ALU | BPF_MUL | BPF_X: | ||
1703 | switch (BPF_SRC(code)) { | ||
1704 | case BPF_X: | ||
1705 | emit_ia32_mul_r(dst_lo, src_lo, dstk, | ||
1706 | sstk, &prog); | ||
1707 | break; | ||
1708 | case BPF_K: | ||
1709 | /* mov ecx,imm32*/ | ||
1710 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), | ||
1711 | imm32); | ||
1712 | emit_ia32_mul_r(dst_lo, IA32_ECX, dstk, | ||
1713 | false, &prog); | ||
1714 | break; | ||
1715 | } | ||
1716 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
1717 | break; | ||
1718 | case BPF_ALU | BPF_LSH | BPF_X: | ||
1719 | case BPF_ALU | BPF_RSH | BPF_X: | ||
1720 | case BPF_ALU | BPF_ARSH | BPF_K: | ||
1721 | case BPF_ALU | BPF_ARSH | BPF_X: | ||
1722 | switch (BPF_SRC(code)) { | ||
1723 | case BPF_X: | ||
1724 | emit_ia32_shift_r(BPF_OP(code), dst_lo, src_lo, | ||
1725 | dstk, sstk, &prog); | ||
1726 | break; | ||
1727 | case BPF_K: | ||
1728 | /* mov ecx,imm32*/ | ||
1729 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), | ||
1730 | imm32); | ||
1731 | emit_ia32_shift_r(BPF_OP(code), dst_lo, | ||
1732 | IA32_ECX, dstk, false, | ||
1733 | &prog); | ||
1734 | break; | ||
1735 | } | ||
1736 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
1737 | break; | ||
1738 | /* dst = dst / src(imm) */ | ||
1739 | /* dst = dst % src(imm) */ | ||
1740 | case BPF_ALU | BPF_DIV | BPF_K: | ||
1741 | case BPF_ALU | BPF_DIV | BPF_X: | ||
1742 | case BPF_ALU | BPF_MOD | BPF_K: | ||
1743 | case BPF_ALU | BPF_MOD | BPF_X: | ||
1744 | switch (BPF_SRC(code)) { | ||
1745 | case BPF_X: | ||
1746 | emit_ia32_div_mod_r(BPF_OP(code), dst_lo, | ||
1747 | src_lo, dstk, sstk, &prog); | ||
1748 | break; | ||
1749 | case BPF_K: | ||
1750 | /* mov ecx,imm32*/ | ||
1751 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), | ||
1752 | imm32); | ||
1753 | emit_ia32_div_mod_r(BPF_OP(code), dst_lo, | ||
1754 | IA32_ECX, dstk, false, | ||
1755 | &prog); | ||
1756 | break; | ||
1757 | } | ||
1758 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
1759 | break; | ||
1760 | case BPF_ALU64 | BPF_DIV | BPF_K: | ||
1761 | case BPF_ALU64 | BPF_DIV | BPF_X: | ||
1762 | case BPF_ALU64 | BPF_MOD | BPF_K: | ||
1763 | case BPF_ALU64 | BPF_MOD | BPF_X: | ||
1764 | goto notyet; | ||
1765 | /* dst = dst >> imm */ | ||
1766 | /* dst = dst << imm */ | ||
1767 | case BPF_ALU | BPF_RSH | BPF_K: | ||
1768 | case BPF_ALU | BPF_LSH | BPF_K: | ||
1769 | if (unlikely(imm32 > 31)) | ||
1770 | return -EINVAL; | ||
1771 | /* mov ecx,imm32*/ | ||
1772 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); | ||
1773 | emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk, | ||
1774 | false, &prog); | ||
1775 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
1776 | break; | ||
1777 | /* dst = dst << imm */ | ||
1778 | case BPF_ALU64 | BPF_LSH | BPF_K: | ||
1779 | if (unlikely(imm32 > 63)) | ||
1780 | return -EINVAL; | ||
1781 | emit_ia32_lsh_i64(dst, imm32, dstk, &prog); | ||
1782 | break; | ||
1783 | /* dst = dst >> imm */ | ||
1784 | case BPF_ALU64 | BPF_RSH | BPF_K: | ||
1785 | if (unlikely(imm32 > 63)) | ||
1786 | return -EINVAL; | ||
1787 | emit_ia32_rsh_i64(dst, imm32, dstk, &prog); | ||
1788 | break; | ||
1789 | /* dst = dst << src */ | ||
1790 | case BPF_ALU64 | BPF_LSH | BPF_X: | ||
1791 | emit_ia32_lsh_r64(dst, src, dstk, sstk, &prog); | ||
1792 | break; | ||
1793 | /* dst = dst >> src */ | ||
1794 | case BPF_ALU64 | BPF_RSH | BPF_X: | ||
1795 | emit_ia32_rsh_r64(dst, src, dstk, sstk, &prog); | ||
1796 | break; | ||
1797 | /* dst = dst >> src (signed) */ | ||
1798 | case BPF_ALU64 | BPF_ARSH | BPF_X: | ||
1799 | emit_ia32_arsh_r64(dst, src, dstk, sstk, &prog); | ||
1800 | break; | ||
1801 | /* dst = dst >> imm (signed) */ | ||
1802 | case BPF_ALU64 | BPF_ARSH | BPF_K: | ||
1803 | if (unlikely(imm32 > 63)) | ||
1804 | return -EINVAL; | ||
1805 | emit_ia32_arsh_i64(dst, imm32, dstk, &prog); | ||
1806 | break; | ||
1807 | /* dst = -dst */ | ||
1808 | case BPF_ALU | BPF_NEG: | ||
1809 | emit_ia32_alu_i(is64, false, BPF_OP(code), | ||
1810 | dst_lo, 0, dstk, &prog); | ||
1811 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
1812 | break; | ||
1813 | /* dst = -dst (64 bit) */ | ||
1814 | case BPF_ALU64 | BPF_NEG: | ||
1815 | emit_ia32_neg64(dst, dstk, &prog); | ||
1816 | break; | ||
1817 | /* dst = dst * src/imm */ | ||
1818 | case BPF_ALU64 | BPF_MUL | BPF_X: | ||
1819 | case BPF_ALU64 | BPF_MUL | BPF_K: | ||
1820 | switch (BPF_SRC(code)) { | ||
1821 | case BPF_X: | ||
1822 | emit_ia32_mul_r64(dst, src, dstk, sstk, &prog); | ||
1823 | break; | ||
1824 | case BPF_K: | ||
1825 | emit_ia32_mul_i64(dst, imm32, dstk, &prog); | ||
1826 | break; | ||
1827 | } | ||
1828 | break; | ||
1829 | /* dst = htole(dst) */ | ||
1830 | case BPF_ALU | BPF_END | BPF_FROM_LE: | ||
1831 | emit_ia32_to_le_r64(dst, imm32, dstk, &prog); | ||
1832 | break; | ||
1833 | /* dst = htobe(dst) */ | ||
1834 | case BPF_ALU | BPF_END | BPF_FROM_BE: | ||
1835 | emit_ia32_to_be_r64(dst, imm32, dstk, &prog); | ||
1836 | break; | ||
1837 | /* dst = imm64 */ | ||
1838 | case BPF_LD | BPF_IMM | BPF_DW: { | ||
1839 | s32 hi, lo = imm32; | ||
1840 | |||
1841 | hi = insn[1].imm; | ||
1842 | emit_ia32_mov_i(dst_lo, lo, dstk, &prog); | ||
1843 | emit_ia32_mov_i(dst_hi, hi, dstk, &prog); | ||
1844 | insn++; | ||
1845 | i++; | ||
1846 | break; | ||
1847 | } | ||
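		/*
		 * Illustrative note (not emitted code): BPF_LD | BPF_IMM |
		 * BPF_DW is the only 16-byte eBPF instruction; the 64-bit
		 * immediate is split across the pair as
		 *
		 *	imm64 = (u32)insn[0].imm | ((u64)(u32)insn[1].imm << 32);
		 *
		 * hence the extra insn++/i++ above to consume both halves.
		 */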
1848 | /* ST: *(u8*)(dst_reg + off) = imm */ | ||
1849 | case BPF_ST | BPF_MEM | BPF_H: | ||
1850 | case BPF_ST | BPF_MEM | BPF_B: | ||
1851 | case BPF_ST | BPF_MEM | BPF_W: | ||
1852 | case BPF_ST | BPF_MEM | BPF_DW: | ||
1853 | if (dstk) | ||
1854 | /* mov eax,dword ptr [ebp+off] */ | ||
1855 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
1856 | STACK_VAR(dst_lo)); | ||
1857 | else | ||
1858 | /* mov eax,dst_lo */ | ||
1859 | EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
1860 | |||
1861 | switch (BPF_SIZE(code)) { | ||
1862 | case BPF_B: | ||
1863 | EMIT(0xC6, 1); break; | ||
1864 | case BPF_H: | ||
1865 | EMIT2(0x66, 0xC7); break; | ||
1866 | case BPF_W: | ||
1867 | case BPF_DW: | ||
1868 | EMIT(0xC7, 1); break; | ||
1869 | } | ||
1870 | |||
1871 | if (is_imm8(insn->off)) | ||
1872 | EMIT2(add_1reg(0x40, IA32_EAX), insn->off); | ||
1873 | else | ||
1874 | EMIT1_off32(add_1reg(0x80, IA32_EAX), | ||
1875 | insn->off); | ||
1876 | EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(code))); | ||
1877 | |||
1878 | if (BPF_SIZE(code) == BPF_DW) { | ||
1879 | u32 hi; | ||
1880 | |||
1881 | hi = imm32 & (1<<31) ? (u32)~0 : 0; | ||
1882 | EMIT2_off32(0xC7, add_1reg(0x80, IA32_EAX), | ||
1883 | insn->off + 4); | ||
1884 | EMIT(hi, 4); | ||
1885 | } | ||
1886 | break; | ||
1887 | |||
1888 | /* STX: *(u8*)(dst_reg + off) = src_reg */ | ||
1889 | case BPF_STX | BPF_MEM | BPF_B: | ||
1890 | case BPF_STX | BPF_MEM | BPF_H: | ||
1891 | case BPF_STX | BPF_MEM | BPF_W: | ||
1892 | case BPF_STX | BPF_MEM | BPF_DW: | ||
1893 | if (dstk) | ||
1894 | /* mov eax,dword ptr [ebp+off] */ | ||
1895 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
1896 | STACK_VAR(dst_lo)); | ||
1897 | else | ||
1898 | /* mov eax,dst_lo */ | ||
1899 | EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
1900 | |||
1901 | if (sstk) | ||
1902 | /* mov edx,dword ptr [ebp+off] */ | ||
1903 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
1904 | STACK_VAR(src_lo)); | ||
1905 | else | ||
1906 | /* mov edx,src_lo */ | ||
1907 | EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EDX)); | ||
1908 | |||
1909 | switch (BPF_SIZE(code)) { | ||
1910 | case BPF_B: | ||
1911 | EMIT(0x88, 1); break; | ||
1912 | case BPF_H: | ||
1913 | EMIT2(0x66, 0x89); break; | ||
1914 | case BPF_W: | ||
1915 | case BPF_DW: | ||
1916 | EMIT(0x89, 1); break; | ||
1917 | } | ||
1918 | |||
1919 | if (is_imm8(insn->off)) | ||
1920 | EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX), | ||
1921 | insn->off); | ||
1922 | else | ||
1923 | EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX), | ||
1924 | insn->off); | ||
1925 | |||
1926 | if (BPF_SIZE(code) == BPF_DW) { | ||
1927 | if (sstk) | ||
1928 | /* mov edx,dword ptr [ebp+off] */ | ||
1929 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, | ||
1930 | IA32_EDX), | ||
1931 | STACK_VAR(src_hi)); | ||
1932 | else | ||
1933 | /* mov edx,src_hi */ | ||
1934 | EMIT2(0x8B, add_2reg(0xC0, src_hi, | ||
1935 | IA32_EDX)); | ||
1936 | EMIT1(0x89); | ||
1937 | if (is_imm8(insn->off + 4)) { | ||
1938 | EMIT2(add_2reg(0x40, IA32_EAX, | ||
1939 | IA32_EDX), | ||
1940 | insn->off + 4); | ||
1941 | } else { | ||
1942 | EMIT1(add_2reg(0x80, IA32_EAX, | ||
1943 | IA32_EDX)); | ||
1944 | EMIT(insn->off + 4, 4); | ||
1945 | } | ||
1946 | } | ||
1947 | break; | ||
1948 | |||
1949 | /* LDX: dst_reg = *(u8*)(src_reg + off) */ | ||
1950 | case BPF_LDX | BPF_MEM | BPF_B: | ||
1951 | case BPF_LDX | BPF_MEM | BPF_H: | ||
1952 | case BPF_LDX | BPF_MEM | BPF_W: | ||
1953 | case BPF_LDX | BPF_MEM | BPF_DW: | ||
1954 | if (sstk) | ||
1955 | /* mov eax,dword ptr [ebp+off] */ | ||
1956 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
1957 | STACK_VAR(src_lo)); | ||
1958 | else | ||
1959 | /* mov eax,src_lo */ | ||
1960 | EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EAX)); | ||
1961 | |||
1962 | switch (BPF_SIZE(code)) { | ||
1963 | case BPF_B: | ||
1964 | EMIT2(0x0F, 0xB6); break; | ||
1965 | case BPF_H: | ||
1966 | EMIT2(0x0F, 0xB7); break; | ||
1967 | case BPF_W: | ||
1968 | case BPF_DW: | ||
1969 | EMIT(0x8B, 1); break; | ||
1970 | } | ||
1971 | |||
1972 | if (is_imm8(insn->off)) | ||
1973 | EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX), | ||
1974 | insn->off); | ||
1975 | else | ||
1976 | EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX), | ||
1977 | insn->off); | ||
1978 | |||
1979 | if (dstk) | ||
1980 | /* mov dword ptr [ebp+off],edx */ | ||
1981 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
1982 | STACK_VAR(dst_lo)); | ||
1983 | else | ||
1984 | /* mov dst_lo,edx */ | ||
1985 | EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EDX)); | ||
1986 | switch (BPF_SIZE(code)) { | ||
1987 | case BPF_B: | ||
1988 | case BPF_H: | ||
1989 | case BPF_W: | ||
1990 | if (dstk) { | ||
1991 | EMIT3(0xC7, add_1reg(0x40, IA32_EBP), | ||
1992 | STACK_VAR(dst_hi)); | ||
1993 | EMIT(0x0, 4); | ||
1994 | } else { | ||
1995 | EMIT3(0xC7, add_1reg(0xC0, dst_hi), 0); | ||
1996 | } | ||
1997 | break; | ||
1998 | case BPF_DW: | ||
1999 | EMIT2_off32(0x8B, | ||
2000 | add_2reg(0x80, IA32_EAX, IA32_EDX), | ||
2001 | insn->off + 4); | ||
2002 | if (dstk) | ||
2003 | EMIT3(0x89, | ||
2004 | add_2reg(0x40, IA32_EBP, | ||
2005 | IA32_EDX), | ||
2006 | STACK_VAR(dst_hi)); | ||
2007 | else | ||
2008 | EMIT2(0x89, | ||
2009 | add_2reg(0xC0, dst_hi, IA32_EDX)); | ||
2010 | break; | ||
2011 | default: | ||
2012 | break; | ||
2013 | } | ||
2014 | break; | ||
2015 | /* call */ | ||
2016 | case BPF_JMP | BPF_CALL: | ||
2017 | { | ||
2018 | const u8 *r1 = bpf2ia32[BPF_REG_1]; | ||
2019 | const u8 *r2 = bpf2ia32[BPF_REG_2]; | ||
2020 | const u8 *r3 = bpf2ia32[BPF_REG_3]; | ||
2021 | const u8 *r4 = bpf2ia32[BPF_REG_4]; | ||
2022 | const u8 *r5 = bpf2ia32[BPF_REG_5]; | ||
2023 | |||
2024 | if (insn->src_reg == BPF_PSEUDO_CALL) | ||
2025 | goto notyet; | ||
2026 | |||
2027 | func = (u8 *) __bpf_call_base + imm32; | ||
2028 | jmp_offset = func - (image + addrs[i]); | ||
2029 | |||
2030 | if (!imm32 || !is_simm32(jmp_offset)) { | ||
2031 | pr_err("unsupported BPF func %d addr %p image %p\n", | ||
2032 | imm32, func, image); | ||
2033 | return -EINVAL; | ||
2034 | } | ||
2035 | |||
2036 | /* mov eax,dword ptr [ebp+off] */ | ||
2037 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
2038 | STACK_VAR(r1[0])); | ||
2039 | /* mov edx,dword ptr [ebp+off] */ | ||
2040 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
2041 | STACK_VAR(r1[1])); | ||
2042 | |||
2043 | emit_push_r64(r5, &prog); | ||
2044 | emit_push_r64(r4, &prog); | ||
2045 | emit_push_r64(r3, &prog); | ||
2046 | emit_push_r64(r2, &prog); | ||
2047 | |||
2048 | EMIT1_off32(0xE8, jmp_offset + 9); | ||
2049 | |||
2050 | /* mov dword ptr [ebp+off],eax */ | ||
2051 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
2052 | STACK_VAR(r0[0])); | ||
2053 | /* mov dword ptr [ebp+off],edx */ | ||
2054 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
2055 | STACK_VAR(r0[1])); | ||
2056 | |||
2057 | /* add esp,32 */ | ||
2058 | EMIT3(0x83, add_1reg(0xC0, IA32_ESP), 32); | ||
2059 | break; | ||
2060 | } | ||
2061 | case BPF_JMP | BPF_TAIL_CALL: | ||
2062 | emit_bpf_tail_call(&prog); | ||
2063 | break; | ||
2064 | |||
2065 | /* cond jump */ | ||
2066 | case BPF_JMP | BPF_JEQ | BPF_X: | ||
2067 | case BPF_JMP | BPF_JNE | BPF_X: | ||
2068 | case BPF_JMP | BPF_JGT | BPF_X: | ||
2069 | case BPF_JMP | BPF_JLT | BPF_X: | ||
2070 | case BPF_JMP | BPF_JGE | BPF_X: | ||
2071 | case BPF_JMP | BPF_JLE | BPF_X: | ||
2072 | case BPF_JMP | BPF_JSGT | BPF_X: | ||
2073 | case BPF_JMP | BPF_JSLE | BPF_X: | ||
2074 | case BPF_JMP | BPF_JSLT | BPF_X: | ||
2075 | case BPF_JMP | BPF_JSGE | BPF_X: { | ||
2076 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
2077 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
2078 | u8 sreg_lo = sstk ? IA32_ECX : src_lo; | ||
2079 | u8 sreg_hi = sstk ? IA32_EBX : src_hi; | ||
2080 | |||
2081 | if (dstk) { | ||
2082 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
2083 | STACK_VAR(dst_lo)); | ||
2084 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
2085 | STACK_VAR(dst_hi)); | ||
2086 | } | ||
2087 | |||
2088 | if (sstk) { | ||
2089 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
2090 | STACK_VAR(src_lo)); | ||
2091 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), | ||
2092 | STACK_VAR(src_hi)); | ||
2093 | } | ||
2094 | |||
2095 | /* cmp dreg_hi,sreg_hi */ | ||
2096 | EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); | ||
2097 | EMIT2(IA32_JNE, 2); | ||
2098 | /* cmp dreg_lo,sreg_lo */ | ||
2099 | EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); | ||
2100 | goto emit_cond_jmp; | ||
2101 | } | ||
2102 | case BPF_JMP | BPF_JSET | BPF_X: { | ||
2103 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
2104 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
2105 | u8 sreg_lo = sstk ? IA32_ECX : src_lo; | ||
2106 | u8 sreg_hi = sstk ? IA32_EBX : src_hi; | ||
2107 | |||
2108 | if (dstk) { | ||
2109 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
2110 | STACK_VAR(dst_lo)); | ||
2111 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
2112 | STACK_VAR(dst_hi)); | ||
2113 | } | ||
2114 | |||
2115 | if (sstk) { | ||
2116 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
2117 | STACK_VAR(src_lo)); | ||
2118 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), | ||
2119 | STACK_VAR(src_hi)); | ||
2120 | } | ||
2121 | /* and dreg_lo,sreg_lo */ | ||
2122 | EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo)); | ||
2123 | /* and dreg_hi,sreg_hi */ | ||
2124 | EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi)); | ||
2125 | /* or dreg_lo,dreg_hi */ | ||
2126 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
2127 | goto emit_cond_jmp; | ||
2128 | } | ||
2129 | case BPF_JMP | BPF_JSET | BPF_K: { | ||
2130 | u32 hi; | ||
2131 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
2132 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
2133 | u8 sreg_lo = IA32_ECX; | ||
2134 | u8 sreg_hi = IA32_EBX; | ||
2135 | |||
2136 | if (dstk) { | ||
2137 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
2138 | STACK_VAR(dst_lo)); | ||
2139 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
2140 | STACK_VAR(dst_hi)); | ||
2141 | } | ||
2142 | hi = imm32 & (1<<31) ? (u32)~0 : 0; | ||
2143 | |||
2144 | /* mov ecx,imm32 */ | ||
2145 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); | ||
2146 | /* mov ebx,imm32 */ | ||
2147 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi); | ||
2148 | |||
2149 | /* and dreg_lo,sreg_lo */ | ||
2150 | EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo)); | ||
2151 | /* and dreg_hi,sreg_hi */ | ||
2152 | EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi)); | ||
2153 | /* or dreg_lo,dreg_hi */ | ||
2154 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
2155 | goto emit_cond_jmp; | ||
2156 | } | ||
2157 | case BPF_JMP | BPF_JEQ | BPF_K: | ||
2158 | case BPF_JMP | BPF_JNE | BPF_K: | ||
2159 | case BPF_JMP | BPF_JGT | BPF_K: | ||
2160 | case BPF_JMP | BPF_JLT | BPF_K: | ||
2161 | case BPF_JMP | BPF_JGE | BPF_K: | ||
2162 | case BPF_JMP | BPF_JLE | BPF_K: | ||
2163 | case BPF_JMP | BPF_JSGT | BPF_K: | ||
2164 | case BPF_JMP | BPF_JSLE | BPF_K: | ||
2165 | case BPF_JMP | BPF_JSLT | BPF_K: | ||
2166 | case BPF_JMP | BPF_JSGE | BPF_K: { | ||
2167 | u32 hi; | ||
2168 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
2169 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
2170 | u8 sreg_lo = IA32_ECX; | ||
2171 | u8 sreg_hi = IA32_EBX; | ||
2172 | |||
2173 | if (dstk) { | ||
2174 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
2175 | STACK_VAR(dst_lo)); | ||
2176 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
2177 | STACK_VAR(dst_hi)); | ||
2178 | } | ||
2179 | |||
2180 | hi = imm32 & (1<<31) ? (u32)~0 : 0; | ||
2181 | /* mov ecx,imm32 */ | ||
2182 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); | ||
2183 | /* mov ebx,imm32 */ | ||
2184 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi); | ||
2185 | |||
2186 | /* cmp dreg_hi,sreg_hi */ | ||
2187 | EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); | ||
2188 | EMIT2(IA32_JNE, 2); | ||
2189 | /* cmp dreg_lo,sreg_lo */ | ||
2190 | EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); | ||
2191 | |||
2192 | emit_cond_jmp: /* Convert BPF opcode to x86 */ | ||
2193 | switch (BPF_OP(code)) { | ||
2194 | case BPF_JEQ: | ||
2195 | jmp_cond = IA32_JE; | ||
2196 | break; | ||
2197 | case BPF_JSET: | ||
2198 | case BPF_JNE: | ||
2199 | jmp_cond = IA32_JNE; | ||
2200 | break; | ||
2201 | case BPF_JGT: | ||
2202 | /* GT is unsigned '>', JA in x86 */ | ||
2203 | jmp_cond = IA32_JA; | ||
2204 | break; | ||
2205 | case BPF_JLT: | ||
2206 | /* LT is unsigned '<', JB in x86 */ | ||
2207 | jmp_cond = IA32_JB; | ||
2208 | break; | ||
2209 | case BPF_JGE: | ||
2210 | /* GE is unsigned '>=', JAE in x86 */ | ||
2211 | jmp_cond = IA32_JAE; | ||
2212 | break; | ||
2213 | case BPF_JLE: | ||
2214 | /* LE is unsigned '<=', JBE in x86 */ | ||
2215 | jmp_cond = IA32_JBE; | ||
2216 | break; | ||
2217 | case BPF_JSGT: | ||
2218 | /* Signed '>', GT in x86 */ | ||
2219 | jmp_cond = IA32_JG; | ||
2220 | break; | ||
2221 | case BPF_JSLT: | ||
2222 | /* Signed '<', LT in x86 */ | ||
2223 | jmp_cond = IA32_JL; | ||
2224 | break; | ||
2225 | case BPF_JSGE: | ||
2226 | /* Signed '>=', GE in x86 */ | ||
2227 | jmp_cond = IA32_JGE; | ||
2228 | break; | ||
2229 | case BPF_JSLE: | ||
2230 | /* Signed '<=', LE in x86 */ | ||
2231 | jmp_cond = IA32_JLE; | ||
2232 | break; | ||
2233 | default: /* to silence GCC warning */ | ||
2234 | return -EFAULT; | ||
2235 | } | ||
2236 | jmp_offset = addrs[i + insn->off] - addrs[i]; | ||
2237 | if (is_imm8(jmp_offset)) { | ||
2238 | EMIT2(jmp_cond, jmp_offset); | ||
2239 | } else if (is_simm32(jmp_offset)) { | ||
2240 | EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset); | ||
2241 | } else { | ||
2242 | pr_err("cond_jmp gen bug %llx\n", jmp_offset); | ||
2243 | return -EFAULT; | ||
2244 | } | ||
2245 | |||
2246 | break; | ||
2247 | } | ||
2248 | case BPF_JMP | BPF_JA: | ||
2249 | if (insn->off == -1) | ||
2250 | /* -1 jmp instructions will always jump | ||
2251 | * backwards two bytes. Explicitly handling | ||
2252 | * this case avoids wasting too many passes | ||
2253 | * when there are long sequences of replaced | ||
2254 | * dead code. | ||
2255 | */ | ||
2256 | jmp_offset = -2; | ||
2257 | else | ||
2258 | jmp_offset = addrs[i + insn->off] - addrs[i]; | ||
2259 | |||
2260 | if (!jmp_offset) | ||
2261 | /* Optimize out nop jumps */ | ||
2262 | break; | ||
2263 | emit_jmp: | ||
2264 | if (is_imm8(jmp_offset)) { | ||
2265 | EMIT2(0xEB, jmp_offset); | ||
2266 | } else if (is_simm32(jmp_offset)) { | ||
2267 | EMIT1_off32(0xE9, jmp_offset); | ||
2268 | } else { | ||
2269 | pr_err("jmp gen bug %llx\n", jmp_offset); | ||
2270 | return -EFAULT; | ||
2271 | } | ||
2272 | break; | ||
2273 | /* STX XADD: lock *(u32 *)(dst + off) += src */ | ||
2274 | case BPF_STX | BPF_XADD | BPF_W: | ||
2275 | /* STX XADD: lock *(u64 *)(dst + off) += src */ | ||
2276 | case BPF_STX | BPF_XADD | BPF_DW: | ||
2277 | goto notyet; | ||
2278 | case BPF_JMP | BPF_EXIT: | ||
2279 | if (seen_exit) { | ||
2280 | jmp_offset = ctx->cleanup_addr - addrs[i]; | ||
2281 | goto emit_jmp; | ||
2282 | } | ||
2283 | seen_exit = true; | ||
2284 | /* Update cleanup_addr */ | ||
2285 | ctx->cleanup_addr = proglen; | ||
2286 | emit_epilogue(&prog, bpf_prog->aux->stack_depth); | ||
2287 | break; | ||
2288 | notyet: | ||
2289 | pr_info_once("*** NOT YET: opcode %02x ***\n", code); | ||
2290 | return -EFAULT; | ||
2291 | default: | ||
2292 | /* | ||
2293 | * This error occurs if a new instruction was added to the | ||
2294 | * interpreter but not to the JIT, or if there is junk in | ||
2295 | * bpf_prog. | ||
2296 | */ | ||
2297 | pr_err("bpf_jit: unknown opcode %02x\n", code); | ||
2298 | return -EINVAL; | ||
2299 | } | ||
2300 | |||
2301 | ilen = prog - temp; | ||
2302 | if (ilen > BPF_MAX_INSN_SIZE) { | ||
2303 | pr_err("bpf_jit: fatal insn size error\n"); | ||
2304 | return -EFAULT; | ||
2305 | } | ||
2306 | |||
2307 | if (image) { | ||
2308 | if (unlikely(proglen + ilen > oldproglen)) { | ||
2309 | pr_err("bpf_jit: fatal error\n"); | ||
2310 | return -EFAULT; | ||
2311 | } | ||
2312 | memcpy(image + proglen, temp, ilen); | ||
2313 | } | ||
2314 | proglen += ilen; | ||
2315 | addrs[i] = proglen; | ||
2316 | prog = temp; | ||
2317 | } | ||
2318 | return proglen; | ||
2319 | } | ||
2320 | |||
2321 | struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) | ||
2322 | { | ||
2323 | struct bpf_binary_header *header = NULL; | ||
2324 | struct bpf_prog *tmp, *orig_prog = prog; | ||
2325 | int proglen, oldproglen = 0; | ||
2326 | struct jit_context ctx = {}; | ||
2327 | bool tmp_blinded = false; | ||
2328 | u8 *image = NULL; | ||
2329 | int *addrs; | ||
2330 | int pass; | ||
2331 | int i; | ||
2332 | |||
2333 | if (!prog->jit_requested) | ||
2334 | return orig_prog; | ||
2335 | |||
2336 | tmp = bpf_jit_blind_constants(prog); | ||
2337 | /* | ||
2338 | * If blinding was requested and we failed during blinding, | ||
2339 | * we must fall back to the interpreter. | ||
2340 | */ | ||
2341 | if (IS_ERR(tmp)) | ||
2342 | return orig_prog; | ||
2343 | if (tmp != prog) { | ||
2344 | tmp_blinded = true; | ||
2345 | prog = tmp; | ||
2346 | } | ||
2347 | |||
2348 | addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL); | ||
2349 | if (!addrs) { | ||
2350 | prog = orig_prog; | ||
2351 | goto out; | ||
2352 | } | ||
2353 | |||
2354 | /* | ||
2355 | * Before the first pass, make a rough estimate of addrs[]: | ||
2356 | * each BPF instruction is translated to fewer than 64 bytes. | ||
2357 | */ | ||
2358 | for (proglen = 0, i = 0; i < prog->len; i++) { | ||
2359 | proglen += 64; | ||
2360 | addrs[i] = proglen; | ||
2361 | } | ||
2362 | ctx.cleanup_addr = proglen; | ||
2363 | |||
2364 | /* | ||
2365 | * The JITed image shrinks with every pass and the loop iterates | ||
2366 | * until the image stops shrinking. Very large BPF programs may | ||
2367 | * converge only on the last pass. In such a case, do one more | ||
2368 | * pass to emit the final image. | ||
2369 | */ | ||
2370 | for (pass = 0; pass < 20 || image; pass++) { | ||
2371 | proglen = do_jit(prog, addrs, image, oldproglen, &ctx); | ||
2372 | if (proglen <= 0) { | ||
2373 | out_image: | ||
2374 | image = NULL; | ||
2375 | if (header) | ||
2376 | bpf_jit_binary_free(header); | ||
2377 | prog = orig_prog; | ||
2378 | goto out_addrs; | ||
2379 | } | ||
2380 | if (image) { | ||
2381 | if (proglen != oldproglen) { | ||
2382 | pr_err("bpf_jit: proglen=%d != oldproglen=%d\n", | ||
2383 | proglen, oldproglen); | ||
2384 | goto out_image; | ||
2385 | } | ||
2386 | break; | ||
2387 | } | ||
2388 | if (proglen == oldproglen) { | ||
2389 | header = bpf_jit_binary_alloc(proglen, &image, | ||
2390 | 1, jit_fill_hole); | ||
2391 | if (!header) { | ||
2392 | prog = orig_prog; | ||
2393 | goto out_addrs; | ||
2394 | } | ||
2395 | } | ||
2396 | oldproglen = proglen; | ||
2397 | cond_resched(); | ||
2398 | } | ||
2399 | |||
2400 | if (bpf_jit_enable > 1) | ||
2401 | bpf_jit_dump(prog->len, proglen, pass + 1, image); | ||
2402 | |||
2403 | if (image) { | ||
2404 | bpf_jit_binary_lock_ro(header); | ||
2405 | prog->bpf_func = (void *)image; | ||
2406 | prog->jited = 1; | ||
2407 | prog->jited_len = proglen; | ||
2408 | } else { | ||
2409 | prog = orig_prog; | ||
2410 | } | ||
2411 | |||
2412 | out_addrs: | ||
2413 | kfree(addrs); | ||
2414 | out: | ||
2415 | if (tmp_blinded) | ||
2416 | bpf_jit_prog_release_other(prog, prog == orig_prog ? | ||
2417 | tmp : orig_prog); | ||
2418 | return prog; | ||
2419 | } | ||
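
The pass loop above relies on monotone shrinking: addrs[] starts as a 64-bytes-per-insn over-estimate, and tighter addresses can only move jump targets closer, letting 2-byte imm8 jump forms replace 6-byte imm32 ones. Each pass therefore emits an image no larger than the last, and once two consecutive passes agree on proglen the offsets are a fixed point. In sketch form (illustrative pseudocode, not the kernel loop verbatim):

	/* fixed-point iteration over instruction addresses */
	do {
		prev = proglen;
		proglen = emit_all(prog, addrs);	/* re-emits, updates addrs[] */
	} while (proglen != prev);
	/* then allocate the image and run one final pass into it */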
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c index 7e298148ca26..cb87fccb9f6a 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2017-2018 Netronome Systems, Inc. |
3 | * | 3 | * |
4 | * This software is dual licensed under the GNU General Public License Version 2, | 4 | * This software is dual licensed under the GNU General Public License Version 2, |
5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
@@ -102,6 +102,15 @@ nfp_bpf_cmsg_map_req_alloc(struct nfp_app_bpf *bpf, unsigned int n) | |||
102 | return nfp_bpf_cmsg_alloc(bpf, size); | 102 | return nfp_bpf_cmsg_alloc(bpf, size); |
103 | } | 103 | } |
104 | 104 | ||
105 | static u8 nfp_bpf_cmsg_get_type(struct sk_buff *skb) | ||
106 | { | ||
107 | struct cmsg_hdr *hdr; | ||
108 | |||
109 | hdr = (struct cmsg_hdr *)skb->data; | ||
110 | |||
111 | return hdr->type; | ||
112 | } | ||
113 | |||
105 | static unsigned int nfp_bpf_cmsg_get_tag(struct sk_buff *skb) | 114 | static unsigned int nfp_bpf_cmsg_get_tag(struct sk_buff *skb) |
106 | { | 115 | { |
107 | struct cmsg_hdr *hdr; | 116 | struct cmsg_hdr *hdr; |
@@ -431,6 +440,11 @@ void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb) | |||
431 | goto err_free; | 440 | goto err_free; |
432 | } | 441 | } |
433 | 442 | ||
443 | if (nfp_bpf_cmsg_get_type(skb) == CMSG_TYPE_BPF_EVENT) { | ||
444 | nfp_bpf_event_output(bpf, skb); | ||
445 | return; | ||
446 | } | ||
447 | |||
434 | nfp_ctrl_lock(bpf->app->ctrl); | 448 | nfp_ctrl_lock(bpf->app->ctrl); |
435 | 449 | ||
436 | tag = nfp_bpf_cmsg_get_tag(skb); | 450 | tag = nfp_bpf_cmsg_get_tag(skb); |
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h index 39639ac28b01..3dbc21653ce5 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2017-2018 Netronome Systems, Inc. |
3 | * | 3 | * |
4 | * This software is dual licensed under the GNU General Public License Version 2, | 4 | * This software is dual licensed under the GNU General Public License Version 2, |
5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
@@ -37,6 +37,14 @@ | |||
37 | #include <linux/bitops.h> | 37 | #include <linux/bitops.h> |
38 | #include <linux/types.h> | 38 | #include <linux/types.h> |
39 | 39 | ||
40 | /* The kernel's enum bpf_reg_type is not uABI, so it may change and break | ||
41 | * our FW ABI. In that case we will do the translation in the driver. | ||
42 | */ | ||
43 | #define NFP_BPF_SCALAR_VALUE 1 | ||
44 | #define NFP_BPF_MAP_VALUE 4 | ||
45 | #define NFP_BPF_STACK 6 | ||
46 | #define NFP_BPF_PACKET_DATA 8 | ||
47 | |||
40 | enum bpf_cap_tlv_type { | 48 | enum bpf_cap_tlv_type { |
41 | NFP_BPF_CAP_TYPE_FUNC = 1, | 49 | NFP_BPF_CAP_TYPE_FUNC = 1, |
42 | NFP_BPF_CAP_TYPE_ADJUST_HEAD = 2, | 50 | NFP_BPF_CAP_TYPE_ADJUST_HEAD = 2, |
@@ -81,6 +89,7 @@ enum nfp_bpf_cmsg_type { | |||
81 | CMSG_TYPE_MAP_DELETE = 5, | 89 | CMSG_TYPE_MAP_DELETE = 5, |
82 | CMSG_TYPE_MAP_GETNEXT = 6, | 90 | CMSG_TYPE_MAP_GETNEXT = 6, |
83 | CMSG_TYPE_MAP_GETFIRST = 7, | 91 | CMSG_TYPE_MAP_GETFIRST = 7, |
92 | CMSG_TYPE_BPF_EVENT = 8, | ||
84 | __CMSG_TYPE_MAP_MAX, | 93 | __CMSG_TYPE_MAP_MAX, |
85 | }; | 94 | }; |
86 | 95 | ||
@@ -155,4 +164,13 @@ struct cmsg_reply_map_op { | |||
155 | __be32 resv; | 164 | __be32 resv; |
156 | struct cmsg_key_value_pair elem[0]; | 165 | struct cmsg_key_value_pair elem[0]; |
157 | }; | 166 | }; |
167 | |||
168 | struct cmsg_bpf_event { | ||
169 | struct cmsg_hdr hdr; | ||
170 | __be32 cpu_id; | ||
171 | __be64 map_ptr; | ||
172 | __be32 data_size; | ||
173 | __be32 pkt_size; | ||
174 | u8 data[0]; | ||
175 | }; | ||
158 | #endif | 176 | #endif |
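
All multi-byte fields of struct cmsg_bpf_event arrive big-endian from the FW. A hypothetical consumer would decode the header roughly as follows (sketch only; the struct fields above are real, the surrounding validation is assumed):

	struct cmsg_bpf_event *cbe = (struct cmsg_bpf_event *)skb->data;
	u64 map_ptr  = be64_to_cpu(cbe->map_ptr);
	u32 pkt_len  = be32_to_cpu(cbe->pkt_size);
	u32 data_len = be32_to_cpu(cbe->data_size);

	/* cbe->data[] then carries pkt_len packet bytes followed by the
	 * perf sample; both lengths must be checked against skb->len
	 * before use.
	 */

This is the sort of decoding nfp_bpf_event_output(), wired up in cmsg.c above, needs to perform before handing the sample to the perf ring buffer.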
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 65f0791cae0c..326a2085d650 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2016-2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2016-2018 Netronome Systems, Inc. |
3 | * | 3 | * |
4 | * This software is dual licensed under the GNU General Public License Version 2, | 4 | * This software is dual licensed under the GNU General Public License Version 2, |
5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
@@ -1395,15 +1395,9 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |||
1395 | static int | 1395 | static int |
1396 | map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | 1396 | map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
1397 | { | 1397 | { |
1398 | struct bpf_offloaded_map *offmap; | ||
1399 | struct nfp_bpf_map *nfp_map; | ||
1400 | bool load_lm_ptr; | 1398 | bool load_lm_ptr; |
1401 | u32 ret_tgt; | 1399 | u32 ret_tgt; |
1402 | s64 lm_off; | 1400 | s64 lm_off; |
1403 | swreg tid; | ||
1404 | |||
1405 | offmap = (struct bpf_offloaded_map *)meta->arg1.map_ptr; | ||
1406 | nfp_map = offmap->dev_priv; | ||
1407 | 1401 | ||
1408 | /* We only have to reload LM0 if the key is not at start of stack */ | 1402 | /* We only have to reload LM0 if the key is not at start of stack */ |
1409 | lm_off = nfp_prog->stack_depth; | 1403 | lm_off = nfp_prog->stack_depth; |
@@ -1416,17 +1410,12 @@ map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |||
1416 | if (meta->func_id == BPF_FUNC_map_update_elem) | 1410 | if (meta->func_id == BPF_FUNC_map_update_elem) |
1417 | emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2); | 1411 | emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2); |
1418 | 1412 | ||
1419 | /* Load map ID into a register, it should actually fit as an immediate | ||
1420 | * but in case it doesn't deal with it here, not in the delay slots. | ||
1421 | */ | ||
1422 | tid = ur_load_imm_any(nfp_prog, nfp_map->tid, imm_a(nfp_prog)); | ||
1423 | |||
1424 | emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id, | 1413 | emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id, |
1425 | 2, RELO_BR_HELPER); | 1414 | 2, RELO_BR_HELPER); |
1426 | ret_tgt = nfp_prog_current_offset(nfp_prog) + 2; | 1415 | ret_tgt = nfp_prog_current_offset(nfp_prog) + 2; |
1427 | 1416 | ||
1428 | /* Load map ID into A0 */ | 1417 | /* Load map ID into A0 */ |
1429 | wrp_mov(nfp_prog, reg_a(0), tid); | 1418 | wrp_mov(nfp_prog, reg_a(0), reg_a(2)); |
1430 | 1419 | ||
1431 | /* Load the return address into B0 */ | 1420 | /* Load the return address into B0 */ |
1432 | wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL); | 1421 | wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL); |
@@ -1456,6 +1445,31 @@ nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |||
1456 | return 0; | 1445 | return 0; |
1457 | } | 1446 | } |
1458 | 1447 | ||
1448 | static int | ||
1449 | nfp_perf_event_output(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | ||
1450 | { | ||
1451 | swreg ptr_type; | ||
1452 | u32 ret_tgt; | ||
1453 | |||
1454 | ptr_type = ur_load_imm_any(nfp_prog, meta->arg1.type, imm_a(nfp_prog)); | ||
1455 | |||
1456 | ret_tgt = nfp_prog_current_offset(nfp_prog) + 3; | ||
1457 | |||
1458 | emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id, | ||
1459 | 2, RELO_BR_HELPER); | ||
1460 | |||
1461 | /* Load ptr type into A1 */ | ||
1462 | wrp_mov(nfp_prog, reg_a(1), ptr_type); | ||
1463 | |||
1464 | /* Load the return address into B0 */ | ||
1465 | wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL); | ||
1466 | |||
1467 | if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt)) | ||
1468 | return -EINVAL; | ||
1469 | |||
1470 | return 0; | ||
1471 | } | ||
1472 | |||
1459 | /* --- Callbacks --- */ | 1473 | /* --- Callbacks --- */ |
1460 | static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | 1474 | static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
1461 | { | 1475 | { |
@@ -2411,6 +2425,8 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |||
2411 | return map_call_stack_common(nfp_prog, meta); | 2425 | return map_call_stack_common(nfp_prog, meta); |
2412 | case BPF_FUNC_get_prandom_u32: | 2426 | case BPF_FUNC_get_prandom_u32: |
2413 | return nfp_get_prandom_u32(nfp_prog, meta); | 2427 | return nfp_get_prandom_u32(nfp_prog, meta); |
2428 | case BPF_FUNC_perf_event_output: | ||
2429 | return nfp_perf_event_output(nfp_prog, meta); | ||
2414 | default: | 2430 | default: |
2415 | WARN_ONCE(1, "verifier allowed unsupported function\n"); | 2431 | WARN_ONCE(1, "verifier allowed unsupported function\n"); |
2416 | return -EOPNOTSUPP; | 2432 | return -EOPNOTSUPP; |
@@ -3227,6 +3243,33 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) | |||
3227 | return 0; | 3243 | return 0; |
3228 | } | 3244 | } |
3229 | 3245 | ||
3246 | static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog) | ||
3247 | { | ||
3248 | struct nfp_insn_meta *meta1, *meta2; | ||
3249 | struct nfp_bpf_map *nfp_map; | ||
3250 | struct bpf_map *map; | ||
3251 | |||
3252 | nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { | ||
3253 | if (meta1->skip || meta2->skip) | ||
3254 | continue; | ||
3255 | |||
3256 | if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) || | ||
3257 | meta1->insn.src_reg != BPF_PSEUDO_MAP_FD) | ||
3258 | continue; | ||
3259 | |||
3260 | map = (void *)(unsigned long)((u32)meta1->insn.imm | | ||
3261 | (u64)meta2->insn.imm << 32); | ||
3262 | if (bpf_map_offload_neutral(map)) | ||
3263 | continue; | ||
3264 | nfp_map = map_to_offmap(map)->dev_priv; | ||
3265 | |||
3266 | meta1->insn.imm = nfp_map->tid; | ||
3267 | meta2->insn.imm = 0; | ||
3268 | } | ||
3269 | |||
3270 | return 0; | ||
3271 | } | ||
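
Concretely, a map load reaches the driver as a BPF_LD | BPF_IMM | BPF_DW pair whose two immediates hold the halves of the host-side struct bpf_map pointer; the pass rewrites that pair in place so the FW only ever sees the 32-bit table id. A before/after sketch (illustrative):

	/* before: verifier-fixed-up pair encodes the host pointer */
	meta1->insn.imm = lower_32_bits((unsigned long)map);
	meta2->insn.imm = upper_32_bits((u64)(unsigned long)map);
	/* after this pass: FW-visible table id, high half cleared */
	meta1->insn.imm = nfp_map->tid;
	meta2->insn.imm = 0;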
3272 | |||
3230 | static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len) | 3273 | static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len) |
3231 | { | 3274 | { |
3232 | __le64 *ustore = (__force __le64 *)prog; | 3275 | __le64 *ustore = (__force __le64 *)prog; |
@@ -3263,6 +3306,10 @@ int nfp_bpf_jit(struct nfp_prog *nfp_prog) | |||
3263 | { | 3306 | { |
3264 | int ret; | 3307 | int ret; |
3265 | 3308 | ||
3309 | ret = nfp_bpf_replace_map_ptrs(nfp_prog); | ||
3310 | if (ret) | ||
3311 | return ret; | ||
3312 | |||
3266 | ret = nfp_bpf_optimize(nfp_prog); | 3313 | ret = nfp_bpf_optimize(nfp_prog); |
3267 | if (ret) | 3314 | if (ret) |
3268 | return ret; | 3315 | return ret; |
@@ -3353,6 +3400,9 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv) | |||
3353 | case BPF_FUNC_map_delete_elem: | 3400 | case BPF_FUNC_map_delete_elem: |
3354 | val = nfp_prog->bpf->helpers.map_delete; | 3401 | val = nfp_prog->bpf->helpers.map_delete; |
3355 | break; | 3402 | break; |
3403 | case BPF_FUNC_perf_event_output: | ||
3404 | val = nfp_prog->bpf->helpers.perf_event_output; | ||
3405 | break; | ||
3356 | default: | 3406 | default: |
3357 | pr_err("relocation of unknown helper %d\n", | 3407 | pr_err("relocation of unknown helper %d\n", |
3358 | val); | 3408 | val); |
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 1dc424685f4e..d72f9e7f42da 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2017-2018 Netronome Systems, Inc. |
3 | * | 3 | * |
4 | * This software is dual licensed under the GNU General Public License Version 2, | 4 | * This software is dual licensed under the GNU General Public License Version 2, |
5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
@@ -43,6 +43,14 @@ | |||
43 | #include "fw.h" | 43 | #include "fw.h" |
44 | #include "main.h" | 44 | #include "main.h" |
45 | 45 | ||
46 | const struct rhashtable_params nfp_bpf_maps_neutral_params = { | ||
47 | .nelem_hint = 4, | ||
48 | .key_len = FIELD_SIZEOF(struct nfp_bpf_neutral_map, ptr), | ||
49 | .key_offset = offsetof(struct nfp_bpf_neutral_map, ptr), | ||
50 | .head_offset = offsetof(struct nfp_bpf_neutral_map, l), | ||
51 | .automatic_shrinking = true, | ||
52 | }; | ||
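
These parameters key the table on the raw host struct bpf_map pointer (the ptr field), so a lookup passes the address of a map pointer as the key. A hypothetical call site (sketch; rhashtable_lookup_fast() is the stock kernel API):

	struct nfp_bpf_neutral_map *record;
	struct bpf_map *map = prog_map;	/* map referenced by a program */

	record = rhashtable_lookup_fast(&bpf->maps_neutral, &map,
					nfp_bpf_maps_neutral_params);

rhashtable_lookup_fast() hashes key_len bytes at key_offset of each object, which is exactly the ptr member these parameters describe.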
53 | |||
46 | static bool nfp_net_ebpf_capable(struct nfp_net *nn) | 54 | static bool nfp_net_ebpf_capable(struct nfp_net *nn) |
47 | { | 55 | { |
48 | #ifdef __LITTLE_ENDIAN | 56 | #ifdef __LITTLE_ENDIAN |
@@ -290,6 +298,9 @@ nfp_bpf_parse_cap_func(struct nfp_app_bpf *bpf, void __iomem *value, u32 length) | |||
290 | case BPF_FUNC_map_delete_elem: | 298 | case BPF_FUNC_map_delete_elem: |
291 | bpf->helpers.map_delete = readl(&cap->func_addr); | 299 | bpf->helpers.map_delete = readl(&cap->func_addr); |
292 | break; | 300 | break; |
301 | case BPF_FUNC_perf_event_output: | ||
302 | bpf->helpers.perf_event_output = readl(&cap->func_addr); | ||
303 | break; | ||
293 | } | 304 | } |
294 | 305 | ||
295 | return 0; | 306 | return 0; |
@@ -401,17 +412,28 @@ static int nfp_bpf_init(struct nfp_app *app) | |||
401 | init_waitqueue_head(&bpf->cmsg_wq); | 412 | init_waitqueue_head(&bpf->cmsg_wq); |
402 | INIT_LIST_HEAD(&bpf->map_list); | 413 | INIT_LIST_HEAD(&bpf->map_list); |
403 | 414 | ||
404 | err = nfp_bpf_parse_capabilities(app); | 415 | err = rhashtable_init(&bpf->maps_neutral, &nfp_bpf_maps_neutral_params); |
405 | if (err) | 416 | if (err) |
406 | goto err_free_bpf; | 417 | goto err_free_bpf; |
407 | 418 | ||
419 | err = nfp_bpf_parse_capabilities(app); | ||
420 | if (err) | ||
421 | goto err_free_neutral_maps; | ||
422 | |||
408 | return 0; | 423 | return 0; |
409 | 424 | ||
425 | err_free_neutral_maps: | ||
426 | rhashtable_destroy(&bpf->maps_neutral); | ||
410 | err_free_bpf: | 427 | err_free_bpf: |
411 | kfree(bpf); | 428 | kfree(bpf); |
412 | return err; | 429 | return err; |
413 | } | 430 | } |
414 | 431 | ||
432 | static void nfp_check_rhashtable_empty(void *ptr, void *arg) | ||
433 | { | ||
434 | WARN_ON_ONCE(1); | ||
435 | } | ||
436 | |||
415 | static void nfp_bpf_clean(struct nfp_app *app) | 437 | static void nfp_bpf_clean(struct nfp_app *app) |
416 | { | 438 | { |
417 | struct nfp_app_bpf *bpf = app->priv; | 439 | struct nfp_app_bpf *bpf = app->priv; |
@@ -419,6 +441,8 @@ static void nfp_bpf_clean(struct nfp_app *app) | |||
419 | WARN_ON(!skb_queue_empty(&bpf->cmsg_replies)); | 441 | WARN_ON(!skb_queue_empty(&bpf->cmsg_replies)); |
420 | WARN_ON(!list_empty(&bpf->map_list)); | 442 | WARN_ON(!list_empty(&bpf->map_list)); |
421 | WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use); | 443 | WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use); |
444 | rhashtable_free_and_destroy(&bpf->maps_neutral, | ||
445 | nfp_check_rhashtable_empty, NULL); | ||
422 | kfree(bpf); | 446 | kfree(bpf); |
423 | } | 447 | } |
424 | 448 | ||
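One subtlety in the main.c hunk above: the hashtable is keyed on the pointer value itself (key_len is the size of the ptr member), so every lookup passes the address of a local pointer variable. Below is a minimal, self-contained sketch of that pattern; the demo_* names are illustrative, not driver symbols.

```c
#include <linux/kernel.h>
#include <linux/rhashtable.h>
#include <linux/slab.h>

/* Illustrative record: the hash key is the pointer value itself. */
struct demo_rec {
	struct rhash_head node;
	void *key_ptr;
};

static const struct rhashtable_params demo_params = {
	.key_len	     = FIELD_SIZEOF(struct demo_rec, key_ptr),
	.key_offset	     = offsetof(struct demo_rec, key_ptr),
	.head_offset	     = offsetof(struct demo_rec, node),
	.automatic_shrinking = true,
};

static void demo_free(void *ptr, void *arg)
{
	kfree(ptr);
}

static int demo(void *obj)
{
	struct rhashtable ht;
	struct demo_rec *rec;
	int err;

	err = rhashtable_init(&ht, &demo_params);
	if (err)
		return err;

	rec = kzalloc(sizeof(*rec), GFP_KERNEL);
	if (!rec) {
		err = -ENOMEM;
		goto out;
	}
	rec->key_ptr = obj;
	err = rhashtable_insert_fast(&ht, &rec->node, demo_params);
	if (err) {
		kfree(rec);
		goto out;
	}

	/* Note the &obj: the key argument is a pointer to the key,
	 * just like the &map passed to rhashtable_lookup_fast() in
	 * the driver code.
	 */
	err = rhashtable_lookup_fast(&ht, &obj, demo_params) ? 0 : -ENOENT;
out:
	rhashtable_free_and_destroy(&ht, demo_free, NULL);
	return err;
}
```

In the driver, nfp_check_rhashtable_empty() plays the free_fn role purely as a canary: by the time nfp_bpf_clean() runs, every record should already have been removed by the forget path, so any surviving entry is a leak worth a WARN.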
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 68b5d326483d..82682378d57f 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2016-2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2016-2018 Netronome Systems, Inc. |
3 | * | 3 | * |
4 | * This software is dual licensed under the GNU General License Version 2, | 4 | * This software is dual licensed under the GNU General License Version 2, |
5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/bpf_verifier.h> | 39 | #include <linux/bpf_verifier.h> |
40 | #include <linux/kernel.h> | 40 | #include <linux/kernel.h> |
41 | #include <linux/list.h> | 41 | #include <linux/list.h> |
42 | #include <linux/rhashtable.h> | ||
42 | #include <linux/skbuff.h> | 43 | #include <linux/skbuff.h> |
43 | #include <linux/types.h> | 44 | #include <linux/types.h> |
44 | #include <linux/wait.h> | 45 | #include <linux/wait.h> |
@@ -114,6 +115,8 @@ enum pkt_vec { | |||
114 | * @maps_in_use: number of currently offloaded maps | 115 | * @maps_in_use: number of currently offloaded maps |
115 | * @map_elems_in_use: number of elements allocated to offloaded maps | 116 | * @map_elems_in_use: number of elements allocated to offloaded maps |
116 | * | 117 | * |
118 | * @maps_neutral: hash table of offload-neutral maps (on pointer) | ||
119 | * | ||
117 | * @adjust_head: adjust head capability | 120 | * @adjust_head: adjust head capability |
118 | * @adjust_head.flags: extra flags for adjust head | 121 | * @adjust_head.flags: extra flags for adjust head |
119 | * @adjust_head.off_min: minimal packet offset within buffer required | 122 | * @adjust_head.off_min: minimal packet offset within buffer required |
@@ -133,6 +136,7 @@ enum pkt_vec { | |||
133 | * @helpers.map_lookup: map lookup helper address | 136 | * @helpers.map_lookup: map lookup helper address |
134 | * @helpers.map_update: map update helper address | 137 | * @helpers.map_update: map update helper address |
135 | * @helpers.map_delete: map delete helper address | 138 | * @helpers.map_delete: map delete helper address |
139 | * @helpers.perf_event_output: output perf event to a ring buffer | ||
136 | * | 140 | * |
137 | * @pseudo_random: FW initialized the pseudo-random machinery (CSRs) | 141 | * @pseudo_random: FW initialized the pseudo-random machinery (CSRs) |
138 | */ | 142 | */ |
@@ -150,6 +154,8 @@ struct nfp_app_bpf { | |||
150 | unsigned int maps_in_use; | 154 | unsigned int maps_in_use; |
151 | unsigned int map_elems_in_use; | 155 | unsigned int map_elems_in_use; |
152 | 156 | ||
157 | struct rhashtable maps_neutral; | ||
158 | |||
153 | struct nfp_bpf_cap_adjust_head { | 159 | struct nfp_bpf_cap_adjust_head { |
154 | u32 flags; | 160 | u32 flags; |
155 | int off_min; | 161 | int off_min; |
@@ -171,6 +177,7 @@ struct nfp_app_bpf { | |||
171 | u32 map_lookup; | 177 | u32 map_lookup; |
172 | u32 map_update; | 178 | u32 map_update; |
173 | u32 map_delete; | 179 | u32 map_delete; |
180 | u32 perf_event_output; | ||
174 | } helpers; | 181 | } helpers; |
175 | 182 | ||
176 | bool pseudo_random; | 183 | bool pseudo_random; |
@@ -199,6 +206,14 @@ struct nfp_bpf_map { | |||
199 | enum nfp_bpf_map_use use_map[]; | 206 | enum nfp_bpf_map_use use_map[]; |
200 | }; | 207 | }; |
201 | 208 | ||
209 | struct nfp_bpf_neutral_map { | ||
210 | struct rhash_head l; | ||
211 | struct bpf_map *ptr; | ||
212 | u32 count; | ||
213 | }; | ||
214 | |||
215 | extern const struct rhashtable_params nfp_bpf_maps_neutral_params; | ||
216 | |||
202 | struct nfp_prog; | 217 | struct nfp_prog; |
203 | struct nfp_insn_meta; | 218 | struct nfp_insn_meta; |
204 | typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); | 219 | typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); |
@@ -367,6 +382,8 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta) | |||
367 | * @error: error code if something went wrong | 382 | * @error: error code if something went wrong |
368 | * @stack_depth: max stack depth from the verifier | 383 | * @stack_depth: max stack depth from the verifier |
369 | * @adjust_head_location: if program has single adjust head call - the insn no. | 384 | * @adjust_head_location: if program has single adjust head call - the insn no. |
385 | * @map_records_cnt: the number of map pointers recorded for this prog | ||
386 | * @map_records: the map record pointers from bpf->maps_neutral | ||
370 | * @insns: list of BPF instruction wrappers (struct nfp_insn_meta) | 387 | * @insns: list of BPF instruction wrappers (struct nfp_insn_meta) |
371 | */ | 388 | */ |
372 | struct nfp_prog { | 389 | struct nfp_prog { |
@@ -390,6 +407,9 @@ struct nfp_prog { | |||
390 | unsigned int stack_depth; | 407 | unsigned int stack_depth; |
391 | unsigned int adjust_head_location; | 408 | unsigned int adjust_head_location; |
392 | 409 | ||
410 | unsigned int map_records_cnt; | ||
411 | struct nfp_bpf_neutral_map **map_records; | ||
412 | |||
393 | struct list_head insns; | 413 | struct list_head insns; |
394 | }; | 414 | }; |
395 | 415 | ||
@@ -440,5 +460,7 @@ int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap, | |||
440 | int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap, | 460 | int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap, |
441 | void *key, void *next_key); | 461 | void *key, void *next_key); |
442 | 462 | ||
463 | int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb); | ||
464 | |||
443 | void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb); | 465 | void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb); |
444 | #endif | 466 | #endif |
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 42d98792bd25..4db0ac1e42a8 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2016-2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2016-2018 Netronome Systems, Inc. |
3 | * | 3 | * |
4 | * This software is dual licensed under the GNU General License Version 2, | 4 | * This software is dual licensed under the GNU General License Version 2, |
5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
@@ -57,6 +57,126 @@ | |||
57 | #include "../nfp_net.h" | 57 | #include "../nfp_net.h" |
58 | 58 | ||
59 | static int | 59 | static int |
60 | nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, | ||
61 | struct bpf_map *map) | ||
62 | { | ||
63 | struct nfp_bpf_neutral_map *record; | ||
64 | int err; | ||
65 | |||
66 | /* Map record paths are entered via ndo, update side is protected. */ | ||
67 | ASSERT_RTNL(); | ||
68 | |||
69 | /* Reuse path - other offloaded program is already tracking this map. */ | ||
70 | record = rhashtable_lookup_fast(&bpf->maps_neutral, &map, | ||
71 | nfp_bpf_maps_neutral_params); | ||
72 | if (record) { | ||
73 | nfp_prog->map_records[nfp_prog->map_records_cnt++] = record; | ||
74 | record->count++; | ||
75 | return 0; | ||
76 | } | ||
77 | |||
78 | /* Grab a single ref to the map for our record. The prog destroy ndo | ||
79 | * happens after free_used_maps(). | ||
80 | */ | ||
81 | map = bpf_map_inc(map, false); | ||
82 | if (IS_ERR(map)) | ||
83 | return PTR_ERR(map); | ||
84 | |||
85 | record = kmalloc(sizeof(*record), GFP_KERNEL); | ||
86 | if (!record) { | ||
87 | err = -ENOMEM; | ||
88 | goto err_map_put; | ||
89 | } | ||
90 | |||
91 | record->ptr = map; | ||
92 | record->count = 1; | ||
93 | |||
94 | err = rhashtable_insert_fast(&bpf->maps_neutral, &record->l, | ||
95 | nfp_bpf_maps_neutral_params); | ||
96 | if (err) | ||
97 | goto err_free_rec; | ||
98 | |||
99 | nfp_prog->map_records[nfp_prog->map_records_cnt++] = record; | ||
100 | |||
101 | return 0; | ||
102 | |||
103 | err_free_rec: | ||
104 | kfree(record); | ||
105 | err_map_put: | ||
106 | bpf_map_put(map); | ||
107 | return err; | ||
108 | } | ||
109 | |||
110 | static void | ||
111 | nfp_map_ptrs_forget(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog) | ||
112 | { | ||
113 | bool freed = false; | ||
114 | int i; | ||
115 | |||
116 | ASSERT_RTNL(); | ||
117 | |||
118 | for (i = 0; i < nfp_prog->map_records_cnt; i++) { | ||
119 | if (--nfp_prog->map_records[i]->count) { | ||
120 | nfp_prog->map_records[i] = NULL; | ||
121 | continue; | ||
122 | } | ||
123 | |||
124 | WARN_ON(rhashtable_remove_fast(&bpf->maps_neutral, | ||
125 | &nfp_prog->map_records[i]->l, | ||
126 | nfp_bpf_maps_neutral_params)); | ||
127 | freed = true; | ||
128 | } | ||
129 | |||
130 | if (freed) { | ||
131 | synchronize_rcu(); | ||
132 | |||
133 | for (i = 0; i < nfp_prog->map_records_cnt; i++) | ||
134 | if (nfp_prog->map_records[i]) { | ||
135 | bpf_map_put(nfp_prog->map_records[i]->ptr); | ||
136 | kfree(nfp_prog->map_records[i]); | ||
137 | } | ||
138 | } | ||
139 | |||
140 | kfree(nfp_prog->map_records); | ||
141 | nfp_prog->map_records = NULL; | ||
142 | nfp_prog->map_records_cnt = 0; | ||
143 | } | ||
144 | |||
145 | static int | ||
146 | nfp_map_ptrs_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, | ||
147 | struct bpf_prog *prog) | ||
148 | { | ||
149 | int i, cnt, err; | ||
150 | |||
151 | /* Quickly count the maps we will have to remember */ | ||
152 | cnt = 0; | ||
153 | for (i = 0; i < prog->aux->used_map_cnt; i++) | ||
154 | if (bpf_map_offload_neutral(prog->aux->used_maps[i])) | ||
155 | cnt++; | ||
156 | if (!cnt) | ||
157 | return 0; | ||
158 | |||
159 | nfp_prog->map_records = kmalloc_array(cnt, | ||
160 | sizeof(nfp_prog->map_records[0]), | ||
161 | GFP_KERNEL); | ||
162 | if (!nfp_prog->map_records) | ||
163 | return -ENOMEM; | ||
164 | |||
165 | for (i = 0; i < prog->aux->used_map_cnt; i++) | ||
166 | if (bpf_map_offload_neutral(prog->aux->used_maps[i])) { | ||
167 | err = nfp_map_ptr_record(bpf, nfp_prog, | ||
168 | prog->aux->used_maps[i]); | ||
169 | if (err) { | ||
170 | nfp_map_ptrs_forget(bpf, nfp_prog); | ||
171 | return err; | ||
172 | } | ||
173 | } | ||
174 | WARN_ON(cnt != nfp_prog->map_records_cnt); | ||
175 | |||
176 | return 0; | ||
177 | } | ||
178 | |||
179 | static int | ||
60 | nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, | 180 | nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, |
61 | unsigned int cnt) | 181 | unsigned int cnt) |
62 | { | 182 | { |
@@ -151,7 +271,7 @@ static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog) | |||
151 | prog->aux->offload->jited_len = nfp_prog->prog_len * sizeof(u64); | 271 | prog->aux->offload->jited_len = nfp_prog->prog_len * sizeof(u64); |
152 | prog->aux->offload->jited_image = nfp_prog->prog; | 272 | prog->aux->offload->jited_image = nfp_prog->prog; |
153 | 273 | ||
154 | return 0; | 274 | return nfp_map_ptrs_record(nfp_prog->bpf, nfp_prog, prog); |
155 | } | 275 | } |
156 | 276 | ||
157 | static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog) | 277 | static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog) |
@@ -159,6 +279,7 @@ static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog) | |||
159 | struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; | 279 | struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; |
160 | 280 | ||
161 | kvfree(nfp_prog->prog); | 281 | kvfree(nfp_prog->prog); |
282 | nfp_map_ptrs_forget(nfp_prog->bpf, nfp_prog); | ||
162 | nfp_prog_free(nfp_prog); | 283 | nfp_prog_free(nfp_prog); |
163 | 284 | ||
164 | return 0; | 285 | return 0; |
@@ -320,6 +441,53 @@ int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf) | |||
320 | } | 441 | } |
321 | } | 442 | } |
322 | 443 | ||
444 | static unsigned long | ||
445 | nfp_bpf_perf_event_copy(void *dst, const void *src, | ||
446 | unsigned long off, unsigned long len) | ||
447 | { | ||
448 | memcpy(dst, src + off, len); | ||
449 | return 0; | ||
450 | } | ||
451 | |||
452 | int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb) | ||
453 | { | ||
454 | struct cmsg_bpf_event *cbe = (void *)skb->data; | ||
455 | u32 pkt_size, data_size; | ||
456 | struct bpf_map *map; | ||
457 | |||
458 | if (skb->len < sizeof(struct cmsg_bpf_event)) | ||
459 | goto err_drop; | ||
460 | |||
461 | pkt_size = be32_to_cpu(cbe->pkt_size); | ||
462 | data_size = be32_to_cpu(cbe->data_size); | ||
463 | map = (void *)(unsigned long)be64_to_cpu(cbe->map_ptr); | ||
464 | |||
465 | if (skb->len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size) | ||
466 | goto err_drop; | ||
467 | if (cbe->hdr.ver != CMSG_MAP_ABI_VERSION) | ||
468 | goto err_drop; | ||
469 | |||
470 | rcu_read_lock(); | ||
471 | if (!rhashtable_lookup_fast(&bpf->maps_neutral, &map, | ||
472 | nfp_bpf_maps_neutral_params)) { | ||
473 | rcu_read_unlock(); | ||
474 | pr_warn("perf event: dest map pointer %px not recognized, dropping event\n", | ||
475 | map); | ||
476 | goto err_drop; | ||
477 | } | ||
478 | |||
479 | bpf_event_output(map, be32_to_cpu(cbe->cpu_id), | ||
480 | &cbe->data[round_up(pkt_size, 4)], data_size, | ||
481 | cbe->data, pkt_size, nfp_bpf_perf_event_copy); | ||
482 | rcu_read_unlock(); | ||
483 | |||
484 | dev_consume_skb_any(skb); | ||
485 | return 0; | ||
486 | err_drop: | ||
487 | dev_kfree_skb_any(skb); | ||
488 | return -EINVAL; | ||
489 | } | ||
490 | |||
323 | static int | 491 | static int |
324 | nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog, | 492 | nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog, |
325 | struct netlink_ext_ack *extack) | 493 | struct netlink_ext_ack *extack) |
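The control message parsed by nfp_bpf_event_output() is defined in bpf/fw.h, which is not part of this diff. Purely from the accesses above, its layout can be sketched as follows; the field order and the cmsg_hdr type are assumptions, not the authoritative definition.

```c
/* Assumed wire format of the FW event message, reconstructed from
 * nfp_bpf_event_output(); the real struct cmsg_bpf_event lives in
 * drivers/net/ethernet/netronome/nfp/bpf/fw.h. Multi-byte fields
 * are big-endian on the wire.
 */
struct cmsg_bpf_event_sketch {
	struct cmsg_hdr hdr;	/* hdr.ver checked against CMSG_MAP_ABI_VERSION */
	__be32 cpu_id;		/* CPU the event claims to come from */
	__be64 map_ptr;		/* host pointer of the perf event array */
	__be32 data_size;	/* length of the user record */
	__be32 pkt_size;	/* length of the packet snapshot */
	u8 data[];		/* pkt_size packet bytes, then the record
				 * at offset round_up(pkt_size, 4)
				 */
};
```

The rhashtable lookup then decides whether the reported map_ptr is one the driver actually handed out, which is what makes the record/forget bookkeeping earlier in this file safety-critical: a stale or forged pointer from the FW must never reach bpf_event_output().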
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 06ad53ce4ad9..e163f3cfa47d 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2016-2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2016-2018 Netronome Systems, Inc. |
3 | * | 3 | * |
4 | * This software is dual licensed under the GNU General License Version 2, | 4 | * This software is dual licensed under the GNU General License Version 2, |
5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
@@ -36,6 +36,8 @@ | |||
36 | #include <linux/kernel.h> | 36 | #include <linux/kernel.h> |
37 | #include <linux/pkt_cls.h> | 37 | #include <linux/pkt_cls.h> |
38 | 38 | ||
39 | #include "../nfp_app.h" | ||
40 | #include "../nfp_main.h" | ||
39 | #include "fw.h" | 41 | #include "fw.h" |
40 | #include "main.h" | 42 | #include "main.h" |
41 | 43 | ||
@@ -149,15 +151,6 @@ nfp_bpf_map_call_ok(const char *fname, struct bpf_verifier_env *env, | |||
149 | return false; | 151 | return false; |
150 | } | 152 | } |
151 | 153 | ||
152 | /* Rest of the checks is only if we re-parse the same insn */ | ||
153 | if (!meta->func_id) | ||
154 | return true; | ||
155 | |||
156 | if (meta->arg1.map_ptr != reg1->map_ptr) { | ||
157 | pr_vlog(env, "%s: called for different map\n", fname); | ||
158 | return false; | ||
159 | } | ||
160 | |||
161 | return true; | 154 | return true; |
162 | } | 155 | } |
163 | 156 | ||
@@ -216,6 +209,71 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env, | |||
216 | pr_vlog(env, "bpf_get_prandom_u32(): FW doesn't support random number generation\n"); | 209 | pr_vlog(env, "bpf_get_prandom_u32(): FW doesn't support random number generation\n"); |
217 | return -EOPNOTSUPP; | 210 | return -EOPNOTSUPP; |
218 | 211 | ||
212 | case BPF_FUNC_perf_event_output: | ||
213 | BUILD_BUG_ON(NFP_BPF_SCALAR_VALUE != SCALAR_VALUE || | ||
214 | NFP_BPF_MAP_VALUE != PTR_TO_MAP_VALUE || | ||
215 | NFP_BPF_STACK != PTR_TO_STACK || | ||
216 | NFP_BPF_PACKET_DATA != PTR_TO_PACKET); | ||
217 | |||
218 | if (!bpf->helpers.perf_event_output) { | ||
219 | pr_vlog(env, "event_output: not supported by FW\n"); | ||
220 | return -EOPNOTSUPP; | ||
221 | } | ||
222 | |||
223 | /* Force current CPU to make sure we can report the event | ||
224 | * wherever we get the control message from FW. | ||
225 | */ | ||
226 | if (reg3->var_off.mask & BPF_F_INDEX_MASK || | ||
227 | (reg3->var_off.value & BPF_F_INDEX_MASK) != | ||
228 | BPF_F_CURRENT_CPU) { | ||
229 | char tn_buf[48]; | ||
230 | |||
231 | tnum_strn(tn_buf, sizeof(tn_buf), reg3->var_off); | ||
232 | pr_vlog(env, "event_output: must use BPF_F_CURRENT_CPU, var_off: %s\n", | ||
233 | tn_buf); | ||
234 | return -EOPNOTSUPP; | ||
235 | } | ||
236 | |||
237 | /* Save space in meta, we don't care about arguments other | ||
238 | * than the 4th, shove it into arg1. | ||
239 | */ | ||
240 | reg1 = cur_regs(env) + BPF_REG_4; | ||
241 | |||
242 | if (reg1->type != SCALAR_VALUE /* NULL ptr */ && | ||
243 | reg1->type != PTR_TO_STACK && | ||
244 | reg1->type != PTR_TO_MAP_VALUE && | ||
245 | reg1->type != PTR_TO_PACKET) { | ||
246 | pr_vlog(env, "event_output: unsupported ptr type: %d\n", | ||
247 | reg1->type); | ||
248 | return -EOPNOTSUPP; | ||
249 | } | ||
250 | |||
251 | if (reg1->type == PTR_TO_STACK && | ||
252 | !nfp_bpf_stack_arg_ok("event_output", env, reg1, NULL)) | ||
253 | return -EOPNOTSUPP; | ||
254 | |||
255 | /* Warn user that on offload NFP may return success even if map | ||
256 | * is not going to accept the event, since the event output is | ||
257 | * fully async and device won't know the state of the map. | ||
258 | * There is also FW limitation on the event length. | ||
259 | * | ||
260 | * Lost events will not show up on the perf ring, driver | ||
261 | * won't see them at all. Events may also get reordered. | ||
262 | */ | ||
263 | dev_warn_once(&nfp_prog->bpf->app->pf->pdev->dev, | ||
264 | "bpf: note: return codes and behavior of bpf_event_output() helper differs for offloaded programs!\n"); | ||
265 | pr_vlog(env, "warning: return codes and behavior of event_output helper differ for offload!\n"); | ||
266 | |||
267 | if (!meta->func_id) | ||
268 | break; | ||
269 | |||
270 | if (reg1->type != meta->arg1.type) { | ||
271 | pr_vlog(env, "event_output: ptr type changed: %d %d\n", | ||
272 | meta->arg1.type, reg1->type); | ||
273 | return -EINVAL; | ||
274 | } | ||
275 | break; | ||
276 | |||
219 | default: | 277 | default: |
220 | pr_vlog(env, "unsupported function id: %d\n", func_id); | 278 | pr_vlog(env, "unsupported function id: %d\n", func_id); |
221 | return -EOPNOTSUPP; | 279 | return -EOPNOTSUPP; |
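Put together, the checks above admit a fairly ordinary program shape: the flags argument must be the compile-time constant BPF_F_CURRENT_CPU, and the record pointer must be NULL, stack, map value, or packet memory. A hedged sketch in the sample style of the period follows; bpf_helpers.h and the struct bpf_map_def SEC("maps") convention come from samples/bpf and are not part of this patch.

```c
#include <linux/bpf.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") my_events = {
	.type        = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	.key_size    = sizeof(__u32),
	.value_size  = sizeof(__u32),
	.max_entries = 64,
};

SEC("xdp")
int xdp_sample(struct xdp_md *ctx)
{
	__u32 len = ctx->data_end - ctx->data;	/* record lives on the stack */

	/* Anything other than the literal BPF_F_CURRENT_CPU in flags
	 * is rejected for offload by the tnum check above.
	 */
	bpf_perf_event_output(ctx, &my_events, BPF_F_CURRENT_CPU,
			      &len, sizeof(len));
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
```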
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c index 6aedef0ad433..0e0253c7e17b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2017-2018 Netronome Systems, Inc. |
3 | * | 3 | * |
4 | * This software is dual licensed under the GNU General License Version 2, | 4 | * This software is dual licensed under the GNU General License Version 2, |
5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 38ebbc61ed99..321969da67b7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h | |||
@@ -110,6 +110,11 @@ static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map) | |||
110 | return container_of(map, struct bpf_offloaded_map, map); | 110 | return container_of(map, struct bpf_offloaded_map, map); |
111 | } | 111 | } |
112 | 112 | ||
113 | static inline bool bpf_map_offload_neutral(const struct bpf_map *map) | ||
114 | { | ||
115 | return map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; | ||
116 | } | ||
117 | |||
113 | static inline bool bpf_map_support_seq_show(const struct bpf_map *map) | 118 | static inline bool bpf_map_support_seq_show(const struct bpf_map *map) |
114 | { | 119 | { |
115 | return map->ops->map_seq_show_elem && map->ops->map_check_btf; | 120 | return map->ops->map_seq_show_elem && map->ops->map_check_btf; |
@@ -235,6 +240,8 @@ struct bpf_verifier_ops { | |||
235 | struct bpf_insn_access_aux *info); | 240 | struct bpf_insn_access_aux *info); |
236 | int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, | 241 | int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, |
237 | const struct bpf_prog *prog); | 242 | const struct bpf_prog *prog); |
243 | int (*gen_ld_abs)(const struct bpf_insn *orig, | ||
244 | struct bpf_insn *insn_buf); | ||
238 | u32 (*convert_ctx_access)(enum bpf_access_type type, | 245 | u32 (*convert_ctx_access)(enum bpf_access_type type, |
239 | const struct bpf_insn *src, | 246 | const struct bpf_insn *src, |
240 | struct bpf_insn *dst, | 247 | struct bpf_insn *dst, |
@@ -676,6 +683,31 @@ static inline int sock_map_prog(struct bpf_map *map, | |||
676 | } | 683 | } |
677 | #endif | 684 | #endif |
678 | 685 | ||
686 | #if defined(CONFIG_XDP_SOCKETS) | ||
687 | struct xdp_sock; | ||
688 | struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key); | ||
689 | int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp, | ||
690 | struct xdp_sock *xs); | ||
691 | void __xsk_map_flush(struct bpf_map *map); | ||
692 | #else | ||
693 | struct xdp_sock; | ||
694 | static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, | ||
695 | u32 key) | ||
696 | { | ||
697 | return NULL; | ||
698 | } | ||
699 | |||
700 | static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp, | ||
701 | struct xdp_sock *xs) | ||
702 | { | ||
703 | return -EOPNOTSUPP; | ||
704 | } | ||
705 | |||
706 | static inline void __xsk_map_flush(struct bpf_map *map) | ||
707 | { | ||
708 | } | ||
709 | #endif | ||
710 | |||
679 | /* verifier prototypes for helper functions called from eBPF programs */ | 711 | /* verifier prototypes for helper functions called from eBPF programs */ |
680 | extern const struct bpf_func_proto bpf_map_lookup_elem_proto; | 712 | extern const struct bpf_func_proto bpf_map_lookup_elem_proto; |
681 | extern const struct bpf_func_proto bpf_map_update_elem_proto; | 713 | extern const struct bpf_func_proto bpf_map_update_elem_proto; |
@@ -689,9 +721,8 @@ extern const struct bpf_func_proto bpf_ktime_get_ns_proto; | |||
689 | extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; | 721 | extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; |
690 | extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; | 722 | extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; |
691 | extern const struct bpf_func_proto bpf_get_current_comm_proto; | 723 | extern const struct bpf_func_proto bpf_get_current_comm_proto; |
692 | extern const struct bpf_func_proto bpf_skb_vlan_push_proto; | ||
693 | extern const struct bpf_func_proto bpf_skb_vlan_pop_proto; | ||
694 | extern const struct bpf_func_proto bpf_get_stackid_proto; | 724 | extern const struct bpf_func_proto bpf_get_stackid_proto; |
725 | extern const struct bpf_func_proto bpf_get_stack_proto; | ||
695 | extern const struct bpf_func_proto bpf_sock_map_update_proto; | 726 | extern const struct bpf_func_proto bpf_sock_map_update_proto; |
696 | 727 | ||
697 | /* Shared helpers among cBPF and eBPF. */ | 728 | /* Shared helpers among cBPF and eBPF. */ |
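These stubs are the glue between bpf_redirect_map() and AF_XDP: with CONFIG_XDP_SOCKETS enabled, a redirect into a BPF_MAP_TYPE_XSKMAP resolves through __xsk_map_lookup_elem() and __xsk_map_redirect(), and __xsk_map_flush() drains the batch at the end of the NAPI poll. A hedged program-side sketch (map layout and section names follow samples/bpf conventions):

```c
#include <linux/bpf.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") xsks = {
	.type        = BPF_MAP_TYPE_XSKMAP,
	.key_size    = sizeof(int),
	.value_size  = sizeof(int),	/* AF_XDP socket fd, set from user space */
	.max_entries = 4,
};

SEC("xdp")
int xdp_redirect_xsk(struct xdp_md *ctx)
{
	int index = ctx->rx_queue_index;

	/* If no socket is bound at @index, the redirect fails and the
	 * frame is dropped in xdp_do_redirect().
	 */
	return bpf_redirect_map(&xsks, index, 0);
}

char _license[] SEC("license") = "GPL";
```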
diff --git a/include/linux/bpf_trace.h b/include/linux/bpf_trace.h index e6fe98ae3794..ddf896abcfb6 100644 --- a/include/linux/bpf_trace.h +++ b/include/linux/bpf_trace.h | |||
@@ -2,7 +2,6 @@ | |||
2 | #ifndef __LINUX_BPF_TRACE_H__ | 2 | #ifndef __LINUX_BPF_TRACE_H__ |
3 | #define __LINUX_BPF_TRACE_H__ | 3 | #define __LINUX_BPF_TRACE_H__ |
4 | 4 | ||
5 | #include <trace/events/bpf.h> | ||
6 | #include <trace/events/xdp.h> | 5 | #include <trace/events/xdp.h> |
7 | 6 | ||
8 | #endif /* __LINUX_BPF_TRACE_H__ */ | 7 | #endif /* __LINUX_BPF_TRACE_H__ */ |
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 2b28fcf6f6ae..d7df1b323082 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h | |||
@@ -49,4 +49,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) | |||
49 | BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) | 49 | BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) |
50 | #endif | 50 | #endif |
51 | BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) | 51 | BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) |
52 | #if defined(CONFIG_XDP_SOCKETS) | ||
53 | BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) | ||
54 | #endif | ||
52 | #endif | 55 | #endif |
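Registering xsk_map_ops makes the new map type creatable from user space like any other; the values stored in it are AF_XDP socket file descriptors. A hedged user-space sketch using the tools/lib/bpf wrappers of the period (the include path is an assumption about the build setup):

```c
#include <errno.h>
#include <linux/bpf.h>
#include "bpf/bpf.h"	/* tools/lib/bpf wrappers */

/* Create an XSKMAP and bind an already-created AF_XDP socket fd into
 * slot 0; the program side can then bpf_redirect_map() into it.
 */
int setup_xskmap(int xsk_fd)
{
	int map_fd, key = 0;

	map_fd = bpf_create_map(BPF_MAP_TYPE_XSKMAP, sizeof(int),
				sizeof(int), 4 /* max_entries */, 0);
	if (map_fd < 0)
		return -errno;

	return bpf_map_update_elem(map_fd, &key, &xsk_fd, 0);
}
```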
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7e61c395fddf..8f70dc181e23 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h | |||
@@ -173,6 +173,11 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) | |||
173 | 173 | ||
174 | #define BPF_MAX_SUBPROGS 256 | 174 | #define BPF_MAX_SUBPROGS 256 |
175 | 175 | ||
176 | struct bpf_subprog_info { | ||
177 | u32 start; /* insn idx of function entry point */ | ||
178 | u16 stack_depth; /* max. stack depth used by this function */ | ||
179 | }; | ||
180 | |||
176 | /* single container for all structs | 181 | /* single container for all structs |
177 | * one verifier_env per bpf_check() call | 182 | * one verifier_env per bpf_check() call |
178 | */ | 183 | */ |
@@ -191,9 +196,7 @@ struct bpf_verifier_env { | |||
191 | bool seen_direct_write; | 196 | bool seen_direct_write; |
192 | struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ | 197 | struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ |
193 | struct bpf_verifier_log log; | 198 | struct bpf_verifier_log log; |
194 | u32 subprog_starts[BPF_MAX_SUBPROGS]; | 199 | struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1]; |
195 | /* computes the stack depth of each bpf function */ | ||
196 | u16 subprog_stack_depth[BPF_MAX_SUBPROGS + 1]; | ||
197 | u32 subprog_cnt; | 200 | u32 subprog_cnt; |
198 | }; | 201 | }; |
199 | 202 | ||
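Folding the two parallel arrays into one struct keeps each subprogram's entry point and stack depth together. A hedged sketch of the resulting access pattern; the real consumer in the verifier walks the call chain rather than summing blindly.

```c
/* Illustrative only: index subprog_info instead of the old
 * subprog_starts[]/subprog_stack_depth[] pair.
 */
static u32 sketch_total_stack(const struct bpf_verifier_env *env)
{
	u32 i, depth = 0;

	for (i = 0; i < env->subprog_cnt; i++)
		depth += env->subprog_info[i].stack_depth;
	return depth;
}
```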
diff --git a/include/linux/filter.h b/include/linux/filter.h index 4da8b2308174..da7e16523128 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h | |||
@@ -47,7 +47,9 @@ struct xdp_buff; | |||
47 | /* Additional register mappings for converted user programs. */ | 47 | /* Additional register mappings for converted user programs. */ |
48 | #define BPF_REG_A BPF_REG_0 | 48 | #define BPF_REG_A BPF_REG_0 |
49 | #define BPF_REG_X BPF_REG_7 | 49 | #define BPF_REG_X BPF_REG_7 |
50 | #define BPF_REG_TMP BPF_REG_8 | 50 | #define BPF_REG_TMP BPF_REG_2 /* scratch reg */ |
51 | #define BPF_REG_D BPF_REG_8 /* data, callee-saved */ | ||
52 | #define BPF_REG_H BPF_REG_9 /* hlen, callee-saved */ | ||
51 | 53 | ||
52 | /* Kernel hidden auxiliary/helper register for hardening step. | 54 | /* Kernel hidden auxiliary/helper register for hardening step. |
53 | * Only used by eBPF JITs. It's nothing more than a temporary | 55 | * Only used by eBPF JITs. It's nothing more than a temporary |
@@ -468,7 +470,8 @@ struct bpf_prog { | |||
468 | dst_needed:1, /* Do we need dst entry? */ | 470 | dst_needed:1, /* Do we need dst entry? */ |
469 | blinded:1, /* Was blinded */ | 471 | blinded:1, /* Was blinded */ |
470 | is_func:1, /* program is a bpf function */ | 472 | is_func:1, /* program is a bpf function */ |
471 | kprobe_override:1; /* Do we override a kprobe? */ | 473 | kprobe_override:1, /* Do we override a kprobe? */ |
474 | has_callchain_buf:1; /* callchain buffer allocated? */ | ||
472 | enum bpf_prog_type type; /* Type of BPF program */ | 475 | enum bpf_prog_type type; /* Type of BPF program */ |
473 | enum bpf_attach_type expected_attach_type; /* For some prog types */ | 476 | enum bpf_attach_type expected_attach_type; /* For some prog types */ |
474 | u32 len; /* Number of filter blocks */ | 477 | u32 len; /* Number of filter blocks */ |
@@ -759,7 +762,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, | |||
759 | * This does not appear to be a real limitation for existing software. | 762 | * This does not appear to be a real limitation for existing software. |
760 | */ | 763 | */ |
761 | int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, | 764 | int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, |
762 | struct bpf_prog *prog); | 765 | struct xdp_buff *xdp, struct bpf_prog *prog); |
763 | int xdp_do_redirect(struct net_device *dev, | 766 | int xdp_do_redirect(struct net_device *dev, |
764 | struct xdp_buff *xdp, | 767 | struct xdp_buff *xdp, |
765 | struct bpf_prog *prog); | 768 | struct bpf_prog *prog); |
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 46dcb5f7522f..03ed492c4e14 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
@@ -2510,6 +2510,7 @@ void dev_disable_lro(struct net_device *dev); | |||
2510 | int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); | 2510 | int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); |
2511 | int dev_queue_xmit(struct sk_buff *skb); | 2511 | int dev_queue_xmit(struct sk_buff *skb); |
2512 | int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv); | 2512 | int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv); |
2513 | int dev_direct_xmit(struct sk_buff *skb, u16 queue_id); | ||
2513 | int register_netdevice(struct net_device *dev); | 2514 | int register_netdevice(struct net_device *dev); |
2514 | void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); | 2515 | void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); |
2515 | void unregister_netdevice_many(struct list_head *head); | 2516 | void unregister_netdevice_many(struct list_head *head); |
diff --git a/include/linux/socket.h b/include/linux/socket.h index ea50f4a65816..7ed4713d5337 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h | |||
@@ -207,8 +207,9 @@ struct ucred { | |||
207 | * PF_SMC protocol family that | 207 | * PF_SMC protocol family that |
208 | * reuses AF_INET address family | 208 | * reuses AF_INET address family |
209 | */ | 209 | */ |
210 | #define AF_XDP 44 /* XDP sockets */ | ||
210 | 211 | ||
211 | #define AF_MAX 44 /* For now.. */ | 212 | #define AF_MAX 45 /* For now.. */ |
212 | 213 | ||
213 | /* Protocol families, same as address families. */ | 214 | /* Protocol families, same as address families. */ |
214 | #define PF_UNSPEC AF_UNSPEC | 215 | #define PF_UNSPEC AF_UNSPEC |
@@ -257,6 +258,7 @@ struct ucred { | |||
257 | #define PF_KCM AF_KCM | 258 | #define PF_KCM AF_KCM |
258 | #define PF_QIPCRTR AF_QIPCRTR | 259 | #define PF_QIPCRTR AF_QIPCRTR |
259 | #define PF_SMC AF_SMC | 260 | #define PF_SMC AF_SMC |
261 | #define PF_XDP AF_XDP | ||
260 | #define PF_MAX AF_MAX | 262 | #define PF_MAX AF_MAX |
261 | 263 | ||
262 | /* Maximum queue length specifiable by listen. */ | 264 | /* Maximum queue length specifiable by listen. */ |
@@ -338,6 +340,7 @@ struct ucred { | |||
338 | #define SOL_NFC 280 | 340 | #define SOL_NFC 280 |
339 | #define SOL_KCM 281 | 341 | #define SOL_KCM 281 |
340 | #define SOL_TLS 282 | 342 | #define SOL_TLS 282 |
343 | #define SOL_XDP 283 | ||
341 | 344 | ||
342 | /* IPX options */ | 345 | /* IPX options */ |
343 | #define IPX_TYPE 1 | 346 | #define IPX_TYPE 1 |
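A minimal user-space illustration of the new constants; creating the socket needs nothing beyond the usual syscall, and all configuration happens via setsockopt() at the SOL_XDP level with options defined in the if_xdp.h uapi header (not part of this hunk). The AF_XDP fallback define is only for building against older headers.

```c
#include <stdio.h>
#include <sys/socket.h>

#ifndef AF_XDP
#define AF_XDP 44	/* value added in the hunk above */
#endif

int main(void)
{
	int fd = socket(AF_XDP, SOCK_RAW, 0);

	if (fd < 0) {
		perror("socket(AF_XDP, SOCK_RAW, 0)");
		return 1;
	}
	/* Ring and UMEM setup happens via setsockopt(fd, SOL_XDP, ...). */
	return 0;
}
```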
diff --git a/include/linux/tnum.h b/include/linux/tnum.h index 0d2d3da46139..c7dc2b5902c0 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h | |||
@@ -23,8 +23,10 @@ struct tnum tnum_range(u64 min, u64 max); | |||
23 | /* Arithmetic and logical ops */ | 23 | /* Arithmetic and logical ops */ |
24 | /* Shift a tnum left (by a fixed shift) */ | 24 | /* Shift a tnum left (by a fixed shift) */ |
25 | struct tnum tnum_lshift(struct tnum a, u8 shift); | 25 | struct tnum tnum_lshift(struct tnum a, u8 shift); |
26 | /* Shift a tnum right (by a fixed shift) */ | 26 | /* Shift (rsh) a tnum right (by a fixed shift) */ |
27 | struct tnum tnum_rshift(struct tnum a, u8 shift); | 27 | struct tnum tnum_rshift(struct tnum a, u8 shift); |
28 | /* Shift (arsh) a tnum right (by a fixed min_shift) */ | ||
29 | struct tnum tnum_arshift(struct tnum a, u8 min_shift); | ||
28 | /* Add two tnums, return @a + @b */ | 30 | /* Add two tnums, return @a + @b */ |
29 | struct tnum tnum_add(struct tnum a, struct tnum b); | 31 | struct tnum tnum_add(struct tnum a, struct tnum b); |
30 | /* Subtract two tnums, return @a - @b */ | 32 | /* Subtract two tnums, return @a - @b */ |
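The new arshift variant exists because the verifier must track BPF_ARSH separately: logical and arithmetic right shifts agree on non-negative values but diverge once the sign bit is set, and the min_shift parameter reflects that callers pass the smallest shift amount they can prove. A small stand-alone illustration of the difference:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t v = 0xffffffff80000000ULL;	/* negative when read as s64 */

	/* Logical shift feeds in zero bits from the left... */
	printf("rsh:  %#018llx\n", (unsigned long long)(v >> 4));
	/* ...while an arithmetic shift replicates the sign bit,
	 * which is what BPF_ARSH (and tnum_arshift) must model.
	 */
	printf("arsh: %#018llx\n", (unsigned long long)((int64_t)v >> 4));
	return 0;
}
```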
diff --git a/include/net/xdp.h b/include/net/xdp.h index 137ad5f9f40f..0b689cf561c7 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h | |||
@@ -104,6 +104,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp) | |||
104 | } | 104 | } |
105 | 105 | ||
106 | void xdp_return_frame(struct xdp_frame *xdpf); | 106 | void xdp_return_frame(struct xdp_frame *xdpf); |
107 | void xdp_return_buff(struct xdp_buff *xdp); | ||
107 | 108 | ||
108 | int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, | 109 | int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, |
109 | struct net_device *dev, u32 queue_index); | 110 | struct net_device *dev, u32 queue_index); |
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h new file mode 100644 index 000000000000..185f4928fbda --- /dev/null +++ b/include/net/xdp_sock.h | |||
@@ -0,0 +1,66 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 | ||
2 | * AF_XDP internal functions | ||
3 | * Copyright(c) 2018 Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #ifndef _LINUX_XDP_SOCK_H | ||
16 | #define _LINUX_XDP_SOCK_H | ||
17 | |||
18 | #include <linux/mutex.h> | ||
19 | #include <net/sock.h> | ||
20 | |||
21 | struct net_device; | ||
22 | struct xsk_queue; | ||
23 | struct xdp_umem; | ||
24 | |||
25 | struct xdp_sock { | ||
26 | /* struct sock must be the first member of struct xdp_sock */ | ||
27 | struct sock sk; | ||
28 | struct xsk_queue *rx; | ||
29 | struct net_device *dev; | ||
30 | struct xdp_umem *umem; | ||
31 | struct list_head flush_node; | ||
32 | u16 queue_id; | ||
33 | struct xsk_queue *tx ____cacheline_aligned_in_smp; | ||
34 | /* Protects multiple processes in the control path */ | ||
35 | struct mutex mutex; | ||
36 | u64 rx_dropped; | ||
37 | }; | ||
38 | |||
39 | struct xdp_buff; | ||
40 | #ifdef CONFIG_XDP_SOCKETS | ||
41 | int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); | ||
42 | int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); | ||
43 | void xsk_flush(struct xdp_sock *xs); | ||
44 | bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs); | ||
45 | #else | ||
46 | static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) | ||
47 | { | ||
48 | return -ENOTSUPP; | ||
49 | } | ||
50 | |||
51 | static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) | ||
52 | { | ||
53 | return -ENOTSUPP; | ||
54 | } | ||
55 | |||
56 | static inline void xsk_flush(struct xdp_sock *xs) | ||
57 | { | ||
58 | } | ||
59 | |||
60 | static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) | ||
61 | { | ||
62 | return false; | ||
63 | } | ||
64 | #endif /* CONFIG_XDP_SOCKETS */ | ||
65 | |||
66 | #endif /* _LINUX_XDP_SOCK_H */ | ||
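These entry points are consumed by the XSKMAP and generic-XDP paths rather than by drivers directly: the map-side redirect feeds frames to xsk_rcv() and defers the wakeup to a per-NAPI flush list. A rough sketch of that shape, with error handling and the guards against double-adding to the flush list trimmed (the real code lives in kernel/bpf/xskmap.c and net/xdp/xsk.c):

```c
#include <linux/list.h>
#include <net/xdp_sock.h>

static int sketch_xsk_redirect(struct xdp_sock *xs, struct xdp_buff *xdp,
			       struct list_head *flush_list)
{
	int err = xsk_rcv(xs, xdp);	/* copy the frame into the RX ring */

	if (err)
		return err;

	/* Batch the wakeup: __xsk_map_flush() calls xsk_flush() once
	 * per NAPI cycle rather than once per packet.
	 */
	list_add(&xs->flush_node, flush_list);
	return 0;
}
```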
diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h deleted file mode 100644 index 150185647e6b..000000000000 --- a/include/trace/events/bpf.h +++ /dev/null | |||
@@ -1,355 +0,0 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | #undef TRACE_SYSTEM | ||
3 | #define TRACE_SYSTEM bpf | ||
4 | |||
5 | #if !defined(_TRACE_BPF_H) || defined(TRACE_HEADER_MULTI_READ) | ||
6 | #define _TRACE_BPF_H | ||
7 | |||
8 | /* These are only used within the BPF_SYSCALL code */ | ||
9 | #ifdef CONFIG_BPF_SYSCALL | ||
10 | |||
11 | #include <linux/filter.h> | ||
12 | #include <linux/bpf.h> | ||
13 | #include <linux/fs.h> | ||
14 | #include <linux/tracepoint.h> | ||
15 | |||
16 | #define __PROG_TYPE_MAP(FN) \ | ||
17 | FN(SOCKET_FILTER) \ | ||
18 | FN(KPROBE) \ | ||
19 | FN(SCHED_CLS) \ | ||
20 | FN(SCHED_ACT) \ | ||
21 | FN(TRACEPOINT) \ | ||
22 | FN(XDP) \ | ||
23 | FN(PERF_EVENT) \ | ||
24 | FN(CGROUP_SKB) \ | ||
25 | FN(CGROUP_SOCK) \ | ||
26 | FN(LWT_IN) \ | ||
27 | FN(LWT_OUT) \ | ||
28 | FN(LWT_XMIT) | ||
29 | |||
30 | #define __MAP_TYPE_MAP(FN) \ | ||
31 | FN(HASH) \ | ||
32 | FN(ARRAY) \ | ||
33 | FN(PROG_ARRAY) \ | ||
34 | FN(PERF_EVENT_ARRAY) \ | ||
35 | FN(PERCPU_HASH) \ | ||
36 | FN(PERCPU_ARRAY) \ | ||
37 | FN(STACK_TRACE) \ | ||
38 | FN(CGROUP_ARRAY) \ | ||
39 | FN(LRU_HASH) \ | ||
40 | FN(LRU_PERCPU_HASH) \ | ||
41 | FN(LPM_TRIE) | ||
42 | |||
43 | #define __PROG_TYPE_TP_FN(x) \ | ||
44 | TRACE_DEFINE_ENUM(BPF_PROG_TYPE_##x); | ||
45 | #define __PROG_TYPE_SYM_FN(x) \ | ||
46 | { BPF_PROG_TYPE_##x, #x }, | ||
47 | #define __PROG_TYPE_SYM_TAB \ | ||
48 | __PROG_TYPE_MAP(__PROG_TYPE_SYM_FN) { -1, 0 } | ||
49 | __PROG_TYPE_MAP(__PROG_TYPE_TP_FN) | ||
50 | |||
51 | #define __MAP_TYPE_TP_FN(x) \ | ||
52 | TRACE_DEFINE_ENUM(BPF_MAP_TYPE_##x); | ||
53 | #define __MAP_TYPE_SYM_FN(x) \ | ||
54 | { BPF_MAP_TYPE_##x, #x }, | ||
55 | #define __MAP_TYPE_SYM_TAB \ | ||
56 | __MAP_TYPE_MAP(__MAP_TYPE_SYM_FN) { -1, 0 } | ||
57 | __MAP_TYPE_MAP(__MAP_TYPE_TP_FN) | ||
58 | |||
59 | DECLARE_EVENT_CLASS(bpf_prog_event, | ||
60 | |||
61 | TP_PROTO(const struct bpf_prog *prg), | ||
62 | |||
63 | TP_ARGS(prg), | ||
64 | |||
65 | TP_STRUCT__entry( | ||
66 | __array(u8, prog_tag, 8) | ||
67 | __field(u32, type) | ||
68 | ), | ||
69 | |||
70 | TP_fast_assign( | ||
71 | BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag)); | ||
72 | memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag)); | ||
73 | __entry->type = prg->type; | ||
74 | ), | ||
75 | |||
76 | TP_printk("prog=%s type=%s", | ||
77 | __print_hex_str(__entry->prog_tag, 8), | ||
78 | __print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB)) | ||
79 | ); | ||
80 | |||
81 | DEFINE_EVENT(bpf_prog_event, bpf_prog_get_type, | ||
82 | |||
83 | TP_PROTO(const struct bpf_prog *prg), | ||
84 | |||
85 | TP_ARGS(prg) | ||
86 | ); | ||
87 | |||
88 | DEFINE_EVENT(bpf_prog_event, bpf_prog_put_rcu, | ||
89 | |||
90 | TP_PROTO(const struct bpf_prog *prg), | ||
91 | |||
92 | TP_ARGS(prg) | ||
93 | ); | ||
94 | |||
95 | TRACE_EVENT(bpf_prog_load, | ||
96 | |||
97 | TP_PROTO(const struct bpf_prog *prg, int ufd), | ||
98 | |||
99 | TP_ARGS(prg, ufd), | ||
100 | |||
101 | TP_STRUCT__entry( | ||
102 | __array(u8, prog_tag, 8) | ||
103 | __field(u32, type) | ||
104 | __field(int, ufd) | ||
105 | ), | ||
106 | |||
107 | TP_fast_assign( | ||
108 | BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag)); | ||
109 | memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag)); | ||
110 | __entry->type = prg->type; | ||
111 | __entry->ufd = ufd; | ||
112 | ), | ||
113 | |||
114 | TP_printk("prog=%s type=%s ufd=%d", | ||
115 | __print_hex_str(__entry->prog_tag, 8), | ||
116 | __print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB), | ||
117 | __entry->ufd) | ||
118 | ); | ||
119 | |||
120 | TRACE_EVENT(bpf_map_create, | ||
121 | |||
122 | TP_PROTO(const struct bpf_map *map, int ufd), | ||
123 | |||
124 | TP_ARGS(map, ufd), | ||
125 | |||
126 | TP_STRUCT__entry( | ||
127 | __field(u32, type) | ||
128 | __field(u32, size_key) | ||
129 | __field(u32, size_value) | ||
130 | __field(u32, max_entries) | ||
131 | __field(u32, flags) | ||
132 | __field(int, ufd) | ||
133 | ), | ||
134 | |||
135 | TP_fast_assign( | ||
136 | __entry->type = map->map_type; | ||
137 | __entry->size_key = map->key_size; | ||
138 | __entry->size_value = map->value_size; | ||
139 | __entry->max_entries = map->max_entries; | ||
140 | __entry->flags = map->map_flags; | ||
141 | __entry->ufd = ufd; | ||
142 | ), | ||
143 | |||
144 | TP_printk("map type=%s ufd=%d key=%u val=%u max=%u flags=%x", | ||
145 | __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), | ||
146 | __entry->ufd, __entry->size_key, __entry->size_value, | ||
147 | __entry->max_entries, __entry->flags) | ||
148 | ); | ||
149 | |||
150 | DECLARE_EVENT_CLASS(bpf_obj_prog, | ||
151 | |||
152 | TP_PROTO(const struct bpf_prog *prg, int ufd, | ||
153 | const struct filename *pname), | ||
154 | |||
155 | TP_ARGS(prg, ufd, pname), | ||
156 | |||
157 | TP_STRUCT__entry( | ||
158 | __array(u8, prog_tag, 8) | ||
159 | __field(int, ufd) | ||
160 | __string(path, pname->name) | ||
161 | ), | ||
162 | |||
163 | TP_fast_assign( | ||
164 | BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag)); | ||
165 | memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag)); | ||
166 | __assign_str(path, pname->name); | ||
167 | __entry->ufd = ufd; | ||
168 | ), | ||
169 | |||
170 | TP_printk("prog=%s path=%s ufd=%d", | ||
171 | __print_hex_str(__entry->prog_tag, 8), | ||
172 | __get_str(path), __entry->ufd) | ||
173 | ); | ||
174 | |||
175 | DEFINE_EVENT(bpf_obj_prog, bpf_obj_pin_prog, | ||
176 | |||
177 | TP_PROTO(const struct bpf_prog *prg, int ufd, | ||
178 | const struct filename *pname), | ||
179 | |||
180 | TP_ARGS(prg, ufd, pname) | ||
181 | ); | ||
182 | |||
183 | DEFINE_EVENT(bpf_obj_prog, bpf_obj_get_prog, | ||
184 | |||
185 | TP_PROTO(const struct bpf_prog *prg, int ufd, | ||
186 | const struct filename *pname), | ||
187 | |||
188 | TP_ARGS(prg, ufd, pname) | ||
189 | ); | ||
190 | |||
191 | DECLARE_EVENT_CLASS(bpf_obj_map, | ||
192 | |||
193 | TP_PROTO(const struct bpf_map *map, int ufd, | ||
194 | const struct filename *pname), | ||
195 | |||
196 | TP_ARGS(map, ufd, pname), | ||
197 | |||
198 | TP_STRUCT__entry( | ||
199 | __field(u32, type) | ||
200 | __field(int, ufd) | ||
201 | __string(path, pname->name) | ||
202 | ), | ||
203 | |||
204 | TP_fast_assign( | ||
205 | __assign_str(path, pname->name); | ||
206 | __entry->type = map->map_type; | ||
207 | __entry->ufd = ufd; | ||
208 | ), | ||
209 | |||
210 | TP_printk("map type=%s ufd=%d path=%s", | ||
211 | __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), | ||
212 | __entry->ufd, __get_str(path)) | ||
213 | ); | ||
214 | |||
215 | DEFINE_EVENT(bpf_obj_map, bpf_obj_pin_map, | ||
216 | |||
217 | TP_PROTO(const struct bpf_map *map, int ufd, | ||
218 | const struct filename *pname), | ||
219 | |||
220 | TP_ARGS(map, ufd, pname) | ||
221 | ); | ||
222 | |||
223 | DEFINE_EVENT(bpf_obj_map, bpf_obj_get_map, | ||
224 | |||
225 | TP_PROTO(const struct bpf_map *map, int ufd, | ||
226 | const struct filename *pname), | ||
227 | |||
228 | TP_ARGS(map, ufd, pname) | ||
229 | ); | ||
230 | |||
231 | DECLARE_EVENT_CLASS(bpf_map_keyval, | ||
232 | |||
233 | TP_PROTO(const struct bpf_map *map, int ufd, | ||
234 | const void *key, const void *val), | ||
235 | |||
236 | TP_ARGS(map, ufd, key, val), | ||
237 | |||
238 | TP_STRUCT__entry( | ||
239 | __field(u32, type) | ||
240 | __field(u32, key_len) | ||
241 | __dynamic_array(u8, key, map->key_size) | ||
242 | __field(bool, key_trunc) | ||
243 | __field(u32, val_len) | ||
244 | __dynamic_array(u8, val, map->value_size) | ||
245 | __field(bool, val_trunc) | ||
246 | __field(int, ufd) | ||
247 | ), | ||
248 | |||
249 | TP_fast_assign( | ||
250 | memcpy(__get_dynamic_array(key), key, map->key_size); | ||
251 | memcpy(__get_dynamic_array(val), val, map->value_size); | ||
252 | __entry->type = map->map_type; | ||
253 | __entry->key_len = min(map->key_size, 16U); | ||
254 | __entry->key_trunc = map->key_size != __entry->key_len; | ||
255 | __entry->val_len = min(map->value_size, 16U); | ||
256 | __entry->val_trunc = map->value_size != __entry->val_len; | ||
257 | __entry->ufd = ufd; | ||
258 | ), | ||
259 | |||
260 | TP_printk("map type=%s ufd=%d key=[%s%s] val=[%s%s]", | ||
261 | __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), | ||
262 | __entry->ufd, | ||
263 | __print_hex(__get_dynamic_array(key), __entry->key_len), | ||
264 | __entry->key_trunc ? " ..." : "", | ||
265 | __print_hex(__get_dynamic_array(val), __entry->val_len), | ||
266 | __entry->val_trunc ? " ..." : "") | ||
267 | ); | ||
268 | |||
269 | DEFINE_EVENT(bpf_map_keyval, bpf_map_lookup_elem, | ||
270 | |||
271 | TP_PROTO(const struct bpf_map *map, int ufd, | ||
272 | const void *key, const void *val), | ||
273 | |||
274 | TP_ARGS(map, ufd, key, val) | ||
275 | ); | ||
276 | |||
277 | DEFINE_EVENT(bpf_map_keyval, bpf_map_update_elem, | ||
278 | |||
279 | TP_PROTO(const struct bpf_map *map, int ufd, | ||
280 | const void *key, const void *val), | ||
281 | |||
282 | TP_ARGS(map, ufd, key, val) | ||
283 | ); | ||
284 | |||
285 | TRACE_EVENT(bpf_map_delete_elem, | ||
286 | |||
287 | TP_PROTO(const struct bpf_map *map, int ufd, | ||
288 | const void *key), | ||
289 | |||
290 | TP_ARGS(map, ufd, key), | ||
291 | |||
292 | TP_STRUCT__entry( | ||
293 | __field(u32, type) | ||
294 | __field(u32, key_len) | ||
295 | __dynamic_array(u8, key, map->key_size) | ||
296 | __field(bool, key_trunc) | ||
297 | __field(int, ufd) | ||
298 | ), | ||
299 | |||
300 | TP_fast_assign( | ||
301 | memcpy(__get_dynamic_array(key), key, map->key_size); | ||
302 | __entry->type = map->map_type; | ||
303 | __entry->key_len = min(map->key_size, 16U); | ||
304 | __entry->key_trunc = map->key_size != __entry->key_len; | ||
305 | __entry->ufd = ufd; | ||
306 | ), | ||
307 | |||
308 | TP_printk("map type=%s ufd=%d key=[%s%s]", | ||
309 | __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), | ||
310 | __entry->ufd, | ||
311 | __print_hex(__get_dynamic_array(key), __entry->key_len), | ||
312 | __entry->key_trunc ? " ..." : "") | ||
313 | ); | ||
314 | |||
315 | TRACE_EVENT(bpf_map_next_key, | ||
316 | |||
317 | TP_PROTO(const struct bpf_map *map, int ufd, | ||
318 | const void *key, const void *key_next), | ||
319 | |||
320 | TP_ARGS(map, ufd, key, key_next), | ||
321 | |||
322 | TP_STRUCT__entry( | ||
323 | __field(u32, type) | ||
324 | __field(u32, key_len) | ||
325 | __dynamic_array(u8, key, map->key_size) | ||
326 | __dynamic_array(u8, nxt, map->key_size) | ||
327 | __field(bool, key_trunc) | ||
328 | __field(bool, key_null) | ||
329 | __field(int, ufd) | ||
330 | ), | ||
331 | |||
332 | TP_fast_assign( | ||
333 | if (key) | ||
334 | memcpy(__get_dynamic_array(key), key, map->key_size); | ||
335 | __entry->key_null = !key; | ||
336 | memcpy(__get_dynamic_array(nxt), key_next, map->key_size); | ||
337 | __entry->type = map->map_type; | ||
338 | __entry->key_len = min(map->key_size, 16U); | ||
339 | __entry->key_trunc = map->key_size != __entry->key_len; | ||
340 | __entry->ufd = ufd; | ||
341 | ), | ||
342 | |||
343 | TP_printk("map type=%s ufd=%d key=[%s%s] next=[%s%s]", | ||
344 | __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), | ||
345 | __entry->ufd, | ||
346 | __entry->key_null ? "NULL" : __print_hex(__get_dynamic_array(key), | ||
347 | __entry->key_len), | ||
348 | __entry->key_trunc && !__entry->key_null ? " ..." : "", | ||
349 | __print_hex(__get_dynamic_array(nxt), __entry->key_len), | ||
350 | __entry->key_trunc ? " ..." : "") | ||
351 | ); | ||
352 | #endif /* CONFIG_BPF_SYSCALL */ | ||
353 | #endif /* _TRACE_BPF_H */ | ||
354 | |||
355 | #include <trace/define_trace.h> | ||
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index da77a9388947..93d5a4eeec2a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h | |||
@@ -116,6 +116,7 @@ enum bpf_map_type { | |||
116 | BPF_MAP_TYPE_DEVMAP, | 116 | BPF_MAP_TYPE_DEVMAP, |
117 | BPF_MAP_TYPE_SOCKMAP, | 117 | BPF_MAP_TYPE_SOCKMAP, |
118 | BPF_MAP_TYPE_CPUMAP, | 118 | BPF_MAP_TYPE_CPUMAP, |
119 | BPF_MAP_TYPE_XSKMAP, | ||
119 | }; | 120 | }; |
120 | 121 | ||
121 | enum bpf_prog_type { | 122 | enum bpf_prog_type { |
@@ -828,12 +829,12 @@ union bpf_attr { | |||
828 | * | 829 | * |
829 | * Also, be aware that the newer helper | 830 | * Also, be aware that the newer helper |
830 | * **bpf_perf_event_read_value**\ () is recommended over | 831 | * **bpf_perf_event_read_value**\ () is recommended over |
831 | * **bpf_perf_event_read*\ () in general. The latter has some ABI | 832 | * **bpf_perf_event_read**\ () in general. The latter has some ABI |
832 | * quirks where error and counter value are used as a return code | 833 | * quirks where error and counter value are used as a return code |
833 | * (which is wrong to do since ranges may overlap). This issue is | 834 | * (which is wrong to do since ranges may overlap). This issue is |
834 | * fixed with bpf_perf_event_read_value(), which at the same time | 835 | * fixed with **bpf_perf_event_read_value**\ (), which at the same |
835 | * provides more features over the **bpf_perf_event_read**\ () | 836 | * time provides more features over the **bpf_perf_event_read**\ |
836 | * interface. Please refer to the description of | 837 | * () interface. Please refer to the description of |
837 | * **bpf_perf_event_read_value**\ () for details. | 838 | * **bpf_perf_event_read_value**\ () for details. |
838 | * Return | 839 | * Return |
839 | * The value of the perf event counter read from the map, or a | 840 | * The value of the perf event counter read from the map, or a |
@@ -1361,7 +1362,7 @@ union bpf_attr { | |||
1361 | * Return | 1362 | * Return |
1362 | * 0 | 1363 | * 0 |
1363 | * | 1364 | * |
1364 | * int bpf_setsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen) | 1365 | * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) |
1365 | * Description | 1366 | * Description |
1366 | * Emulate a call to **setsockopt()** on the socket associated to | 1367 | * Emulate a call to **setsockopt()** on the socket associated to |
1367 | * *bpf_socket*, which must be a full socket. The *level* at | 1368 | * *bpf_socket*, which must be a full socket. The *level* at |
@@ -1435,7 +1436,7 @@ union bpf_attr { | |||
1435 | * Return | 1436 | * Return |
1436 | * **SK_PASS** on success, or **SK_DROP** on error. | 1437 | * **SK_PASS** on success, or **SK_DROP** on error. |
1437 | * | 1438 | * |
1438 | * int bpf_sock_map_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) | 1439 | * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) |
1439 | * Description | 1440 | * Description |
1440 | * Add an entry to, or update a *map* referencing sockets. The | 1441 | * Add an entry to, or update a *map* referencing sockets. The |
1441 | * *skops* is used as a new value for the entry associated to | 1442 | * *skops* is used as a new value for the entry associated to |
@@ -1533,7 +1534,7 @@ union bpf_attr { | |||
1533 | * Return | 1534 | * Return |
1534 | * 0 on success, or a negative error in case of failure. | 1535 | * 0 on success, or a negative error in case of failure. |
1535 | * | 1536 | * |
1536 | * int bpf_perf_prog_read_value(struct bpf_perf_event_data_kern *ctx, struct bpf_perf_event_value *buf, u32 buf_size) | 1537 | * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) |
1537 | * Description | 1538 | * Description |
1538 | * For an eBPF program attached to a perf event, retrieve the | 1539 | * For an eBPF program attached to a perf event, retrieve the |
1539 | * value of the event counter associated to *ctx* and store it in | 1540 | * value of the event counter associated to *ctx* and store it in |
@@ -1544,7 +1545,7 @@ union bpf_attr { | |||
1544 | * Return | 1545 | * Return |
1545 | * 0 on success, or a negative error in case of failure. | 1546 | * 0 on success, or a negative error in case of failure. |
1546 | * | 1547 | * |
1547 | * int bpf_getsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen) | 1548 | * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) |
1548 | * Description | 1549 | * Description |
1549 | * Emulate a call to **getsockopt()** on the socket associated to | 1550 | * Emulate a call to **getsockopt()** on the socket associated to |
1550 | * *bpf_socket*, which must be a full socket. The *level* at | 1551 | * *bpf_socket*, which must be a full socket. The *level* at |
@@ -1588,7 +1589,7 @@ union bpf_attr { | |||
1588 | * Return | 1589 | * Return |
1589 | * 0 | 1590 | * 0 |
1590 | * | 1591 | * |
1591 | * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops_kern *bpf_sock, int argval) | 1592 | * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) |
1592 | * Description | 1593 | * Description |
1593 | * Attempt to set the value of the **bpf_sock_ops_cb_flags** field | 1594 | * Attempt to set the value of the **bpf_sock_ops_cb_flags** field |
1594 | * for the full TCP socket associated to *bpf_sock_ops* to | 1595 | * for the full TCP socket associated to *bpf_sock_ops* to |
@@ -1721,7 +1722,7 @@ union bpf_attr { | |||
1721 | * Return | 1722 | * Return |
1722 | * 0 on success, or a negative error in case of failure. | 1723 | * 0 on success, or a negative error in case of failure. |
1723 | * | 1724 | * |
1724 | * int bpf_bind(struct bpf_sock_addr_kern *ctx, struct sockaddr *addr, int addr_len) | 1725 | * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) |
1725 | * Description | 1726 | * Description |
1726 | * Bind the socket associated to *ctx* to the address pointed by | 1727 | * Bind the socket associated to *ctx* to the address pointed by |
1727 | * *addr*, of length *addr_len*. This allows for making outgoing | 1728 | * *addr*, of length *addr_len*. This allows for making outgoing |
@@ -1767,6 +1768,64 @@ union bpf_attr { | |||
1767 | * **CONFIG_XFRM** configuration option. | 1768 | * **CONFIG_XFRM** configuration option. |
1768 | * Return | 1769 | * Return |
1769 | * 0 on success, or a negative error in case of failure. | 1770 | * 0 on success, or a negative error in case of failure. |
1771 | * | ||
1772 | * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags) | ||
1773 | * Description | ||
1774 | * Return a user or a kernel stack in the buffer provided by the bpf program. | ||
1775 | * To achieve this, the helper needs *regs*, which is a pointer | ||
1776 | * to the context on which the tracing program is executed. | ||
1777 | * To store the stacktrace, the bpf program provides *buf* with | ||
1778 | * a nonnegative *size*. | ||
1779 | * | ||
1780 | * The last argument, *flags*, holds the number of stack frames to | ||
1781 | * skip (from 0 to 255), masked with | ||
1782 | * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set | ||
1783 | * the following flags: | ||
1784 | * | ||
1785 | * **BPF_F_USER_STACK** | ||
1786 | * Collect a user space stack instead of a kernel stack. | ||
1787 | * **BPF_F_USER_BUILD_ID** | ||
1788 | * Collect buildid+offset instead of ips for user stack, | ||
1789 | * only valid if **BPF_F_USER_STACK** is also specified. | ||
1790 | * | ||
1791 | * **bpf_get_stack**\ () can collect up to | ||
1792 | * **PERF_MAX_STACK_DEPTH** kernel and user frames, subject to a | ||
1793 | * sufficiently large buffer size. Note that | ||
1794 | * this limit can be controlled with the **sysctl** program, and | ||
1795 | * that it should be manually increased in order to profile long | ||
1796 | * user stacks (such as stacks for Java programs). To do so, use: | ||
1797 | * | ||
1798 | * :: | ||
1799 | * | ||
1800 | * # sysctl kernel.perf_event_max_stack=<new value> | ||
1801 | * | ||
1802 | * Return | ||
1803 | * A non-negative value equal to or less than *size* on success, or | ||
1804 | * a negative error in case of failure. | ||
1805 | * | ||
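A concrete sketch of how a tracing program might call the helper
documented above (illustrative only, not part of this patch; assumes
the bpf_helpers.h wrappers from the kernel selftests and keeps the
buffer well under the 512-byte BPF stack limit)::

    #include <linux/bpf.h>
    #include <linux/ptrace.h>
    #include "bpf_helpers.h"	/* assumed: selftests helper header */

    #define MAX_DEPTH	32	/* 32 * 8 = 256 bytes of BPF stack */

    SEC("kprobe/sys_write")
    int trace_write(struct pt_regs *ctx)
    {
    	__u64 stack[MAX_DEPTH];
    	long len;

    	/* collect a user-space stack; the return value is the number
    	 * of bytes written to the buffer, or a negative error
    	 */
    	len = bpf_get_stack(ctx, stack, sizeof(stack), BPF_F_USER_STACK);
    	if (len < 0)
    		return 0;
    	/* e.g. forward the stack via bpf_perf_event_output() here */
    	return 0;
    }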
1806 | * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header) | ||
1807 | * Description | ||
1808 | * This helper is similar to **bpf_skb_load_bytes**\ () in that | ||
1809 | * it provides an easy way to load *len* bytes from *offset* | ||
1810 | * from the packet associated to *skb*, into the buffer pointed | ||
1811 | * by *to*. The difference to **bpf_skb_load_bytes**\ () is that | ||
1812 | * a fifth argument *start_header* exists in order to select a | ||
1813 | * base offset to start from. *start_header* can be one of: | ||
1814 | * | ||
1815 | * **BPF_HDR_START_MAC** | ||
1816 | * Base offset to load data from is *skb*'s mac header. | ||
1817 | * **BPF_HDR_START_NET** | ||
1818 | * Base offset to load data from is *skb*'s network header. | ||
1819 | * | ||
1820 | * In general, "direct packet access" is the preferred method to | ||
1821 | * access packet data; however, this helper is particularly useful | ||
1822 | * in socket filters, where *skb*\ **->data** does not always point | ||
1823 | * to the start of the mac header and where "direct packet access" | ||
1824 | * is not available. | ||
1825 | * | ||
1826 | * Return | ||
1827 | * 0 on success, or a negative error in case of failure. | ||
1828 | * | ||
1770 | */ | 1829 | */ |
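Similarly, a sketch of the new relative-load helper in a socket filter
(illustrative, not part of this patch; includes and section name follow
the usual selftests conventions)::

    #include <linux/bpf.h>
    #include <linux/ip.h>
    #include <linux/in.h>
    #include "bpf_helpers.h"	/* assumed: selftests helper header */

    SEC("socket")
    int sock_filter(struct __sk_buff *skb)
    {
    	struct iphdr iph;

    	/* load the IPv4 header relative to the network header, which
    	 * works even though skb->data does not point to the mac header
    	 * for this program type
    	 */
    	if (bpf_skb_load_bytes_relative(skb, 0, &iph, sizeof(iph),
    					BPF_HDR_START_NET))
    		return 0;			/* drop on error */

    	return iph.protocol == IPPROTO_TCP ? skb->len : 0;
    }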
1771 | #define __BPF_FUNC_MAPPER(FN) \ | 1830 | #define __BPF_FUNC_MAPPER(FN) \ |
1772 | FN(unspec), \ | 1831 | FN(unspec), \ |
@@ -1835,7 +1894,9 @@ union bpf_attr { | |||
1835 | FN(msg_pull_data), \ | 1894 | FN(msg_pull_data), \ |
1836 | FN(bind), \ | 1895 | FN(bind), \ |
1837 | FN(xdp_adjust_tail), \ | 1896 | FN(xdp_adjust_tail), \ |
1838 | FN(skb_get_xfrm_state), | 1897 | FN(skb_get_xfrm_state), \ |
1898 | FN(get_stack), \ | ||
1899 | FN(skb_load_bytes_relative), | ||
1839 | 1900 | ||
1840 | /* integer value in 'imm' field of BPF_CALL instruction selects which helper | 1901 | /* integer value in 'imm' field of BPF_CALL instruction selects which helper |
1841 | * function eBPF program intends to call | 1902 | * function eBPF program intends to call |
@@ -1869,11 +1930,14 @@ enum bpf_func_id { | |||
1869 | /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ | 1930 | /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ |
1870 | #define BPF_F_TUNINFO_IPV6 (1ULL << 0) | 1931 | #define BPF_F_TUNINFO_IPV6 (1ULL << 0) |
1871 | 1932 | ||
1872 | /* BPF_FUNC_get_stackid flags. */ | 1933 | /* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ |
1873 | #define BPF_F_SKIP_FIELD_MASK 0xffULL | 1934 | #define BPF_F_SKIP_FIELD_MASK 0xffULL |
1874 | #define BPF_F_USER_STACK (1ULL << 8) | 1935 | #define BPF_F_USER_STACK (1ULL << 8) |
1936 | /* flags used by BPF_FUNC_get_stackid only. */ | ||
1875 | #define BPF_F_FAST_STACK_CMP (1ULL << 9) | 1937 | #define BPF_F_FAST_STACK_CMP (1ULL << 9) |
1876 | #define BPF_F_REUSE_STACKID (1ULL << 10) | 1938 | #define BPF_F_REUSE_STACKID (1ULL << 10) |
1939 | /* flags used by BPF_FUNC_get_stack only. */ | ||
1940 | #define BPF_F_USER_BUILD_ID (1ULL << 11) | ||
1877 | 1941 | ||
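To make the split above concrete, the skip count occupies the low byte
and composes with the flag bits (illustrative, not part of the patch)::

    /* skip the two innermost frames and collect a user stack
     * annotated with build-id+offset
     */
    __u64 flags = 2 /* masked by BPF_F_SKIP_FIELD_MASK */ |
    	      BPF_F_USER_STACK | BPF_F_USER_BUILD_ID;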
1878 | /* BPF_FUNC_skb_set_tunnel_key flags. */ | 1942 | /* BPF_FUNC_skb_set_tunnel_key flags. */ |
1879 | #define BPF_F_ZERO_CSUM_TX (1ULL << 1) | 1943 | #define BPF_F_ZERO_CSUM_TX (1ULL << 1) |
@@ -1893,6 +1957,12 @@ enum bpf_adj_room_mode { | |||
1893 | BPF_ADJ_ROOM_NET, | 1957 | BPF_ADJ_ROOM_NET, |
1894 | }; | 1958 | }; |
1895 | 1959 | ||
1960 | /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ | ||
1961 | enum bpf_hdr_start_off { | ||
1962 | BPF_HDR_START_MAC, | ||
1963 | BPF_HDR_START_NET, | ||
1964 | }; | ||
1965 | |||
1896 | /* user accessible mirror of in-kernel sk_buff. | 1966 | /* user accessible mirror of in-kernel sk_buff. |
1897 | * new fields can only be added to the end of this structure | 1967 | * new fields can only be added to the end of this structure |
1898 | */ | 1968 | */ |
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h new file mode 100644 index 000000000000..77b88c4efe98 --- /dev/null +++ b/include/uapi/linux/if_xdp.h | |||
@@ -0,0 +1,87 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note | ||
2 | * | ||
3 | * if_xdp: XDP socket user-space interface | ||
4 | * Copyright(c) 2018 Intel Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * Author(s): Björn Töpel <bjorn.topel@intel.com> | ||
16 | * Magnus Karlsson <magnus.karlsson@intel.com> | ||
17 | */ | ||
18 | |||
19 | #ifndef _LINUX_IF_XDP_H | ||
20 | #define _LINUX_IF_XDP_H | ||
21 | |||
22 | #include <linux/types.h> | ||
23 | |||
24 | /* Options for the sxdp_flags field */ | ||
25 | #define XDP_SHARED_UMEM 1 | ||
26 | |||
27 | struct sockaddr_xdp { | ||
28 | __u16 sxdp_family; | ||
29 | __u32 sxdp_ifindex; | ||
30 | __u32 sxdp_queue_id; | ||
31 | __u32 sxdp_shared_umem_fd; | ||
32 | __u16 sxdp_flags; | ||
33 | }; | ||
34 | |||
35 | /* XDP socket options */ | ||
36 | #define XDP_RX_RING 1 | ||
37 | #define XDP_TX_RING 2 | ||
38 | #define XDP_UMEM_REG 3 | ||
39 | #define XDP_UMEM_FILL_RING 4 | ||
40 | #define XDP_UMEM_COMPLETION_RING 5 | ||
41 | #define XDP_STATISTICS 6 | ||
42 | |||
43 | struct xdp_umem_reg { | ||
44 | __u64 addr; /* Start of packet data area */ | ||
45 | __u64 len; /* Length of packet data area */ | ||
46 | __u32 frame_size; /* Frame size */ | ||
47 | __u32 frame_headroom; /* Frame headroom */ | ||
48 | }; | ||
49 | |||
50 | struct xdp_statistics { | ||
51 | __u64 rx_dropped; /* Dropped for reasons other than invalid desc */ | ||
52 | __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */ | ||
53 | __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */ | ||
54 | }; | ||
55 | |||
56 | /* Pgoff for mmaping the rings */ | ||
57 | #define XDP_PGOFF_RX_RING 0 | ||
58 | #define XDP_PGOFF_TX_RING 0x80000000 | ||
59 | #define XDP_UMEM_PGOFF_FILL_RING 0x100000000 | ||
60 | #define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000 | ||
61 | |||
62 | struct xdp_desc { | ||
63 | __u32 idx; | ||
64 | __u32 len; | ||
65 | __u16 offset; | ||
66 | __u8 flags; | ||
67 | __u8 padding[5]; | ||
68 | }; | ||
69 | |||
70 | struct xdp_ring { | ||
71 | __u32 producer __attribute__((aligned(64))); | ||
72 | __u32 consumer __attribute__((aligned(64))); | ||
73 | }; | ||
74 | |||
75 | /* Used for the RX and TX queues for packets */ | ||
76 | struct xdp_rxtx_ring { | ||
77 | struct xdp_ring ptrs; | ||
78 | struct xdp_desc desc[0] __attribute__((aligned(64))); | ||
79 | }; | ||
80 | |||
81 | /* Used for the fill and completion queues for buffers */ | ||
82 | struct xdp_umem_ring { | ||
83 | struct xdp_ring ptrs; | ||
84 | __u32 desc[0] __attribute__((aligned(64))); | ||
85 | }; | ||
86 | |||
87 | #endif /* _LINUX_IF_XDP_H */ | ||
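Taken together, these definitions enable a user-space flow roughly like
the sketch below. This is illustrative, not part of the patch: AF_XDP
and SOL_XDP are the address-family and socket-level constants added
elsewhere in this series, and the UMEM allocation is assumed to be done
by the caller::

    #include <net/if.h>
    #include <sys/socket.h>
    #include <linux/if_xdp.h>

    static int xsk_setup(void *bufs, __u64 nframes, __u32 frame_size)
    {
    	struct xdp_umem_reg mr = {
    		.addr = (__u64)(unsigned long)bufs,
    		.len = nframes * frame_size,
    		.frame_size = frame_size,
    		.frame_headroom = 0,
    	};
    	struct sockaddr_xdp sxdp = {
    		.sxdp_family = AF_XDP,
    		.sxdp_ifindex = if_nametoindex("eth0"),
    		.sxdp_queue_id = 0,
    	};
    	int fd = socket(AF_XDP, SOCK_RAW, 0);

    	if (fd < 0)
    		return -1;
    	/* register the UMEM, then bind to queue 0 of eth0 */
    	if (setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)) ||
    	    bind(fd, (struct sockaddr *)&sxdp, sizeof(sxdp)))
    		return -1;
    	return fd;
    }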
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 35c485fa9ea3..f27f5496d6fe 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile | |||
@@ -8,6 +8,9 @@ obj-$(CONFIG_BPF_SYSCALL) += btf.o | |||
8 | ifeq ($(CONFIG_NET),y) | 8 | ifeq ($(CONFIG_NET),y) |
9 | obj-$(CONFIG_BPF_SYSCALL) += devmap.o | 9 | obj-$(CONFIG_BPF_SYSCALL) += devmap.o |
10 | obj-$(CONFIG_BPF_SYSCALL) += cpumap.o | 10 | obj-$(CONFIG_BPF_SYSCALL) += cpumap.o |
11 | ifeq ($(CONFIG_XDP_SOCKETS),y) | ||
12 | obj-$(CONFIG_BPF_SYSCALL) += xskmap.o | ||
13 | endif | ||
11 | obj-$(CONFIG_BPF_SYSCALL) += offload.o | 14 | obj-$(CONFIG_BPF_SYSCALL) += offload.o |
12 | ifeq ($(CONFIG_STREAM_PARSER),y) | 15 | ifeq ($(CONFIG_STREAM_PARSER),y) |
13 | ifeq ($(CONFIG_INET),y) | 16 | ifeq ($(CONFIG_INET),y) |
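The hunk above builds xskmap.o only when XDP sockets are configured in;
a matching config fragment would be (illustrative)::

    # illustrative .config fragment, not part of the patch
    CONFIG_NET=y
    CONFIG_BPF_SYSCALL=y
    CONFIG_XDP_SOCKETS=y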
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ba03ec39efb3..d0d7d9462368 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/rbtree_latch.h> | 31 | #include <linux/rbtree_latch.h> |
32 | #include <linux/kallsyms.h> | 32 | #include <linux/kallsyms.h> |
33 | #include <linux/rcupdate.h> | 33 | #include <linux/rcupdate.h> |
34 | #include <linux/perf_event.h> | ||
34 | 35 | ||
35 | #include <asm/unaligned.h> | 36 | #include <asm/unaligned.h> |
36 | 37 | ||
@@ -633,23 +634,6 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, | |||
633 | *to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off); | 634 | *to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off); |
634 | break; | 635 | break; |
635 | 636 | ||
636 | case BPF_LD | BPF_ABS | BPF_W: | ||
637 | case BPF_LD | BPF_ABS | BPF_H: | ||
638 | case BPF_LD | BPF_ABS | BPF_B: | ||
639 | *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); | ||
640 | *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); | ||
641 | *to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0); | ||
642 | break; | ||
643 | |||
644 | case BPF_LD | BPF_IND | BPF_W: | ||
645 | case BPF_LD | BPF_IND | BPF_H: | ||
646 | case BPF_LD | BPF_IND | BPF_B: | ||
647 | *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); | ||
648 | *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); | ||
649 | *to++ = BPF_ALU32_REG(BPF_ADD, BPF_REG_AX, from->src_reg); | ||
650 | *to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0); | ||
651 | break; | ||
652 | |||
653 | case BPF_LD | BPF_IMM | BPF_DW: | 637 | case BPF_LD | BPF_IMM | BPF_DW: |
654 | *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm); | 638 | *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm); |
655 | *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); | 639 | *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); |
@@ -890,14 +874,7 @@ EXPORT_SYMBOL_GPL(__bpf_call_base); | |||
890 | INSN_3(LDX, MEM, W), \ | 874 | INSN_3(LDX, MEM, W), \ |
891 | INSN_3(LDX, MEM, DW), \ | 875 | INSN_3(LDX, MEM, DW), \ |
892 | /* Immediate based. */ \ | 876 | /* Immediate based. */ \ |
893 | INSN_3(LD, IMM, DW), \ | 877 | INSN_3(LD, IMM, DW) |
894 | /* Misc (old cBPF carry-over). */ \ | ||
895 | INSN_3(LD, ABS, B), \ | ||
896 | INSN_3(LD, ABS, H), \ | ||
897 | INSN_3(LD, ABS, W), \ | ||
898 | INSN_3(LD, IND, B), \ | ||
899 | INSN_3(LD, IND, H), \ | ||
900 | INSN_3(LD, IND, W) | ||
901 | 878 | ||
902 | bool bpf_opcode_in_insntable(u8 code) | 879 | bool bpf_opcode_in_insntable(u8 code) |
903 | { | 880 | { |
@@ -907,6 +884,13 @@ bool bpf_opcode_in_insntable(u8 code) | |||
907 | [0 ... 255] = false, | 884 | [0 ... 255] = false, |
908 | /* Now overwrite non-defaults ... */ | 885 | /* Now overwrite non-defaults ... */ |
909 | BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL), | 886 | BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL), |
887 | /* UAPI exposed, but rewritten opcodes. cBPF carry-over. */ | ||
888 | [BPF_LD | BPF_ABS | BPF_B] = true, | ||
889 | [BPF_LD | BPF_ABS | BPF_H] = true, | ||
890 | [BPF_LD | BPF_ABS | BPF_W] = true, | ||
891 | [BPF_LD | BPF_IND | BPF_B] = true, | ||
892 | [BPF_LD | BPF_IND | BPF_H] = true, | ||
893 | [BPF_LD | BPF_IND | BPF_W] = true, | ||
910 | }; | 894 | }; |
911 | #undef BPF_INSN_3_TBL | 895 | #undef BPF_INSN_3_TBL |
912 | #undef BPF_INSN_2_TBL | 896 | #undef BPF_INSN_2_TBL |
@@ -937,8 +921,6 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) | |||
937 | #undef BPF_INSN_3_LBL | 921 | #undef BPF_INSN_3_LBL |
938 | #undef BPF_INSN_2_LBL | 922 | #undef BPF_INSN_2_LBL |
939 | u32 tail_call_cnt = 0; | 923 | u32 tail_call_cnt = 0; |
940 | void *ptr; | ||
941 | int off; | ||
942 | 924 | ||
943 | #define CONT ({ insn++; goto select_insn; }) | 925 | #define CONT ({ insn++; goto select_insn; }) |
944 | #define CONT_JMP ({ insn++; goto select_insn; }) | 926 | #define CONT_JMP ({ insn++; goto select_insn; }) |
@@ -1265,67 +1247,6 @@ out: | |||
1265 | atomic64_add((u64) SRC, (atomic64_t *)(unsigned long) | 1247 | atomic64_add((u64) SRC, (atomic64_t *)(unsigned long) |
1266 | (DST + insn->off)); | 1248 | (DST + insn->off)); |
1267 | CONT; | 1249 | CONT; |
1268 | LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */ | ||
1269 | off = IMM; | ||
1270 | load_word: | ||
1271 | /* BPF_LD + BPF_ABS and BPF_LD + BPF_IND insns are only | ||
1272 | * appearing in the programs where ctx == skb | ||
1273 | * (see may_access_skb() in the verifier). All programs | ||
1274 | * keep 'ctx' in regs[BPF_REG_CTX] == BPF_R6, | ||
1275 | * bpf_convert_filter() saves it in BPF_R6, internal BPF | ||
1276 | * verifier will check that BPF_R6 == ctx. | ||
1277 | * | ||
1278 | * BPF_ABS and BPF_IND are wrappers of function calls, | ||
1279 | * so they scratch BPF_R1-BPF_R5 registers, preserve | ||
1280 | * BPF_R6-BPF_R9, and store return value into BPF_R0. | ||
1281 | * | ||
1282 | * Implicit input: | ||
1283 | * ctx == skb == BPF_R6 == CTX | ||
1284 | * | ||
1285 | * Explicit input: | ||
1286 | * SRC == any register | ||
1287 | * IMM == 32-bit immediate | ||
1288 | * | ||
1289 | * Output: | ||
1290 | * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness | ||
1291 | */ | ||
1292 | |||
1293 | ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp); | ||
1294 | if (likely(ptr != NULL)) { | ||
1295 | BPF_R0 = get_unaligned_be32(ptr); | ||
1296 | CONT; | ||
1297 | } | ||
1298 | |||
1299 | return 0; | ||
1300 | LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */ | ||
1301 | off = IMM; | ||
1302 | load_half: | ||
1303 | ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp); | ||
1304 | if (likely(ptr != NULL)) { | ||
1305 | BPF_R0 = get_unaligned_be16(ptr); | ||
1306 | CONT; | ||
1307 | } | ||
1308 | |||
1309 | return 0; | ||
1310 | LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */ | ||
1311 | off = IMM; | ||
1312 | load_byte: | ||
1313 | ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp); | ||
1314 | if (likely(ptr != NULL)) { | ||
1315 | BPF_R0 = *(u8 *)ptr; | ||
1316 | CONT; | ||
1317 | } | ||
1318 | |||
1319 | return 0; | ||
1320 | LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */ | ||
1321 | off = IMM + SRC; | ||
1322 | goto load_word; | ||
1323 | LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */ | ||
1324 | off = IMM + SRC; | ||
1325 | goto load_half; | ||
1326 | LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */ | ||
1327 | off = IMM + SRC; | ||
1328 | goto load_byte; | ||
1329 | 1250 | ||
1330 | default_label: | 1251 | default_label: |
1331 | /* If we ever reach this, we have a bug somewhere. Die hard here | 1252 | /* If we ever reach this, we have a bug somewhere. Die hard here |
@@ -1722,6 +1643,10 @@ static void bpf_prog_free_deferred(struct work_struct *work) | |||
1722 | aux = container_of(work, struct bpf_prog_aux, work); | 1643 | aux = container_of(work, struct bpf_prog_aux, work); |
1723 | if (bpf_prog_is_dev_bound(aux)) | 1644 | if (bpf_prog_is_dev_bound(aux)) |
1724 | bpf_prog_offload_destroy(aux->prog); | 1645 | bpf_prog_offload_destroy(aux->prog); |
1646 | #ifdef CONFIG_PERF_EVENTS | ||
1647 | if (aux->prog->has_callchain_buf) | ||
1648 | put_callchain_buffers(); | ||
1649 | #endif | ||
1725 | for (i = 0; i < aux->func_cnt; i++) | 1650 | for (i = 0; i < aux->func_cnt; i++) |
1726 | bpf_jit_free(aux->func[i]); | 1651 | bpf_jit_free(aux->func[i]); |
1727 | if (aux->func_cnt) { | 1652 | if (aux->func_cnt) { |
@@ -1794,6 +1719,7 @@ bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, | |||
1794 | { | 1719 | { |
1795 | return -ENOTSUPP; | 1720 | return -ENOTSUPP; |
1796 | } | 1721 | } |
1722 | EXPORT_SYMBOL_GPL(bpf_event_output); | ||
1797 | 1723 | ||
1798 | /* Always built-in helper functions. */ | 1724 | /* Always built-in helper functions. */ |
1799 | const struct bpf_func_proto bpf_tail_call_proto = { | 1725 | const struct bpf_func_proto bpf_tail_call_proto = { |
@@ -1840,9 +1766,3 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to, | |||
1840 | #include <linux/bpf_trace.h> | 1766 | #include <linux/bpf_trace.h> |
1841 | 1767 | ||
1842 | EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception); | 1768 | EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception); |
1843 | |||
1844 | /* These are only used within the BPF_SYSCALL code */ | ||
1845 | #ifdef CONFIG_BPF_SYSCALL | ||
1846 | EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_get_type); | ||
1847 | EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_put_rcu); | ||
1848 | #endif | ||
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index a41343009ccc..ed13645bd80c 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c | |||
@@ -429,13 +429,6 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname) | |||
429 | ret = bpf_obj_do_pin(pname, raw, type); | 429 | ret = bpf_obj_do_pin(pname, raw, type); |
430 | if (ret != 0) | 430 | if (ret != 0) |
431 | bpf_any_put(raw, type); | 431 | bpf_any_put(raw, type); |
432 | if ((trace_bpf_obj_pin_prog_enabled() || | ||
433 | trace_bpf_obj_pin_map_enabled()) && !ret) { | ||
434 | if (type == BPF_TYPE_PROG) | ||
435 | trace_bpf_obj_pin_prog(raw, ufd, pname); | ||
436 | if (type == BPF_TYPE_MAP) | ||
437 | trace_bpf_obj_pin_map(raw, ufd, pname); | ||
438 | } | ||
439 | out: | 432 | out: |
440 | putname(pname); | 433 | putname(pname); |
441 | return ret; | 434 | return ret; |
@@ -502,15 +495,8 @@ int bpf_obj_get_user(const char __user *pathname, int flags) | |||
502 | else | 495 | else |
503 | goto out; | 496 | goto out; |
504 | 497 | ||
505 | if (ret < 0) { | 498 | if (ret < 0) |
506 | bpf_any_put(raw, type); | 499 | bpf_any_put(raw, type); |
507 | } else if (trace_bpf_obj_get_prog_enabled() || | ||
508 | trace_bpf_obj_get_map_enabled()) { | ||
509 | if (type == BPF_TYPE_PROG) | ||
510 | trace_bpf_obj_get_prog(raw, ret, pname); | ||
511 | if (type == BPF_TYPE_MAP) | ||
512 | trace_bpf_obj_get_map(raw, ret, pname); | ||
513 | } | ||
514 | out: | 500 | out: |
515 | putname(pname); | 501 | putname(pname); |
516 | return ret; | 502 | return ret; |
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index c9401075b58c..ac747d5cf7c6 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2017-2018 Netronome Systems, Inc. |
3 | * | 3 | * |
4 | * This software is licensed under the GNU General License Version 2, | 4 | * This software is licensed under the GNU General License Version 2, |
5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
@@ -474,8 +474,10 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map) | |||
474 | struct bpf_prog_offload *offload; | 474 | struct bpf_prog_offload *offload; |
475 | bool ret; | 475 | bool ret; |
476 | 476 | ||
477 | if (!bpf_prog_is_dev_bound(prog->aux) || !bpf_map_is_dev_bound(map)) | 477 | if (!bpf_prog_is_dev_bound(prog->aux)) |
478 | return false; | 478 | return false; |
479 | if (!bpf_map_is_dev_bound(map)) | ||
480 | return bpf_map_offload_neutral(map); | ||
479 | 481 | ||
480 | down_read(&bpf_devs_lock); | 482 | down_read(&bpf_devs_lock); |
481 | offload = prog->aux->offload; | 483 | offload = prog->aux->offload; |
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 57eeb1234b67..3ba102b41512 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c | |||
@@ -262,16 +262,11 @@ out: | |||
262 | return ret; | 262 | return ret; |
263 | } | 263 | } |
264 | 264 | ||
265 | static void stack_map_get_build_id_offset(struct bpf_map *map, | 265 | static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, |
266 | struct stack_map_bucket *bucket, | ||
267 | u64 *ips, u32 trace_nr, bool user) | 266 | u64 *ips, u32 trace_nr, bool user) |
268 | { | 267 | { |
269 | int i; | 268 | int i; |
270 | struct vm_area_struct *vma; | 269 | struct vm_area_struct *vma; |
271 | struct bpf_stack_build_id *id_offs; | ||
272 | |||
273 | bucket->nr = trace_nr; | ||
274 | id_offs = (struct bpf_stack_build_id *)bucket->data; | ||
275 | 270 | ||
276 | /* | 271 | /* |
277 | * We cannot do up_read() in nmi context, so build_id lookup is | 272 | * We cannot do up_read() in nmi context, so build_id lookup is |
@@ -361,8 +356,10 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, | |||
361 | pcpu_freelist_pop(&smap->freelist); | 356 | pcpu_freelist_pop(&smap->freelist); |
362 | if (unlikely(!new_bucket)) | 357 | if (unlikely(!new_bucket)) |
363 | return -ENOMEM; | 358 | return -ENOMEM; |
364 | stack_map_get_build_id_offset(map, new_bucket, ips, | 359 | new_bucket->nr = trace_nr; |
365 | trace_nr, user); | 360 | stack_map_get_build_id_offset( |
361 | (struct bpf_stack_build_id *)new_bucket->data, | ||
362 | ips, trace_nr, user); | ||
366 | trace_len = trace_nr * sizeof(struct bpf_stack_build_id); | 363 | trace_len = trace_nr * sizeof(struct bpf_stack_build_id); |
367 | if (hash_matches && bucket->nr == trace_nr && | 364 | if (hash_matches && bucket->nr == trace_nr && |
368 | memcmp(bucket->data, new_bucket->data, trace_len) == 0) { | 365 | memcmp(bucket->data, new_bucket->data, trace_len) == 0) { |
@@ -405,6 +402,73 @@ const struct bpf_func_proto bpf_get_stackid_proto = { | |||
405 | .arg3_type = ARG_ANYTHING, | 402 | .arg3_type = ARG_ANYTHING, |
406 | }; | 403 | }; |
407 | 404 | ||
405 | BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size, | ||
406 | u64, flags) | ||
407 | { | ||
408 | u32 init_nr, trace_nr, copy_len, elem_size, num_elem; | ||
409 | bool user_build_id = flags & BPF_F_USER_BUILD_ID; | ||
410 | u32 skip = flags & BPF_F_SKIP_FIELD_MASK; | ||
411 | bool user = flags & BPF_F_USER_STACK; | ||
412 | struct perf_callchain_entry *trace; | ||
413 | bool kernel = !user; | ||
414 | int err = -EINVAL; | ||
415 | u64 *ips; | ||
416 | |||
417 | if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | | ||
418 | BPF_F_USER_BUILD_ID))) | ||
419 | goto clear; | ||
420 | if (kernel && user_build_id) | ||
421 | goto clear; | ||
422 | |||
423 | elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id) | ||
424 | : sizeof(u64); | ||
425 | if (unlikely(size % elem_size)) | ||
426 | goto clear; | ||
427 | |||
428 | num_elem = size / elem_size; | ||
429 | if (sysctl_perf_event_max_stack < num_elem) | ||
430 | init_nr = 0; | ||
431 | else | ||
432 | init_nr = sysctl_perf_event_max_stack - num_elem; | ||
433 | trace = get_perf_callchain(regs, init_nr, kernel, user, | ||
434 | sysctl_perf_event_max_stack, false, false); | ||
435 | if (unlikely(!trace)) | ||
436 | goto err_fault; | ||
437 | |||
438 | trace_nr = trace->nr - init_nr; | ||
439 | if (trace_nr < skip) | ||
440 | goto err_fault; | ||
441 | |||
442 | trace_nr -= skip; | ||
443 | trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem; | ||
444 | copy_len = trace_nr * elem_size; | ||
445 | ips = trace->ip + skip + init_nr; | ||
446 | if (user && user_build_id) | ||
447 | stack_map_get_build_id_offset(buf, ips, trace_nr, user); | ||
448 | else | ||
449 | memcpy(buf, ips, copy_len); | ||
450 | |||
451 | if (size > copy_len) | ||
452 | memset(buf + copy_len, 0, size - copy_len); | ||
453 | return copy_len; | ||
454 | |||
455 | err_fault: | ||
456 | err = -EFAULT; | ||
457 | clear: | ||
458 | memset(buf, 0, size); | ||
459 | return err; | ||
460 | } | ||
461 | |||
462 | const struct bpf_func_proto bpf_get_stack_proto = { | ||
463 | .func = bpf_get_stack, | ||
464 | .gpl_only = true, | ||
465 | .ret_type = RET_INTEGER, | ||
466 | .arg1_type = ARG_PTR_TO_CTX, | ||
467 | .arg2_type = ARG_PTR_TO_UNINIT_MEM, | ||
468 | .arg3_type = ARG_CONST_SIZE_OR_ZERO, | ||
469 | .arg4_type = ARG_ANYTHING, | ||
470 | }; | ||
471 | |||
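To make the elem_size contract above concrete (illustrative; assumes
the 20-byte BPF_BUILD_ID_SIZE, which pads struct bpf_stack_build_id to
32 bytes)::

    /* Worked example, not part of the patch: without
     * BPF_F_USER_BUILD_ID, size must be a multiple of 8, so size = 96
     * holds 12 instruction pointers; with the flag set, size must be a
     * multiple of 32, so size = 96 holds 3 build-id entries and
     * size = 80 fails the "size % elem_size" check with -EINVAL.
     */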
408 | /* Called from eBPF program */ | 472 | /* Called from eBPF program */ |
409 | static void *stack_map_lookup_elem(struct bpf_map *map, void *key) | 473 | static void *stack_map_lookup_elem(struct bpf_map *map, void *key) |
410 | { | 474 | { |
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0bd2944eafb9..9b87198deea2 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c | |||
@@ -282,6 +282,7 @@ void bpf_map_put(struct bpf_map *map) | |||
282 | { | 282 | { |
283 | __bpf_map_put(map, true); | 283 | __bpf_map_put(map, true); |
284 | } | 284 | } |
285 | EXPORT_SYMBOL_GPL(bpf_map_put); | ||
285 | 286 | ||
286 | void bpf_map_put_with_uref(struct bpf_map *map) | 287 | void bpf_map_put_with_uref(struct bpf_map *map) |
287 | { | 288 | { |
@@ -503,7 +504,6 @@ static int map_create(union bpf_attr *attr) | |||
503 | return err; | 504 | return err; |
504 | } | 505 | } |
505 | 506 | ||
506 | trace_bpf_map_create(map, err); | ||
507 | return err; | 507 | return err; |
508 | 508 | ||
509 | free_map: | 509 | free_map: |
@@ -544,6 +544,7 @@ struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref) | |||
544 | atomic_inc(&map->usercnt); | 544 | atomic_inc(&map->usercnt); |
545 | return map; | 545 | return map; |
546 | } | 546 | } |
547 | EXPORT_SYMBOL_GPL(bpf_map_inc); | ||
547 | 548 | ||
548 | struct bpf_map *bpf_map_get_with_uref(u32 ufd) | 549 | struct bpf_map *bpf_map_get_with_uref(u32 ufd) |
549 | { | 550 | { |
@@ -663,7 +664,6 @@ static int map_lookup_elem(union bpf_attr *attr) | |||
663 | if (copy_to_user(uvalue, value, value_size) != 0) | 664 | if (copy_to_user(uvalue, value, value_size) != 0) |
664 | goto free_value; | 665 | goto free_value; |
665 | 666 | ||
666 | trace_bpf_map_lookup_elem(map, ufd, key, value); | ||
667 | err = 0; | 667 | err = 0; |
668 | 668 | ||
669 | free_value: | 669 | free_value: |
@@ -760,8 +760,6 @@ static int map_update_elem(union bpf_attr *attr) | |||
760 | __this_cpu_dec(bpf_prog_active); | 760 | __this_cpu_dec(bpf_prog_active); |
761 | preempt_enable(); | 761 | preempt_enable(); |
762 | out: | 762 | out: |
763 | if (!err) | ||
764 | trace_bpf_map_update_elem(map, ufd, key, value); | ||
765 | free_value: | 763 | free_value: |
766 | kfree(value); | 764 | kfree(value); |
767 | free_key: | 765 | free_key: |
@@ -814,8 +812,6 @@ static int map_delete_elem(union bpf_attr *attr) | |||
814 | __this_cpu_dec(bpf_prog_active); | 812 | __this_cpu_dec(bpf_prog_active); |
815 | preempt_enable(); | 813 | preempt_enable(); |
816 | out: | 814 | out: |
817 | if (!err) | ||
818 | trace_bpf_map_delete_elem(map, ufd, key); | ||
819 | kfree(key); | 815 | kfree(key); |
820 | err_put: | 816 | err_put: |
821 | fdput(f); | 817 | fdput(f); |
@@ -879,7 +875,6 @@ out: | |||
879 | if (copy_to_user(unext_key, next_key, map->key_size) != 0) | 875 | if (copy_to_user(unext_key, next_key, map->key_size) != 0) |
880 | goto free_next_key; | 876 | goto free_next_key; |
881 | 877 | ||
882 | trace_bpf_map_next_key(map, ufd, key, next_key); | ||
883 | err = 0; | 878 | err = 0; |
884 | 879 | ||
885 | free_next_key: | 880 | free_next_key: |
@@ -1027,7 +1022,6 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) | |||
1027 | if (atomic_dec_and_test(&prog->aux->refcnt)) { | 1022 | if (atomic_dec_and_test(&prog->aux->refcnt)) { |
1028 | int i; | 1023 | int i; |
1029 | 1024 | ||
1030 | trace_bpf_prog_put_rcu(prog); | ||
1031 | /* bpf_prog_free_id() must be called first */ | 1025 | /* bpf_prog_free_id() must be called first */ |
1032 | bpf_prog_free_id(prog, do_idr_lock); | 1026 | bpf_prog_free_id(prog, do_idr_lock); |
1033 | 1027 | ||
@@ -1194,11 +1188,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd) | |||
1194 | struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, | 1188 | struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, |
1195 | bool attach_drv) | 1189 | bool attach_drv) |
1196 | { | 1190 | { |
1197 | struct bpf_prog *prog = __bpf_prog_get(ufd, &type, attach_drv); | 1191 | return __bpf_prog_get(ufd, &type, attach_drv); |
1198 | |||
1199 | if (!IS_ERR(prog)) | ||
1200 | trace_bpf_prog_get_type(prog); | ||
1201 | return prog; | ||
1202 | } | 1192 | } |
1203 | EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); | 1193 | EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); |
1204 | 1194 | ||
@@ -1373,7 +1363,6 @@ static int bpf_prog_load(union bpf_attr *attr) | |||
1373 | } | 1363 | } |
1374 | 1364 | ||
1375 | bpf_prog_kallsyms_add(prog); | 1365 | bpf_prog_kallsyms_add(prog); |
1376 | trace_bpf_prog_load(prog, err); | ||
1377 | return err; | 1366 | return err; |
1378 | 1367 | ||
1379 | free_used_maps: | 1368 | free_used_maps: |
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c index 1f4bf68c12db..938d41211be7 100644 --- a/kernel/bpf/tnum.c +++ b/kernel/bpf/tnum.c | |||
@@ -43,6 +43,16 @@ struct tnum tnum_rshift(struct tnum a, u8 shift) | |||
43 | return TNUM(a.value >> shift, a.mask >> shift); | 43 | return TNUM(a.value >> shift, a.mask >> shift); |
44 | } | 44 | } |
45 | 45 | ||
46 | struct tnum tnum_arshift(struct tnum a, u8 min_shift) | ||
47 | { | ||
48 | /* if a.value is negative, arithmetic shifting by minimum shift | ||
49 | * will have larger negative offset compared to more shifting. | ||
50 | * If a.value is nonnegative, arithmetic shifting by minimum shift | ||
51 | * will have larger positive offset compared to more shifting. | ||
52 | */ | ||
53 | return TNUM((s64)a.value >> min_shift, (s64)a.mask >> min_shift); | ||
54 | } | ||
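A quick check of the shift semantics (illustrative)::

    /* Worked example, not part of the patch: with the sign bit known
     * set and the low byte unknown,
     *	a = TNUM(0x8000000000000000, 0xff)
     * an arithmetic shift by 4 propagates the sign into both halves:
     *	tnum_arshift(a, 4) == TNUM(0xf800000000000000, 0xf)
     */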
55 | |||
46 | struct tnum tnum_add(struct tnum a, struct tnum b) | 56 | struct tnum tnum_add(struct tnum a, struct tnum b) |
47 | { | 57 | { |
48 | u64 sm, sv, sigma, chi, mu; | 58 | u64 sm, sv, sigma, chi, mu; |
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index eb1a596aebd3..d5e1a6c4165d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/stringify.h> | 22 | #include <linux/stringify.h> |
23 | #include <linux/bsearch.h> | 23 | #include <linux/bsearch.h> |
24 | #include <linux/sort.h> | 24 | #include <linux/sort.h> |
25 | #include <linux/perf_event.h> | ||
25 | 26 | ||
26 | #include "disasm.h" | 27 | #include "disasm.h" |
27 | 28 | ||
@@ -164,6 +165,8 @@ struct bpf_call_arg_meta { | |||
164 | bool pkt_access; | 165 | bool pkt_access; |
165 | int regno; | 166 | int regno; |
166 | int access_size; | 167 | int access_size; |
168 | s64 msize_smax_value; | ||
169 | u64 msize_umax_value; | ||
167 | }; | 170 | }; |
168 | 171 | ||
169 | static DEFINE_MUTEX(bpf_verifier_lock); | 172 | static DEFINE_MUTEX(bpf_verifier_lock); |
@@ -738,18 +741,19 @@ enum reg_arg_type { | |||
738 | 741 | ||
739 | static int cmp_subprogs(const void *a, const void *b) | 742 | static int cmp_subprogs(const void *a, const void *b) |
740 | { | 743 | { |
741 | return *(int *)a - *(int *)b; | 744 | return ((struct bpf_subprog_info *)a)->start - |
745 | ((struct bpf_subprog_info *)b)->start; | ||
742 | } | 746 | } |
743 | 747 | ||
744 | static int find_subprog(struct bpf_verifier_env *env, int off) | 748 | static int find_subprog(struct bpf_verifier_env *env, int off) |
745 | { | 749 | { |
746 | u32 *p; | 750 | struct bpf_subprog_info *p; |
747 | 751 | ||
748 | p = bsearch(&off, env->subprog_starts, env->subprog_cnt, | 752 | p = bsearch(&off, env->subprog_info, env->subprog_cnt, |
749 | sizeof(env->subprog_starts[0]), cmp_subprogs); | 753 | sizeof(env->subprog_info[0]), cmp_subprogs); |
750 | if (!p) | 754 | if (!p) |
751 | return -ENOENT; | 755 | return -ENOENT; |
752 | return p - env->subprog_starts; | 756 | return p - env->subprog_info; |
753 | 757 | ||
754 | } | 758 | } |
755 | 759 | ||
@@ -769,18 +773,24 @@ static int add_subprog(struct bpf_verifier_env *env, int off) | |||
769 | verbose(env, "too many subprograms\n"); | 773 | verbose(env, "too many subprograms\n"); |
770 | return -E2BIG; | 774 | return -E2BIG; |
771 | } | 775 | } |
772 | env->subprog_starts[env->subprog_cnt++] = off; | 776 | env->subprog_info[env->subprog_cnt++].start = off; |
773 | sort(env->subprog_starts, env->subprog_cnt, | 777 | sort(env->subprog_info, env->subprog_cnt, |
774 | sizeof(env->subprog_starts[0]), cmp_subprogs, NULL); | 778 | sizeof(env->subprog_info[0]), cmp_subprogs, NULL); |
775 | return 0; | 779 | return 0; |
776 | } | 780 | } |
777 | 781 | ||
778 | static int check_subprogs(struct bpf_verifier_env *env) | 782 | static int check_subprogs(struct bpf_verifier_env *env) |
779 | { | 783 | { |
780 | int i, ret, subprog_start, subprog_end, off, cur_subprog = 0; | 784 | int i, ret, subprog_start, subprog_end, off, cur_subprog = 0; |
785 | struct bpf_subprog_info *subprog = env->subprog_info; | ||
781 | struct bpf_insn *insn = env->prog->insnsi; | 786 | struct bpf_insn *insn = env->prog->insnsi; |
782 | int insn_cnt = env->prog->len; | 787 | int insn_cnt = env->prog->len; |
783 | 788 | ||
789 | /* Add entry function. */ | ||
790 | ret = add_subprog(env, 0); | ||
791 | if (ret < 0) | ||
792 | return ret; | ||
793 | |||
784 | /* determine subprog starts. The end is one before the next starts */ | 794 | /* determine subprog starts. The end is one before the next starts */ |
785 | for (i = 0; i < insn_cnt; i++) { | 795 | for (i = 0; i < insn_cnt; i++) { |
786 | if (insn[i].code != (BPF_JMP | BPF_CALL)) | 796 | if (insn[i].code != (BPF_JMP | BPF_CALL)) |
@@ -800,16 +810,18 @@ static int check_subprogs(struct bpf_verifier_env *env) | |||
800 | return ret; | 810 | return ret; |
801 | } | 811 | } |
802 | 812 | ||
813 | /* Add a fake 'exit' subprog which could simplify subprog iteration | ||
814 | * logic. 'subprog_cnt' should not be increased. | ||
815 | */ | ||
816 | subprog[env->subprog_cnt].start = insn_cnt; | ||
817 | |||
803 | if (env->log.level > 1) | 818 | if (env->log.level > 1) |
804 | for (i = 0; i < env->subprog_cnt; i++) | 819 | for (i = 0; i < env->subprog_cnt; i++) |
805 | verbose(env, "func#%d @%d\n", i, env->subprog_starts[i]); | 820 | verbose(env, "func#%d @%d\n", i, subprog[i].start); |
806 | 821 | ||
807 | /* now check that all jumps are within the same subprog */ | 822 | /* now check that all jumps are within the same subprog */ |
808 | subprog_start = 0; | 823 | subprog_start = subprog[cur_subprog].start; |
809 | if (env->subprog_cnt == cur_subprog) | 824 | subprog_end = subprog[cur_subprog + 1].start; |
810 | subprog_end = insn_cnt; | ||
811 | else | ||
812 | subprog_end = env->subprog_starts[cur_subprog++]; | ||
813 | for (i = 0; i < insn_cnt; i++) { | 825 | for (i = 0; i < insn_cnt; i++) { |
814 | u8 code = insn[i].code; | 826 | u8 code = insn[i].code; |
815 | 827 | ||
@@ -834,10 +846,9 @@ next: | |||
834 | return -EINVAL; | 846 | return -EINVAL; |
835 | } | 847 | } |
836 | subprog_start = subprog_end; | 848 | subprog_start = subprog_end; |
837 | if (env->subprog_cnt == cur_subprog) | 849 | cur_subprog++; |
838 | subprog_end = insn_cnt; | 850 | if (cur_subprog < env->subprog_cnt) |
839 | else | 851 | subprog_end = subprog[cur_subprog + 1].start; |
840 | subprog_end = env->subprog_starts[cur_subprog++]; | ||
841 | } | 852 | } |
842 | } | 853 | } |
843 | return 0; | 854 | return 0; |
@@ -1470,13 +1481,13 @@ static int update_stack_depth(struct bpf_verifier_env *env, | |||
1470 | const struct bpf_func_state *func, | 1481 | const struct bpf_func_state *func, |
1471 | int off) | 1482 | int off) |
1472 | { | 1483 | { |
1473 | u16 stack = env->subprog_stack_depth[func->subprogno]; | 1484 | u16 stack = env->subprog_info[func->subprogno].stack_depth; |
1474 | 1485 | ||
1475 | if (stack >= -off) | 1486 | if (stack >= -off) |
1476 | return 0; | 1487 | return 0; |
1477 | 1488 | ||
1478 | /* update known max for given subprogram */ | 1489 | /* update known max for given subprogram */ |
1479 | env->subprog_stack_depth[func->subprogno] = -off; | 1490 | env->subprog_info[func->subprogno].stack_depth = -off; |
1480 | return 0; | 1491 | return 0; |
1481 | } | 1492 | } |
1482 | 1493 | ||
@@ -1488,9 +1499,9 @@ static int update_stack_depth(struct bpf_verifier_env *env, | |||
1488 | */ | 1499 | */ |
1489 | static int check_max_stack_depth(struct bpf_verifier_env *env) | 1500 | static int check_max_stack_depth(struct bpf_verifier_env *env) |
1490 | { | 1501 | { |
1491 | int depth = 0, frame = 0, subprog = 0, i = 0, subprog_end; | 1502 | int depth = 0, frame = 0, idx = 0, i = 0, subprog_end; |
1503 | struct bpf_subprog_info *subprog = env->subprog_info; | ||
1492 | struct bpf_insn *insn = env->prog->insnsi; | 1504 | struct bpf_insn *insn = env->prog->insnsi; |
1493 | int insn_cnt = env->prog->len; | ||
1494 | int ret_insn[MAX_CALL_FRAMES]; | 1505 | int ret_insn[MAX_CALL_FRAMES]; |
1495 | int ret_prog[MAX_CALL_FRAMES]; | 1506 | int ret_prog[MAX_CALL_FRAMES]; |
1496 | 1507 | ||
@@ -1498,17 +1509,14 @@ process_func: | |||
1498 | /* round up to 32-bytes, since this is granularity | 1509 | /* round up to 32-bytes, since this is granularity |
1499 | * of interpreter stack size | 1510 | * of interpreter stack size |
1500 | */ | 1511 | */ |
1501 | depth += round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32); | 1512 | depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32); |
1502 | if (depth > MAX_BPF_STACK) { | 1513 | if (depth > MAX_BPF_STACK) { |
1503 | verbose(env, "combined stack size of %d calls is %d. Too large\n", | 1514 | verbose(env, "combined stack size of %d calls is %d. Too large\n", |
1504 | frame + 1, depth); | 1515 | frame + 1, depth); |
1505 | return -EACCES; | 1516 | return -EACCES; |
1506 | } | 1517 | } |
1507 | continue_func: | 1518 | continue_func: |
1508 | if (env->subprog_cnt == subprog) | 1519 | subprog_end = subprog[idx + 1].start; |
1509 | subprog_end = insn_cnt; | ||
1510 | else | ||
1511 | subprog_end = env->subprog_starts[subprog]; | ||
1512 | for (; i < subprog_end; i++) { | 1520 | for (; i < subprog_end; i++) { |
1513 | if (insn[i].code != (BPF_JMP | BPF_CALL)) | 1521 | if (insn[i].code != (BPF_JMP | BPF_CALL)) |
1514 | continue; | 1522 | continue; |
@@ -1516,17 +1524,16 @@ continue_func: | |||
1516 | continue; | 1524 | continue; |
1517 | /* remember insn and function to return to */ | 1525 | /* remember insn and function to return to */ |
1518 | ret_insn[frame] = i + 1; | 1526 | ret_insn[frame] = i + 1; |
1519 | ret_prog[frame] = subprog; | 1527 | ret_prog[frame] = idx; |
1520 | 1528 | ||
1521 | /* find the callee */ | 1529 | /* find the callee */ |
1522 | i = i + insn[i].imm + 1; | 1530 | i = i + insn[i].imm + 1; |
1523 | subprog = find_subprog(env, i); | 1531 | idx = find_subprog(env, i); |
1524 | if (subprog < 0) { | 1532 | if (idx < 0) { |
1525 | WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", | 1533 | WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", |
1526 | i); | 1534 | i); |
1527 | return -EFAULT; | 1535 | return -EFAULT; |
1528 | } | 1536 | } |
1529 | subprog++; | ||
1530 | frame++; | 1537 | frame++; |
1531 | if (frame >= MAX_CALL_FRAMES) { | 1538 | if (frame >= MAX_CALL_FRAMES) { |
1532 | WARN_ONCE(1, "verifier bug. Call stack is too deep\n"); | 1539 | WARN_ONCE(1, "verifier bug. Call stack is too deep\n"); |
@@ -1539,10 +1546,10 @@ continue_func: | |||
1539 | */ | 1546 | */ |
1540 | if (frame == 0) | 1547 | if (frame == 0) |
1541 | return 0; | 1548 | return 0; |
1542 | depth -= round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32); | 1549 | depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32); |
1543 | frame--; | 1550 | frame--; |
1544 | i = ret_insn[frame]; | 1551 | i = ret_insn[frame]; |
1545 | subprog = ret_prog[frame]; | 1552 | idx = ret_prog[frame]; |
1546 | goto continue_func; | 1553 | goto continue_func; |
1547 | } | 1554 | } |
1548 | 1555 | ||
@@ -1558,8 +1565,7 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env, | |||
1558 | start); | 1565 | start); |
1559 | return -EFAULT; | 1566 | return -EFAULT; |
1560 | } | 1567 | } |
1561 | subprog++; | 1568 | return env->subprog_info[subprog].stack_depth; |
1562 | return env->subprog_stack_depth[subprog]; | ||
1563 | } | 1569 | } |
1564 | #endif | 1570 | #endif |
1565 | 1571 | ||
@@ -1984,6 +1990,12 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, | |||
1984 | } else if (arg_type_is_mem_size(arg_type)) { | 1990 | } else if (arg_type_is_mem_size(arg_type)) { |
1985 | bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); | 1991 | bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); |
1986 | 1992 | ||
1993 | /* remember the mem_size which may be used later | ||
1994 | * to refine return values. | ||
1995 | */ | ||
1996 | meta->msize_smax_value = reg->smax_value; | ||
1997 | meta->msize_umax_value = reg->umax_value; | ||
1998 | |||
1987 | /* The register is SCALAR_VALUE; the access check | 1999 | /* The register is SCALAR_VALUE; the access check |
1988 | * happens using its boundaries. | 2000 | * happens using its boundaries. |
1989 | */ | 2001 | */ |
@@ -2061,8 +2073,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, | |||
2061 | if (func_id != BPF_FUNC_redirect_map) | 2073 | if (func_id != BPF_FUNC_redirect_map) |
2062 | goto error; | 2074 | goto error; |
2063 | break; | 2075 | break; |
2064 | /* Restrict bpf side of cpumap, open when use-cases appear */ | 2076 | /* Restrict bpf side of cpumap and xskmap, open when use-cases |
2077 | * appear. | ||
2078 | */ | ||
2065 | case BPF_MAP_TYPE_CPUMAP: | 2079 | case BPF_MAP_TYPE_CPUMAP: |
2080 | case BPF_MAP_TYPE_XSKMAP: | ||
2066 | if (func_id != BPF_FUNC_redirect_map) | 2081 | if (func_id != BPF_FUNC_redirect_map) |
2067 | goto error; | 2082 | goto error; |
2068 | break; | 2083 | break; |
@@ -2087,7 +2102,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, | |||
2087 | case BPF_FUNC_tail_call: | 2102 | case BPF_FUNC_tail_call: |
2088 | if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) | 2103 | if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) |
2089 | goto error; | 2104 | goto error; |
2090 | if (env->subprog_cnt) { | 2105 | if (env->subprog_cnt > 1) { |
2091 | verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n"); | 2106 | verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n"); |
2092 | return -EINVAL; | 2107 | return -EINVAL; |
2093 | } | 2108 | } |
@@ -2109,7 +2124,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, | |||
2109 | break; | 2124 | break; |
2110 | case BPF_FUNC_redirect_map: | 2125 | case BPF_FUNC_redirect_map: |
2111 | if (map->map_type != BPF_MAP_TYPE_DEVMAP && | 2126 | if (map->map_type != BPF_MAP_TYPE_DEVMAP && |
2112 | map->map_type != BPF_MAP_TYPE_CPUMAP) | 2127 | map->map_type != BPF_MAP_TYPE_CPUMAP && |
2128 | map->map_type != BPF_MAP_TYPE_XSKMAP) | ||
2113 | goto error; | 2129 | goto error; |
2114 | break; | 2130 | break; |
2115 | case BPF_FUNC_sk_redirect_map: | 2131 | case BPF_FUNC_sk_redirect_map: |
@@ -2259,7 +2275,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, | |||
2259 | /* remember the callsite, it will be used by bpf_exit */ | 2275 | /* remember the callsite, it will be used by bpf_exit */ |
2260 | *insn_idx /* callsite */, | 2276 | *insn_idx /* callsite */, |
2261 | state->curframe + 1 /* frameno within this callchain */, | 2277 | state->curframe + 1 /* frameno within this callchain */, |
2262 | subprog + 1 /* subprog number within this prog */); | 2278 | subprog /* subprog number within this prog */); |
2263 | 2279 | ||
2264 | /* copy r1 - r5 args that callee can access */ | 2280 | /* copy r1 - r5 args that callee can access */ |
2265 | for (i = BPF_REG_1; i <= BPF_REG_5; i++) | 2281 | for (i = BPF_REG_1; i <= BPF_REG_5; i++) |
@@ -2323,6 +2339,23 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) | |||
2323 | return 0; | 2339 | return 0; |
2324 | } | 2340 | } |
2325 | 2341 | ||
2342 | static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, | ||
2343 | int func_id, | ||
2344 | struct bpf_call_arg_meta *meta) | ||
2345 | { | ||
2346 | struct bpf_reg_state *ret_reg = ®s[BPF_REG_0]; | ||
2347 | |||
2348 | if (ret_type != RET_INTEGER || | ||
2349 | (func_id != BPF_FUNC_get_stack && | ||
2350 | func_id != BPF_FUNC_probe_read_str)) | ||
2351 | return; | ||
2352 | |||
2353 | ret_reg->smax_value = meta->msize_smax_value; | ||
2354 | ret_reg->umax_value = meta->msize_umax_value; | ||
2355 | __reg_deduce_bounds(ret_reg); | ||
2356 | __reg_bound_offset(ret_reg); | ||
2357 | } | ||
2358 | |||
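This refinement is what lets a program reuse the return value as a
bounded offset, roughly as in the fragment below (illustrative; *data*
is assumed to point into a map value of *max_len* bytes)::

    /* Illustrative fragment, not part of the patch. Because the
     * verifier now bounds usize by the max_len passed as the size
     * argument, the pointer arithmetic in the second call verifies.
     */
    long usize, ksize;

    usize = bpf_get_stack(ctx, data, max_len, BPF_F_USER_STACK);
    if (usize < 0)
    	return 0;
    ksize = bpf_get_stack(ctx, data + usize, max_len - usize, 0);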
2326 | static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) | 2359 | static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) |
2327 | { | 2360 | { |
2328 | const struct bpf_func_proto *fn = NULL; | 2361 | const struct bpf_func_proto *fn = NULL; |
@@ -2446,10 +2479,30 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn | |||
2446 | return -EINVAL; | 2479 | return -EINVAL; |
2447 | } | 2480 | } |
2448 | 2481 | ||
2482 | do_refine_retval_range(regs, fn->ret_type, func_id, &meta); | ||
2483 | |||
2449 | err = check_map_func_compatibility(env, meta.map_ptr, func_id); | 2484 | err = check_map_func_compatibility(env, meta.map_ptr, func_id); |
2450 | if (err) | 2485 | if (err) |
2451 | return err; | 2486 | return err; |
2452 | 2487 | ||
2488 | if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) { | ||
2489 | const char *err_str; | ||
2490 | |||
2491 | #ifdef CONFIG_PERF_EVENTS | ||
2492 | err = get_callchain_buffers(sysctl_perf_event_max_stack); | ||
2493 | err_str = "cannot get callchain buffer for func %s#%d\n"; | ||
2494 | #else | ||
2495 | err = -ENOTSUPP; | ||
2496 | err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n"; | ||
2497 | #endif | ||
2498 | if (err) { | ||
2499 | verbose(env, err_str, func_id_name(func_id), func_id); | ||
2500 | return err; | ||
2501 | } | ||
2502 | |||
2503 | env->prog->has_callchain_buf = true; | ||
2504 | } | ||
2505 | |||
2453 | if (changes_data) | 2506 | if (changes_data) |
2454 | clear_all_pkt_pointers(env); | 2507 | clear_all_pkt_pointers(env); |
2455 | return 0; | 2508 | return 0; |
@@ -2894,10 +2947,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, | |||
2894 | dst_reg->umin_value <<= umin_val; | 2947 | dst_reg->umin_value <<= umin_val; |
2895 | dst_reg->umax_value <<= umax_val; | 2948 | dst_reg->umax_value <<= umax_val; |
2896 | } | 2949 | } |
2897 | if (src_known) | 2950 | dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val); |
2898 | dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val); | ||
2899 | else | ||
2900 | dst_reg->var_off = tnum_lshift(tnum_unknown, umin_val); | ||
2901 | /* We may learn something more from the var_off */ | 2951 | /* We may learn something more from the var_off */ |
2902 | __update_reg_bounds(dst_reg); | 2952 | __update_reg_bounds(dst_reg); |
2903 | break; | 2953 | break; |
@@ -2925,16 +2975,35 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, | |||
2925 | */ | 2975 | */ |
2926 | dst_reg->smin_value = S64_MIN; | 2976 | dst_reg->smin_value = S64_MIN; |
2927 | dst_reg->smax_value = S64_MAX; | 2977 | dst_reg->smax_value = S64_MAX; |
2928 | if (src_known) | 2978 | dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val); |
2929 | dst_reg->var_off = tnum_rshift(dst_reg->var_off, | ||
2930 | umin_val); | ||
2931 | else | ||
2932 | dst_reg->var_off = tnum_rshift(tnum_unknown, umin_val); | ||
2933 | dst_reg->umin_value >>= umax_val; | 2979 | dst_reg->umin_value >>= umax_val; |
2934 | dst_reg->umax_value >>= umin_val; | 2980 | dst_reg->umax_value >>= umin_val; |
2935 | /* We may learn something more from the var_off */ | 2981 | /* We may learn something more from the var_off */ |
2936 | __update_reg_bounds(dst_reg); | 2982 | __update_reg_bounds(dst_reg); |
2937 | break; | 2983 | break; |
2984 | case BPF_ARSH: | ||
2985 | if (umax_val >= insn_bitness) { | ||
2986 | /* Shifts greater than 31 or 63 are undefined. | ||
2987 | * This includes shifts by a negative number. | ||
2988 | */ | ||
2989 | mark_reg_unknown(env, regs, insn->dst_reg); | ||
2990 | break; | ||
2991 | } | ||
2992 | |||
2993 | /* Upon reaching here, src_known is true and | ||
2994 | * umax_val is equal to umin_val. | ||
2995 | */ | ||
2996 | dst_reg->smin_value >>= umin_val; | ||
2997 | dst_reg->smax_value >>= umin_val; | ||
2998 | dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val); | ||
2999 | |||
3000 | /* blow away the dst_reg umin_value/umax_value and rely on | ||
3001 | * dst_reg var_off to refine the result. | ||
3002 | */ | ||
3003 | dst_reg->umin_value = 0; | ||
3004 | dst_reg->umax_value = U64_MAX; | ||
3005 | __update_reg_bounds(dst_reg); | ||
3006 | break; | ||
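For instance (illustrative)::

    /* Worked example, not part of the patch: a register tracked as
     * smin = -64, smax = 64 and arithmetically shifted right by a
     * known 2 becomes smin = -16, smax = 16; the unsigned bounds are
     * blown away above and then recovered from var_off where possible.
     */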
2938 | default: | 3007 | default: |
2939 | mark_reg_unknown(env, regs, insn->dst_reg); | 3008 | mark_reg_unknown(env, regs, insn->dst_reg); |
2940 | break; | 3009 | break; |
@@ -3818,7 +3887,12 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) | |||
3818 | return -EINVAL; | 3887 | return -EINVAL; |
3819 | } | 3888 | } |
3820 | 3889 | ||
3821 | if (env->subprog_cnt) { | 3890 | if (!env->ops->gen_ld_abs) { |
3891 | verbose(env, "bpf verifier is misconfigured\n"); | ||
3892 | return -EINVAL; | ||
3893 | } | ||
3894 | |||
3895 | if (env->subprog_cnt > 1) { | ||
3822 | /* when program has LD_ABS insn JITs and interpreter assume | 3896 | /* when program has LD_ABS insn JITs and interpreter assume |
3823 | * that r1 == ctx == skb which is not the case for callees | 3897 | * that r1 == ctx == skb which is not the case for callees |
3824 | * that can have arbitrary arguments. It's problematic | 3898 | * that can have arbitrary arguments. It's problematic |
@@ -4849,15 +4923,15 @@ process_bpf_exit: | |||
4849 | 4923 | ||
4850 | verbose(env, "processed %d insns (limit %d), stack depth ", | 4924 | verbose(env, "processed %d insns (limit %d), stack depth ", |
4851 | insn_processed, BPF_COMPLEXITY_LIMIT_INSNS); | 4925 | insn_processed, BPF_COMPLEXITY_LIMIT_INSNS); |
4852 | for (i = 0; i < env->subprog_cnt + 1; i++) { | 4926 | for (i = 0; i < env->subprog_cnt; i++) { |
4853 | u32 depth = env->subprog_stack_depth[i]; | 4927 | u32 depth = env->subprog_info[i].stack_depth; |
4854 | 4928 | ||
4855 | verbose(env, "%d", depth); | 4929 | verbose(env, "%d", depth); |
4856 | if (i + 1 < env->subprog_cnt + 1) | 4930 | if (i + 1 < env->subprog_cnt) |
4857 | verbose(env, "+"); | 4931 | verbose(env, "+"); |
4858 | } | 4932 | } |
4859 | verbose(env, "\n"); | 4933 | verbose(env, "\n"); |
4860 | env->prog->aux->stack_depth = env->subprog_stack_depth[0]; | 4934 | env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; |
4861 | return 0; | 4935 | return 0; |
4862 | } | 4936 | } |
4863 | 4937 | ||
@@ -4981,7 +5055,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) | |||
4981 | /* hold the map. If the program is rejected by verifier, | 5055 | /* hold the map. If the program is rejected by verifier, |
4982 | * the map will be released by release_maps() or it | 5056 | * the map will be released by release_maps() or it |
4983 | * will be used by the valid program until it's unloaded | 5057 | * will be used by the valid program until it's unloaded |
4984 | * and all maps are released in free_bpf_prog_info() | 5058 | * and all maps are released in free_used_maps() |
4985 | */ | 5059 | */ |
4986 | map = bpf_map_inc(map, false); | 5060 | map = bpf_map_inc(map, false); |
4987 | if (IS_ERR(map)) { | 5061 | if (IS_ERR(map)) { |
@@ -5063,10 +5137,11 @@ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len | |||
5063 | 5137 | ||
5064 | if (len == 1) | 5138 | if (len == 1) |
5065 | return; | 5139 | return; |
5066 | for (i = 0; i < env->subprog_cnt; i++) { | 5140 | /* NOTE: fake 'exit' subprog should be updated as well. */ |
5067 | if (env->subprog_starts[i] < off) | 5141 | for (i = 0; i <= env->subprog_cnt; i++) { |
5142 | if (env->subprog_info[i].start < off) | ||
5068 | continue; | 5143 | continue; |
5069 | env->subprog_starts[i] += len - 1; | 5144 | env->subprog_info[i].start += len - 1; |
5070 | } | 5145 | } |
5071 | } | 5146 | } |
5072 | 5147 | ||
@@ -5230,7 +5305,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) | |||
5230 | void *old_bpf_func; | 5305 | void *old_bpf_func; |
5231 | int err = -ENOMEM; | 5306 | int err = -ENOMEM; |
5232 | 5307 | ||
5233 | if (env->subprog_cnt == 0) | 5308 | if (env->subprog_cnt <= 1) |
5234 | return 0; | 5309 | return 0; |
5235 | 5310 | ||
5236 | for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { | 5311 | for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { |
@@ -5246,7 +5321,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) | |||
5246 | /* temporarily remember subprog id inside insn instead of | 5321 | /* temporarily remember subprog id inside insn instead of |
5247 | * aux_data, since next loop will split up all insns into funcs | 5322 | * aux_data, since next loop will split up all insns into funcs |
5248 | */ | 5323 | */ |
5249 | insn->off = subprog + 1; | 5324 | insn->off = subprog; |
5250 | /* remember original imm in case JIT fails and fallback | 5325 | /* remember original imm in case JIT fails and fallback |
5251 | * to interpreter will be needed | 5326 | * to interpreter will be needed |
5252 | */ | 5327 | */ |
@@ -5255,16 +5330,13 @@ static int jit_subprogs(struct bpf_verifier_env *env) | |||
5255 | insn->imm = 1; | 5330 | insn->imm = 1; |
5256 | } | 5331 | } |
5257 | 5332 | ||
5258 | func = kzalloc(sizeof(prog) * (env->subprog_cnt + 1), GFP_KERNEL); | 5333 | func = kzalloc(sizeof(prog) * env->subprog_cnt, GFP_KERNEL); |
5259 | if (!func) | 5334 | if (!func) |
5260 | return -ENOMEM; | 5335 | return -ENOMEM; |
5261 | 5336 | ||
5262 | for (i = 0; i <= env->subprog_cnt; i++) { | 5337 | for (i = 0; i < env->subprog_cnt; i++) { |
5263 | subprog_start = subprog_end; | 5338 | subprog_start = subprog_end; |
5264 | if (env->subprog_cnt == i) | 5339 | subprog_end = env->subprog_info[i + 1].start; |
5265 | subprog_end = prog->len; | ||
5266 | else | ||
5267 | subprog_end = env->subprog_starts[i]; | ||
5268 | 5340 | ||
5269 | len = subprog_end - subprog_start; | 5341 | len = subprog_end - subprog_start; |
5270 | func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER); | 5342 | func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER); |
@@ -5281,7 +5353,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) | |||
5281 | * Long term would need debug info to populate names | 5353 | * Long term would need debug info to populate names |
5282 | */ | 5354 | */ |
5283 | func[i]->aux->name[0] = 'F'; | 5355 | func[i]->aux->name[0] = 'F'; |
5284 | func[i]->aux->stack_depth = env->subprog_stack_depth[i]; | 5356 | func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; |
5285 | func[i]->jit_requested = 1; | 5357 | func[i]->jit_requested = 1; |
5286 | func[i] = bpf_int_jit_compile(func[i]); | 5358 | func[i] = bpf_int_jit_compile(func[i]); |
5287 | if (!func[i]->jited) { | 5359 | if (!func[i]->jited) { |
@@ -5294,7 +5366,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) | |||
5294 | * now populate all bpf_calls with correct addresses and | 5366 | * now populate all bpf_calls with correct addresses and |
5295 | * run last pass of JIT | 5367 | * run last pass of JIT |
5296 | */ | 5368 | */ |
5297 | for (i = 0; i <= env->subprog_cnt; i++) { | 5369 | for (i = 0; i < env->subprog_cnt; i++) { |
5298 | insn = func[i]->insnsi; | 5370 | insn = func[i]->insnsi; |
5299 | for (j = 0; j < func[i]->len; j++, insn++) { | 5371 | for (j = 0; j < func[i]->len; j++, insn++) { |
5300 | if (insn->code != (BPF_JMP | BPF_CALL) || | 5372 | if (insn->code != (BPF_JMP | BPF_CALL) || |
@@ -5307,7 +5379,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) | |||
5307 | __bpf_call_base; | 5379 | __bpf_call_base; |
5308 | } | 5380 | } |
5309 | } | 5381 | } |
5310 | for (i = 0; i <= env->subprog_cnt; i++) { | 5382 | for (i = 0; i < env->subprog_cnt; i++) { |
5311 | old_bpf_func = func[i]->bpf_func; | 5383 | old_bpf_func = func[i]->bpf_func; |
5312 | tmp = bpf_int_jit_compile(func[i]); | 5384 | tmp = bpf_int_jit_compile(func[i]); |
5313 | if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) { | 5385 | if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) { |
@@ -5321,7 +5393,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) | |||
5321 | /* finally lock prog and jit images for all functions and | 5393 | /* finally lock prog and jit images for all functions and |
5322 | * populate kallsyms | 5394 | * populate kallsyms |
5323 | */ | 5395 | */ |
5324 | for (i = 0; i <= env->subprog_cnt; i++) { | 5396 | for (i = 0; i < env->subprog_cnt; i++) { |
5325 | bpf_prog_lock_ro(func[i]); | 5397 | bpf_prog_lock_ro(func[i]); |
5326 | bpf_prog_kallsyms_add(func[i]); | 5398 | bpf_prog_kallsyms_add(func[i]); |
5327 | } | 5399 | } |
@@ -5338,7 +5410,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) | |||
5338 | continue; | 5410 | continue; |
5339 | insn->off = env->insn_aux_data[i].call_imm; | 5411 | insn->off = env->insn_aux_data[i].call_imm; |
5340 | subprog = find_subprog(env, i + insn->off + 1); | 5412 | subprog = find_subprog(env, i + insn->off + 1); |
5341 | addr = (unsigned long)func[subprog + 1]->bpf_func; | 5413 | addr = (unsigned long)func[subprog]->bpf_func; |
5342 | addr &= PAGE_MASK; | 5414 | addr &= PAGE_MASK; |
5343 | insn->imm = (u64 (*)(u64, u64, u64, u64, u64)) | 5415 | insn->imm = (u64 (*)(u64, u64, u64, u64, u64)) |
5344 | addr - __bpf_call_base; | 5416 | addr - __bpf_call_base; |
@@ -5347,10 +5419,10 @@ static int jit_subprogs(struct bpf_verifier_env *env) | |||
5347 | prog->jited = 1; | 5419 | prog->jited = 1; |
5348 | prog->bpf_func = func[0]->bpf_func; | 5420 | prog->bpf_func = func[0]->bpf_func; |
5349 | prog->aux->func = func; | 5421 | prog->aux->func = func; |
5350 | prog->aux->func_cnt = env->subprog_cnt + 1; | 5422 | prog->aux->func_cnt = env->subprog_cnt; |
5351 | return 0; | 5423 | return 0; |
5352 | out_free: | 5424 | out_free: |
5353 | for (i = 0; i <= env->subprog_cnt; i++) | 5425 | for (i = 0; i < env->subprog_cnt; i++) |
5354 | if (func[i]) | 5426 | if (func[i]) |
5355 | bpf_jit_free(func[i]); | 5427 | bpf_jit_free(func[i]); |
5356 | kfree(func); | 5428 | kfree(func); |
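
The loop rework above follows from a single change of convention: the main program is now counted as subprog 0, so env->subprog_cnt includes it, the func array needs exactly subprog_cnt entries, and every `i <= subprog_cnt` loop becomes `i < subprog_cnt`. A standalone sketch of the resulting bounds logic, under the assumption (implied by the subprog_info[i + 1].start read above) that entry subprog_cnt acts as a sentinel holding the program length:

    /* Hedged sketch: subprogram i spans instructions [start[i], start[i+1]),
     * with a sentinel entry at index subprog_cnt equal to the program
     * length, so no special-casing of the last chunk remains. */
    #include <stdio.h>

    int main(void)
    {
            unsigned int start[] = { 0, 7, 12, 20 }; /* 3 subprogs + sentinel */
            unsigned int subprog_cnt = 3;            /* now includes main prog */

            for (unsigned int i = 0; i < subprog_cnt; i++)
                    printf("subprog %u: insns [%u, %u)\n",
                           i, start[i], start[i + 1]);
            return 0;
    }
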
@@ -5453,6 +5525,25 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) | |||
5453 | continue; | 5525 | continue; |
5454 | } | 5526 | } |
5455 | 5527 | ||
5528 | if (BPF_CLASS(insn->code) == BPF_LD && | ||
5529 | (BPF_MODE(insn->code) == BPF_ABS || | ||
5530 | BPF_MODE(insn->code) == BPF_IND)) { | ||
5531 | cnt = env->ops->gen_ld_abs(insn, insn_buf); | ||
5532 | if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { | ||
5533 | verbose(env, "bpf verifier is misconfigured\n"); | ||
5534 | return -EINVAL; | ||
5535 | } | ||
5536 | |||
5537 | new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); | ||
5538 | if (!new_prog) | ||
5539 | return -ENOMEM; | ||
5540 | |||
5541 | delta += cnt - 1; | ||
5542 | env->prog = prog = new_prog; | ||
5543 | insn = new_prog->insnsi + i + delta; | ||
5544 | continue; | ||
5545 | } | ||
5546 | |||
5456 | if (insn->code != (BPF_JMP | BPF_CALL)) | 5547 | if (insn->code != (BPF_JMP | BPF_CALL)) |
5457 | continue; | 5548 | continue; |
5458 | if (insn->src_reg == BPF_PSEUDO_CALL) | 5549 | if (insn->src_reg == BPF_PSEUDO_CALL) |
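
The block added above moves classic packet loads into the verifier's fixup pass: any eBPF BPF_LD instruction in BPF_ABS or BPF_IND mode is expanded in place through the program type's gen_ld_abs() callback, and bpf_patch_insn_data() splices the generated sequence into the instruction stream. For reference, a hedged sketch of the instruction shape being rewritten, using the kernel's insn macros from filter.h (loading the program is left to the caller):

    struct bpf_insn prog[] = {
            BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), /* skb ctx must sit in R6 */
            BPF_LD_ABS(BPF_H, 12),               /* R0 = be16 at packet offset 12 */
            BPF_EXIT_INSN(),
    };
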
@@ -5650,16 +5741,16 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) | |||
5650 | if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) | 5741 | if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) |
5651 | env->strict_alignment = true; | 5742 | env->strict_alignment = true; |
5652 | 5743 | ||
5744 | ret = replace_map_fd_with_map_ptr(env); | ||
5745 | if (ret < 0) | ||
5746 | goto skip_full_check; | ||
5747 | |||
5653 | if (bpf_prog_is_dev_bound(env->prog->aux)) { | 5748 | if (bpf_prog_is_dev_bound(env->prog->aux)) { |
5654 | ret = bpf_prog_offload_verifier_prep(env); | 5749 | ret = bpf_prog_offload_verifier_prep(env); |
5655 | if (ret) | 5750 | if (ret) |
5656 | goto err_unlock; | 5751 | goto skip_full_check; |
5657 | } | 5752 | } |
5658 | 5753 | ||
5659 | ret = replace_map_fd_with_map_ptr(env); | ||
5660 | if (ret < 0) | ||
5661 | goto skip_full_check; | ||
5662 | |||
5663 | env->explored_states = kcalloc(env->prog->len, | 5754 | env->explored_states = kcalloc(env->prog->len, |
5664 | sizeof(struct bpf_verifier_state_list *), | 5755 | sizeof(struct bpf_verifier_state_list *), |
5665 | GFP_USER); | 5756 | GFP_USER); |
@@ -5730,7 +5821,7 @@ skip_full_check: | |||
5730 | err_release_maps: | 5821 | err_release_maps: |
5731 | if (!env->prog->aux->used_maps) | 5822 | if (!env->prog->aux->used_maps) |
5732 | /* if we didn't copy map pointers into bpf_prog_info, release | 5823 | /* if we didn't copy map pointers into bpf_prog_info, release |
5733 | * them now. Otherwise free_bpf_prog_info() will release them. | 5824 | * them now. Otherwise free_used_maps() will release them. |
5734 | */ | 5825 | */ |
5735 | release_maps(env); | 5826 | release_maps(env); |
5736 | *prog = env->prog; | 5827 | *prog = env->prog; |
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c new file mode 100644 index 000000000000..cb3a12137404 --- /dev/null +++ b/kernel/bpf/xskmap.c | |||
@@ -0,0 +1,241 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* XSKMAP used for AF_XDP sockets | ||
3 | * Copyright(c) 2018 Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #include <linux/bpf.h> | ||
16 | #include <linux/capability.h> | ||
17 | #include <net/xdp_sock.h> | ||
18 | #include <linux/slab.h> | ||
19 | #include <linux/sched.h> | ||
20 | |||
21 | struct xsk_map { | ||
22 | struct bpf_map map; | ||
23 | struct xdp_sock **xsk_map; | ||
24 | struct list_head __percpu *flush_list; | ||
25 | }; | ||
26 | |||
27 | static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) | ||
28 | { | ||
29 | int cpu, err = -EINVAL; | ||
30 | struct xsk_map *m; | ||
31 | u64 cost; | ||
32 | |||
33 | if (!capable(CAP_NET_ADMIN)) | ||
34 | return ERR_PTR(-EPERM); | ||
35 | |||
36 | if (attr->max_entries == 0 || attr->key_size != 4 || | ||
37 | attr->value_size != 4 || | ||
38 | attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)) | ||
39 | return ERR_PTR(-EINVAL); | ||
40 | |||
41 | m = kzalloc(sizeof(*m), GFP_USER); | ||
42 | if (!m) | ||
43 | return ERR_PTR(-ENOMEM); | ||
44 | |||
45 | bpf_map_init_from_attr(&m->map, attr); | ||
46 | |||
47 | cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *); | ||
48 | cost += sizeof(struct list_head) * num_possible_cpus(); | ||
49 | if (cost >= U32_MAX - PAGE_SIZE) | ||
50 | goto free_m; | ||
51 | |||
52 | m->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; | ||
53 | |||
54 | /* Notice: returns -EPERM if map size is larger than memlock limit */ ||
55 | err = bpf_map_precharge_memlock(m->map.pages); | ||
56 | if (err) | ||
57 | goto free_m; | ||
58 | |||
59 | err = -ENOMEM; | ||
60 | |||
61 | m->flush_list = alloc_percpu(struct list_head); | ||
62 | if (!m->flush_list) | ||
63 | goto free_m; | ||
64 | |||
65 | for_each_possible_cpu(cpu) | ||
66 | INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu)); | ||
67 | |||
68 | m->xsk_map = bpf_map_area_alloc(m->map.max_entries * | ||
69 | sizeof(struct xdp_sock *), | ||
70 | m->map.numa_node); | ||
71 | if (!m->xsk_map) | ||
72 | goto free_percpu; | ||
73 | return &m->map; | ||
74 | |||
75 | free_percpu: | ||
76 | free_percpu(m->flush_list); | ||
77 | free_m: | ||
78 | kfree(m); | ||
79 | return ERR_PTR(err); | ||
80 | } | ||
81 | |||
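
xsk_map_alloc() charges the map's full cost against the memlock budget up front and insists on 4-byte keys and values: a slot index and an AF_XDP socket fd. A userspace sketch, assuming the tools/lib/bpf bpf_create_map() wrapper:

    /* Create a four-slot XSKMAP. Fails with -EPERM without CAP_NET_ADMIN
     * or when the precharge exceeds the RLIMIT_MEMLOCK budget. */
    static int create_xskmap(void)
    {
            return bpf_create_map(BPF_MAP_TYPE_XSKMAP, sizeof(int),
                                  sizeof(int), 4 /* max_entries */, 0);
    }
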
82 | static void xsk_map_free(struct bpf_map *map) | ||
83 | { | ||
84 | struct xsk_map *m = container_of(map, struct xsk_map, map); | ||
85 | int i; | ||
86 | |||
87 | synchronize_net(); | ||
88 | |||
89 | for (i = 0; i < map->max_entries; i++) { | ||
90 | struct xdp_sock *xs; | ||
91 | |||
92 | xs = m->xsk_map[i]; | ||
93 | if (!xs) | ||
94 | continue; | ||
95 | |||
96 | sock_put((struct sock *)xs); | ||
97 | } | ||
98 | |||
99 | free_percpu(m->flush_list); | ||
100 | bpf_map_area_free(m->xsk_map); | ||
101 | kfree(m); | ||
102 | } | ||
103 | |||
104 | static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key) | ||
105 | { | ||
106 | struct xsk_map *m = container_of(map, struct xsk_map, map); | ||
107 | u32 index = key ? *(u32 *)key : U32_MAX; | ||
108 | u32 *next = next_key; | ||
109 | |||
110 | if (index >= m->map.max_entries) { | ||
111 | *next = 0; | ||
112 | return 0; | ||
113 | } | ||
114 | |||
115 | if (index == m->map.max_entries - 1) | ||
116 | return -ENOENT; | ||
117 | *next = index + 1; | ||
118 | return 0; | ||
119 | } | ||
120 | |||
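
xsk_map_get_next_key() implements the usual bpf(2) iteration contract: an out-of-range (or absent) key restarts the walk at slot 0, and the last slot terminates it with -ENOENT. A userspace sketch, assuming the tools/lib/bpf bpf_map_get_next_key() wrapper:

    /* Walk every XSKMAP slot. A NULL start key yields index 0, matching
     * the key >= max_entries restart above. */
    static void walk_xskmap(int map_fd)
    {
            __u32 key, next;
            int err = bpf_map_get_next_key(map_fd, NULL, &next);

            while (!err) {
                    key = next;
                    /* inspect slot `key` here */
                    err = bpf_map_get_next_key(map_fd, &key, &next);
            }
    }
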
121 | struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key) | ||
122 | { | ||
123 | struct xsk_map *m = container_of(map, struct xsk_map, map); | ||
124 | struct xdp_sock *xs; | ||
125 | |||
126 | if (key >= map->max_entries) | ||
127 | return NULL; | ||
128 | |||
129 | xs = READ_ONCE(m->xsk_map[key]); | ||
130 | return xs; | ||
131 | } | ||
132 | |||
133 | int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp, | ||
134 | struct xdp_sock *xs) | ||
135 | { | ||
136 | struct xsk_map *m = container_of(map, struct xsk_map, map); | ||
137 | struct list_head *flush_list = this_cpu_ptr(m->flush_list); | ||
138 | int err; | ||
139 | |||
140 | err = xsk_rcv(xs, xdp); | ||
141 | if (err) | ||
142 | return err; | ||
143 | |||
144 | if (!xs->flush_node.prev) | ||
145 | list_add(&xs->flush_node, flush_list); | ||
146 | |||
147 | return 0; | ||
148 | } | ||
149 | |||
150 | void __xsk_map_flush(struct bpf_map *map) | ||
151 | { | ||
152 | struct xsk_map *m = container_of(map, struct xsk_map, map); | ||
153 | struct list_head *flush_list = this_cpu_ptr(m->flush_list); | ||
154 | struct xdp_sock *xs, *tmp; | ||
155 | |||
156 | list_for_each_entry_safe(xs, tmp, flush_list, flush_node) { | ||
157 | xsk_flush(xs); | ||
158 | __list_del(xs->flush_node.prev, xs->flush_node.next); | ||
159 | xs->flush_node.prev = NULL; | ||
160 | } | ||
161 | } | ||
162 | |||
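
__xsk_map_redirect() parks each receiving socket on a per-cpu flush list (the !flush_node.prev test keeps a socket from being queued twice), and __xsk_map_flush() drains the list in one pass. An illustrative driver-side pattern, with my_rx_process() purely hypothetical:

    /* XDP_REDIRECT verdicts taken during one NAPI poll queue sockets on
     * the per-cpu flush list above; one xdp_do_flush_map() at the end of
     * the poll then flushes them as a single batch. */
    static int my_napi_poll(struct napi_struct *napi, int budget)
    {
            int done = my_rx_process(napi, budget); /* may XDP_REDIRECT */

            xdp_do_flush_map();
            return done;
    }
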
163 | static void *xsk_map_lookup_elem(struct bpf_map *map, void *key) | ||
164 | { | ||
165 | return NULL; | ||
166 | } | ||
167 | |||
168 | static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, | ||
169 | u64 map_flags) | ||
170 | { | ||
171 | struct xsk_map *m = container_of(map, struct xsk_map, map); | ||
172 | u32 i = *(u32 *)key, fd = *(u32 *)value; | ||
173 | struct xdp_sock *xs, *old_xs; | ||
174 | struct socket *sock; | ||
175 | int err; | ||
176 | |||
177 | if (unlikely(map_flags > BPF_EXIST)) | ||
178 | return -EINVAL; | ||
179 | if (unlikely(i >= m->map.max_entries)) | ||
180 | return -E2BIG; | ||
181 | if (unlikely(map_flags == BPF_NOEXIST)) | ||
182 | return -EEXIST; | ||
183 | |||
184 | sock = sockfd_lookup(fd, &err); | ||
185 | if (!sock) | ||
186 | return err; | ||
187 | |||
188 | if (sock->sk->sk_family != PF_XDP) { | ||
189 | sockfd_put(sock); | ||
190 | return -EOPNOTSUPP; | ||
191 | } | ||
192 | |||
193 | xs = (struct xdp_sock *)sock->sk; | ||
194 | |||
195 | if (!xsk_is_setup_for_bpf_map(xs)) { | ||
196 | sockfd_put(sock); | ||
197 | return -EOPNOTSUPP; | ||
198 | } | ||
199 | |||
200 | sock_hold(sock->sk); | ||
201 | |||
202 | old_xs = xchg(&m->xsk_map[i], xs); | ||
203 | if (old_xs) { | ||
204 | /* Make sure we've flushed everything. */ | ||
205 | synchronize_net(); | ||
206 | sock_put((struct sock *)old_xs); | ||
207 | } | ||
208 | |||
209 | sockfd_put(sock); | ||
210 | return 0; | ||
211 | } | ||
212 | |||
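
xsk_map_update_elem() takes a slot index as key and an AF_XDP socket fd as value, grabs a socket reference before publishing the pointer with xchg(), and rejects BPF_NOEXIST because, as in other array-like maps, every slot conceptually always exists. A userspace sketch, assuming the tools/lib/bpf bpf_map_update_elem() wrapper:

    /* Publish an already set-up AF_XDP socket at XSKMAP index 0. The
     * kernel returns -EOPNOTSUPP for fds that are not PF_XDP sockets or
     * sockets not yet configured for map use. */
    static int publish_xsk(int map_fd, int xsk_fd)
    {
            int key = 0;

            return bpf_map_update_elem(map_fd, &key, &xsk_fd, 0 /* BPF_ANY */);
    }
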
213 | static int xsk_map_delete_elem(struct bpf_map *map, void *key) | ||
214 | { | ||
215 | struct xsk_map *m = container_of(map, struct xsk_map, map); | ||
216 | struct xdp_sock *old_xs; | ||
217 | int k = *(u32 *)key; | ||
218 | |||
219 | if (k >= map->max_entries) | ||
220 | return -EINVAL; | ||
221 | |||
222 | old_xs = xchg(&m->xsk_map[k], NULL); | ||
223 | if (old_xs) { | ||
224 | /* Make sure we've flushed everything. */ | ||
225 | synchronize_net(); | ||
226 | sock_put((struct sock *)old_xs); | ||
227 | } | ||
228 | |||
229 | return 0; | ||
230 | } | ||
231 | |||
232 | const struct bpf_map_ops xsk_map_ops = { | ||
233 | .map_alloc = xsk_map_alloc, | ||
234 | .map_free = xsk_map_free, | ||
235 | .map_get_next_key = xsk_map_get_next_key, | ||
236 | .map_lookup_elem = xsk_map_lookup_elem, | ||
237 | .map_update_elem = xsk_map_update_elem, | ||
238 | .map_delete_elem = xsk_map_delete_elem, | ||
239 | }; | ||
240 | |||
241 | |||
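
On the BPF side the map is consumed through bpf_redirect_map(), which lands in __xsk_map_lookup_elem() and __xsk_map_redirect() above. A minimal XDP program sketch, assuming the samples' "bpf_helpers.h" for SEC() and the helper declaration; wiring up the map fd and choosing the slot are left to the loader:

    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    struct bpf_map_def SEC("maps") xsks_map = {
            .type        = BPF_MAP_TYPE_XSKMAP,
            .key_size    = sizeof(int),
            .value_size  = sizeof(int),
            .max_entries = 4,
    };

    SEC("xdp_sock")
    int xdp_sock_prog(struct xdp_md *ctx)
    {
            /* Steer every frame into the AF_XDP socket at slot 0; frames
             * redirected to an empty slot are dropped by the redirect core. */
            return bpf_redirect_map(&xsks_map, 0, 0);
    }
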
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 56ba0f2a01db..ce2cbbff27e4 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include "trace.h" | 20 | #include "trace.h" |
21 | 21 | ||
22 | u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); | 22 | u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); |
23 | u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); | ||
23 | 24 | ||
24 | /** | 25 | /** |
25 | * trace_call_bpf - invoke BPF program | 26 | * trace_call_bpf - invoke BPF program |
@@ -474,8 +475,6 @@ BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx) | |||
474 | struct bpf_array *array = container_of(map, struct bpf_array, map); | 475 | struct bpf_array *array = container_of(map, struct bpf_array, map); |
475 | struct cgroup *cgrp; | 476 | struct cgroup *cgrp; |
476 | 477 | ||
477 | if (unlikely(in_interrupt())) | ||
478 | return -EINVAL; | ||
479 | if (unlikely(idx >= array->map.max_entries)) | 478 | if (unlikely(idx >= array->map.max_entries)) |
480 | return -E2BIG; | 479 | return -E2BIG; |
481 | 480 | ||
@@ -577,6 +576,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |||
577 | return &bpf_perf_event_output_proto; | 576 | return &bpf_perf_event_output_proto; |
578 | case BPF_FUNC_get_stackid: | 577 | case BPF_FUNC_get_stackid: |
579 | return &bpf_get_stackid_proto; | 578 | return &bpf_get_stackid_proto; |
579 | case BPF_FUNC_get_stack: | ||
580 | return &bpf_get_stack_proto; | ||
580 | case BPF_FUNC_perf_event_read_value: | 581 | case BPF_FUNC_perf_event_read_value: |
581 | return &bpf_perf_event_read_value_proto; | 582 | return &bpf_perf_event_read_value_proto; |
582 | #ifdef CONFIG_BPF_KPROBE_OVERRIDE | 583 | #ifdef CONFIG_BPF_KPROBE_OVERRIDE |
@@ -664,6 +665,25 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = { | |||
664 | .arg3_type = ARG_ANYTHING, | 665 | .arg3_type = ARG_ANYTHING, |
665 | }; | 666 | }; |
666 | 667 | ||
668 | BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size, | ||
669 | u64, flags) | ||
670 | { | ||
671 | struct pt_regs *regs = *(struct pt_regs **)tp_buff; | ||
672 | |||
673 | return bpf_get_stack((unsigned long) regs, (unsigned long) buf, | ||
674 | (unsigned long) size, flags, 0); | ||
675 | } | ||
676 | |||
677 | static const struct bpf_func_proto bpf_get_stack_proto_tp = { | ||
678 | .func = bpf_get_stack_tp, | ||
679 | .gpl_only = true, | ||
680 | .ret_type = RET_INTEGER, | ||
681 | .arg1_type = ARG_PTR_TO_CTX, | ||
682 | .arg2_type = ARG_PTR_TO_UNINIT_MEM, | ||
683 | .arg3_type = ARG_CONST_SIZE_OR_ZERO, | ||
684 | .arg4_type = ARG_ANYTHING, | ||
685 | }; | ||
686 | |||
667 | static const struct bpf_func_proto * | 687 | static const struct bpf_func_proto * |
668 | tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | 688 | tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) |
669 | { | 689 | { |
@@ -672,6 +692,8 @@ tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |||
672 | return &bpf_perf_event_output_proto_tp; | 692 | return &bpf_perf_event_output_proto_tp; |
673 | case BPF_FUNC_get_stackid: | 693 | case BPF_FUNC_get_stackid: |
674 | return &bpf_get_stackid_proto_tp; | 694 | return &bpf_get_stackid_proto_tp; |
695 | case BPF_FUNC_get_stack: | ||
696 | return &bpf_get_stack_proto_tp; | ||
675 | default: | 697 | default: |
676 | return tracing_func_proto(func_id, prog); | 698 | return tracing_func_proto(func_id, prog); |
677 | } | 699 | } |
@@ -734,6 +756,8 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |||
734 | return &bpf_perf_event_output_proto_tp; | 756 | return &bpf_perf_event_output_proto_tp; |
735 | case BPF_FUNC_get_stackid: | 757 | case BPF_FUNC_get_stackid: |
736 | return &bpf_get_stackid_proto_tp; | 758 | return &bpf_get_stackid_proto_tp; |
759 | case BPF_FUNC_get_stack: | ||
760 | return &bpf_get_stack_proto_tp; | ||
737 | case BPF_FUNC_perf_prog_read_value: | 761 | case BPF_FUNC_perf_prog_read_value: |
738 | return &bpf_perf_prog_read_value_proto; | 762 | return &bpf_perf_prog_read_value_proto; |
739 | default: | 763 | default: |
@@ -744,7 +768,7 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |||
744 | /* | 768 | /* |
745 | * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp | 769 | * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp |
746 | * to avoid potential recursive reuse issue when/if tracepoints are added | 770 | * to avoid potential recursive reuse issue when/if tracepoints are added |
747 | * inside bpf_*_event_output and/or bpf_get_stack_id | 771 | * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack |
748 | */ | 772 | */ |
749 | static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs); | 773 | static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs); |
750 | BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args, | 774 | BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args, |
@@ -787,6 +811,26 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = { | |||
787 | .arg3_type = ARG_ANYTHING, | 811 | .arg3_type = ARG_ANYTHING, |
788 | }; | 812 | }; |
789 | 813 | ||
814 | BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args, | ||
815 | void *, buf, u32, size, u64, flags) | ||
816 | { | ||
817 | struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs); | ||
818 | |||
819 | perf_fetch_caller_regs(regs); | ||
820 | return bpf_get_stack((unsigned long) regs, (unsigned long) buf, | ||
821 | (unsigned long) size, flags, 0); | ||
822 | } | ||
823 | |||
824 | static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = { | ||
825 | .func = bpf_get_stack_raw_tp, | ||
826 | .gpl_only = true, | ||
827 | .ret_type = RET_INTEGER, | ||
828 | .arg1_type = ARG_PTR_TO_CTX, | ||
829 | .arg2_type = ARG_PTR_TO_MEM, | ||
830 | .arg3_type = ARG_CONST_SIZE_OR_ZERO, | ||
831 | .arg4_type = ARG_ANYTHING, | ||
832 | }; | ||
833 | |||
790 | static const struct bpf_func_proto * | 834 | static const struct bpf_func_proto * |
791 | raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | 835 | raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) |
792 | { | 836 | { |
@@ -795,6 +839,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |||
795 | return &bpf_perf_event_output_proto_raw_tp; | 839 | return &bpf_perf_event_output_proto_raw_tp; |
796 | case BPF_FUNC_get_stackid: | 840 | case BPF_FUNC_get_stackid: |
797 | return &bpf_get_stackid_proto_raw_tp; | 841 | return &bpf_get_stackid_proto_raw_tp; |
842 | case BPF_FUNC_get_stack: | ||
843 | return &bpf_get_stack_proto_raw_tp; | ||
798 | default: | 844 | default: |
799 | return tracing_func_proto(func_id, prog); | 845 | return tracing_func_proto(func_id, prog); |
800 | } | 846 | } |
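
The new bpf_get_stack() helper, wired up above for kprobe, tracepoint, perf-event and raw-tracepoint programs, complements bpf_get_stackid(): instead of depositing the trace in a stackmap and returning an id, it copies the raw addresses into caller-supplied memory and returns the byte count. A hedged sketch, assuming the samples' "bpf_helpers.h" declarations and an attach point chosen by the loader:

    #include <linux/bpf.h>
    #include <linux/ptrace.h>
    #include "bpf_helpers.h"

    #define STACK_BYTES (64 * sizeof(__u64)) /* room for 64 frames */

    struct bpf_map_def SEC("maps") scratch = {
            .type        = BPF_MAP_TYPE_PERCPU_ARRAY,
            .key_size    = sizeof(__u32),
            .value_size  = STACK_BYTES,
            .max_entries = 1,
    };

    SEC("kprobe/sys_write")
    int dump_user_stack(struct pt_regs *ctx)
    {
            __u32 key = 0;
            void *buf = bpf_map_lookup_elem(&scratch, &key);

            if (!buf)
                    return 0;
            /* Returns bytes copied (a zero-sized buffer is allowed via
             * ARG_CONST_SIZE_OR_ZERO) or a negative error. */
            bpf_get_stack(ctx, buf, STACK_BYTES, BPF_F_USER_STACK);
            return 0;
    }
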
diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 8e157806df7a..317f231462d4 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c | |||
@@ -386,116 +386,6 @@ static int bpf_fill_ld_abs_get_processor_id(struct bpf_test *self) | |||
386 | return 0; | 386 | return 0; |
387 | } | 387 | } |
388 | 388 | ||
389 | #define PUSH_CNT 68 | ||
390 | /* test: {skb->data[0], vlan_push} x 68 + {skb->data[0], vlan_pop} x 68 */ | ||
391 | static int bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self) | ||
392 | { | ||
393 | unsigned int len = BPF_MAXINSNS; | ||
394 | struct bpf_insn *insn; | ||
395 | int i = 0, j, k = 0; | ||
396 | |||
397 | insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); | ||
398 | if (!insn) | ||
399 | return -ENOMEM; | ||
400 | |||
401 | insn[i++] = BPF_MOV64_REG(R6, R1); | ||
402 | loop: | ||
403 | for (j = 0; j < PUSH_CNT; j++) { | ||
404 | insn[i++] = BPF_LD_ABS(BPF_B, 0); | ||
405 | insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0x34, len - i - 2); | ||
406 | i++; | ||
407 | insn[i++] = BPF_MOV64_REG(R1, R6); | ||
408 | insn[i++] = BPF_MOV64_IMM(R2, 1); | ||
409 | insn[i++] = BPF_MOV64_IMM(R3, 2); | ||
410 | insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
411 | bpf_skb_vlan_push_proto.func - __bpf_call_base); | ||
412 | insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0, len - i - 2); | ||
413 | i++; | ||
414 | } | ||
415 | |||
416 | for (j = 0; j < PUSH_CNT; j++) { | ||
417 | insn[i++] = BPF_LD_ABS(BPF_B, 0); | ||
418 | insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0x34, len - i - 2); | ||
419 | i++; | ||
420 | insn[i++] = BPF_MOV64_REG(R1, R6); | ||
421 | insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
422 | bpf_skb_vlan_pop_proto.func - __bpf_call_base); | ||
423 | insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0, len - i - 2); | ||
424 | i++; | ||
425 | } | ||
426 | if (++k < 5) | ||
427 | goto loop; | ||
428 | |||
429 | for (; i < len - 1; i++) | ||
430 | insn[i] = BPF_ALU32_IMM(BPF_MOV, R0, 0xbef); | ||
431 | |||
432 | insn[len - 1] = BPF_EXIT_INSN(); | ||
433 | |||
434 | self->u.ptr.insns = insn; | ||
435 | self->u.ptr.len = len; | ||
436 | |||
437 | return 0; | ||
438 | } | ||
439 | |||
440 | static int bpf_fill_ld_abs_vlan_push_pop2(struct bpf_test *self) | ||
441 | { | ||
442 | struct bpf_insn *insn; | ||
443 | |||
444 | insn = kmalloc_array(16, sizeof(*insn), GFP_KERNEL); | ||
445 | if (!insn) | ||
446 | return -ENOMEM; | ||
447 | |||
448 | /* Due to func address being non-const, we need to | ||
449 | * assemble this here. | ||
450 | */ | ||
451 | insn[0] = BPF_MOV64_REG(R6, R1); | ||
452 | insn[1] = BPF_LD_ABS(BPF_B, 0); | ||
453 | insn[2] = BPF_LD_ABS(BPF_H, 0); | ||
454 | insn[3] = BPF_LD_ABS(BPF_W, 0); | ||
455 | insn[4] = BPF_MOV64_REG(R7, R6); | ||
456 | insn[5] = BPF_MOV64_IMM(R6, 0); | ||
457 | insn[6] = BPF_MOV64_REG(R1, R7); | ||
458 | insn[7] = BPF_MOV64_IMM(R2, 1); | ||
459 | insn[8] = BPF_MOV64_IMM(R3, 2); | ||
460 | insn[9] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
461 | bpf_skb_vlan_push_proto.func - __bpf_call_base); | ||
462 | insn[10] = BPF_MOV64_REG(R6, R7); | ||
463 | insn[11] = BPF_LD_ABS(BPF_B, 0); | ||
464 | insn[12] = BPF_LD_ABS(BPF_H, 0); | ||
465 | insn[13] = BPF_LD_ABS(BPF_W, 0); | ||
466 | insn[14] = BPF_MOV64_IMM(R0, 42); | ||
467 | insn[15] = BPF_EXIT_INSN(); | ||
468 | |||
469 | self->u.ptr.insns = insn; | ||
470 | self->u.ptr.len = 16; | ||
471 | |||
472 | return 0; | ||
473 | } | ||
474 | |||
475 | static int bpf_fill_jump_around_ld_abs(struct bpf_test *self) | ||
476 | { | ||
477 | unsigned int len = BPF_MAXINSNS; | ||
478 | struct bpf_insn *insn; | ||
479 | int i = 0; | ||
480 | |||
481 | insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); | ||
482 | if (!insn) | ||
483 | return -ENOMEM; | ||
484 | |||
485 | insn[i++] = BPF_MOV64_REG(R6, R1); | ||
486 | insn[i++] = BPF_LD_ABS(BPF_B, 0); | ||
487 | insn[i] = BPF_JMP_IMM(BPF_JEQ, R0, 10, len - i - 2); | ||
488 | i++; | ||
489 | while (i < len - 1) | ||
490 | insn[i++] = BPF_LD_ABS(BPF_B, 1); | ||
491 | insn[i] = BPF_EXIT_INSN(); | ||
492 | |||
493 | self->u.ptr.insns = insn; | ||
494 | self->u.ptr.len = len; | ||
495 | |||
496 | return 0; | ||
497 | } | ||
498 | |||
499 | static int __bpf_fill_stxdw(struct bpf_test *self, int size) | 389 | static int __bpf_fill_stxdw(struct bpf_test *self, int size) |
500 | { | 390 | { |
501 | unsigned int len = BPF_MAXINSNS; | 391 | unsigned int len = BPF_MAXINSNS; |
@@ -1988,40 +1878,6 @@ static struct bpf_test tests[] = { | |||
1988 | { { 0, -1 } } | 1878 | { { 0, -1 } } |
1989 | }, | 1879 | }, |
1990 | { | 1880 | { |
1991 | "INT: DIV + ABS", | ||
1992 | .u.insns_int = { | ||
1993 | BPF_ALU64_REG(BPF_MOV, R6, R1), | ||
1994 | BPF_LD_ABS(BPF_B, 3), | ||
1995 | BPF_ALU64_IMM(BPF_MOV, R2, 2), | ||
1996 | BPF_ALU32_REG(BPF_DIV, R0, R2), | ||
1997 | BPF_ALU64_REG(BPF_MOV, R8, R0), | ||
1998 | BPF_LD_ABS(BPF_B, 4), | ||
1999 | BPF_ALU64_REG(BPF_ADD, R8, R0), | ||
2000 | BPF_LD_IND(BPF_B, R8, -70), | ||
2001 | BPF_EXIT_INSN(), | ||
2002 | }, | ||
2003 | INTERNAL, | ||
2004 | { 10, 20, 30, 40, 50 }, | ||
2005 | { { 4, 0 }, { 5, 10 } } | ||
2006 | }, | ||
2007 | { | ||
2008 | /* This one doesn't go through verifier, but is just raw insn | ||
2009 | * as opposed to cBPF tests from here. Thus div by 0 tests are | ||
2010 | * done in test_verifier in BPF kselftests. | ||
2011 | */ | ||
2012 | "INT: DIV by -1", | ||
2013 | .u.insns_int = { | ||
2014 | BPF_ALU64_REG(BPF_MOV, R6, R1), | ||
2015 | BPF_ALU64_IMM(BPF_MOV, R7, -1), | ||
2016 | BPF_LD_ABS(BPF_B, 3), | ||
2017 | BPF_ALU32_REG(BPF_DIV, R0, R7), | ||
2018 | BPF_EXIT_INSN(), | ||
2019 | }, | ||
2020 | INTERNAL, | ||
2021 | { 10, 20, 30, 40, 50 }, | ||
2022 | { { 3, 0 }, { 4, 0 } } | ||
2023 | }, | ||
2024 | { | ||
2025 | "check: missing ret", | 1881 | "check: missing ret", |
2026 | .u.insns = { | 1882 | .u.insns = { |
2027 | BPF_STMT(BPF_LD | BPF_IMM, 1), | 1883 | BPF_STMT(BPF_LD | BPF_IMM, 1), |
@@ -2383,50 +2239,6 @@ static struct bpf_test tests[] = { | |||
2383 | { }, | 2239 | { }, |
2384 | { { 0, 1 } } | 2240 | { { 0, 1 } } |
2385 | }, | 2241 | }, |
2386 | { | ||
2387 | "nmap reduced", | ||
2388 | .u.insns_int = { | ||
2389 | BPF_MOV64_REG(R6, R1), | ||
2390 | BPF_LD_ABS(BPF_H, 12), | ||
2391 | BPF_JMP_IMM(BPF_JNE, R0, 0x806, 28), | ||
2392 | BPF_LD_ABS(BPF_H, 12), | ||
2393 | BPF_JMP_IMM(BPF_JNE, R0, 0x806, 26), | ||
2394 | BPF_MOV32_IMM(R0, 18), | ||
2395 | BPF_STX_MEM(BPF_W, R10, R0, -64), | ||
2396 | BPF_LDX_MEM(BPF_W, R7, R10, -64), | ||
2397 | BPF_LD_IND(BPF_W, R7, 14), | ||
2398 | BPF_STX_MEM(BPF_W, R10, R0, -60), | ||
2399 | BPF_MOV32_IMM(R0, 280971478), | ||
2400 | BPF_STX_MEM(BPF_W, R10, R0, -56), | ||
2401 | BPF_LDX_MEM(BPF_W, R7, R10, -56), | ||
2402 | BPF_LDX_MEM(BPF_W, R0, R10, -60), | ||
2403 | BPF_ALU32_REG(BPF_SUB, R0, R7), | ||
2404 | BPF_JMP_IMM(BPF_JNE, R0, 0, 15), | ||
2405 | BPF_LD_ABS(BPF_H, 12), | ||
2406 | BPF_JMP_IMM(BPF_JNE, R0, 0x806, 13), | ||
2407 | BPF_MOV32_IMM(R0, 22), | ||
2408 | BPF_STX_MEM(BPF_W, R10, R0, -56), | ||
2409 | BPF_LDX_MEM(BPF_W, R7, R10, -56), | ||
2410 | BPF_LD_IND(BPF_H, R7, 14), | ||
2411 | BPF_STX_MEM(BPF_W, R10, R0, -52), | ||
2412 | BPF_MOV32_IMM(R0, 17366), | ||
2413 | BPF_STX_MEM(BPF_W, R10, R0, -48), | ||
2414 | BPF_LDX_MEM(BPF_W, R7, R10, -48), | ||
2415 | BPF_LDX_MEM(BPF_W, R0, R10, -52), | ||
2416 | BPF_ALU32_REG(BPF_SUB, R0, R7), | ||
2417 | BPF_JMP_IMM(BPF_JNE, R0, 0, 2), | ||
2418 | BPF_MOV32_IMM(R0, 256), | ||
2419 | BPF_EXIT_INSN(), | ||
2420 | BPF_MOV32_IMM(R0, 0), | ||
2421 | BPF_EXIT_INSN(), | ||
2422 | }, | ||
2423 | INTERNAL, | ||
2424 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0, 0, | ||
2425 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
2426 | 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6}, | ||
2427 | { { 38, 256 } }, | ||
2428 | .stack_depth = 64, | ||
2429 | }, | ||
2430 | /* BPF_ALU | BPF_MOV | BPF_X */ | 2242 | /* BPF_ALU | BPF_MOV | BPF_X */ |
2431 | { | 2243 | { |
2432 | "ALU_MOV_X: dst = 2", | 2244 | "ALU_MOV_X: dst = 2", |
@@ -5485,22 +5297,6 @@ static struct bpf_test tests[] = { | |||
5485 | { { 1, 0xbee } }, | 5297 | { { 1, 0xbee } }, |
5486 | .fill_helper = bpf_fill_ld_abs_get_processor_id, | 5298 | .fill_helper = bpf_fill_ld_abs_get_processor_id, |
5487 | }, | 5299 | }, |
5488 | { | ||
5489 | "BPF_MAXINSNS: ld_abs+vlan_push/pop", | ||
5490 | { }, | ||
5491 | INTERNAL, | ||
5492 | { 0x34 }, | ||
5493 | { { ETH_HLEN, 0xbef } }, | ||
5494 | .fill_helper = bpf_fill_ld_abs_vlan_push_pop, | ||
5495 | }, | ||
5496 | { | ||
5497 | "BPF_MAXINSNS: jump around ld_abs", | ||
5498 | { }, | ||
5499 | INTERNAL, | ||
5500 | { 10, 11 }, | ||
5501 | { { 2, 10 } }, | ||
5502 | .fill_helper = bpf_fill_jump_around_ld_abs, | ||
5503 | }, | ||
5504 | /* | 5300 | /* |
5505 | * LD_IND / LD_ABS on fragmented SKBs | 5301 | * LD_IND / LD_ABS on fragmented SKBs |
5506 | */ | 5302 | */ |
@@ -5683,6 +5479,53 @@ static struct bpf_test tests[] = { | |||
5683 | { {0x40, 0x05 } }, | 5479 | { {0x40, 0x05 } }, |
5684 | }, | 5480 | }, |
5685 | { | 5481 | { |
5482 | "LD_IND byte positive offset, all ff", | ||
5483 | .u.insns = { | ||
5484 | BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), | ||
5485 | BPF_STMT(BPF_LD | BPF_IND | BPF_B, 0x1), | ||
5486 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5487 | }, | ||
5488 | CLASSIC, | ||
5489 | { [0x3c] = 0xff, [0x3d] = 0xff, [0x3e] = 0xff, [0x3f] = 0xff }, | ||
5490 | { {0x40, 0xff } }, | ||
5491 | }, | ||
5492 | { | ||
5493 | "LD_IND byte positive offset, out of bounds", | ||
5494 | .u.insns = { | ||
5495 | BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), | ||
5496 | BPF_STMT(BPF_LD | BPF_IND | BPF_B, 0x1), | ||
5497 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5498 | }, | ||
5499 | CLASSIC, | ||
5500 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
5501 | { {0x3f, 0 }, }, | ||
5502 | }, | ||
5503 | { | ||
5504 | "LD_IND byte negative offset, out of bounds", | ||
5505 | .u.insns = { | ||
5506 | BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), | ||
5507 | BPF_STMT(BPF_LD | BPF_IND | BPF_B, -0x3f), | ||
5508 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5509 | }, | ||
5510 | CLASSIC, | ||
5511 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
5512 | { {0x3f, 0 } }, | ||
5513 | }, | ||
5514 | { | ||
5515 | "LD_IND byte negative offset, multiple calls", | ||
5516 | .u.insns = { | ||
5517 | BPF_STMT(BPF_LDX | BPF_IMM, 0x3b), | ||
5518 | BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 1), | ||
5519 | BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 2), | ||
5520 | BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 3), | ||
5521 | BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 4), | ||
5522 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5523 | }, | ||
5524 | CLASSIC, | ||
5525 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
5526 | { {0x40, 0x82 }, }, | ||
5527 | }, | ||
5528 | { | ||
5686 | "LD_IND halfword positive offset", | 5529 | "LD_IND halfword positive offset", |
5687 | .u.insns = { | 5530 | .u.insns = { |
5688 | BPF_STMT(BPF_LDX | BPF_IMM, 0x20), | 5531 | BPF_STMT(BPF_LDX | BPF_IMM, 0x20), |
@@ -5731,6 +5574,39 @@ static struct bpf_test tests[] = { | |||
5731 | { {0x40, 0x66cc } }, | 5574 | { {0x40, 0x66cc } }, |
5732 | }, | 5575 | }, |
5733 | { | 5576 | { |
5577 | "LD_IND halfword positive offset, all ff", | ||
5578 | .u.insns = { | ||
5579 | BPF_STMT(BPF_LDX | BPF_IMM, 0x3d), | ||
5580 | BPF_STMT(BPF_LD | BPF_IND | BPF_H, 0x1), | ||
5581 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5582 | }, | ||
5583 | CLASSIC, | ||
5584 | { [0x3c] = 0xff, [0x3d] = 0xff, [0x3e] = 0xff, [0x3f] = 0xff }, | ||
5585 | { {0x40, 0xffff } }, | ||
5586 | }, | ||
5587 | { | ||
5588 | "LD_IND halfword positive offset, out of bounds", | ||
5589 | .u.insns = { | ||
5590 | BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), | ||
5591 | BPF_STMT(BPF_LD | BPF_IND | BPF_H, 0x1), | ||
5592 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5593 | }, | ||
5594 | CLASSIC, | ||
5595 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
5596 | { {0x3f, 0 }, }, | ||
5597 | }, | ||
5598 | { | ||
5599 | "LD_IND halfword negative offset, out of bounds", | ||
5600 | .u.insns = { | ||
5601 | BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), | ||
5602 | BPF_STMT(BPF_LD | BPF_IND | BPF_H, -0x3f), | ||
5603 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5604 | }, | ||
5605 | CLASSIC, | ||
5606 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
5607 | { {0x3f, 0 } }, | ||
5608 | }, | ||
5609 | { | ||
5734 | "LD_IND word positive offset", | 5610 | "LD_IND word positive offset", |
5735 | .u.insns = { | 5611 | .u.insns = { |
5736 | BPF_STMT(BPF_LDX | BPF_IMM, 0x20), | 5612 | BPF_STMT(BPF_LDX | BPF_IMM, 0x20), |
@@ -5821,6 +5697,39 @@ static struct bpf_test tests[] = { | |||
5821 | { {0x40, 0x66cc77dd } }, | 5697 | { {0x40, 0x66cc77dd } }, |
5822 | }, | 5698 | }, |
5823 | { | 5699 | { |
5700 | "LD_IND word positive offset, all ff", | ||
5701 | .u.insns = { | ||
5702 | BPF_STMT(BPF_LDX | BPF_IMM, 0x3b), | ||
5703 | BPF_STMT(BPF_LD | BPF_IND | BPF_W, 0x1), | ||
5704 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5705 | }, | ||
5706 | CLASSIC, | ||
5707 | { [0x3c] = 0xff, [0x3d] = 0xff, [0x3e] = 0xff, [0x3f] = 0xff }, | ||
5708 | { {0x40, 0xffffffff } }, | ||
5709 | }, | ||
5710 | { | ||
5711 | "LD_IND word positive offset, out of bounds", | ||
5712 | .u.insns = { | ||
5713 | BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), | ||
5714 | BPF_STMT(BPF_LD | BPF_IND | BPF_W, 0x1), | ||
5715 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5716 | }, | ||
5717 | CLASSIC, | ||
5718 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
5719 | { {0x3f, 0 }, }, | ||
5720 | }, | ||
5721 | { | ||
5722 | "LD_IND word negative offset, out of bounds", | ||
5723 | .u.insns = { | ||
5724 | BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), | ||
5725 | BPF_STMT(BPF_LD | BPF_IND | BPF_W, -0x3f), | ||
5726 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5727 | }, | ||
5728 | CLASSIC, | ||
5729 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
5730 | { {0x3f, 0 } }, | ||
5731 | }, | ||
5732 | { | ||
5824 | "LD_ABS byte", | 5733 | "LD_ABS byte", |
5825 | .u.insns = { | 5734 | .u.insns = { |
5826 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x20), | 5735 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x20), |
@@ -5838,6 +5747,68 @@ static struct bpf_test tests[] = { | |||
5838 | { {0x40, 0xcc } }, | 5747 | { {0x40, 0xcc } }, |
5839 | }, | 5748 | }, |
5840 | { | 5749 | { |
5750 | "LD_ABS byte positive offset, all ff", | ||
5751 | .u.insns = { | ||
5752 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x3f), | ||
5753 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5754 | }, | ||
5755 | CLASSIC, | ||
5756 | { [0x3c] = 0xff, [0x3d] = 0xff, [0x3e] = 0xff, [0x3f] = 0xff }, | ||
5757 | { {0x40, 0xff } }, | ||
5758 | }, | ||
5759 | { | ||
5760 | "LD_ABS byte positive offset, out of bounds", | ||
5761 | .u.insns = { | ||
5762 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x3f), | ||
5763 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5764 | }, | ||
5765 | CLASSIC, | ||
5766 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
5767 | { {0x3f, 0 }, }, | ||
5768 | }, | ||
5769 | { | ||
5770 | "LD_ABS byte negative offset, out of bounds load", | ||
5771 | .u.insns = { | ||
5772 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, -1), | ||
5773 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5774 | }, | ||
5775 | CLASSIC | FLAG_EXPECTED_FAIL, | ||
5776 | .expected_errcode = -EINVAL, | ||
5777 | }, | ||
5778 | { | ||
5779 | "LD_ABS byte negative offset, in bounds", | ||
5780 | .u.insns = { | ||
5781 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3f), | ||
5782 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5783 | }, | ||
5784 | CLASSIC, | ||
5785 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
5786 | { {0x40, 0x82 }, }, | ||
5787 | }, | ||
5788 | { | ||
5789 | "LD_ABS byte negative offset, out of bounds", | ||
5790 | .u.insns = { | ||
5791 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3f), | ||
5792 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5793 | }, | ||
5794 | CLASSIC, | ||
5795 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
5796 | { {0x3f, 0 }, }, | ||
5797 | }, | ||
5798 | { | ||
5799 | "LD_ABS byte negative offset, multiple calls", | ||
5800 | .u.insns = { | ||
5801 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3c), | ||
5802 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3d), | ||
5803 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3e), | ||
5804 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3f), | ||
5805 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5806 | }, | ||
5807 | CLASSIC, | ||
5808 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
5809 | { {0x40, 0x82 }, }, | ||
5810 | }, | ||
5811 | { | ||
5841 | "LD_ABS halfword", | 5812 | "LD_ABS halfword", |
5842 | .u.insns = { | 5813 | .u.insns = { |
5843 | BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x22), | 5814 | BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x22), |
@@ -5872,6 +5843,55 @@ static struct bpf_test tests[] = { | |||
5872 | { {0x40, 0x99ff } }, | 5843 | { {0x40, 0x99ff } }, |
5873 | }, | 5844 | }, |
5874 | { | 5845 | { |
5846 | "LD_ABS halfword positive offset, all ff", | ||
5847 | .u.insns = { | ||
5848 | BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x3e), | ||
5849 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5850 | }, | ||
5851 | CLASSIC, | ||
5852 | { [0x3c] = 0xff, [0x3d] = 0xff, [0x3e] = 0xff, [0x3f] = 0xff }, | ||
5853 | { {0x40, 0xffff } }, | ||
5854 | }, | ||
5855 | { | ||
5856 | "LD_ABS halfword positive offset, out of bounds", | ||
5857 | .u.insns = { | ||
5858 | BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x3f), | ||
5859 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5860 | }, | ||
5861 | CLASSIC, | ||
5862 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
5863 | { {0x3f, 0 }, }, | ||
5864 | }, | ||
5865 | { | ||
5866 | "LD_ABS halfword negative offset, out of bounds load", | ||
5867 | .u.insns = { | ||
5868 | BPF_STMT(BPF_LD | BPF_ABS | BPF_H, -1), | ||
5869 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5870 | }, | ||
5871 | CLASSIC | FLAG_EXPECTED_FAIL, | ||
5872 | .expected_errcode = -EINVAL, | ||
5873 | }, | ||
5874 | { | ||
5875 | "LD_ABS halfword negative offset, in bounds", | ||
5876 | .u.insns = { | ||
5877 | BPF_STMT(BPF_LD | BPF_ABS | BPF_H, SKF_LL_OFF + 0x3e), | ||
5878 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5879 | }, | ||
5880 | CLASSIC, | ||
5881 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
5882 | { {0x40, 0x1982 }, }, | ||
5883 | }, | ||
5884 | { | ||
5885 | "LD_ABS halfword negative offset, out of bounds", | ||
5886 | .u.insns = { | ||
5887 | BPF_STMT(BPF_LD | BPF_ABS | BPF_H, SKF_LL_OFF + 0x3e), | ||
5888 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5889 | }, | ||
5890 | CLASSIC, | ||
5891 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
5892 | { {0x3f, 0 }, }, | ||
5893 | }, | ||
5894 | { | ||
5875 | "LD_ABS word", | 5895 | "LD_ABS word", |
5876 | .u.insns = { | 5896 | .u.insns = { |
5877 | BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x1c), | 5897 | BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x1c), |
@@ -5939,6 +5959,140 @@ static struct bpf_test tests[] = { | |||
5939 | }, | 5959 | }, |
5940 | { {0x40, 0x88ee99ff } }, | 5960 | { {0x40, 0x88ee99ff } }, |
5941 | }, | 5961 | }, |
5962 | { | ||
5963 | "LD_ABS word positive offset, all ff", | ||
5964 | .u.insns = { | ||
5965 | BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x3c), | ||
5966 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5967 | }, | ||
5968 | CLASSIC, | ||
5969 | { [0x3c] = 0xff, [0x3d] = 0xff, [0x3e] = 0xff, [0x3f] = 0xff }, | ||
5970 | { {0x40, 0xffffffff } }, | ||
5971 | }, | ||
5972 | { | ||
5973 | "LD_ABS word positive offset, out of bounds", | ||
5974 | .u.insns = { | ||
5975 | BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x3f), | ||
5976 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5977 | }, | ||
5978 | CLASSIC, | ||
5979 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
5980 | { {0x3f, 0 }, }, | ||
5981 | }, | ||
5982 | { | ||
5983 | "LD_ABS word negative offset, out of bounds load", | ||
5984 | .u.insns = { | ||
5985 | BPF_STMT(BPF_LD | BPF_ABS | BPF_W, -1), | ||
5986 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5987 | }, | ||
5988 | CLASSIC | FLAG_EXPECTED_FAIL, | ||
5989 | .expected_errcode = -EINVAL, | ||
5990 | }, | ||
5991 | { | ||
5992 | "LD_ABS word negative offset, in bounds", | ||
5993 | .u.insns = { | ||
5994 | BPF_STMT(BPF_LD | BPF_ABS | BPF_W, SKF_LL_OFF + 0x3c), | ||
5995 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
5996 | }, | ||
5997 | CLASSIC, | ||
5998 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
5999 | { {0x40, 0x25051982 }, }, | ||
6000 | }, | ||
6001 | { | ||
6002 | "LD_ABS word negative offset, out of bounds", | ||
6003 | .u.insns = { | ||
6004 | BPF_STMT(BPF_LD | BPF_ABS | BPF_W, SKF_LL_OFF + 0x3c), | ||
6005 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
6006 | }, | ||
6007 | CLASSIC, | ||
6008 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
6009 | { {0x3f, 0 }, }, | ||
6010 | }, | ||
6011 | { | ||
6012 | "LDX_MSH standalone, preserved A", | ||
6013 | .u.insns = { | ||
6014 | BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa), | ||
6015 | BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3c), | ||
6016 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
6017 | }, | ||
6018 | CLASSIC, | ||
6019 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
6020 | { {0x40, 0xffeebbaa }, }, | ||
6021 | }, | ||
6022 | { | ||
6023 | "LDX_MSH standalone, preserved A 2", | ||
6024 | .u.insns = { | ||
6025 | BPF_STMT(BPF_LD | BPF_IMM, 0x175e9d63), | ||
6026 | BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3c), | ||
6027 | BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3d), | ||
6028 | BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3e), | ||
6029 | BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3f), | ||
6030 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
6031 | }, | ||
6032 | CLASSIC, | ||
6033 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
6034 | { {0x40, 0x175e9d63 }, }, | ||
6035 | }, | ||
6036 | { | ||
6037 | "LDX_MSH standalone, test result 1", | ||
6038 | .u.insns = { | ||
6039 | BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa), | ||
6040 | BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3c), | ||
6041 | BPF_STMT(BPF_MISC | BPF_TXA, 0), | ||
6042 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
6043 | }, | ||
6044 | CLASSIC, | ||
6045 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
6046 | { {0x40, 0x14 }, }, | ||
6047 | }, | ||
6048 | { | ||
6049 | "LDX_MSH standalone, test result 2", | ||
6050 | .u.insns = { | ||
6051 | BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa), | ||
6052 | BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3e), | ||
6053 | BPF_STMT(BPF_MISC | BPF_TXA, 0), | ||
6054 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
6055 | }, | ||
6056 | CLASSIC, | ||
6057 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
6058 | { {0x40, 0x24 }, }, | ||
6059 | }, | ||
6060 | { | ||
6061 | "LDX_MSH standalone, negative offset", | ||
6062 | .u.insns = { | ||
6063 | BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa), | ||
6064 | BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, -1), | ||
6065 | BPF_STMT(BPF_MISC | BPF_TXA, 0), | ||
6066 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
6067 | }, | ||
6068 | CLASSIC, | ||
6069 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
6070 | { {0x40, 0 }, }, | ||
6071 | }, | ||
6072 | { | ||
6073 | "LDX_MSH standalone, negative offset 2", | ||
6074 | .u.insns = { | ||
6075 | BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa), | ||
6076 | BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, SKF_LL_OFF + 0x3e), | ||
6077 | BPF_STMT(BPF_MISC | BPF_TXA, 0), | ||
6078 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
6079 | }, | ||
6080 | CLASSIC, | ||
6081 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
6082 | { {0x40, 0x24 }, }, | ||
6083 | }, | ||
6084 | { | ||
6085 | "LDX_MSH standalone, out of bounds", | ||
6086 | .u.insns = { | ||
6087 | BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa), | ||
6088 | BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x40), | ||
6089 | BPF_STMT(BPF_MISC | BPF_TXA, 0), | ||
6090 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
6091 | }, | ||
6092 | CLASSIC, | ||
6093 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
6094 | { {0x40, 0 }, }, | ||
6095 | }, | ||
5942 | /* | 6096 | /* |
5943 | * verify that the interpreter or JIT correctly sets A and X | 6097 | * verify that the interpreter or JIT correctly sets A and X |
5944 | * to 0. | 6098 | * to 0. |
@@ -6127,14 +6281,6 @@ static struct bpf_test tests[] = { | |||
6127 | {}, | 6281 | {}, |
6128 | { {0x1, 0x42 } }, | 6282 | { {0x1, 0x42 } }, |
6129 | }, | 6283 | }, |
6130 | { | ||
6131 | "LD_ABS with helper changing skb data", | ||
6132 | { }, | ||
6133 | INTERNAL, | ||
6134 | { 0x34 }, | ||
6135 | { { ETH_HLEN, 42 } }, | ||
6136 | .fill_helper = bpf_fill_ld_abs_vlan_push_pop2, | ||
6137 | }, | ||
6138 | /* Checking interpreter vs JIT wrt signed extended imms. */ | 6284 | /* Checking interpreter vs JIT wrt signed extended imms. */ |
6139 | { | 6285 | { |
6140 | "JNE signed compare, test 1", | 6286 | "JNE signed compare, test 1", |
diff --git a/net/Kconfig b/net/Kconfig index b62089fb1332..df8d45ef47d8 100644 --- a/net/Kconfig +++ b/net/Kconfig | |||
@@ -59,6 +59,7 @@ source "net/tls/Kconfig" | |||
59 | source "net/xfrm/Kconfig" | 59 | source "net/xfrm/Kconfig" |
60 | source "net/iucv/Kconfig" | 60 | source "net/iucv/Kconfig" |
61 | source "net/smc/Kconfig" | 61 | source "net/smc/Kconfig" |
62 | source "net/xdp/Kconfig" | ||
62 | 63 | ||
63 | config INET | 64 | config INET |
64 | bool "TCP/IP networking" | 65 | bool "TCP/IP networking" |
diff --git a/net/Makefile b/net/Makefile index a6147c61b174..77aaddedbd29 100644 --- a/net/Makefile +++ b/net/Makefile | |||
@@ -85,3 +85,4 @@ obj-y += l3mdev/ | |||
85 | endif | 85 | endif |
86 | obj-$(CONFIG_QRTR) += qrtr/ | 86 | obj-$(CONFIG_QRTR) += qrtr/ |
87 | obj-$(CONFIG_NET_NCSI) += ncsi/ | 87 | obj-$(CONFIG_NET_NCSI) += ncsi/ |
88 | obj-$(CONFIG_XDP_SOCKETS) += xdp/ | ||
diff --git a/net/core/dev.c b/net/core/dev.c index bb81a6e1d354..29bf39174900 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -3627,6 +3627,44 @@ int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv) | |||
3627 | } | 3627 | } |
3628 | EXPORT_SYMBOL(dev_queue_xmit_accel); | 3628 | EXPORT_SYMBOL(dev_queue_xmit_accel); |
3629 | 3629 | ||
3630 | int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) | ||
3631 | { | ||
3632 | struct net_device *dev = skb->dev; | ||
3633 | struct sk_buff *orig_skb = skb; | ||
3634 | struct netdev_queue *txq; | ||
3635 | int ret = NETDEV_TX_BUSY; | ||
3636 | bool again = false; | ||
3637 | |||
3638 | if (unlikely(!netif_running(dev) || | ||
3639 | !netif_carrier_ok(dev))) | ||
3640 | goto drop; | ||
3641 | |||
3642 | skb = validate_xmit_skb_list(skb, dev, &again); | ||
3643 | if (skb != orig_skb) | ||
3644 | goto drop; | ||
3645 | |||
3646 | skb_set_queue_mapping(skb, queue_id); | ||
3647 | txq = skb_get_tx_queue(dev, skb); | ||
3648 | |||
3649 | local_bh_disable(); | ||
3650 | |||
3651 | HARD_TX_LOCK(dev, txq, smp_processor_id()); | ||
3652 | if (!netif_xmit_frozen_or_drv_stopped(txq)) | ||
3653 | ret = netdev_start_xmit(skb, dev, txq, false); | ||
3654 | HARD_TX_UNLOCK(dev, txq); | ||
3655 | |||
3656 | local_bh_enable(); | ||
3657 | |||
3658 | if (!dev_xmit_complete(ret)) | ||
3659 | kfree_skb(skb); | ||
3660 | |||
3661 | return ret; | ||
3662 | drop: | ||
3663 | atomic_long_inc(&dev->tx_dropped); | ||
3664 | kfree_skb_list(skb); | ||
3665 | return NET_XMIT_DROP; | ||
3666 | } | ||
3667 | EXPORT_SYMBOL(dev_direct_xmit); | ||
3630 | 3668 | ||
3631 | /************************************************************************* | 3669 | /************************************************************************* |
3632 | * Receiver routines | 3670 | * Receiver routines |
@@ -3996,12 +4034,12 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb) | |||
3996 | } | 4034 | } |
3997 | 4035 | ||
3998 | static u32 netif_receive_generic_xdp(struct sk_buff *skb, | 4036 | static u32 netif_receive_generic_xdp(struct sk_buff *skb, |
4037 | struct xdp_buff *xdp, | ||
3999 | struct bpf_prog *xdp_prog) | 4038 | struct bpf_prog *xdp_prog) |
4000 | { | 4039 | { |
4001 | struct netdev_rx_queue *rxqueue; | 4040 | struct netdev_rx_queue *rxqueue; |
4002 | void *orig_data, *orig_data_end; | 4041 | void *orig_data, *orig_data_end; |
4003 | u32 metalen, act = XDP_DROP; | 4042 | u32 metalen, act = XDP_DROP; |
4004 | struct xdp_buff xdp; | ||
4005 | int hlen, off; | 4043 | int hlen, off; |
4006 | u32 mac_len; | 4044 | u32 mac_len; |
4007 | 4045 | ||
@@ -4036,19 +4074,19 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, | |||
4036 | */ | 4074 | */ |
4037 | mac_len = skb->data - skb_mac_header(skb); | 4075 | mac_len = skb->data - skb_mac_header(skb); |
4038 | hlen = skb_headlen(skb) + mac_len; | 4076 | hlen = skb_headlen(skb) + mac_len; |
4039 | xdp.data = skb->data - mac_len; | 4077 | xdp->data = skb->data - mac_len; |
4040 | xdp.data_meta = xdp.data; | 4078 | xdp->data_meta = xdp->data; |
4041 | xdp.data_end = xdp.data + hlen; | 4079 | xdp->data_end = xdp->data + hlen; |
4042 | xdp.data_hard_start = skb->data - skb_headroom(skb); | 4080 | xdp->data_hard_start = skb->data - skb_headroom(skb); |
4043 | orig_data_end = xdp.data_end; | 4081 | orig_data_end = xdp->data_end; |
4044 | orig_data = xdp.data; | 4082 | orig_data = xdp->data; |
4045 | 4083 | ||
4046 | rxqueue = netif_get_rxqueue(skb); | 4084 | rxqueue = netif_get_rxqueue(skb); |
4047 | xdp.rxq = &rxqueue->xdp_rxq; | 4085 | xdp->rxq = &rxqueue->xdp_rxq; |
4048 | 4086 | ||
4049 | act = bpf_prog_run_xdp(xdp_prog, &xdp); | 4087 | act = bpf_prog_run_xdp(xdp_prog, xdp); |
4050 | 4088 | ||
4051 | off = xdp.data - orig_data; | 4089 | off = xdp->data - orig_data; |
4052 | if (off > 0) | 4090 | if (off > 0) |
4053 | __skb_pull(skb, off); | 4091 | __skb_pull(skb, off); |
4054 | else if (off < 0) | 4092 | else if (off < 0) |
@@ -4058,10 +4096,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, | |||
4058 | /* check if bpf_xdp_adjust_tail was used. it can only "shrink" | 4096 | /* check if bpf_xdp_adjust_tail was used. it can only "shrink" |
4059 | * packet. | 4097 | * packet. |
4060 | */ | 4098 | */ |
4061 | off = orig_data_end - xdp.data_end; | 4099 | off = orig_data_end - xdp->data_end; |
4062 | if (off != 0) { | 4100 | if (off != 0) { |
4063 | skb_set_tail_pointer(skb, xdp.data_end - xdp.data); | 4101 | skb_set_tail_pointer(skb, xdp->data_end - xdp->data); |
4064 | skb->len -= off; | 4102 | skb->len -= off; |
4103 | |||
4065 | } | 4104 | } |
4066 | 4105 | ||
4067 | switch (act) { | 4106 | switch (act) { |
@@ -4070,7 +4109,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, | |||
4070 | __skb_push(skb, mac_len); | 4109 | __skb_push(skb, mac_len); |
4071 | break; | 4110 | break; |
4072 | case XDP_PASS: | 4111 | case XDP_PASS: |
4073 | metalen = xdp.data - xdp.data_meta; | 4112 | metalen = xdp->data - xdp->data_meta; |
4074 | if (metalen) | 4113 | if (metalen) |
4075 | skb_metadata_set(skb, metalen); | 4114 | skb_metadata_set(skb, metalen); |
4076 | break; | 4115 | break; |
@@ -4120,17 +4159,19 @@ static struct static_key generic_xdp_needed __read_mostly; | |||
4120 | int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) | 4159 | int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) |
4121 | { | 4160 | { |
4122 | if (xdp_prog) { | 4161 | if (xdp_prog) { |
4123 | u32 act = netif_receive_generic_xdp(skb, xdp_prog); | 4162 | struct xdp_buff xdp; |
4163 | u32 act; | ||
4124 | int err; | 4164 | int err; |
4125 | 4165 | ||
4166 | act = netif_receive_generic_xdp(skb, &xdp, xdp_prog); | ||
4126 | if (act != XDP_PASS) { | 4167 | if (act != XDP_PASS) { |
4127 | switch (act) { | 4168 | switch (act) { |
4128 | case XDP_REDIRECT: | 4169 | case XDP_REDIRECT: |
4129 | err = xdp_do_generic_redirect(skb->dev, skb, | 4170 | err = xdp_do_generic_redirect(skb->dev, skb, |
4130 | xdp_prog); | 4171 | &xdp, xdp_prog); |
4131 | if (err) | 4172 | if (err) |
4132 | goto out_redir; | 4173 | goto out_redir; |
4133 | /* fallthru to submit skb */ | 4174 | break; |
4134 | case XDP_TX: | 4175 | case XDP_TX: |
4135 | generic_xdp_tx(skb, xdp_prog); | 4176 | generic_xdp_tx(skb, xdp_prog); |
4136 | break; | 4177 | break; |
diff --git a/net/core/filter.c b/net/core/filter.c index d3781daa26ab..6877426c23a6 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
@@ -59,6 +59,7 @@ | |||
59 | #include <net/tcp.h> | 59 | #include <net/tcp.h> |
60 | #include <net/xfrm.h> | 60 | #include <net/xfrm.h> |
61 | #include <linux/bpf_trace.h> | 61 | #include <linux/bpf_trace.h> |
62 | #include <net/xdp_sock.h> | ||
62 | 63 | ||
63 | /** | 64 | /** |
64 | * sk_filter_trim_cap - run a packet through a socket filter | 65 | * sk_filter_trim_cap - run a packet through a socket filter |
@@ -112,12 +113,12 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap) | |||
112 | } | 113 | } |
113 | EXPORT_SYMBOL(sk_filter_trim_cap); | 114 | EXPORT_SYMBOL(sk_filter_trim_cap); |
114 | 115 | ||
115 | BPF_CALL_1(__skb_get_pay_offset, struct sk_buff *, skb) | 116 | BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb) |
116 | { | 117 | { |
117 | return skb_get_poff(skb); | 118 | return skb_get_poff(skb); |
118 | } | 119 | } |
119 | 120 | ||
120 | BPF_CALL_3(__skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x) | 121 | BPF_CALL_3(bpf_skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x) |
121 | { | 122 | { |
122 | struct nlattr *nla; | 123 | struct nlattr *nla; |
123 | 124 | ||
@@ -137,7 +138,7 @@ BPF_CALL_3(__skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x) | |||
137 | return 0; | 138 | return 0; |
138 | } | 139 | } |
139 | 140 | ||
140 | BPF_CALL_3(__skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x) | 141 | BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x) |
141 | { | 142 | { |
142 | struct nlattr *nla; | 143 | struct nlattr *nla; |
143 | 144 | ||
@@ -161,13 +162,94 @@ BPF_CALL_3(__skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x) | |||
161 | return 0; | 162 | return 0; |
162 | } | 163 | } |
163 | 164 | ||
164 | BPF_CALL_0(__get_raw_cpu_id) | 165 | BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *, |
166 | data, int, headlen, int, offset) | ||
167 | { | ||
168 | u8 tmp, *ptr; | ||
169 | const int len = sizeof(tmp); | ||
170 | |||
171 | if (offset >= 0) { | ||
172 | if (headlen - offset >= len) | ||
173 | return *(u8 *)(data + offset); | ||
174 | if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) | ||
175 | return tmp; | ||
176 | } else { | ||
177 | ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); | ||
178 | if (likely(ptr)) | ||
179 | return *(u8 *)ptr; | ||
180 | } | ||
181 | |||
182 | return -EFAULT; | ||
183 | } | ||
184 | |||
185 | BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb, | ||
186 | int, offset) | ||
187 | { | ||
188 | return ____bpf_skb_load_helper_8(skb, skb->data, skb->len - skb->data_len, | ||
189 | offset); | ||
190 | } | ||
191 | |||
192 | BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *, | ||
193 | data, int, headlen, int, offset) | ||
194 | { | ||
195 | u16 tmp, *ptr; | ||
196 | const int len = sizeof(tmp); | ||
197 | |||
198 | if (offset >= 0) { | ||
199 | if (headlen - offset >= len) | ||
200 | return get_unaligned_be16(data + offset); | ||
201 | if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) | ||
202 | return be16_to_cpu(tmp); | ||
203 | } else { | ||
204 | ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); | ||
205 | if (likely(ptr)) | ||
206 | return get_unaligned_be16(ptr); | ||
207 | } | ||
208 | |||
209 | return -EFAULT; | ||
210 | } | ||
211 | |||
212 | BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb, | ||
213 | int, offset) | ||
214 | { | ||
215 | return ____bpf_skb_load_helper_16(skb, skb->data, skb->len - skb->data_len, | ||
216 | offset); | ||
217 | } | ||
218 | |||
219 | BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *, | ||
220 | data, int, headlen, int, offset) | ||
221 | { | ||
222 | u32 tmp, *ptr; | ||
223 | const int len = sizeof(tmp); | ||
224 | |||
225 | if (likely(offset >= 0)) { | ||
226 | if (headlen - offset >= len) | ||
227 | return get_unaligned_be32(data + offset); | ||
228 | if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) | ||
229 | return be32_to_cpu(tmp); | ||
230 | } else { | ||
231 | ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); | ||
232 | if (likely(ptr)) | ||
233 | return get_unaligned_be32(ptr); | ||
234 | } | ||
235 | |||
236 | return -EFAULT; | ||
237 | } | ||
238 | |||
239 | BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb, | ||
240 | int, offset) | ||
241 | { | ||
242 | return ____bpf_skb_load_helper_32(skb, skb->data, skb->len - skb->data_len, | ||
243 | offset); | ||
244 | } | ||
245 | |||
246 | BPF_CALL_0(bpf_get_raw_cpu_id) | ||
165 | { | 247 | { |
166 | return raw_smp_processor_id(); | 248 | return raw_smp_processor_id(); |
167 | } | 249 | } |
168 | 250 | ||
169 | static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = { | 251 | static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = { |
170 | .func = __get_raw_cpu_id, | 252 | .func = bpf_get_raw_cpu_id, |
171 | .gpl_only = false, | 253 | .gpl_only = false, |
172 | .ret_type = RET_INTEGER, | 254 | .ret_type = RET_INTEGER, |
173 | }; | 255 | }; |
@@ -317,16 +399,16 @@ static bool convert_bpf_extensions(struct sock_filter *fp, | |||
317 | /* Emit call(arg1=CTX, arg2=A, arg3=X) */ | 399 | /* Emit call(arg1=CTX, arg2=A, arg3=X) */ |
318 | switch (fp->k) { | 400 | switch (fp->k) { |
319 | case SKF_AD_OFF + SKF_AD_PAY_OFFSET: | 401 | case SKF_AD_OFF + SKF_AD_PAY_OFFSET: |
320 | *insn = BPF_EMIT_CALL(__skb_get_pay_offset); | 402 | *insn = BPF_EMIT_CALL(bpf_skb_get_pay_offset); |
321 | break; | 403 | break; |
322 | case SKF_AD_OFF + SKF_AD_NLATTR: | 404 | case SKF_AD_OFF + SKF_AD_NLATTR: |
323 | *insn = BPF_EMIT_CALL(__skb_get_nlattr); | 405 | *insn = BPF_EMIT_CALL(bpf_skb_get_nlattr); |
324 | break; | 406 | break; |
325 | case SKF_AD_OFF + SKF_AD_NLATTR_NEST: | 407 | case SKF_AD_OFF + SKF_AD_NLATTR_NEST: |
326 | *insn = BPF_EMIT_CALL(__skb_get_nlattr_nest); | 408 | *insn = BPF_EMIT_CALL(bpf_skb_get_nlattr_nest); |
327 | break; | 409 | break; |
328 | case SKF_AD_OFF + SKF_AD_CPU: | 410 | case SKF_AD_OFF + SKF_AD_CPU: |
329 | *insn = BPF_EMIT_CALL(__get_raw_cpu_id); | 411 | *insn = BPF_EMIT_CALL(bpf_get_raw_cpu_id); |
330 | break; | 412 | break; |
331 | case SKF_AD_OFF + SKF_AD_RANDOM: | 413 | case SKF_AD_OFF + SKF_AD_RANDOM: |
332 | *insn = BPF_EMIT_CALL(bpf_user_rnd_u32); | 414 | *insn = BPF_EMIT_CALL(bpf_user_rnd_u32); |
@@ -353,26 +435,87 @@ static bool convert_bpf_extensions(struct sock_filter *fp, | |||
353 | return true; | 435 | return true; |
354 | } | 436 | } |
355 | 437 | ||
438 | static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp) | ||
439 | { | ||
440 | const bool unaligned_ok = IS_BUILTIN(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS); | ||
441 | int size = bpf_size_to_bytes(BPF_SIZE(fp->code)); | ||
442 | bool endian = BPF_SIZE(fp->code) == BPF_H || | ||
443 | BPF_SIZE(fp->code) == BPF_W; | ||
444 | bool indirect = BPF_MODE(fp->code) == BPF_IND; | ||
445 | const int ip_align = NET_IP_ALIGN; | ||
446 | struct bpf_insn *insn = *insnp; | ||
447 | int offset = fp->k; | ||
448 | |||
449 | if (!indirect && | ||
450 | ((unaligned_ok && offset >= 0) || | ||
451 | (!unaligned_ok && offset >= 0 && | ||
452 | offset + ip_align >= 0 && | ||
453 | offset + ip_align % size == 0))) { | ||
454 | *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H); | ||
455 | *insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset); | ||
456 | *insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP, size, 2 + endian); | ||
457 | *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A, BPF_REG_D, | ||
458 | offset); | ||
459 | if (endian) | ||
460 | *insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8); | ||
461 | *insn++ = BPF_JMP_A(8); | ||
462 | } | ||
463 | |||
464 | *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX); | ||
465 | *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_D); | ||
466 | *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_H); | ||
467 | if (!indirect) { | ||
468 | *insn++ = BPF_MOV64_IMM(BPF_REG_ARG4, offset); | ||
469 | } else { | ||
470 | *insn++ = BPF_MOV64_REG(BPF_REG_ARG4, BPF_REG_X); | ||
471 | if (fp->k) | ||
472 | *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG4, offset); | ||
473 | } | ||
474 | |||
475 | switch (BPF_SIZE(fp->code)) { | ||
476 | case BPF_B: | ||
477 | *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8); | ||
478 | break; | ||
479 | case BPF_H: | ||
480 | *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16); | ||
481 | break; | ||
482 | case BPF_W: | ||
483 | *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32); | ||
484 | break; | ||
485 | default: | ||
486 | return false; | ||
487 | } | ||
488 | |||
489 | *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_A, 0, 2); | ||
490 | *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); | ||
491 | *insn = BPF_EXIT_INSN(); | ||
492 | |||
493 | *insnp = insn; | ||
494 | return true; | ||
495 | } | ||
496 | |||
356 | /** | 497 | /** |
357 | * bpf_convert_filter - convert filter program | 498 | * bpf_convert_filter - convert filter program |
358 | * @prog: the user passed filter program | 499 | * @prog: the user passed filter program |
359 | * @len: the length of the user passed filter program | 500 | * @len: the length of the user passed filter program |
360 | * @new_prog: allocated 'struct bpf_prog' or NULL | 501 | * @new_prog: allocated 'struct bpf_prog' or NULL |
361 | * @new_len: pointer to store length of converted program | 502 | * @new_len: pointer to store length of converted program |
503 | * @seen_ld_abs: whether we've seen an ld_abs/ind instruction | ||
362 | * | 504 | * |
363 | * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn' | 505 | * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn' |
364 | * style extended BPF (eBPF). | 506 | * style extended BPF (eBPF). |
365 | * Conversion workflow: | 507 | * Conversion workflow: |
366 | * | 508 | * |
367 | * 1) First pass for calculating the new program length: | 509 | * 1) First pass for calculating the new program length: |
368 | * bpf_convert_filter(old_prog, old_len, NULL, &new_len) | 510 | * bpf_convert_filter(old_prog, old_len, NULL, &new_len, &seen_ld_abs) |
369 | * | 511 | * |
370 | * 2) 2nd pass to remap in two passes: 1st pass finds new | 512 | * 2) 2nd pass to remap in two passes: 1st pass finds new |
371 | * jump offsets, 2nd pass remapping: | 513 | * jump offsets, 2nd pass remapping: |
372 | * bpf_convert_filter(old_prog, old_len, new_prog, &new_len); | 514 | * bpf_convert_filter(old_prog, old_len, new_prog, &new_len, &seen_ld_abs) |
373 | */ | 515 | */ |
374 | static int bpf_convert_filter(struct sock_filter *prog, int len, | 516 | static int bpf_convert_filter(struct sock_filter *prog, int len, |
375 | struct bpf_prog *new_prog, int *new_len) | 517 | struct bpf_prog *new_prog, int *new_len, |
518 | bool *seen_ld_abs) | ||
376 | { | 519 | { |
377 | int new_flen = 0, pass = 0, target, i, stack_off; | 520 | int new_flen = 0, pass = 0, target, i, stack_off; |
378 | struct bpf_insn *new_insn, *first_insn = NULL; | 521 | struct bpf_insn *new_insn, *first_insn = NULL; |
@@ -411,12 +554,27 @@ do_pass: | |||
411 | * do this ourselves. Initial CTX is present in BPF_REG_ARG1. | 554 |
412 | */ | 555 | */ |
413 | *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); | 556 | *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); |
557 | if (*seen_ld_abs) { | ||
558 | /* For packet access in classic BPF, cache skb->data | ||
559 | * in callee-saved BPF R8 and skb->len - skb->data_len | ||
560 | * (headlen) in BPF R9. Since classic BPF is read-only | ||
561 | * on CTX, we only need to cache it once. | ||
562 | */ | ||
563 | *new_insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), | ||
564 | BPF_REG_D, BPF_REG_CTX, | ||
565 | offsetof(struct sk_buff, data)); | ||
566 | *new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_H, BPF_REG_CTX, | ||
567 | offsetof(struct sk_buff, len)); | ||
568 | *new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_TMP, BPF_REG_CTX, | ||
569 | offsetof(struct sk_buff, data_len)); | ||
570 | *new_insn++ = BPF_ALU32_REG(BPF_SUB, BPF_REG_H, BPF_REG_TMP); | ||
571 | } | ||
414 | } else { | 572 | } else { |
415 | new_insn += 3; | 573 | new_insn += 3; |
416 | } | 574 | } |
417 | 575 | ||
418 | for (i = 0; i < len; fp++, i++) { | 576 | for (i = 0; i < len; fp++, i++) { |
419 | struct bpf_insn tmp_insns[6] = { }; | 577 | struct bpf_insn tmp_insns[32] = { }; |
420 | struct bpf_insn *insn = tmp_insns; | 578 | struct bpf_insn *insn = tmp_insns; |
421 | 579 | ||
422 | if (addrs) | 580 | if (addrs) |
@@ -459,6 +617,11 @@ do_pass: | |||
459 | BPF_MODE(fp->code) == BPF_ABS && | 617 | BPF_MODE(fp->code) == BPF_ABS && |
460 | convert_bpf_extensions(fp, &insn)) | 618 | convert_bpf_extensions(fp, &insn)) |
461 | break; | 619 | break; |
620 | if (BPF_CLASS(fp->code) == BPF_LD && | ||
621 | convert_bpf_ld_abs(fp, &insn)) { | ||
622 | *seen_ld_abs = true; | ||
623 | break; | ||
624 | } | ||
462 | 625 | ||
463 | if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) || | 626 | if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) || |
464 | fp->code == (BPF_ALU | BPF_MOD | BPF_X)) { | 627 | fp->code == (BPF_ALU | BPF_MOD | BPF_X)) { |
@@ -561,21 +724,31 @@ jmp_rest: | |||
561 | break; | 724 | break; |
562 | 725 | ||
563 | /* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */ | 726 |
564 | case BPF_LDX | BPF_MSH | BPF_B: | 727 | case BPF_LDX | BPF_MSH | BPF_B: { |
565 | /* tmp = A */ | 728 | struct sock_filter tmp = { |
566 | *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A); | 729 | .code = BPF_LD | BPF_ABS | BPF_B, |
730 | .k = fp->k, | ||
731 | }; | ||
732 | |||
733 | *seen_ld_abs = true; | ||
734 | |||
735 | /* X = A */ | ||
736 | *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); | ||
567 | /* A = BPF_R0 = *(u8 *) (skb->data + K) */ | 737 | /* A = BPF_R0 = *(u8 *) (skb->data + K) */ |
568 | *insn++ = BPF_LD_ABS(BPF_B, fp->k); | 738 | convert_bpf_ld_abs(&tmp, &insn); |
739 | insn++; | ||
569 | /* A &= 0xf */ | 740 | /* A &= 0xf */ |
570 | *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf); | 741 | *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf); |
571 | /* A <<= 2 */ | 742 | /* A <<= 2 */ |
572 | *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2); | 743 | *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2); |
744 | /* tmp = X */ | ||
745 | *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_X); | ||
573 | /* X = A */ | 746 | /* X = A */ |
574 | *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); | 747 | *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); |
575 | /* A = tmp */ | 748 | /* A = tmp */ |
576 | *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); | 749 | *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); |
577 | break; | 750 | break; |
578 | 751 | } | |
579 | /* RET_K is remapped into 2 insns. RET_A case doesn't need an | 752 |
580 | * extra mov as BPF_REG_0 is already mapped into BPF_REG_A. | 753 | * extra mov as BPF_REG_0 is already mapped into BPF_REG_A. |
581 | */ | 754 | */ |
@@ -657,6 +830,8 @@ jmp_rest: | |||
657 | if (!new_prog) { | 830 | if (!new_prog) { |
658 | /* Only calculating new length. */ | 831 | /* Only calculating new length. */ |
659 | *new_len = new_insn - first_insn; | 832 | *new_len = new_insn - first_insn; |
833 | if (*seen_ld_abs) | ||
834 | *new_len += 4; /* Prologue bits. */ | ||
660 | return 0; | 835 | return 0; |
661 | } | 836 | } |
662 | 837 | ||
@@ -1018,6 +1193,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) | |||
1018 | struct sock_filter *old_prog; | 1193 | struct sock_filter *old_prog; |
1019 | struct bpf_prog *old_fp; | 1194 | struct bpf_prog *old_fp; |
1020 | int err, new_len, old_len = fp->len; | 1195 | int err, new_len, old_len = fp->len; |
1196 | bool seen_ld_abs = false; | ||
1021 | 1197 | ||
1022 | /* We are free to overwrite insns et al right here as it | 1198 | /* We are free to overwrite insns et al right here as it |
1023 | * won't be used at this point in time anymore internally | 1199 | * won't be used at this point in time anymore internally |
@@ -1039,7 +1215,8 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) | |||
1039 | } | 1215 | } |
1040 | 1216 | ||
1041 | /* 1st pass: calculate the new program length. */ | 1217 | /* 1st pass: calculate the new program length. */ |
1042 | err = bpf_convert_filter(old_prog, old_len, NULL, &new_len); | 1218 | err = bpf_convert_filter(old_prog, old_len, NULL, &new_len, |
1219 | &seen_ld_abs); | ||
1043 | if (err) | 1220 | if (err) |
1044 | goto out_err_free; | 1221 | goto out_err_free; |
1045 | 1222 | ||
@@ -1058,7 +1235,8 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) | |||
1058 | fp->len = new_len; | 1235 | fp->len = new_len; |
1059 | 1236 | ||
1060 | /* 2nd pass: remap sock_filter insns into bpf_insn insns. */ | 1237 | /* 2nd pass: remap sock_filter insns into bpf_insn insns. */ |
1061 | err = bpf_convert_filter(old_prog, old_len, fp, &new_len); | 1238 | err = bpf_convert_filter(old_prog, old_len, fp, &new_len, |
1239 | &seen_ld_abs); | ||
1062 | if (err) | 1240 | if (err) |
1063 | /* 2nd bpf_convert_filter() can fail only if it fails | 1241 | /* 2nd bpf_convert_filter() can fail only if it fails |
1064 | * to allocate memory, remapping must succeed. Note, | 1242 | * to allocate memory, remapping must succeed. Note, |
@@ -1506,6 +1684,47 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { | |||
1506 | .arg4_type = ARG_CONST_SIZE, | 1684 | .arg4_type = ARG_CONST_SIZE, |
1507 | }; | 1685 | }; |
1508 | 1686 | ||
1687 | BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb, | ||
1688 | u32, offset, void *, to, u32, len, u32, start_header) | ||
1689 | { | ||
1690 | u8 *ptr; | ||
1691 | |||
1692 | if (unlikely(offset > 0xffff || len > skb_headlen(skb))) | ||
1693 | goto err_clear; | ||
1694 | |||
1695 | switch (start_header) { | ||
1696 | case BPF_HDR_START_MAC: | ||
1697 | ptr = skb_mac_header(skb) + offset; | ||
1698 | break; | ||
1699 | case BPF_HDR_START_NET: | ||
1700 | ptr = skb_network_header(skb) + offset; | ||
1701 | break; | ||
1702 | default: | ||
1703 | goto err_clear; | ||
1704 | } | ||
1705 | |||
1706 | if (likely(ptr >= skb_mac_header(skb) && | ||
1707 | ptr + len <= skb_tail_pointer(skb))) { | ||
1708 | memcpy(to, ptr, len); | ||
1709 | return 0; | ||
1710 | } | ||
1711 | |||
1712 | err_clear: | ||
1713 | memset(to, 0, len); | ||
1714 | return -EFAULT; | ||
1715 | } | ||
1716 | |||
1717 | static const struct bpf_func_proto bpf_skb_load_bytes_relative_proto = { | ||
1718 | .func = bpf_skb_load_bytes_relative, | ||
1719 | .gpl_only = false, | ||
1720 | .ret_type = RET_INTEGER, | ||
1721 | .arg1_type = ARG_PTR_TO_CTX, | ||
1722 | .arg2_type = ARG_ANYTHING, | ||
1723 | .arg3_type = ARG_PTR_TO_UNINIT_MEM, | ||
1724 | .arg4_type = ARG_CONST_SIZE, | ||
1725 | .arg5_type = ARG_ANYTHING, | ||
1726 | }; | ||
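Unlike bpf_skb_load_bytes(), which reads relative to skb->data (and thus to whatever header the program type happens to start at), the new helper anchors the read at the MAC or network header. A hedged BPF-side sketch; the section name and helper declarations follow the usual libbpf conventions and are assumptions here:

#include <stddef.h>
#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("classifier")
int log_saddr(struct __sk_buff *skb)
{
	__be32 saddr;

	/* Read the IPv4 source address relative to the network header,
	 * independent of how much link-layer framing precedes it.
	 */
	if (bpf_skb_load_bytes_relative(skb, offsetof(struct iphdr, saddr),
					&saddr, sizeof(saddr),
					BPF_HDR_START_NET))
		return TC_ACT_OK;	/* not in the linear area: pass */
	/* ... match or count saddr ... */
	return TC_ACT_OK;
}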
1727 | |||
1509 | BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len) | 1728 | BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len) |
1510 | { | 1729 | { |
1511 | /* Idea is the following: should the needed direct read/write | 1730 | /* Idea is the following: should the needed direct read/write |
@@ -2180,7 +2399,7 @@ BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto, | |||
2180 | return ret; | 2399 | return ret; |
2181 | } | 2400 | } |
2182 | 2401 | ||
2183 | const struct bpf_func_proto bpf_skb_vlan_push_proto = { | 2402 | static const struct bpf_func_proto bpf_skb_vlan_push_proto = { |
2184 | .func = bpf_skb_vlan_push, | 2403 | .func = bpf_skb_vlan_push, |
2185 | .gpl_only = false, | 2404 | .gpl_only = false, |
2186 | .ret_type = RET_INTEGER, | 2405 | .ret_type = RET_INTEGER, |
@@ -2188,7 +2407,6 @@ const struct bpf_func_proto bpf_skb_vlan_push_proto = { | |||
2188 | .arg2_type = ARG_ANYTHING, | 2407 | .arg2_type = ARG_ANYTHING, |
2189 | .arg3_type = ARG_ANYTHING, | 2408 | .arg3_type = ARG_ANYTHING, |
2190 | }; | 2409 | }; |
2191 | EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto); | ||
2192 | 2410 | ||
2193 | BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb) | 2411 | BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb) |
2194 | { | 2412 | { |
@@ -2202,13 +2420,12 @@ BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb) | |||
2202 | return ret; | 2420 | return ret; |
2203 | } | 2421 | } |
2204 | 2422 | ||
2205 | const struct bpf_func_proto bpf_skb_vlan_pop_proto = { | 2423 | static const struct bpf_func_proto bpf_skb_vlan_pop_proto = { |
2206 | .func = bpf_skb_vlan_pop, | 2424 | .func = bpf_skb_vlan_pop, |
2207 | .gpl_only = false, | 2425 | .gpl_only = false, |
2208 | .ret_type = RET_INTEGER, | 2426 | .ret_type = RET_INTEGER, |
2209 | .arg1_type = ARG_PTR_TO_CTX, | 2427 | .arg1_type = ARG_PTR_TO_CTX, |
2210 | }; | 2428 | }; |
2211 | EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto); | ||
2212 | 2429 | ||
2213 | static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) | 2430 | static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) |
2214 | { | 2431 | { |
@@ -2801,7 +3018,8 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, | |||
2801 | { | 3018 | { |
2802 | int err; | 3019 | int err; |
2803 | 3020 | ||
2804 | if (map->map_type == BPF_MAP_TYPE_DEVMAP) { | 3021 | switch (map->map_type) { |
3022 | case BPF_MAP_TYPE_DEVMAP: { | ||
2805 | struct net_device *dev = fwd; | 3023 | struct net_device *dev = fwd; |
2806 | struct xdp_frame *xdpf; | 3024 | struct xdp_frame *xdpf; |
2807 | 3025 | ||
@@ -2819,14 +3037,25 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, | |||
2819 | if (err) | 3037 | if (err) |
2820 | return err; | 3038 | return err; |
2821 | __dev_map_insert_ctx(map, index); | 3039 | __dev_map_insert_ctx(map, index); |
2822 | 3040 | break; | |
2823 | } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) { | 3041 | } |
3042 | case BPF_MAP_TYPE_CPUMAP: { | ||
2824 | struct bpf_cpu_map_entry *rcpu = fwd; | 3043 | struct bpf_cpu_map_entry *rcpu = fwd; |
2825 | 3044 | ||
2826 | err = cpu_map_enqueue(rcpu, xdp, dev_rx); | 3045 | err = cpu_map_enqueue(rcpu, xdp, dev_rx); |
2827 | if (err) | 3046 | if (err) |
2828 | return err; | 3047 | return err; |
2829 | __cpu_map_insert_ctx(map, index); | 3048 | __cpu_map_insert_ctx(map, index); |
3049 | break; | ||
3050 | } | ||
3051 | case BPF_MAP_TYPE_XSKMAP: { | ||
3052 | struct xdp_sock *xs = fwd; | ||
3053 | |||
3054 | err = __xsk_map_redirect(map, xdp, xs); | ||
3055 | return err; | ||
3056 | } | ||
3057 | default: | ||
3058 | break; | ||
2830 | } | 3059 | } |
2831 | return 0; | 3060 | return 0; |
2832 | } | 3061 | } |
@@ -2845,6 +3074,9 @@ void xdp_do_flush_map(void) | |||
2845 | case BPF_MAP_TYPE_CPUMAP: | 3074 | case BPF_MAP_TYPE_CPUMAP: |
2846 | __cpu_map_flush(map); | 3075 | __cpu_map_flush(map); |
2847 | break; | 3076 | break; |
3077 | case BPF_MAP_TYPE_XSKMAP: | ||
3078 | __xsk_map_flush(map); | ||
3079 | break; | ||
2848 | default: | 3080 | default: |
2849 | break; | 3081 | break; |
2850 | } | 3082 | } |
@@ -2859,6 +3091,8 @@ static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) | |||
2859 | return __dev_map_lookup_elem(map, index); | 3091 | return __dev_map_lookup_elem(map, index); |
2860 | case BPF_MAP_TYPE_CPUMAP: | 3092 | case BPF_MAP_TYPE_CPUMAP: |
2861 | return __cpu_map_lookup_elem(map, index); | 3093 | return __cpu_map_lookup_elem(map, index); |
3094 | case BPF_MAP_TYPE_XSKMAP: | ||
3095 | return __xsk_map_lookup_elem(map, index); | ||
2862 | default: | 3096 | default: |
2863 | return NULL; | 3097 | return NULL; |
2864 | } | 3098 | } |
@@ -2956,13 +3190,14 @@ static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd) | |||
2956 | 3190 | ||
2957 | static int xdp_do_generic_redirect_map(struct net_device *dev, | 3191 | static int xdp_do_generic_redirect_map(struct net_device *dev, |
2958 | struct sk_buff *skb, | 3192 | struct sk_buff *skb, |
3193 | struct xdp_buff *xdp, | ||
2959 | struct bpf_prog *xdp_prog) | 3194 | struct bpf_prog *xdp_prog) |
2960 | { | 3195 | { |
2961 | struct redirect_info *ri = this_cpu_ptr(&redirect_info); | 3196 | struct redirect_info *ri = this_cpu_ptr(&redirect_info); |
2962 | unsigned long map_owner = ri->map_owner; | 3197 | unsigned long map_owner = ri->map_owner; |
2963 | struct bpf_map *map = ri->map; | 3198 | struct bpf_map *map = ri->map; |
2964 | struct net_device *fwd = NULL; | ||
2965 | u32 index = ri->ifindex; | 3199 | u32 index = ri->ifindex; |
3200 | void *fwd = NULL; | ||
2966 | int err = 0; | 3201 | int err = 0; |
2967 | 3202 | ||
2968 | ri->ifindex = 0; | 3203 | ri->ifindex = 0; |
@@ -2984,6 +3219,14 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, | |||
2984 | if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) | 3219 | if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) |
2985 | goto err; | 3220 | goto err; |
2986 | skb->dev = fwd; | 3221 | skb->dev = fwd; |
3222 | generic_xdp_tx(skb, xdp_prog); | ||
3223 | } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { | ||
3224 | struct xdp_sock *xs = fwd; | ||
3225 | |||
3226 | err = xsk_generic_rcv(xs, xdp); | ||
3227 | if (err) | ||
3228 | goto err; | ||
3229 | consume_skb(skb); | ||
2987 | } else { | 3230 | } else { |
2988 | /* TODO: Handle BPF_MAP_TYPE_CPUMAP */ | 3231 | /* TODO: Handle BPF_MAP_TYPE_CPUMAP */ |
2989 | err = -EBADRQC; | 3232 | err = -EBADRQC; |
@@ -2998,7 +3241,7 @@ err: | |||
2998 | } | 3241 | } |
2999 | 3242 | ||
3000 | int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, | 3243 | int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, |
3001 | struct bpf_prog *xdp_prog) | 3244 | struct xdp_buff *xdp, struct bpf_prog *xdp_prog) |
3002 | { | 3245 | { |
3003 | struct redirect_info *ri = this_cpu_ptr(&redirect_info); | 3246 | struct redirect_info *ri = this_cpu_ptr(&redirect_info); |
3004 | u32 index = ri->ifindex; | 3247 | u32 index = ri->ifindex; |
@@ -3006,7 +3249,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, | |||
3006 | int err = 0; | 3249 | int err = 0; |
3007 | 3250 | ||
3008 | if (ri->map) | 3251 | if (ri->map) |
3009 | return xdp_do_generic_redirect_map(dev, skb, xdp_prog); | 3252 | return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog); |
3010 | 3253 | ||
3011 | ri->ifindex = 0; | 3254 | ri->ifindex = 0; |
3012 | fwd = dev_get_by_index_rcu(dev_net(dev), index); | 3255 | fwd = dev_get_by_index_rcu(dev_net(dev), index); |
@@ -3020,6 +3263,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, | |||
3020 | 3263 | ||
3021 | skb->dev = fwd; | 3264 | skb->dev = fwd; |
3022 | _trace_xdp_redirect(dev, xdp_prog, index); | 3265 | _trace_xdp_redirect(dev, xdp_prog, index); |
3266 | generic_xdp_tx(skb, xdp_prog); | ||
3023 | return 0; | 3267 | return 0; |
3024 | err: | 3268 | err: |
3025 | _trace_xdp_redirect_err(dev, xdp_prog, index, err); | 3269 | _trace_xdp_redirect_err(dev, xdp_prog, index, err); |
@@ -3858,6 +4102,8 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |||
3858 | switch (func_id) { | 4102 | switch (func_id) { |
3859 | case BPF_FUNC_skb_load_bytes: | 4103 | case BPF_FUNC_skb_load_bytes: |
3860 | return &bpf_skb_load_bytes_proto; | 4104 | return &bpf_skb_load_bytes_proto; |
4105 | case BPF_FUNC_skb_load_bytes_relative: | ||
4106 | return &bpf_skb_load_bytes_relative_proto; | ||
3861 | case BPF_FUNC_get_socket_cookie: | 4107 | case BPF_FUNC_get_socket_cookie: |
3862 | return &bpf_get_socket_cookie_proto; | 4108 | return &bpf_get_socket_cookie_proto; |
3863 | case BPF_FUNC_get_socket_uid: | 4109 | case BPF_FUNC_get_socket_uid: |
@@ -3875,6 +4121,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |||
3875 | return &bpf_skb_store_bytes_proto; | 4121 | return &bpf_skb_store_bytes_proto; |
3876 | case BPF_FUNC_skb_load_bytes: | 4122 | case BPF_FUNC_skb_load_bytes: |
3877 | return &bpf_skb_load_bytes_proto; | 4123 | return &bpf_skb_load_bytes_proto; |
4124 | case BPF_FUNC_skb_load_bytes_relative: | ||
4125 | return &bpf_skb_load_bytes_relative_proto; | ||
3878 | case BPF_FUNC_skb_pull_data: | 4126 | case BPF_FUNC_skb_pull_data: |
3879 | return &bpf_skb_pull_data_proto; | 4127 | return &bpf_skb_pull_data_proto; |
3880 | case BPF_FUNC_csum_diff: | 4128 | case BPF_FUNC_csum_diff: |
@@ -4304,6 +4552,41 @@ static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write, | |||
4304 | return insn - insn_buf; | 4552 | return insn - insn_buf; |
4305 | } | 4553 | } |
4306 | 4554 | ||
4555 | static int bpf_gen_ld_abs(const struct bpf_insn *orig, | ||
4556 | struct bpf_insn *insn_buf) | ||
4557 | { | ||
4558 | bool indirect = BPF_MODE(orig->code) == BPF_IND; | ||
4559 | struct bpf_insn *insn = insn_buf; | ||
4560 | |||
4561 | /* We're guaranteed here that CTX is in R6. */ | ||
4562 | *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX); | ||
4563 | if (!indirect) { | ||
4564 | *insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm); | ||
4565 | } else { | ||
4566 | *insn++ = BPF_MOV64_REG(BPF_REG_2, orig->src_reg); | ||
4567 | if (orig->imm) | ||
4568 | *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm); | ||
4569 | } | ||
4570 | |||
4571 | switch (BPF_SIZE(orig->code)) { | ||
4572 | case BPF_B: | ||
4573 | *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8_no_cache); | ||
4574 | break; | ||
4575 | case BPF_H: | ||
4576 | *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16_no_cache); | ||
4577 | break; | ||
4578 | case BPF_W: | ||
4579 | *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32_no_cache); | ||
4580 | break; | ||
4581 | } | ||
4582 | |||
4583 | *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 2); | ||
4584 | *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0); | ||
4585 | *insn++ = BPF_EXIT_INSN(); | ||
4586 | |||
4587 | return insn - insn_buf; | ||
4588 | } | ||
4589 | |||
4307 | static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, | 4590 | static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, |
4308 | const struct bpf_prog *prog) | 4591 | const struct bpf_prog *prog) |
4309 | { | 4592 | { |
@@ -5573,6 +5856,7 @@ const struct bpf_verifier_ops sk_filter_verifier_ops = { | |||
5573 | .get_func_proto = sk_filter_func_proto, | 5856 | .get_func_proto = sk_filter_func_proto, |
5574 | .is_valid_access = sk_filter_is_valid_access, | 5857 | .is_valid_access = sk_filter_is_valid_access, |
5575 | .convert_ctx_access = bpf_convert_ctx_access, | 5858 | .convert_ctx_access = bpf_convert_ctx_access, |
5859 | .gen_ld_abs = bpf_gen_ld_abs, | ||
5576 | }; | 5860 | }; |
5577 | 5861 | ||
5578 | const struct bpf_prog_ops sk_filter_prog_ops = { | 5862 | const struct bpf_prog_ops sk_filter_prog_ops = { |
@@ -5584,6 +5868,7 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = { | |||
5584 | .is_valid_access = tc_cls_act_is_valid_access, | 5868 | .is_valid_access = tc_cls_act_is_valid_access, |
5585 | .convert_ctx_access = tc_cls_act_convert_ctx_access, | 5869 | .convert_ctx_access = tc_cls_act_convert_ctx_access, |
5586 | .gen_prologue = tc_cls_act_prologue, | 5870 | .gen_prologue = tc_cls_act_prologue, |
5871 | .gen_ld_abs = bpf_gen_ld_abs, | ||
5587 | }; | 5872 | }; |
5588 | 5873 | ||
5589 | const struct bpf_prog_ops tc_cls_act_prog_ops = { | 5874 | const struct bpf_prog_ops tc_cls_act_prog_ops = { |
diff --git a/net/core/sock.c b/net/core/sock.c index b2c3db169ca1..e7d8b6c955c6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -226,7 +226,8 @@ static struct lock_class_key af_family_kern_slock_keys[AF_MAX]; | |||
226 | x "AF_RXRPC" , x "AF_ISDN" , x "AF_PHONET" , \ | 226 | x "AF_RXRPC" , x "AF_ISDN" , x "AF_PHONET" , \ |
227 | x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \ | 227 | x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \ |
228 | x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \ | 228 | x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \ |
229 | x "AF_QIPCRTR", x "AF_SMC" , x "AF_MAX" | 229 | x "AF_QIPCRTR", x "AF_SMC" , x "AF_XDP" , \ |
230 | x "AF_MAX" | ||
230 | 231 | ||
231 | static const char *const af_family_key_strings[AF_MAX+1] = { | 232 | static const char *const af_family_key_strings[AF_MAX+1] = { |
232 | _sock_locks("sk_lock-") | 233 | _sock_locks("sk_lock-") |
@@ -262,7 +263,8 @@ static const char *const af_family_rlock_key_strings[AF_MAX+1] = { | |||
262 | "rlock-AF_RXRPC" , "rlock-AF_ISDN" , "rlock-AF_PHONET" , | 263 | "rlock-AF_RXRPC" , "rlock-AF_ISDN" , "rlock-AF_PHONET" , |
263 | "rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG" , | 264 | "rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG" , |
264 | "rlock-AF_NFC" , "rlock-AF_VSOCK" , "rlock-AF_KCM" , | 265 | "rlock-AF_NFC" , "rlock-AF_VSOCK" , "rlock-AF_KCM" , |
265 | "rlock-AF_QIPCRTR", "rlock-AF_SMC" , "rlock-AF_MAX" | 266 | "rlock-AF_QIPCRTR", "rlock-AF_SMC" , "rlock-AF_XDP" , |
267 | "rlock-AF_MAX" | ||
266 | }; | 268 | }; |
267 | static const char *const af_family_wlock_key_strings[AF_MAX+1] = { | 269 | static const char *const af_family_wlock_key_strings[AF_MAX+1] = { |
268 | "wlock-AF_UNSPEC", "wlock-AF_UNIX" , "wlock-AF_INET" , | 270 | "wlock-AF_UNSPEC", "wlock-AF_UNIX" , "wlock-AF_INET" , |
@@ -279,7 +281,8 @@ static const char *const af_family_wlock_key_strings[AF_MAX+1] = { | |||
279 | "wlock-AF_RXRPC" , "wlock-AF_ISDN" , "wlock-AF_PHONET" , | 281 | "wlock-AF_RXRPC" , "wlock-AF_ISDN" , "wlock-AF_PHONET" , |
280 | "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG" , | 282 | "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG" , |
281 | "wlock-AF_NFC" , "wlock-AF_VSOCK" , "wlock-AF_KCM" , | 283 | "wlock-AF_NFC" , "wlock-AF_VSOCK" , "wlock-AF_KCM" , |
282 | "wlock-AF_QIPCRTR", "wlock-AF_SMC" , "wlock-AF_MAX" | 284 | "wlock-AF_QIPCRTR", "wlock-AF_SMC" , "wlock-AF_XDP" , |
285 | "wlock-AF_MAX" | ||
283 | }; | 286 | }; |
284 | static const char *const af_family_elock_key_strings[AF_MAX+1] = { | 287 | static const char *const af_family_elock_key_strings[AF_MAX+1] = { |
285 | "elock-AF_UNSPEC", "elock-AF_UNIX" , "elock-AF_INET" , | 288 | "elock-AF_UNSPEC", "elock-AF_UNIX" , "elock-AF_INET" , |
@@ -296,7 +299,8 @@ static const char *const af_family_elock_key_strings[AF_MAX+1] = { | |||
296 | "elock-AF_RXRPC" , "elock-AF_ISDN" , "elock-AF_PHONET" , | 299 | "elock-AF_RXRPC" , "elock-AF_ISDN" , "elock-AF_PHONET" , |
297 | "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG" , | 300 | "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG" , |
298 | "elock-AF_NFC" , "elock-AF_VSOCK" , "elock-AF_KCM" , | 301 | "elock-AF_NFC" , "elock-AF_VSOCK" , "elock-AF_KCM" , |
299 | "elock-AF_QIPCRTR", "elock-AF_SMC" , "elock-AF_MAX" | 302 | "elock-AF_QIPCRTR", "elock-AF_SMC" , "elock-AF_XDP" , |
303 | "elock-AF_MAX" | ||
300 | }; | 304 | }; |
301 | 305 | ||
302 | /* | 306 | /* |
diff --git a/net/core/xdp.c b/net/core/xdp.c index 0c86b53a3a63..bf6758f74339 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c | |||
@@ -308,11 +308,9 @@ err: | |||
308 | } | 308 | } |
309 | EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); | 309 | EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); |
310 | 310 | ||
311 | void xdp_return_frame(struct xdp_frame *xdpf) | 311 | static void xdp_return(void *data, struct xdp_mem_info *mem) |
312 | { | 312 | { |
313 | struct xdp_mem_info *mem = &xdpf->mem; | ||
314 | struct xdp_mem_allocator *xa; | 313 | struct xdp_mem_allocator *xa; |
315 | void *data = xdpf->data; | ||
316 | struct page *page; | 314 | struct page *page; |
317 | 315 | ||
318 | switch (mem->type) { | 316 | switch (mem->type) { |
@@ -339,4 +337,15 @@ void xdp_return_frame(struct xdp_frame *xdpf) | |||
339 | break; | 337 | break; |
340 | } | 338 | } |
341 | } | 339 | } |
340 | |||
341 | void xdp_return_frame(struct xdp_frame *xdpf) | ||
342 | { | ||
343 | xdp_return(xdpf->data, &xdpf->mem); | ||
344 | } | ||
342 | EXPORT_SYMBOL_GPL(xdp_return_frame); | 345 | EXPORT_SYMBOL_GPL(xdp_return_frame); |
346 | |||
347 | void xdp_return_buff(struct xdp_buff *xdp) | ||
348 | { | ||
349 | xdp_return(xdp->data, &xdp->rxq->mem); | ||
350 | } | ||
351 | EXPORT_SYMBOL_GPL(xdp_return_buff); | ||
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 01f3515cada0..611a26d5235c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
@@ -209,7 +209,7 @@ static void prb_clear_rxhash(struct tpacket_kbdq_core *, | |||
209 | static void prb_fill_vlan_info(struct tpacket_kbdq_core *, | 209 | static void prb_fill_vlan_info(struct tpacket_kbdq_core *, |
210 | struct tpacket3_hdr *); | 210 | struct tpacket3_hdr *); |
211 | static void packet_flush_mclist(struct sock *sk); | 211 | static void packet_flush_mclist(struct sock *sk); |
212 | static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb); | 212 | static u16 packet_pick_tx_queue(struct sk_buff *skb); |
213 | 213 | ||
214 | struct packet_skb_cb { | 214 | struct packet_skb_cb { |
215 | union { | 215 | union { |
@@ -243,40 +243,7 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); | |||
243 | 243 | ||
244 | static int packet_direct_xmit(struct sk_buff *skb) | 244 | static int packet_direct_xmit(struct sk_buff *skb) |
245 | { | 245 | { |
246 | struct net_device *dev = skb->dev; | 246 | return dev_direct_xmit(skb, packet_pick_tx_queue(skb)); |
247 | struct sk_buff *orig_skb = skb; | ||
248 | struct netdev_queue *txq; | ||
249 | int ret = NETDEV_TX_BUSY; | ||
250 | bool again = false; | ||
251 | |||
252 | if (unlikely(!netif_running(dev) || | ||
253 | !netif_carrier_ok(dev))) | ||
254 | goto drop; | ||
255 | |||
256 | skb = validate_xmit_skb_list(skb, dev, &again); | ||
257 | if (skb != orig_skb) | ||
258 | goto drop; | ||
259 | |||
260 | packet_pick_tx_queue(dev, skb); | ||
261 | txq = skb_get_tx_queue(dev, skb); | ||
262 | |||
263 | local_bh_disable(); | ||
264 | |||
265 | HARD_TX_LOCK(dev, txq, smp_processor_id()); | ||
266 | if (!netif_xmit_frozen_or_drv_stopped(txq)) | ||
267 | ret = netdev_start_xmit(skb, dev, txq, false); | ||
268 | HARD_TX_UNLOCK(dev, txq); | ||
269 | |||
270 | local_bh_enable(); | ||
271 | |||
272 | if (!dev_xmit_complete(ret)) | ||
273 | kfree_skb(skb); | ||
274 | |||
275 | return ret; | ||
276 | drop: | ||
277 | atomic_long_inc(&dev->tx_dropped); | ||
278 | kfree_skb_list(skb); | ||
279 | return NET_XMIT_DROP; | ||
280 | } | 247 | } |
281 | 248 | ||
282 | static struct net_device *packet_cached_dev_get(struct packet_sock *po) | 249 | static struct net_device *packet_cached_dev_get(struct packet_sock *po) |
@@ -313,8 +280,9 @@ static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) | |||
313 | return (u16) raw_smp_processor_id() % dev->real_num_tx_queues; | 280 | return (u16) raw_smp_processor_id() % dev->real_num_tx_queues; |
314 | } | 281 | } |
315 | 282 | ||
316 | static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) | 283 | static u16 packet_pick_tx_queue(struct sk_buff *skb) |
317 | { | 284 | { |
285 | struct net_device *dev = skb->dev; | ||
318 | const struct net_device_ops *ops = dev->netdev_ops; | 286 | const struct net_device_ops *ops = dev->netdev_ops; |
319 | u16 queue_index; | 287 | u16 queue_index; |
320 | 288 | ||
@@ -326,7 +294,7 @@ static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) | |||
326 | queue_index = __packet_pick_tx_queue(dev, skb); | 294 | queue_index = __packet_pick_tx_queue(dev, skb); |
327 | } | 295 | } |
328 | 296 | ||
329 | skb_set_queue_mapping(skb, queue_index); | 297 | return queue_index; |
330 | } | 298 | } |
331 | 299 | ||
332 | /* __register_prot_hook must be invoked through register_prot_hook | 300 | /* __register_prot_hook must be invoked through register_prot_hook |
diff --git a/net/xdp/Kconfig b/net/xdp/Kconfig new file mode 100644 index 000000000000..90e4a7152854 --- /dev/null +++ b/net/xdp/Kconfig | |||
@@ -0,0 +1,7 @@ | |||
1 | config XDP_SOCKETS | ||
2 | bool "XDP sockets" | ||
3 | depends on BPF_SYSCALL | ||
4 | default n | ||
5 | help | ||
6 | XDP sockets provide a channel between XDP programs and | ||
7 | userspace applications. | ||
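The files below (xsk.c, xdp_umem.c and the queue code) implement the kernel side of this channel. For orientation, a hedged userspace sketch of the control path; bufs (a page-aligned area, e.g. from mmap()) and ifindex are placeholders, and error handling is elided. Note that bind() additionally requires the UMEM fill and completion rings, whose setsockopts fall outside this excerpt (see xdp_umem_validate_queues()):

#include <linux/if_xdp.h>
#include <sys/socket.h>

static int xsk_setup(void *bufs, int ifindex)
{
	struct xdp_umem_reg mr = {
		.addr = (__u64)(unsigned long)bufs,
		.len = 64 * 2048,	/* 64 frames of 2 KiB */
		.frame_size = 2048,	/* power of two, >= 2048 */
		.frame_headroom = 0,
	};
	struct sockaddr_xdp sxdp = {
		.sxdp_family = AF_XDP,
		.sxdp_ifindex = ifindex,
		.sxdp_queue_id = 0,
	};
	int descs = 64;		/* power of two, per xsk_init_queue() */
	int fd = socket(AF_XDP, SOCK_RAW, 0);

	setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
	setsockopt(fd, SOL_XDP, XDP_RX_RING, &descs, sizeof(descs));
	/* ... fill/completion ring setup elided ... */
	bind(fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
	return fd;
}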
diff --git a/net/xdp/Makefile b/net/xdp/Makefile new file mode 100644 index 000000000000..074fb2b2d51c --- /dev/null +++ b/net/xdp/Makefile | |||
@@ -0,0 +1,2 @@ | |||
1 | obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o | ||
2 | |||
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c new file mode 100644 index 000000000000..881dfdefe235 --- /dev/null +++ b/net/xdp/xdp_umem.c | |||
@@ -0,0 +1,260 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* XDP user-space packet buffer | ||
3 | * Copyright(c) 2018 Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #include <linux/init.h> | ||
16 | #include <linux/sched/mm.h> | ||
17 | #include <linux/sched/signal.h> | ||
18 | #include <linux/sched/task.h> | ||
19 | #include <linux/uaccess.h> | ||
20 | #include <linux/slab.h> | ||
21 | #include <linux/bpf.h> | ||
22 | #include <linux/mm.h> | ||
23 | |||
24 | #include "xdp_umem.h" | ||
25 | |||
26 | #define XDP_UMEM_MIN_FRAME_SIZE 2048 | ||
27 | |||
28 | int xdp_umem_create(struct xdp_umem **umem) | ||
29 | { | ||
30 | *umem = kzalloc(sizeof(**umem), GFP_KERNEL); | ||
31 | |||
32 | if (!(*umem)) | ||
33 | return -ENOMEM; | ||
34 | |||
35 | return 0; | ||
36 | } | ||
37 | |||
38 | static void xdp_umem_unpin_pages(struct xdp_umem *umem) | ||
39 | { | ||
40 | unsigned int i; | ||
41 | |||
42 | if (umem->pgs) { | ||
43 | for (i = 0; i < umem->npgs; i++) { | ||
44 | struct page *page = umem->pgs[i]; | ||
45 | |||
46 | set_page_dirty_lock(page); | ||
47 | put_page(page); | ||
48 | } | ||
49 | |||
50 | kfree(umem->pgs); | ||
51 | umem->pgs = NULL; | ||
52 | } | ||
53 | } | ||
54 | |||
55 | static void xdp_umem_unaccount_pages(struct xdp_umem *umem) | ||
56 | { | ||
57 | if (umem->user) { | ||
58 | atomic_long_sub(umem->npgs, &umem->user->locked_vm); | ||
59 | free_uid(umem->user); | ||
60 | } | ||
61 | } | ||
62 | |||
63 | static void xdp_umem_release(struct xdp_umem *umem) | ||
64 | { | ||
65 | struct task_struct *task; | ||
66 | struct mm_struct *mm; | ||
67 | |||
68 | if (umem->fq) { | ||
69 | xskq_destroy(umem->fq); | ||
70 | umem->fq = NULL; | ||
71 | } | ||
72 | |||
73 | if (umem->cq) { | ||
74 | xskq_destroy(umem->cq); | ||
75 | umem->cq = NULL; | ||
76 | } | ||
77 | |||
78 | if (umem->pgs) { | ||
79 | xdp_umem_unpin_pages(umem); | ||
80 | |||
81 | task = get_pid_task(umem->pid, PIDTYPE_PID); | ||
82 | put_pid(umem->pid); | ||
83 | if (!task) | ||
84 | goto out; | ||
85 | mm = get_task_mm(task); | ||
86 | put_task_struct(task); | ||
87 | if (!mm) | ||
88 | goto out; | ||
89 | |||
90 | mmput(mm); | ||
91 | umem->pgs = NULL; | ||
92 | } | ||
93 | |||
94 | xdp_umem_unaccount_pages(umem); | ||
95 | out: | ||
96 | kfree(umem); | ||
97 | } | ||
98 | |||
99 | static void xdp_umem_release_deferred(struct work_struct *work) | ||
100 | { | ||
101 | struct xdp_umem *umem = container_of(work, struct xdp_umem, work); | ||
102 | |||
103 | xdp_umem_release(umem); | ||
104 | } | ||
105 | |||
106 | void xdp_get_umem(struct xdp_umem *umem) | ||
107 | { | ||
108 | atomic_inc(&umem->users); | ||
109 | } | ||
110 | |||
111 | void xdp_put_umem(struct xdp_umem *umem) | ||
112 | { | ||
113 | if (!umem) | ||
114 | return; | ||
115 | |||
116 | if (atomic_dec_and_test(&umem->users)) { | ||
117 | INIT_WORK(&umem->work, xdp_umem_release_deferred); | ||
118 | schedule_work(&umem->work); | ||
119 | } | ||
120 | } | ||
121 | |||
122 | static int xdp_umem_pin_pages(struct xdp_umem *umem) | ||
123 | { | ||
124 | unsigned int gup_flags = FOLL_WRITE; | ||
125 | long npgs; | ||
126 | int err; | ||
127 | |||
128 | umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL); | ||
129 | if (!umem->pgs) | ||
130 | return -ENOMEM; | ||
131 | |||
132 | down_write(¤t->mm->mmap_sem); | ||
133 | npgs = get_user_pages(umem->address, umem->npgs, | ||
134 | gup_flags, &umem->pgs[0], NULL); | ||
135 | up_write(¤t->mm->mmap_sem); | ||
136 | |||
137 | if (npgs != umem->npgs) { | ||
138 | if (npgs >= 0) { | ||
139 | umem->npgs = npgs; | ||
140 | err = -ENOMEM; | ||
141 | goto out_pin; | ||
142 | } | ||
143 | err = npgs; | ||
144 | goto out_pgs; | ||
145 | } | ||
146 | return 0; | ||
147 | |||
148 | out_pin: | ||
149 | xdp_umem_unpin_pages(umem); | ||
150 | out_pgs: | ||
151 | kfree(umem->pgs); | ||
152 | umem->pgs = NULL; | ||
153 | return err; | ||
154 | } | ||
155 | |||
156 | static int xdp_umem_account_pages(struct xdp_umem *umem) | ||
157 | { | ||
158 | unsigned long lock_limit, new_npgs, old_npgs; | ||
159 | |||
160 | if (capable(CAP_IPC_LOCK)) | ||
161 | return 0; | ||
162 | |||
163 | lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; | ||
164 | umem->user = get_uid(current_user()); | ||
165 | |||
166 | do { | ||
167 | old_npgs = atomic_long_read(&umem->user->locked_vm); | ||
168 | new_npgs = old_npgs + umem->npgs; | ||
169 | if (new_npgs > lock_limit) { | ||
170 | free_uid(umem->user); | ||
171 | umem->user = NULL; | ||
172 | return -ENOBUFS; | ||
173 | } | ||
174 | } while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs, | ||
175 | new_npgs) != old_npgs); | ||
176 | return 0; | ||
177 | } | ||
178 | |||
179 | int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) | ||
180 | { | ||
181 | u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom; | ||
182 | u64 addr = mr->addr, size = mr->len; | ||
183 | unsigned int nframes, nfpp; | ||
184 | int size_chk, err; | ||
185 | |||
186 | if (!umem) | ||
187 | return -EINVAL; | ||
188 | |||
189 | if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) { | ||
190 | /* Strictly speaking we could support this, if: | ||
191 | * - huge pages, or | ||
192 | * - using an IOMMU, or | ||
193 | * - making sure the memory area is consecutive | ||
194 | * but for now, we simply say "computer says no". | ||
195 | */ | ||
196 | return -EINVAL; | ||
197 | } | ||
198 | |||
199 | if (!is_power_of_2(frame_size)) | ||
200 | return -EINVAL; | ||
201 | |||
202 | if (!PAGE_ALIGNED(addr)) { | ||
203 | /* Memory area has to be page size aligned for | ||
204 | * simplicity; this might change. | ||
205 | */ | ||
206 | return -EINVAL; | ||
207 | } | ||
208 | |||
209 | if ((addr + size) < addr) | ||
210 | return -EINVAL; | ||
211 | |||
212 | nframes = size / frame_size; | ||
213 | if (nframes == 0 || nframes > UINT_MAX) | ||
214 | return -EINVAL; | ||
215 | |||
216 | nfpp = PAGE_SIZE / frame_size; | ||
217 | if (nframes < nfpp || nframes % nfpp) | ||
218 | return -EINVAL; | ||
219 | |||
220 | frame_headroom = ALIGN(frame_headroom, 64); | ||
221 | |||
222 | size_chk = frame_size - frame_headroom - XDP_PACKET_HEADROOM; | ||
223 | if (size_chk < 0) | ||
224 | return -EINVAL; | ||
225 | |||
226 | umem->pid = get_task_pid(current, PIDTYPE_PID); | ||
227 | umem->size = (size_t)size; | ||
228 | umem->address = (unsigned long)addr; | ||
229 | umem->props.frame_size = frame_size; | ||
230 | umem->props.nframes = nframes; | ||
231 | umem->frame_headroom = frame_headroom; | ||
232 | umem->npgs = size / PAGE_SIZE; | ||
233 | umem->pgs = NULL; | ||
234 | umem->user = NULL; | ||
235 | |||
236 | umem->frame_size_log2 = ilog2(frame_size); | ||
237 | umem->nfpp_mask = nfpp - 1; | ||
238 | umem->nfpplog2 = ilog2(nfpp); | ||
239 | atomic_set(&umem->users, 1); | ||
240 | |||
241 | err = xdp_umem_account_pages(umem); | ||
242 | if (err) | ||
243 | goto out; | ||
244 | |||
245 | err = xdp_umem_pin_pages(umem); | ||
246 | if (err) | ||
247 | goto out_account; | ||
248 | return 0; | ||
249 | |||
250 | out_account: | ||
251 | xdp_umem_unaccount_pages(umem); | ||
252 | out: | ||
253 | put_pid(umem->pid); | ||
254 | return err; | ||
255 | } | ||
256 | |||
257 | bool xdp_umem_validate_queues(struct xdp_umem *umem) | ||
258 | { | ||
259 | return (umem->fq && umem->cq); | ||
260 | } | ||
diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h new file mode 100644 index 000000000000..7e0b2fab8522 --- /dev/null +++ b/net/xdp/xdp_umem.h | |||
@@ -0,0 +1,67 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 | ||
2 | * XDP user-space packet buffer | ||
3 | * Copyright(c) 2018 Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #ifndef XDP_UMEM_H_ | ||
16 | #define XDP_UMEM_H_ | ||
17 | |||
18 | #include <linux/mm.h> | ||
19 | #include <linux/if_xdp.h> | ||
20 | #include <linux/workqueue.h> | ||
21 | |||
22 | #include "xsk_queue.h" | ||
23 | #include "xdp_umem_props.h" | ||
24 | |||
25 | struct xdp_umem { | ||
26 | struct xsk_queue *fq; | ||
27 | struct xsk_queue *cq; | ||
28 | struct page **pgs; | ||
29 | struct xdp_umem_props props; | ||
30 | u32 npgs; | ||
31 | u32 frame_headroom; | ||
32 | u32 nfpp_mask; | ||
33 | u32 nfpplog2; | ||
34 | u32 frame_size_log2; | ||
35 | struct user_struct *user; | ||
36 | struct pid *pid; | ||
37 | unsigned long address; | ||
38 | size_t size; | ||
39 | atomic_t users; | ||
40 | struct work_struct work; | ||
41 | }; | ||
42 | |||
43 | static inline char *xdp_umem_get_data(struct xdp_umem *umem, u32 idx) | ||
44 | { | ||
45 | u64 pg, off; | ||
46 | char *data; | ||
47 | |||
48 | pg = idx >> umem->nfpplog2; | ||
49 | off = (idx & umem->nfpp_mask) << umem->frame_size_log2; | ||
50 | |||
51 | data = page_address(umem->pgs[pg]); | ||
52 | return data + off; | ||
53 | } | ||
54 | |||
55 | static inline char *xdp_umem_get_data_with_headroom(struct xdp_umem *umem, | ||
56 | u32 idx) | ||
57 | { | ||
58 | return xdp_umem_get_data(umem, idx) + umem->frame_headroom; | ||
59 | } | ||
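The shift-and-mask arithmetic resolves a frame id to a kernel address without a division. A worked example with the values from xdp_umem_reg() above (frame_size = 2048 on a 4 KiB-page system, so frame_size_log2 = 11, nfpplog2 = 1, nfpp_mask = 1), for idx = 5:

	pg  = 5 >> 1        = 2;	/* third pinned page */
	off = (5 & 1) << 11 = 2048;	/* second frame within it */
	/* xdp_umem_get_data(umem, 5) == page_address(umem->pgs[2]) + 2048 */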
60 | |||
61 | bool xdp_umem_validate_queues(struct xdp_umem *umem); | ||
62 | int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr); | ||
63 | void xdp_get_umem(struct xdp_umem *umem); | ||
64 | void xdp_put_umem(struct xdp_umem *umem); | ||
65 | int xdp_umem_create(struct xdp_umem **umem); | ||
66 | |||
67 | #endif /* XDP_UMEM_H_ */ | ||
diff --git a/net/xdp/xdp_umem_props.h b/net/xdp/xdp_umem_props.h new file mode 100644 index 000000000000..77fb5daf29f3 --- /dev/null +++ b/net/xdp/xdp_umem_props.h | |||
@@ -0,0 +1,23 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 | ||
2 | * XDP user-space packet buffer | ||
3 | * Copyright(c) 2018 Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #ifndef XDP_UMEM_PROPS_H_ | ||
16 | #define XDP_UMEM_PROPS_H_ | ||
17 | |||
18 | struct xdp_umem_props { | ||
19 | u32 frame_size; | ||
20 | u32 nframes; | ||
21 | }; | ||
22 | |||
23 | #endif /* XDP_UMEM_PROPS_H_ */ | ||
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c new file mode 100644 index 000000000000..009c5af5bba5 --- /dev/null +++ b/net/xdp/xsk.c | |||
@@ -0,0 +1,656 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* XDP sockets | ||
3 | * | ||
4 | * AF_XDP sockets provide a channel between XDP programs and userspace | ||
5 | * applications. | ||
6 | * Copyright(c) 2018 Intel Corporation. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * Author(s): Björn Töpel <bjorn.topel@intel.com> | ||
18 | * Magnus Karlsson <magnus.karlsson@intel.com> | ||
19 | */ | ||
20 | |||
21 | #define pr_fmt(fmt) "AF_XDP: %s: " fmt, __func__ | ||
22 | |||
23 | #include <linux/if_xdp.h> | ||
24 | #include <linux/init.h> | ||
25 | #include <linux/sched/mm.h> | ||
26 | #include <linux/sched/signal.h> | ||
27 | #include <linux/sched/task.h> | ||
28 | #include <linux/socket.h> | ||
29 | #include <linux/file.h> | ||
30 | #include <linux/uaccess.h> | ||
31 | #include <linux/net.h> | ||
32 | #include <linux/netdevice.h> | ||
33 | #include <net/xdp_sock.h> | ||
34 | #include <net/xdp.h> | ||
35 | |||
36 | #include "xsk_queue.h" | ||
37 | #include "xdp_umem.h" | ||
38 | |||
39 | #define TX_BATCH_SIZE 16 | ||
40 | |||
41 | static struct xdp_sock *xdp_sk(struct sock *sk) | ||
42 | { | ||
43 | return (struct xdp_sock *)sk; | ||
44 | } | ||
45 | |||
46 | bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) | ||
47 | { | ||
48 | return !!xs->rx; | ||
49 | } | ||
50 | |||
51 | static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) | ||
52 | { | ||
53 | u32 *id, len = xdp->data_end - xdp->data; | ||
54 | void *buffer; | ||
55 | int err = 0; | ||
56 | |||
57 | if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) | ||
58 | return -EINVAL; | ||
59 | |||
60 | id = xskq_peek_id(xs->umem->fq); | ||
61 | if (!id) | ||
62 | return -ENOSPC; | ||
63 | |||
64 | buffer = xdp_umem_get_data_with_headroom(xs->umem, *id); | ||
65 | memcpy(buffer, xdp->data, len); | ||
66 | err = xskq_produce_batch_desc(xs->rx, *id, len, | ||
67 | xs->umem->frame_headroom); | ||
68 | if (!err) | ||
69 | xskq_discard_id(xs->umem->fq); | ||
70 | |||
71 | return err; | ||
72 | } | ||
73 | |||
74 | int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) | ||
75 | { | ||
76 | int err; | ||
77 | |||
78 | err = __xsk_rcv(xs, xdp); | ||
79 | if (likely(!err)) | ||
80 | xdp_return_buff(xdp); | ||
81 | else | ||
82 | xs->rx_dropped++; | ||
83 | |||
84 | return err; | ||
85 | } | ||
86 | |||
87 | void xsk_flush(struct xdp_sock *xs) | ||
88 | { | ||
89 | xskq_produce_flush_desc(xs->rx); | ||
90 | xs->sk.sk_data_ready(&xs->sk); | ||
91 | } | ||
92 | |||
93 | int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) | ||
94 | { | ||
95 | int err; | ||
96 | |||
97 | err = __xsk_rcv(xs, xdp); | ||
98 | if (!err) | ||
99 | xsk_flush(xs); | ||
100 | else | ||
101 | xs->rx_dropped++; | ||
102 | |||
103 | return err; | ||
104 | } | ||
105 | |||
106 | static void xsk_destruct_skb(struct sk_buff *skb) | ||
107 | { | ||
108 | u32 id = (u32)(long)skb_shinfo(skb)->destructor_arg; | ||
109 | struct xdp_sock *xs = xdp_sk(skb->sk); | ||
110 | |||
111 | WARN_ON_ONCE(xskq_produce_id(xs->umem->cq, id)); | ||
112 | |||
113 | sock_wfree(skb); | ||
114 | } | ||
115 | |||
116 | static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, | ||
117 | size_t total_len) | ||
118 | { | ||
119 | bool need_wait = !(m->msg_flags & MSG_DONTWAIT); | ||
120 | u32 max_batch = TX_BATCH_SIZE; | ||
121 | struct xdp_sock *xs = xdp_sk(sk); | ||
122 | bool sent_frame = false; | ||
123 | struct xdp_desc desc; | ||
124 | struct sk_buff *skb; | ||
125 | int err = 0; | ||
126 | |||
127 | if (unlikely(!xs->tx)) | ||
128 | return -ENOBUFS; | ||
129 | if (need_wait) | ||
130 | return -EOPNOTSUPP; | ||
131 | |||
132 | mutex_lock(&xs->mutex); | ||
133 | |||
134 | while (xskq_peek_desc(xs->tx, &desc)) { | ||
135 | char *buffer; | ||
136 | u32 id, len; | ||
137 | |||
138 | if (max_batch-- == 0) { | ||
139 | err = -EAGAIN; | ||
140 | goto out; | ||
141 | } | ||
142 | |||
143 | if (xskq_reserve_id(xs->umem->cq)) { | ||
144 | err = -EAGAIN; | ||
145 | goto out; | ||
146 | } | ||
147 | |||
148 | len = desc.len; | ||
149 | if (unlikely(len > xs->dev->mtu)) { | ||
150 | err = -EMSGSIZE; | ||
151 | goto out; | ||
152 | } | ||
153 | |||
154 | skb = sock_alloc_send_skb(sk, len, !need_wait, &err); | ||
155 | if (unlikely(!skb)) { | ||
156 | err = -EAGAIN; | ||
157 | goto out; | ||
158 | } | ||
159 | |||
160 | skb_put(skb, len); | ||
161 | id = desc.idx; | ||
162 | buffer = xdp_umem_get_data(xs->umem, id) + desc.offset; | ||
163 | err = skb_store_bits(skb, 0, buffer, len); | ||
164 | if (unlikely(err)) { | ||
165 | kfree_skb(skb); | ||
166 | goto out; | ||
167 | } | ||
168 | |||
169 | skb->dev = xs->dev; | ||
170 | skb->priority = sk->sk_priority; | ||
171 | skb->mark = sk->sk_mark; | ||
172 | skb_shinfo(skb)->destructor_arg = (void *)(long)id; | ||
173 | skb->destructor = xsk_destruct_skb; | ||
174 | |||
175 | err = dev_direct_xmit(skb, xs->queue_id); | ||
176 | /* Ignore NET_XMIT_CN as packet might have been sent */ | ||
177 | if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) { | ||
178 | err = -EAGAIN; | ||
179 | /* SKB consumed by dev_direct_xmit() */ | ||
180 | goto out; | ||
181 | } | ||
182 | |||
183 | sent_frame = true; | ||
184 | xskq_discard_desc(xs->tx); | ||
185 | } | ||
186 | |||
187 | out: | ||
188 | if (sent_frame) | ||
189 | sk->sk_write_space(sk); | ||
190 | |||
191 | mutex_unlock(&xs->mutex); | ||
192 | return err; | ||
193 | } | ||
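Two behaviours worth noting for callers: need_wait above means blocking sends are rejected with -EOPNOTSUPP, so userspace must pass MSG_DONTWAIT, and -EAGAIN signals either the TX_BATCH_SIZE cap or a full completion ring rather than a hard failure. A hedged userspace sketch, where fd is the bound AF_XDP socket from the earlier setup:

#include <errno.h>
#include <sys/socket.h>

static void xsk_kick_tx(int fd)
{
	struct msghdr msg = {};

	/* Descriptors were already placed on the TX ring; sendmsg()
	 * only kicks the kernel to transmit them.
	 */
	while (sendmsg(fd, &msg, MSG_DONTWAIT) < 0 && errno == EAGAIN)
		;	/* batch limit or completion ring full: retry */
}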
194 | |||
195 | static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) | ||
196 | { | ||
197 | struct sock *sk = sock->sk; | ||
198 | struct xdp_sock *xs = xdp_sk(sk); | ||
199 | |||
200 | if (unlikely(!xs->dev)) | ||
201 | return -ENXIO; | ||
202 | if (unlikely(!(xs->dev->flags & IFF_UP))) | ||
203 | return -ENETDOWN; | ||
204 | |||
205 | return xsk_generic_xmit(sk, m, total_len); | ||
206 | } | ||
207 | |||
208 | static unsigned int xsk_poll(struct file *file, struct socket *sock, | ||
209 | struct poll_table_struct *wait) | ||
210 | { | ||
211 | unsigned int mask = datagram_poll(file, sock, wait); | ||
212 | struct sock *sk = sock->sk; | ||
213 | struct xdp_sock *xs = xdp_sk(sk); | ||
214 | |||
215 | if (xs->rx && !xskq_empty_desc(xs->rx)) | ||
216 | mask |= POLLIN | POLLRDNORM; | ||
217 | if (xs->tx && !xskq_full_desc(xs->tx)) | ||
218 | mask |= POLLOUT | POLLWRNORM; | ||
219 | |||
220 | return mask; | ||
221 | } | ||
222 | |||
223 | static int xsk_init_queue(u32 entries, struct xsk_queue **queue, | ||
224 | bool umem_queue) | ||
225 | { | ||
226 | struct xsk_queue *q; | ||
227 | |||
228 | if (entries == 0 || *queue || !is_power_of_2(entries)) | ||
229 | return -EINVAL; | ||
230 | |||
231 | q = xskq_create(entries, umem_queue); | ||
232 | if (!q) | ||
233 | return -ENOMEM; | ||
234 | |||
235 | *queue = q; | ||
236 | return 0; | ||
237 | } | ||
238 | |||
239 | static void __xsk_release(struct xdp_sock *xs) | ||
240 | { | ||
241 | /* Wait for driver to stop using the xdp socket. */ | ||
242 | synchronize_net(); | ||
243 | |||
244 | dev_put(xs->dev); | ||
245 | } | ||
246 | |||
247 | static int xsk_release(struct socket *sock) | ||
248 | { | ||
249 | struct sock *sk = sock->sk; | ||
250 | struct xdp_sock *xs = xdp_sk(sk); | ||
251 | struct net *net; | ||
252 | |||
253 | if (!sk) | ||
254 | return 0; | ||
255 | |||
256 | net = sock_net(sk); | ||
257 | |||
258 | local_bh_disable(); | ||
259 | sock_prot_inuse_add(net, sk->sk_prot, -1); | ||
260 | local_bh_enable(); | ||
261 | |||
262 | if (xs->dev) { | ||
263 | __xsk_release(xs); | ||
264 | xs->dev = NULL; | ||
265 | } | ||
266 | |||
267 | sock_orphan(sk); | ||
268 | sock->sk = NULL; | ||
269 | |||
270 | sk_refcnt_debug_release(sk); | ||
271 | sock_put(sk); | ||
272 | |||
273 | return 0; | ||
274 | } | ||
275 | |||
276 | static struct socket *xsk_lookup_xsk_from_fd(int fd) | ||
277 | { | ||
278 | struct socket *sock; | ||
279 | int err; | ||
280 | |||
281 | sock = sockfd_lookup(fd, &err); | ||
282 | if (!sock) | ||
283 | return ERR_PTR(-ENOTSOCK); | ||
284 | |||
285 | if (sock->sk->sk_family != PF_XDP) { | ||
286 | sockfd_put(sock); | ||
287 | return ERR_PTR(-ENOPROTOOPT); | ||
288 | } | ||
289 | |||
290 | return sock; | ||
291 | } | ||
292 | |||
293 | static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) | ||
294 | { | ||
295 | struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr; | ||
296 | struct sock *sk = sock->sk; | ||
297 | struct net_device *dev, *dev_curr; | ||
298 | struct xdp_sock *xs = xdp_sk(sk); | ||
299 | struct xdp_umem *old_umem = NULL; | ||
300 | int err = 0; | ||
301 | |||
302 | if (addr_len < sizeof(struct sockaddr_xdp)) | ||
303 | return -EINVAL; | ||
304 | if (sxdp->sxdp_family != AF_XDP) | ||
305 | return -EINVAL; | ||
306 | |||
307 | mutex_lock(&xs->mutex); | ||
308 | dev_curr = xs->dev; | ||
309 | dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex); | ||
310 | if (!dev) { | ||
311 | err = -ENODEV; | ||
312 | goto out_release; | ||
313 | } | ||
314 | |||
315 | if (!xs->rx && !xs->tx) { | ||
316 | err = -EINVAL; | ||
317 | goto out_unlock; | ||
318 | } | ||
319 | |||
320 | if (sxdp->sxdp_queue_id >= dev->num_rx_queues) { | ||
321 | err = -EINVAL; | ||
322 | goto out_unlock; | ||
323 | } | ||
324 | |||
325 | if (sxdp->sxdp_flags & XDP_SHARED_UMEM) { | ||
326 | struct xdp_sock *umem_xs; | ||
327 | struct socket *sock; | ||
328 | |||
329 | if (xs->umem) { | ||
330 | /* We already have our own. */ | ||
331 | err = -EINVAL; | ||
332 | goto out_unlock; | ||
333 | } | ||
334 | |||
335 | sock = xsk_lookup_xsk_from_fd(sxdp->sxdp_shared_umem_fd); | ||
336 | if (IS_ERR(sock)) { | ||
337 | err = PTR_ERR(sock); | ||
338 | goto out_unlock; | ||
339 | } | ||
340 | |||
341 | umem_xs = xdp_sk(sock->sk); | ||
342 | if (!umem_xs->umem) { | ||
343 | /* No umem to inherit. */ | ||
344 | err = -EBADF; | ||
345 | sockfd_put(sock); | ||
346 | goto out_unlock; | ||
347 | } else if (umem_xs->dev != dev || | ||
348 | umem_xs->queue_id != sxdp->sxdp_queue_id) { | ||
349 | err = -EINVAL; | ||
350 | sockfd_put(sock); | ||
351 | goto out_unlock; | ||
352 | } | ||
353 | |||
354 | xdp_get_umem(umem_xs->umem); | ||
355 | old_umem = xs->umem; | ||
356 | xs->umem = umem_xs->umem; | ||
357 | sockfd_put(sock); | ||
358 | } else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) { | ||
359 | err = -EINVAL; | ||
360 | goto out_unlock; | ||
361 | } else { | ||
362 | /* This xsk has its own umem. */ | ||
363 | xskq_set_umem(xs->umem->fq, &xs->umem->props); | ||
364 | xskq_set_umem(xs->umem->cq, &xs->umem->props); | ||
365 | } | ||
366 | |||
367 | /* Rebind? */ | ||
368 | if (dev_curr && (dev_curr != dev || | ||
369 | xs->queue_id != sxdp->sxdp_queue_id)) { | ||
370 | __xsk_release(xs); | ||
371 | if (old_umem) | ||
372 | xdp_put_umem(old_umem); | ||
373 | } | ||
374 | |||
375 | xs->dev = dev; | ||
376 | xs->queue_id = sxdp->sxdp_queue_id; | ||
377 | |||
378 | xskq_set_umem(xs->rx, &xs->umem->props); | ||
379 | xskq_set_umem(xs->tx, &xs->umem->props); | ||
380 | |||
381 | out_unlock: | ||
382 | if (err) | ||
383 | dev_put(dev); | ||
384 | out_release: | ||
385 | mutex_unlock(&xs->mutex); | ||
386 | return err; | ||
387 | } | ||
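From user space this path is reached with a plain bind() on a struct sockaddr_xdp naming the device and queue; setting XDP_SHARED_UMEM plus the fd of an already set-up socket selects the shared-UMEM branch above instead. A hedged sketch, with "eth0" and queue 0 as placeholders:

    #include <net/if.h>
    #include <sys/socket.h>
    #include <linux/if_xdp.h>

    static int xsk_bind_example(int xsk_fd)
    {
            struct sockaddr_xdp sxdp = {
                    .sxdp_family = AF_XDP,
                    .sxdp_ifindex = if_nametoindex("eth0"),
                    .sxdp_queue_id = 0,
            };

            return bind(xsk_fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
    }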
388 | |||
389 | static int xsk_setsockopt(struct socket *sock, int level, int optname, | ||
390 | char __user *optval, unsigned int optlen) | ||
391 | { | ||
392 | struct sock *sk = sock->sk; | ||
393 | struct xdp_sock *xs = xdp_sk(sk); | ||
394 | int err; | ||
395 | |||
396 | if (level != SOL_XDP) | ||
397 | return -ENOPROTOOPT; | ||
398 | |||
399 | switch (optname) { | ||
400 | case XDP_RX_RING: | ||
401 | case XDP_TX_RING: | ||
402 | { | ||
403 | struct xsk_queue **q; | ||
404 | int entries; | ||
405 | |||
406 | if (optlen < sizeof(entries)) | ||
407 | return -EINVAL; | ||
408 | if (copy_from_user(&entries, optval, sizeof(entries))) | ||
409 | return -EFAULT; | ||
410 | |||
411 | mutex_lock(&xs->mutex); | ||
412 | q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx; | ||
413 | err = xsk_init_queue(entries, q, false); | ||
414 | mutex_unlock(&xs->mutex); | ||
415 | return err; | ||
416 | } | ||
417 | case XDP_UMEM_REG: | ||
418 | { | ||
419 | struct xdp_umem_reg mr; | ||
420 | struct xdp_umem *umem; | ||
421 | |||
422 | if (xs->umem) | ||
423 | return -EBUSY; | ||
424 | |||
425 | if (copy_from_user(&mr, optval, sizeof(mr))) | ||
426 | return -EFAULT; | ||
427 | |||
428 | mutex_lock(&xs->mutex); | ||
429 | err = xdp_umem_create(&umem); | ||
430 | if (!err) /* on failure umem is NULL, so the kfree() below is a no-op */ | ||
431 | err = xdp_umem_reg(umem, &mr); | ||
432 | if (err) { | ||
433 | kfree(umem); | ||
434 | mutex_unlock(&xs->mutex); | ||
435 | return err; | ||
436 | } | ||
437 | |||
438 | /* Make sure umem is ready before it can be seen by others */ | ||
439 | smp_wmb(); | ||
440 | |||
441 | xs->umem = umem; | ||
442 | mutex_unlock(&xs->mutex); | ||
443 | return 0; | ||
444 | } | ||
445 | case XDP_UMEM_FILL_RING: | ||
446 | case XDP_UMEM_COMPLETION_RING: | ||
447 | { | ||
448 | struct xsk_queue **q; | ||
449 | int entries; | ||
450 | |||
451 | if (!xs->umem) | ||
452 | return -EINVAL; | ||
453 | |||
454 | if (copy_from_user(&entries, optval, sizeof(entries))) | ||
455 | return -EFAULT; | ||
456 | |||
457 | mutex_lock(&xs->mutex); | ||
458 | q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq : | ||
459 | &xs->umem->cq; | ||
460 | err = xsk_init_queue(entries, q, true); | ||
461 | mutex_unlock(&xs->mutex); | ||
462 | return err; | ||
463 | } | ||
464 | default: | ||
465 | break; | ||
466 | } | ||
467 | |||
468 | return -ENOPROTOOPT; | ||
469 | } | ||
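A sketch of the setup order the switch above implies: the FILL and COMPLETION ring options return -EINVAL until XDP_UMEM_REG has installed a UMEM, and every ring size must pass xsk_init_queue()'s power-of-two check. Error handling is elided, and fd/mr are assumed to be a fresh XSK and a populated struct xdp_umem_reg:

    int ndescs = 1024;      /* power of two, per xsk_init_queue() */

    setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
    setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, &ndescs, sizeof(ndescs));
    setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &ndescs, sizeof(ndescs));
    setsockopt(fd, SOL_XDP, XDP_RX_RING, &ndescs, sizeof(ndescs));
    setsockopt(fd, SOL_XDP, XDP_TX_RING, &ndescs, sizeof(ndescs));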
470 | |||
471 | static int xsk_getsockopt(struct socket *sock, int level, int optname, | ||
472 | char __user *optval, int __user *optlen) | ||
473 | { | ||
474 | struct sock *sk = sock->sk; | ||
475 | struct xdp_sock *xs = xdp_sk(sk); | ||
476 | int len; | ||
477 | |||
478 | if (level != SOL_XDP) | ||
479 | return -ENOPROTOOPT; | ||
480 | |||
481 | if (get_user(len, optlen)) | ||
482 | return -EFAULT; | ||
483 | if (len < 0) | ||
484 | return -EINVAL; | ||
485 | |||
486 | switch (optname) { | ||
487 | case XDP_STATISTICS: | ||
488 | { | ||
489 | struct xdp_statistics stats; | ||
490 | |||
491 | if (len < sizeof(stats)) | ||
492 | return -EINVAL; | ||
493 | |||
494 | mutex_lock(&xs->mutex); | ||
495 | stats.rx_dropped = xs->rx_dropped; | ||
496 | stats.rx_invalid_descs = xskq_nb_invalid_descs(xs->rx); | ||
497 | stats.tx_invalid_descs = xskq_nb_invalid_descs(xs->tx); | ||
498 | mutex_unlock(&xs->mutex); | ||
499 | |||
500 | if (copy_to_user(optval, &stats, sizeof(stats))) | ||
501 | return -EFAULT; | ||
502 | if (put_user(sizeof(stats), optlen)) | ||
503 | return -EFAULT; | ||
504 | |||
505 | return 0; | ||
506 | } | ||
507 | default: | ||
508 | break; | ||
509 | } | ||
510 | |||
511 | return -EOPNOTSUPP; | ||
512 | } | ||
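Reading the counters back is a single getsockopt(); a hedged sketch, assuming fd is a bound XSK:

    struct xdp_statistics stats;
    socklen_t optlen = sizeof(stats);

    if (getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen) == 0)
            printf("rx_dropped %llu rx_invalid %llu tx_invalid %llu\n",
                   stats.rx_dropped, stats.rx_invalid_descs,
                   stats.tx_invalid_descs);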
513 | |||
514 | static int xsk_mmap(struct file *file, struct socket *sock, | ||
515 | struct vm_area_struct *vma) | ||
516 | { | ||
517 | unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; | ||
518 | unsigned long size = vma->vm_end - vma->vm_start; | ||
519 | struct xdp_sock *xs = xdp_sk(sock->sk); | ||
520 | struct xsk_queue *q = NULL; | ||
521 | unsigned long pfn; | ||
522 | struct page *qpg; | ||
523 | |||
524 | if (offset == XDP_PGOFF_RX_RING) { | ||
525 | q = xs->rx; | ||
526 | } else if (offset == XDP_PGOFF_TX_RING) { | ||
527 | q = xs->tx; | ||
528 | } else { | ||
529 | if (!xs->umem) | ||
530 | return -EINVAL; | ||
531 | |||
532 | if (offset == XDP_UMEM_PGOFF_FILL_RING) | ||
533 | q = xs->umem->fq; | ||
534 | else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING) | ||
535 | q = xs->umem->cq; | ||
536 | } | ||
537 | |||
538 | if (!q) | ||
539 | return -EINVAL; | ||
540 | |||
541 | qpg = virt_to_head_page(q->ring); | ||
542 | if (size > (PAGE_SIZE << compound_order(qpg))) | ||
543 | return -EINVAL; | ||
544 | |||
545 | pfn = virt_to_phys(q->ring) >> PAGE_SHIFT; | ||
546 | return remap_pfn_range(vma, vma->vm_start, pfn, | ||
547 | size, vma->vm_page_prot); | ||
548 | } | ||
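Each ring is reached from user space by mmap()ing the socket at one of the fixed page offsets decoded above, sized to cover the ring header plus its descriptor array (the same arithmetic the xdpsock sample uses later in this diff). A sketch, assuming fd and ndescs from the setsockopt sketch earlier:

    #include <sys/mman.h>
    #include <linux/if_xdp.h>

    size_t size = sizeof(struct xdp_ring) + ndescs * sizeof(struct xdp_desc);
    struct xdp_rxtx_ring *rx = mmap(NULL, size, PROT_READ | PROT_WRITE,
                                    MAP_SHARED | MAP_POPULATE, fd,
                                    XDP_PGOFF_RX_RING);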
549 | |||
550 | static struct proto xsk_proto = { | ||
551 | .name = "XDP", | ||
552 | .owner = THIS_MODULE, | ||
553 | .obj_size = sizeof(struct xdp_sock), | ||
554 | }; | ||
555 | |||
556 | static const struct proto_ops xsk_proto_ops = { | ||
557 | .family = PF_XDP, | ||
558 | .owner = THIS_MODULE, | ||
559 | .release = xsk_release, | ||
560 | .bind = xsk_bind, | ||
561 | .connect = sock_no_connect, | ||
562 | .socketpair = sock_no_socketpair, | ||
563 | .accept = sock_no_accept, | ||
564 | .getname = sock_no_getname, | ||
565 | .poll = xsk_poll, | ||
566 | .ioctl = sock_no_ioctl, | ||
567 | .listen = sock_no_listen, | ||
568 | .shutdown = sock_no_shutdown, | ||
569 | .setsockopt = xsk_setsockopt, | ||
570 | .getsockopt = xsk_getsockopt, | ||
571 | .sendmsg = xsk_sendmsg, | ||
572 | .recvmsg = sock_no_recvmsg, | ||
573 | .mmap = xsk_mmap, | ||
574 | .sendpage = sock_no_sendpage, | ||
575 | }; | ||
576 | |||
577 | static void xsk_destruct(struct sock *sk) | ||
578 | { | ||
579 | struct xdp_sock *xs = xdp_sk(sk); | ||
580 | |||
581 | if (!sock_flag(sk, SOCK_DEAD)) | ||
582 | return; | ||
583 | |||
584 | xskq_destroy(xs->rx); | ||
585 | xskq_destroy(xs->tx); | ||
586 | xdp_put_umem(xs->umem); | ||
587 | |||
588 | sk_refcnt_debug_dec(sk); | ||
589 | } | ||
590 | |||
591 | static int xsk_create(struct net *net, struct socket *sock, int protocol, | ||
592 | int kern) | ||
593 | { | ||
594 | struct sock *sk; | ||
595 | struct xdp_sock *xs; | ||
596 | |||
597 | if (!ns_capable(net->user_ns, CAP_NET_RAW)) | ||
598 | return -EPERM; | ||
599 | if (sock->type != SOCK_RAW) | ||
600 | return -ESOCKTNOSUPPORT; | ||
601 | |||
602 | if (protocol) | ||
603 | return -EPROTONOSUPPORT; | ||
604 | |||
605 | sock->state = SS_UNCONNECTED; | ||
606 | |||
607 | sk = sk_alloc(net, PF_XDP, GFP_KERNEL, &xsk_proto, kern); | ||
608 | if (!sk) | ||
609 | return -ENOBUFS; | ||
610 | |||
611 | sock->ops = &xsk_proto_ops; | ||
612 | |||
613 | sock_init_data(sock, sk); | ||
614 | |||
615 | sk->sk_family = PF_XDP; | ||
616 | |||
617 | sk->sk_destruct = xsk_destruct; | ||
618 | sk_refcnt_debug_inc(sk); | ||
619 | |||
620 | xs = xdp_sk(sk); | ||
621 | mutex_init(&xs->mutex); | ||
622 | |||
623 | local_bh_disable(); | ||
624 | sock_prot_inuse_add(net, &xsk_proto, 1); | ||
625 | local_bh_enable(); | ||
626 | |||
627 | return 0; | ||
628 | } | ||
629 | |||
630 | static const struct net_proto_family xsk_family_ops = { | ||
631 | .family = PF_XDP, | ||
632 | .create = xsk_create, | ||
633 | .owner = THIS_MODULE, | ||
634 | }; | ||
635 | |||
636 | static int __init xsk_init(void) | ||
637 | { | ||
638 | int err; | ||
639 | |||
640 | err = proto_register(&xsk_proto, 0 /* no slab */); | ||
641 | if (err) | ||
642 | goto out; | ||
643 | |||
644 | err = sock_register(&xsk_family_ops); | ||
645 | if (err) | ||
646 | goto out_proto; | ||
647 | |||
648 | return 0; | ||
649 | |||
650 | out_proto: | ||
651 | proto_unregister(&xsk_proto); | ||
652 | out: | ||
653 | return err; | ||
654 | } | ||
655 | |||
656 | fs_initcall(xsk_init); | ||
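xsk_create() above is reached through an ordinary socket() call; only SOCK_RAW with protocol 0 is accepted, and CAP_NET_RAW is required. A one-line user-space sketch:

    #include <sys/socket.h>

    int fd = socket(AF_XDP, SOCK_RAW, 0);
    if (fd < 0)
            perror("socket(AF_XDP)");   /* e.g. EPERM without CAP_NET_RAW */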
diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c new file mode 100644 index 000000000000..d012e5e23591 --- /dev/null +++ b/net/xdp/xsk_queue.c | |||
@@ -0,0 +1,73 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* XDP user-space ring structure | ||
3 | * Copyright(c) 2018 Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #include <linux/slab.h> | ||
16 | |||
17 | #include "xsk_queue.h" | ||
18 | |||
19 | void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props) | ||
20 | { | ||
21 | if (!q) | ||
22 | return; | ||
23 | |||
24 | q->umem_props = *umem_props; | ||
25 | } | ||
26 | |||
27 | static u32 xskq_umem_get_ring_size(struct xsk_queue *q) | ||
28 | { | ||
29 | return sizeof(struct xdp_umem_ring) + q->nentries * sizeof(u32); | ||
30 | } | ||
31 | |||
32 | static u32 xskq_rxtx_get_ring_size(struct xsk_queue *q) | ||
33 | { | ||
34 | return (sizeof(struct xdp_ring) + | ||
35 | q->nentries * sizeof(struct xdp_desc)); | ||
36 | } | ||
37 | |||
38 | struct xsk_queue *xskq_create(u32 nentries, bool umem_queue) | ||
39 | { | ||
40 | struct xsk_queue *q; | ||
41 | gfp_t gfp_flags; | ||
42 | size_t size; | ||
43 | |||
44 | q = kzalloc(sizeof(*q), GFP_KERNEL); | ||
45 | if (!q) | ||
46 | return NULL; | ||
47 | |||
48 | q->nentries = nentries; | ||
49 | q->ring_mask = nentries - 1; | ||
50 | |||
51 | gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | | ||
52 | __GFP_COMP | __GFP_NORETRY; | ||
53 | size = umem_queue ? xskq_umem_get_ring_size(q) : | ||
54 | xskq_rxtx_get_ring_size(q); | ||
55 | |||
56 | q->ring = (struct xdp_ring *)__get_free_pages(gfp_flags, | ||
57 | get_order(size)); | ||
58 | if (!q->ring) { | ||
59 | kfree(q); | ||
60 | return NULL; | ||
61 | } | ||
62 | |||
63 | return q; | ||
64 | } | ||
65 | |||
66 | void xskq_destroy(struct xsk_queue *q) | ||
67 | { | ||
68 | if (!q) | ||
69 | return; | ||
70 | |||
71 | page_frag_free(q->ring); | ||
72 | kfree(q); | ||
73 | } | ||
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h new file mode 100644 index 000000000000..7aa9a535db0e --- /dev/null +++ b/net/xdp/xsk_queue.h | |||
@@ -0,0 +1,247 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 | ||
2 | * XDP user-space ring structure | ||
3 | * Copyright(c) 2018 Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #ifndef _LINUX_XSK_QUEUE_H | ||
16 | #define _LINUX_XSK_QUEUE_H | ||
17 | |||
18 | #include <linux/types.h> | ||
19 | #include <linux/if_xdp.h> | ||
20 | |||
21 | #include "xdp_umem_props.h" | ||
22 | |||
23 | #define RX_BATCH_SIZE 16 | ||
24 | |||
25 | struct xsk_queue { | ||
26 | struct xdp_umem_props umem_props; | ||
27 | u32 ring_mask; | ||
28 | u32 nentries; | ||
29 | u32 prod_head; | ||
30 | u32 prod_tail; | ||
31 | u32 cons_head; | ||
32 | u32 cons_tail; | ||
33 | struct xdp_ring *ring; | ||
34 | u64 invalid_descs; | ||
35 | }; | ||
36 | |||
37 | /* Common functions operating on both RXTX and umem queues */ | ||
38 | |||
39 | static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q) | ||
40 | { | ||
41 | return q ? q->invalid_descs : 0; | ||
42 | } | ||
43 | |||
44 | static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt) | ||
45 | { | ||
46 | u32 entries = q->prod_tail - q->cons_tail; | ||
47 | |||
48 | if (entries == 0) { | ||
49 | /* Refresh the local pointer */ | ||
50 | q->prod_tail = READ_ONCE(q->ring->producer); | ||
51 | entries = q->prod_tail - q->cons_tail; | ||
52 | } | ||
53 | |||
54 | return (entries > dcnt) ? dcnt : entries; | ||
55 | } | ||
56 | |||
57 | static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt) | ||
58 | { | ||
59 | u32 free_entries = q->nentries - (producer - q->cons_tail); | ||
60 | |||
61 | if (free_entries >= dcnt) | ||
62 | return free_entries; | ||
63 | |||
64 | /* Refresh the local tail pointer */ | ||
65 | q->cons_tail = READ_ONCE(q->ring->consumer); | ||
66 | return q->nentries - (producer - q->cons_tail); | ||
67 | } | ||
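Both helpers rely on producer/consumer being free-running u32 counters that are only masked when a slot is actually indexed; unsigned subtraction then yields the correct element count even across a 32-bit wrap. A worked example with assumed values:

    u32 prod_tail = 0x00000002;           /* producer wrapped past UINT32_MAX */
    u32 cons_tail = 0xfffffffe;
    u32 entries = prod_tail - cons_tail;  /* == 4, despite prod < cons numerically */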
68 | |||
69 | /* UMEM queue */ | ||
70 | |||
71 | static inline bool xskq_is_valid_id(struct xsk_queue *q, u32 idx) | ||
72 | { | ||
73 | if (unlikely(idx >= q->umem_props.nframes)) { | ||
74 | q->invalid_descs++; | ||
75 | return false; | ||
76 | } | ||
77 | return true; | ||
78 | } | ||
79 | |||
80 | static inline u32 *xskq_validate_id(struct xsk_queue *q) | ||
81 | { | ||
82 | while (q->cons_tail != q->cons_head) { | ||
83 | struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; | ||
84 | unsigned int idx = q->cons_tail & q->ring_mask; | ||
85 | |||
86 | if (xskq_is_valid_id(q, ring->desc[idx])) | ||
87 | return &ring->desc[idx]; | ||
88 | |||
89 | q->cons_tail++; | ||
90 | } | ||
91 | |||
92 | return NULL; | ||
93 | } | ||
94 | |||
95 | static inline u32 *xskq_peek_id(struct xsk_queue *q) | ||
96 | { | ||
97 | struct xdp_umem_ring *ring; | ||
98 | |||
99 | if (q->cons_tail == q->cons_head) { | ||
100 | WRITE_ONCE(q->ring->consumer, q->cons_tail); | ||
101 | q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE); | ||
102 | |||
103 | /* Order consumer and data */ | ||
104 | smp_rmb(); | ||
105 | |||
106 | return xskq_validate_id(q); | ||
107 | } | ||
108 | |||
109 | ring = (struct xdp_umem_ring *)q->ring; | ||
110 | return &ring->desc[q->cons_tail & q->ring_mask]; | ||
111 | } | ||
112 | |||
113 | static inline void xskq_discard_id(struct xsk_queue *q) | ||
114 | { | ||
115 | q->cons_tail++; | ||
116 | (void)xskq_validate_id(q); | ||
117 | } | ||
118 | |||
119 | static inline int xskq_produce_id(struct xsk_queue *q, u32 id) | ||
120 | { | ||
121 | struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; | ||
122 | |||
123 | ring->desc[q->prod_tail++ & q->ring_mask] = id; | ||
124 | |||
125 | /* Order producer and data */ | ||
126 | smp_wmb(); | ||
127 | |||
128 | WRITE_ONCE(q->ring->producer, q->prod_tail); | ||
129 | return 0; | ||
130 | } | ||
131 | |||
132 | static inline int xskq_reserve_id(struct xsk_queue *q) | ||
133 | { | ||
134 | if (xskq_nb_free(q, q->prod_head, 1) == 0) | ||
135 | return -ENOSPC; | ||
136 | |||
137 | q->prod_head++; | ||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | /* Rx/Tx queue */ | ||
142 | |||
143 | static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d) | ||
144 | { | ||
145 | u32 buff_len; | ||
146 | |||
147 | if (unlikely(d->idx >= q->umem_props.nframes)) { | ||
148 | q->invalid_descs++; | ||
149 | return false; | ||
150 | } | ||
151 | |||
152 | buff_len = q->umem_props.frame_size; | ||
153 | if (unlikely(d->len > buff_len || d->len == 0 || | ||
154 | d->offset > buff_len || d->offset + d->len > buff_len)) { | ||
155 | q->invalid_descs++; | ||
156 | return false; | ||
157 | } | ||
158 | |||
159 | return true; | ||
160 | } | ||
161 | |||
162 | static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q, | ||
163 | struct xdp_desc *desc) | ||
164 | { | ||
165 | while (q->cons_tail != q->cons_head) { | ||
166 | struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; | ||
167 | unsigned int idx = q->cons_tail & q->ring_mask; | ||
168 | |||
169 | if (xskq_is_valid_desc(q, &ring->desc[idx])) { | ||
170 | if (desc) | ||
171 | *desc = ring->desc[idx]; | ||
172 | return desc; | ||
173 | } | ||
174 | |||
175 | q->cons_tail++; | ||
176 | } | ||
177 | |||
178 | return NULL; | ||
179 | } | ||
180 | |||
181 | static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q, | ||
182 | struct xdp_desc *desc) | ||
183 | { | ||
184 | struct xdp_rxtx_ring *ring; | ||
185 | |||
186 | if (q->cons_tail == q->cons_head) { | ||
187 | WRITE_ONCE(q->ring->consumer, q->cons_tail); | ||
188 | q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE); | ||
189 | |||
190 | /* Order consumer and data */ | ||
191 | smp_rmb(); | ||
192 | |||
193 | return xskq_validate_desc(q, desc); | ||
194 | } | ||
195 | |||
196 | ring = (struct xdp_rxtx_ring *)q->ring; | ||
197 | *desc = ring->desc[q->cons_tail & q->ring_mask]; | ||
198 | return desc; | ||
199 | } | ||
200 | |||
201 | static inline void xskq_discard_desc(struct xsk_queue *q) | ||
202 | { | ||
203 | q->cons_tail++; | ||
204 | (void)xskq_validate_desc(q, NULL); | ||
205 | } | ||
206 | |||
207 | static inline int xskq_produce_batch_desc(struct xsk_queue *q, | ||
208 | u32 id, u32 len, u16 offset) | ||
209 | { | ||
210 | struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; | ||
211 | unsigned int idx; | ||
212 | |||
213 | if (xskq_nb_free(q, q->prod_head, 1) == 0) | ||
214 | return -ENOSPC; | ||
215 | |||
216 | idx = (q->prod_head++) & q->ring_mask; | ||
217 | ring->desc[idx].idx = id; | ||
218 | ring->desc[idx].len = len; | ||
219 | ring->desc[idx].offset = offset; | ||
220 | |||
221 | return 0; | ||
222 | } | ||
223 | |||
224 | static inline void xskq_produce_flush_desc(struct xsk_queue *q) | ||
225 | { | ||
226 | /* Order producer and data */ | ||
227 | smp_wmb(); | ||
228 | |||
229 | q->prod_tail = q->prod_head; | ||
230 | WRITE_ONCE(q->ring->producer, q->prod_tail); | ||
231 | } | ||
232 | |||
233 | static inline bool xskq_full_desc(struct xsk_queue *q) | ||
234 | { | ||
235 | return (xskq_nb_avail(q, q->nentries) == q->nentries); | ||
236 | } | ||
237 | |||
238 | static inline bool xskq_empty_desc(struct xsk_queue *q) | ||
239 | { | ||
240 | return (xskq_nb_free(q, q->prod_tail, 1) == q->nentries); | ||
241 | } | ||
242 | |||
243 | void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props); | ||
244 | struct xsk_queue *xskq_create(u32 nentries, bool umem_queue); | ||
245 | void xskq_destroy(struct xsk_queue *q_ops); | ||
246 | |||
247 | #endif /* _LINUX_XSK_QUEUE_H */ | ||
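Consumption composes these helpers into a peek/discard pattern: peek exposes the next validated entry without committing, discard steps cons_tail past it, and the consumer pointer is only published back once the cached batch is exhausted. A kernel-side sketch of the loop a caller might run, error handling elided:

    struct xdp_desc desc;

    while (xskq_peek_desc(q, &desc)) {
            /* ... hand off frame desc.idx / desc.offset / desc.len ... */
            xskq_discard_desc(q);   /* commit: step past this entry */
    }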
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index b853581592fd..8e0c7fb6d7cc 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile | |||
@@ -45,10 +45,12 @@ hostprogs-y += xdp_rxq_info | |||
45 | hostprogs-y += syscall_tp | 45 | hostprogs-y += syscall_tp |
46 | hostprogs-y += cpustat | 46 | hostprogs-y += cpustat |
47 | hostprogs-y += xdp_adjust_tail | 47 | hostprogs-y += xdp_adjust_tail |
48 | hostprogs-y += xdpsock | ||
48 | 49 | ||
49 | # Libbpf dependencies | 50 | # Libbpf dependencies |
50 | LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o | 51 | LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o |
51 | CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o | 52 | CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o |
53 | TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o | ||
52 | 54 | ||
53 | test_lru_dist-objs := test_lru_dist.o $(LIBBPF) | 55 | test_lru_dist-objs := test_lru_dist.o $(LIBBPF) |
54 | sock_example-objs := sock_example.o $(LIBBPF) | 56 | sock_example-objs := sock_example.o $(LIBBPF) |
@@ -65,10 +67,10 @@ tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o | |||
65 | tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o | 67 | tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o |
66 | load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o | 68 | load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o |
67 | test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o | 69 | test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o |
68 | trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o | 70 | trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o $(TRACE_HELPERS) |
69 | lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o | 71 | lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o |
70 | offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o | 72 | offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o $(TRACE_HELPERS) |
71 | spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o | 73 | spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o $(TRACE_HELPERS) |
72 | map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o | 74 | map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o |
73 | test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o | 75 | test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o |
74 | test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o | 76 | test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o |
@@ -82,8 +84,8 @@ xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o | |||
82 | xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o | 84 | xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o |
83 | test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \ | 85 | test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \ |
84 | test_current_task_under_cgroup_user.o | 86 | test_current_task_under_cgroup_user.o |
85 | trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o | 87 | trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o $(TRACE_HELPERS) |
86 | sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o | 88 | sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o $(TRACE_HELPERS) |
87 | tc_l2_redirect-objs := bpf_load.o $(LIBBPF) tc_l2_redirect_user.o | 89 | tc_l2_redirect-objs := bpf_load.o $(LIBBPF) tc_l2_redirect_user.o |
88 | lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o | 90 | lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o |
89 | xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o | 91 | xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o |
@@ -97,6 +99,7 @@ xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o | |||
97 | syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o | 99 | syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o |
98 | cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o | 100 | cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o |
99 | xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o | 101 | xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o |
102 | xdpsock-objs := bpf_load.o $(LIBBPF) xdpsock_user.o | ||
100 | 103 | ||
101 | # Tell kbuild to always build the programs | 104 | # Tell kbuild to always build the programs |
102 | always := $(hostprogs-y) | 105 | always := $(hostprogs-y) |
@@ -150,6 +153,7 @@ always += xdp2skb_meta_kern.o | |||
150 | always += syscall_tp_kern.o | 153 | always += syscall_tp_kern.o |
151 | always += cpustat_kern.o | 154 | always += cpustat_kern.o |
152 | always += xdp_adjust_tail_kern.o | 155 | always += xdp_adjust_tail_kern.o |
156 | always += xdpsock_kern.o | ||
153 | 157 | ||
154 | HOSTCFLAGS += -I$(objtree)/usr/include | 158 | HOSTCFLAGS += -I$(objtree)/usr/include |
155 | HOSTCFLAGS += -I$(srctree)/tools/lib/ | 159 | HOSTCFLAGS += -I$(srctree)/tools/lib/ |
@@ -196,6 +200,7 @@ HOSTLOADLIBES_xdp_rxq_info += -lelf | |||
196 | HOSTLOADLIBES_syscall_tp += -lelf | 200 | HOSTLOADLIBES_syscall_tp += -lelf |
197 | HOSTLOADLIBES_cpustat += -lelf | 201 | HOSTLOADLIBES_cpustat += -lelf |
198 | HOSTLOADLIBES_xdp_adjust_tail += -lelf | 202 | HOSTLOADLIBES_xdp_adjust_tail += -lelf |
203 | HOSTLOADLIBES_xdpsock += -lelf -pthread | ||
199 | 204 | ||
200 | # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: | 205 | # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: |
201 | # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang | 206 | # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang |
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index feca497d6afd..da9bccfaf391 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c | |||
@@ -145,6 +145,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) | |||
145 | } | 145 | } |
146 | 146 | ||
147 | if (is_kprobe || is_kretprobe) { | 147 | if (is_kprobe || is_kretprobe) { |
148 | bool need_normal_check = true; | ||
149 | const char *event_prefix = ""; | ||
150 | |||
148 | if (is_kprobe) | 151 | if (is_kprobe) |
149 | event += 7; | 152 | event += 7; |
150 | else | 153 | else |
@@ -158,18 +161,33 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) | |||
158 | if (isdigit(*event)) | 161 | if (isdigit(*event)) |
159 | return populate_prog_array(event, fd); | 162 | return populate_prog_array(event, fd); |
160 | 163 | ||
161 | snprintf(buf, sizeof(buf), | 164 | #ifdef __x86_64__ |
162 | "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events", | 165 | if (strncmp(event, "sys_", 4) == 0) { |
163 | is_kprobe ? 'p' : 'r', event, event); | 166 | snprintf(buf, sizeof(buf), |
164 | err = system(buf); | 167 | "echo '%c:__x64_%s __x64_%s' >> /sys/kernel/debug/tracing/kprobe_events", |
165 | if (err < 0) { | 168 | is_kprobe ? 'p' : 'r', event, event); |
166 | printf("failed to create kprobe '%s' error '%s'\n", | 169 | err = system(buf); |
167 | event, strerror(errno)); | 170 | if (err >= 0) { |
168 | return -1; | 171 | need_normal_check = false; |
172 | event_prefix = "__x64_"; | ||
173 | } | ||
174 | } | ||
175 | #endif | ||
176 | if (need_normal_check) { | ||
177 | snprintf(buf, sizeof(buf), | ||
178 | "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events", | ||
179 | is_kprobe ? 'p' : 'r', event, event); | ||
180 | err = system(buf); | ||
181 | if (err < 0) { | ||
182 | printf("failed to create kprobe '%s' error '%s'\n", | ||
183 | event, strerror(errno)); | ||
184 | return -1; | ||
185 | } | ||
169 | } | 186 | } |
170 | 187 | ||
171 | strcpy(buf, DEBUGFS); | 188 | strcpy(buf, DEBUGFS); |
172 | strcat(buf, "events/kprobes/"); | 189 | strcat(buf, "events/kprobes/"); |
190 | strcat(buf, event_prefix); | ||
173 | strcat(buf, event); | 191 | strcat(buf, event); |
174 | strcat(buf, "/id"); | 192 | strcat(buf, "/id"); |
175 | } else if (is_tracepoint) { | 193 | } else if (is_tracepoint) { |
@@ -648,66 +666,3 @@ void read_trace_pipe(void) | |||
648 | } | 666 | } |
649 | } | 667 | } |
650 | } | 668 | } |
651 | |||
652 | #define MAX_SYMS 300000 | ||
653 | static struct ksym syms[MAX_SYMS]; | ||
654 | static int sym_cnt; | ||
655 | |||
656 | static int ksym_cmp(const void *p1, const void *p2) | ||
657 | { | ||
658 | return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr; | ||
659 | } | ||
660 | |||
661 | int load_kallsyms(void) | ||
662 | { | ||
663 | FILE *f = fopen("/proc/kallsyms", "r"); | ||
664 | char func[256], buf[256]; | ||
665 | char symbol; | ||
666 | void *addr; | ||
667 | int i = 0; | ||
668 | |||
669 | if (!f) | ||
670 | return -ENOENT; | ||
671 | |||
672 | while (!feof(f)) { | ||
673 | if (!fgets(buf, sizeof(buf), f)) | ||
674 | break; | ||
675 | if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3) | ||
676 | break; | ||
677 | if (!addr) | ||
678 | continue; | ||
679 | syms[i].addr = (long) addr; | ||
680 | syms[i].name = strdup(func); | ||
681 | i++; | ||
682 | } | ||
683 | sym_cnt = i; | ||
684 | qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp); | ||
685 | return 0; | ||
686 | } | ||
687 | |||
688 | struct ksym *ksym_search(long key) | ||
689 | { | ||
690 | int start = 0, end = sym_cnt; | ||
691 | int result; | ||
692 | |||
693 | while (start < end) { | ||
694 | size_t mid = start + (end - start) / 2; | ||
695 | |||
696 | result = key - syms[mid].addr; | ||
697 | if (result < 0) | ||
698 | end = mid; | ||
699 | else if (result > 0) | ||
700 | start = mid + 1; | ||
701 | else | ||
702 | return &syms[mid]; | ||
703 | } | ||
704 | |||
705 | if (start >= 1 && syms[start - 1].addr < key && | ||
706 | key < syms[start].addr) | ||
707 | /* valid ksym */ | ||
708 | return &syms[start - 1]; | ||
709 | |||
710 | /* out of range. return _stext */ | ||
711 | return &syms[0]; | ||
712 | } | ||
713 | |||
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h index 453c200b389b..2c3d0b448632 100644 --- a/samples/bpf/bpf_load.h +++ b/samples/bpf/bpf_load.h | |||
@@ -54,12 +54,5 @@ int load_bpf_file(char *path); | |||
54 | int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map); | 54 | int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map); |
55 | 55 | ||
56 | void read_trace_pipe(void); | 56 | void read_trace_pipe(void); |
57 | struct ksym { | ||
58 | long addr; | ||
59 | char *name; | ||
60 | }; | ||
61 | |||
62 | int load_kallsyms(void); | ||
63 | struct ksym *ksym_search(long key); | ||
64 | int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); | 57 | int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); |
65 | #endif | 58 | #endif |
diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c index 512f87a5fd20..f06063af9fcb 100644 --- a/samples/bpf/offwaketime_user.c +++ b/samples/bpf/offwaketime_user.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <sys/resource.h> | 17 | #include <sys/resource.h> |
18 | #include "libbpf.h" | 18 | #include "libbpf.h" |
19 | #include "bpf_load.h" | 19 | #include "bpf_load.h" |
20 | #include "trace_helpers.h" | ||
20 | 21 | ||
21 | #define PRINT_RAW_ADDR 0 | 22 | #define PRINT_RAW_ADDR 0 |
22 | 23 | ||
diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c index 4ed690b907ff..60c2b73d1b4d 100644 --- a/samples/bpf/sampleip_user.c +++ b/samples/bpf/sampleip_user.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "libbpf.h" | 22 | #include "libbpf.h" |
23 | #include "bpf_load.h" | 23 | #include "bpf_load.h" |
24 | #include "perf-sys.h" | 24 | #include "perf-sys.h" |
25 | #include "trace_helpers.h" | ||
25 | 26 | ||
26 | #define DEFAULT_FREQ 99 | 27 | #define DEFAULT_FREQ 99 |
27 | #define DEFAULT_SECS 5 | 28 | #define DEFAULT_SECS 5 |
diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c index 3d736219a31c..8d3e9cfa1909 100644 --- a/samples/bpf/spintest_user.c +++ b/samples/bpf/spintest_user.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <sys/resource.h> | 7 | #include <sys/resource.h> |
8 | #include "libbpf.h" | 8 | #include "libbpf.h" |
9 | #include "bpf_load.h" | 9 | #include "bpf_load.h" |
10 | #include "trace_helpers.h" | ||
10 | 11 | ||
11 | int main(int ac, char **argv) | 12 | int main(int ac, char **argv) |
12 | { | 13 | { |
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c index 56f7a259a7c9..1fa1becfa641 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include "libbpf.h" | 21 | #include "libbpf.h" |
22 | #include "bpf_load.h" | 22 | #include "bpf_load.h" |
23 | #include "perf-sys.h" | 23 | #include "perf-sys.h" |
24 | #include "trace_helpers.h" | ||
24 | 25 | ||
25 | #define SAMPLE_FREQ 50 | 26 | #define SAMPLE_FREQ 50 |
26 | 27 | ||
diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c index ccca1e348017..5e78c2ecd08d 100644 --- a/samples/bpf/trace_output_user.c +++ b/samples/bpf/trace_output_user.c | |||
@@ -21,100 +21,10 @@ | |||
21 | #include "libbpf.h" | 21 | #include "libbpf.h" |
22 | #include "bpf_load.h" | 22 | #include "bpf_load.h" |
23 | #include "perf-sys.h" | 23 | #include "perf-sys.h" |
24 | #include "trace_helpers.h" | ||
24 | 25 | ||
25 | static int pmu_fd; | 26 | static int pmu_fd; |
26 | 27 | ||
27 | int page_size; | ||
28 | int page_cnt = 8; | ||
29 | volatile struct perf_event_mmap_page *header; | ||
30 | |||
31 | typedef void (*print_fn)(void *data, int size); | ||
32 | |||
33 | static int perf_event_mmap(int fd) | ||
34 | { | ||
35 | void *base; | ||
36 | int mmap_size; | ||
37 | |||
38 | page_size = getpagesize(); | ||
39 | mmap_size = page_size * (page_cnt + 1); | ||
40 | |||
41 | base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); | ||
42 | if (base == MAP_FAILED) { | ||
43 | printf("mmap err\n"); | ||
44 | return -1; | ||
45 | } | ||
46 | |||
47 | header = base; | ||
48 | return 0; | ||
49 | } | ||
50 | |||
51 | static int perf_event_poll(int fd) | ||
52 | { | ||
53 | struct pollfd pfd = { .fd = fd, .events = POLLIN }; | ||
54 | |||
55 | return poll(&pfd, 1, 1000); | ||
56 | } | ||
57 | |||
58 | struct perf_event_sample { | ||
59 | struct perf_event_header header; | ||
60 | __u32 size; | ||
61 | char data[]; | ||
62 | }; | ||
63 | |||
64 | static void perf_event_read(print_fn fn) | ||
65 | { | ||
66 | __u64 data_tail = header->data_tail; | ||
67 | __u64 data_head = header->data_head; | ||
68 | __u64 buffer_size = page_cnt * page_size; | ||
69 | void *base, *begin, *end; | ||
70 | char buf[256]; | ||
71 | |||
72 | asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ | ||
73 | if (data_head == data_tail) | ||
74 | return; | ||
75 | |||
76 | base = ((char *)header) + page_size; | ||
77 | |||
78 | begin = base + data_tail % buffer_size; | ||
79 | end = base + data_head % buffer_size; | ||
80 | |||
81 | while (begin != end) { | ||
82 | struct perf_event_sample *e; | ||
83 | |||
84 | e = begin; | ||
85 | if (begin + e->header.size > base + buffer_size) { | ||
86 | long len = base + buffer_size - begin; | ||
87 | |||
88 | assert(len < e->header.size); | ||
89 | memcpy(buf, begin, len); | ||
90 | memcpy(buf + len, base, e->header.size - len); | ||
91 | e = (void *) buf; | ||
92 | begin = base + e->header.size - len; | ||
93 | } else if (begin + e->header.size == base + buffer_size) { | ||
94 | begin = base; | ||
95 | } else { | ||
96 | begin += e->header.size; | ||
97 | } | ||
98 | |||
99 | if (e->header.type == PERF_RECORD_SAMPLE) { | ||
100 | fn(e->data, e->size); | ||
101 | } else if (e->header.type == PERF_RECORD_LOST) { | ||
102 | struct { | ||
103 | struct perf_event_header header; | ||
104 | __u64 id; | ||
105 | __u64 lost; | ||
106 | } *lost = (void *) e; | ||
107 | printf("lost %lld events\n", lost->lost); | ||
108 | } else { | ||
109 | printf("unknown event type=%d size=%d\n", | ||
110 | e->header.type, e->header.size); | ||
111 | } | ||
112 | } | ||
113 | |||
114 | __sync_synchronize(); /* smp_mb() */ | ||
115 | header->data_tail = data_head; | ||
116 | } | ||
117 | |||
118 | static __u64 time_get_ns(void) | 28 | static __u64 time_get_ns(void) |
119 | { | 29 | { |
120 | struct timespec ts; | 30 | struct timespec ts; |
@@ -127,7 +37,7 @@ static __u64 start_time; | |||
127 | 37 | ||
128 | #define MAX_CNT 100000ll | 38 | #define MAX_CNT 100000ll |
129 | 39 | ||
130 | static void print_bpf_output(void *data, int size) | 40 | static int print_bpf_output(void *data, int size) |
131 | { | 41 | { |
132 | static __u64 cnt; | 42 | static __u64 cnt; |
133 | struct { | 43 | struct { |
@@ -138,7 +48,7 @@ static void print_bpf_output(void *data, int size) | |||
138 | if (e->cookie != 0x12345678) { | 48 | if (e->cookie != 0x12345678) { |
139 | printf("BUG pid %llx cookie %llx sized %d\n", | 49 | printf("BUG pid %llx cookie %llx sized %d\n", |
140 | e->pid, e->cookie, size); | 50 | e->pid, e->cookie, size); |
141 | kill(0, SIGINT); | 51 | return PERF_EVENT_ERROR; |
142 | } | 52 | } |
143 | 53 | ||
144 | cnt++; | 54 | cnt++; |
@@ -146,8 +56,10 @@ static void print_bpf_output(void *data, int size) | |||
146 | if (cnt == MAX_CNT) { | 56 | if (cnt == MAX_CNT) { |
147 | printf("recv %lld events per sec\n", | 57 | printf("recv %lld events per sec\n", |
148 | MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); | 58 | MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); |
149 | kill(0, SIGINT); | 59 | return PERF_EVENT_DONE; |
150 | } | 60 | } |
61 | |||
62 | return PERF_EVENT_CONT; | ||
151 | } | 63 | } |
152 | 64 | ||
153 | static void test_bpf_perf_event(void) | 65 | static void test_bpf_perf_event(void) |
@@ -170,6 +82,7 @@ int main(int argc, char **argv) | |||
170 | { | 82 | { |
171 | char filename[256]; | 83 | char filename[256]; |
172 | FILE *f; | 84 | FILE *f; |
85 | int ret; | ||
173 | 86 | ||
174 | snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); | 87 | snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); |
175 | 88 | ||
@@ -187,10 +100,7 @@ int main(int argc, char **argv) | |||
187 | (void) f; | 100 | (void) f; |
188 | 101 | ||
189 | start_time = time_get_ns(); | 102 | start_time = time_get_ns(); |
190 | for (;;) { | 103 | ret = perf_event_poller(pmu_fd, print_bpf_output); |
191 | perf_event_poll(pmu_fd); | 104 | kill(0, SIGINT); |
192 | perf_event_read(print_bpf_output); | 105 | return ret; |
193 | } | ||
194 | |||
195 | return 0; | ||
196 | } | 106 | } |
diff --git a/samples/bpf/xdpsock.h b/samples/bpf/xdpsock.h new file mode 100644 index 000000000000..533ab81adfa1 --- /dev/null +++ b/samples/bpf/xdpsock.h | |||
@@ -0,0 +1,11 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | #ifndef XDPSOCK_H_ | ||
3 | #define XDPSOCK_H_ | ||
4 | |||
5 | /* Power-of-2 number of sockets */ | ||
6 | #define MAX_SOCKS 4 | ||
7 | |||
8 | /* Round-robin receive */ | ||
9 | #define RR_LB 0 | ||
10 | |||
11 | #endif /* XDPSOCK_H_ */ | ||
diff --git a/samples/bpf/xdpsock_kern.c b/samples/bpf/xdpsock_kern.c new file mode 100644 index 000000000000..d8806c41362e --- /dev/null +++ b/samples/bpf/xdpsock_kern.c | |||
@@ -0,0 +1,56 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | #define KBUILD_MODNAME "foo" | ||
3 | #include <uapi/linux/bpf.h> | ||
4 | #include "bpf_helpers.h" | ||
5 | |||
6 | #include "xdpsock.h" | ||
7 | |||
8 | struct bpf_map_def SEC("maps") qidconf_map = { | ||
9 | .type = BPF_MAP_TYPE_ARRAY, | ||
10 | .key_size = sizeof(int), | ||
11 | .value_size = sizeof(int), | ||
12 | .max_entries = 1, | ||
13 | }; | ||
14 | |||
15 | struct bpf_map_def SEC("maps") xsks_map = { | ||
16 | .type = BPF_MAP_TYPE_XSKMAP, | ||
17 | .key_size = sizeof(int), | ||
18 | .value_size = sizeof(int), | ||
19 | .max_entries = 4, | ||
20 | }; | ||
21 | |||
22 | struct bpf_map_def SEC("maps") rr_map = { | ||
23 | .type = BPF_MAP_TYPE_PERCPU_ARRAY, | ||
24 | .key_size = sizeof(int), | ||
25 | .value_size = sizeof(unsigned int), | ||
26 | .max_entries = 1, | ||
27 | }; | ||
28 | |||
29 | SEC("xdp_sock") | ||
30 | int xdp_sock_prog(struct xdp_md *ctx) | ||
31 | { | ||
32 | int *qidconf, key = 0, idx; | ||
33 | unsigned int *rr; | ||
34 | |||
35 | qidconf = bpf_map_lookup_elem(&qidconf_map, &key); | ||
36 | if (!qidconf) | ||
37 | return XDP_ABORTED; | ||
38 | |||
39 | if (*qidconf != ctx->rx_queue_index) | ||
40 | return XDP_PASS; | ||
41 | |||
42 | #if RR_LB /* NB! RR_LB is configured in xdpsock.h */ | ||
43 | rr = bpf_map_lookup_elem(&rr_map, &key); | ||
44 | if (!rr) | ||
45 | return XDP_ABORTED; | ||
46 | |||
47 | *rr = (*rr + 1) & (MAX_SOCKS - 1); | ||
48 | idx = *rr; | ||
49 | #else | ||
50 | idx = 0; | ||
51 | #endif | ||
52 | |||
53 | return bpf_redirect_map(&xsks_map, idx, 0); | ||
54 | } | ||
55 | |||
56 | char _license[] SEC("license") = "GPL"; | ||
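The program assumes user space has populated both maps before traffic arrives: the monitored queue id goes into qidconf_map and a socket fd into each used xsks_map slot. A sketch with the samples' libbpf wrappers, assuming map_fd[] follows the declaration order above and xsk_fd is a bound XSK:

    int key = 0;
    int queue_id = 0;       /* queue the XDP program should match */

    bpf_map_update_elem(map_fd[0], &key, &queue_id, 0); /* qidconf_map */
    bpf_map_update_elem(map_fd[1], &key, &xsk_fd, 0);   /* xsks_map */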
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c new file mode 100644 index 000000000000..4b8a7cf3e63b --- /dev/null +++ b/samples/bpf/xdpsock_user.c | |||
@@ -0,0 +1,948 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* Copyright(c) 2017 - 2018 Intel Corporation. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <assert.h> | ||
15 | #include <errno.h> | ||
16 | #include <getopt.h> | ||
17 | #include <libgen.h> | ||
18 | #include <linux/bpf.h> | ||
19 | #include <linux/if_link.h> | ||
20 | #include <linux/if_xdp.h> | ||
21 | #include <linux/if_ether.h> | ||
22 | #include <net/if.h> | ||
23 | #include <signal.h> | ||
24 | #include <stdbool.h> | ||
25 | #include <stdio.h> | ||
26 | #include <stdlib.h> | ||
27 | #include <string.h> | ||
28 | #include <net/ethernet.h> | ||
29 | #include <sys/resource.h> | ||
30 | #include <sys/socket.h> | ||
31 | #include <sys/mman.h> | ||
32 | #include <time.h> | ||
33 | #include <unistd.h> | ||
34 | #include <pthread.h> | ||
35 | #include <locale.h> | ||
36 | #include <sys/types.h> | ||
37 | #include <poll.h> | ||
38 | |||
39 | #include "bpf_load.h" | ||
40 | #include "bpf_util.h" | ||
41 | #include "libbpf.h" | ||
42 | |||
43 | #include "xdpsock.h" | ||
44 | |||
45 | #ifndef SOL_XDP | ||
46 | #define SOL_XDP 283 | ||
47 | #endif | ||
48 | |||
49 | #ifndef AF_XDP | ||
50 | #define AF_XDP 44 | ||
51 | #endif | ||
52 | |||
53 | #ifndef PF_XDP | ||
54 | #define PF_XDP AF_XDP | ||
55 | #endif | ||
56 | |||
57 | #define NUM_FRAMES 131072 | ||
58 | #define FRAME_HEADROOM 0 | ||
59 | #define FRAME_SIZE 2048 | ||
60 | #define NUM_DESCS 1024 | ||
61 | #define BATCH_SIZE 16 | ||
62 | |||
63 | #define FQ_NUM_DESCS 1024 | ||
64 | #define CQ_NUM_DESCS 1024 | ||
65 | |||
66 | #define DEBUG_HEXDUMP 0 | ||
67 | |||
68 | typedef __u32 u32; | ||
69 | |||
70 | static unsigned long prev_time; | ||
71 | |||
72 | enum benchmark_type { | ||
73 | BENCH_RXDROP = 0, | ||
74 | BENCH_TXONLY = 1, | ||
75 | BENCH_L2FWD = 2, | ||
76 | }; | ||
77 | |||
78 | static enum benchmark_type opt_bench = BENCH_RXDROP; | ||
79 | static u32 opt_xdp_flags; | ||
80 | static const char *opt_if = ""; | ||
81 | static int opt_ifindex; | ||
82 | static int opt_queue; | ||
83 | static int opt_poll; | ||
84 | static int opt_shared_packet_buffer; | ||
85 | static int opt_interval = 1; | ||
86 | |||
87 | struct xdp_umem_uqueue { | ||
88 | u32 cached_prod; | ||
89 | u32 cached_cons; | ||
90 | u32 mask; | ||
91 | u32 size; | ||
92 | struct xdp_umem_ring *ring; | ||
93 | }; | ||
94 | |||
95 | struct xdp_umem { | ||
96 | char (*frames)[FRAME_SIZE]; | ||
97 | struct xdp_umem_uqueue fq; | ||
98 | struct xdp_umem_uqueue cq; | ||
99 | int fd; | ||
100 | }; | ||
101 | |||
102 | struct xdp_uqueue { | ||
103 | u32 cached_prod; | ||
104 | u32 cached_cons; | ||
105 | u32 mask; | ||
106 | u32 size; | ||
107 | struct xdp_rxtx_ring *ring; | ||
108 | }; | ||
109 | |||
110 | struct xdpsock { | ||
111 | struct xdp_uqueue rx; | ||
112 | struct xdp_uqueue tx; | ||
113 | int sfd; | ||
114 | struct xdp_umem *umem; | ||
115 | u32 outstanding_tx; | ||
116 | unsigned long rx_npkts; | ||
117 | unsigned long tx_npkts; | ||
118 | unsigned long prev_rx_npkts; | ||
119 | unsigned long prev_tx_npkts; | ||
120 | }; | ||
121 | |||
122 | #define MAX_SOCKS 4 | ||
123 | static int num_socks; | ||
124 | struct xdpsock *xsks[MAX_SOCKS]; | ||
125 | |||
126 | static unsigned long get_nsecs(void) | ||
127 | { | ||
128 | struct timespec ts; | ||
129 | |||
130 | clock_gettime(CLOCK_MONOTONIC, &ts); | ||
131 | return ts.tv_sec * 1000000000UL + ts.tv_nsec; | ||
132 | } | ||
133 | |||
134 | static void dump_stats(void); | ||
135 | |||
136 | #define lassert(expr) \ | ||
137 | do { \ | ||
138 | if (!(expr)) { \ | ||
139 | fprintf(stderr, "%s:%s:%i: Assertion failed: " \ | ||
140 | #expr ": errno: %d/\"%s\"\n", \ | ||
141 | __FILE__, __func__, __LINE__, \ | ||
142 | errno, strerror(errno)); \ | ||
143 | dump_stats(); \ | ||
144 | exit(EXIT_FAILURE); \ | ||
145 | } \ | ||
146 | } while (0) | ||
147 | |||
148 | #define barrier() __asm__ __volatile__("": : :"memory") | ||
149 | #define u_smp_rmb() barrier() | ||
150 | #define u_smp_wmb() barrier() | ||
151 | #define likely(x) __builtin_expect(!!(x), 1) | ||
152 | #define unlikely(x) __builtin_expect(!!(x), 0) | ||
153 | |||
154 | static const char pkt_data[] = | ||
155 | "\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00" | ||
156 | "\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14" | ||
157 | "\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b" | ||
158 | "\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa"; | ||
159 | |||
160 | static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb) | ||
161 | { | ||
162 | u32 free_entries = q->size - (q->cached_prod - q->cached_cons); | ||
163 | |||
164 | if (free_entries >= nb) | ||
165 | return free_entries; | ||
166 | |||
167 | /* Refresh the local tail pointer */ | ||
168 | q->cached_cons = q->ring->ptrs.consumer; | ||
169 | |||
170 | return q->size - (q->cached_prod - q->cached_cons); | ||
171 | } | ||
172 | |||
173 | static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs) | ||
174 | { | ||
175 | u32 free_entries = q->cached_cons - q->cached_prod; | ||
176 | |||
177 | if (free_entries >= ndescs) | ||
178 | return free_entries; | ||
179 | |||
180 | /* Refresh the local tail pointer */ | ||
181 | q->cached_cons = q->ring->ptrs.consumer + q->size; | ||
182 | return q->cached_cons - q->cached_prod; | ||
183 | } | ||
184 | |||
185 | static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb) | ||
186 | { | ||
187 | u32 entries = q->cached_prod - q->cached_cons; | ||
188 | |||
189 | if (entries == 0) { | ||
190 | q->cached_prod = q->ring->ptrs.producer; | ||
191 | entries = q->cached_prod - q->cached_cons; | ||
192 | } | ||
193 | |||
194 | return (entries > nb) ? nb : entries; | ||
195 | } | ||
196 | |||
197 | static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs) | ||
198 | { | ||
199 | u32 entries = q->cached_prod - q->cached_cons; | ||
200 | |||
201 | if (entries == 0) { | ||
202 | q->cached_prod = q->ring->ptrs.producer; | ||
203 | entries = q->cached_prod - q->cached_cons; | ||
204 | } | ||
205 | |||
206 | return (entries > ndescs) ? ndescs : entries; | ||
207 | } | ||
208 | |||
209 | static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq, | ||
210 | struct xdp_desc *d, | ||
211 | size_t nb) | ||
212 | { | ||
213 | u32 i; | ||
214 | |||
215 | if (umem_nb_free(fq, nb) < nb) | ||
216 | return -ENOSPC; | ||
217 | |||
218 | for (i = 0; i < nb; i++) { | ||
219 | u32 idx = fq->cached_prod++ & fq->mask; | ||
220 | |||
221 | fq->ring->desc[idx] = d[i].idx; | ||
222 | } | ||
223 | |||
224 | u_smp_wmb(); | ||
225 | |||
226 | fq->ring->ptrs.producer = fq->cached_prod; | ||
227 | |||
228 | return 0; | ||
229 | } | ||
230 | |||
231 | static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u32 *d, | ||
232 | size_t nb) | ||
233 | { | ||
234 | u32 i; | ||
235 | |||
236 | if (umem_nb_free(fq, nb) < nb) | ||
237 | return -ENOSPC; | ||
238 | |||
239 | for (i = 0; i < nb; i++) { | ||
240 | u32 idx = fq->cached_prod++ & fq->mask; | ||
241 | |||
242 | fq->ring->desc[idx] = d[i]; | ||
243 | } | ||
244 | |||
245 | u_smp_wmb(); | ||
246 | |||
247 | fq->ring->ptrs.producer = fq->cached_prod; | ||
248 | |||
249 | return 0; | ||
250 | } | ||
251 | |||
252 | static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq, | ||
253 | u32 *d, size_t nb) | ||
254 | { | ||
255 | u32 idx, i, entries = umem_nb_avail(cq, nb); | ||
256 | |||
257 | u_smp_rmb(); | ||
258 | |||
259 | for (i = 0; i < entries; i++) { | ||
260 | idx = cq->cached_cons++ & cq->mask; | ||
261 | d[i] = cq->ring->desc[idx]; | ||
262 | } | ||
263 | |||
264 | if (entries > 0) { | ||
265 | u_smp_wmb(); | ||
266 | |||
267 | cq->ring->ptrs.consumer = cq->cached_cons; | ||
268 | } | ||
269 | |||
270 | return entries; | ||
271 | } | ||
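This is the reclaim half of transmit: after descriptors are queued on the TX ring, the kernel is kicked with a zero-length sendto() and the completion ring is drained so the frame ids can be recycled. A hedged sketch of that sequence using the sample's own helpers and fields:

    u32 done[BATCH_SIZE];
    size_t ndone;

    sendto(xsk->sfd, NULL, 0, MSG_DONTWAIT, NULL, 0);   /* kick TX */
    ndone = umem_complete_from_kernel(&xsk->umem->cq, done, BATCH_SIZE);
    xsk->outstanding_tx -= ndone;   /* ids in done[] may now be reused */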
272 | |||
273 | static inline void *xq_get_data(struct xdpsock *xsk, __u32 idx, __u32 off) | ||
274 | { | ||
275 | lassert(idx < NUM_FRAMES); | ||
276 | return &xsk->umem->frames[idx][off]; | ||
277 | } | ||
278 | |||
279 | static inline int xq_enq(struct xdp_uqueue *uq, | ||
280 | const struct xdp_desc *descs, | ||
281 | unsigned int ndescs) | ||
282 | { | ||
283 | struct xdp_rxtx_ring *r = uq->ring; | ||
284 | unsigned int i; | ||
285 | |||
286 | if (xq_nb_free(uq, ndescs) < ndescs) | ||
287 | return -ENOSPC; | ||
288 | |||
289 | for (i = 0; i < ndescs; i++) { | ||
290 | u32 idx = uq->cached_prod++ & uq->mask; | ||
291 | |||
292 | r->desc[idx].idx = descs[i].idx; | ||
293 | r->desc[idx].len = descs[i].len; | ||
294 | r->desc[idx].offset = descs[i].offset; | ||
295 | } | ||
296 | |||
297 | u_smp_wmb(); | ||
298 | |||
299 | r->ptrs.producer = uq->cached_prod; | ||
300 | return 0; | ||
301 | } | ||
302 | |||
303 | static inline int xq_enq_tx_only(struct xdp_uqueue *uq, | ||
304 | __u32 idx, unsigned int ndescs) | ||
305 | { | ||
306 | struct xdp_rxtx_ring *q = uq->ring; | ||
307 | unsigned int i; | ||
308 | |||
309 | if (xq_nb_free(uq, ndescs) < ndescs) | ||
310 | return -ENOSPC; | ||
311 | |||
312 | for (i = 0; i < ndescs; i++) { | ||
313 | u32 idx = uq->cached_prod++ & uq->mask; | ||
314 | |||
315 | q->desc[idx].idx = idx + i; | ||
316 | q->desc[idx].len = sizeof(pkt_data) - 1; | ||
317 | q->desc[idx].offset = 0; | ||
318 | } | ||
319 | |||
320 | u_smp_wmb(); | ||
321 | |||
322 | q->ptrs.producer = uq->cached_prod; | ||
323 | return 0; | ||
324 | } | ||
325 | |||
326 | static inline int xq_deq(struct xdp_uqueue *uq, | ||
327 | struct xdp_desc *descs, | ||
328 | int ndescs) | ||
329 | { | ||
330 | struct xdp_rxtx_ring *r = uq->ring; | ||
331 | unsigned int idx; | ||
332 | int i, entries; | ||
333 | |||
334 | entries = xq_nb_avail(uq, ndescs); | ||
335 | |||
336 | u_smp_rmb(); | ||
337 | |||
338 | for (i = 0; i < entries; i++) { | ||
339 | idx = uq->cached_cons++ & uq->mask; | ||
340 | descs[i] = r->desc[idx]; | ||
341 | } | ||
342 | |||
343 | if (entries > 0) { | ||
344 | u_smp_wmb(); | ||
345 | |||
346 | r->ptrs.consumer = uq->cached_cons; | ||
347 | } | ||
348 | |||
349 | return entries; | ||
350 | } | ||
351 | |||
352 | static void swap_mac_addresses(void *data) | ||
353 | { | ||
354 | struct ether_header *eth = (struct ether_header *)data; | ||
355 | struct ether_addr *src_addr = (struct ether_addr *)ð->ether_shost; | ||
356 | struct ether_addr *dst_addr = (struct ether_addr *)ð->ether_dhost; | ||
357 | struct ether_addr tmp; | ||
358 | |||
359 | tmp = *src_addr; | ||
360 | *src_addr = *dst_addr; | ||
361 | *dst_addr = tmp; | ||
362 | } | ||
363 | |||
364 | #if DEBUG_HEXDUMP | ||
365 | static void hex_dump(void *pkt, size_t length, const char *prefix) | ||
366 | { | ||
367 | int i = 0; | ||
368 | const unsigned char *address = (unsigned char *)pkt; | ||
369 | const unsigned char *line = address; | ||
370 | size_t line_size = 32; | ||
371 | unsigned char c; | ||
372 | |||
373 | printf("length = %zu\n", length); | ||
374 | printf("%s | ", prefix); | ||
375 | while (length-- > 0) { | ||
376 | printf("%02X ", *address++); | ||
377 | if (!(++i % line_size) || (length == 0 && i % line_size)) { | ||
378 | if (length == 0) { | ||
379 | while (i++ % line_size) | ||
380 | printf("__ "); | ||
381 | } | ||
382 | printf(" | "); /* right close */ | ||
383 | while (line < address) { | ||
384 | c = *line++; | ||
385 | printf("%c", (c < 33 || c == 255) ? 0x2E : c); | ||
386 | } | ||
387 | printf("\n"); | ||
388 | if (length > 0) | ||
389 | printf("%s | ", prefix); | ||
390 | } | ||
391 | } | ||
392 | printf("\n"); | ||
393 | } | ||
394 | #endif | ||
395 | |||
396 | static size_t gen_eth_frame(char *frame) | ||
397 | { | ||
398 | memcpy(frame, pkt_data, sizeof(pkt_data) - 1); | ||
399 | return sizeof(pkt_data) - 1; | ||
400 | } | ||
401 | |||
402 | static struct xdp_umem *xdp_umem_configure(int sfd) | ||
403 | { | ||
404 | int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS; | ||
405 | struct xdp_umem_reg mr; | ||
406 | struct xdp_umem *umem; | ||
407 | void *bufs; | ||
408 | |||
409 | umem = calloc(1, sizeof(*umem)); | ||
410 | lassert(umem); | ||
411 | |||
412 | lassert(posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */ | ||
413 | NUM_FRAMES * FRAME_SIZE) == 0); | ||
414 | |||
415 | mr.addr = (__u64)bufs; | ||
416 | mr.len = NUM_FRAMES * FRAME_SIZE; | ||
417 | mr.frame_size = FRAME_SIZE; | ||
418 | mr.frame_headroom = FRAME_HEADROOM; | ||
419 | |||
420 | lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)) == 0); | ||
421 | lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_FILL_RING, &fq_size, | ||
422 | sizeof(int)) == 0); | ||
423 | lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size, | ||
424 | sizeof(int)) == 0); | ||
425 | |||
426 | umem->fq.ring = mmap(0, sizeof(struct xdp_umem_ring) + | ||
427 | FQ_NUM_DESCS * sizeof(u32), | ||
428 | PROT_READ | PROT_WRITE, | ||
429 | MAP_SHARED | MAP_POPULATE, sfd, | ||
430 | XDP_UMEM_PGOFF_FILL_RING); | ||
431 | lassert(umem->fq.ring != MAP_FAILED); | ||
432 | |||
433 | umem->fq.mask = FQ_NUM_DESCS - 1; | ||
434 | umem->fq.size = FQ_NUM_DESCS; | ||
435 | |||
436 | umem->cq.ring = mmap(0, sizeof(struct xdp_umem_ring) + | ||
437 | CQ_NUM_DESCS * sizeof(u32), | ||
438 | PROT_READ | PROT_WRITE, | ||
439 | MAP_SHARED | MAP_POPULATE, sfd, | ||
440 | XDP_UMEM_PGOFF_COMPLETION_RING); | ||
441 | lassert(umem->cq.ring != MAP_FAILED); | ||
442 | |||
443 | umem->cq.mask = CQ_NUM_DESCS - 1; | ||
444 | umem->cq.size = CQ_NUM_DESCS; | ||
445 | |||
446 | umem->frames = (char (*)[FRAME_SIZE])bufs; | ||
447 | umem->fd = sfd; | ||
448 | |||
449 | if (opt_bench == BENCH_TXONLY) { | ||
450 | int i; | ||
451 | |||
452 | for (i = 0; i < NUM_FRAMES; i++) | ||
453 | (void)gen_eth_frame(&umem->frames[i][0]); | ||
454 | } | ||
455 | |||
456 | return umem; | ||
457 | } | ||
458 | |||
459 | static struct xdpsock *xsk_configure(struct xdp_umem *umem) | ||
460 | { | ||
461 | struct sockaddr_xdp sxdp = {}; | ||
462 | int sfd, ndescs = NUM_DESCS; | ||
463 | struct xdpsock *xsk; | ||
464 | bool shared = true; | ||
465 | u32 i; | ||
466 | |||
467 | sfd = socket(PF_XDP, SOCK_RAW, 0); | ||
468 | lassert(sfd >= 0); | ||
469 | |||
470 | xsk = calloc(1, sizeof(*xsk)); | ||
471 | lassert(xsk); | ||
472 | |||
473 | xsk->sfd = sfd; | ||
474 | xsk->outstanding_tx = 0; | ||
475 | |||
476 | if (!umem) { | ||
477 | shared = false; | ||
478 | xsk->umem = xdp_umem_configure(sfd); | ||
479 | } else { | ||
480 | xsk->umem = umem; | ||
481 | } | ||
482 | |||
483 | lassert(setsockopt(sfd, SOL_XDP, XDP_RX_RING, | ||
484 | &ndescs, sizeof(int)) == 0); | ||
485 | lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING, | ||
486 | &ndescs, sizeof(int)) == 0); | ||
487 | |||
488 | /* Rx */ | ||
489 | xsk->rx.ring = mmap(NULL, | ||
490 | sizeof(struct xdp_ring) + | ||
491 | NUM_DESCS * sizeof(struct xdp_desc), | ||
492 | PROT_READ | PROT_WRITE, | ||
493 | MAP_SHARED | MAP_POPULATE, sfd, | ||
494 | XDP_PGOFF_RX_RING); | ||
495 | lassert(xsk->rx.ring != MAP_FAILED); | ||
496 | |||
497 | if (!shared) { | ||
498 | for (i = 0; i < NUM_DESCS / 2; i++) | ||
499 | lassert(umem_fill_to_kernel(&xsk->umem->fq, &i, 1) | ||
500 | == 0); | ||
501 | } | ||
502 | |||
503 | /* Tx */ | ||
504 | xsk->tx.ring = mmap(NULL, | ||
505 | sizeof(struct xdp_ring) + | ||
506 | NUM_DESCS * sizeof(struct xdp_desc), | ||
507 | PROT_READ | PROT_WRITE, | ||
508 | MAP_SHARED | MAP_POPULATE, sfd, | ||
509 | XDP_PGOFF_TX_RING); | ||
510 | lassert(xsk->tx.ring != MAP_FAILED); | ||
511 | |||
512 | xsk->rx.mask = NUM_DESCS - 1; | ||
513 | xsk->rx.size = NUM_DESCS; | ||
514 | |||
515 | xsk->tx.mask = NUM_DESCS - 1; | ||
516 | xsk->tx.size = NUM_DESCS; | ||
517 | |||
518 | sxdp.sxdp_family = PF_XDP; | ||
519 | sxdp.sxdp_ifindex = opt_ifindex; | ||
520 | sxdp.sxdp_queue_id = opt_queue; | ||
521 | if (shared) { | ||
522 | sxdp.sxdp_flags = XDP_SHARED_UMEM; | ||
523 | sxdp.sxdp_shared_umem_fd = umem->fd; | ||
524 | } | ||
525 | |||
526 | lassert(bind(sfd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0); | ||
527 | |||
528 | return xsk; | ||
529 | } | ||
530 | |||
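For reference, the intended calling pattern for the helper above: the first socket creates a private UMEM, and subsequent sockets pass it in, which makes xsk_configure() bind them with XDP_SHARED_UMEM (this mirrors the RR_LB path in main() below)::

    struct xdpsock *first = xsk_configure(NULL);		/* creates the UMEM */
    struct xdpsock *second = xsk_configure(first->umem);	/* shares it */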
531 | static void print_benchmark(bool running) | ||
532 | { | ||
533 | const char *bench_str = "INVALID"; | ||
534 | |||
535 | if (opt_bench == BENCH_RXDROP) | ||
536 | bench_str = "rxdrop"; | ||
537 | else if (opt_bench == BENCH_TXONLY) | ||
538 | bench_str = "txonly"; | ||
539 | else if (opt_bench == BENCH_L2FWD) | ||
540 | bench_str = "l2fwd"; | ||
541 | |||
542 | printf("%s:%d %s ", opt_if, opt_queue, bench_str); | ||
543 | if (opt_xdp_flags & XDP_FLAGS_SKB_MODE) | ||
544 | printf("xdp-skb "); | ||
545 | else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE) | ||
546 | printf("xdp-drv "); | ||
547 | else | ||
548 | printf(" "); | ||
549 | |||
550 | if (opt_poll) | ||
551 | printf("poll() "); | ||
552 | |||
553 | if (running) { | ||
554 | printf("running..."); | ||
555 | fflush(stdout); | ||
556 | } | ||
557 | } | ||
558 | |||
559 | static void dump_stats(void) | ||
560 | { | ||
561 | unsigned long now = get_nsecs(); | ||
562 | long dt = now - prev_time; | ||
563 | int i; | ||
564 | |||
565 | prev_time = now; | ||
566 | |||
567 | for (i = 0; i < num_socks; i++) { | ||
568 | char *fmt = "%-15s %'-11.0f %'-11lu\n"; | ||
569 | double rx_pps, tx_pps; | ||
570 | |||
571 | rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) * | ||
572 | 1000000000. / dt; | ||
573 | tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) * | ||
574 | 1000000000. / dt; | ||
575 | |||
576 | printf("\n sock%d@", i); | ||
577 | print_benchmark(false); | ||
578 | printf("\n"); | ||
579 | |||
580 | printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts", | ||
581 | dt / 1000000000.); | ||
582 | printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts); | ||
583 | printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts); | ||
584 | |||
585 | xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts; | ||
586 | xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts; | ||
587 | } | ||
588 | } | ||
589 | |||
590 | static void *poller(void *arg) | ||
591 | { | ||
592 | (void)arg; | ||
593 | for (;;) { | ||
594 | sleep(opt_interval); | ||
595 | dump_stats(); | ||
596 | } | ||
597 | |||
598 | return NULL; | ||
599 | } | ||
600 | |||
601 | static void int_exit(int sig) | ||
602 | { | ||
603 | (void)sig; | ||
604 | dump_stats(); | ||
605 | bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); | ||
606 | exit(EXIT_SUCCESS); | ||
607 | } | ||
608 | |||
609 | static struct option long_options[] = { | ||
610 | {"rxdrop", no_argument, 0, 'r'}, | ||
611 | {"txonly", no_argument, 0, 't'}, | ||
612 | {"l2fwd", no_argument, 0, 'l'}, | ||
613 | {"interface", required_argument, 0, 'i'}, | ||
614 | {"queue", required_argument, 0, 'q'}, | ||
615 | {"poll", no_argument, 0, 'p'}, | ||
616 | {"shared-buffer", no_argument, 0, 's'}, | ||
617 | {"xdp-skb", no_argument, 0, 'S'}, | ||
618 | {"xdp-native", no_argument, 0, 'N'}, | ||
619 | {"interval", required_argument, 0, 'n'}, | ||
620 | {0, 0, 0, 0} | ||
621 | }; | ||
622 | |||
623 | static void usage(const char *prog) | ||
624 | { | ||
625 | const char *str = | ||
626 | " Usage: %s [OPTIONS]\n" | ||
627 | " Options:\n" | ||
628 | " -r, --rxdrop Discard all incoming packets (default)\n" | ||
629 | " -t, --txonly Only send packets\n" | ||
630 | " -l, --l2fwd MAC swap L2 forwarding\n" | ||
631 | " -i, --interface=n Run on interface n\n" | ||
632 | " -q, --queue=n Use queue n (default 0)\n" | ||
633 | " -p, --poll Use poll syscall\n" | ||
634 | " -s, --shared-buffer Use shared packet buffer\n" | ||
635 | " -S, --xdp-skb=n Use XDP skb-mod\n" | ||
636 | " -N, --xdp-native=n Enfore XDP native mode\n" | ||
637 | " -n, --interval=n Specify statistics update interval (default 1 sec).\n" | ||
638 | "\n"; | ||
639 | fprintf(stderr, str, prog); | ||
640 | exit(EXIT_FAILURE); | ||
641 | } | ||
642 | |||
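Putting the option table together, a typical invocation might look as follows (interface name illustrative, and assuming the sample builds as xdpsock)::

    # ./xdpsock --interface=eth0 --queue=0 --rxdrop --poll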
643 | static void parse_command_line(int argc, char **argv) | ||
644 | { | ||
645 | int option_index, c; | ||
646 | |||
647 | opterr = 0; | ||
648 | |||
649 | for (;;) { | ||
650 | c = getopt_long(argc, argv, "rtli:q:psSNn:", long_options, | ||
651 | &option_index); | ||
652 | if (c == -1) | ||
653 | break; | ||
654 | |||
655 | switch (c) { | ||
656 | case 'r': | ||
657 | opt_bench = BENCH_RXDROP; | ||
658 | break; | ||
659 | case 't': | ||
660 | opt_bench = BENCH_TXONLY; | ||
661 | break; | ||
662 | case 'l': | ||
663 | opt_bench = BENCH_L2FWD; | ||
664 | break; | ||
665 | case 'i': | ||
666 | opt_if = optarg; | ||
667 | break; | ||
668 | case 'q': | ||
669 | opt_queue = atoi(optarg); | ||
670 | break; | ||
671 | case 's': | ||
672 | opt_shared_packet_buffer = 1; | ||
673 | break; | ||
674 | case 'p': | ||
675 | opt_poll = 1; | ||
676 | break; | ||
677 | case 'S': | ||
678 | opt_xdp_flags |= XDP_FLAGS_SKB_MODE; | ||
679 | break; | ||
680 | case 'N': | ||
681 | opt_xdp_flags |= XDP_FLAGS_DRV_MODE; | ||
682 | break; | ||
683 | case 'n': | ||
684 | opt_interval = atoi(optarg); | ||
685 | break; | ||
686 | default: | ||
687 | usage(basename(argv[0])); | ||
688 | } | ||
689 | } | ||
690 | |||
691 | opt_ifindex = if_nametoindex(opt_if); | ||
692 | if (!opt_ifindex) { | ||
693 | fprintf(stderr, "ERROR: interface \"%s\" does not exist\n", | ||
694 | opt_if); | ||
695 | usage(basename(argv[0])); | ||
696 | } | ||
697 | } | ||
698 | |||
699 | static void kick_tx(int fd) | ||
700 | { | ||
701 | int ret; | ||
702 | |||
703 | ret = sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0); | ||
704 | if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN) | ||
705 | return; | ||
706 | lassert(0); | ||
707 | } | ||
708 | |||
709 | static inline void complete_tx_l2fwd(struct xdpsock *xsk) | ||
710 | { | ||
711 | u32 descs[BATCH_SIZE]; | ||
712 | unsigned int rcvd; | ||
713 | size_t ndescs; | ||
714 | |||
715 | if (!xsk->outstanding_tx) | ||
716 | return; | ||
717 | |||
718 | kick_tx(xsk->sfd); | ||
719 | ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE : | ||
720 | xsk->outstanding_tx; | ||
721 | |||
722 | /* re-add completed Tx buffers */ | ||
723 | rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, ndescs); | ||
724 | if (rcvd > 0) { | ||
725 | umem_fill_to_kernel(&xsk->umem->fq, descs, rcvd); | ||
726 | xsk->outstanding_tx -= rcvd; | ||
727 | xsk->tx_npkts += rcvd; | ||
728 | } | ||
729 | } | ||
730 | |||
731 | static inline void complete_tx_only(struct xdpsock *xsk) | ||
732 | { | ||
733 | u32 descs[BATCH_SIZE]; | ||
734 | unsigned int rcvd; | ||
735 | |||
736 | if (!xsk->outstanding_tx) | ||
737 | return; | ||
738 | |||
739 | kick_tx(xsk->sfd); | ||
740 | |||
741 | rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, BATCH_SIZE); | ||
742 | if (rcvd > 0) { | ||
743 | xsk->outstanding_tx -= rcvd; | ||
744 | xsk->tx_npkts += rcvd; | ||
745 | } | ||
746 | } | ||
747 | |||
748 | static void rx_drop(struct xdpsock *xsk) | ||
749 | { | ||
750 | struct xdp_desc descs[BATCH_SIZE]; | ||
751 | unsigned int rcvd, i; | ||
752 | |||
753 | rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE); | ||
754 | if (!rcvd) | ||
755 | return; | ||
756 | |||
757 | for (i = 0; i < rcvd; i++) { | ||
758 | u32 idx = descs[i].idx; | ||
759 | |||
760 | lassert(idx < NUM_FRAMES); | ||
761 | #if DEBUG_HEXDUMP | ||
762 | char *pkt; | ||
763 | char buf[32]; | ||
764 | |||
765 | pkt = xq_get_data(xsk, idx, descs[i].offset); | ||
766 | sprintf(buf, "idx=%d", idx); | ||
767 | hex_dump(pkt, descs[i].len, buf); | ||
768 | #endif | ||
769 | } | ||
770 | |||
771 | xsk->rx_npkts += rcvd; | ||
772 | |||
773 | umem_fill_to_kernel_ex(&xsk->umem->fq, descs, rcvd); | ||
774 | } | ||
775 | |||
776 | static void rx_drop_all(void) | ||
777 | { | ||
778 | struct pollfd fds[MAX_SOCKS + 1]; | ||
780 | int i, ret, timeout; | ||
780 | |||
781 | memset(fds, 0, sizeof(fds)); | ||
782 | |||
783 | for (i = 0; i < num_socks; i++) { | ||
784 | fds[i].fd = xsks[i]->sfd; | ||
785 | fds[i].events = POLLIN; | ||
786 | timeout = 1000; /* 1 second */ | ||
787 | } | ||
788 | |||
789 | for (;;) { | ||
790 | if (opt_poll) { | ||
791 | ret = poll(fds, num_socks, timeout); | ||
792 | if (ret <= 0) | ||
793 | continue; | ||
794 | } | ||
795 | |||
796 | for (i = 0; i < num_socks; i++) | ||
797 | rx_drop(xsks[i]); | ||
798 | } | ||
799 | } | ||
800 | |||
801 | static void tx_only(struct xdpsock *xsk) | ||
802 | { | ||
803 | int timeout, ret, nfds = 1; | ||
804 | struct pollfd fds[nfds + 1]; | ||
805 | unsigned int idx = 0; | ||
806 | |||
807 | memset(fds, 0, sizeof(fds)); | ||
808 | fds[0].fd = xsk->sfd; | ||
809 | fds[0].events = POLLOUT; | ||
810 | timeout = 1000; /* 1 second */ | ||
811 | |||
812 | for (;;) { | ||
813 | if (opt_poll) { | ||
814 | ret = poll(fds, nfds, timeout); | ||
815 | if (ret <= 0) | ||
816 | continue; | ||
817 | |||
818 | if (fds[0].fd != xsk->sfd || | ||
819 | !(fds[0].revents & POLLOUT)) | ||
820 | continue; | ||
821 | } | ||
822 | |||
823 | if (xq_nb_free(&xsk->tx, BATCH_SIZE) >= BATCH_SIZE) { | ||
824 | lassert(xq_enq_tx_only(&xsk->tx, idx, BATCH_SIZE) == 0); | ||
825 | |||
826 | xsk->outstanding_tx += BATCH_SIZE; | ||
827 | idx += BATCH_SIZE; | ||
828 | idx %= NUM_FRAMES; | ||
829 | } | ||
830 | |||
831 | complete_tx_only(xsk); | ||
832 | } | ||
833 | } | ||
834 | |||
835 | static void l2fwd(struct xdpsock *xsk) | ||
836 | { | ||
837 | for (;;) { | ||
838 | struct xdp_desc descs[BATCH_SIZE]; | ||
839 | unsigned int rcvd, i; | ||
840 | int ret; | ||
841 | |||
842 | for (;;) { | ||
843 | complete_tx_l2fwd(xsk); | ||
844 | |||
845 | rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE); | ||
846 | if (rcvd > 0) | ||
847 | break; | ||
848 | } | ||
849 | |||
850 | for (i = 0; i < rcvd; i++) { | ||
851 | char *pkt = xq_get_data(xsk, descs[i].idx, | ||
852 | descs[i].offset); | ||
853 | |||
854 | swap_mac_addresses(pkt); | ||
855 | #if DEBUG_HEXDUMP | ||
856 | char buf[32]; | ||
857 | u32 idx = descs[i].idx; | ||
858 | |||
859 | sprintf(buf, "idx=%d", idx); | ||
860 | hex_dump(pkt, descs[i].len, buf); | ||
861 | #endif | ||
862 | } | ||
863 | |||
864 | xsk->rx_npkts += rcvd; | ||
865 | |||
866 | ret = xq_enq(&xsk->tx, descs, rcvd); | ||
867 | lassert(ret == 0); | ||
868 | xsk->outstanding_tx += rcvd; | ||
869 | } | ||
870 | } | ||
871 | |||
872 | int main(int argc, char **argv) | ||
873 | { | ||
874 | struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; | ||
875 | char xdp_filename[256]; | ||
876 | int i, ret, key = 0; | ||
877 | pthread_t pt; | ||
878 | |||
879 | parse_command_line(argc, argv); | ||
880 | |||
881 | if (setrlimit(RLIMIT_MEMLOCK, &r)) { | ||
882 | fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n", | ||
883 | strerror(errno)); | ||
884 | exit(EXIT_FAILURE); | ||
885 | } | ||
886 | |||
887 | snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]); | ||
888 | |||
889 | if (load_bpf_file(xdp_filename)) { | ||
890 | fprintf(stderr, "ERROR: load_bpf_file %s\n", bpf_log_buf); | ||
891 | exit(EXIT_FAILURE); | ||
892 | } | ||
893 | |||
894 | if (!prog_fd[0]) { | ||
895 | fprintf(stderr, "ERROR: load_bpf_file: \"%s\"\n", | ||
896 | strerror(errno)); | ||
897 | exit(EXIT_FAILURE); | ||
898 | } | ||
899 | |||
900 | if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd[0], opt_xdp_flags) < 0) { | ||
901 | fprintf(stderr, "ERROR: link set xdp fd failed\n"); | ||
902 | exit(EXIT_FAILURE); | ||
903 | } | ||
904 | |||
905 | ret = bpf_map_update_elem(map_fd[0], &key, &opt_queue, 0); | ||
906 | if (ret) { | ||
907 | fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n"); | ||
908 | exit(EXIT_FAILURE); | ||
909 | } | ||
910 | |||
911 | /* Create sockets... */ | ||
912 | xsks[num_socks++] = xsk_configure(NULL); | ||
913 | |||
914 | #if RR_LB | ||
915 | for (i = 0; i < MAX_SOCKS - 1; i++) | ||
916 | xsks[num_socks++] = xsk_configure(xsks[0]->umem); | ||
917 | #endif | ||
918 | |||
919 | /* ...and insert them into the map. */ | ||
920 | for (i = 0; i < num_socks; i++) { | ||
921 | key = i; | ||
922 | ret = bpf_map_update_elem(map_fd[1], &key, &xsks[i]->sfd, 0); | ||
923 | if (ret) { | ||
924 | fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i); | ||
925 | exit(EXIT_FAILURE); | ||
926 | } | ||
927 | } | ||
928 | |||
929 | signal(SIGINT, int_exit); | ||
930 | signal(SIGTERM, int_exit); | ||
931 | signal(SIGABRT, int_exit); | ||
932 | |||
933 | setlocale(LC_ALL, ""); | ||
934 | |||
935 | ret = pthread_create(&pt, NULL, poller, NULL); | ||
936 | lassert(ret == 0); | ||
937 | |||
938 | prev_time = get_nsecs(); | ||
939 | |||
940 | if (opt_bench == BENCH_RXDROP) | ||
941 | rx_drop_all(); | ||
942 | else if (opt_bench == BENCH_TXONLY) | ||
943 | tx_only(xsks[0]); | ||
944 | else | ||
945 | l2fwd(xsks[0]); | ||
946 | |||
947 | return 0; | ||
948 | } | ||
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 30ba0fee36e4..8f59897fbda1 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py | |||
@@ -39,9 +39,9 @@ class Helper(object): | |||
39 | Break down helper function protocol into smaller chunks: return type, | 39 | Break down helper function protocol into smaller chunks: return type, |
40 | name, distincts arguments. | 40 | name, distincts arguments. |
41 | """ | 41 | """ |
42 | arg_re = re.compile('^((const )?(struct )?(\w+|...))( (\**)(\w+))?$') | 42 | arg_re = re.compile('((const )?(struct )?(\w+|...))( (\**)(\w+))?$') |
43 | res = {} | 43 | res = {} |
44 | proto_re = re.compile('^(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$') | 44 | proto_re = re.compile('(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$') |
45 | 45 | ||
46 | capture = proto_re.match(self.proto) | 46 | capture = proto_re.match(self.proto) |
47 | res['ret_type'] = capture.group(1) | 47 | res['ret_type'] = capture.group(1) |
@@ -87,7 +87,7 @@ class HeaderParser(object): | |||
87 | # - Same as above, with "const" and/or "struct" in front of type | 87 | # - Same as above, with "const" and/or "struct" in front of type |
88 | # - "..." (undefined number of arguments, for bpf_trace_printk()) | 88 | # - "..." (undefined number of arguments, for bpf_trace_printk()) |
89 | # There is at least one term ("void"), and at most five arguments. | 89 | # There is at least one term ("void"), and at most five arguments. |
90 | p = re.compile('^ \* ((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$') | 90 | p = re.compile(' \* ?((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$') |
91 | capture = p.match(self.line) | 91 | capture = p.match(self.line) |
92 | if not capture: | 92 | if not capture: |
93 | raise NoHelperFound | 93 | raise NoHelperFound |
@@ -95,7 +95,7 @@ class HeaderParser(object): | |||
95 | return capture.group(1) | 95 | return capture.group(1) |
96 | 96 | ||
97 | def parse_desc(self): | 97 | def parse_desc(self): |
98 | p = re.compile('^ \* \tDescription$') | 98 | p = re.compile(' \* ?(?:\t| {6,8})Description$') |
99 | capture = p.match(self.line) | 99 | capture = p.match(self.line) |
100 | if not capture: | 100 | if not capture: |
101 | # Helper can have empty description and we might be parsing another | 101 | # Helper can have empty description and we might be parsing another |
@@ -109,7 +109,7 @@ class HeaderParser(object): | |||
109 | if self.line == ' *\n': | 109 | if self.line == ' *\n': |
110 | desc += '\n' | 110 | desc += '\n' |
111 | else: | 111 | else: |
112 | p = re.compile('^ \* \t\t(.*)') | 112 | p = re.compile(' \* ?(?:\t| {6,8})(?:\t| {8})(.*)') |
113 | capture = p.match(self.line) | 113 | capture = p.match(self.line) |
114 | if capture: | 114 | if capture: |
115 | desc += capture.group(1) + '\n' | 115 | desc += capture.group(1) + '\n' |
@@ -118,7 +118,7 @@ class HeaderParser(object): | |||
118 | return desc | 118 | return desc |
119 | 119 | ||
120 | def parse_ret(self): | 120 | def parse_ret(self): |
121 | p = re.compile('^ \* \tReturn$') | 121 | p = re.compile(' \* ?(?:\t| {6,8})Return$') |
122 | capture = p.match(self.line) | 122 | capture = p.match(self.line) |
123 | if not capture: | 123 | if not capture: |
124 | # Helper can have empty retval and we might be parsing another | 124 | # Helper can have empty retval and we might be parsing another |
@@ -132,7 +132,7 @@ class HeaderParser(object): | |||
132 | if self.line == ' *\n': | 132 | if self.line == ' *\n': |
133 | ret += '\n' | 133 | ret += '\n' |
134 | else: | 134 | else: |
135 | p = re.compile('^ \* \t\t(.*)') | 135 | p = re.compile(' \* ?(?:\t| {6,8})(?:\t| {8})(.*)') |
136 | capture = p.match(self.line) | 136 | capture = p.match(self.line) |
137 | if capture: | 137 | if capture: |
138 | ret += capture.group(1) + '\n' | 138 | ret += capture.group(1) + '\n' |
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 4cafe6a19167..5c508d26b367 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c | |||
@@ -1471,7 +1471,9 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc | |||
1471 | return SECCLASS_QIPCRTR_SOCKET; | 1471 | return SECCLASS_QIPCRTR_SOCKET; |
1472 | case PF_SMC: | 1472 | case PF_SMC: |
1473 | return SECCLASS_SMC_SOCKET; | 1473 | return SECCLASS_SMC_SOCKET; |
1474 | #if PF_MAX > 44 | 1474 | case PF_XDP: |
1475 | return SECCLASS_XDP_SOCKET; | ||
1476 | #if PF_MAX > 45 | ||
1475 | #error New address family defined, please update this function. | 1477 | #error New address family defined, please update this function. |
1476 | #endif | 1478 | #endif |
1477 | } | 1479 | } |
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index 7f0372426494..bd5fe0d3204a 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h | |||
@@ -240,9 +240,11 @@ struct security_class_mapping secclass_map[] = { | |||
240 | { "manage_subnet", NULL } }, | 240 | { "manage_subnet", NULL } }, |
241 | { "bpf", | 241 | { "bpf", |
242 | {"map_create", "map_read", "map_write", "prog_load", "prog_run"} }, | 242 | {"map_create", "map_read", "map_write", "prog_load", "prog_run"} }, |
243 | { "xdp_socket", | ||
244 | { COMMON_SOCK_PERMS, NULL } }, | ||
243 | { NULL } | 245 | { NULL } |
244 | }; | 246 | }; |
245 | 247 | ||
246 | #if PF_MAX > 44 | 248 | #if PF_MAX > 45 |
247 | #error New address family defined, please update secclass_map. | 249 | #error New address family defined, please update secclass_map. |
248 | #endif | 250 | #endif |
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 5f512b14bff9..a6258bc8ec4f 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst | |||
@@ -22,17 +22,19 @@ MAP COMMANDS | |||
22 | ============= | 22 | ============= |
23 | 23 | ||
24 | | **bpftool** **map { show | list }** [*MAP*] | 24 | | **bpftool** **map { show | list }** [*MAP*] |
25 | | **bpftool** **map dump** *MAP* | 25 | | **bpftool** **map dump** *MAP* |
26 | | **bpftool** **map update** *MAP* **key** [**hex**] *BYTES* **value** [**hex**] *VALUE* [*UPDATE_FLAGS*] | 26 | | **bpftool** **map update** *MAP* **key** *DATA* **value** *VALUE* [*UPDATE_FLAGS*] |
27 | | **bpftool** **map lookup** *MAP* **key** [**hex**] *BYTES* | 27 | | **bpftool** **map lookup** *MAP* **key** *DATA* |
28 | | **bpftool** **map getnext** *MAP* [**key** [**hex**] *BYTES*] | 28 | | **bpftool** **map getnext** *MAP* [**key** *DATA*] |
29 | | **bpftool** **map delete** *MAP* **key** [**hex**] *BYTES* | 29 | | **bpftool** **map delete** *MAP* **key** *DATA* |
30 | | **bpftool** **map pin** *MAP* *FILE* | 30 | | **bpftool** **map pin** *MAP* *FILE* |
31 | | **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*] | ||
31 | | **bpftool** **map help** | 32 | | **bpftool** **map help** |
32 | | | 33 | | |
33 | | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } | 34 | | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } |
35 | | *DATA* := { [**hex**] *BYTES* } | ||
34 | | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } | 36 | | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } |
35 | | *VALUE* := { *BYTES* | *MAP* | *PROG* } | 37 | | *VALUE* := { *DATA* | *MAP* | *PROG* } |
36 | | *UPDATE_FLAGS* := { **any** | **exist** | **noexist** } | 38 | | *UPDATE_FLAGS* := { **any** | **exist** | **noexist** } |
37 | 39 | ||
38 | DESCRIPTION | 40 | DESCRIPTION |
@@ -48,7 +50,7 @@ DESCRIPTION | |||
48 | **bpftool map dump** *MAP* | 50 | **bpftool map dump** *MAP* |
49 | Dump all entries in a given *MAP*. | 51 | Dump all entries in a given *MAP*. |
50 | 52 | ||
51 | **bpftool map update** *MAP* **key** [**hex**] *BYTES* **value** [**hex**] *VALUE* [*UPDATE_FLAGS*] | 53 | **bpftool map update** *MAP* **key** *DATA* **value** *VALUE* [*UPDATE_FLAGS*] |
52 | Update map entry for a given *KEY*. | 54 | Update map entry for a given *KEY*. |
53 | 55 | ||
54 | *UPDATE_FLAGS* can be one of: **any** update existing entry | 56 | *UPDATE_FLAGS* can be one of: **any** update existing entry |
@@ -61,13 +63,13 @@ DESCRIPTION | |||
61 | the bytes are parsed as decimal values, unless a "0x" prefix | 63 | the bytes are parsed as decimal values, unless a "0x" prefix |
62 | (for hexadecimal) or a "0" prefix (for octal) is provided. | 64 | (for hexadecimal) or a "0" prefix (for octal) is provided. |
63 | 65 | ||
64 | **bpftool map lookup** *MAP* **key** [**hex**] *BYTES* | 66 | **bpftool map lookup** *MAP* **key** *DATA* |
65 | Lookup **key** in the map. | 67 | Lookup **key** in the map. |
66 | 68 | ||
67 | **bpftool map getnext** *MAP* [**key** [**hex**] *BYTES*] | 69 | **bpftool map getnext** *MAP* [**key** *DATA*] |
68 | Get next key. If *key* is not specified, get first key. | 70 | Get next key. If *key* is not specified, get first key. |
69 | 71 | ||
70 | **bpftool map delete** *MAP* **key** [**hex**] *BYTES* | 72 | **bpftool map delete** *MAP* **key** *DATA* |
71 | Remove entry from the map. | 73 | Remove entry from the map. |
72 | 74 | ||
73 | **bpftool map pin** *MAP* *FILE* | 75 | **bpftool map pin** *MAP* *FILE* |
@@ -75,6 +77,22 @@ DESCRIPTION | |||
75 | 77 | ||
76 | Note: *FILE* must be located in *bpffs* mount. | 78 | Note: *FILE* must be located in *bpffs* mount. |
77 | 79 | ||
80 | **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*] | ||
81 | Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map. | ||
82 | |||
83 | Install perf rings into a perf event array map and dump | ||
84 | the output of any bpf_perf_event_output() call in the kernel. | ||
85 | By default, read the number of possible CPUs on the system | ||
86 | and install a perf ring for each CPU at the corresponding | ||
87 | index in the array. | ||
88 | |||
89 | If **cpu** and **index** are specified, install perf ring | ||
90 | for given **cpu** at **index** in the array (single ring). | ||
91 | |||
92 | Note that installing a perf ring into an array will silently | ||
93 | replace any existing ring. Any other application that | ||
94 | installed its rings earlier will stop receiving events. | ||
95 | |||
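For example, hypothetical invocations (the map id is illustrative): dump all rings of the array, or a single ring for one CPU::

    # bpftool map event_pipe id 42
    # bpftool map event_pipe id 42 cpu 0 index 0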
78 | **bpftool map help** | 96 | **bpftool map help** |
79 | Print short help message. | 97 | Print short help message. |
80 | 98 | ||
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 20689a321ffe..564cb0d9692b 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst | |||
@@ -23,7 +23,7 @@ SYNOPSIS | |||
23 | 23 | ||
24 | *MAP-COMMANDS* := | 24 | *MAP-COMMANDS* := |
25 | { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete** | 25 | { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete** |
26 | | **pin** | **help** } | 26 | | **pin** | **event_pipe** | **help** } |
27 | 27 | ||
28 | *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** | 28 | *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** |
29 | | **load** | **help** } | 29 | | **load** | **help** } |
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 4e69782c4a79..892dbf095bff 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile | |||
@@ -39,7 +39,12 @@ CC = gcc | |||
39 | 39 | ||
40 | CFLAGS += -O2 | 40 | CFLAGS += -O2 |
41 | CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers | 41 | CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers |
42 | CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/ | 42 | CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ |
43 | -I$(srctree)/kernel/bpf/ \ | ||
44 | -I$(srctree)/tools/include \ | ||
45 | -I$(srctree)/tools/include/uapi \ | ||
46 | -I$(srctree)/tools/lib/bpf \ | ||
47 | -I$(srctree)/tools/perf | ||
43 | CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' | 48 | CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' |
44 | LIBS = -lelf -lbfd -lopcodes $(LIBBPF) | 49 | LIBS = -lelf -lbfd -lopcodes $(LIBBPF) |
45 | 50 | ||
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 852d84a98acd..b301c9b315f1 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool | |||
@@ -1,6 +1,6 @@ | |||
1 | # bpftool(8) bash completion -*- shell-script -*- | 1 | # bpftool(8) bash completion -*- shell-script -*- |
2 | # | 2 | # |
3 | # Copyright (C) 2017 Netronome Systems, Inc. | 3 | # Copyright (C) 2017-2018 Netronome Systems, Inc. |
4 | # | 4 | # |
5 | # This software is dual licensed under the GNU General License | 5 | # This software is dual licensed under the GNU General License |
6 | # Version 2, June 1991 as shown in the file COPYING in the top-level | 6 | # Version 2, June 1991 as shown in the file COPYING in the top-level |
@@ -79,6 +79,14 @@ _bpftool_get_map_ids() | |||
79 | command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) ) | 79 | command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) ) |
80 | } | 80 | } |
81 | 81 | ||
82 | _bpftool_get_perf_map_ids() | ||
83 | { | ||
84 | COMPREPLY+=( $( compgen -W "$( bpftool -jp map 2>&1 | \ | ||
85 | command grep -C2 perf_event_array | \ | ||
86 | command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) ) | ||
87 | } | ||
88 | |||
89 | |||
82 | _bpftool_get_prog_ids() | 90 | _bpftool_get_prog_ids() |
83 | { | 91 | { |
84 | COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \ | 92 | COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \ |
@@ -359,10 +367,34 @@ _bpftool() | |||
359 | fi | 367 | fi |
360 | return 0 | 368 | return 0 |
361 | ;; | 369 | ;; |
370 | event_pipe) | ||
371 | case $prev in | ||
372 | $command) | ||
373 | COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) | ||
374 | return 0 | ||
375 | ;; | ||
376 | id) | ||
377 | _bpftool_get_perf_map_ids | ||
378 | return 0 | ||
379 | ;; | ||
380 | cpu) | ||
381 | return 0 | ||
382 | ;; | ||
383 | index) | ||
384 | return 0 | ||
385 | ;; | ||
386 | *) | ||
387 | _bpftool_once_attr 'cpu' | ||
388 | _bpftool_once_attr 'index' | ||
389 | return 0 | ||
390 | ;; | ||
391 | esac | ||
392 | ;; | ||
362 | *) | 393 | *) |
363 | [[ $prev == $object ]] && \ | 394 | [[ $prev == $object ]] && \ |
364 | COMPREPLY=( $( compgen -W 'delete dump getnext help \ | 395 | COMPREPLY=( $( compgen -W 'delete dump getnext help \ |
365 | lookup pin show list update' -- "$cur" ) ) | 396 | lookup pin event_pipe show list update' -- \ |
397 | "$cur" ) ) | ||
366 | ;; | 398 | ;; |
367 | esac | 399 | esac |
368 | ;; | 400 | ;; |
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 465995281dcd..32f9e397a6c0 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2017-2018 Netronome Systems, Inc. |
3 | * | 3 | * |
4 | * This software is dual licensed under the GNU General License Version 2, | 4 | * This software is dual licensed under the GNU General License Version 2, |
5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
@@ -33,6 +33,7 @@ | |||
33 | 33 | ||
34 | /* Author: Jakub Kicinski <kubakici@wp.pl> */ | 34 | /* Author: Jakub Kicinski <kubakici@wp.pl> */ |
35 | 35 | ||
36 | #include <ctype.h> | ||
36 | #include <errno.h> | 37 | #include <errno.h> |
37 | #include <fcntl.h> | 38 | #include <fcntl.h> |
38 | #include <fts.h> | 39 | #include <fts.h> |
@@ -330,6 +331,16 @@ char *get_fdinfo(int fd, const char *key) | |||
330 | return NULL; | 331 | return NULL; |
331 | } | 332 | } |
332 | 333 | ||
334 | void print_data_json(uint8_t *data, size_t len) | ||
335 | { | ||
336 | unsigned int i; | ||
337 | |||
338 | jsonw_start_array(json_wtr); | ||
339 | for (i = 0; i < len; i++) | ||
340 | jsonw_printf(json_wtr, "%d", data[i]); | ||
341 | jsonw_end_array(json_wtr); | ||
342 | } | ||
343 | |||
333 | void print_hex_data_json(uint8_t *data, size_t len) | 344 | void print_hex_data_json(uint8_t *data, size_t len) |
334 | { | 345 | { |
335 | unsigned int i; | 346 | unsigned int i; |
@@ -420,6 +431,70 @@ void delete_pinned_obj_table(struct pinned_obj_table *tab) | |||
420 | } | 431 | } |
421 | } | 432 | } |
422 | 433 | ||
434 | unsigned int get_page_size(void) | ||
435 | { | ||
436 | static int result; | ||
437 | |||
438 | if (!result) | ||
439 | result = getpagesize(); | ||
440 | return result; | ||
441 | } | ||
442 | |||
443 | unsigned int get_possible_cpus(void) | ||
444 | { | ||
445 | static unsigned int result; | ||
446 | char buf[128]; | ||
447 | long int n; | ||
448 | char *ptr; | ||
449 | int fd; | ||
450 | |||
451 | if (result) | ||
452 | return result; | ||
453 | |||
454 | fd = open("/sys/devices/system/cpu/possible", O_RDONLY); | ||
455 | if (fd < 0) { | ||
456 | p_err("can't open sysfs possible cpus"); | ||
457 | exit(-1); | ||
458 | } | ||
459 | |||
460 | n = read(fd, buf, sizeof(buf)); | ||
461 | if (n < 2) { | ||
462 | p_err("can't read sysfs possible cpus"); | ||
463 | exit(-1); | ||
464 | } | ||
465 | close(fd); | ||
466 | |||
467 | if (n == sizeof(buf)) { | ||
468 | p_err("read sysfs possible cpus overflow"); | ||
469 | exit(-1); | ||
470 | } | ||
471 | |||
472 | ptr = buf; | ||
473 | n = 0; | ||
474 | while (*ptr && *ptr != '\n') { | ||
475 | unsigned int a, b; | ||
476 | |||
477 | if (sscanf(ptr, "%u-%u", &a, &b) == 2) { | ||
478 | n += b - a + 1; | ||
479 | |||
480 | ptr = strchr(ptr, '-') + 1; | ||
481 | } else if (sscanf(ptr, "%u", &a) == 1) { | ||
482 | n++; | ||
483 | } else { | ||
484 | assert(0); | ||
485 | } | ||
486 | |||
487 | while (isdigit(*ptr)) | ||
488 | ptr++; | ||
489 | if (*ptr == ',') | ||
490 | ptr++; | ||
491 | } | ||
492 | |||
493 | result = n; | ||
494 | |||
495 | return result; | ||
496 | } | ||
497 | |||
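The /sys/devices/system/cpu/possible file parsed above is a comma-separated list of CPU ids and ranges, e.g. "0-3,8". A standalone sketch of the same counting rule, run on an assumed sample string rather than real sysfs::

    #include <ctype.h>
    #include <stdio.h>
    #include <string.h>

    static unsigned int count_cpus(const char *s)
    {
    	unsigned int n = 0, a, b;

    	while (*s && *s != '\n') {
    		if (sscanf(s, "%u-%u", &a, &b) == 2) {
    			n += b - a + 1;		/* a range such as "0-3" */
    			s = strchr(s, '-') + 1;
    		} else if (sscanf(s, "%u", &a) == 1) {
    			n++;			/* a single id such as "8" */
    		}
    		while (isdigit((unsigned char)*s))
    			s++;
    		if (*s == ',')
    			s++;
    	}
    	return n;
    }

    int main(void)
    {
    	printf("%u\n", count_cpus("0-3,8"));	/* prints 5 */
    	return 0;
    }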
423 | static char * | 498 | static char * |
424 | ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf) | 499 | ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf) |
425 | { | 500 | { |
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index b8e9584d6246..6173cd997e7a 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2017-2018 Netronome Systems, Inc. |
3 | * | 3 | * |
4 | * This software is dual licensed under the GNU General License Version 2, | 4 | * This software is dual licensed under the GNU General License Version 2, |
5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
@@ -117,14 +117,19 @@ int do_pin_fd(int fd, const char *name); | |||
117 | 117 | ||
118 | int do_prog(int argc, char **arg); | 118 | int do_prog(int argc, char **arg); |
119 | int do_map(int argc, char **arg); | 119 | int do_map(int argc, char **arg); |
120 | int do_event_pipe(int argc, char **argv); | ||
120 | int do_cgroup(int argc, char **arg); | 121 | int do_cgroup(int argc, char **arg); |
121 | 122 | ||
122 | int prog_parse_fd(int *argc, char ***argv); | 123 | int prog_parse_fd(int *argc, char ***argv); |
124 | int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); | ||
123 | 125 | ||
124 | void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, | 126 | void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, |
125 | const char *arch); | 127 | const char *arch); |
128 | void print_data_json(uint8_t *data, size_t len); | ||
126 | void print_hex_data_json(uint8_t *data, size_t len); | 129 | void print_hex_data_json(uint8_t *data, size_t len); |
127 | 130 | ||
131 | unsigned int get_page_size(void); | ||
132 | unsigned int get_possible_cpus(void); | ||
128 | const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino); | 133 | const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino); |
129 | 134 | ||
130 | #endif | 135 | #endif |
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index a6cdb640a0d7..af6766e956ba 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2017-2018 Netronome Systems, Inc. |
3 | * | 3 | * |
4 | * This software is dual licensed under the GNU General License Version 2, | 4 | * This software is dual licensed under the GNU General License Version 2, |
5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
@@ -34,7 +34,6 @@ | |||
34 | /* Author: Jakub Kicinski <kubakici@wp.pl> */ | 34 | /* Author: Jakub Kicinski <kubakici@wp.pl> */ |
35 | 35 | ||
36 | #include <assert.h> | 36 | #include <assert.h> |
37 | #include <ctype.h> | ||
38 | #include <errno.h> | 37 | #include <errno.h> |
39 | #include <fcntl.h> | 38 | #include <fcntl.h> |
40 | #include <stdbool.h> | 39 | #include <stdbool.h> |
@@ -69,61 +68,6 @@ static const char * const map_type_name[] = { | |||
69 | [BPF_MAP_TYPE_CPUMAP] = "cpumap", | 68 | [BPF_MAP_TYPE_CPUMAP] = "cpumap", |
70 | }; | 69 | }; |
71 | 70 | ||
72 | static unsigned int get_possible_cpus(void) | ||
73 | { | ||
74 | static unsigned int result; | ||
75 | char buf[128]; | ||
76 | long int n; | ||
77 | char *ptr; | ||
78 | int fd; | ||
79 | |||
80 | if (result) | ||
81 | return result; | ||
82 | |||
83 | fd = open("/sys/devices/system/cpu/possible", O_RDONLY); | ||
84 | if (fd < 0) { | ||
85 | p_err("can't open sysfs possible cpus"); | ||
86 | exit(-1); | ||
87 | } | ||
88 | |||
89 | n = read(fd, buf, sizeof(buf)); | ||
90 | if (n < 2) { | ||
91 | p_err("can't read sysfs possible cpus"); | ||
92 | exit(-1); | ||
93 | } | ||
94 | close(fd); | ||
95 | |||
96 | if (n == sizeof(buf)) { | ||
97 | p_err("read sysfs possible cpus overflow"); | ||
98 | exit(-1); | ||
99 | } | ||
100 | |||
101 | ptr = buf; | ||
102 | n = 0; | ||
103 | while (*ptr && *ptr != '\n') { | ||
104 | unsigned int a, b; | ||
105 | |||
106 | if (sscanf(ptr, "%u-%u", &a, &b) == 2) { | ||
107 | n += b - a + 1; | ||
108 | |||
109 | ptr = strchr(ptr, '-') + 1; | ||
110 | } else if (sscanf(ptr, "%u", &a) == 1) { | ||
111 | n++; | ||
112 | } else { | ||
113 | assert(0); | ||
114 | } | ||
115 | |||
116 | while (isdigit(*ptr)) | ||
117 | ptr++; | ||
118 | if (*ptr == ',') | ||
119 | ptr++; | ||
120 | } | ||
121 | |||
122 | result = n; | ||
123 | |||
124 | return result; | ||
125 | } | ||
126 | |||
127 | static bool map_is_per_cpu(__u32 type) | 71 | static bool map_is_per_cpu(__u32 type) |
128 | { | 72 | { |
129 | return type == BPF_MAP_TYPE_PERCPU_HASH || | 73 | return type == BPF_MAP_TYPE_PERCPU_HASH || |
@@ -186,8 +130,7 @@ static int map_parse_fd(int *argc, char ***argv) | |||
186 | return -1; | 130 | return -1; |
187 | } | 131 | } |
188 | 132 | ||
189 | static int | 133 | int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) |
190 | map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) | ||
191 | { | 134 | { |
192 | int err; | 135 | int err; |
193 | int fd; | 136 | int fd; |
@@ -873,23 +816,25 @@ static int do_help(int argc, char **argv) | |||
873 | 816 | ||
874 | fprintf(stderr, | 817 | fprintf(stderr, |
875 | "Usage: %s %s { show | list } [MAP]\n" | 818 | "Usage: %s %s { show | list } [MAP]\n" |
876 | " %s %s dump MAP\n" | 819 | " %s %s dump MAP\n" |
877 | " %s %s update MAP key [hex] BYTES value [hex] VALUE [UPDATE_FLAGS]\n" | 820 | " %s %s update MAP key DATA value VALUE [UPDATE_FLAGS]\n" |
878 | " %s %s lookup MAP key [hex] BYTES\n" | 821 | " %s %s lookup MAP key DATA\n" |
879 | " %s %s getnext MAP [key [hex] BYTES]\n" | 822 | " %s %s getnext MAP [key DATA]\n" |
880 | " %s %s delete MAP key [hex] BYTES\n" | 823 | " %s %s delete MAP key DATA\n" |
881 | " %s %s pin MAP FILE\n" | 824 | " %s %s pin MAP FILE\n" |
825 | " %s %s event_pipe MAP [cpu N index M]\n" | ||
882 | " %s %s help\n" | 826 | " %s %s help\n" |
883 | "\n" | 827 | "\n" |
884 | " MAP := { id MAP_ID | pinned FILE }\n" | 828 | " MAP := { id MAP_ID | pinned FILE }\n" |
829 | " DATA := { [hex] BYTES }\n" | ||
885 | " " HELP_SPEC_PROGRAM "\n" | 830 | " " HELP_SPEC_PROGRAM "\n" |
886 | " VALUE := { BYTES | MAP | PROG }\n" | 831 | " VALUE := { DATA | MAP | PROG }\n" |
887 | " UPDATE_FLAGS := { any | exist | noexist }\n" | 832 | " UPDATE_FLAGS := { any | exist | noexist }\n" |
888 | " " HELP_SPEC_OPTIONS "\n" | 833 | " " HELP_SPEC_OPTIONS "\n" |
889 | "", | 834 | "", |
890 | bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], | 835 | bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], |
891 | bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], | 836 | bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], |
892 | bin_name, argv[-2], bin_name, argv[-2]); | 837 | bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]); |
893 | 838 | ||
894 | return 0; | 839 | return 0; |
895 | } | 840 | } |
@@ -904,6 +849,7 @@ static const struct cmd cmds[] = { | |||
904 | { "getnext", do_getnext }, | 849 | { "getnext", do_getnext }, |
905 | { "delete", do_delete }, | 850 | { "delete", do_delete }, |
906 | { "pin", do_pin }, | 851 | { "pin", do_pin }, |
852 | { "event_pipe", do_event_pipe }, | ||
907 | { 0 } | 853 | { 0 } |
908 | }; | 854 | }; |
909 | 855 | ||
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c new file mode 100644 index 000000000000..c5a2ced8552d --- /dev/null +++ b/tools/bpf/bpftool/map_perf_ring.c | |||
@@ -0,0 +1,347 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0-only | ||
2 | /* Copyright (C) 2018 Netronome Systems, Inc. */ | ||
3 | /* This program is free software; you can redistribute it and/or | ||
4 | * modify it under the terms of version 2 of the GNU General Public | ||
5 | * License as published by the Free Software Foundation. | ||
6 | */ | ||
7 | #include <errno.h> | ||
8 | #include <fcntl.h> | ||
9 | #include <libbpf.h> | ||
10 | #include <poll.h> | ||
11 | #include <signal.h> | ||
12 | #include <stdbool.h> | ||
13 | #include <stdio.h> | ||
14 | #include <stdlib.h> | ||
15 | #include <string.h> | ||
16 | #include <time.h> | ||
17 | #include <unistd.h> | ||
18 | #include <linux/bpf.h> | ||
19 | #include <linux/perf_event.h> | ||
20 | #include <sys/ioctl.h> | ||
21 | #include <sys/mman.h> | ||
22 | #include <sys/syscall.h> | ||
23 | |||
24 | #include <bpf.h> | ||
25 | #include <perf-sys.h> | ||
26 | |||
27 | #include "main.h" | ||
28 | |||
29 | #define MMAP_PAGE_CNT 16 | ||
30 | |||
31 | static bool stop; | ||
32 | |||
33 | struct event_ring_info { | ||
34 | int fd; | ||
35 | int key; | ||
36 | unsigned int cpu; | ||
37 | void *mem; | ||
38 | }; | ||
39 | |||
40 | struct perf_event_sample { | ||
41 | struct perf_event_header header; | ||
42 | __u32 size; | ||
43 | unsigned char data[]; | ||
44 | }; | ||
45 | |||
46 | static void int_exit(int signo) | ||
47 | { | ||
48 | fprintf(stderr, "Stopping...\n"); | ||
49 | stop = true; | ||
50 | } | ||
51 | |||
52 | static void | ||
53 | print_bpf_output(struct event_ring_info *ring, struct perf_event_sample *e) | ||
54 | { | ||
55 | struct { | ||
56 | struct perf_event_header header; | ||
57 | __u64 id; | ||
58 | __u64 lost; | ||
59 | } *lost = (void *)e; | ||
60 | struct timespec ts; | ||
61 | |||
62 | if (clock_gettime(CLOCK_MONOTONIC, &ts)) { | ||
63 | perror("Can't read clock for timestamp"); | ||
64 | return; | ||
65 | } | ||
66 | |||
67 | if (json_output) { | ||
68 | jsonw_start_object(json_wtr); | ||
69 | jsonw_name(json_wtr, "timestamp"); | ||
70 | jsonw_uint(json_wtr, ts.tv_sec * 1000000000ull + ts.tv_nsec); | ||
71 | jsonw_name(json_wtr, "type"); | ||
72 | jsonw_uint(json_wtr, e->header.type); | ||
73 | jsonw_name(json_wtr, "cpu"); | ||
74 | jsonw_uint(json_wtr, ring->cpu); | ||
75 | jsonw_name(json_wtr, "index"); | ||
76 | jsonw_uint(json_wtr, ring->key); | ||
77 | if (e->header.type == PERF_RECORD_SAMPLE) { | ||
78 | jsonw_name(json_wtr, "data"); | ||
79 | print_data_json(e->data, e->size); | ||
80 | } else if (e->header.type == PERF_RECORD_LOST) { | ||
81 | jsonw_name(json_wtr, "lost"); | ||
82 | jsonw_start_object(json_wtr); | ||
83 | jsonw_name(json_wtr, "id"); | ||
84 | jsonw_uint(json_wtr, lost->id); | ||
85 | jsonw_name(json_wtr, "count"); | ||
86 | jsonw_uint(json_wtr, lost->lost); | ||
87 | jsonw_end_object(json_wtr); | ||
88 | } | ||
89 | jsonw_end_object(json_wtr); | ||
90 | } else { | ||
91 | if (e->header.type == PERF_RECORD_SAMPLE) { | ||
92 | printf("== @%ld.%ld CPU: %d index: %d =====\n", | ||
93 | (long)ts.tv_sec, ts.tv_nsec, | ||
94 | ring->cpu, ring->key); | ||
95 | fprint_hex(stdout, e->data, e->size, " "); | ||
96 | printf("\n"); | ||
97 | } else if (e->header.type == PERF_RECORD_LOST) { | ||
98 | printf("lost %lld events\n", lost->lost); | ||
99 | } else { | ||
100 | printf("unknown event type=%d size=%d\n", | ||
101 | e->header.type, e->header.size); | ||
102 | } | ||
103 | } | ||
104 | } | ||
105 | |||
106 | static void | ||
107 | perf_event_read(struct event_ring_info *ring, void **buf, size_t *buf_len) | ||
108 | { | ||
109 | volatile struct perf_event_mmap_page *header = ring->mem; | ||
110 | __u64 buffer_size = MMAP_PAGE_CNT * get_page_size(); | ||
111 | __u64 data_tail = header->data_tail; | ||
112 | __u64 data_head = header->data_head; | ||
113 | void *base, *begin, *end; | ||
114 | |||
115 | asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ | ||
116 | if (data_head == data_tail) | ||
117 | return; | ||
118 | |||
119 | base = ((char *)header) + get_page_size(); | ||
120 | |||
121 | begin = base + data_tail % buffer_size; | ||
122 | end = base + data_head % buffer_size; | ||
123 | |||
124 | while (begin != end) { | ||
125 | struct perf_event_sample *e; | ||
126 | |||
127 | e = begin; | ||
128 | if (begin + e->header.size > base + buffer_size) { | ||
129 | long len = base + buffer_size - begin; | ||
130 | |||
131 | if (*buf_len < e->header.size) { | ||
132 | free(*buf); | ||
133 | *buf = malloc(e->header.size); | ||
134 | if (!*buf) { | ||
135 | fprintf(stderr, | ||
136 | "can't allocate memory\n"); | ||
137 | stop = true; | ||
138 | return; | ||
139 | } | ||
140 | *buf_len = e->header.size; | ||
141 | } | ||
142 | |||
143 | memcpy(*buf, begin, len); | ||
144 | memcpy(*buf + len, base, e->header.size - len); | ||
145 | e = (void *)*buf; | ||
146 | begin = base + e->header.size - len; | ||
147 | } else if (begin + e->header.size == base + buffer_size) { | ||
148 | begin = base; | ||
149 | } else { | ||
150 | begin += e->header.size; | ||
151 | } | ||
152 | |||
153 | print_bpf_output(ring, e); | ||
154 | } | ||
155 | |||
156 | __sync_synchronize(); /* smp_mb() */ | ||
157 | header->data_tail = data_head; | ||
158 | } | ||
159 | |||
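The subtle part of perf_event_read() is the first branch: a record may straddle the end of the ring, in which case it is linearized into the scratch buffer with two memcpy() calls. A toy illustration of that reassembly (buffer contents made up)::

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
    	char ring[8] = "EFGHABCD";	/* record "ABCDEFGH" wrapped at offset 4 */
    	char rec[9] = {0};
    	size_t start = 4, rec_len = 8;
    	size_t first = sizeof(ring) - start;	/* bytes until the end of the ring */

    	memcpy(rec, ring + start, first);		/* tail piece: "ABCD" */
    	memcpy(rec + first, ring, rec_len - first);	/* wrapped piece: "EFGH" */
    	printf("%s\n", rec);	/* prints ABCDEFGH */
    	return 0;
    }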
160 | static int perf_mmap_size(void) | ||
161 | { | ||
162 | return get_page_size() * (MMAP_PAGE_CNT + 1); | ||
163 | } | ||
164 | |||
165 | static void *perf_event_mmap(int fd) | ||
166 | { | ||
167 | int mmap_size = perf_mmap_size(); | ||
168 | void *base; | ||
169 | |||
170 | base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); | ||
171 | if (base == MAP_FAILED) { | ||
172 | p_err("event mmap failed: %s\n", strerror(errno)); | ||
173 | return NULL; | ||
174 | } | ||
175 | |||
176 | return base; | ||
177 | } | ||
178 | |||
179 | static void perf_event_unmap(void *mem) | ||
180 | { | ||
181 | if (munmap(mem, perf_mmap_size())) | ||
182 | fprintf(stderr, "Can't unmap ring memory!\n"); | ||
183 | } | ||
184 | |||
185 | static int bpf_perf_event_open(int map_fd, int key, int cpu) | ||
186 | { | ||
187 | struct perf_event_attr attr = { | ||
188 | .sample_type = PERF_SAMPLE_RAW, | ||
189 | .type = PERF_TYPE_SOFTWARE, | ||
190 | .config = PERF_COUNT_SW_BPF_OUTPUT, | ||
191 | }; | ||
192 | int pmu_fd; | ||
193 | |||
194 | pmu_fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); | ||
195 | if (pmu_fd < 0) { | ||
196 | p_err("failed to open perf event %d for CPU %d", key, cpu); | ||
197 | return -1; | ||
198 | } | ||
199 | |||
200 | if (bpf_map_update_elem(map_fd, &key, &pmu_fd, BPF_ANY)) { | ||
201 | p_err("failed to update map for event %d for CPU %d", key, cpu); | ||
202 | goto err_close; | ||
203 | } | ||
204 | if (ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) { | ||
205 | p_err("failed to enable event %d for CPU %d", key, cpu); | ||
206 | goto err_close; | ||
207 | } | ||
208 | |||
209 | return pmu_fd; | ||
210 | |||
211 | err_close: | ||
212 | close(pmu_fd); | ||
213 | return -1; | ||
214 | } | ||
215 | |||
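For context, bpf_perf_event_open() above sets up the consumer side; the producer is a BPF program calling bpf_perf_event_output() into the same BPF_MAP_TYPE_PERF_EVENT_ARRAY. A hedged sketch of such a producer, assuming the samples-style bpf_helpers.h (which provides SEC(), struct bpf_map_def and the helper declarations); map and section names are illustrative::

    #include <linux/bpf.h>
    #include <linux/ptrace.h>
    #include "bpf_helpers.h"

    struct bpf_map_def SEC("maps") my_events = {
    	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
    	.key_size = sizeof(int),
    	.value_size = sizeof(__u32),
    	.max_entries = 64,
    };

    SEC("kprobe/sys_write")
    int producer(struct pt_regs *ctx)
    {
    	__u32 val = 42;

    	/* BPF_F_CURRENT_CPU selects the ring at the current CPU's index */
    	bpf_perf_event_output(ctx, &my_events, BPF_F_CURRENT_CPU,
    			      &val, sizeof(val));
    	return 0;
    }

    char _license[] SEC("license") = "GPL";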
216 | int do_event_pipe(int argc, char **argv) | ||
217 | { | ||
218 | int i, nfds, map_fd, index = -1, cpu = -1; | ||
219 | struct bpf_map_info map_info = {}; | ||
220 | struct event_ring_info *rings; | ||
221 | size_t tmp_buf_sz = 0; | ||
222 | void *tmp_buf = NULL; | ||
223 | struct pollfd *pfds; | ||
224 | __u32 map_info_len; | ||
225 | bool do_all = true; | ||
226 | |||
227 | map_info_len = sizeof(map_info); | ||
228 | map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len); | ||
229 | if (map_fd < 0) | ||
230 | return -1; | ||
231 | |||
232 | if (map_info.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { | ||
233 | p_err("map is not a perf event array"); | ||
234 | goto err_close_map; | ||
235 | } | ||
236 | |||
237 | while (argc) { | ||
238 | if (argc < 2) | ||
239 | BAD_ARG(); | ||
240 | |||
241 | if (is_prefix(*argv, "cpu")) { | ||
242 | char *endptr; | ||
243 | |||
244 | NEXT_ARG(); | ||
245 | cpu = strtoul(*argv, &endptr, 0); | ||
246 | if (*endptr) { | ||
247 | p_err("can't parse %s as CPU ID", **argv); | ||
248 | goto err_close_map; | ||
249 | } | ||
250 | |||
251 | NEXT_ARG(); | ||
252 | } else if (is_prefix(*argv, "index")) { | ||
253 | char *endptr; | ||
254 | |||
255 | NEXT_ARG(); | ||
256 | index = strtoul(*argv, &endptr, 0); | ||
257 | if (*endptr) { | ||
258 | p_err("can't parse %s as index", **argv); | ||
259 | goto err_close_map; | ||
260 | } | ||
261 | |||
262 | NEXT_ARG(); | ||
263 | } else { | ||
264 | BAD_ARG(); | ||
265 | } | ||
266 | |||
267 | do_all = false; | ||
268 | } | ||
269 | |||
270 | if (!do_all) { | ||
271 | if (index == -1 || cpu == -1) { | ||
272 | p_err("cpu and index must be specified together"); | ||
273 | goto err_close_map; | ||
274 | } | ||
275 | |||
276 | nfds = 1; | ||
277 | } else { | ||
278 | nfds = min(get_possible_cpus(), map_info.max_entries); | ||
279 | cpu = 0; | ||
280 | index = 0; | ||
281 | } | ||
282 | |||
283 | rings = calloc(nfds, sizeof(rings[0])); | ||
284 | if (!rings) | ||
285 | goto err_close_map; | ||
286 | |||
287 | pfds = calloc(nfds, sizeof(pfds[0])); | ||
288 | if (!pfds) | ||
289 | goto err_free_rings; | ||
290 | |||
291 | for (i = 0; i < nfds; i++) { | ||
292 | rings[i].cpu = cpu + i; | ||
293 | rings[i].key = index + i; | ||
294 | |||
295 | rings[i].fd = bpf_perf_event_open(map_fd, rings[i].key, | ||
296 | rings[i].cpu); | ||
297 | if (rings[i].fd < 0) | ||
298 | goto err_close_fds_prev; | ||
299 | |||
300 | rings[i].mem = perf_event_mmap(rings[i].fd); | ||
301 | if (!rings[i].mem) | ||
302 | goto err_close_fds_current; | ||
303 | |||
304 | pfds[i].fd = rings[i].fd; | ||
305 | pfds[i].events = POLLIN; | ||
306 | } | ||
307 | |||
308 | signal(SIGINT, int_exit); | ||
309 | signal(SIGHUP, int_exit); | ||
310 | signal(SIGTERM, int_exit); | ||
311 | |||
312 | if (json_output) | ||
313 | jsonw_start_array(json_wtr); | ||
314 | |||
315 | while (!stop) { | ||
316 | poll(pfds, nfds, 200); | ||
317 | for (i = 0; i < nfds; i++) | ||
318 | perf_event_read(&rings[i], &tmp_buf, &tmp_buf_sz); | ||
319 | } | ||
320 | free(tmp_buf); | ||
321 | |||
322 | if (json_output) | ||
323 | jsonw_end_array(json_wtr); | ||
324 | |||
325 | for (i = 0; i < nfds; i++) { | ||
326 | perf_event_unmap(rings[i].mem); | ||
327 | close(rings[i].fd); | ||
328 | } | ||
329 | free(pfds); | ||
330 | free(rings); | ||
331 | close(map_fd); | ||
332 | |||
333 | return 0; | ||
334 | |||
335 | err_close_fds_prev: | ||
336 | while (i--) { | ||
337 | perf_event_unmap(rings[i].mem); | ||
338 | err_close_fds_current: | ||
339 | close(rings[i].fd); | ||
340 | } | ||
341 | free(pfds); | ||
342 | err_free_rings: | ||
343 | free(rings); | ||
344 | err_close_map: | ||
345 | close(map_fd); | ||
346 | return -1; | ||
347 | } | ||
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index e71a0a11afde..9bdfdf2d3fbe 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c | |||
@@ -96,7 +96,10 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) | |||
96 | return; | 96 | return; |
97 | } | 97 | } |
98 | 98 | ||
99 | strftime(buf, size, "%b %d/%H:%M", &load_tm); | 99 | if (json_output) |
100 | strftime(buf, size, "%s", &load_tm); | ||
101 | else | ||
102 | strftime(buf, size, "%FT%T%z", &load_tm); | ||
100 | } | 103 | } |
101 | 104 | ||
102 | static int prog_fd_by_tag(unsigned char *tag) | 105 | static int prog_fd_by_tag(unsigned char *tag) |
@@ -245,7 +248,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) | |||
245 | print_boot_time(info->load_time, buf, sizeof(buf)); | 248 | print_boot_time(info->load_time, buf, sizeof(buf)); |
246 | 249 | ||
247 | /* Piggy back on load_time, since 0 uid is a valid one */ | 250 | /* Piggy back on load_time, since 0 uid is a valid one */ |
248 | jsonw_string_field(json_wtr, "loaded_at", buf); | 251 | jsonw_name(json_wtr, "loaded_at"); |
252 | jsonw_printf(json_wtr, "%s", buf); | ||
249 | jsonw_uint_field(json_wtr, "uid", info->created_by_uid); | 253 | jsonw_uint_field(json_wtr, "uid", info->created_by_uid); |
250 | } | 254 | } |
251 | 255 | ||
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index da77a9388947..83a95ae388dd 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h | |||
@@ -828,12 +828,12 @@ union bpf_attr { | |||
828 | * | 828 | * |
829 | * Also, be aware that the newer helper | 829 | * Also, be aware that the newer helper |
830 | * **bpf_perf_event_read_value**\ () is recommended over | 830 | * **bpf_perf_event_read_value**\ () is recommended over |
831 | * **bpf_perf_event_read*\ () in general. The latter has some ABI | 831 | * **bpf_perf_event_read**\ () in general. The latter has some ABI |
832 | * quirks where error and counter value are used as a return code | 832 | * quirks where error and counter value are used as a return code |
833 | * (which is wrong to do since ranges may overlap). This issue is | 833 | * (which is wrong to do since ranges may overlap). This issue is |
834 | * fixed with bpf_perf_event_read_value(), which at the same time | 834 | * fixed with **bpf_perf_event_read_value**\ (), which at the same |
835 | * provides more features over the **bpf_perf_event_read**\ () | 835 | * time provides more features over the **bpf_perf_event_read**\ |
836 | * interface. Please refer to the description of | 836 | * () interface. Please refer to the description of |
837 | * **bpf_perf_event_read_value**\ () for details. | 837 | * **bpf_perf_event_read_value**\ () for details. |
838 | * Return | 838 | * Return |
839 | * The value of the perf event counter read from the map, or a | 839 | * The value of the perf event counter read from the map, or a |
@@ -1361,7 +1361,7 @@ union bpf_attr { | |||
1361 | * Return | 1361 | * Return |
1362 | * 0 | 1362 | * 0 |
1363 | * | 1363 | * |
1364 | * int bpf_setsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen) | 1364 | * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) |
1365 | * Description | 1365 | * Description |
1366 | * Emulate a call to **setsockopt()** on the socket associated to | 1366 | * Emulate a call to **setsockopt()** on the socket associated to |
1367 | * *bpf_socket*, which must be a full socket. The *level* at | 1367 | * *bpf_socket*, which must be a full socket. The *level* at |
@@ -1435,7 +1435,7 @@ union bpf_attr { | |||
1435 | * Return | 1435 | * Return |
1436 | * **SK_PASS** on success, or **SK_DROP** on error. | 1436 | * **SK_PASS** on success, or **SK_DROP** on error. |
1437 | * | 1437 | * |
1438 | * int bpf_sock_map_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) | 1438 | * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) |
1439 | * Description | 1439 | * Description |
1440 | * Add an entry to, or update a *map* referencing sockets. The | 1440 | * Add an entry to, or update a *map* referencing sockets. The |
1441 | * *skops* is used as a new value for the entry associated to | 1441 | * *skops* is used as a new value for the entry associated to |
@@ -1533,7 +1533,7 @@ union bpf_attr { | |||
1533 | * Return | 1533 | * Return |
1534 | * 0 on success, or a negative error in case of failure. | 1534 | * 0 on success, or a negative error in case of failure. |
1535 | * | 1535 | * |
1536 | * int bpf_perf_prog_read_value(struct bpf_perf_event_data_kern *ctx, struct bpf_perf_event_value *buf, u32 buf_size) | 1536 | * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) |
1537 | * Description | 1537 | * Description |
1538 | * For an eBPF program attached to a perf event, retrieve the | 1538 | * For an eBPF program attached to a perf event, retrieve the |
1539 | * value of the event counter associated to *ctx* and store it in | 1539 | * value of the event counter associated to *ctx* and store it in |
@@ -1544,7 +1544,7 @@ union bpf_attr { | |||
1544 | * Return | 1544 | * Return |
1545 | * 0 on success, or a negative error in case of failure. | 1545 | * 0 on success, or a negative error in case of failure. |
1546 | * | 1546 | * |
1547 | * int bpf_getsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen) | 1547 | * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) |
1548 | * Description | 1548 | * Description |
1549 | * Emulate a call to **getsockopt()** on the socket associated to | 1549 | * Emulate a call to **getsockopt()** on the socket associated to |
1550 | * *bpf_socket*, which must be a full socket. The *level* at | 1550 | * *bpf_socket*, which must be a full socket. The *level* at |
@@ -1588,7 +1588,7 @@ union bpf_attr { | |||
1588 | * Return | 1588 | * Return |
1589 | * 0 | 1589 | * 0 |
1590 | * | 1590 | * |
1591 | * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops_kern *bpf_sock, int argval) | 1591 | * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) |
1592 | * Description | 1592 | * Description |
1593 | * Attempt to set the value of the **bpf_sock_ops_cb_flags** field | 1593 | * Attempt to set the value of the **bpf_sock_ops_cb_flags** field |
1594 | * for the full TCP socket associated to *bpf_sock* to | 1594 | * for the full TCP socket associated to *bpf_sock* to |
@@ -1721,7 +1721,7 @@ union bpf_attr { | |||
1721 | * Return | 1721 | * Return |
1722 | * 0 on success, or a negative error in case of failure. | 1722 | * 0 on success, or a negative error in case of failure. |
1723 | * | 1723 | * |
1724 | * int bpf_bind(struct bpf_sock_addr_kern *ctx, struct sockaddr *addr, int addr_len) | 1724 | * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) |
1725 | * Description | 1725 | * Description |
1726 | * Bind the socket associated to *ctx* to the address pointed by | 1726 | * Bind the socket associated to *ctx* to the address pointed by |
1727 | * *addr*, of length *addr_len*. This allows for making outgoing | 1727 | * *addr*, of length *addr_len*. This allows for making outgoing |
@@ -1767,6 +1767,64 @@ union bpf_attr { | |||
1767 | * **CONFIG_XFRM** configuration option. | 1767 | * **CONFIG_XFRM** configuration option. |
1768 | * Return | 1768 | * Return |
1769 | * 0 on success, or a negative error in case of failure. | 1769 | * 0 on success, or a negative error in case of failure. |
1770 | * | ||
1771 | * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags) | ||
1772 | * Description | ||
1773 | * Return a user or kernel stack in the buffer provided by the | ||
1774 | * BPF program. To achieve this, the helper needs *regs*, a | ||
1775 | * pointer to the context on which the tracing program executes. | ||
1776 | * To store the stack trace, the BPF program provides *buf* with | ||
1777 | * a non-negative *size*. | ||
1778 | * | ||
1779 | * The last argument, *flags*, holds the number of stack frames to | ||
1780 | * skip (from 0 to 255), masked with | ||
1781 | * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set | ||
1782 | * the following flags: | ||
1783 | * | ||
1784 | * **BPF_F_USER_STACK** | ||
1785 | * Collect a user space stack instead of a kernel stack. | ||
1786 | * **BPF_F_USER_BUILD_ID** | ||
1787 | * Collect buildid+offset pairs instead of instruction pointers | ||
1788 | * for the user stack; only valid if **BPF_F_USER_STACK** is also specified. | ||
1789 | * | ||
1790 | * **bpf_get_stack**\ () can collect up to | ||
1791 | * **PERF_MAX_STACK_DEPTH** frames for both kernel and user | ||
1792 | * stacks, subject to a sufficiently large buffer size. Note that | ||
1793 | * this limit can be controlled with the **sysctl** program, and | ||
1794 | * that it should be manually increased in order to profile long | ||
1795 | * user stacks (such as stacks for Java programs). To do so, use: | ||
1796 | * | ||
1797 | * :: | ||
1798 | * | ||
1799 | * # sysctl kernel.perf_event_max_stack=<new value> | ||
1800 | * | ||
1801 | * Return | ||
1802 | * A non-negative value equal to or less than *size* on success, or | ||
1803 | * a negative error in case of failure. | ||
1804 | * | ||
1805 | * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header) | ||
1806 | * Description | ||
1807 | * This helper is similar to **bpf_skb_load_bytes**\ () in that | ||
1808 | * it provides an easy way to load *len* bytes from *offset* | ||
1809 | * from the packet associated to *skb*, into the buffer pointed | ||
1810 | * to by *to*. The difference from **bpf_skb_load_bytes**\ () is that | ||
1811 | * a fifth argument *start_header* exists in order to select a | ||
1812 | * base offset to start from. *start_header* can be one of: | ||
1813 | * | ||
1814 | * **BPF_HDR_START_MAC** | ||
1815 | * Base offset to load data from is *skb*'s mac header. | ||
1816 | * **BPF_HDR_START_NET** | ||
1817 | * Base offset to load data from is *skb*'s network header. | ||
1818 | * | ||
1819 | * In general, "direct packet access" is the preferred method for | ||
1820 | * accessing packet data; however, this helper is particularly useful | ||
1821 | * in socket filters where *skb*\ **->data** does not always point | ||
1822 | * to the start of the mac header and where "direct packet access" | ||
1823 | * is not available. | ||
1824 | * | ||
1825 | * Return | ||
1826 | * 0 on success, or a negative error in case of failure. | ||
1827 | * | ||
1770 | */ | 1828 | */ |
1771 | #define __BPF_FUNC_MAPPER(FN) \ | 1829 | #define __BPF_FUNC_MAPPER(FN) \ |
1772 | FN(unspec), \ | 1830 | FN(unspec), \ |
@@ -1835,7 +1893,9 @@ union bpf_attr { | |||
1835 | FN(msg_pull_data), \ | 1893 | FN(msg_pull_data), \ |
1836 | FN(bind), \ | 1894 | FN(bind), \ |
1837 | FN(xdp_adjust_tail), \ | 1895 | FN(xdp_adjust_tail), \ |
1838 | FN(skb_get_xfrm_state), | 1896 | FN(skb_get_xfrm_state), \ |
1897 | FN(get_stack), \ | ||
1898 | FN(skb_load_bytes_relative), | ||
1839 | 1899 | ||
1840 | /* integer value in 'imm' field of BPF_CALL instruction selects which helper | 1900 | /* integer value in 'imm' field of BPF_CALL instruction selects which helper |
1841 | * function eBPF program intends to call | 1901 | * function eBPF program intends to call |
@@ -1869,11 +1929,14 @@ enum bpf_func_id { | |||
1869 | /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ | 1929 | /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ |
1870 | #define BPF_F_TUNINFO_IPV6 (1ULL << 0) | 1930 | #define BPF_F_TUNINFO_IPV6 (1ULL << 0) |
1871 | 1931 | ||
1872 | /* BPF_FUNC_get_stackid flags. */ | 1932 | /* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ |
1873 | #define BPF_F_SKIP_FIELD_MASK 0xffULL | 1933 | #define BPF_F_SKIP_FIELD_MASK 0xffULL |
1874 | #define BPF_F_USER_STACK (1ULL << 8) | 1934 | #define BPF_F_USER_STACK (1ULL << 8) |
1935 | /* flags used by BPF_FUNC_get_stackid only. */ | ||
1875 | #define BPF_F_FAST_STACK_CMP (1ULL << 9) | 1936 | #define BPF_F_FAST_STACK_CMP (1ULL << 9) |
1876 | #define BPF_F_REUSE_STACKID (1ULL << 10) | 1937 | #define BPF_F_REUSE_STACKID (1ULL << 10) |
1938 | /* flags used by BPF_FUNC_get_stack only. */ | ||
1939 | #define BPF_F_USER_BUILD_ID (1ULL << 11) | ||
1877 | 1940 | ||
1878 | /* BPF_FUNC_skb_set_tunnel_key flags. */ | 1941 | /* BPF_FUNC_skb_set_tunnel_key flags. */ |
1879 | #define BPF_F_ZERO_CSUM_TX (1ULL << 1) | 1942 | #define BPF_F_ZERO_CSUM_TX (1ULL << 1) |
@@ -1893,6 +1956,12 @@ enum bpf_adj_room_mode { | |||
1893 | BPF_ADJ_ROOM_NET, | 1956 | BPF_ADJ_ROOM_NET, |
1894 | }; | 1957 | }; |
1895 | 1958 | ||
1959 | /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ | ||
1960 | enum bpf_hdr_start_off { | ||
1961 | BPF_HDR_START_MAC, | ||
1962 | BPF_HDR_START_NET, | ||
1963 | }; | ||
1964 | |||
1896 | /* user accessible mirror of in-kernel sk_buff. | 1965 | /* user accessible mirror of in-kernel sk_buff. |
1897 | * new fields can only be added to the end of this structure | 1966 | * new fields can only be added to the end of this structure |
1898 | */ | 1967 | */ |
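
The documentation block above introduces **bpf_skb_load_bytes_relative**\ () without an accompanying selftest in this series, so a minimal socket-filter sketch may help. It is illustrative only: the local wrapper declaration follows the bpf_helpers.h convention but is an assumption (this patch set does not add such a wrapper), and the program and function names are made up::

    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    /* assumed wrapper in the style of bpf_helpers.h; not part of this series */
    static int (*bpf_skb_load_bytes_relative)(void *ctx, __u32 offset, void *to,
                                              __u32 len, __u32 start_header) =
            (void *) BPF_FUNC_skb_load_bytes_relative;

    SEC("socket")
    int keep_tcp_only(struct __sk_buff *skb)
    {
            __u8 proto;

            /* byte 9 past the network header is the IPv4 protocol field */
            if (bpf_skb_load_bytes_relative(skb, 9, &proto, sizeof(proto),
                                            BPF_HDR_START_NET) < 0)
                    return 0;

            /* socket filters return the number of bytes to keep; 0 drops */
            return proto == 6 /* IPPROTO_TCP */ ? skb->len : 0;
    }

    char _license[] SEC("license") = "GPL";

This mirrors the use case called out in the description: in a socket filter, *skb*\ **->data** may not point at the mac header, so the relative variant with **BPF_HDR_START_NET** avoids guessing offsets.
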
diff --git a/tools/include/uapi/linux/erspan.h b/tools/include/uapi/linux/erspan.h new file mode 100644 index 000000000000..841573019ae1 --- /dev/null +++ b/tools/include/uapi/linux/erspan.h | |||
@@ -0,0 +1,52 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ | ||
2 | /* | ||
3 | * ERSPAN Tunnel Metadata | ||
4 | * | ||
5 | * Copyright (c) 2018 VMware | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 | ||
9 | * as published by the Free Software Foundation. | ||
10 | * | ||
11 | * Userspace API for metadata mode ERSPAN tunnel | ||
12 | */ | ||
13 | #ifndef _UAPI_ERSPAN_H | ||
14 | #define _UAPI_ERSPAN_H | ||
15 | |||
16 | #include <linux/types.h> /* For __beXX in userspace */ | ||
17 | #include <asm/byteorder.h> | ||
18 | |||
19 | /* ERSPAN version 2 metadata header */ | ||
20 | struct erspan_md2 { | ||
21 | __be32 timestamp; | ||
22 | __be16 sgt; /* security group tag */ | ||
23 | #if defined(__LITTLE_ENDIAN_BITFIELD) | ||
24 | __u8 hwid_upper:2, | ||
25 | ft:5, | ||
26 | p:1; | ||
27 | __u8 o:1, | ||
28 | gra:2, | ||
29 | dir:1, | ||
30 | hwid:4; | ||
31 | #elif defined(__BIG_ENDIAN_BITFIELD) | ||
32 | __u8 p:1, | ||
33 | ft:5, | ||
34 | hwid_upper:2; | ||
35 | __u8 hwid:4, | ||
36 | dir:1, | ||
37 | gra:2, | ||
38 | o:1; | ||
39 | #else | ||
40 | #error "Please fix <asm/byteorder.h>" | ||
41 | #endif | ||
42 | }; | ||
43 | |||
44 | struct erspan_metadata { | ||
45 | int version; | ||
46 | union { | ||
47 | __be32 index; /* Version 1 (type II)*/ | ||
48 | struct erspan_md2 md2; /* Version 2 (type III) */ | ||
49 | } u; | ||
50 | }; | ||
51 | |||
52 | #endif /* _UAPI_ERSPAN_H */ | ||
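
Since the header only defines the layout, a short userspace sketch of filling version 2 (type III) metadata may be useful; the function name and field values are arbitrary examples, not taken from this patch::

    #include <string.h>
    #include <linux/erspan.h>

    static void erspan_md2_example(struct erspan_metadata *md)
    {
            memset(md, 0, sizeof(*md));
            md->version = 2;            /* selects u.md2 (type III) */
            md->u.md2.dir = 1;          /* mirrored on egress */
            md->u.md2.hwid = 0x3;       /* low 4 bits of the hardware ID */
            md->u.md2.hwid_upper = 0x1; /* top 2 bits of the hardware ID */
            /* timestamp and sgt stay zero in this sketch */
    }

The split of the 6-bit hardware ID into **hwid** and **hwid_upper** follows the bitfield layout above, which differs between little- and big-endian builds.
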
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index b64a7a39cbc8..9d762184b805 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile | |||
@@ -32,7 +32,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test | |||
32 | test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ | 32 | test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ |
33 | sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ | 33 | sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ |
34 | sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \ | 34 | sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \ |
35 | test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o | 35 | test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \ |
36 | test_get_stack_rawtp.o | ||
36 | 37 | ||
37 | # Order correspond to 'make run_tests' order | 38 | # Order correspond to 'make run_tests' order |
38 | TEST_PROGS := test_kmod.sh \ | 39 | TEST_PROGS := test_kmod.sh \ |
@@ -58,6 +59,7 @@ $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c | |||
58 | $(OUTPUT)/test_sock: cgroup_helpers.c | 59 | $(OUTPUT)/test_sock: cgroup_helpers.c |
59 | $(OUTPUT)/test_sock_addr: cgroup_helpers.c | 60 | $(OUTPUT)/test_sock_addr: cgroup_helpers.c |
60 | $(OUTPUT)/test_sockmap: cgroup_helpers.c | 61 | $(OUTPUT)/test_sockmap: cgroup_helpers.c |
62 | $(OUTPUT)/test_progs: trace_helpers.c | ||
61 | 63 | ||
62 | .PHONY: force | 64 | .PHONY: force |
63 | 65 | ||
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 69d7b918e66a..265f8e0e8ada 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h | |||
@@ -101,6 +101,8 @@ static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = | |||
101 | static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state, | 101 | static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state, |
102 | int size, int flags) = | 102 | int size, int flags) = |
103 | (void *) BPF_FUNC_skb_get_xfrm_state; | 103 | (void *) BPF_FUNC_skb_get_xfrm_state; |
104 | static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) = | ||
105 | (void *) BPF_FUNC_get_stack; | ||
104 | 106 | ||
105 | /* llvm builtin functions that eBPF C program may use to | 107 | /* llvm builtin functions that eBPF C program may use to |
106 | * emit BPF_LD_ABS and BPF_LD_IND instructions | 108 | * emit BPF_LD_ABS and BPF_LD_IND instructions |
diff --git a/tools/testing/selftests/bpf/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/test_get_stack_rawtp.c new file mode 100644 index 000000000000..f6d9f238e00a --- /dev/null +++ b/tools/testing/selftests/bpf/test_get_stack_rawtp.c | |||
@@ -0,0 +1,102 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | #include <linux/bpf.h> | ||
4 | #include "bpf_helpers.h" | ||
5 | |||
6 | /* Permit pretty deep stack traces */ | ||
7 | #define MAX_STACK_RAWTP 100 | ||
8 | struct stack_trace_t { | ||
9 | int pid; | ||
10 | int kern_stack_size; | ||
11 | int user_stack_size; | ||
12 | int user_stack_buildid_size; | ||
13 | __u64 kern_stack[MAX_STACK_RAWTP]; | ||
14 | __u64 user_stack[MAX_STACK_RAWTP]; | ||
15 | struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP]; | ||
16 | }; | ||
17 | |||
18 | struct bpf_map_def SEC("maps") perfmap = { | ||
19 | .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, | ||
20 | .key_size = sizeof(int), | ||
21 | .value_size = sizeof(__u32), | ||
22 | .max_entries = 2, | ||
23 | }; | ||
24 | |||
25 | struct bpf_map_def SEC("maps") stackdata_map = { | ||
26 | .type = BPF_MAP_TYPE_PERCPU_ARRAY, | ||
27 | .key_size = sizeof(__u32), | ||
28 | .value_size = sizeof(struct stack_trace_t), | ||
29 | .max_entries = 1, | ||
30 | }; | ||
31 | |||
32 | /* Allocate twice the needed per-cpu space. For the code below | ||
33 | * usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK); | ||
34 | * if (usize < 0) | ||
35 | * return 0; | ||
36 | * ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0); | ||
37 | * | ||
38 | * If we have value_size = MAX_STACK_RAWTP * sizeof(__u64), | ||
39 | * verifier will complain that access "raw_data + usize" | ||
40 | * with size "max_len - usize" may be out of bound. | ||
41 | * The maximum "raw_data + usize" is "raw_data + max_len" | ||
42 | * and the maximum "max_len - usize" is "max_len", verifier | ||
43 | * concludes that the maximum buffer access range is | ||
44 | * "raw_data[0...max_len * 2 - 1]" and hence reject the program. | ||
45 | * | ||
46 | * Doubling the to-be-used max buffer size fixes this verifier | ||
47 | * issue and avoids complicated C source-level massaging. | ||
48 | * This is an acceptable workaround since the map has only one entry. | ||
49 | */ | ||
50 | struct bpf_map_def SEC("maps") rawdata_map = { | ||
51 | .type = BPF_MAP_TYPE_PERCPU_ARRAY, | ||
52 | .key_size = sizeof(__u32), | ||
53 | .value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2, | ||
54 | .max_entries = 1, | ||
55 | }; | ||
56 | |||
57 | SEC("tracepoint/raw_syscalls/sys_enter") | ||
58 | int bpf_prog1(void *ctx) | ||
59 | { | ||
60 | int max_len, max_buildid_len, usize, ksize, total_size; | ||
61 | struct stack_trace_t *data; | ||
62 | void *raw_data; | ||
63 | __u32 key = 0; | ||
64 | |||
65 | data = bpf_map_lookup_elem(&stackdata_map, &key); | ||
66 | if (!data) | ||
67 | return 0; | ||
68 | |||
69 | max_len = MAX_STACK_RAWTP * sizeof(__u64); | ||
70 | max_buildid_len = MAX_STACK_RAWTP * sizeof(struct bpf_stack_build_id); | ||
71 | data->pid = bpf_get_current_pid_tgid(); | ||
72 | data->kern_stack_size = bpf_get_stack(ctx, data->kern_stack, | ||
73 | max_len, 0); | ||
74 | data->user_stack_size = bpf_get_stack(ctx, data->user_stack, max_len, | ||
75 | BPF_F_USER_STACK); | ||
76 | data->user_stack_buildid_size = bpf_get_stack( | ||
77 | ctx, data->user_stack_buildid, max_buildid_len, | ||
78 | BPF_F_USER_STACK | BPF_F_USER_BUILD_ID); | ||
79 | bpf_perf_event_output(ctx, &perfmap, 0, data, sizeof(*data)); | ||
80 | |||
81 | /* write both kernel and user stacks to the same buffer */ | ||
82 | raw_data = bpf_map_lookup_elem(&rawdata_map, &key); | ||
83 | if (!raw_data) | ||
84 | return 0; | ||
85 | |||
86 | usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK); | ||
87 | if (usize < 0) | ||
88 | return 0; | ||
89 | |||
90 | ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0); | ||
91 | if (ksize < 0) | ||
92 | return 0; | ||
93 | |||
94 | total_size = usize + ksize; | ||
95 | if (total_size > 0 && total_size <= max_len) | ||
96 | bpf_perf_event_output(ctx, &perfmap, 0, raw_data, total_size); | ||
97 | |||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | char _license[] SEC("license") = "GPL"; | ||
102 | __u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ | ||
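
For concreteness, the buffer-doubling comment above works out as follows with MAX_STACK_RAWTP = 100 and sizeof(__u64) = 8 (plain arithmetic, no new code)::

    max_len    = 100 * sizeof(__u64)   /* 800 bytes requested per stack */
    value_size = 2 * max_len           /* 1600 bytes actually allocated */

    /* worst case tracked by the verifier:
     *   pointer raw_data + usize, with usize <= 800, and
     *   access size max_len - usize <= 800,
     * so every access stays within raw_data[0..1599]
     */
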
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index fac581f1c57f..ed197eef1cfc 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c | |||
@@ -38,8 +38,10 @@ typedef __u16 __sum16; | |||
38 | #include "bpf_util.h" | 38 | #include "bpf_util.h" |
39 | #include "bpf_endian.h" | 39 | #include "bpf_endian.h" |
40 | #include "bpf_rlimit.h" | 40 | #include "bpf_rlimit.h" |
41 | #include "trace_helpers.h" | ||
41 | 42 | ||
42 | static int error_cnt, pass_cnt; | 43 | static int error_cnt, pass_cnt; |
44 | static bool jit_enabled; | ||
43 | 45 | ||
44 | #define MAGIC_BYTES 123 | 46 | #define MAGIC_BYTES 123 |
45 | 47 | ||
@@ -391,13 +393,30 @@ static inline __u64 ptr_to_u64(const void *ptr) | |||
391 | return (__u64) (unsigned long) ptr; | 393 | return (__u64) (unsigned long) ptr; |
392 | } | 394 | } |
393 | 395 | ||
396 | static bool is_jit_enabled(void) | ||
397 | { | ||
398 | const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable"; | ||
399 | bool enabled = false; | ||
400 | int sysctl_fd; | ||
401 | |||
402 | sysctl_fd = open(jit_sysctl, O_RDONLY); | ||
403 | if (sysctl_fd != -1) { | ||
404 | char tmpc; | ||
405 | |||
406 | if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1) | ||
407 | enabled = (tmpc != '0'); | ||
408 | close(sysctl_fd); | ||
409 | } | ||
410 | |||
411 | return enabled; | ||
412 | } | ||
413 | |||
394 | static void test_bpf_obj_id(void) | 414 | static void test_bpf_obj_id(void) |
395 | { | 415 | { |
396 | const __u64 array_magic_value = 0xfaceb00c; | 416 | const __u64 array_magic_value = 0xfaceb00c; |
397 | const __u32 array_key = 0; | 417 | const __u32 array_key = 0; |
398 | const int nr_iters = 2; | 418 | const int nr_iters = 2; |
399 | const char *file = "./test_obj_id.o"; | 419 | const char *file = "./test_obj_id.o"; |
400 | const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable"; | ||
401 | const char *expected_prog_name = "test_obj_id"; | 420 | const char *expected_prog_name = "test_obj_id"; |
402 | const char *expected_map_name = "test_map_id"; | 421 | const char *expected_map_name = "test_map_id"; |
403 | const __u64 nsec_per_sec = 1000000000; | 422 | const __u64 nsec_per_sec = 1000000000; |
@@ -414,20 +433,11 @@ static void test_bpf_obj_id(void) | |||
414 | char jited_insns[128], xlated_insns[128], zeros[128]; | 433 | char jited_insns[128], xlated_insns[128], zeros[128]; |
415 | __u32 i, next_id, info_len, nr_id_found, duration = 0; | 434 | __u32 i, next_id, info_len, nr_id_found, duration = 0; |
416 | struct timespec real_time_ts, boot_time_ts; | 435 | struct timespec real_time_ts, boot_time_ts; |
417 | int sysctl_fd, jit_enabled = 0, err = 0; | 436 | int err = 0; |
418 | __u64 array_value; | 437 | __u64 array_value; |
419 | uid_t my_uid = getuid(); | 438 | uid_t my_uid = getuid(); |
420 | time_t now, load_time; | 439 | time_t now, load_time; |
421 | 440 | ||
422 | sysctl_fd = open(jit_sysctl, 0, O_RDONLY); | ||
423 | if (sysctl_fd != -1) { | ||
424 | char tmpc; | ||
425 | |||
426 | if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1) | ||
427 | jit_enabled = (tmpc != '0'); | ||
428 | close(sysctl_fd); | ||
429 | } | ||
430 | |||
431 | err = bpf_prog_get_fd_by_id(0); | 441 | err = bpf_prog_get_fd_by_id(0); |
432 | CHECK(err >= 0 || errno != ENOENT, | 442 | CHECK(err >= 0 || errno != ENOENT, |
433 | "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno); | 443 | "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno); |
@@ -896,11 +906,47 @@ static int compare_map_keys(int map1_fd, int map2_fd) | |||
896 | return 0; | 906 | return 0; |
897 | } | 907 | } |
898 | 908 | ||
909 | static int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len) | ||
910 | { | ||
911 | __u32 key, next_key, *cur_key_p, *next_key_p; | ||
912 | char *val_buf1, *val_buf2; | ||
913 | int i, err = 0; | ||
914 | |||
915 | val_buf1 = malloc(stack_trace_len); | ||
916 | val_buf2 = malloc(stack_trace_len); | ||
917 | cur_key_p = NULL; | ||
918 | next_key_p = &key; | ||
919 | while (bpf_map_get_next_key(smap_fd, cur_key_p, next_key_p) == 0) { | ||
920 | err = bpf_map_lookup_elem(smap_fd, next_key_p, val_buf1); | ||
921 | if (err) | ||
922 | goto out; | ||
923 | err = bpf_map_lookup_elem(amap_fd, next_key_p, val_buf2); | ||
924 | if (err) | ||
925 | goto out; | ||
926 | for (i = 0; i < stack_trace_len; i++) { | ||
927 | if (val_buf1[i] != val_buf2[i]) { | ||
928 | err = -1; | ||
929 | goto out; | ||
930 | } | ||
931 | } | ||
932 | key = *next_key_p; | ||
933 | cur_key_p = &key; | ||
934 | next_key_p = &next_key; | ||
935 | } | ||
936 | if (errno != ENOENT) | ||
937 | err = -1; | ||
938 | |||
939 | out: | ||
940 | free(val_buf1); | ||
941 | free(val_buf2); | ||
942 | return err; | ||
943 | } | ||
944 | |||
899 | static void test_stacktrace_map() | 945 | static void test_stacktrace_map() |
900 | { | 946 | { |
901 | int control_map_fd, stackid_hmap_fd, stackmap_fd; | 947 | int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; |
902 | const char *file = "./test_stacktrace_map.o"; | 948 | const char *file = "./test_stacktrace_map.o"; |
903 | int bytes, efd, err, pmu_fd, prog_fd; | 949 | int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len; |
904 | struct perf_event_attr attr = {}; | 950 | struct perf_event_attr attr = {}; |
905 | __u32 key, val, duration = 0; | 951 | __u32 key, val, duration = 0; |
906 | struct bpf_object *obj; | 952 | struct bpf_object *obj; |
@@ -956,6 +1002,10 @@ static void test_stacktrace_map() | |||
956 | if (stackmap_fd < 0) | 1002 | if (stackmap_fd < 0) |
957 | goto disable_pmu; | 1003 | goto disable_pmu; |
958 | 1004 | ||
1005 | stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap"); | ||
1006 | if (stack_amap_fd < 0) | ||
1007 | goto disable_pmu; | ||
1008 | |||
959 | /* give some time for bpf program run */ | 1009 | /* give some time for bpf program run */ |
960 | sleep(1); | 1010 | sleep(1); |
961 | 1011 | ||
@@ -977,6 +1027,12 @@ static void test_stacktrace_map() | |||
977 | "err %d errno %d\n", err, errno)) | 1027 | "err %d errno %d\n", err, errno)) |
978 | goto disable_pmu_noerr; | 1028 | goto disable_pmu_noerr; |
979 | 1029 | ||
1030 | stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64); | ||
1031 | err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len); | ||
1032 | if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap", | ||
1033 | "err %d errno %d\n", err, errno)) | ||
1034 | goto disable_pmu_noerr; | ||
1035 | |||
980 | goto disable_pmu_noerr; | 1036 | goto disable_pmu_noerr; |
981 | disable_pmu: | 1037 | disable_pmu: |
982 | error_cnt++; | 1038 | error_cnt++; |
@@ -1070,9 +1126,9 @@ err: | |||
1070 | 1126 | ||
1071 | static void test_stacktrace_build_id(void) | 1127 | static void test_stacktrace_build_id(void) |
1072 | { | 1128 | { |
1073 | int control_map_fd, stackid_hmap_fd, stackmap_fd; | 1129 | int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; |
1074 | const char *file = "./test_stacktrace_build_id.o"; | 1130 | const char *file = "./test_stacktrace_build_id.o"; |
1075 | int bytes, efd, err, pmu_fd, prog_fd; | 1131 | int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len; |
1076 | struct perf_event_attr attr = {}; | 1132 | struct perf_event_attr attr = {}; |
1077 | __u32 key, previous_key, val, duration = 0; | 1133 | __u32 key, previous_key, val, duration = 0; |
1078 | struct bpf_object *obj; | 1134 | struct bpf_object *obj; |
@@ -1137,6 +1193,11 @@ static void test_stacktrace_build_id(void) | |||
1137 | err, errno)) | 1193 | err, errno)) |
1138 | goto disable_pmu; | 1194 | goto disable_pmu; |
1139 | 1195 | ||
1196 | stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap"); | ||
1197 | if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap", | ||
1198 | "err %d errno %d\n", err, errno)) | ||
1199 | goto disable_pmu; | ||
1200 | |||
1140 | assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null") | 1201 | assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null") |
1141 | == 0); | 1202 | == 0); |
1142 | assert(system("./urandom_read") == 0); | 1203 | assert(system("./urandom_read") == 0); |
@@ -1188,8 +1249,15 @@ static void test_stacktrace_build_id(void) | |||
1188 | previous_key = key; | 1249 | previous_key = key; |
1189 | } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); | 1250 | } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); |
1190 | 1251 | ||
1191 | CHECK(build_id_matches < 1, "build id match", | 1252 | if (CHECK(build_id_matches < 1, "build id match", |
1192 | "Didn't find expected build ID from the map\n"); | 1253 | "Didn't find expected build ID from the map\n")) |
1254 | goto disable_pmu; | ||
1255 | |||
1256 | stack_trace_len = PERF_MAX_STACK_DEPTH | ||
1257 | * sizeof(struct bpf_stack_build_id); | ||
1258 | err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len); | ||
1259 | CHECK(err, "compare_stack_ips stackmap vs. stack_amap", | ||
1260 | "err %d errno %d\n", err, errno); | ||
1193 | 1261 | ||
1194 | disable_pmu: | 1262 | disable_pmu: |
1195 | ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); | 1263 | ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); |
@@ -1204,8 +1272,147 @@ out: | |||
1204 | return; | 1272 | return; |
1205 | } | 1273 | } |
1206 | 1274 | ||
1275 | #define MAX_CNT_RAWTP 10ull | ||
1276 | #define MAX_STACK_RAWTP 100 | ||
1277 | struct get_stack_trace_t { | ||
1278 | int pid; | ||
1279 | int kern_stack_size; | ||
1280 | int user_stack_size; | ||
1281 | int user_stack_buildid_size; | ||
1282 | __u64 kern_stack[MAX_STACK_RAWTP]; | ||
1283 | __u64 user_stack[MAX_STACK_RAWTP]; | ||
1284 | struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP]; | ||
1285 | }; | ||
1286 | |||
1287 | static int get_stack_print_output(void *data, int size) | ||
1288 | { | ||
1289 | bool good_kern_stack = false, good_user_stack = false; | ||
1290 | const char *nonjit_func = "___bpf_prog_run"; | ||
1291 | struct get_stack_trace_t *e = data; | ||
1292 | int i, num_stack; | ||
1293 | static __u64 cnt; | ||
1294 | struct ksym *ks; | ||
1295 | |||
1296 | cnt++; | ||
1297 | |||
1298 | if (size < sizeof(struct get_stack_trace_t)) { | ||
1299 | __u64 *raw_data = data; | ||
1300 | bool found = false; | ||
1301 | |||
1302 | num_stack = size / sizeof(__u64); | ||
1303 | /* If jit is enabled, we do not have a good way to | ||
1304 | * verify the sanity of the kernel stack. So we | ||
1305 | * just assume it is good if the stack is not empty. | ||
1306 | * This could be improved in the future. | ||
1307 | */ | ||
1308 | if (jit_enabled) { | ||
1309 | found = num_stack > 0; | ||
1310 | } else { | ||
1311 | for (i = 0; i < num_stack; i++) { | ||
1312 | ks = ksym_search(raw_data[i]); | ||
1313 | if (strcmp(ks->name, nonjit_func) == 0) { | ||
1314 | found = true; | ||
1315 | break; | ||
1316 | } | ||
1317 | } | ||
1318 | } | ||
1319 | if (found) { | ||
1320 | good_kern_stack = true; | ||
1321 | good_user_stack = true; | ||
1322 | } | ||
1323 | } else { | ||
1324 | num_stack = e->kern_stack_size / sizeof(__u64); | ||
1325 | if (jit_enabled) { | ||
1326 | good_kern_stack = num_stack > 0; | ||
1327 | } else { | ||
1328 | for (i = 0; i < num_stack; i++) { | ||
1329 | ks = ksym_search(e->kern_stack[i]); | ||
1330 | if (strcmp(ks->name, nonjit_func) == 0) { | ||
1331 | good_kern_stack = true; | ||
1332 | break; | ||
1333 | } | ||
1334 | } | ||
1335 | } | ||
1336 | if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0) | ||
1337 | good_user_stack = true; | ||
1338 | } | ||
1339 | if (!good_kern_stack || !good_user_stack) | ||
1340 | return PERF_EVENT_ERROR; | ||
1341 | |||
1342 | if (cnt == MAX_CNT_RAWTP) | ||
1343 | return PERF_EVENT_DONE; | ||
1344 | |||
1345 | return PERF_EVENT_CONT; | ||
1346 | } | ||
1347 | |||
1348 | static void test_get_stack_raw_tp(void) | ||
1349 | { | ||
1350 | const char *file = "./test_get_stack_rawtp.o"; | ||
1351 | int i, efd, err, prog_fd, pmu_fd, perfmap_fd; | ||
1352 | struct perf_event_attr attr = {}; | ||
1353 | struct timespec tv = {0, 10}; | ||
1354 | __u32 key = 0, duration = 0; | ||
1355 | struct bpf_object *obj; | ||
1356 | |||
1357 | err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); | ||
1358 | if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) | ||
1359 | return; | ||
1360 | |||
1361 | efd = bpf_raw_tracepoint_open("sys_enter", prog_fd); | ||
1362 | if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno)) | ||
1363 | goto close_prog; | ||
1364 | |||
1365 | perfmap_fd = bpf_find_map(__func__, obj, "perfmap"); | ||
1366 | if (CHECK(perfmap_fd < 0, "bpf_find_map", "err %d errno %d\n", | ||
1367 | perfmap_fd, errno)) | ||
1368 | goto close_prog; | ||
1369 | |||
1370 | err = load_kallsyms(); | ||
1371 | if (CHECK(err < 0, "load_kallsyms", "err %d errno %d\n", err, errno)) | ||
1372 | goto close_prog; | ||
1373 | |||
1374 | attr.sample_type = PERF_SAMPLE_RAW; | ||
1375 | attr.type = PERF_TYPE_SOFTWARE; | ||
1376 | attr.config = PERF_COUNT_SW_BPF_OUTPUT; | ||
1377 | pmu_fd = syscall(__NR_perf_event_open, &attr, getpid()/*pid*/, -1/*cpu*/, | ||
1378 | -1/*group_fd*/, 0); | ||
1379 | if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd, | ||
1380 | errno)) | ||
1381 | goto close_prog; | ||
1382 | |||
1383 | err = bpf_map_update_elem(perfmap_fd, &key, &pmu_fd, BPF_ANY); | ||
1384 | if (CHECK(err < 0, "bpf_map_update_elem", "err %d errno %d\n", err, | ||
1385 | errno)) | ||
1386 | goto close_prog; | ||
1387 | |||
1388 | err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); | ||
1389 | if (CHECK(err < 0, "ioctl PERF_EVENT_IOC_ENABLE", "err %d errno %d\n", | ||
1390 | err, errno)) | ||
1391 | goto close_prog; | ||
1392 | |||
1393 | err = perf_event_mmap(pmu_fd); | ||
1394 | if (CHECK(err < 0, "perf_event_mmap", "err %d errno %d\n", err, errno)) | ||
1395 | goto close_prog; | ||
1396 | |||
1397 | /* trigger some syscall action */ | ||
1398 | for (i = 0; i < MAX_CNT_RAWTP; i++) | ||
1399 | nanosleep(&tv, NULL); | ||
1400 | |||
1401 | err = perf_event_poller(pmu_fd, get_stack_print_output); | ||
1402 | if (CHECK(err < 0, "perf_event_poller", "err %d errno %d\n", err, errno)) | ||
1403 | goto close_prog; | ||
1404 | |||
1405 | goto close_prog_noerr; | ||
1406 | close_prog: | ||
1407 | error_cnt++; | ||
1408 | close_prog_noerr: | ||
1409 | bpf_object__close(obj); | ||
1410 | } | ||
1411 | |||
1207 | int main(void) | 1412 | int main(void) |
1208 | { | 1413 | { |
1414 | jit_enabled = is_jit_enabled(); | ||
1415 | |||
1209 | test_pkt_access(); | 1416 | test_pkt_access(); |
1210 | test_xdp(); | 1417 | test_xdp(); |
1211 | test_xdp_adjust_tail(); | 1418 | test_xdp_adjust_tail(); |
@@ -1219,6 +1426,7 @@ int main(void) | |||
1219 | test_stacktrace_map(); | 1426 | test_stacktrace_map(); |
1220 | test_stacktrace_build_id(); | 1427 | test_stacktrace_build_id(); |
1221 | test_stacktrace_map_raw_tp(); | 1428 | test_stacktrace_map_raw_tp(); |
1429 | test_get_stack_raw_tp(); | ||
1222 | 1430 | ||
1223 | printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); | 1431 | printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); |
1224 | return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; | 1432 | return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; |
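
The JIT probe added above reads the same knob one can inspect from a shell; when it reports 0 the interpreter runs programs, which is why **get_stack_print_output**\ () expects ___bpf_prog_run in kernel stacks only in that case::

    # cat /proc/sys/net/core/bpf_jit_enable
    0
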
diff --git a/tools/testing/selftests/bpf/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/test_stacktrace_build_id.c index b755bd783ce5..d86c281e957f 100644 --- a/tools/testing/selftests/bpf/test_stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/test_stacktrace_build_id.c | |||
@@ -19,7 +19,7 @@ struct bpf_map_def SEC("maps") stackid_hmap = { | |||
19 | .type = BPF_MAP_TYPE_HASH, | 19 | .type = BPF_MAP_TYPE_HASH, |
20 | .key_size = sizeof(__u32), | 20 | .key_size = sizeof(__u32), |
21 | .value_size = sizeof(__u32), | 21 | .value_size = sizeof(__u32), |
22 | .max_entries = 10000, | 22 | .max_entries = 16384, |
23 | }; | 23 | }; |
24 | 24 | ||
25 | struct bpf_map_def SEC("maps") stackmap = { | 25 | struct bpf_map_def SEC("maps") stackmap = { |
@@ -31,6 +31,14 @@ struct bpf_map_def SEC("maps") stackmap = { | |||
31 | .map_flags = BPF_F_STACK_BUILD_ID, | 31 | .map_flags = BPF_F_STACK_BUILD_ID, |
32 | }; | 32 | }; |
33 | 33 | ||
34 | struct bpf_map_def SEC("maps") stack_amap = { | ||
35 | .type = BPF_MAP_TYPE_ARRAY, | ||
36 | .key_size = sizeof(__u32), | ||
37 | .value_size = sizeof(struct bpf_stack_build_id) | ||
38 | * PERF_MAX_STACK_DEPTH, | ||
39 | .max_entries = 128, | ||
40 | }; | ||
41 | |||
34 | /* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */ | 42 | /* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */ |
35 | struct random_urandom_args { | 43 | struct random_urandom_args { |
36 | unsigned long long pad; | 44 | unsigned long long pad; |
@@ -42,7 +50,10 @@ struct random_urandom_args { | |||
42 | SEC("tracepoint/random/urandom_read") | 50 | SEC("tracepoint/random/urandom_read") |
43 | int oncpu(struct random_urandom_args *args) | 51 | int oncpu(struct random_urandom_args *args) |
44 | { | 52 | { |
53 | __u32 max_len = sizeof(struct bpf_stack_build_id) | ||
54 | * PERF_MAX_STACK_DEPTH; | ||
45 | __u32 key = 0, val = 0, *value_p; | 55 | __u32 key = 0, val = 0, *value_p; |
56 | void *stack_p; | ||
46 | 57 | ||
47 | value_p = bpf_map_lookup_elem(&control_map, &key); | 58 | value_p = bpf_map_lookup_elem(&control_map, &key); |
48 | if (value_p && *value_p) | 59 | if (value_p && *value_p) |
@@ -50,8 +61,13 @@ int oncpu(struct random_urandom_args *args) | |||
50 | 61 | ||
51 | /* The size of stackmap and stackid_hmap should be the same */ | 62 | /* The size of stackmap and stackid_hmap should be the same */ |
52 | key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK); | 63 | key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK); |
53 | if ((int)key >= 0) | 64 | if ((int)key >= 0) { |
54 | bpf_map_update_elem(&stackid_hmap, &key, &val, 0); | 65 | bpf_map_update_elem(&stackid_hmap, &key, &val, 0); |
66 | stack_p = bpf_map_lookup_elem(&stack_amap, &key); | ||
67 | if (stack_p) | ||
68 | bpf_get_stack(args, stack_p, max_len, | ||
69 | BPF_F_USER_STACK | BPF_F_USER_BUILD_ID); | ||
70 | } | ||
55 | 71 | ||
56 | return 0; | 72 | return 0; |
57 | } | 73 | } |
diff --git a/tools/testing/selftests/bpf/test_stacktrace_map.c b/tools/testing/selftests/bpf/test_stacktrace_map.c index 76d85c5d08bd..af111af7ca1a 100644 --- a/tools/testing/selftests/bpf/test_stacktrace_map.c +++ b/tools/testing/selftests/bpf/test_stacktrace_map.c | |||
@@ -19,14 +19,21 @@ struct bpf_map_def SEC("maps") stackid_hmap = { | |||
19 | .type = BPF_MAP_TYPE_HASH, | 19 | .type = BPF_MAP_TYPE_HASH, |
20 | .key_size = sizeof(__u32), | 20 | .key_size = sizeof(__u32), |
21 | .value_size = sizeof(__u32), | 21 | .value_size = sizeof(__u32), |
22 | .max_entries = 10000, | 22 | .max_entries = 16384, |
23 | }; | 23 | }; |
24 | 24 | ||
25 | struct bpf_map_def SEC("maps") stackmap = { | 25 | struct bpf_map_def SEC("maps") stackmap = { |
26 | .type = BPF_MAP_TYPE_STACK_TRACE, | 26 | .type = BPF_MAP_TYPE_STACK_TRACE, |
27 | .key_size = sizeof(__u32), | 27 | .key_size = sizeof(__u32), |
28 | .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH, | 28 | .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH, |
29 | .max_entries = 10000, | 29 | .max_entries = 16384, |
30 | }; | ||
31 | |||
32 | struct bpf_map_def SEC("maps") stack_amap = { | ||
33 | .type = BPF_MAP_TYPE_ARRAY, | ||
34 | .key_size = sizeof(__u32), | ||
35 | .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH, | ||
36 | .max_entries = 16384, | ||
30 | }; | 37 | }; |
31 | 38 | ||
32 | /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ | 39 | /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ |
@@ -44,7 +51,9 @@ struct sched_switch_args { | |||
44 | SEC("tracepoint/sched/sched_switch") | 51 | SEC("tracepoint/sched/sched_switch") |
45 | int oncpu(struct sched_switch_args *ctx) | 52 | int oncpu(struct sched_switch_args *ctx) |
46 | { | 53 | { |
54 | __u32 max_len = PERF_MAX_STACK_DEPTH * sizeof(__u64); | ||
47 | __u32 key = 0, val = 0, *value_p; | 55 | __u32 key = 0, val = 0, *value_p; |
56 | void *stack_p; | ||
48 | 57 | ||
49 | value_p = bpf_map_lookup_elem(&control_map, &key); | 58 | value_p = bpf_map_lookup_elem(&control_map, &key); |
50 | if (value_p && *value_p) | 59 | if (value_p && *value_p) |
@@ -52,8 +61,12 @@ int oncpu(struct sched_switch_args *ctx) | |||
52 | 61 | ||
53 | /* The size of stackmap and stackid_hmap should be the same */ | 62 | /* The size of stackmap and stackid_hmap should be the same */ |
54 | key = bpf_get_stackid(ctx, &stackmap, 0); | 63 | key = bpf_get_stackid(ctx, &stackmap, 0); |
55 | if ((int)key >= 0) | 64 | if ((int)key >= 0) { |
56 | bpf_map_update_elem(&stackid_hmap, &key, &val, 0); | 65 | bpf_map_update_elem(&stackid_hmap, &key, &val, 0); |
66 | stack_p = bpf_map_lookup_elem(&stack_amap, &key); | ||
67 | if (stack_p) | ||
68 | bpf_get_stack(ctx, stack_p, max_len, 0); | ||
69 | } | ||
57 | 70 | ||
58 | return 0; | 71 | return 0; |
59 | } | 72 | } |
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 165e9ddfa446..275b4570b5b8 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c | |||
@@ -47,7 +47,7 @@ | |||
47 | # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) | 47 | # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) |
48 | #endif | 48 | #endif |
49 | 49 | ||
50 | #define MAX_INSNS 512 | 50 | #define MAX_INSNS BPF_MAXINSNS |
51 | #define MAX_FIXUPS 8 | 51 | #define MAX_FIXUPS 8 |
52 | #define MAX_NR_MAPS 4 | 52 | #define MAX_NR_MAPS 4 |
53 | #define POINTER_VALUE 0xcafe4all | 53 | #define POINTER_VALUE 0xcafe4all |
@@ -77,6 +77,8 @@ struct bpf_test { | |||
77 | } result, result_unpriv; | 77 | } result, result_unpriv; |
78 | enum bpf_prog_type prog_type; | 78 | enum bpf_prog_type prog_type; |
79 | uint8_t flags; | 79 | uint8_t flags; |
80 | __u8 data[TEST_DATA_LEN]; | ||
81 | void (*fill_helper)(struct bpf_test *self); | ||
80 | }; | 82 | }; |
81 | 83 | ||
82 | /* Note we want this to be 64 bit aligned so that the end of our array is | 84 | /* Note we want this to be 64 bit aligned so that the end of our array is |
@@ -94,6 +96,62 @@ struct other_val { | |||
94 | long long bar; | 96 | long long bar; |
95 | }; | 97 | }; |
96 | 98 | ||
99 | static void bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self) | ||
100 | { | ||
101 | /* test: {skb->data[0], vlan_push} x 51 + {skb->data[0], vlan_pop} x 51, repeated 5 times */ | ||
102 | #define PUSH_CNT 51 | ||
103 | unsigned int len = BPF_MAXINSNS; | ||
104 | struct bpf_insn *insn = self->insns; | ||
105 | int i = 0, j, k = 0; | ||
106 | |||
107 | insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); | ||
108 | loop: | ||
109 | for (j = 0; j < PUSH_CNT; j++) { | ||
110 | insn[i++] = BPF_LD_ABS(BPF_B, 0); | ||
111 | insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2); | ||
112 | i++; | ||
113 | insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); | ||
114 | insn[i++] = BPF_MOV64_IMM(BPF_REG_2, 1); | ||
115 | insn[i++] = BPF_MOV64_IMM(BPF_REG_3, 2); | ||
116 | insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
117 | BPF_FUNC_skb_vlan_push); | ||
118 | insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2); | ||
119 | i++; | ||
120 | } | ||
121 | |||
122 | for (j = 0; j < PUSH_CNT; j++) { | ||
123 | insn[i++] = BPF_LD_ABS(BPF_B, 0); | ||
124 | insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2); | ||
125 | i++; | ||
126 | insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); | ||
127 | insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
128 | BPF_FUNC_skb_vlan_pop); | ||
129 | insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2); | ||
130 | i++; | ||
131 | } | ||
132 | if (++k < 5) | ||
133 | goto loop; | ||
134 | |||
135 | for (; i < len - 1; i++) | ||
136 | insn[i] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 0xbef); | ||
137 | insn[len - 1] = BPF_EXIT_INSN(); | ||
138 | } | ||
139 | |||
140 | static void bpf_fill_jump_around_ld_abs(struct bpf_test *self) | ||
141 | { | ||
142 | struct bpf_insn *insn = self->insns; | ||
143 | unsigned int len = BPF_MAXINSNS; | ||
144 | int i = 0; | ||
145 | |||
146 | insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); | ||
147 | insn[i++] = BPF_LD_ABS(BPF_B, 0); | ||
148 | insn[i] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 10, len - i - 2); | ||
149 | i++; | ||
150 | while (i < len - 1) | ||
151 | insn[i++] = BPF_LD_ABS(BPF_B, 1); | ||
152 | insn[i] = BPF_EXIT_INSN(); | ||
153 | } | ||
154 | |||
97 | static struct bpf_test tests[] = { | 155 | static struct bpf_test tests[] = { |
98 | { | 156 | { |
99 | "add+sub+mul", | 157 | "add+sub+mul", |
@@ -11680,6 +11738,242 @@ static struct bpf_test tests[] = { | |||
11680 | .errstr = "BPF_XADD stores into R2 packet", | 11738 | .errstr = "BPF_XADD stores into R2 packet", |
11681 | .prog_type = BPF_PROG_TYPE_XDP, | 11739 | .prog_type = BPF_PROG_TYPE_XDP, |
11682 | }, | 11740 | }, |
11741 | { | ||
11742 | "bpf_get_stack return R0 within range", | ||
11743 | .insns = { | ||
11744 | BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), | ||
11745 | BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
11746 | BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | ||
11747 | BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), | ||
11748 | BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
11749 | BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
11750 | BPF_FUNC_map_lookup_elem), | ||
11751 | BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 28), | ||
11752 | BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), | ||
11753 | BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)), | ||
11754 | BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), | ||
11755 | BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), | ||
11756 | BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)), | ||
11757 | BPF_MOV64_IMM(BPF_REG_4, 256), | ||
11758 | BPF_EMIT_CALL(BPF_FUNC_get_stack), | ||
11759 | BPF_MOV64_IMM(BPF_REG_1, 0), | ||
11760 | BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), | ||
11761 | BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32), | ||
11762 | BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32), | ||
11763 | BPF_JMP_REG(BPF_JSLT, BPF_REG_1, BPF_REG_8, 16), | ||
11764 | BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8), | ||
11765 | BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), | ||
11766 | BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_8), | ||
11767 | BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), | ||
11768 | BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32), | ||
11769 | BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 32), | ||
11770 | BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), | ||
11771 | BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1), | ||
11772 | BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), | ||
11773 | BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)), | ||
11774 | BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_5), | ||
11775 | BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 4), | ||
11776 | BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), | ||
11777 | BPF_MOV64_REG(BPF_REG_3, BPF_REG_9), | ||
11778 | BPF_MOV64_IMM(BPF_REG_4, 0), | ||
11779 | BPF_EMIT_CALL(BPF_FUNC_get_stack), | ||
11780 | BPF_EXIT_INSN(), | ||
11781 | }, | ||
11782 | .fixup_map2 = { 4 }, | ||
11783 | .result = ACCEPT, | ||
11784 | .prog_type = BPF_PROG_TYPE_TRACEPOINT, | ||
11785 | }, | ||
11786 | { | ||
11787 | "ld_abs: invalid op 1", | ||
11788 | .insns = { | ||
11789 | BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), | ||
11790 | BPF_LD_ABS(BPF_DW, 0), | ||
11791 | BPF_EXIT_INSN(), | ||
11792 | }, | ||
11793 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
11794 | .result = REJECT, | ||
11795 | .errstr = "unknown opcode", | ||
11796 | }, | ||
11797 | { | ||
11798 | "ld_abs: invalid op 2", | ||
11799 | .insns = { | ||
11800 | BPF_MOV32_IMM(BPF_REG_0, 256), | ||
11801 | BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), | ||
11802 | BPF_LD_IND(BPF_DW, BPF_REG_0, 0), | ||
11803 | BPF_EXIT_INSN(), | ||
11804 | }, | ||
11805 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
11806 | .result = REJECT, | ||
11807 | .errstr = "unknown opcode", | ||
11808 | }, | ||
11809 | { | ||
11810 | "ld_abs: nmap reduced", | ||
11811 | .insns = { | ||
11812 | BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), | ||
11813 | BPF_LD_ABS(BPF_H, 12), | ||
11814 | BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 28), | ||
11815 | BPF_LD_ABS(BPF_H, 12), | ||
11816 | BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 26), | ||
11817 | BPF_MOV32_IMM(BPF_REG_0, 18), | ||
11818 | BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -64), | ||
11819 | BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -64), | ||
11820 | BPF_LD_IND(BPF_W, BPF_REG_7, 14), | ||
11821 | BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -60), | ||
11822 | BPF_MOV32_IMM(BPF_REG_0, 280971478), | ||
11823 | BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56), | ||
11824 | BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56), | ||
11825 | BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -60), | ||
11826 | BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7), | ||
11827 | BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 15), | ||
11828 | BPF_LD_ABS(BPF_H, 12), | ||
11829 | BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 13), | ||
11830 | BPF_MOV32_IMM(BPF_REG_0, 22), | ||
11831 | BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56), | ||
11832 | BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56), | ||
11833 | BPF_LD_IND(BPF_H, BPF_REG_7, 14), | ||
11834 | BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -52), | ||
11835 | BPF_MOV32_IMM(BPF_REG_0, 17366), | ||
11836 | BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -48), | ||
11837 | BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -48), | ||
11838 | BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -52), | ||
11839 | BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7), | ||
11840 | BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), | ||
11841 | BPF_MOV32_IMM(BPF_REG_0, 256), | ||
11842 | BPF_EXIT_INSN(), | ||
11843 | BPF_MOV32_IMM(BPF_REG_0, 0), | ||
11844 | BPF_EXIT_INSN(), | ||
11845 | }, | ||
11846 | .data = { | ||
11847 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0, | ||
11848 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
11849 | 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6, | ||
11850 | }, | ||
11851 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
11852 | .result = ACCEPT, | ||
11853 | .retval = 256, | ||
11854 | }, | ||
11855 | { | ||
11856 | "ld_abs: div + abs, test 1", | ||
11857 | .insns = { | ||
11858 | BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), | ||
11859 | BPF_LD_ABS(BPF_B, 3), | ||
11860 | BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2), | ||
11861 | BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2), | ||
11862 | BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0), | ||
11863 | BPF_LD_ABS(BPF_B, 4), | ||
11864 | BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0), | ||
11865 | BPF_LD_IND(BPF_B, BPF_REG_8, -70), | ||
11866 | BPF_EXIT_INSN(), | ||
11867 | }, | ||
11868 | .data = { | ||
11869 | 10, 20, 30, 40, 50, | ||
11870 | }, | ||
11871 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
11872 | .result = ACCEPT, | ||
11873 | .retval = 10, | ||
11874 | }, | ||
11875 | { | ||
11876 | "ld_abs: div + abs, test 2", | ||
11877 | .insns = { | ||
11878 | BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), | ||
11879 | BPF_LD_ABS(BPF_B, 3), | ||
11880 | BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2), | ||
11881 | BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2), | ||
11882 | BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0), | ||
11883 | BPF_LD_ABS(BPF_B, 128), | ||
11884 | BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0), | ||
11885 | BPF_LD_IND(BPF_B, BPF_REG_8, -70), | ||
11886 | BPF_EXIT_INSN(), | ||
11887 | }, | ||
11888 | .data = { | ||
11889 | 10, 20, 30, 40, 50, | ||
11890 | }, | ||
11891 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
11892 | .result = ACCEPT, | ||
11893 | .retval = 0, | ||
11894 | }, | ||
11895 | { | ||
11896 | "ld_abs: div + abs, test 3", | ||
11897 | .insns = { | ||
11898 | BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), | ||
11899 | BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0), | ||
11900 | BPF_LD_ABS(BPF_B, 3), | ||
11901 | BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7), | ||
11902 | BPF_EXIT_INSN(), | ||
11903 | }, | ||
11904 | .data = { | ||
11905 | 10, 20, 30, 40, 50, | ||
11906 | }, | ||
11907 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
11908 | .result = ACCEPT, | ||
11909 | .retval = 0, | ||
11910 | }, | ||
11911 | { | ||
11912 | "ld_abs: div + abs, test 4", | ||
11913 | .insns = { | ||
11914 | BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), | ||
11915 | BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0), | ||
11916 | BPF_LD_ABS(BPF_B, 256), | ||
11917 | BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7), | ||
11918 | BPF_EXIT_INSN(), | ||
11919 | }, | ||
11920 | .data = { | ||
11921 | 10, 20, 30, 40, 50, | ||
11922 | }, | ||
11923 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
11924 | .result = ACCEPT, | ||
11925 | .retval = 0, | ||
11926 | }, | ||
11927 | { | ||
11928 | "ld_abs: vlan + abs, test 1", | ||
11929 | .insns = { }, | ||
11930 | .data = { | ||
11931 | 0x34, | ||
11932 | }, | ||
11933 | .fill_helper = bpf_fill_ld_abs_vlan_push_pop, | ||
11934 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
11935 | .result = ACCEPT, | ||
11936 | .retval = 0xbef, | ||
11937 | }, | ||
11938 | { | ||
11939 | "ld_abs: vlan + abs, test 2", | ||
11940 | .insns = { | ||
11941 | BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), | ||
11942 | BPF_LD_ABS(BPF_B, 0), | ||
11943 | BPF_LD_ABS(BPF_H, 0), | ||
11944 | BPF_LD_ABS(BPF_W, 0), | ||
11945 | BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), | ||
11946 | BPF_MOV64_IMM(BPF_REG_6, 0), | ||
11947 | BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), | ||
11948 | BPF_MOV64_IMM(BPF_REG_2, 1), | ||
11949 | BPF_MOV64_IMM(BPF_REG_3, 2), | ||
11950 | BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
11951 | BPF_FUNC_skb_vlan_push), | ||
11952 | BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), | ||
11953 | BPF_LD_ABS(BPF_B, 0), | ||
11954 | BPF_LD_ABS(BPF_H, 0), | ||
11955 | BPF_LD_ABS(BPF_W, 0), | ||
11956 | BPF_MOV64_IMM(BPF_REG_0, 42), | ||
11957 | BPF_EXIT_INSN(), | ||
11958 | }, | ||
11959 | .data = { | ||
11960 | 0x34, | ||
11961 | }, | ||
11962 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
11963 | .result = ACCEPT, | ||
11964 | .retval = 42, | ||
11965 | }, | ||
11966 | { | ||
11967 | "ld_abs: jump around ld_abs", | ||
11968 | .insns = { }, | ||
11969 | .data = { | ||
11970 | 10, 11, | ||
11971 | }, | ||
11972 | .fill_helper = bpf_fill_jump_around_ld_abs, | ||
11973 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
11974 | .result = ACCEPT, | ||
11975 | .retval = 10, | ||
11976 | }, | ||
11683 | }; | 11977 | }; |
11684 | 11978 | ||
11685 | static int probe_filter_length(const struct bpf_insn *fp) | 11979 | static int probe_filter_length(const struct bpf_insn *fp) |
@@ -11783,7 +12077,7 @@ static int create_map_in_map(void) | |||
11783 | return outer_map_fd; | 12077 | return outer_map_fd; |
11784 | } | 12078 | } |
11785 | 12079 | ||
11786 | static char bpf_vlog[32768]; | 12080 | static char bpf_vlog[UINT_MAX >> 8]; |
11787 | 12081 | ||
11788 | static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, | 12082 | static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, |
11789 | int *map_fds) | 12083 | int *map_fds) |
@@ -11794,6 +12088,9 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, | |||
11794 | int *fixup_prog = test->fixup_prog; | 12088 | int *fixup_prog = test->fixup_prog; |
11795 | int *fixup_map_in_map = test->fixup_map_in_map; | 12089 | int *fixup_map_in_map = test->fixup_map_in_map; |
11796 | 12090 | ||
12091 | if (test->fill_helper) | ||
12092 | test->fill_helper(test); | ||
12093 | |||
11797 | /* Allocating HTs with 1 elem is fine here, since we only test | 12094 | /* Allocating HTs with 1 elem is fine here, since we only test |
11798 | * for verifier and not do a runtime lookup, so the only thing | 12095 | * for verifier and not do a runtime lookup, so the only thing |
11799 | * that really matters is value size in this case. | 12096 | * that really matters is value size in this case. |
@@ -11843,10 +12140,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv, | |||
11843 | int *passes, int *errors) | 12140 | int *passes, int *errors) |
11844 | { | 12141 | { |
11845 | int fd_prog, expected_ret, reject_from_alignment; | 12142 | int fd_prog, expected_ret, reject_from_alignment; |
12143 | int prog_len, prog_type = test->prog_type; | ||
11846 | struct bpf_insn *prog = test->insns; | 12144 | struct bpf_insn *prog = test->insns; |
11847 | int prog_len = probe_filter_length(prog); | ||
11848 | char data_in[TEST_DATA_LEN] = {}; | ||
11849 | int prog_type = test->prog_type; | ||
11850 | int map_fds[MAX_NR_MAPS]; | 12145 | int map_fds[MAX_NR_MAPS]; |
11851 | const char *expected_err; | 12146 | const char *expected_err; |
11852 | uint32_t retval; | 12147 | uint32_t retval; |
@@ -11856,6 +12151,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, | |||
11856 | map_fds[i] = -1; | 12151 | map_fds[i] = -1; |
11857 | 12152 | ||
11858 | do_test_fixup(test, prog, map_fds); | 12153 | do_test_fixup(test, prog, map_fds); |
12154 | prog_len = probe_filter_length(prog); | ||
11859 | 12155 | ||
11860 | fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, | 12156 | fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, |
11861 | prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, | 12157 | prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, |
@@ -11895,8 +12191,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv, | |||
11895 | } | 12191 | } |
11896 | 12192 | ||
11897 | if (fd_prog >= 0) { | 12193 | if (fd_prog >= 0) { |
11898 | err = bpf_prog_test_run(fd_prog, 1, data_in, sizeof(data_in), | 12194 | err = bpf_prog_test_run(fd_prog, 1, test->data, |
11899 | NULL, NULL, &retval, NULL); | 12195 | sizeof(test->data), NULL, NULL, |
12196 | &retval, NULL); | ||
11900 | if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { | 12197 | if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { |
11901 | printf("Unexpected bpf_prog_test_run error\n"); | 12198 | printf("Unexpected bpf_prog_test_run error\n"); |
11902 | goto fail_log; | 12199 | goto fail_log; |
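
Because do_test_fixup() now invokes **fill_helper** before probe_filter_length(), a test may leave **.insns** empty and synthesize its program at run time, up to **BPF_MAXINSNS** instructions. A hypothetical minimal filler (not in this patch) could look like::

    static void bpf_fill_ret_zero(struct bpf_test *self)
    {
            struct bpf_insn *insn = self->insns;
            unsigned int len = BPF_MAXINSNS;
            int i;

            /* pad to the maximum program length, then return 0 */
            for (i = 0; i < len - 1; i++)
                    insn[i] = BPF_MOV64_IMM(BPF_REG_0, 0);
            insn[len - 1] = BPF_EXIT_INSN();
    }

wired up as **.fill_helper = bpf_fill_ret_zero** with **.insns = { }**, exactly like the "ld_abs: vlan + abs, test 1" entry above.
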
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c new file mode 100644 index 000000000000..ad025bd75f1c --- /dev/null +++ b/tools/testing/selftests/bpf/trace_helpers.c | |||
@@ -0,0 +1,180 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | #include <stdio.h> | ||
3 | #include <stdlib.h> | ||
4 | #include <string.h> | ||
5 | #include <assert.h> | ||
6 | #include <errno.h> | ||
7 | #include <poll.h> | ||
8 | #include <unistd.h> | ||
9 | #include <linux/perf_event.h> | ||
10 | #include <sys/mman.h> | ||
11 | #include "trace_helpers.h" | ||
12 | |||
13 | #define MAX_SYMS 300000 | ||
14 | static struct ksym syms[MAX_SYMS]; | ||
15 | static int sym_cnt; | ||
16 | |||
17 | static int ksym_cmp(const void *p1, const void *p2) | ||
18 | { | ||
19 | return (((struct ksym *)p1)->addr > ((struct ksym *)p2)->addr) - (((struct ksym *)p1)->addr < ((struct ksym *)p2)->addr); | ||
20 | } | ||
21 | |||
22 | int load_kallsyms(void) | ||
23 | { | ||
24 | FILE *f = fopen("/proc/kallsyms", "r"); | ||
25 | char func[256], buf[256]; | ||
26 | char symbol; | ||
27 | void *addr; | ||
28 | int i = 0; | ||
29 | |||
30 | if (!f) | ||
31 | return -ENOENT; | ||
32 | |||
33 | while (!feof(f)) { | ||
34 | if (!fgets(buf, sizeof(buf), f)) | ||
35 | break; | ||
36 | if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3) | ||
37 | break; | ||
38 | if (!addr) | ||
39 | continue; | ||
40 | syms[i].addr = (long) addr; | ||
41 | syms[i].name = strdup(func); | ||
42 | i++; | ||
43 | } | ||
44 | sym_cnt = i; | ||
45 | qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp); | ||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | struct ksym *ksym_search(long key) | ||
50 | { | ||
51 | int start = 0, end = sym_cnt; | ||
52 | long result; | ||
53 | |||
54 | while (start < end) { | ||
55 | size_t mid = start + (end - start) / 2; | ||
56 | |||
57 | result = key - syms[mid].addr; | ||
58 | if (result < 0) | ||
59 | end = mid; | ||
60 | else if (result > 0) | ||
61 | start = mid + 1; | ||
62 | else | ||
63 | return &syms[mid]; | ||
64 | } | ||
65 | |||
66 | if (start >= 1 && syms[start - 1].addr < key && | ||
67 | key < syms[start].addr) | ||
68 | /* valid ksym */ | ||
69 | return &syms[start - 1]; | ||
70 | |||
71 | /* out of range. return _stext */ | ||
72 | return &syms[0]; | ||
73 | } | ||
74 | |||
75 | static int page_size; | ||
76 | static int page_cnt = 8; | ||
77 | static volatile struct perf_event_mmap_page *header; | ||
78 | |||
79 | int perf_event_mmap(int fd) | ||
80 | { | ||
81 | void *base; | ||
82 | int mmap_size; | ||
83 | |||
84 | page_size = getpagesize(); | ||
85 | mmap_size = page_size * (page_cnt + 1); | ||
86 | |||
87 | base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); | ||
88 | if (base == MAP_FAILED) { | ||
89 | printf("mmap err\n"); | ||
90 | return -1; | ||
91 | } | ||
92 | |||
93 | header = base; | ||
94 | return 0; | ||
95 | } | ||
96 | |||
97 | static int perf_event_poll(int fd) | ||
98 | { | ||
99 | struct pollfd pfd = { .fd = fd, .events = POLLIN }; | ||
100 | |||
101 | return poll(&pfd, 1, 1000); | ||
102 | } | ||
103 | |||
104 | struct perf_event_sample { | ||
105 | struct perf_event_header header; | ||
106 | __u32 size; | ||
107 | char data[]; | ||
108 | }; | ||
109 | |||
110 | static int perf_event_read(perf_event_print_fn fn) | ||
111 | { | ||
112 | __u64 data_tail = header->data_tail; | ||
113 | __u64 data_head = header->data_head; | ||
114 | __u64 buffer_size = page_cnt * page_size; | ||
115 | void *base, *begin, *end; | ||
116 | char buf[256]; | ||
117 | int ret; | ||
118 | |||
119 | asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ | ||
120 | if (data_head == data_tail) | ||
121 | return PERF_EVENT_CONT; | ||
122 | |||
123 | base = ((char *)header) + page_size; | ||
124 | |||
125 | begin = base + data_tail % buffer_size; | ||
126 | end = base + data_head % buffer_size; | ||
127 | |||
128 | while (begin != end) { | ||
129 | struct perf_event_sample *e; | ||
130 | |||
131 | e = begin; | ||
132 | if (begin + e->header.size > base + buffer_size) { | ||
133 | long len = base + buffer_size - begin; | ||
134 | |||
135 | assert(len < e->header.size); | ||
136 | memcpy(buf, begin, len); | ||
137 | memcpy(buf + len, base, e->header.size - len); | ||
138 | e = (void *) buf; | ||
139 | begin = base + e->header.size - len; | ||
140 | } else if (begin + e->header.size == base + buffer_size) { | ||
141 | begin = base; | ||
142 | } else { | ||
143 | begin += e->header.size; | ||
144 | } | ||
145 | |||
146 | if (e->header.type == PERF_RECORD_SAMPLE) { | ||
147 | ret = fn(e->data, e->size); | ||
148 | if (ret != PERF_EVENT_CONT) | ||
149 | return ret; | ||
150 | } else if (e->header.type == PERF_RECORD_LOST) { | ||
151 | struct { | ||
152 | struct perf_event_header header; | ||
153 | __u64 id; | ||
154 | __u64 lost; | ||
155 | } *lost = (void *) e; | ||
156 | printf("lost %lld events\n", lost->lost); | ||
157 | } else { | ||
158 | printf("unknown event type=%d size=%d\n", | ||
159 | e->header.type, e->header.size); | ||
160 | } | ||
161 | } | ||
162 | |||
163 | __sync_synchronize(); /* smp_mb() */ | ||
164 | header->data_tail = data_head; | ||
165 | return PERF_EVENT_CONT; | ||
166 | } | ||
167 | |||
168 | int perf_event_poller(int fd, perf_event_print_fn output_fn) | ||
169 | { | ||
170 | int ret; | ||
171 | |||
172 | for (;;) { | ||
173 | perf_event_poll(fd); | ||
174 | ret = perf_event_read(output_fn); | ||
175 | if (ret != PERF_EVENT_CONT) | ||
176 | return ret; | ||
177 | } | ||
178 | |||
179 | return PERF_EVENT_DONE; | ||
180 | } | ||
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h new file mode 100644 index 000000000000..fe3eefd21e86 --- /dev/null +++ b/tools/testing/selftests/bpf/trace_helpers.h | |||
@@ -0,0 +1,23 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | #ifndef __TRACE_HELPER_H | ||
3 | #define __TRACE_HELPER_H | ||
4 | |||
5 | struct ksym { | ||
6 | long addr; | ||
7 | char *name; | ||
8 | }; | ||
9 | |||
10 | int load_kallsyms(void); | ||
11 | struct ksym *ksym_search(long key); | ||
12 | |||
13 | typedef int (*perf_event_print_fn)(void *data, int size); | ||
14 | |||
15 | /* return code for perf_event_print_fn */ | ||
16 | #define PERF_EVENT_DONE 0 | ||
17 | #define PERF_EVENT_ERROR -1 | ||
18 | #define PERF_EVENT_CONT -2 | ||
19 | |||
20 | int perf_event_mmap(int fd); | ||
21 | /* return PERF_EVENT_DONE or PERF_EVENT_ERROR */ | ||
22 | int perf_event_poller(int fd, perf_event_print_fn output_fn); | ||
23 | #endif | ||
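
Putting the new helpers together, a hedged end-to-end sketch of a consumer (callback and function names here are illustrative; test_get_stack_raw_tp() above shows the real wiring)::

    #include <stdio.h>
    #include "trace_helpers.h"

    static int print_first_sample(void *data, int size)
    {
            /* resolve the first 8 bytes of the sample as a kernel address */
            if (size >= (int)sizeof(long))
                    printf("top frame: %s\n", ksym_search(*(long *)data)->name);
            return PERF_EVENT_DONE; /* stop the poller after one sample */
    }

    static int consume(int perf_fd)
    {
            if (load_kallsyms() || perf_event_mmap(perf_fd))
                    return PERF_EVENT_ERROR;
            return perf_event_poller(perf_fd, print_first_sample);
    }

Here perf_fd is assumed to be a perf event fd already installed into a BPF program's perf event array, as done with bpf_map_update_elem() in the raw tracepoint test.
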