| author | David S. Miller <davem@davemloft.net> | 2018-05-07 23:35:08 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2018-05-07 23:35:08 -0400 |
| commit | 01adc4851a8090b46c7a5ed9cfc4b97e65abfbf4 (patch) | |
| tree | 2ae02593d7139962648dff203f3f9701e34ccbc3 | |
| parent | 18b338f5f9539512e76fd9ebd4c6ca1a0e159e2b (diff) | |
| parent | e94fa1d93117e7f1eb783dc9cae6c70650944449 (diff) | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Minor conflict, a CHECK was placed into an if() statement
in net-next, whilst a newline was added to that CHECK
call in 'net'. Thanks to Daniel for the merge resolution.
Signed-off-by: David S. Miller <davem@davemloft.net>
107 files changed, 8852 insertions, 2713 deletions
diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst new file mode 100644 index 000000000000..91928d9ee4bf --- /dev/null +++ b/Documentation/networking/af_xdp.rst | |||
| @@ -0,0 +1,297 @@ | |||
| 1 | .. SPDX-License-Identifier: GPL-2.0 | ||
| 2 | |||
| 3 | ====== | ||
| 4 | AF_XDP | ||
| 5 | ====== | ||
| 6 | |||
| 7 | Overview | ||
| 8 | ======== | ||
| 9 | |||
| 10 | AF_XDP is an address family that is optimized for high performance | ||
| 11 | packet processing. | ||
| 12 | |||
| 13 | This document assumes that the reader is familiar with BPF and XDP. If | ||
| 14 | not, the Cilium project has an excellent reference guide at | ||
| 15 | http://cilium.readthedocs.io/en/doc-1.0/bpf/. | ||
| 16 | |||
| 17 | Using the XDP_REDIRECT action, an XDP program can redirect ingress | ||
| 18 | frames to other XDP-enabled netdevs via the bpf_redirect_map() | ||
| 19 | function. AF_XDP sockets make it possible for XDP programs to | ||
| 20 | redirect frames to a memory buffer in a user-space | ||
| 21 | application. | ||
| 22 | |||
| 23 | An AF_XDP socket (XSK) is created with the normal socket() | ||
| 24 | syscall. Associated with each XSK are two rings: the RX ring and the | ||
| 25 | TX ring. A socket can receive packets on the RX ring and it can send | ||
| 26 | packets on the TX ring. These rings are registered and sized with the | ||
| 27 | setsockopts XDP_RX_RING and XDP_TX_RING, respectively. It is mandatory | ||
| 28 | to have at least one of these rings for each socket. An RX or TX | ||
| 29 | descriptor ring points to a data buffer in a memory area called a | ||
| 30 | UMEM. RX and TX can share the same UMEM so that a packet does not have | ||
| 31 | to be copied between RX and TX. Moreover, if a packet needs to be kept | ||
| 32 | for a while due to a possible retransmit, the descriptor that points | ||
| 33 | to that packet can be changed to point to another frame and reused | ||
| 34 | right away. This again avoids copying data. | ||
| 35 | |||
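As a rough illustration, creating an XSK and sizing its rings could
look like this (a minimal sketch against the uapi proposed in this
patch set; AF_XDP may need defining by hand with older libc headers,
and error handling is omitted)::

    #include <linux/if_xdp.h>
    #include <sys/socket.h>

    int xsk = socket(AF_XDP, SOCK_RAW, 0);
    int ndescs = 1024; /* ring sizes must be a power of two */

    /* Register and size the RX and TX descriptor rings. */
    setsockopt(xsk, SOL_XDP, XDP_RX_RING, &ndescs, sizeof(ndescs));
    setsockopt(xsk, SOL_XDP, XDP_TX_RING, &ndescs, sizeof(ndescs));
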
| 36 | The UMEM consists of a number of equally sized frames and each frame | ||
| 37 | has a unique frame id. A descriptor in one of the rings references a | ||
| 38 | frame by referencing its frame id. The user space allocates memory for | ||
| 39 | this UMEM using whatever means it feels is most appropriate (malloc, | ||
| 40 | mmap, huge pages, etc). This memory area is then registered with the | ||
| 41 | kernel using the new setsockopt XDP_UMEM_REG. The UMEM also has two | ||
| 42 | rings: the FILL ring and the COMPLETION ring. The fill ring is used by | ||
| 43 | the application to send down frame ids for the kernel to fill in with | ||
| 44 | RX packet data. References to these frames will then appear in the RX | ||
| 45 | ring once each packet has been received. The completion ring, on the | ||
| 46 | other hand, contains frame ids that the kernel has transmitted | ||
| 47 | completely and can now be used again by user space, for either TX or | ||
| 48 | RX. Thus, the frame ids appearing in the completion ring are ids that | ||
| 49 | were previously transmitted using the TX ring. In summary, the RX and | ||
| 50 | FILL rings are used for the RX path and the TX and COMPLETION rings | ||
| 51 | are used for the TX path. | ||
| 52 | |||
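A matching UMEM setup might look like this (a sketch; the struct
xdp_umem_reg field names follow this patch set and may change)::

    #include <linux/if_xdp.h>
    #include <stdlib.h>
    #include <unistd.h>

    #define NUM_FRAMES 16
    #define FRAME_SIZE 4096

    void *bufs;
    struct xdp_umem_reg mr;
    int ndescs = NUM_FRAMES;

    /* Page-aligned buffer backing all UMEM frames. */
    posix_memalign(&bufs, getpagesize(), NUM_FRAMES * FRAME_SIZE);

    mr.addr = (__u64)(unsigned long)bufs;
    mr.len = NUM_FRAMES * FRAME_SIZE;
    mr.frame_size = FRAME_SIZE;
    mr.frame_headroom = 0;
    setsockopt(xsk, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));

    /* The two rings owned by the UMEM: Fill and Completion. */
    setsockopt(xsk, SOL_XDP, XDP_UMEM_FILL_RING, &ndescs, sizeof(ndescs));
    setsockopt(xsk, SOL_XDP, XDP_UMEM_COMPLETION_RING, &ndescs,
               sizeof(ndescs));
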
| 53 | The socket is then finally bound with a bind() call to a device and a | ||
| 54 | specific queue id on that device, and it is not until bind is | ||
| 55 | completed that traffic starts to flow. | ||
| 56 | |||
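Continuing the sketch above, binding the socket to, say, queue 16 of
eth0 could be::

    #include <net/if.h>

    struct sockaddr_xdp addr = {};

    addr.sxdp_family = AF_XDP;
    addr.sxdp_ifindex = if_nametoindex("eth0");
    addr.sxdp_queue_id = 16;
    bind(xsk, (struct sockaddr *)&addr, sizeof(addr));
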
| 57 | The UMEM can be shared between processes, if desired. If a process | ||
| 58 | wants to do this, it simply skips the registration of the UMEM and its | ||
| 59 | corresponding two rings, sets the XDP_SHARED_UMEM flag in the bind | ||
| 60 | call and submits the XSK of the process it would like to share UMEM | ||
| 61 | with as well as its own newly created XSK socket. The new process will | ||
| 62 | then receive frame id references in its own RX ring that point to this | ||
| 63 | shared UMEM. Note that since the ring structures are single-consumer / | ||
| 64 | single-producer (for performance reasons), the new process has to | ||
| 65 | create its own socket with associated RX and TX rings, since it cannot | ||
| 66 | share this with the other process. This is also the reason that there | ||
| 67 | is only one set of FILL and COMPLETION rings per UMEM. It is the | ||
| 68 | responsibility of a single process to handle the UMEM. | ||
| 69 | |||
| 70 | How are packets then distributed from an XDP program to the XSKs? There | ||
| 71 | is a BPF map called XSKMAP (or BPF_MAP_TYPE_XSKMAP in full). The | ||
| 72 | user-space application can place an XSK at an arbitrary place in this | ||
| 73 | map. The XDP program can then redirect a packet to a specific index in | ||
| 74 | this map and at this point XDP validates that the XSK in that map was | ||
| 75 | indeed bound to that device and ring number. If not, the packet is | ||
| 76 | dropped. If the map is empty at that index, the packet is also | ||
| 77 | dropped. This also means that it is currently mandatory to have an XDP | ||
| 78 | program loaded (and one XSK in the XSKMAP) to be able to get any | ||
| 79 | traffic to user space through the XSK. | ||
| 80 | |||
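A minimal XDP program in the spirit of samples/bpf/xdpsock_kern.c
might look like this (a sketch; it blindly redirects every frame on
the queue to whatever XSK sits at the queue's index in the map)::

    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    struct bpf_map_def SEC("maps") xsks_map = {
            .type = BPF_MAP_TYPE_XSKMAP,
            .key_size = sizeof(int),
            .value_size = sizeof(int),
            .max_entries = 4,
    };

    SEC("xdp_sock")
    int xdp_sock_prog(struct xdp_md *ctx)
    {
            return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, 0);
    }
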
| 81 | AF_XDP can operate in two different modes: XDP_SKB and XDP_DRV. If the | ||
| 82 | driver does not have support for XDP, or XDP_SKB is explicitly chosen | ||
| 83 | when loading the XDP program, XDP_SKB mode is employed that uses SKBs | ||
| 84 | together with the generic XDP support and copies out the data to user | ||
| 85 | space. This is a fallback mode that works for any network device. On the other | ||
| 86 | hand, if the driver has support for XDP, it will be used by the AF_XDP | ||
| 87 | code to provide better performance, but there is still a copy of the | ||
| 88 | data into user space. | ||
| 89 | |||
| 90 | Concepts | ||
| 91 | ======== | ||
| 92 | |||
| 93 | In order to use an AF_XDP socket, a number of associated objects need | ||
| 94 | to be set up. | ||
| 95 | |||
| 96 | Jonathan Corbet has also written an excellent article on LWN, | ||
| 97 | "Accelerating networking with AF_XDP". It can be found at | ||
| 98 | https://lwn.net/Articles/750845/. | ||
| 99 | |||
| 100 | UMEM | ||
| 101 | ---- | ||
| 102 | |||
| 103 | UMEM is a region of virtually contiguous memory, divided into | ||
| 104 | equal-sized frames. A UMEM is associated with a netdev and a specific | ||
| 105 | queue id of that netdev. It is created and configured (frame size, | ||
| 106 | frame headroom, start address and size) by using the XDP_UMEM_REG | ||
| 107 | setsockopt system call. A UMEM is bound to a netdev and queue id via | ||
| 108 | the bind() system call. | ||
| 109 | |||
| 110 | An AF_XDP socket is linked to a single UMEM, but one UMEM can have | ||
| 111 | multiple AF_XDP sockets. To share a UMEM created via socket A, | ||
| 112 | socket B sets the XDP_SHARED_UMEM flag in the | ||
| 113 | struct sockaddr_xdp member sxdp_flags, and passes the file descriptor | ||
| 114 | of A in the struct sockaddr_xdp member sxdp_shared_umem_fd. | ||
| 115 | |||
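In code, the sharing socket B might bind like this (a sketch reusing
the names from the earlier examples; xsk is the fd of socket A that
registered the UMEM, xsk_b is B's own fd)::

    struct sockaddr_xdp addr = {};

    addr.sxdp_family = AF_XDP;
    addr.sxdp_ifindex = if_nametoindex("eth0");
    addr.sxdp_queue_id = 16;
    addr.sxdp_flags = XDP_SHARED_UMEM;
    addr.sxdp_shared_umem_fd = xsk;
    bind(xsk_b, (struct sockaddr *)&addr, sizeof(addr));
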
| 116 | The UMEM has two single-producer/single-consumer rings that are used | ||
| 117 | to transfer ownership of UMEM frames between the kernel and the | ||
| 118 | user-space application. | ||
| 119 | |||
| 120 | Rings | ||
| 121 | ----- | ||
| 122 | |||
| 123 | There are four different kinds of rings: Fill, Completion, RX and | ||
| 124 | TX. All rings are single-producer/single-consumer, so the user-space | ||
| 125 | application needs explicit synchronization if multiple | ||
| 126 | processes/threads are reading/writing to them. | ||
| 127 | |||
| 128 | The UMEM uses two rings: Fill and Completion. Each socket associated | ||
| 129 | with the UMEM must have an RX queue, TX queue or both. Say that there | ||
| 130 | is a setup with four sockets (all doing TX and RX). Then there will be | ||
| 131 | one Fill ring, one Completion ring, four TX rings and four RX rings. | ||
| 132 | |||
| 133 | The rings are head(producer)/tail(consumer) based rings. A producer | ||
| 134 | writes the data ring at the index pointed out by the struct xdp_ring | ||
| 135 | producer member, and then increments the producer index. A consumer | ||
| 136 | reads the data ring at the index pointed out by the struct xdp_ring | ||
| 137 | consumer member, and then increments the consumer index. | ||
| 138 | |||
| 139 | The rings are configured and created via the _RING setsockopt system | ||
| 140 | calls and mmapped to user-space using the appropriate offset to mmap() | ||
| 141 | (XDP_PGOFF_RX_RING, XDP_PGOFF_TX_RING, XDP_UMEM_PGOFF_FILL_RING and | ||
| 142 | XDP_UMEM_PGOFF_COMPLETION_RING). | ||
| 143 | |||
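Mapping the RX ring, for instance, might look like this (a sketch;
the exact ring layout is defined by this patch set's uapi)::

    #include <sys/mman.h>

    struct xdp_rxtx_ring *rx_ring;

    rx_ring = mmap(NULL, sizeof(struct xdp_ring) +
                   ndescs * sizeof(struct xdp_desc),
                   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
                   xsk, XDP_PGOFF_RX_RING);
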
| 144 | The size of the rings must be a power of two. | ||
| 145 | |||
| 146 | UMEM Fill Ring | ||
| 147 | ~~~~~~~~~~~~~~ | ||
| 148 | |||
| 149 | The Fill ring is used to transfer ownership of UMEM frames from | ||
| 150 | user-space to kernel-space. The UMEM indices are passed in the | ||
| 151 | ring. As an example, if the UMEM is 64k and each frame is 4k, then the | ||
| 152 | UMEM has 16 frames and can pass indices between 0 and 15. | ||
| 153 | |||
| 154 | Frames passed to the kernel are used for the ingress path (RX rings). | ||
| 155 | |||
| 156 | The user application produces UMEM indices to this ring. | ||
| 157 | |||
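Handing all 16 frames from the earlier example to the kernel could
use the naive enqueue_one() sketched in the Usage section below
(fill_ring is assumed to be the mmapped Fill ring)::

    __u32 idx;

    for (idx = 0; idx < NUM_FRAMES; idx++)
            enqueue_one(fill_ring, &idx);
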
| 158 | UMEM Completion Ring | ||
| 159 | ~~~~~~~~~~~~~~~~~~~~ | ||
| 160 | |||
| 161 | The Completion Ring is used to transfer ownership of UMEM frames from | ||
| 162 | kernel-space to user-space. Just like the Fill ring, UMEM indices are | ||
| 163 | used. | ||
| 164 | |||
| 165 | Frames passed from the kernel to user-space are frames that have been | ||
| 166 | sent (TX ring) and can be used by user-space again. | ||
| 167 | |||
| 168 | The user application consumes UMEM indices from this ring. | ||
| 169 | |||
| 170 | |||
| 171 | RX Ring | ||
| 172 | ~~~~~~~ | ||
| 173 | |||
| 174 | The RX ring is the receiving side of a socket. Each entry in the ring | ||
| 175 | is a struct xdp_desc descriptor. The descriptor contains the UMEM | ||
| 176 | index (idx), the length of the data (len) and the offset into the | ||
| 177 | frame (offset). | ||
| 178 | |||
| 179 | If no frames have been passed to the kernel via the Fill ring, no | ||
| 180 | descriptors will (or can) appear on the RX ring. | ||
| 181 | |||
| 182 | The user application consumes struct xdp_desc descriptors from this | ||
| 183 | ring. | ||
| 184 | |||
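Draining received frames could then look like this (a sketch built
on the naive dequeue_one() from the Usage section below; bufs and
FRAME_SIZE are from the UMEM example, process_packet() is
hypothetical)::

    struct xdp_desc desc;

    while (dequeue_one(rx_ring, &desc) == 0) {
            void *pkt = (char *)bufs + desc.idx * FRAME_SIZE + desc.offset;

            process_packet(pkt, desc.len);
            /* Hand the frame back to the Fill ring for reuse. */
            enqueue_one(fill_ring, &desc.idx);
    }
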
| 185 | TX Ring | ||
| 186 | ~~~~~~~ | ||
| 187 | |||
| 188 | The TX ring is used to send frames. The struct xdp_desc descriptor is | ||
| 189 | filled (index, length and offset) and passed into the ring. | ||
| 190 | |||
| 191 | To start the transfer, a sendmsg() system call is required. This might | ||
| 192 | be relaxed in the future. | ||
| 193 | |||
| 194 | The user application produces struct xdp_desc descriptors to this | ||
| 195 | ring. | ||
| 196 | |||
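Triggering transmission could be as simple as (a sketch)::

    struct msghdr msg = {};

    /* No payload; the descriptors are already on the TX ring. */
    sendmsg(xsk, &msg, MSG_DONTWAIT);
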
| 197 | XSKMAP / BPF_MAP_TYPE_XSKMAP | ||
| 198 | ---------------------------- | ||
| 199 | |||
| 200 | On XDP side there is a BPF map type BPF_MAP_TYPE_XSKMAP (XSKMAP) that | ||
| 201 | is used in conjunction with bpf_redirect_map() to pass the ingress | ||
| 202 | frame to a socket. | ||
| 203 | |||
| 204 | The user application inserts the socket into the map via the bpf() | ||
| 205 | system call. | ||
| 206 | |||
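With the map helpers from tools/lib/bpf this could be (illustrative;
xsks_map_fd is the fd of the XSKMAP, and the key is the index the
XDP program redirects to)::

    int key = 0;
    int fd = xsk; /* the AF_XDP socket */

    bpf_map_update_elem(xsks_map_fd, &key, &fd, 0);
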
| 207 | Note that if an XDP program tries to redirect to a socket that does | ||
| 208 | not match the queue configuration and netdev, the frame will be | ||
| 209 | dropped. E.g., if an AF_XDP socket is bound to netdev eth0 and | ||
| 210 | queue 17, only the XDP program executing for eth0 and queue 17 will | ||
| 211 | successfully pass data to the socket. Please refer to the sample | ||
| 212 | application in samples/bpf/ for an example. | ||
| 213 | |||
| 214 | Usage | ||
| 215 | ===== | ||
| 216 | |||
| 217 | In order to use AF_XDP sockets, two parts are needed: the | ||
| 218 | user-space application and the XDP program. For a complete setup and | ||
| 219 | usage example, please refer to the sample application. The user-space | ||
| 220 | side is xdpsock_user.c and the XDP side xdpsock_kern.c. | ||
| 221 | |||
| 222 | Naive ring dequeue and enqueue could look like this:: | ||
| 223 | |||
| 224 | // typedef struct xdp_rxtx_ring RING; | ||
| 225 | // typedef struct xdp_umem_ring RING; | ||
| 226 | |||
| 227 | // typedef struct xdp_desc RING_TYPE; | ||
| 228 | // typedef __u32 RING_TYPE; | ||
| 229 | |||
| 230 | int dequeue_one(RING *ring, RING_TYPE *item) | ||
| 231 | { | ||
| 232 | __u32 entries = ring->ptrs.producer - ring->ptrs.consumer; | ||
| 233 | |||
| 234 | if (entries == 0) | ||
| 235 | return -1; | ||
| 236 | |||
| 237 | // read-barrier! | ||
| 238 | |||
| 239 | *item = ring->desc[ring->ptrs.consumer & (RING_SIZE - 1)]; | ||
| 240 | ring->ptrs.consumer++; | ||
| 241 | return 0; | ||
| 242 | } | ||
| 243 | |||
| 244 | int enqueue_one(RING *ring, const RING_TYPE *item) | ||
| 245 | { | ||
| 246 | __u32 free_entries = RING_SIZE - (ring->ptrs.producer - ring->ptrs.consumer); | ||
| 247 | |||
| 248 | if (free_entries == 0) | ||
| 249 | return -1; | ||
| 250 | |||
| 251 | ring->desc[ring->ptrs.producer & (RING_SIZE - 1)] = *item; | ||
| 252 | |||
| 253 | // write-barrier! | ||
| 254 | |||
| 255 | ring->ptrs.producer++; | ||
| 256 | return 0; | ||
| 257 | } | ||
| 258 | |||
| 259 | |||
| 260 | For a more optimized version, please refer to the sample application. | ||
| 261 | |||
| 262 | Sample application | ||
| 263 | ================== | ||
| 264 | |||
| 265 | There is an xdpsock benchmarking/test application included that | ||
| 266 | demonstrates how to use AF_XDP sockets with both private and shared | ||
| 267 | UMEMs. Say that you would like your UDP traffic from port 4242 to end | ||
| 268 | up in queue 16, which we will enable AF_XDP on. Here, we use ethtool | ||
| 269 | for this:: | ||
| 270 | |||
| 271 | ethtool -N p3p2 rx-flow-hash udp4 fn | ||
| 272 | ethtool -N p3p2 flow-type udp4 src-port 4242 dst-port 4242 \ | ||
| 273 | action 16 | ||
| 274 | |||
| 275 | Running the rxdrop benchmark in XDP_DRV mode can then be done | ||
| 276 | using:: | ||
| 277 | |||
| 278 | samples/bpf/xdpsock -i p3p2 -q 16 -r -N | ||
| 279 | |||
| 280 | For XDP_SKB mode, use the switch "-S" instead of "-N" and all options | ||
| 281 | can be displayed with "-h", as usual. | ||
| 282 | |||
| 283 | Credits | ||
| 284 | ======= | ||
| 285 | |||
| 286 | - Björn Töpel (AF_XDP core) | ||
| 287 | - Magnus Karlsson (AF_XDP core) | ||
| 288 | - Alexander Duyck | ||
| 289 | - Alexei Starovoitov | ||
| 290 | - Daniel Borkmann | ||
| 291 | - Jesper Dangaard Brouer | ||
| 292 | - John Fastabend | ||
| 293 | - Jonathan Corbet (LWN coverage) | ||
| 294 | - Michael S. Tsirkin | ||
| 295 | - Qi Z Zhang | ||
| 296 | - Willem de Bruijn | ||
| 297 | |||
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index fd55c7de9991..5032e1263bc9 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt | |||
| @@ -483,6 +483,12 @@ Example output from dmesg: | |||
| 483 | [ 3389.935851] JIT code: 00000030: 00 e8 28 94 ff e0 83 f8 01 75 07 b8 ff ff 00 00 | 483 | [ 3389.935851] JIT code: 00000030: 00 e8 28 94 ff e0 83 f8 01 75 07 b8 ff ff 00 00 |
| 484 | [ 3389.935852] JIT code: 00000040: eb 02 31 c0 c9 c3 | 484 | [ 3389.935852] JIT code: 00000040: eb 02 31 c0 c9 c3 |
| 485 | 485 | ||
| 486 | When CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1 and | ||
| 487 | setting it to any other value will fail. This is even the case for | ||
| 488 | setting bpf_jit_enable to 2, since dumping the final JIT image into the kernel log | ||
| 489 | is discouraged and introspection through bpftool (under tools/bpf/bpftool/) is the | ||
| 490 | generally recommended approach instead. | ||
| 491 | |||
| 486 | In the kernel source tree under tools/bpf/, there's bpf_jit_disasm for | 492 | In the kernel source tree under tools/bpf/, there's bpf_jit_disasm for |
| 487 | generating disassembly out of the kernel log's hexdump: | 493 | generating disassembly out of the kernel log's hexdump: |
| 488 | 494 | ||
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index f204eaff657d..cbd9bdd4a79e 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst | |||
| @@ -6,6 +6,7 @@ Contents: | |||
| 6 | .. toctree:: | 6 | .. toctree:: |
| 7 | :maxdepth: 2 | 7 | :maxdepth: 2 |
| 8 | 8 | ||
| 9 | af_xdp | ||
| 9 | batman-adv | 10 | batman-adv |
| 10 | can | 11 | can |
| 11 | dpaa2/index | 12 | dpaa2/index |
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index 5992602469d8..9ecde517728c 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt | |||
| @@ -45,6 +45,7 @@ through bpf(2) and passing a verifier in the kernel, a JIT will then | |||
| 45 | translate these BPF proglets into native CPU instructions. There are | 45 | translate these BPF proglets into native CPU instructions. There are |
| 46 | two flavors of JITs, the newer eBPF JIT currently supported on: | 46 | two flavors of JITs, the newer eBPF JIT currently supported on: |
| 47 | - x86_64 | 47 | - x86_64 |
| 48 | - x86_32 | ||
| 48 | - arm64 | 49 | - arm64 |
| 49 | - arm32 | 50 | - arm32 |
| 50 | - ppc64 | 51 | - ppc64 |
diff --git a/MAINTAINERS b/MAINTAINERS index ebe0b9ed7805..b22be10d5916 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
| @@ -2729,7 +2729,6 @@ F: Documentation/networking/filter.txt | |||
| 2729 | F: Documentation/bpf/ | 2729 | F: Documentation/bpf/ |
| 2730 | F: include/linux/bpf* | 2730 | F: include/linux/bpf* |
| 2731 | F: include/linux/filter.h | 2731 | F: include/linux/filter.h |
| 2732 | F: include/trace/events/bpf.h | ||
| 2733 | F: include/trace/events/xdp.h | 2732 | F: include/trace/events/xdp.h |
| 2734 | F: include/uapi/linux/bpf* | 2733 | F: include/uapi/linux/bpf* |
| 2735 | F: include/uapi/linux/filter.h | 2734 | F: include/uapi/linux/filter.h |
| @@ -15408,6 +15407,14 @@ T: git git://linuxtv.org/media_tree.git | |||
| 15408 | S: Maintained | 15407 | S: Maintained |
| 15409 | F: drivers/media/tuners/tuner-xc2028.* | 15408 | F: drivers/media/tuners/tuner-xc2028.* |
| 15410 | 15409 | ||
| 15410 | XDP SOCKETS (AF_XDP) | ||
| 15411 | M: Björn Töpel <bjorn.topel@intel.com> | ||
| 15412 | M: Magnus Karlsson <magnus.karlsson@intel.com> | ||
| 15413 | L: netdev@vger.kernel.org | ||
| 15414 | S: Maintained | ||
| 15415 | F: kernel/bpf/xskmap.c | ||
| 15416 | F: net/xdp/ | ||
| 15417 | |||
| 15411 | XEN BLOCK SUBSYSTEM | 15418 | XEN BLOCK SUBSYSTEM |
| 15412 | M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 15419 | M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> |
| 15413 | M: Roger Pau Monné <roger.pau@citrix.com> | 15420 | M: Roger Pau Monné <roger.pau@citrix.com> |
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index b5030e1a41d8..82689b999257 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c | |||
| @@ -1452,83 +1452,6 @@ exit: | |||
| 1452 | emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); | 1452 | emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); |
| 1453 | emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code)); | 1453 | emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code)); |
| 1454 | break; | 1454 | break; |
| 1455 | /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */ | ||
| 1456 | case BPF_LD | BPF_ABS | BPF_W: | ||
| 1457 | case BPF_LD | BPF_ABS | BPF_H: | ||
| 1458 | case BPF_LD | BPF_ABS | BPF_B: | ||
| 1459 | /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */ | ||
| 1460 | case BPF_LD | BPF_IND | BPF_W: | ||
| 1461 | case BPF_LD | BPF_IND | BPF_H: | ||
| 1462 | case BPF_LD | BPF_IND | BPF_B: | ||
| 1463 | { | ||
| 1464 | const u8 r4 = bpf2a32[BPF_REG_6][1]; /* r4 = ptr to sk_buff */ | ||
| 1465 | const u8 r0 = bpf2a32[BPF_REG_0][1]; /*r0: struct sk_buff *skb*/ | ||
| 1466 | /* rtn value */ | ||
| 1467 | const u8 r1 = bpf2a32[BPF_REG_0][0]; /* r1: int k */ | ||
| 1468 | const u8 r2 = bpf2a32[BPF_REG_1][1]; /* r2: unsigned int size */ | ||
| 1469 | const u8 r3 = bpf2a32[BPF_REG_1][0]; /* r3: void *buffer */ | ||
| 1470 | const u8 r6 = bpf2a32[TMP_REG_1][1]; /* r6: void *(*func)(..) */ | ||
| 1471 | int size; | ||
| 1472 | |||
| 1473 | /* Setting up first argument */ | ||
| 1474 | emit(ARM_MOV_R(r0, r4), ctx); | ||
| 1475 | |||
| 1476 | /* Setting up second argument */ | ||
| 1477 | emit_a32_mov_i(r1, imm, false, ctx); | ||
| 1478 | if (BPF_MODE(code) == BPF_IND) | ||
| 1479 | emit_a32_alu_r(r1, src_lo, false, sstk, ctx, | ||
| 1480 | false, false, BPF_ADD); | ||
| 1481 | |||
| 1482 | /* Setting up third argument */ | ||
| 1483 | switch (BPF_SIZE(code)) { | ||
| 1484 | case BPF_W: | ||
| 1485 | size = 4; | ||
| 1486 | break; | ||
| 1487 | case BPF_H: | ||
| 1488 | size = 2; | ||
| 1489 | break; | ||
| 1490 | case BPF_B: | ||
| 1491 | size = 1; | ||
| 1492 | break; | ||
| 1493 | default: | ||
| 1494 | return -EINVAL; | ||
| 1495 | } | ||
| 1496 | emit_a32_mov_i(r2, size, false, ctx); | ||
| 1497 | |||
| 1498 | /* Setting up fourth argument */ | ||
| 1499 | emit(ARM_ADD_I(r3, ARM_SP, imm8m(SKB_BUFFER)), ctx); | ||
| 1500 | |||
| 1501 | /* Setting up function pointer to call */ | ||
| 1502 | emit_a32_mov_i(r6, (unsigned int)bpf_load_pointer, false, ctx); | ||
| 1503 | emit_blx_r(r6, ctx); | ||
| 1504 | |||
| 1505 | emit(ARM_EOR_R(r1, r1, r1), ctx); | ||
| 1506 | /* Check if return address is NULL or not. | ||
| 1507 | * if NULL then jump to epilogue | ||
| 1508 | * else continue to load the value from retn address | ||
| 1509 | */ | ||
| 1510 | emit(ARM_CMP_I(r0, 0), ctx); | ||
| 1511 | jmp_offset = epilogue_offset(ctx); | ||
| 1512 | check_imm24(jmp_offset); | ||
| 1513 | _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); | ||
| 1514 | |||
| 1515 | /* Load value from the address */ | ||
| 1516 | switch (BPF_SIZE(code)) { | ||
| 1517 | case BPF_W: | ||
| 1518 | emit(ARM_LDR_I(r0, r0, 0), ctx); | ||
| 1519 | emit_rev32(r0, r0, ctx); | ||
| 1520 | break; | ||
| 1521 | case BPF_H: | ||
| 1522 | emit(ARM_LDRH_I(r0, r0, 0), ctx); | ||
| 1523 | emit_rev16(r0, r0, ctx); | ||
| 1524 | break; | ||
| 1525 | case BPF_B: | ||
| 1526 | emit(ARM_LDRB_I(r0, r0, 0), ctx); | ||
| 1527 | /* No need to reverse */ | ||
| 1528 | break; | ||
| 1529 | } | ||
| 1530 | break; | ||
| 1531 | } | ||
| 1532 | /* ST: *(size *)(dst + off) = imm */ | 1455 | /* ST: *(size *)(dst + off) = imm */ |
| 1533 | case BPF_ST | BPF_MEM | BPF_W: | 1456 | case BPF_ST | BPF_MEM | BPF_W: |
| 1534 | case BPF_ST | BPF_MEM | BPF_H: | 1457 | case BPF_ST | BPF_MEM | BPF_H: |
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index a93350451e8e..0b40c8fb0706 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c | |||
| @@ -723,71 +723,6 @@ emit_cond_jmp: | |||
| 723 | emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); | 723 | emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); |
| 724 | break; | 724 | break; |
| 725 | 725 | ||
| 726 | /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */ | ||
| 727 | case BPF_LD | BPF_ABS | BPF_W: | ||
| 728 | case BPF_LD | BPF_ABS | BPF_H: | ||
| 729 | case BPF_LD | BPF_ABS | BPF_B: | ||
| 730 | /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */ | ||
| 731 | case BPF_LD | BPF_IND | BPF_W: | ||
| 732 | case BPF_LD | BPF_IND | BPF_H: | ||
| 733 | case BPF_LD | BPF_IND | BPF_B: | ||
| 734 | { | ||
| 735 | const u8 r0 = bpf2a64[BPF_REG_0]; /* r0 = return value */ | ||
| 736 | const u8 r6 = bpf2a64[BPF_REG_6]; /* r6 = pointer to sk_buff */ | ||
| 737 | const u8 fp = bpf2a64[BPF_REG_FP]; | ||
| 738 | const u8 r1 = bpf2a64[BPF_REG_1]; /* r1: struct sk_buff *skb */ | ||
| 739 | const u8 r2 = bpf2a64[BPF_REG_2]; /* r2: int k */ | ||
| 740 | const u8 r3 = bpf2a64[BPF_REG_3]; /* r3: unsigned int size */ | ||
| 741 | const u8 r4 = bpf2a64[BPF_REG_4]; /* r4: void *buffer */ | ||
| 742 | const u8 r5 = bpf2a64[BPF_REG_5]; /* r5: void *(*func)(...) */ | ||
| 743 | int size; | ||
| 744 | |||
| 745 | emit(A64_MOV(1, r1, r6), ctx); | ||
| 746 | emit_a64_mov_i(0, r2, imm, ctx); | ||
| 747 | if (BPF_MODE(code) == BPF_IND) | ||
| 748 | emit(A64_ADD(0, r2, r2, src), ctx); | ||
| 749 | switch (BPF_SIZE(code)) { | ||
| 750 | case BPF_W: | ||
| 751 | size = 4; | ||
| 752 | break; | ||
| 753 | case BPF_H: | ||
| 754 | size = 2; | ||
| 755 | break; | ||
| 756 | case BPF_B: | ||
| 757 | size = 1; | ||
| 758 | break; | ||
| 759 | default: | ||
| 760 | return -EINVAL; | ||
| 761 | } | ||
| 762 | emit_a64_mov_i64(r3, size, ctx); | ||
| 763 | emit(A64_SUB_I(1, r4, fp, ctx->stack_size), ctx); | ||
| 764 | emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx); | ||
| 765 | emit(A64_BLR(r5), ctx); | ||
| 766 | emit(A64_MOV(1, r0, A64_R(0)), ctx); | ||
| 767 | |||
| 768 | jmp_offset = epilogue_offset(ctx); | ||
| 769 | check_imm19(jmp_offset); | ||
| 770 | emit(A64_CBZ(1, r0, jmp_offset), ctx); | ||
| 771 | emit(A64_MOV(1, r5, r0), ctx); | ||
| 772 | switch (BPF_SIZE(code)) { | ||
| 773 | case BPF_W: | ||
| 774 | emit(A64_LDR32(r0, r5, A64_ZR), ctx); | ||
| 775 | #ifndef CONFIG_CPU_BIG_ENDIAN | ||
| 776 | emit(A64_REV32(0, r0, r0), ctx); | ||
| 777 | #endif | ||
| 778 | break; | ||
| 779 | case BPF_H: | ||
| 780 | emit(A64_LDRH(r0, r5, A64_ZR), ctx); | ||
| 781 | #ifndef CONFIG_CPU_BIG_ENDIAN | ||
| 782 | emit(A64_REV16(0, r0, r0), ctx); | ||
| 783 | #endif | ||
| 784 | break; | ||
| 785 | case BPF_B: | ||
| 786 | emit(A64_LDRB(r0, r5, A64_ZR), ctx); | ||
| 787 | break; | ||
| 788 | } | ||
| 789 | break; | ||
| 790 | } | ||
| 791 | default: | 726 | default: |
| 792 | pr_err_once("unknown opcode %02x\n", code); | 727 | pr_err_once("unknown opcode %02x\n", code); |
| 793 | return -EINVAL; | 728 | return -EINVAL; |
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 3e2798bfea4f..7ba7df9c28fc 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c | |||
| @@ -1267,110 +1267,6 @@ jeq_common: | |||
| 1267 | return -EINVAL; | 1267 | return -EINVAL; |
| 1268 | break; | 1268 | break; |
| 1269 | 1269 | ||
| 1270 | case BPF_LD | BPF_B | BPF_ABS: | ||
| 1271 | case BPF_LD | BPF_H | BPF_ABS: | ||
| 1272 | case BPF_LD | BPF_W | BPF_ABS: | ||
| 1273 | case BPF_LD | BPF_DW | BPF_ABS: | ||
| 1274 | ctx->flags |= EBPF_SAVE_RA; | ||
| 1275 | |||
| 1276 | gen_imm_to_reg(insn, MIPS_R_A1, ctx); | ||
| 1277 | emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn)); | ||
| 1278 | |||
| 1279 | if (insn->imm < 0) { | ||
| 1280 | emit_const_to_reg(ctx, MIPS_R_T9, (u64)bpf_internal_load_pointer_neg_helper); | ||
| 1281 | } else { | ||
| 1282 | emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer); | ||
| 1283 | emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset); | ||
| 1284 | } | ||
| 1285 | goto ld_skb_common; | ||
| 1286 | |||
| 1287 | case BPF_LD | BPF_B | BPF_IND: | ||
| 1288 | case BPF_LD | BPF_H | BPF_IND: | ||
| 1289 | case BPF_LD | BPF_W | BPF_IND: | ||
| 1290 | case BPF_LD | BPF_DW | BPF_IND: | ||
| 1291 | ctx->flags |= EBPF_SAVE_RA; | ||
| 1292 | src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); | ||
| 1293 | if (src < 0) | ||
| 1294 | return src; | ||
| 1295 | ts = get_reg_val_type(ctx, this_idx, insn->src_reg); | ||
| 1296 | if (ts == REG_32BIT_ZERO_EX) { | ||
| 1297 | /* sign extend */ | ||
| 1298 | emit_instr(ctx, sll, MIPS_R_A1, src, 0); | ||
| 1299 | src = MIPS_R_A1; | ||
| 1300 | } | ||
| 1301 | if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) { | ||
| 1302 | emit_instr(ctx, daddiu, MIPS_R_A1, src, insn->imm); | ||
| 1303 | } else { | ||
| 1304 | gen_imm_to_reg(insn, MIPS_R_AT, ctx); | ||
| 1305 | emit_instr(ctx, daddu, MIPS_R_A1, MIPS_R_AT, src); | ||
| 1306 | } | ||
| 1307 | /* truncate to 32-bit int */ | ||
| 1308 | emit_instr(ctx, sll, MIPS_R_A1, MIPS_R_A1, 0); | ||
| 1309 | emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset); | ||
| 1310 | emit_instr(ctx, slt, MIPS_R_AT, MIPS_R_A1, MIPS_R_ZERO); | ||
| 1311 | |||
| 1312 | emit_const_to_reg(ctx, MIPS_R_T8, (u64)bpf_internal_load_pointer_neg_helper); | ||
| 1313 | emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer); | ||
| 1314 | emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn)); | ||
| 1315 | emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_T8, MIPS_R_AT); | ||
| 1316 | |||
| 1317 | ld_skb_common: | ||
| 1318 | emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9); | ||
| 1319 | /* delay slot move */ | ||
| 1320 | emit_instr(ctx, daddu, MIPS_R_A0, MIPS_R_S0, MIPS_R_ZERO); | ||
| 1321 | |||
| 1322 | /* Check the error value */ | ||
| 1323 | b_off = b_imm(exit_idx, ctx); | ||
| 1324 | if (is_bad_offset(b_off)) { | ||
| 1325 | target = j_target(ctx, exit_idx); | ||
| 1326 | if (target == (unsigned int)-1) | ||
| 1327 | return -E2BIG; | ||
| 1328 | |||
| 1329 | if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) { | ||
| 1330 | ctx->offsets[this_idx] |= OFFSETS_B_CONV; | ||
| 1331 | ctx->long_b_conversion = 1; | ||
| 1332 | } | ||
| 1333 | emit_instr(ctx, bne, MIPS_R_V0, MIPS_R_ZERO, 4 * 3); | ||
| 1334 | emit_instr(ctx, nop); | ||
| 1335 | emit_instr(ctx, j, target); | ||
| 1336 | emit_instr(ctx, nop); | ||
| 1337 | } else { | ||
| 1338 | emit_instr(ctx, beq, MIPS_R_V0, MIPS_R_ZERO, b_off); | ||
| 1339 | emit_instr(ctx, nop); | ||
| 1340 | } | ||
| 1341 | |||
| 1342 | #ifdef __BIG_ENDIAN | ||
| 1343 | need_swap = false; | ||
| 1344 | #else | ||
| 1345 | need_swap = true; | ||
| 1346 | #endif | ||
| 1347 | dst = MIPS_R_V0; | ||
| 1348 | switch (BPF_SIZE(insn->code)) { | ||
| 1349 | case BPF_B: | ||
| 1350 | emit_instr(ctx, lbu, dst, 0, MIPS_R_V0); | ||
| 1351 | break; | ||
| 1352 | case BPF_H: | ||
| 1353 | emit_instr(ctx, lhu, dst, 0, MIPS_R_V0); | ||
| 1354 | if (need_swap) | ||
| 1355 | emit_instr(ctx, wsbh, dst, dst); | ||
| 1356 | break; | ||
| 1357 | case BPF_W: | ||
| 1358 | emit_instr(ctx, lw, dst, 0, MIPS_R_V0); | ||
| 1359 | if (need_swap) { | ||
| 1360 | emit_instr(ctx, wsbh, dst, dst); | ||
| 1361 | emit_instr(ctx, rotr, dst, dst, 16); | ||
| 1362 | } | ||
| 1363 | break; | ||
| 1364 | case BPF_DW: | ||
| 1365 | emit_instr(ctx, ld, dst, 0, MIPS_R_V0); | ||
| 1366 | if (need_swap) { | ||
| 1367 | emit_instr(ctx, dsbh, dst, dst); | ||
| 1368 | emit_instr(ctx, dshd, dst, dst); | ||
| 1369 | } | ||
| 1370 | break; | ||
| 1371 | } | ||
| 1372 | |||
| 1373 | break; | ||
| 1374 | case BPF_ALU | BPF_END | BPF_FROM_BE: | 1270 | case BPF_ALU | BPF_END | BPF_FROM_BE: |
| 1375 | case BPF_ALU | BPF_END | BPF_FROM_LE: | 1271 | case BPF_ALU | BPF_END | BPF_FROM_LE: |
| 1376 | dst = ebpf_to_mips_reg(ctx, insn, dst_reg); | 1272 | dst = ebpf_to_mips_reg(ctx, insn, dst_reg); |
diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile index 02d369ca6a53..809f019d3cba 100644 --- a/arch/powerpc/net/Makefile +++ b/arch/powerpc/net/Makefile | |||
| @@ -3,7 +3,7 @@ | |||
| 3 | # Arch-specific network modules | 3 | # Arch-specific network modules |
| 4 | # | 4 | # |
| 5 | ifeq ($(CONFIG_PPC64),y) | 5 | ifeq ($(CONFIG_PPC64),y) |
| 6 | obj-$(CONFIG_BPF_JIT) += bpf_jit_asm64.o bpf_jit_comp64.o | 6 | obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o |
| 7 | else | 7 | else |
| 8 | obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o | 8 | obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o |
| 9 | endif | 9 | endif |
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index 8bdef7ed28a8..3609be4692b3 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h | |||
| @@ -20,7 +20,7 @@ | |||
| 20 | * with our redzone usage. | 20 | * with our redzone usage. |
| 21 | * | 21 | * |
| 22 | * [ prev sp ] <------------- | 22 | * [ prev sp ] <------------- |
| 23 | * [ nv gpr save area ] 8*8 | | 23 | * [ nv gpr save area ] 6*8 | |
| 24 | * [ tail_call_cnt ] 8 | | 24 | * [ tail_call_cnt ] 8 | |
| 25 | * [ local_tmp_var ] 8 | | 25 | * [ local_tmp_var ] 8 | |
| 26 | * fp (r31) --> [ ebpf stack space ] upto 512 | | 26 | * fp (r31) --> [ ebpf stack space ] upto 512 | |
| @@ -28,8 +28,8 @@ | |||
| 28 | * sp (r1) ---> [ stack pointer ] -------------- | 28 | * sp (r1) ---> [ stack pointer ] -------------- |
| 29 | */ | 29 | */ |
| 30 | 30 | ||
| 31 | /* for gpr non volatile registers BPG_REG_6 to 10, plus skb cache registers */ | 31 | /* for gpr non volatile registers BPG_REG_6 to 10 */ |
| 32 | #define BPF_PPC_STACK_SAVE (8*8) | 32 | #define BPF_PPC_STACK_SAVE (6*8) |
| 33 | /* for bpf JIT code internal usage */ | 33 | /* for bpf JIT code internal usage */ |
| 34 | #define BPF_PPC_STACK_LOCALS 16 | 34 | #define BPF_PPC_STACK_LOCALS 16 |
| 35 | /* stack frame excluding BPF stack, ensure this is quadword aligned */ | 35 | /* stack frame excluding BPF stack, ensure this is quadword aligned */ |
| @@ -39,10 +39,8 @@ | |||
| 39 | #ifndef __ASSEMBLY__ | 39 | #ifndef __ASSEMBLY__ |
| 40 | 40 | ||
| 41 | /* BPF register usage */ | 41 | /* BPF register usage */ |
| 42 | #define SKB_HLEN_REG (MAX_BPF_JIT_REG + 0) | 42 | #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) |
| 43 | #define SKB_DATA_REG (MAX_BPF_JIT_REG + 1) | 43 | #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) |
| 44 | #define TMP_REG_1 (MAX_BPF_JIT_REG + 2) | ||
| 45 | #define TMP_REG_2 (MAX_BPF_JIT_REG + 3) | ||
| 46 | 44 | ||
| 47 | /* BPF to ppc register mappings */ | 45 | /* BPF to ppc register mappings */ |
| 48 | static const int b2p[] = { | 46 | static const int b2p[] = { |
| @@ -63,40 +61,23 @@ static const int b2p[] = { | |||
| 63 | [BPF_REG_FP] = 31, | 61 | [BPF_REG_FP] = 31, |
| 64 | /* eBPF jit internal registers */ | 62 | /* eBPF jit internal registers */ |
| 65 | [BPF_REG_AX] = 2, | 63 | [BPF_REG_AX] = 2, |
| 66 | [SKB_HLEN_REG] = 25, | ||
| 67 | [SKB_DATA_REG] = 26, | ||
| 68 | [TMP_REG_1] = 9, | 64 | [TMP_REG_1] = 9, |
| 69 | [TMP_REG_2] = 10 | 65 | [TMP_REG_2] = 10 |
| 70 | }; | 66 | }; |
| 71 | 67 | ||
| 72 | /* PPC NVR range -- update this if we ever use NVRs below r24 */ | 68 | /* PPC NVR range -- update this if we ever use NVRs below r27 */ |
| 73 | #define BPF_PPC_NVR_MIN 24 | 69 | #define BPF_PPC_NVR_MIN 27 |
| 74 | |||
| 75 | /* Assembly helpers */ | ||
| 76 | #define DECLARE_LOAD_FUNC(func) u64 func(u64 r3, u64 r4); \ | ||
| 77 | u64 func##_negative_offset(u64 r3, u64 r4); \ | ||
| 78 | u64 func##_positive_offset(u64 r3, u64 r4); | ||
| 79 | |||
| 80 | DECLARE_LOAD_FUNC(sk_load_word); | ||
| 81 | DECLARE_LOAD_FUNC(sk_load_half); | ||
| 82 | DECLARE_LOAD_FUNC(sk_load_byte); | ||
| 83 | |||
| 84 | #define CHOOSE_LOAD_FUNC(imm, func) \ | ||
| 85 | (imm < 0 ? \ | ||
| 86 | (imm >= SKF_LL_OFF ? func##_negative_offset : func) : \ | ||
| 87 | func##_positive_offset) | ||
| 88 | 70 | ||
| 89 | #define SEEN_FUNC 0x1000 /* might call external helpers */ | 71 | #define SEEN_FUNC 0x1000 /* might call external helpers */ |
| 90 | #define SEEN_STACK 0x2000 /* uses BPF stack */ | 72 | #define SEEN_STACK 0x2000 /* uses BPF stack */ |
| 91 | #define SEEN_SKB 0x4000 /* uses sk_buff */ | 73 | #define SEEN_TAILCALL 0x4000 /* uses tail calls */ |
| 92 | #define SEEN_TAILCALL 0x8000 /* uses tail calls */ | ||
| 93 | 74 | ||
| 94 | struct codegen_context { | 75 | struct codegen_context { |
| 95 | /* | 76 | /* |
| 96 | * This is used to track register usage as well | 77 | * This is used to track register usage as well |
| 97 | * as calls to external helpers. | 78 | * as calls to external helpers. |
| 98 | * - register usage is tracked with corresponding | 79 | * - register usage is tracked with corresponding |
| 99 | * bits (r3-r10 and r25-r31) | 80 | * bits (r3-r10 and r27-r31) |
| 100 | * - rest of the bits can be used to track other | 81 | * - rest of the bits can be used to track other |
| 101 | * things -- for now, we use bits 16 to 23 | 82 | * things -- for now, we use bits 16 to 23 |
| 102 | * encoded in SEEN_* macros above | 83 | * encoded in SEEN_* macros above |
diff --git a/arch/powerpc/net/bpf_jit_asm64.S b/arch/powerpc/net/bpf_jit_asm64.S deleted file mode 100644 index 7e4c51430b84..000000000000 --- a/arch/powerpc/net/bpf_jit_asm64.S +++ /dev/null | |||
| @@ -1,180 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * bpf_jit_asm64.S: Packet/header access helper functions | ||
| 3 | * for PPC64 BPF compiler. | ||
| 4 | * | ||
| 5 | * Copyright 2016, Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> | ||
| 6 | * IBM Corporation | ||
| 7 | * | ||
| 8 | * Based on bpf_jit_asm.S by Matt Evans | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or | ||
| 11 | * modify it under the terms of the GNU General Public License | ||
| 12 | * as published by the Free Software Foundation; version 2 | ||
| 13 | * of the License. | ||
| 14 | */ | ||
| 15 | |||
| 16 | #include <asm/ppc_asm.h> | ||
| 17 | #include <asm/ptrace.h> | ||
| 18 | #include "bpf_jit64.h" | ||
| 19 | |||
| 20 | /* | ||
| 21 | * All of these routines are called directly from generated code, | ||
| 22 | * with the below register usage: | ||
| 23 | * r27 skb pointer (ctx) | ||
| 24 | * r25 skb header length | ||
| 25 | * r26 skb->data pointer | ||
| 26 | * r4 offset | ||
| 27 | * | ||
| 28 | * Result is passed back in: | ||
| 29 | * r8 data read in host endian format (accumulator) | ||
| 30 | * | ||
| 31 | * r9 is used as a temporary register | ||
| 32 | */ | ||
| 33 | |||
| 34 | #define r_skb r27 | ||
| 35 | #define r_hlen r25 | ||
| 36 | #define r_data r26 | ||
| 37 | #define r_off r4 | ||
| 38 | #define r_val r8 | ||
| 39 | #define r_tmp r9 | ||
| 40 | |||
| 41 | _GLOBAL_TOC(sk_load_word) | ||
| 42 | cmpdi r_off, 0 | ||
| 43 | blt bpf_slow_path_word_neg | ||
| 44 | b sk_load_word_positive_offset | ||
| 45 | |||
| 46 | _GLOBAL_TOC(sk_load_word_positive_offset) | ||
| 47 | /* Are we accessing past headlen? */ | ||
| 48 | subi r_tmp, r_hlen, 4 | ||
| 49 | cmpd r_tmp, r_off | ||
| 50 | blt bpf_slow_path_word | ||
| 51 | /* Nope, just hitting the header. cr0 here is eq or gt! */ | ||
| 52 | LWZX_BE r_val, r_data, r_off | ||
| 53 | blr /* Return success, cr0 != LT */ | ||
| 54 | |||
| 55 | _GLOBAL_TOC(sk_load_half) | ||
| 56 | cmpdi r_off, 0 | ||
| 57 | blt bpf_slow_path_half_neg | ||
| 58 | b sk_load_half_positive_offset | ||
| 59 | |||
| 60 | _GLOBAL_TOC(sk_load_half_positive_offset) | ||
| 61 | subi r_tmp, r_hlen, 2 | ||
| 62 | cmpd r_tmp, r_off | ||
| 63 | blt bpf_slow_path_half | ||
| 64 | LHZX_BE r_val, r_data, r_off | ||
| 65 | blr | ||
| 66 | |||
| 67 | _GLOBAL_TOC(sk_load_byte) | ||
| 68 | cmpdi r_off, 0 | ||
| 69 | blt bpf_slow_path_byte_neg | ||
| 70 | b sk_load_byte_positive_offset | ||
| 71 | |||
| 72 | _GLOBAL_TOC(sk_load_byte_positive_offset) | ||
| 73 | cmpd r_hlen, r_off | ||
| 74 | ble bpf_slow_path_byte | ||
| 75 | lbzx r_val, r_data, r_off | ||
| 76 | blr | ||
| 77 | |||
| 78 | /* | ||
| 79 | * Call out to skb_copy_bits: | ||
| 80 | * Allocate a new stack frame here to remain ABI-compliant in | ||
| 81 | * stashing LR. | ||
| 82 | */ | ||
| 83 | #define bpf_slow_path_common(SIZE) \ | ||
| 84 | mflr r0; \ | ||
| 85 | std r0, PPC_LR_STKOFF(r1); \ | ||
| 86 | stdu r1, -(STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS)(r1); \ | ||
| 87 | mr r3, r_skb; \ | ||
| 88 | /* r4 = r_off as passed */ \ | ||
| 89 | addi r5, r1, STACK_FRAME_MIN_SIZE; \ | ||
| 90 | li r6, SIZE; \ | ||
| 91 | bl skb_copy_bits; \ | ||
| 92 | nop; \ | ||
| 93 | /* save r5 */ \ | ||
| 94 | addi r5, r1, STACK_FRAME_MIN_SIZE; \ | ||
| 95 | /* r3 = 0 on success */ \ | ||
| 96 | addi r1, r1, STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS; \ | ||
| 97 | ld r0, PPC_LR_STKOFF(r1); \ | ||
| 98 | mtlr r0; \ | ||
| 99 | cmpdi r3, 0; \ | ||
| 100 | blt bpf_error; /* cr0 = LT */ | ||
| 101 | |||
| 102 | bpf_slow_path_word: | ||
| 103 | bpf_slow_path_common(4) | ||
| 104 | /* Data value is on stack, and cr0 != LT */ | ||
| 105 | LWZX_BE r_val, 0, r5 | ||
| 106 | blr | ||
| 107 | |||
| 108 | bpf_slow_path_half: | ||
| 109 | bpf_slow_path_common(2) | ||
| 110 | LHZX_BE r_val, 0, r5 | ||
| 111 | blr | ||
| 112 | |||
| 113 | bpf_slow_path_byte: | ||
| 114 | bpf_slow_path_common(1) | ||
| 115 | lbzx r_val, 0, r5 | ||
| 116 | blr | ||
| 117 | |||
| 118 | /* | ||
| 119 | * Call out to bpf_internal_load_pointer_neg_helper | ||
| 120 | */ | ||
| 121 | #define sk_negative_common(SIZE) \ | ||
| 122 | mflr r0; \ | ||
| 123 | std r0, PPC_LR_STKOFF(r1); \ | ||
| 124 | stdu r1, -STACK_FRAME_MIN_SIZE(r1); \ | ||
| 125 | mr r3, r_skb; \ | ||
| 126 | /* r4 = r_off, as passed */ \ | ||
| 127 | li r5, SIZE; \ | ||
| 128 | bl bpf_internal_load_pointer_neg_helper; \ | ||
| 129 | nop; \ | ||
| 130 | addi r1, r1, STACK_FRAME_MIN_SIZE; \ | ||
| 131 | ld r0, PPC_LR_STKOFF(r1); \ | ||
| 132 | mtlr r0; \ | ||
| 133 | /* R3 != 0 on success */ \ | ||
| 134 | cmpldi r3, 0; \ | ||
| 135 | beq bpf_error_slow; /* cr0 = EQ */ | ||
| 136 | |||
| 137 | bpf_slow_path_word_neg: | ||
| 138 | lis r_tmp, -32 /* SKF_LL_OFF */ | ||
| 139 | cmpd r_off, r_tmp /* addr < SKF_* */ | ||
| 140 | blt bpf_error /* cr0 = LT */ | ||
| 141 | b sk_load_word_negative_offset | ||
| 142 | |||
| 143 | _GLOBAL_TOC(sk_load_word_negative_offset) | ||
| 144 | sk_negative_common(4) | ||
| 145 | LWZX_BE r_val, 0, r3 | ||
| 146 | blr | ||
| 147 | |||
| 148 | bpf_slow_path_half_neg: | ||
| 149 | lis r_tmp, -32 /* SKF_LL_OFF */ | ||
| 150 | cmpd r_off, r_tmp /* addr < SKF_* */ | ||
| 151 | blt bpf_error /* cr0 = LT */ | ||
| 152 | b sk_load_half_negative_offset | ||
| 153 | |||
| 154 | _GLOBAL_TOC(sk_load_half_negative_offset) | ||
| 155 | sk_negative_common(2) | ||
| 156 | LHZX_BE r_val, 0, r3 | ||
| 157 | blr | ||
| 158 | |||
| 159 | bpf_slow_path_byte_neg: | ||
| 160 | lis r_tmp, -32 /* SKF_LL_OFF */ | ||
| 161 | cmpd r_off, r_tmp /* addr < SKF_* */ | ||
| 162 | blt bpf_error /* cr0 = LT */ | ||
| 163 | b sk_load_byte_negative_offset | ||
| 164 | |||
| 165 | _GLOBAL_TOC(sk_load_byte_negative_offset) | ||
| 166 | sk_negative_common(1) | ||
| 167 | lbzx r_val, 0, r3 | ||
| 168 | blr | ||
| 169 | |||
| 170 | bpf_error_slow: | ||
| 171 | /* fabricate a cr0 = lt */ | ||
| 172 | li r_tmp, -1 | ||
| 173 | cmpdi r_tmp, 0 | ||
| 174 | bpf_error: | ||
| 175 | /* | ||
| 176 | * Entered with cr0 = lt | ||
| 177 | * Generated code will 'blt epilogue', returning 0. | ||
| 178 | */ | ||
| 179 | li r_val, 0 | ||
| 180 | blr | ||
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 0ef3d9580e98..1bdb1aff0619 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c | |||
| @@ -59,7 +59,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) | |||
| 59 | * [ prev sp ] <------------- | 59 | * [ prev sp ] <------------- |
| 60 | * [ ... ] | | 60 | * [ ... ] | |
| 61 | * sp (r1) ---> [ stack pointer ] -------------- | 61 | * sp (r1) ---> [ stack pointer ] -------------- |
| 62 | * [ nv gpr save area ] 8*8 | 62 | * [ nv gpr save area ] 6*8 |
| 63 | * [ tail_call_cnt ] 8 | 63 | * [ tail_call_cnt ] 8 |
| 64 | * [ local_tmp_var ] 8 | 64 | * [ local_tmp_var ] 8 |
| 65 | * [ unused red zone ] 208 bytes protected | 65 | * [ unused red zone ] 208 bytes protected |
| @@ -88,21 +88,6 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) | |||
| 88 | BUG(); | 88 | BUG(); |
| 89 | } | 89 | } |
| 90 | 90 | ||
| 91 | static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx) | ||
| 92 | { | ||
| 93 | /* | ||
| 94 | * Load skb->len and skb->data_len | ||
| 95 | * r3 points to skb | ||
| 96 | */ | ||
| 97 | PPC_LWZ(b2p[SKB_HLEN_REG], 3, offsetof(struct sk_buff, len)); | ||
| 98 | PPC_LWZ(b2p[TMP_REG_1], 3, offsetof(struct sk_buff, data_len)); | ||
| 99 | /* header_len = len - data_len */ | ||
| 100 | PPC_SUB(b2p[SKB_HLEN_REG], b2p[SKB_HLEN_REG], b2p[TMP_REG_1]); | ||
| 101 | |||
| 102 | /* skb->data pointer */ | ||
| 103 | PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data)); | ||
| 104 | } | ||
| 105 | |||
| 106 | static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) | 91 | static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) |
| 107 | { | 92 | { |
| 108 | int i; | 93 | int i; |
| @@ -145,18 +130,6 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) | |||
| 145 | if (bpf_is_seen_register(ctx, i)) | 130 | if (bpf_is_seen_register(ctx, i)) |
| 146 | PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); | 131 | PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); |
| 147 | 132 | ||
| 148 | /* | ||
| 149 | * Save additional non-volatile regs if we cache skb | ||
| 150 | * Also, setup skb data | ||
| 151 | */ | ||
| 152 | if (ctx->seen & SEEN_SKB) { | ||
| 153 | PPC_BPF_STL(b2p[SKB_HLEN_REG], 1, | ||
| 154 | bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG])); | ||
| 155 | PPC_BPF_STL(b2p[SKB_DATA_REG], 1, | ||
| 156 | bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG])); | ||
| 157 | bpf_jit_emit_skb_loads(image, ctx); | ||
| 158 | } | ||
| 159 | |||
| 160 | /* Setup frame pointer to point to the bpf stack area */ | 133 | /* Setup frame pointer to point to the bpf stack area */ |
| 161 | if (bpf_is_seen_register(ctx, BPF_REG_FP)) | 134 | if (bpf_is_seen_register(ctx, BPF_REG_FP)) |
| 162 | PPC_ADDI(b2p[BPF_REG_FP], 1, | 135 | PPC_ADDI(b2p[BPF_REG_FP], 1, |
| @@ -172,14 +145,6 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx | |||
| 172 | if (bpf_is_seen_register(ctx, i)) | 145 | if (bpf_is_seen_register(ctx, i)) |
| 173 | PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); | 146 | PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); |
| 174 | 147 | ||
| 175 | /* Restore non-volatile registers used for skb cache */ | ||
| 176 | if (ctx->seen & SEEN_SKB) { | ||
| 177 | PPC_BPF_LL(b2p[SKB_HLEN_REG], 1, | ||
| 178 | bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG])); | ||
| 179 | PPC_BPF_LL(b2p[SKB_DATA_REG], 1, | ||
| 180 | bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG])); | ||
| 181 | } | ||
| 182 | |||
| 183 | /* Tear down our stack frame */ | 148 | /* Tear down our stack frame */ |
| 184 | if (bpf_has_stack_frame(ctx)) { | 149 | if (bpf_has_stack_frame(ctx)) { |
| 185 | PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size); | 150 | PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size); |
| @@ -753,23 +718,10 @@ emit_clear: | |||
| 753 | ctx->seen |= SEEN_FUNC; | 718 | ctx->seen |= SEEN_FUNC; |
| 754 | func = (u8 *) __bpf_call_base + imm; | 719 | func = (u8 *) __bpf_call_base + imm; |
| 755 | 720 | ||
| 756 | /* Save skb pointer if we need to re-cache skb data */ | ||
| 757 | if ((ctx->seen & SEEN_SKB) && | ||
| 758 | bpf_helper_changes_pkt_data(func)) | ||
| 759 | PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx)); | ||
| 760 | |||
| 761 | bpf_jit_emit_func_call(image, ctx, (u64)func); | 721 | bpf_jit_emit_func_call(image, ctx, (u64)func); |
| 762 | 722 | ||
| 763 | /* move return value from r3 to BPF_REG_0 */ | 723 | /* move return value from r3 to BPF_REG_0 */ |
| 764 | PPC_MR(b2p[BPF_REG_0], 3); | 724 | PPC_MR(b2p[BPF_REG_0], 3); |
| 765 | |||
| 766 | /* refresh skb cache */ | ||
| 767 | if ((ctx->seen & SEEN_SKB) && | ||
| 768 | bpf_helper_changes_pkt_data(func)) { | ||
| 769 | /* reload skb pointer to r3 */ | ||
| 770 | PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx)); | ||
| 771 | bpf_jit_emit_skb_loads(image, ctx); | ||
| 772 | } | ||
| 773 | break; | 725 | break; |
| 774 | 726 | ||
| 775 | /* | 727 | /* |
| @@ -887,65 +839,6 @@ cond_branch: | |||
| 887 | break; | 839 | break; |
| 888 | 840 | ||
| 889 | /* | 841 | /* |
| 890 | * Loads from packet header/data | ||
| 891 | * Assume 32-bit input value in imm and X (src_reg) | ||
| 892 | */ | ||
| 893 | |||
| 894 | /* Absolute loads */ | ||
| 895 | case BPF_LD | BPF_W | BPF_ABS: | ||
| 896 | func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_word); | ||
| 897 | goto common_load_abs; | ||
| 898 | case BPF_LD | BPF_H | BPF_ABS: | ||
| 899 | func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_half); | ||
| 900 | goto common_load_abs; | ||
| 901 | case BPF_LD | BPF_B | BPF_ABS: | ||
| 902 | func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_byte); | ||
| 903 | common_load_abs: | ||
| 904 | /* | ||
| 905 | * Load from [imm] | ||
| 906 | * Load into r4, which can just be passed onto | ||
| 907 | * skb load helpers as the second parameter | ||
| 908 | */ | ||
| 909 | PPC_LI32(4, imm); | ||
| 910 | goto common_load; | ||
| 911 | |||
| 912 | /* Indirect loads */ | ||
| 913 | case BPF_LD | BPF_W | BPF_IND: | ||
| 914 | func = (u8 *)sk_load_word; | ||
| 915 | goto common_load_ind; | ||
| 916 | case BPF_LD | BPF_H | BPF_IND: | ||
| 917 | func = (u8 *)sk_load_half; | ||
| 918 | goto common_load_ind; | ||
| 919 | case BPF_LD | BPF_B | BPF_IND: | ||
| 920 | func = (u8 *)sk_load_byte; | ||
| 921 | common_load_ind: | ||
| 922 | /* | ||
| 923 | * Load from [src_reg + imm] | ||
| 924 | * Treat src_reg as a 32-bit value | ||
| 925 | */ | ||
| 926 | PPC_EXTSW(4, src_reg); | ||
| 927 | if (imm) { | ||
| 928 | if (imm >= -32768 && imm < 32768) | ||
| 929 | PPC_ADDI(4, 4, IMM_L(imm)); | ||
| 930 | else { | ||
| 931 | PPC_LI32(b2p[TMP_REG_1], imm); | ||
| 932 | PPC_ADD(4, 4, b2p[TMP_REG_1]); | ||
| 933 | } | ||
| 934 | } | ||
| 935 | |||
| 936 | common_load: | ||
| 937 | ctx->seen |= SEEN_SKB; | ||
| 938 | ctx->seen |= SEEN_FUNC; | ||
| 939 | bpf_jit_emit_func_call(image, ctx, (u64)func); | ||
| 940 | |||
| 941 | /* | ||
| 942 | * Helper returns 'lt' condition on error, and an | ||
| 943 | * appropriate return value in BPF_REG_0 | ||
| 944 | */ | ||
| 945 | PPC_BCC(COND_LT, exit_addr); | ||
| 946 | break; | ||
| 947 | |||
| 948 | /* | ||
| 949 | * Tail call | 842 | * Tail call |
| 950 | */ | 843 | */ |
| 951 | case BPF_JMP | BPF_TAIL_CALL: | 844 | case BPF_JMP | BPF_TAIL_CALL: |
diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile index e0d5f245e42b..d4663b4bf509 100644 --- a/arch/s390/net/Makefile +++ b/arch/s390/net/Makefile | |||
| @@ -2,4 +2,4 @@ | |||
| 2 | # | 2 | # |
| 3 | # Arch-specific network modules | 3 | # Arch-specific network modules |
| 4 | # | 4 | # |
| 5 | obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o | 5 | obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o |
diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S deleted file mode 100644 index 25bb4643c4f4..000000000000 --- a/arch/s390/net/bpf_jit.S +++ /dev/null | |||
| @@ -1,116 +0,0 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
| 2 | /* | ||
| 3 | * BPF Jit compiler for s390, help functions. | ||
| 4 | * | ||
| 5 | * Copyright IBM Corp. 2012,2015 | ||
| 6 | * | ||
| 7 | * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> | ||
| 8 | * Michael Holzheu <holzheu@linux.vnet.ibm.com> | ||
| 9 | */ | ||
| 10 | |||
| 11 | #include <linux/linkage.h> | ||
| 12 | #include "bpf_jit.h" | ||
| 13 | |||
| 14 | /* | ||
| 15 | * Calling convention: | ||
| 16 | * registers %r7-%r10, %r11,%r13, and %r15 are call saved | ||
| 17 | * | ||
| 18 | * Input (64 bit): | ||
| 19 | * %r3 (%b2) = offset into skb data | ||
| 20 | * %r6 (%b5) = return address | ||
| 21 | * %r7 (%b6) = skb pointer | ||
| 22 | * %r12 = skb data pointer | ||
| 23 | * | ||
| 24 | * Output: | ||
| 25 | * %r14= %b0 = return value (read skb value) | ||
| 26 | * | ||
| 27 | * Work registers: %r2,%r4,%r5,%r14 | ||
| 28 | * | ||
| 29 | * skb_copy_bits takes 4 parameters: | ||
| 30 | * %r2 = skb pointer | ||
| 31 | * %r3 = offset into skb data | ||
| 32 | * %r4 = pointer to temp buffer | ||
| 33 | * %r5 = length to copy | ||
| 34 | * Return value in %r2: 0 = ok | ||
| 35 | * | ||
| 36 | * bpf_internal_load_pointer_neg_helper takes 3 parameters: | ||
| 37 | * %r2 = skb pointer | ||
| 38 | * %r3 = offset into data | ||
| 39 | * %r4 = length to copy | ||
| 40 | * Return value in %r2: Pointer to data | ||
| 41 | */ | ||
| 42 | |||
| 43 | #define SKF_MAX_NEG_OFF -0x200000 /* SKF_LL_OFF from filter.h */ | ||
| 44 | |||
| 45 | /* | ||
| 46 | * Load SIZE bytes from SKB | ||
| 47 | */ | ||
| 48 | #define sk_load_common(NAME, SIZE, LOAD) \ | ||
| 49 | ENTRY(sk_load_##NAME); \ | ||
| 50 | ltgr %r3,%r3; /* Is offset negative? */ \ | ||
| 51 | jl sk_load_##NAME##_slow_neg; \ | ||
| 52 | ENTRY(sk_load_##NAME##_pos); \ | ||
| 53 | aghi %r3,SIZE; /* Offset + SIZE */ \ | ||
| 54 | clg %r3,STK_OFF_HLEN(%r15); /* Offset + SIZE > hlen? */ \ | ||
| 55 | jh sk_load_##NAME##_slow; \ | ||
| 56 | LOAD %r14,-SIZE(%r3,%r12); /* Get data from skb */ \ | ||
| 57 | b OFF_OK(%r6); /* Return */ \ | ||
| 58 | \ | ||
| 59 | sk_load_##NAME##_slow:; \ | ||
| 60 | lgr %r2,%r7; /* Arg1 = skb pointer */ \ | ||
| 61 | aghi %r3,-SIZE; /* Arg2 = offset */ \ | ||
| 62 | la %r4,STK_OFF_TMP(%r15); /* Arg3 = temp bufffer */ \ | ||
| 63 | lghi %r5,SIZE; /* Arg4 = size */ \ | ||
| 64 | brasl %r14,skb_copy_bits; /* Get data from skb */ \ | ||
| 65 | LOAD %r14,STK_OFF_TMP(%r15); /* Load from temp bufffer */ \ | ||
| 66 | ltgr %r2,%r2; /* Set cc to (%r2 != 0) */ \ | ||
| 67 | br %r6; /* Return */ | ||
| 68 | |||
| 69 | sk_load_common(word, 4, llgf) /* r14 = *(u32 *) (skb->data+offset) */ | ||
| 70 | sk_load_common(half, 2, llgh) /* r14 = *(u16 *) (skb->data+offset) */ | ||
| 71 | |||
| 72 | /* | ||
| 73 | * Load 1 byte from SKB (optimized version) | ||
| 74 | */ | ||
| 75 | /* r14 = *(u8 *) (skb->data+offset) */ | ||
| 76 | ENTRY(sk_load_byte) | ||
| 77 | ltgr %r3,%r3 # Is offset negative? | ||
| 78 | jl sk_load_byte_slow_neg | ||
| 79 | ENTRY(sk_load_byte_pos) | ||
| 80 | clg %r3,STK_OFF_HLEN(%r15) # Offset >= hlen? | ||
| 81 | jnl sk_load_byte_slow | ||
| 82 | llgc %r14,0(%r3,%r12) # Get byte from skb | ||
| 83 | b OFF_OK(%r6) # Return OK | ||
| 84 | |||
| 85 | sk_load_byte_slow: | ||
| 86 | lgr %r2,%r7 # Arg1 = skb pointer | ||
| 87 | # Arg2 = offset | ||
| 88 | la %r4,STK_OFF_TMP(%r15) # Arg3 = pointer to temp buffer | ||
| 89 | lghi %r5,1 # Arg4 = size (1 byte) | ||
| 90 | brasl %r14,skb_copy_bits # Get data from skb | ||
| 91 | llgc %r14,STK_OFF_TMP(%r15) # Load result from temp buffer | ||
| 92 | ltgr %r2,%r2 # Set cc to (%r2 != 0) | ||
| 93 | br %r6 # Return cc | ||
| 94 | |||
| 95 | #define sk_negative_common(NAME, SIZE, LOAD) \ | ||
| 96 | sk_load_##NAME##_slow_neg:; \ | ||
| 97 | cgfi %r3,SKF_MAX_NEG_OFF; \ | ||
| 98 | jl bpf_error; \ | ||
| 99 | lgr %r2,%r7; /* Arg1 = skb pointer */ \ | ||
| 100 | /* Arg2 = offset */ \ | ||
| 101 | lghi %r4,SIZE; /* Arg3 = size */ \ | ||
| 102 | brasl %r14,bpf_internal_load_pointer_neg_helper; \ | ||
| 103 | ltgr %r2,%r2; \ | ||
| 104 | jz bpf_error; \ | ||
| 105 | LOAD %r14,0(%r2); /* Get data from pointer */ \ | ||
| 106 | xr %r3,%r3; /* Set cc to zero */ \ | ||
| 107 | br %r6; /* Return cc */ | ||
| 108 | |||
| 109 | sk_negative_common(word, 4, llgf) | ||
| 110 | sk_negative_common(half, 2, llgh) | ||
| 111 | sk_negative_common(byte, 1, llgc) | ||
| 112 | |||
| 113 | bpf_error: | ||
| 114 | # force a return 0 from jit handler | ||
| 115 | ltgr %r15,%r15 # Set condition code | ||
| 116 | br %r6 | ||
diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h index 5e1e5133132d..7822ea92e54a 100644 --- a/arch/s390/net/bpf_jit.h +++ b/arch/s390/net/bpf_jit.h | |||
| @@ -16,9 +16,6 @@ | |||
| 16 | #include <linux/filter.h> | 16 | #include <linux/filter.h> |
| 17 | #include <linux/types.h> | 17 | #include <linux/types.h> |
| 18 | 18 | ||
| 19 | extern u8 sk_load_word_pos[], sk_load_half_pos[], sk_load_byte_pos[]; | ||
| 20 | extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; | ||
| 21 | |||
| 22 | #endif /* __ASSEMBLY__ */ | 19 | #endif /* __ASSEMBLY__ */ |
| 23 | 20 | ||
| 24 | /* | 21 | /* |
| @@ -36,15 +33,6 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; | |||
| 36 | * | | | | 33 | * | | | |
| 37 | * | BPF stack | | | 34 | * | BPF stack | | |
| 38 | * | | | | 35 | * | | | |
| 39 | * +---------------+ | | ||
| 40 | * | 8 byte skbp | | | ||
| 41 | * R15+176 -> +---------------+ | | ||
| 42 | * | 8 byte hlen | | | ||
| 43 | * R15+168 -> +---------------+ | | ||
| 44 | * | 4 byte align | | | ||
| 45 | * +---------------+ | | ||
| 46 | * | 4 byte temp | | | ||
| 47 | * | for bpf_jit.S | | | ||
| 48 | * R15+160 -> +---------------+ | | 36 | * R15+160 -> +---------------+ | |
| 49 | * | new backchain | | | 37 | * | new backchain | | |
| 50 | * R15+152 -> +---------------+ | | 38 | * R15+152 -> +---------------+ | |
| @@ -57,17 +45,11 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; | |||
| 57 | * The stack size used by the BPF program ("BPF stack" above) is passed | 45 | * The stack size used by the BPF program ("BPF stack" above) is passed |
| 58 | * via "aux->stack_depth". | 46 | * via "aux->stack_depth". |
| 59 | */ | 47 | */ |
| 60 | #define STK_SPACE_ADD (8 + 8 + 4 + 4 + 160) | 48 | #define STK_SPACE_ADD (160) |
| 61 | #define STK_160_UNUSED (160 - 12 * 8) | 49 | #define STK_160_UNUSED (160 - 12 * 8) |
| 62 | #define STK_OFF (STK_SPACE_ADD - STK_160_UNUSED) | 50 | #define STK_OFF (STK_SPACE_ADD - STK_160_UNUSED) |
| 63 | #define STK_OFF_TMP 160 /* Offset of tmp buffer on stack */ | ||
| 64 | #define STK_OFF_HLEN 168 /* Offset of SKB header length on stack */ | ||
| 65 | #define STK_OFF_SKBP 176 /* Offset of SKB pointer on stack */ | ||
| 66 | 51 | ||
| 67 | #define STK_OFF_R6 (160 - 11 * 8) /* Offset of r6 on stack */ | 52 | #define STK_OFF_R6 (160 - 11 * 8) /* Offset of r6 on stack */ |
| 68 | #define STK_OFF_TCCNT (160 - 12 * 8) /* Offset of tail_call_cnt on stack */ | 53 | #define STK_OFF_TCCNT (160 - 12 * 8) /* Offset of tail_call_cnt on stack */ |
| 69 | 54 | ||
| 70 | /* Offset to skip condition code check */ | ||
| 71 | #define OFF_OK 4 | ||
| 72 | |||
| 73 | #endif /* __ARCH_S390_NET_BPF_JIT_H */ | 55 | #endif /* __ARCH_S390_NET_BPF_JIT_H */ |
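With the skb pointer, header length, and temp-buffer slots gone, STK_SPACE_ADD drops from 8 + 8 + 4 + 4 + 160 = 184 bytes to the bare 160-byte register save area, so STK_OFF shrinks from 120 to 96 bytes. A stand-alone check of that arithmetic, with the constants copied from the hunk above:

    /* Sanity check of the s390 JIT stack constants, before and after. */
    #include <assert.h>

    int main(void)
    {
            int stk_160_unused = 160 - 12 * 8;                        /* 64 */
            int stk_off_old = (8 + 8 + 4 + 4 + 160) - stk_160_unused;
            int stk_off_new = 160 - stk_160_unused;

            assert(stk_off_old == 120);
            assert(stk_off_new == 96);
            return 0;
    }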
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 78a19c93b380..b020bea040b7 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c | |||
| @@ -47,23 +47,21 @@ struct bpf_jit { | |||
| 47 | 47 | ||
| 48 | #define BPF_SIZE_MAX 0xffff /* Max size for program (16 bit branches) */ | 48 | #define BPF_SIZE_MAX 0xffff /* Max size for program (16 bit branches) */ |
| 49 | 49 | ||
| 50 | #define SEEN_SKB 1 /* skb access */ | 50 | #define SEEN_MEM (1 << 0) /* use mem[] for temporary storage */ |
| 51 | #define SEEN_MEM 2 /* use mem[] for temporary storage */ | 51 | #define SEEN_RET0 (1 << 1) /* ret0_ip points to a valid return 0 */ |
| 52 | #define SEEN_RET0 4 /* ret0_ip points to a valid return 0 */ | 52 | #define SEEN_LITERAL (1 << 2) /* code uses literals */ |
| 53 | #define SEEN_LITERAL 8 /* code uses literals */ | 53 | #define SEEN_FUNC (1 << 3) /* calls C functions */ |
| 54 | #define SEEN_FUNC 16 /* calls C functions */ | 54 | #define SEEN_TAIL_CALL (1 << 4) /* code uses tail calls */ |
| 55 | #define SEEN_TAIL_CALL 32 /* code uses tail calls */ | 55 | #define SEEN_REG_AX (1 << 5) /* code uses constant blinding */ |
| 56 | #define SEEN_REG_AX 64 /* code uses constant blinding */ | 56 | #define SEEN_STACK (SEEN_FUNC | SEEN_MEM) |
| 57 | #define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB) | ||
| 58 | 57 | ||
| 59 | /* | 58 | /* |
| 60 | * s390 registers | 59 | * s390 registers |
| 61 | */ | 60 | */ |
| 62 | #define REG_W0 (MAX_BPF_JIT_REG + 0) /* Work register 1 (even) */ | 61 | #define REG_W0 (MAX_BPF_JIT_REG + 0) /* Work register 1 (even) */ |
| 63 | #define REG_W1 (MAX_BPF_JIT_REG + 1) /* Work register 2 (odd) */ | 62 | #define REG_W1 (MAX_BPF_JIT_REG + 1) /* Work register 2 (odd) */ |
| 64 | #define REG_SKB_DATA (MAX_BPF_JIT_REG + 2) /* SKB data register */ | 63 | #define REG_L (MAX_BPF_JIT_REG + 2) /* Literal pool register */ |
| 65 | #define REG_L (MAX_BPF_JIT_REG + 3) /* Literal pool register */ | 64 | #define REG_15 (MAX_BPF_JIT_REG + 3) /* Register 15 */ |
| 66 | #define REG_15 (MAX_BPF_JIT_REG + 4) /* Register 15 */ | ||
| 67 | #define REG_0 REG_W0 /* Register 0 */ | 65 | #define REG_0 REG_W0 /* Register 0 */ |
| 68 | #define REG_1 REG_W1 /* Register 1 */ | 66 | #define REG_1 REG_W1 /* Register 1 */ |
| 69 | #define REG_2 BPF_REG_1 /* Register 2 */ | 67 | #define REG_2 BPF_REG_1 /* Register 2 */ |
| @@ -88,10 +86,8 @@ static const int reg2hex[] = { | |||
| 88 | [BPF_REG_9] = 10, | 86 | [BPF_REG_9] = 10, |
| 89 | /* BPF stack pointer */ | 87 | /* BPF stack pointer */ |
| 90 | [BPF_REG_FP] = 13, | 88 | [BPF_REG_FP] = 13, |
| 91 | /* Register for blinding (shared with REG_SKB_DATA) */ | 89 | /* Register for blinding */ |
| 92 | [BPF_REG_AX] = 12, | 90 | [BPF_REG_AX] = 12, |
| 93 | /* SKB data pointer */ | ||
| 94 | [REG_SKB_DATA] = 12, | ||
| 95 | /* Work registers for s390x backend */ | 91 | /* Work registers for s390x backend */ |
| 96 | [REG_W0] = 0, | 92 | [REG_W0] = 0, |
| 97 | [REG_W1] = 1, | 93 | [REG_W1] = 1, |
| @@ -385,27 +381,6 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth) | |||
| 385 | } | 381 | } |
| 386 | 382 | ||
| 387 | /* | 383 | /* |
| 388 | * For SKB access %b1 contains the SKB pointer. For "bpf_jit.S" | ||
| 389 | * we store the SKB header length on the stack and the SKB data | ||
| 390 | * pointer in REG_SKB_DATA if BPF_REG_AX is not used. | ||
| 391 | */ | ||
| 392 | static void emit_load_skb_data_hlen(struct bpf_jit *jit) | ||
| 393 | { | ||
| 394 | /* Header length: llgf %w1,<len>(%b1) */ | ||
| 395 | EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_1, | ||
| 396 | offsetof(struct sk_buff, len)); | ||
| 397 | /* s %w1,<data_len>(%b1) */ | ||
| 398 | EMIT4_DISP(0x5b000000, REG_W1, BPF_REG_1, | ||
| 399 | offsetof(struct sk_buff, data_len)); | ||
| 400 | /* stg %w1,ST_OFF_HLEN(%r0,%r15) */ | ||
| 401 | EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_HLEN); | ||
| 402 | if (!(jit->seen & SEEN_REG_AX)) | ||
| 403 | /* lg %skb_data,data_off(%b1) */ | ||
| 404 | EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0, | ||
| 405 | BPF_REG_1, offsetof(struct sk_buff, data)); | ||
| 406 | } | ||
| 407 | |||
| 408 | /* | ||
| 409 | * Emit function prologue | 384 | * Emit function prologue |
| 410 | * | 385 | * |
| 411 | * Save registers and create stack frame if necessary. | 386 | * Save registers and create stack frame if necessary. |
| @@ -445,12 +420,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth) | |||
| 445 | EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, | 420 | EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, |
| 446 | REG_15, 152); | 421 | REG_15, 152); |
| 447 | } | 422 | } |
| 448 | if (jit->seen & SEEN_SKB) { | ||
| 449 | emit_load_skb_data_hlen(jit); | ||
| 450 | /* stg %b1,ST_OFF_SKBP(%r0,%r15) */ | ||
| 451 | EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15, | ||
| 452 | STK_OFF_SKBP); | ||
| 453 | } | ||
| 454 | } | 423 | } |
| 455 | 424 | ||
| 456 | /* | 425 | /* |
| @@ -483,12 +452,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i | |||
| 483 | { | 452 | { |
| 484 | struct bpf_insn *insn = &fp->insnsi[i]; | 453 | struct bpf_insn *insn = &fp->insnsi[i]; |
| 485 | int jmp_off, last, insn_count = 1; | 454 | int jmp_off, last, insn_count = 1; |
| 486 | unsigned int func_addr, mask; | ||
| 487 | u32 dst_reg = insn->dst_reg; | 455 | u32 dst_reg = insn->dst_reg; |
| 488 | u32 src_reg = insn->src_reg; | 456 | u32 src_reg = insn->src_reg; |
| 489 | u32 *addrs = jit->addrs; | 457 | u32 *addrs = jit->addrs; |
| 490 | s32 imm = insn->imm; | 458 | s32 imm = insn->imm; |
| 491 | s16 off = insn->off; | 459 | s16 off = insn->off; |
| 460 | unsigned int mask; | ||
| 492 | 461 | ||
| 493 | if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX) | 462 | if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX) |
| 494 | jit->seen |= SEEN_REG_AX; | 463 | jit->seen |= SEEN_REG_AX; |
| @@ -970,13 +939,6 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i | |||
| 970 | EMIT2(0x0d00, REG_14, REG_W1); | 939 | EMIT2(0x0d00, REG_14, REG_W1); |
| 971 | /* lgr %b0,%r2: load return value into %b0 */ | 940 | /* lgr %b0,%r2: load return value into %b0 */ |
| 972 | EMIT4(0xb9040000, BPF_REG_0, REG_2); | 941 | EMIT4(0xb9040000, BPF_REG_0, REG_2); |
| 973 | if ((jit->seen & SEEN_SKB) && | ||
| 974 | bpf_helper_changes_pkt_data((void *)func)) { | ||
| 975 | /* lg %b1,ST_OFF_SKBP(%r15) */ | ||
| 976 | EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0, | ||
| 977 | REG_15, STK_OFF_SKBP); | ||
| 978 | emit_load_skb_data_hlen(jit); | ||
| 979 | } | ||
| 980 | break; | 942 | break; |
| 981 | } | 943 | } |
| 982 | case BPF_JMP | BPF_TAIL_CALL: | 944 | case BPF_JMP | BPF_TAIL_CALL: |
| @@ -1176,73 +1138,6 @@ branch_oc: | |||
| 1176 | jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4); | 1138 | jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4); |
| 1177 | EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off); | 1139 | EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off); |
| 1178 | break; | 1140 | break; |
| 1179 | /* | ||
| 1180 | * BPF_LD | ||
| 1181 | */ | ||
| 1182 | case BPF_LD | BPF_ABS | BPF_B: /* b0 = *(u8 *) (skb->data+imm) */ | ||
| 1183 | case BPF_LD | BPF_IND | BPF_B: /* b0 = *(u8 *) (skb->data+imm+src) */ | ||
| 1184 | if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0)) | ||
| 1185 | func_addr = __pa(sk_load_byte_pos); | ||
| 1186 | else | ||
| 1187 | func_addr = __pa(sk_load_byte); | ||
| 1188 | goto call_fn; | ||
| 1189 | case BPF_LD | BPF_ABS | BPF_H: /* b0 = *(u16 *) (skb->data+imm) */ | ||
| 1190 | case BPF_LD | BPF_IND | BPF_H: /* b0 = *(u16 *) (skb->data+imm+src) */ | ||
| 1191 | if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0)) | ||
| 1192 | func_addr = __pa(sk_load_half_pos); | ||
| 1193 | else | ||
| 1194 | func_addr = __pa(sk_load_half); | ||
| 1195 | goto call_fn; | ||
| 1196 | case BPF_LD | BPF_ABS | BPF_W: /* b0 = *(u32 *) (skb->data+imm) */ | ||
| 1197 | case BPF_LD | BPF_IND | BPF_W: /* b0 = *(u32 *) (skb->data+imm+src) */ | ||
| 1198 | if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0)) | ||
| 1199 | func_addr = __pa(sk_load_word_pos); | ||
| 1200 | else | ||
| 1201 | func_addr = __pa(sk_load_word); | ||
| 1202 | goto call_fn; | ||
| 1203 | call_fn: | ||
| 1204 | jit->seen |= SEEN_SKB | SEEN_RET0 | SEEN_FUNC; | ||
| 1205 | REG_SET_SEEN(REG_14); /* Return address of possible func call */ | ||
| 1206 | |||
| 1207 | /* | ||
| 1208 | * Implicit input: | ||
| 1209 | * BPF_REG_6 (R7) : skb pointer | ||
| 1210 | * REG_SKB_DATA (R12): skb data pointer (if no BPF_REG_AX) | ||
| 1211 | * | ||
| 1212 | * Calculated input: | ||
| 1213 | * BPF_REG_2 (R3) : offset of byte(s) to fetch in skb | ||
| 1214 | * BPF_REG_5 (R6) : return address | ||
| 1215 | * | ||
| 1216 | * Output: | ||
| 1217 | * BPF_REG_0 (R14): data read from skb | ||
| 1218 | * | ||
| 1219 | * Scratch registers (BPF_REG_1-5) | ||
| 1220 | */ | ||
| 1221 | |||
| 1222 | /* Call function: llilf %w1,func_addr */ | ||
| 1223 | EMIT6_IMM(0xc00f0000, REG_W1, func_addr); | ||
| 1224 | |||
| 1225 | /* Offset: lgfi %b2,imm */ | ||
| 1226 | EMIT6_IMM(0xc0010000, BPF_REG_2, imm); | ||
| 1227 | if (BPF_MODE(insn->code) == BPF_IND) | ||
| 1228 | /* agfr %b2,%src (%src is s32 here) */ | ||
| 1229 | EMIT4(0xb9180000, BPF_REG_2, src_reg); | ||
| 1230 | |||
| 1231 | /* Reload REG_SKB_DATA if BPF_REG_AX is used */ | ||
| 1232 | if (jit->seen & SEEN_REG_AX) | ||
| 1233 | /* lg %skb_data,data_off(%b6) */ | ||
| 1234 | EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0, | ||
| 1235 | BPF_REG_6, offsetof(struct sk_buff, data)); | ||
| 1236 | /* basr %b5,%w1 (%b5 is call saved) */ | ||
| 1237 | EMIT2(0x0d00, BPF_REG_5, REG_W1); | ||
| 1238 | |||
| 1239 | /* | ||
| 1240 | * Note: For fast access we jump directly after the | ||
| 1241 | * jnz instruction from bpf_jit.S | ||
| 1242 | */ | ||
| 1243 | /* jnz <ret0> */ | ||
| 1244 | EMIT4_PCREL(0xa7740000, jit->ret0_ip - jit->prg); | ||
| 1245 | break; | ||
| 1246 | default: /* too complex, give up */ | 1141 | default: /* too complex, give up */ |
| 1247 | pr_err("Unknown opcode %02x\n", insn->code); | 1142 | pr_err("Unknown opcode %02x\n", insn->code); |
| 1248 | return -1; | 1143 | return -1; |
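The renumbered SEEN_* constants above follow the usual bit-flag pattern: individual facts are OR'd into jit->seen while instructions are emitted, and composite masks such as SEEN_STACK (now just SEEN_FUNC | SEEN_MEM, with SEEN_SKB gone) gate later decisions like whether the prologue must build a stack frame. A minimal sketch of that pattern, with made-up names:

    /* Illustrative SEEN_* flag usage; not the kernel's structures. */
    #define DEMO_SEEN_MEM   (1 << 0)        /* use mem[] for temporaries */
    #define DEMO_SEEN_FUNC  (1 << 3)        /* calls C functions */
    #define DEMO_SEEN_STACK (DEMO_SEEN_FUNC | DEMO_SEEN_MEM)

    struct demo_jit {
            unsigned int seen;              /* bits accumulated per insn */
    };

    static int demo_needs_stack_frame(const struct demo_jit *jit)
    {
            /* Any SEEN_STACK bit forces the prologue to build a frame. */
            return (jit->seen & DEMO_SEEN_STACK) != 0;
    }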
diff --git a/arch/sparc/net/Makefile b/arch/sparc/net/Makefile index 76fa8e95b721..d32aac3a25b8 100644 --- a/arch/sparc/net/Makefile +++ b/arch/sparc/net/Makefile | |||
| @@ -1,4 +1,7 @@ | |||
| 1 | # | 1 | # |
| 2 | # Arch-specific network modules | 2 | # Arch-specific network modules |
| 3 | # | 3 | # |
| 4 | obj-$(CONFIG_BPF_JIT) += bpf_jit_asm_$(BITS).o bpf_jit_comp_$(BITS).o | 4 | obj-$(CONFIG_BPF_JIT) += bpf_jit_comp_$(BITS).o |
| 5 | ifeq ($(BITS),32) | ||
| 6 | obj-$(CONFIG_BPF_JIT) += bpf_jit_asm_32.o | ||
| 7 | endif | ||
diff --git a/arch/sparc/net/bpf_jit_64.h b/arch/sparc/net/bpf_jit_64.h index 428f7fd19175..fbc836f1c51c 100644 --- a/arch/sparc/net/bpf_jit_64.h +++ b/arch/sparc/net/bpf_jit_64.h | |||
| @@ -33,35 +33,6 @@ | |||
| 33 | #define I5 0x1d | 33 | #define I5 0x1d |
| 34 | #define FP 0x1e | 34 | #define FP 0x1e |
| 35 | #define I7 0x1f | 35 | #define I7 0x1f |
| 36 | |||
| 37 | #define r_SKB L0 | ||
| 38 | #define r_HEADLEN L4 | ||
| 39 | #define r_SKB_DATA L5 | ||
| 40 | #define r_TMP G1 | ||
| 41 | #define r_TMP2 G3 | ||
| 42 | |||
| 43 | /* assembly code in arch/sparc/net/bpf_jit_asm_64.S */ | ||
| 44 | extern u32 bpf_jit_load_word[]; | ||
| 45 | extern u32 bpf_jit_load_half[]; | ||
| 46 | extern u32 bpf_jit_load_byte[]; | ||
| 47 | extern u32 bpf_jit_load_byte_msh[]; | ||
| 48 | extern u32 bpf_jit_load_word_positive_offset[]; | ||
| 49 | extern u32 bpf_jit_load_half_positive_offset[]; | ||
| 50 | extern u32 bpf_jit_load_byte_positive_offset[]; | ||
| 51 | extern u32 bpf_jit_load_byte_msh_positive_offset[]; | ||
| 52 | extern u32 bpf_jit_load_word_negative_offset[]; | ||
| 53 | extern u32 bpf_jit_load_half_negative_offset[]; | ||
| 54 | extern u32 bpf_jit_load_byte_negative_offset[]; | ||
| 55 | extern u32 bpf_jit_load_byte_msh_negative_offset[]; | ||
| 56 | |||
| 57 | #else | ||
| 58 | #define r_RESULT %o0 | ||
| 59 | #define r_SKB %o0 | ||
| 60 | #define r_OFF %o1 | ||
| 61 | #define r_HEADLEN %l4 | ||
| 62 | #define r_SKB_DATA %l5 | ||
| 63 | #define r_TMP %g1 | ||
| 64 | #define r_TMP2 %g3 | ||
| 65 | #endif | 36 | #endif |
| 66 | 37 | ||
| 67 | #endif /* _BPF_JIT_H */ | 38 | #endif /* _BPF_JIT_H */ |
diff --git a/arch/sparc/net/bpf_jit_asm_64.S b/arch/sparc/net/bpf_jit_asm_64.S deleted file mode 100644 index 7177867052a1..000000000000 --- a/arch/sparc/net/bpf_jit_asm_64.S +++ /dev/null | |||
| @@ -1,162 +0,0 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
| 2 | #include <asm/ptrace.h> | ||
| 3 | |||
| 4 | #include "bpf_jit_64.h" | ||
| 5 | |||
| 6 | #define SAVE_SZ 176 | ||
| 7 | #define SCRATCH_OFF STACK_BIAS + 128 | ||
| 8 | #define BE_PTR(label) be,pn %xcc, label | ||
| 9 | #define SIGN_EXTEND(reg) sra reg, 0, reg | ||
| 10 | |||
| 11 | #define SKF_MAX_NEG_OFF (-0x200000) /* SKF_LL_OFF from filter.h */ | ||
| 12 | |||
| 13 | .text | ||
| 14 | .globl bpf_jit_load_word | ||
| 15 | bpf_jit_load_word: | ||
| 16 | cmp r_OFF, 0 | ||
| 17 | bl bpf_slow_path_word_neg | ||
| 18 | nop | ||
| 19 | .globl bpf_jit_load_word_positive_offset | ||
| 20 | bpf_jit_load_word_positive_offset: | ||
| 21 | sub r_HEADLEN, r_OFF, r_TMP | ||
| 22 | cmp r_TMP, 3 | ||
| 23 | ble bpf_slow_path_word | ||
| 24 | add r_SKB_DATA, r_OFF, r_TMP | ||
| 25 | andcc r_TMP, 3, %g0 | ||
| 26 | bne load_word_unaligned | ||
| 27 | nop | ||
| 28 | retl | ||
| 29 | ld [r_TMP], r_RESULT | ||
| 30 | load_word_unaligned: | ||
| 31 | ldub [r_TMP + 0x0], r_OFF | ||
| 32 | ldub [r_TMP + 0x1], r_TMP2 | ||
| 33 | sll r_OFF, 8, r_OFF | ||
| 34 | or r_OFF, r_TMP2, r_OFF | ||
| 35 | ldub [r_TMP + 0x2], r_TMP2 | ||
| 36 | sll r_OFF, 8, r_OFF | ||
| 37 | or r_OFF, r_TMP2, r_OFF | ||
| 38 | ldub [r_TMP + 0x3], r_TMP2 | ||
| 39 | sll r_OFF, 8, r_OFF | ||
| 40 | retl | ||
| 41 | or r_OFF, r_TMP2, r_RESULT | ||
| 42 | |||
| 43 | .globl bpf_jit_load_half | ||
| 44 | bpf_jit_load_half: | ||
| 45 | cmp r_OFF, 0 | ||
| 46 | bl bpf_slow_path_half_neg | ||
| 47 | nop | ||
| 48 | .globl bpf_jit_load_half_positive_offset | ||
| 49 | bpf_jit_load_half_positive_offset: | ||
| 50 | sub r_HEADLEN, r_OFF, r_TMP | ||
| 51 | cmp r_TMP, 1 | ||
| 52 | ble bpf_slow_path_half | ||
| 53 | add r_SKB_DATA, r_OFF, r_TMP | ||
| 54 | andcc r_TMP, 1, %g0 | ||
| 55 | bne load_half_unaligned | ||
| 56 | nop | ||
| 57 | retl | ||
| 58 | lduh [r_TMP], r_RESULT | ||
| 59 | load_half_unaligned: | ||
| 60 | ldub [r_TMP + 0x0], r_OFF | ||
| 61 | ldub [r_TMP + 0x1], r_TMP2 | ||
| 62 | sll r_OFF, 8, r_OFF | ||
| 63 | retl | ||
| 64 | or r_OFF, r_TMP2, r_RESULT | ||
| 65 | |||
| 66 | .globl bpf_jit_load_byte | ||
| 67 | bpf_jit_load_byte: | ||
| 68 | cmp r_OFF, 0 | ||
| 69 | bl bpf_slow_path_byte_neg | ||
| 70 | nop | ||
| 71 | .globl bpf_jit_load_byte_positive_offset | ||
| 72 | bpf_jit_load_byte_positive_offset: | ||
| 73 | cmp r_OFF, r_HEADLEN | ||
| 74 | bge bpf_slow_path_byte | ||
| 75 | nop | ||
| 76 | retl | ||
| 77 | ldub [r_SKB_DATA + r_OFF], r_RESULT | ||
| 78 | |||
| 79 | #define bpf_slow_path_common(LEN) \ | ||
| 80 | save %sp, -SAVE_SZ, %sp; \ | ||
| 81 | mov %i0, %o0; \ | ||
| 82 | mov %i1, %o1; \ | ||
| 83 | add %fp, SCRATCH_OFF, %o2; \ | ||
| 84 | call skb_copy_bits; \ | ||
| 85 | mov (LEN), %o3; \ | ||
| 86 | cmp %o0, 0; \ | ||
| 87 | restore; | ||
| 88 | |||
| 89 | bpf_slow_path_word: | ||
| 90 | bpf_slow_path_common(4) | ||
| 91 | bl bpf_error | ||
| 92 | ld [%sp + SCRATCH_OFF], r_RESULT | ||
| 93 | retl | ||
| 94 | nop | ||
| 95 | bpf_slow_path_half: | ||
| 96 | bpf_slow_path_common(2) | ||
| 97 | bl bpf_error | ||
| 98 | lduh [%sp + SCRATCH_OFF], r_RESULT | ||
| 99 | retl | ||
| 100 | nop | ||
| 101 | bpf_slow_path_byte: | ||
| 102 | bpf_slow_path_common(1) | ||
| 103 | bl bpf_error | ||
| 104 | ldub [%sp + SCRATCH_OFF], r_RESULT | ||
| 105 | retl | ||
| 106 | nop | ||
| 107 | |||
| 108 | #define bpf_negative_common(LEN) \ | ||
| 109 | save %sp, -SAVE_SZ, %sp; \ | ||
| 110 | mov %i0, %o0; \ | ||
| 111 | mov %i1, %o1; \ | ||
| 112 | SIGN_EXTEND(%o1); \ | ||
| 113 | call bpf_internal_load_pointer_neg_helper; \ | ||
| 114 | mov (LEN), %o2; \ | ||
| 115 | mov %o0, r_TMP; \ | ||
| 116 | cmp %o0, 0; \ | ||
| 117 | BE_PTR(bpf_error); \ | ||
| 118 | restore; | ||
| 119 | |||
| 120 | bpf_slow_path_word_neg: | ||
| 121 | sethi %hi(SKF_MAX_NEG_OFF), r_TMP | ||
| 122 | cmp r_OFF, r_TMP | ||
| 123 | bl bpf_error | ||
| 124 | nop | ||
| 125 | .globl bpf_jit_load_word_negative_offset | ||
| 126 | bpf_jit_load_word_negative_offset: | ||
| 127 | bpf_negative_common(4) | ||
| 128 | andcc r_TMP, 3, %g0 | ||
| 129 | bne load_word_unaligned | ||
| 130 | nop | ||
| 131 | retl | ||
| 132 | ld [r_TMP], r_RESULT | ||
| 133 | |||
| 134 | bpf_slow_path_half_neg: | ||
| 135 | sethi %hi(SKF_MAX_NEG_OFF), r_TMP | ||
| 136 | cmp r_OFF, r_TMP | ||
| 137 | bl bpf_error | ||
| 138 | nop | ||
| 139 | .globl bpf_jit_load_half_negative_offset | ||
| 140 | bpf_jit_load_half_negative_offset: | ||
| 141 | bpf_negative_common(2) | ||
| 142 | andcc r_TMP, 1, %g0 | ||
| 143 | bne load_half_unaligned | ||
| 144 | nop | ||
| 145 | retl | ||
| 146 | lduh [r_TMP], r_RESULT | ||
| 147 | |||
| 148 | bpf_slow_path_byte_neg: | ||
| 149 | sethi %hi(SKF_MAX_NEG_OFF), r_TMP | ||
| 150 | cmp r_OFF, r_TMP | ||
| 151 | bl bpf_error | ||
| 152 | nop | ||
| 153 | .globl bpf_jit_load_byte_negative_offset | ||
| 154 | bpf_jit_load_byte_negative_offset: | ||
| 155 | bpf_negative_common(1) | ||
| 156 | retl | ||
| 157 | ldub [r_TMP], r_RESULT | ||
| 158 | |||
| 159 | bpf_error: | ||
| 160 | /* Make the JIT program itself return zero. */ | ||
| 161 | ret | ||
| 162 | restore %g0, %g0, %o0 | ||
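The load_word_unaligned path above exists because SPARC traps on misaligned accesses, so the helper assembles the 32-bit big-endian value one byte at a time. The same shift-and-or sequence in C (a sketch, not kernel code):

    /* C equivalent of load_word_unaligned: big-endian 32-bit read,
     * one byte at a time, safe for any alignment. */
    #include <stdint.h>

    static uint32_t demo_load_be32_unaligned(const uint8_t *p)
    {
            uint32_t v = p[0];

            v = (v << 8) | p[1];    /* sll r_OFF, 8 ; or r_OFF, r_TMP2 */
            v = (v << 8) | p[2];
            v = (v << 8) | p[3];
            return v;
    }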
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 48a25869349b..9f5918e0693a 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c | |||
| @@ -48,10 +48,6 @@ static void bpf_flush_icache(void *start_, void *end_) | |||
| 48 | } | 48 | } |
| 49 | } | 49 | } |
| 50 | 50 | ||
| 51 | #define SEEN_DATAREF 1 /* might call external helpers */ | ||
| 52 | #define SEEN_XREG 2 /* ebx is used */ | ||
| 53 | #define SEEN_MEM 4 /* use mem[] for temporary storage */ | ||
| 54 | |||
| 55 | #define S13(X) ((X) & 0x1fff) | 51 | #define S13(X) ((X) & 0x1fff) |
| 56 | #define S5(X) ((X) & 0x1f) | 52 | #define S5(X) ((X) & 0x1f) |
| 57 | #define IMMED 0x00002000 | 53 | #define IMMED 0x00002000 |
| @@ -198,7 +194,6 @@ struct jit_ctx { | |||
| 198 | bool tmp_1_used; | 194 | bool tmp_1_used; |
| 199 | bool tmp_2_used; | 195 | bool tmp_2_used; |
| 200 | bool tmp_3_used; | 196 | bool tmp_3_used; |
| 201 | bool saw_ld_abs_ind; | ||
| 202 | bool saw_frame_pointer; | 197 | bool saw_frame_pointer; |
| 203 | bool saw_call; | 198 | bool saw_call; |
| 204 | bool saw_tail_call; | 199 | bool saw_tail_call; |
| @@ -207,9 +202,7 @@ struct jit_ctx { | |||
| 207 | 202 | ||
| 208 | #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) | 203 | #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) |
| 209 | #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) | 204 | #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) |
| 210 | #define SKB_HLEN_REG (MAX_BPF_JIT_REG + 2) | 205 | #define TMP_REG_3 (MAX_BPF_JIT_REG + 2) |
| 211 | #define SKB_DATA_REG (MAX_BPF_JIT_REG + 3) | ||
| 212 | #define TMP_REG_3 (MAX_BPF_JIT_REG + 4) | ||
| 213 | 206 | ||
| 214 | /* Map BPF registers to SPARC registers */ | 207 | /* Map BPF registers to SPARC registers */ |
| 215 | static const int bpf2sparc[] = { | 208 | static const int bpf2sparc[] = { |
| @@ -238,9 +231,6 @@ static const int bpf2sparc[] = { | |||
| 238 | [TMP_REG_1] = G1, | 231 | [TMP_REG_1] = G1, |
| 239 | [TMP_REG_2] = G2, | 232 | [TMP_REG_2] = G2, |
| 240 | [TMP_REG_3] = G3, | 233 | [TMP_REG_3] = G3, |
| 241 | |||
| 242 | [SKB_HLEN_REG] = L4, | ||
| 243 | [SKB_DATA_REG] = L5, | ||
| 244 | }; | 234 | }; |
| 245 | 235 | ||
| 246 | static void emit(const u32 insn, struct jit_ctx *ctx) | 236 | static void emit(const u32 insn, struct jit_ctx *ctx) |
| @@ -800,25 +790,6 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src, | |||
| 800 | return 0; | 790 | return 0; |
| 801 | } | 791 | } |
| 802 | 792 | ||
| 803 | static void load_skb_regs(struct jit_ctx *ctx, u8 r_skb) | ||
| 804 | { | ||
| 805 | const u8 r_headlen = bpf2sparc[SKB_HLEN_REG]; | ||
| 806 | const u8 r_data = bpf2sparc[SKB_DATA_REG]; | ||
| 807 | const u8 r_tmp = bpf2sparc[TMP_REG_1]; | ||
| 808 | unsigned int off; | ||
| 809 | |||
| 810 | off = offsetof(struct sk_buff, len); | ||
| 811 | emit(LD32I | RS1(r_skb) | S13(off) | RD(r_headlen), ctx); | ||
| 812 | |||
| 813 | off = offsetof(struct sk_buff, data_len); | ||
| 814 | emit(LD32I | RS1(r_skb) | S13(off) | RD(r_tmp), ctx); | ||
| 815 | |||
| 816 | emit(SUB | RS1(r_headlen) | RS2(r_tmp) | RD(r_headlen), ctx); | ||
| 817 | |||
| 818 | off = offsetof(struct sk_buff, data); | ||
| 819 | emit(LDPTRI | RS1(r_skb) | S13(off) | RD(r_data), ctx); | ||
| 820 | } | ||
| 821 | |||
| 822 | /* Just skip the save instruction and the ctx register move. */ | 793 | /* Just skip the save instruction and the ctx register move. */ |
| 823 | #define BPF_TAILCALL_PROLOGUE_SKIP 16 | 794 | #define BPF_TAILCALL_PROLOGUE_SKIP 16 |
| 824 | #define BPF_TAILCALL_CNT_SP_OFF (STACK_BIAS + 128) | 795 | #define BPF_TAILCALL_CNT_SP_OFF (STACK_BIAS + 128) |
| @@ -857,9 +828,6 @@ static void build_prologue(struct jit_ctx *ctx) | |||
| 857 | 828 | ||
| 858 | emit_reg_move(I0, O0, ctx); | 829 | emit_reg_move(I0, O0, ctx); |
| 859 | /* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */ | 830 | /* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */ |
| 860 | |||
| 861 | if (ctx->saw_ld_abs_ind) | ||
| 862 | load_skb_regs(ctx, bpf2sparc[BPF_REG_1]); | ||
| 863 | } | 831 | } |
| 864 | 832 | ||
| 865 | static void build_epilogue(struct jit_ctx *ctx) | 833 | static void build_epilogue(struct jit_ctx *ctx) |
| @@ -1225,16 +1193,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) | |||
| 1225 | u8 *func = ((u8 *)__bpf_call_base) + imm; | 1193 | u8 *func = ((u8 *)__bpf_call_base) + imm; |
| 1226 | 1194 | ||
| 1227 | ctx->saw_call = true; | 1195 | ctx->saw_call = true; |
| 1228 | if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func)) | ||
| 1229 | emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx); | ||
| 1230 | 1196 | ||
| 1231 | emit_call((u32 *)func, ctx); | 1197 | emit_call((u32 *)func, ctx); |
| 1232 | emit_nop(ctx); | 1198 | emit_nop(ctx); |
| 1233 | 1199 | ||
| 1234 | emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx); | 1200 | emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx); |
| 1235 | |||
| 1236 | if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func)) | ||
| 1237 | load_skb_regs(ctx, L7); | ||
| 1238 | break; | 1201 | break; |
| 1239 | } | 1202 | } |
| 1240 | 1203 | ||
| @@ -1412,43 +1375,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) | |||
| 1412 | emit_nop(ctx); | 1375 | emit_nop(ctx); |
| 1413 | break; | 1376 | break; |
| 1414 | } | 1377 | } |
| 1415 | #define CHOOSE_LOAD_FUNC(K, func) \ | ||
| 1416 | ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) | ||
| 1417 | |||
| 1418 | /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */ | ||
| 1419 | case BPF_LD | BPF_ABS | BPF_W: | ||
| 1420 | func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_word); | ||
| 1421 | goto common_load; | ||
| 1422 | case BPF_LD | BPF_ABS | BPF_H: | ||
| 1423 | func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_half); | ||
| 1424 | goto common_load; | ||
| 1425 | case BPF_LD | BPF_ABS | BPF_B: | ||
| 1426 | func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_byte); | ||
| 1427 | goto common_load; | ||
| 1428 | /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */ | ||
| 1429 | case BPF_LD | BPF_IND | BPF_W: | ||
| 1430 | func = bpf_jit_load_word; | ||
| 1431 | goto common_load; | ||
| 1432 | case BPF_LD | BPF_IND | BPF_H: | ||
| 1433 | func = bpf_jit_load_half; | ||
| 1434 | goto common_load; | ||
| 1435 | |||
| 1436 | case BPF_LD | BPF_IND | BPF_B: | ||
| 1437 | func = bpf_jit_load_byte; | ||
| 1438 | common_load: | ||
| 1439 | ctx->saw_ld_abs_ind = true; | ||
| 1440 | |||
| 1441 | emit_reg_move(bpf2sparc[BPF_REG_6], O0, ctx); | ||
| 1442 | emit_loadimm(imm, O1, ctx); | ||
| 1443 | |||
| 1444 | if (BPF_MODE(code) == BPF_IND) | ||
| 1445 | emit_alu(ADD, src, O1, ctx); | ||
| 1446 | |||
| 1447 | emit_call(func, ctx); | ||
| 1448 | emit_alu_K(SRA, O1, 0, ctx); | ||
| 1449 | |||
| 1450 | emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx); | ||
| 1451 | break; | ||
| 1452 | 1378 | ||
| 1453 | default: | 1379 | default: |
| 1454 | pr_err_once("unknown opcode %02x\n", code); | 1380 | pr_err_once("unknown opcode %02x\n", code); |
| @@ -1583,12 +1509,11 @@ skip_init_ctx: | |||
| 1583 | build_epilogue(&ctx); | 1509 | build_epilogue(&ctx); |
| 1584 | 1510 | ||
| 1585 | if (bpf_jit_enable > 1) | 1511 | if (bpf_jit_enable > 1) |
| 1586 | pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c%c]\n", pass, | 1512 | pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c]\n", pass, |
| 1587 | image_size - (ctx.idx * 4), | 1513 | image_size - (ctx.idx * 4), |
| 1588 | ctx.tmp_1_used ? '1' : ' ', | 1514 | ctx.tmp_1_used ? '1' : ' ', |
| 1589 | ctx.tmp_2_used ? '2' : ' ', | 1515 | ctx.tmp_2_used ? '2' : ' ', |
| 1590 | ctx.tmp_3_used ? '3' : ' ', | 1516 | ctx.tmp_3_used ? '3' : ' ', |
| 1591 | ctx.saw_ld_abs_ind ? 'L' : ' ', | ||
| 1592 | ctx.saw_frame_pointer ? 'F' : ' ', | 1517 | ctx.saw_frame_pointer ? 'F' : ' ', |
| 1593 | ctx.saw_call ? 'C' : ' ', | 1518 | ctx.saw_call ? 'C' : ' ', |
| 1594 | ctx.saw_tail_call ? 'T' : ' '); | 1519 | ctx.saw_tail_call ? 'T' : ' '); |
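The deleted CHOOSE_LOAD_FUNC macro picked one of three assembly entry points from the immediate alone: a non-negative offset took the positive-offset fast entry, a negative one down to SKF_LL_OFF took the link-level entry, and anything more negative fell back to the generic entry, which re-checks the range at run time (BPF_IND always used the generic entry, since its offset is dynamic). Spelled out as C, with hypothetical names:

    /* Hypothetical C rendering of the deleted CHOOSE_LOAD_FUNC dispatch. */
    #define DEMO_SKF_LL_OFF (-0x200000)     /* SKF_LL_OFF from filter.h */

    enum demo_entry { DEMO_POSITIVE, DEMO_NEGATIVE, DEMO_GENERIC };

    static enum demo_entry demo_choose_load_func(int k)
    {
            if (k >= 0)
                    return DEMO_POSITIVE;   /* func##_positive_offset */
            if (k >= DEMO_SKF_LL_OFF)
                    return DEMO_NEGATIVE;   /* func##_negative_offset */
            return DEMO_GENERIC;            /* plain func, checked at run time */
    }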
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c07f492b871a..d51a71dcbac2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
| @@ -138,7 +138,7 @@ config X86 | |||
| 138 | select HAVE_DMA_CONTIGUOUS | 138 | select HAVE_DMA_CONTIGUOUS |
| 139 | select HAVE_DYNAMIC_FTRACE | 139 | select HAVE_DYNAMIC_FTRACE |
| 140 | select HAVE_DYNAMIC_FTRACE_WITH_REGS | 140 | select HAVE_DYNAMIC_FTRACE_WITH_REGS |
| 141 | select HAVE_EBPF_JIT if X86_64 | 141 | select HAVE_EBPF_JIT |
| 142 | select HAVE_EFFICIENT_UNALIGNED_ACCESS | 142 | select HAVE_EFFICIENT_UNALIGNED_ACCESS |
| 143 | select HAVE_EXIT_THREAD | 143 | select HAVE_EXIT_THREAD |
| 144 | select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE | 144 | select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE |
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index f928ad9b143f..2cd344d1a6e5 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h | |||
| @@ -291,16 +291,20 @@ do { \ | |||
| 291 | * lfence | 291 | * lfence |
| 292 | * jmp spec_trap | 292 | * jmp spec_trap |
| 293 | * do_rop: | 293 | * do_rop: |
| 294 | * mov %rax,(%rsp) | 294 | * mov %rax,(%rsp) for x86_64 |
| 295 | * mov %edx,(%esp) for x86_32 | ||
| 295 | * retq | 296 | * retq |
| 296 | * | 297 | * |
| 297 | * Without retpolines configured: | 298 | * Without retpolines configured: |
| 298 | * | 299 | * |
| 299 | * jmp *%rax | 300 | * jmp *%rax for x86_64 |
| 301 | * jmp *%edx for x86_32 | ||
| 300 | */ | 302 | */ |
| 301 | #ifdef CONFIG_RETPOLINE | 303 | #ifdef CONFIG_RETPOLINE |
| 304 | #ifdef CONFIG_X86_64 | ||
| 302 | # define RETPOLINE_RAX_BPF_JIT_SIZE 17 | 305 | # define RETPOLINE_RAX_BPF_JIT_SIZE 17 |
| 303 | # define RETPOLINE_RAX_BPF_JIT() \ | 306 | # define RETPOLINE_RAX_BPF_JIT() \ |
| 307 | do { \ | ||
| 304 | EMIT1_off32(0xE8, 7); /* callq do_rop */ \ | 308 | EMIT1_off32(0xE8, 7); /* callq do_rop */ \ |
| 305 | /* spec_trap: */ \ | 309 | /* spec_trap: */ \ |
| 306 | EMIT2(0xF3, 0x90); /* pause */ \ | 310 | EMIT2(0xF3, 0x90); /* pause */ \ |
| @@ -308,11 +312,31 @@ do { \ | |||
| 308 | EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ | 312 | EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ |
| 309 | /* do_rop: */ \ | 313 | /* do_rop: */ \ |
| 310 | EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \ | 314 | EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \ |
| 311 | EMIT1(0xC3); /* retq */ | 315 | EMIT1(0xC3); /* retq */ \ |
| 316 | } while (0) | ||
| 312 | #else | 317 | #else |
| 318 | # define RETPOLINE_EDX_BPF_JIT() \ | ||
| 319 | do { \ | ||
| 320 | EMIT1_off32(0xE8, 7); /* call do_rop */ \ | ||
| 321 | /* spec_trap: */ \ | ||
| 322 | EMIT2(0xF3, 0x90); /* pause */ \ | ||
| 323 | EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ | ||
| 324 | EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ | ||
| 325 | /* do_rop: */ \ | ||
| 326 | EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \ | ||
| 327 | EMIT1(0xC3); /* ret */ \ | ||
| 328 | } while (0) | ||
| 329 | #endif | ||
| 330 | #else /* !CONFIG_RETPOLINE */ | ||
| 331 | |||
| 332 | #ifdef CONFIG_X86_64 | ||
| 313 | # define RETPOLINE_RAX_BPF_JIT_SIZE 2 | 333 | # define RETPOLINE_RAX_BPF_JIT_SIZE 2 |
| 314 | # define RETPOLINE_RAX_BPF_JIT() \ | 334 | # define RETPOLINE_RAX_BPF_JIT() \ |
| 315 | EMIT2(0xFF, 0xE0); /* jmp *%rax */ | 335 | EMIT2(0xFF, 0xE0); /* jmp *%rax */ |
| 336 | #else | ||
| 337 | # define RETPOLINE_EDX_BPF_JIT() \ | ||
| 338 | EMIT2(0xFF, 0xE2) /* jmp *%edx */ | ||
| 339 | #endif | ||
| 316 | #endif | 340 | #endif |
| 317 | 341 | ||
| 318 | #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ | 342 | #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ |
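The 17 in RETPOLINE_RAX_BPF_JIT_SIZE can be checked against the EMIT*() calls above: call rel32 (5 bytes) + pause (2) + lfence (3) + jmp rel8 (2) + mov %rax,(%rsp) (4) + retq (1) = 17. Laid out as the raw byte sequence (illustrative only):

    /* Byte-level view of RETPOLINE_RAX_BPF_JIT() with retpolines enabled. */
    #define DEMO_RETPOLINE_RAX_SIZE 17      /* RETPOLINE_RAX_BPF_JIT_SIZE */

    static const unsigned char demo_retpoline_rax[] = {
            0xE8, 0x07, 0x00, 0x00, 0x00,   /* callq do_rop (rel32 = 7)  */
            /* spec_trap: */
            0xF3, 0x90,                     /* pause                     */
            0x0F, 0xAE, 0xE8,               /* lfence                    */
            0xEB, 0xF9,                     /* jmp spec_trap (rel8 = -7) */
            /* do_rop: */
            0x48, 0x89, 0x04, 0x24,         /* mov %rax,(%rsp)           */
            0xC3,                           /* retq                      */
    };
    _Static_assert(sizeof(demo_retpoline_rax) == DEMO_RETPOLINE_RAX_SIZE,
                   "retpoline thunk must stay 17 bytes");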
diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile index fefb4b619598..59e123da580c 100644 --- a/arch/x86/net/Makefile +++ b/arch/x86/net/Makefile | |||
| @@ -1,6 +1,9 @@ | |||
| 1 | # | 1 | # |
| 2 | # Arch-specific network modules | 2 | # Arch-specific network modules |
| 3 | # | 3 | # |
| 4 | OBJECT_FILES_NON_STANDARD_bpf_jit.o += y | ||
| 5 | 4 | ||
| 6 | obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o | 5 | ifeq ($(CONFIG_X86_32),y) |
| 6 | obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o | ||
| 7 | else | ||
| 8 | obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o | ||
| 9 | endif | ||
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S deleted file mode 100644 index b33093f84528..000000000000 --- a/arch/x86/net/bpf_jit.S +++ /dev/null | |||
| @@ -1,154 +0,0 @@ | |||
| 1 | /* bpf_jit.S : BPF JIT helper functions | ||
| 2 | * | ||
| 3 | * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com) | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or | ||
| 6 | * modify it under the terms of the GNU General Public License | ||
| 7 | * as published by the Free Software Foundation; version 2 | ||
| 8 | * of the License. | ||
| 9 | */ | ||
| 10 | #include <linux/linkage.h> | ||
| 11 | #include <asm/frame.h> | ||
| 12 | |||
| 13 | /* | ||
| 14 | * Calling convention : | ||
| 15 | * rbx : skb pointer (callee saved) | ||
| 16 | * esi : offset of byte(s) to fetch in skb (can be scratched) | ||
| 17 | * r10 : copy of skb->data | ||
| 18 | * r9d : hlen = skb->len - skb->data_len | ||
| 19 | */ | ||
| 20 | #define SKBDATA %r10 | ||
| 21 | #define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */ | ||
| 22 | |||
| 23 | #define FUNC(name) \ | ||
| 24 | .globl name; \ | ||
| 25 | .type name, @function; \ | ||
| 26 | name: | ||
| 27 | |||
| 28 | FUNC(sk_load_word) | ||
| 29 | test %esi,%esi | ||
| 30 | js bpf_slow_path_word_neg | ||
| 31 | |||
| 32 | FUNC(sk_load_word_positive_offset) | ||
| 33 | mov %r9d,%eax # hlen | ||
| 34 | sub %esi,%eax # hlen - offset | ||
| 35 | cmp $3,%eax | ||
| 36 | jle bpf_slow_path_word | ||
| 37 | mov (SKBDATA,%rsi),%eax | ||
| 38 | bswap %eax /* ntohl() */ | ||
| 39 | ret | ||
| 40 | |||
| 41 | FUNC(sk_load_half) | ||
| 42 | test %esi,%esi | ||
| 43 | js bpf_slow_path_half_neg | ||
| 44 | |||
| 45 | FUNC(sk_load_half_positive_offset) | ||
| 46 | mov %r9d,%eax | ||
| 47 | sub %esi,%eax # hlen - offset | ||
| 48 | cmp $1,%eax | ||
| 49 | jle bpf_slow_path_half | ||
| 50 | movzwl (SKBDATA,%rsi),%eax | ||
| 51 | rol $8,%ax # ntohs() | ||
| 52 | ret | ||
| 53 | |||
| 54 | FUNC(sk_load_byte) | ||
| 55 | test %esi,%esi | ||
| 56 | js bpf_slow_path_byte_neg | ||
| 57 | |||
| 58 | FUNC(sk_load_byte_positive_offset) | ||
| 59 | cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */ | ||
| 60 | jle bpf_slow_path_byte | ||
| 61 | movzbl (SKBDATA,%rsi),%eax | ||
| 62 | ret | ||
| 63 | |||
| 64 | /* rsi contains offset and can be scratched */ | ||
| 65 | #define bpf_slow_path_common(LEN) \ | ||
| 66 | lea 32(%rbp), %rdx;\ | ||
| 67 | FRAME_BEGIN; \ | ||
| 68 | mov %rbx, %rdi; /* arg1 == skb */ \ | ||
| 69 | push %r9; \ | ||
| 70 | push SKBDATA; \ | ||
| 71 | /* rsi already has offset */ \ | ||
| 72 | mov $LEN,%ecx; /* len */ \ | ||
| 73 | call skb_copy_bits; \ | ||
| 74 | test %eax,%eax; \ | ||
| 75 | pop SKBDATA; \ | ||
| 76 | pop %r9; \ | ||
| 77 | FRAME_END | ||
| 78 | |||
| 79 | |||
| 80 | bpf_slow_path_word: | ||
| 81 | bpf_slow_path_common(4) | ||
| 82 | js bpf_error | ||
| 83 | mov 32(%rbp),%eax | ||
| 84 | bswap %eax | ||
| 85 | ret | ||
| 86 | |||
| 87 | bpf_slow_path_half: | ||
| 88 | bpf_slow_path_common(2) | ||
| 89 | js bpf_error | ||
| 90 | mov 32(%rbp),%ax | ||
| 91 | rol $8,%ax | ||
| 92 | movzwl %ax,%eax | ||
| 93 | ret | ||
| 94 | |||
| 95 | bpf_slow_path_byte: | ||
| 96 | bpf_slow_path_common(1) | ||
| 97 | js bpf_error | ||
| 98 | movzbl 32(%rbp),%eax | ||
| 99 | ret | ||
| 100 | |||
| 101 | #define sk_negative_common(SIZE) \ | ||
| 102 | FRAME_BEGIN; \ | ||
| 103 | mov %rbx, %rdi; /* arg1 == skb */ \ | ||
| 104 | push %r9; \ | ||
| 105 | push SKBDATA; \ | ||
| 106 | /* rsi already has offset */ \ | ||
| 107 | mov $SIZE,%edx; /* size */ \ | ||
| 108 | call bpf_internal_load_pointer_neg_helper; \ | ||
| 109 | test %rax,%rax; \ | ||
| 110 | pop SKBDATA; \ | ||
| 111 | pop %r9; \ | ||
| 112 | FRAME_END; \ | ||
| 113 | jz bpf_error | ||
| 114 | |||
| 115 | bpf_slow_path_word_neg: | ||
| 116 | cmp SKF_MAX_NEG_OFF, %esi /* test range */ | ||
| 117 | jl bpf_error /* offset lower -> error */ | ||
| 118 | |||
| 119 | FUNC(sk_load_word_negative_offset) | ||
| 120 | sk_negative_common(4) | ||
| 121 | mov (%rax), %eax | ||
| 122 | bswap %eax | ||
| 123 | ret | ||
| 124 | |||
| 125 | bpf_slow_path_half_neg: | ||
| 126 | cmp SKF_MAX_NEG_OFF, %esi | ||
| 127 | jl bpf_error | ||
| 128 | |||
| 129 | FUNC(sk_load_half_negative_offset) | ||
| 130 | sk_negative_common(2) | ||
| 131 | mov (%rax),%ax | ||
| 132 | rol $8,%ax | ||
| 133 | movzwl %ax,%eax | ||
| 134 | ret | ||
| 135 | |||
| 136 | bpf_slow_path_byte_neg: | ||
| 137 | cmp SKF_MAX_NEG_OFF, %esi | ||
| 138 | jl bpf_error | ||
| 139 | |||
| 140 | FUNC(sk_load_byte_negative_offset) | ||
| 141 | sk_negative_common(1) | ||
| 142 | movzbl (%rax), %eax | ||
| 143 | ret | ||
| 144 | |||
| 145 | bpf_error: | ||
| 146 | # force a return 0 from jit handler | ||
| 147 | xor %eax,%eax | ||
| 148 | mov (%rbp),%rbx | ||
| 149 | mov 8(%rbp),%r13 | ||
| 150 | mov 16(%rbp),%r14 | ||
| 151 | mov 24(%rbp),%r15 | ||
| 152 | add $40, %rbp | ||
| 153 | leaveq | ||
| 154 | ret | ||
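The deleted fast paths above do their byte-order fixup inline: `bswap %eax` is ntohl() and `rol $8,%ax` is ntohs() on the low 16 bits. The same operations in C for a little-endian host (sketch only):

    /* C view of the byte-order fixups in the deleted x86-64 fast paths. */
    #include <stdint.h>

    static uint32_t demo_ntohl(uint32_t x)
    {
            return __builtin_bswap32(x);                    /* bswap %eax */
    }

    static uint16_t demo_ntohs(uint16_t x)
    {
            return (uint16_t)((x << 8) | (x >> 8));         /* rol $8,%ax */
    }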
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 263c8453815e..8fca446aaef6 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c | |||
| @@ -1,4 +1,5 @@ | |||
| 1 | /* bpf_jit_comp.c : BPF JIT compiler | 1 | /* |
| 2 | * bpf_jit_comp.c: BPF JIT compiler | ||
| 2 | * | 3 | * |
| 3 | * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) | 4 | * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) |
| 4 | * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com | 5 | * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com |
| @@ -16,15 +17,6 @@ | |||
| 16 | #include <asm/set_memory.h> | 17 | #include <asm/set_memory.h> |
| 17 | #include <asm/nospec-branch.h> | 18 | #include <asm/nospec-branch.h> |
| 18 | 19 | ||
| 19 | /* | ||
| 20 | * assembly code in arch/x86/net/bpf_jit.S | ||
| 21 | */ | ||
| 22 | extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; | ||
| 23 | extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[]; | ||
| 24 | extern u8 sk_load_byte_positive_offset[]; | ||
| 25 | extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[]; | ||
| 26 | extern u8 sk_load_byte_negative_offset[]; | ||
| 27 | |||
| 28 | static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) | 20 | static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) |
| 29 | { | 21 | { |
| 30 | if (len == 1) | 22 | if (len == 1) |
| @@ -45,14 +37,15 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) | |||
| 45 | #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) | 37 | #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) |
| 46 | #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) | 38 | #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) |
| 47 | #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) | 39 | #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) |
| 40 | |||
| 48 | #define EMIT1_off32(b1, off) \ | 41 | #define EMIT1_off32(b1, off) \ |
| 49 | do {EMIT1(b1); EMIT(off, 4); } while (0) | 42 | do { EMIT1(b1); EMIT(off, 4); } while (0) |
| 50 | #define EMIT2_off32(b1, b2, off) \ | 43 | #define EMIT2_off32(b1, b2, off) \ |
| 51 | do {EMIT2(b1, b2); EMIT(off, 4); } while (0) | 44 | do { EMIT2(b1, b2); EMIT(off, 4); } while (0) |
| 52 | #define EMIT3_off32(b1, b2, b3, off) \ | 45 | #define EMIT3_off32(b1, b2, b3, off) \ |
| 53 | do {EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) | 46 | do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) |
| 54 | #define EMIT4_off32(b1, b2, b3, b4, off) \ | 47 | #define EMIT4_off32(b1, b2, b3, b4, off) \ |
| 55 | do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) | 48 | do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) |
| 56 | 49 | ||
| 57 | static bool is_imm8(int value) | 50 | static bool is_imm8(int value) |
| 58 | { | 51 | { |
| @@ -70,9 +63,10 @@ static bool is_uimm32(u64 value) | |||
| 70 | } | 63 | } |
| 71 | 64 | ||
| 72 | /* mov dst, src */ | 65 | /* mov dst, src */ |
| 73 | #define EMIT_mov(DST, SRC) \ | 66 | #define EMIT_mov(DST, SRC) \ |
| 74 | do {if (DST != SRC) \ | 67 | do { \ |
| 75 | EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \ | 68 | if (DST != SRC) \ |
| 69 | EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \ | ||
| 76 | } while (0) | 70 | } while (0) |
| 77 | 71 | ||
| 78 | static int bpf_size_to_x86_bytes(int bpf_size) | 72 | static int bpf_size_to_x86_bytes(int bpf_size) |
| @@ -89,7 +83,8 @@ static int bpf_size_to_x86_bytes(int bpf_size) | |||
| 89 | return 0; | 83 | return 0; |
| 90 | } | 84 | } |
| 91 | 85 | ||
| 92 | /* list of x86 cond jumps opcodes (. + s8) | 86 | /* |
| 87 | * List of x86 cond jumps opcodes (. + s8) | ||
| 93 | * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) | 88 | * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) |
| 94 | */ | 89 | */ |
| 95 | #define X86_JB 0x72 | 90 | #define X86_JB 0x72 |
| @@ -103,38 +98,37 @@ static int bpf_size_to_x86_bytes(int bpf_size) | |||
| 103 | #define X86_JLE 0x7E | 98 | #define X86_JLE 0x7E |
| 104 | #define X86_JG 0x7F | 99 | #define X86_JG 0x7F |
| 105 | 100 | ||
| 106 | #define CHOOSE_LOAD_FUNC(K, func) \ | 101 | /* Pick a register outside of BPF range for JIT internal work */ |
| 107 | ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) | ||
| 108 | |||
| 109 | /* pick a register outside of BPF range for JIT internal work */ | ||
| 110 | #define AUX_REG (MAX_BPF_JIT_REG + 1) | 102 | #define AUX_REG (MAX_BPF_JIT_REG + 1) |
| 111 | 103 | ||
| 112 | /* The following table maps BPF registers to x64 registers. | 104 | /* |
| 105 | * The following table maps BPF registers to x86-64 registers. | ||
| 113 | * | 106 | * |
| 114 | * x64 register r12 is unused, since if used as base address | 107 | * x86-64 register R12 is unused, since if used as base address |
| 115 | * register in load/store instructions, it always needs an | 108 | * register in load/store instructions, it always needs an |
| 116 | * extra byte of encoding and is callee saved. | 109 | * extra byte of encoding and is callee saved. |
| 117 | * | 110 | * |
| 118 | * r9 caches skb->len - skb->data_len | 111 | * Also x86-64 register R9 is unused. x86-64 register R10 is |
| 119 | * r10 caches skb->data, and used for blinding (if enabled) | 112 | * used for blinding (if enabled). |
| 120 | */ | 113 | */ |
| 121 | static const int reg2hex[] = { | 114 | static const int reg2hex[] = { |
| 122 | [BPF_REG_0] = 0, /* rax */ | 115 | [BPF_REG_0] = 0, /* RAX */ |
| 123 | [BPF_REG_1] = 7, /* rdi */ | 116 | [BPF_REG_1] = 7, /* RDI */ |
| 124 | [BPF_REG_2] = 6, /* rsi */ | 117 | [BPF_REG_2] = 6, /* RSI */ |
| 125 | [BPF_REG_3] = 2, /* rdx */ | 118 | [BPF_REG_3] = 2, /* RDX */ |
| 126 | [BPF_REG_4] = 1, /* rcx */ | 119 | [BPF_REG_4] = 1, /* RCX */ |
| 127 | [BPF_REG_5] = 0, /* r8 */ | 120 | [BPF_REG_5] = 0, /* R8 */ |
| 128 | [BPF_REG_6] = 3, /* rbx callee saved */ | 121 | [BPF_REG_6] = 3, /* RBX callee saved */ |
| 129 | [BPF_REG_7] = 5, /* r13 callee saved */ | 122 | [BPF_REG_7] = 5, /* R13 callee saved */ |
| 130 | [BPF_REG_8] = 6, /* r14 callee saved */ | 123 | [BPF_REG_8] = 6, /* R14 callee saved */ |
| 131 | [BPF_REG_9] = 7, /* r15 callee saved */ | 124 | [BPF_REG_9] = 7, /* R15 callee saved */ |
| 132 | [BPF_REG_FP] = 5, /* rbp readonly */ | 125 | [BPF_REG_FP] = 5, /* RBP readonly */ |
| 133 | [BPF_REG_AX] = 2, /* r10 temp register */ | 126 | [BPF_REG_AX] = 2, /* R10 temp register */ |
| 134 | [AUX_REG] = 3, /* r11 temp register */ | 127 | [AUX_REG] = 3, /* R11 temp register */ |
| 135 | }; | 128 | }; |
| 136 | 129 | ||
| 137 | /* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15 | 130 | /* |
| 131 | * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15 | ||
| 138 | * which need extra byte of encoding. | 132 | * which need extra byte of encoding. |
| 139 | * rax,rcx,...,rbp have simpler encoding | 133 | * rax,rcx,...,rbp have simpler encoding |
| 140 | */ | 134 | */ |
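reg2hex[] combines with the add_*mod()/add_*reg() helpers below to form standard REX + ModRM encodings. For example, EMIT_mov(BPF_REG_6, BPF_REG_7) should produce 4C 89 EB, i.e. mov rbx,r13, with REX.R set because r13 is an extended register. A stand-alone check of that worked example (hex values copied from reg2hex[] above):

    /* Worked example: EMIT_mov(BPF_REG_6, BPF_REG_7) -> 4C 89 EB. */
    #include <assert.h>

    int main(void)
    {
            unsigned char dst_hex = 3;              /* reg2hex[BPF_REG_6]: rbx */
            unsigned char src_hex = 5;              /* reg2hex[BPF_REG_7]: r13 */
            unsigned char rex   = 0x48 | 0x04;      /* add_2mod(): REX.W + REX.R */
            unsigned char modrm = 0xC0 + dst_hex + (src_hex << 3); /* add_2reg() */

            assert(rex == 0x4C && modrm == 0xEB);   /* middle opcode byte: 0x89 */
            return 0;
    }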
| @@ -153,7 +147,7 @@ static bool is_axreg(u32 reg) | |||
| 153 | return reg == BPF_REG_0; | 147 | return reg == BPF_REG_0; |
| 154 | } | 148 | } |
| 155 | 149 | ||
| 156 | /* add modifiers if 'reg' maps to x64 registers r8..r15 */ | 150 | /* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */ |
| 157 | static u8 add_1mod(u8 byte, u32 reg) | 151 | static u8 add_1mod(u8 byte, u32 reg) |
| 158 | { | 152 | { |
| 159 | if (is_ereg(reg)) | 153 | if (is_ereg(reg)) |
| @@ -170,13 +164,13 @@ static u8 add_2mod(u8 byte, u32 r1, u32 r2) | |||
| 170 | return byte; | 164 | return byte; |
| 171 | } | 165 | } |
| 172 | 166 | ||
| 173 | /* encode 'dst_reg' register into x64 opcode 'byte' */ | 167 | /* Encode 'dst_reg' register into x86-64 opcode 'byte' */ |
| 174 | static u8 add_1reg(u8 byte, u32 dst_reg) | 168 | static u8 add_1reg(u8 byte, u32 dst_reg) |
| 175 | { | 169 | { |
| 176 | return byte + reg2hex[dst_reg]; | 170 | return byte + reg2hex[dst_reg]; |
| 177 | } | 171 | } |
| 178 | 172 | ||
| 179 | /* encode 'dst_reg' and 'src_reg' registers into x64 opcode 'byte' */ | 173 | /* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */ |
| 180 | static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) | 174 | static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) |
| 181 | { | 175 | { |
| 182 | return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); | 176 | return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); |
| @@ -184,27 +178,24 @@ static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) | |||
| 184 | 178 | ||
| 185 | static void jit_fill_hole(void *area, unsigned int size) | 179 | static void jit_fill_hole(void *area, unsigned int size) |
| 186 | { | 180 | { |
| 187 | /* fill whole space with int3 instructions */ | 181 | /* Fill whole space with INT3 instructions */ |
| 188 | memset(area, 0xcc, size); | 182 | memset(area, 0xcc, size); |
| 189 | } | 183 | } |
| 190 | 184 | ||
| 191 | struct jit_context { | 185 | struct jit_context { |
| 192 | int cleanup_addr; /* epilogue code offset */ | 186 | int cleanup_addr; /* Epilogue code offset */ |
| 193 | bool seen_ld_abs; | ||
| 194 | bool seen_ax_reg; | ||
| 195 | }; | 187 | }; |
| 196 | 188 | ||
| 197 | /* maximum number of bytes emitted while JITing one eBPF insn */ | 189 | /* Maximum number of bytes emitted while JITing one eBPF insn */ |
| 198 | #define BPF_MAX_INSN_SIZE 128 | 190 | #define BPF_MAX_INSN_SIZE 128 |
| 199 | #define BPF_INSN_SAFETY 64 | 191 | #define BPF_INSN_SAFETY 64 |
| 200 | 192 | ||
| 201 | #define AUX_STACK_SPACE \ | 193 | #define AUX_STACK_SPACE 40 /* Space for RBX, R13, R14, R15, tailcnt */ |
| 202 | (32 /* space for rbx, r13, r14, r15 */ + \ | ||
| 203 | 8 /* space for skb_copy_bits() buffer */) | ||
| 204 | 194 | ||
| 205 | #define PROLOGUE_SIZE 37 | 195 | #define PROLOGUE_SIZE 37 |
| 206 | 196 | ||
| 207 | /* emit x64 prologue code for BPF program and check it's size. | 197 | /* |
| 198 | * Emit x86-64 prologue code for BPF program and check its size. | ||
| 208 | * bpf_tail_call helper will skip it while jumping into another program | 199 | * bpf_tail_call helper will skip it while jumping into another program |
| 209 | */ | 200 | */ |
| 210 | static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | 201 | static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) |
| @@ -212,8 +203,11 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | |||
| 212 | u8 *prog = *pprog; | 203 | u8 *prog = *pprog; |
| 213 | int cnt = 0; | 204 | int cnt = 0; |
| 214 | 205 | ||
| 215 | EMIT1(0x55); /* push rbp */ | 206 | /* push rbp */ |
| 216 | EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */ | 207 | EMIT1(0x55); |
| 208 | |||
| 209 | /* mov rbp,rsp */ | ||
| 210 | EMIT3(0x48, 0x89, 0xE5); | ||
| 217 | 211 | ||
| 218 | /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */ | 212 | /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */ |
| 219 | EMIT3_off32(0x48, 0x81, 0xEC, | 213 | EMIT3_off32(0x48, 0x81, 0xEC, |
| @@ -222,19 +216,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | |||
| 222 | /* sub rbp, AUX_STACK_SPACE */ | 216 | /* sub rbp, AUX_STACK_SPACE */ |
| 223 | EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE); | 217 | EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE); |
| 224 | 218 | ||
| 225 | /* all classic BPF filters use R6(rbx) save it */ | ||
| 226 | |||
| 227 | /* mov qword ptr [rbp+0],rbx */ | 219 | /* mov qword ptr [rbp+0],rbx */ |
| 228 | EMIT4(0x48, 0x89, 0x5D, 0); | 220 | EMIT4(0x48, 0x89, 0x5D, 0); |
| 229 | |||
| 230 | /* bpf_convert_filter() maps classic BPF register X to R7 and uses R8 | ||
| 231 | * as temporary, so all tcpdump filters need to spill/fill R7(r13) and | ||
| 232 | * R8(r14). R9(r15) spill could be made conditional, but there is only | ||
| 233 | * one 'bpf_error' return path out of helper functions inside bpf_jit.S | ||
| 234 | * The overhead of extra spill is negligible for any filter other | ||
| 235 | * than synthetic ones. Therefore not worth adding complexity. | ||
| 236 | */ | ||
| 237 | |||
| 238 | /* mov qword ptr [rbp+8],r13 */ | 221 | /* mov qword ptr [rbp+8],r13 */ |
| 239 | EMIT4(0x4C, 0x89, 0x6D, 8); | 222 | EMIT4(0x4C, 0x89, 0x6D, 8); |
| 240 | /* mov qword ptr [rbp+16],r14 */ | 223 | /* mov qword ptr [rbp+16],r14 */ |
| @@ -243,9 +226,10 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | |||
| 243 | EMIT4(0x4C, 0x89, 0x7D, 24); | 226 | EMIT4(0x4C, 0x89, 0x7D, 24); |
| 244 | 227 | ||
| 245 | if (!ebpf_from_cbpf) { | 228 | if (!ebpf_from_cbpf) { |
| 246 | /* Clear the tail call counter (tail_call_cnt): for eBPF tail | 229 | /* |
| 230 | * Clear the tail call counter (tail_call_cnt): for eBPF tail | ||
| 247 | * calls we need to reset the counter to 0. It's done in two | 231 | * calls we need to reset the counter to 0. It's done in two |
| 248 | * instructions, resetting rax register to 0, and moving it | 232 | * instructions, resetting RAX register to 0, and moving it |
| 249 | * to the counter location. | 233 | * to the counter location. |
| 250 | */ | 234 | */ |
| 251 | 235 | ||
| @@ -260,7 +244,9 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | |||
| 260 | *pprog = prog; | 244 | *pprog = prog; |
| 261 | } | 245 | } |
| 262 | 246 | ||
| 263 | /* generate the following code: | 247 | /* |
| 248 | * Generate the following code: | ||
| 249 | * | ||
| 264 | * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... | 250 | * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... |
| 265 | * if (index >= array->map.max_entries) | 251 | * if (index >= array->map.max_entries) |
| 266 | * goto out; | 252 | * goto out; |
| @@ -278,23 +264,26 @@ static void emit_bpf_tail_call(u8 **pprog) | |||
| 278 | int label1, label2, label3; | 264 | int label1, label2, label3; |
| 279 | int cnt = 0; | 265 | int cnt = 0; |
| 280 | 266 | ||
| 281 | /* rdi - pointer to ctx | 267 | /* |
| 268 | * rdi - pointer to ctx | ||
| 282 | * rsi - pointer to bpf_array | 269 | * rsi - pointer to bpf_array |
| 283 | * rdx - index in bpf_array | 270 | * rdx - index in bpf_array |
| 284 | */ | 271 | */ |
| 285 | 272 | ||
| 286 | /* if (index >= array->map.max_entries) | 273 | /* |
| 287 | * goto out; | 274 | * if (index >= array->map.max_entries) |
| 275 | * goto out; | ||
| 288 | */ | 276 | */ |
| 289 | EMIT2(0x89, 0xD2); /* mov edx, edx */ | 277 | EMIT2(0x89, 0xD2); /* mov edx, edx */ |
| 290 | EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ | 278 | EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ |
| 291 | offsetof(struct bpf_array, map.max_entries)); | 279 | offsetof(struct bpf_array, map.max_entries)); |
| 292 | #define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */ | 280 | #define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */ |
| 293 | EMIT2(X86_JBE, OFFSET1); /* jbe out */ | 281 | EMIT2(X86_JBE, OFFSET1); /* jbe out */ |
| 294 | label1 = cnt; | 282 | label1 = cnt; |
| 295 | 283 | ||
| 296 | /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) | 284 | /* |
| 297 | * goto out; | 285 | * if (tail_call_cnt > MAX_TAIL_CALL_CNT) |
| 286 | * goto out; | ||
| 298 | */ | 287 | */ |
| 299 | EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ | 288 | EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ |
| 300 | EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ | 289 | EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ |
| @@ -308,8 +297,9 @@ static void emit_bpf_tail_call(u8 **pprog) | |||
| 308 | EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ | 297 | EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ |
| 309 | offsetof(struct bpf_array, ptrs)); | 298 | offsetof(struct bpf_array, ptrs)); |
| 310 | 299 | ||
| 311 | /* if (prog == NULL) | 300 | /* |
| 312 | * goto out; | 301 | * if (prog == NULL) |
| 302 | * goto out; | ||
| 313 | */ | 303 | */ |
| 314 | EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ | 304 | EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ |
| 315 | #define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) | 305 | #define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) |
| @@ -321,7 +311,8 @@ static void emit_bpf_tail_call(u8 **pprog) | |||
| 321 | offsetof(struct bpf_prog, bpf_func)); | 311 | offsetof(struct bpf_prog, bpf_func)); |
| 322 | EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */ | 312 | EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */ |
| 323 | 313 | ||
| 324 | /* now we're ready to jump into next BPF program | 314 | /* |
| 315 | * Now we're ready to jump into next BPF program | ||
| 325 | * rdi == ctx (1st arg) | 316 | * rdi == ctx (1st arg) |
| 326 | * rax == prog->bpf_func + prologue_size | 317 | * rax == prog->bpf_func + prologue_size |
| 327 | */ | 318 | */ |
| @@ -334,26 +325,6 @@ static void emit_bpf_tail_call(u8 **pprog) | |||
| 334 | *pprog = prog; | 325 | *pprog = prog; |
| 335 | } | 326 | } |
| 336 | 327 | ||
| 337 | |||
| 338 | static void emit_load_skb_data_hlen(u8 **pprog) | ||
| 339 | { | ||
| 340 | u8 *prog = *pprog; | ||
| 341 | int cnt = 0; | ||
| 342 | |||
| 343 | /* r9d = skb->len - skb->data_len (headlen) | ||
| 344 | * r10 = skb->data | ||
| 345 | */ | ||
| 346 | /* mov %r9d, off32(%rdi) */ | ||
| 347 | EMIT3_off32(0x44, 0x8b, 0x8f, offsetof(struct sk_buff, len)); | ||
| 348 | |||
| 349 | /* sub %r9d, off32(%rdi) */ | ||
| 350 | EMIT3_off32(0x44, 0x2b, 0x8f, offsetof(struct sk_buff, data_len)); | ||
| 351 | |||
| 352 | /* mov %r10, off32(%rdi) */ | ||
| 353 | EMIT3_off32(0x4c, 0x8b, 0x97, offsetof(struct sk_buff, data)); | ||
| 354 | *pprog = prog; | ||
| 355 | } | ||
| 356 | |||
| 357 | static void emit_mov_imm32(u8 **pprog, bool sign_propagate, | 328 | static void emit_mov_imm32(u8 **pprog, bool sign_propagate, |
| 358 | u32 dst_reg, const u32 imm32) | 329 | u32 dst_reg, const u32 imm32) |
| 359 | { | 330 | { |
| @@ -361,7 +332,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate, | |||
| 361 | u8 b1, b2, b3; | 332 | u8 b1, b2, b3; |
| 362 | int cnt = 0; | 333 | int cnt = 0; |
| 363 | 334 | ||
| 364 | /* optimization: if imm32 is positive, use 'mov %eax, imm32' | 335 | /* |
| 336 | * Optimization: if imm32 is positive, use 'mov %eax, imm32' | ||
| 365 | * (which zero-extends imm32) to save 2 bytes. | 337 | * (which zero-extends imm32) to save 2 bytes. |
| 366 | */ | 338 | */ |
| 367 | if (sign_propagate && (s32)imm32 < 0) { | 339 | if (sign_propagate && (s32)imm32 < 0) { |
| @@ -373,7 +345,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate, | |||
| 373 | goto done; | 345 | goto done; |
| 374 | } | 346 | } |
| 375 | 347 | ||
| 376 | /* optimization: if imm32 is zero, use 'xor %eax, %eax' | 348 | /* |
| 349 | * Optimization: if imm32 is zero, use 'xor %eax, %eax' | ||
| 377 | * to save 3 bytes. | 350 | * to save 3 bytes. |
| 378 | */ | 351 | */ |
| 379 | if (imm32 == 0) { | 352 | if (imm32 == 0) { |
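Both optimizations above are pure size savings, visible in the encodings: mov %eax, imm32 takes 5 bytes and zero-extends into the full 64-bit register, the sign-extending mov %rax, imm32 takes 7, and xor %eax, %eax clears rax in 2. Illustrative byte listings:

    /* Encodings behind the two size optimizations above (illustrative). */
    static const unsigned char demo_mov_eax_imm32[5] = {
            0xB8, 0x78, 0x56, 0x34, 0x12,   /* mov %eax, 0x12345678 (zero-extends) */
    };
    static const unsigned char demo_mov_rax_simm32[7] = {
            0x48, 0xC7, 0xC0,               /* mov %rax, simm32 (sign-extends) */
            0x88, 0x77, 0x66, 0xFF,         /* imm32 = 0xff667788 */
    };
    static const unsigned char demo_xor_eax_eax[2] = {
            0x31, 0xC0,                     /* xor %eax, %eax: rax = 0 */
    };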
| @@ -400,7 +373,8 @@ static void emit_mov_imm64(u8 **pprog, u32 dst_reg, | |||
| 400 | int cnt = 0; | 373 | int cnt = 0; |
| 401 | 374 | ||
| 402 | if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) { | 375 | if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) { |
| 403 | /* For emitting plain u32, where sign bit must not be | 376 | /* |
| 377 | * For emitting plain u32, where sign bit must not be | ||
| 404 | * propagated LLVM tends to load imm64 over mov32 | 378 | * propagated LLVM tends to load imm64 over mov32 |
| 405 | * directly, so save couple of bytes by just doing | 379 | * directly, so save couple of bytes by just doing |
| 406 | * 'mov %eax, imm32' instead. | 380 | * 'mov %eax, imm32' instead. |
| @@ -439,8 +413,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 439 | { | 413 | { |
| 440 | struct bpf_insn *insn = bpf_prog->insnsi; | 414 | struct bpf_insn *insn = bpf_prog->insnsi; |
| 441 | int insn_cnt = bpf_prog->len; | 415 | int insn_cnt = bpf_prog->len; |
| 442 | bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0); | ||
| 443 | bool seen_ax_reg = ctx->seen_ax_reg | (oldproglen == 0); | ||
| 444 | bool seen_exit = false; | 416 | bool seen_exit = false; |
| 445 | u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; | 417 | u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; |
| 446 | int i, cnt = 0; | 418 | int i, cnt = 0; |
| @@ -450,9 +422,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 450 | emit_prologue(&prog, bpf_prog->aux->stack_depth, | 422 | emit_prologue(&prog, bpf_prog->aux->stack_depth, |
| 451 | bpf_prog_was_classic(bpf_prog)); | 423 | bpf_prog_was_classic(bpf_prog)); |
| 452 | 424 | ||
| 453 | if (seen_ld_abs) | ||
| 454 | emit_load_skb_data_hlen(&prog); | ||
| 455 | |||
| 456 | for (i = 0; i < insn_cnt; i++, insn++) { | 425 | for (i = 0; i < insn_cnt; i++, insn++) { |
| 457 | const s32 imm32 = insn->imm; | 426 | const s32 imm32 = insn->imm; |
| 458 | u32 dst_reg = insn->dst_reg; | 427 | u32 dst_reg = insn->dst_reg; |
| @@ -460,13 +429,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 460 | u8 b2 = 0, b3 = 0; | 429 | u8 b2 = 0, b3 = 0; |
| 461 | s64 jmp_offset; | 430 | s64 jmp_offset; |
| 462 | u8 jmp_cond; | 431 | u8 jmp_cond; |
| 463 | bool reload_skb_data; | ||
| 464 | int ilen; | 432 | int ilen; |
| 465 | u8 *func; | 433 | u8 *func; |
| 466 | 434 | ||
| 467 | if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX) | ||
| 468 | ctx->seen_ax_reg = seen_ax_reg = true; | ||
| 469 | |||
| 470 | switch (insn->code) { | 435 | switch (insn->code) { |
| 471 | /* ALU */ | 436 | /* ALU */ |
| 472 | case BPF_ALU | BPF_ADD | BPF_X: | 437 | case BPF_ALU | BPF_ADD | BPF_X: |
| @@ -525,7 +490,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 525 | else if (is_ereg(dst_reg)) | 490 | else if (is_ereg(dst_reg)) |
| 526 | EMIT1(add_1mod(0x40, dst_reg)); | 491 | EMIT1(add_1mod(0x40, dst_reg)); |
| 527 | 492 | ||
| 528 | /* b3 holds 'normal' opcode, b2 short form only valid | 493 | /* |
| 494 | * b3 holds 'normal' opcode, b2 short form only valid | ||
| 529 | * in case dst is eax/rax. | 495 | * in case dst is eax/rax. |
| 530 | */ | 496 | */ |
| 531 | switch (BPF_OP(insn->code)) { | 497 | switch (BPF_OP(insn->code)) { |
| @@ -593,7 +559,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 593 | /* mov rax, dst_reg */ | 559 | /* mov rax, dst_reg */ |
| 594 | EMIT_mov(BPF_REG_0, dst_reg); | 560 | EMIT_mov(BPF_REG_0, dst_reg); |
| 595 | 561 | ||
| 596 | /* xor edx, edx | 562 | /* |
| 563 | * xor edx, edx | ||
| 597 | * equivalent to 'xor rdx, rdx', but one byte less | 564 | * equivalent to 'xor rdx, rdx', but one byte less |
| 598 | */ | 565 | */ |
| 599 | EMIT2(0x31, 0xd2); | 566 | EMIT2(0x31, 0xd2); |
| @@ -655,7 +622,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 655 | } | 622 | } |
| 656 | break; | 623 | break; |
| 657 | } | 624 | } |
| 658 | /* shifts */ | 625 | /* Shifts */ |
| 659 | case BPF_ALU | BPF_LSH | BPF_K: | 626 | case BPF_ALU | BPF_LSH | BPF_K: |
| 660 | case BPF_ALU | BPF_RSH | BPF_K: | 627 | case BPF_ALU | BPF_RSH | BPF_K: |
| 661 | case BPF_ALU | BPF_ARSH | BPF_K: | 628 | case BPF_ALU | BPF_ARSH | BPF_K: |
| @@ -686,7 +653,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 686 | case BPF_ALU64 | BPF_RSH | BPF_X: | 653 | case BPF_ALU64 | BPF_RSH | BPF_X: |
| 687 | case BPF_ALU64 | BPF_ARSH | BPF_X: | 654 | case BPF_ALU64 | BPF_ARSH | BPF_X: |
| 688 | 655 | ||
| 689 | /* check for bad case when dst_reg == rcx */ | 656 | /* Check for bad case when dst_reg == rcx */ |
| 690 | if (dst_reg == BPF_REG_4) { | 657 | if (dst_reg == BPF_REG_4) { |
| 691 | /* mov r11, dst_reg */ | 658 | /* mov r11, dst_reg */ |
| 692 | EMIT_mov(AUX_REG, dst_reg); | 659 | EMIT_mov(AUX_REG, dst_reg); |
| @@ -724,13 +691,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 724 | case BPF_ALU | BPF_END | BPF_FROM_BE: | 691 | case BPF_ALU | BPF_END | BPF_FROM_BE: |
| 725 | switch (imm32) { | 692 | switch (imm32) { |
| 726 | case 16: | 693 | case 16: |
| 727 | /* emit 'ror %ax, 8' to swap lower 2 bytes */ | 694 | /* Emit 'ror %ax, 8' to swap lower 2 bytes */ |
| 728 | EMIT1(0x66); | 695 | EMIT1(0x66); |
| 729 | if (is_ereg(dst_reg)) | 696 | if (is_ereg(dst_reg)) |
| 730 | EMIT1(0x41); | 697 | EMIT1(0x41); |
| 731 | EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); | 698 | EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); |
| 732 | 699 | ||
| 733 | /* emit 'movzwl eax, ax' */ | 700 | /* Emit 'movzwl eax, ax' */ |
| 734 | if (is_ereg(dst_reg)) | 701 | if (is_ereg(dst_reg)) |
| 735 | EMIT3(0x45, 0x0F, 0xB7); | 702 | EMIT3(0x45, 0x0F, 0xB7); |
| 736 | else | 703 | else |
| @@ -738,7 +705,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 738 | EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); | 705 | EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); |
| 739 | break; | 706 | break; |
| 740 | case 32: | 707 | case 32: |
| 741 | /* emit 'bswap eax' to swap lower 4 bytes */ | 708 | /* Emit 'bswap eax' to swap lower 4 bytes */ |
| 742 | if (is_ereg(dst_reg)) | 709 | if (is_ereg(dst_reg)) |
| 743 | EMIT2(0x41, 0x0F); | 710 | EMIT2(0x41, 0x0F); |
| 744 | else | 711 | else |
| @@ -746,7 +713,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 746 | EMIT1(add_1reg(0xC8, dst_reg)); | 713 | EMIT1(add_1reg(0xC8, dst_reg)); |
| 747 | break; | 714 | break; |
| 748 | case 64: | 715 | case 64: |
| 749 | /* emit 'bswap rax' to swap 8 bytes */ | 716 | /* Emit 'bswap rax' to swap 8 bytes */ |
| 750 | EMIT3(add_1mod(0x48, dst_reg), 0x0F, | 717 | EMIT3(add_1mod(0x48, dst_reg), 0x0F, |
| 751 | add_1reg(0xC8, dst_reg)); | 718 | add_1reg(0xC8, dst_reg)); |
| 752 | break; | 719 | break; |
| @@ -756,7 +723,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 756 | case BPF_ALU | BPF_END | BPF_FROM_LE: | 723 | case BPF_ALU | BPF_END | BPF_FROM_LE: |
| 757 | switch (imm32) { | 724 | switch (imm32) { |
| 758 | case 16: | 725 | case 16: |
| 759 | /* emit 'movzwl eax, ax' to zero extend 16-bit | 726 | /* |
| 727 | * Emit 'movzwl eax, ax' to zero extend 16-bit | ||
| 760 | * into 64 bit | 728 | * into 64 bit |
| 761 | */ | 729 | */ |
| 762 | if (is_ereg(dst_reg)) | 730 | if (is_ereg(dst_reg)) |
| @@ -766,7 +734,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
| 766 | EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); | 734 | EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); |
| 767 | break; | 735 | break; |
| 768 | case 32: | 736 | case 32: |
| 769 | /* emit 'mov eax, eax' to clear upper 32-bits */ | 737 | /* Emit 'mov eax, eax' to clear upper 32-bits */ |
| 770 | if (is_ereg(dst_reg)) | 738 | if (is_ereg(dst_reg)) |
| 771 | EMIT1(0x45); | 739 | EMIT1(0x45); |
| 772 | EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg)); | 740 | EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg)); |
| @@ -809,9 +777,9 @@ st: if (is_imm8(insn->off)) | |||
| 809 | 777 | ||
| 810 | /* STX: *(u8*)(dst_reg + off) = src_reg */ | 778 | /* STX: *(u8*)(dst_reg + off) = src_reg */ |
| 811 | case BPF_STX | BPF_MEM | BPF_B: | 779 | case BPF_STX | BPF_MEM | BPF_B: |
| 812 | /* emit 'mov byte ptr [rax + off], al' */ | 780 | /* Emit 'mov byte ptr [rax + off], al' */ |
| 813 | if (is_ereg(dst_reg) || is_ereg(src_reg) || | 781 | if (is_ereg(dst_reg) || is_ereg(src_reg) || |
| 814 | /* have to add extra byte for x86 SIL, DIL regs */ | 782 | /* We have to add extra byte for x86 SIL, DIL regs */ |
| 815 | src_reg == BPF_REG_1 || src_reg == BPF_REG_2) | 783 | src_reg == BPF_REG_1 || src_reg == BPF_REG_2) |
| 816 | EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); | 784 | EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); |
| 817 | else | 785 | else |
| @@ -840,25 +808,26 @@ stx: if (is_imm8(insn->off)) | |||
| 840 | 808 | ||
| 841 | /* LDX: dst_reg = *(u8*)(src_reg + off) */ | 809 | /* LDX: dst_reg = *(u8*)(src_reg + off) */ |
| 842 | case BPF_LDX | BPF_MEM | BPF_B: | 810 | case BPF_LDX | BPF_MEM | BPF_B: |
| 843 | /* emit 'movzx rax, byte ptr [rax + off]' */ | 811 | /* Emit 'movzx rax, byte ptr [rax + off]' */ |
| 844 | EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); | 812 | EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); |
| 845 | goto ldx; | 813 | goto ldx; |
| 846 | case BPF_LDX | BPF_MEM | BPF_H: | 814 | case BPF_LDX | BPF_MEM | BPF_H: |
| 847 | /* emit 'movzx rax, word ptr [rax + off]' */ | 815 | /* Emit 'movzx rax, word ptr [rax + off]' */ |
| 848 | EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); | 816 | EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); |
| 849 | goto ldx; | 817 | goto ldx; |
| 850 | case BPF_LDX | BPF_MEM | BPF_W: | 818 | case BPF_LDX | BPF_MEM | BPF_W: |
| 851 | /* emit 'mov eax, dword ptr [rax+0x14]' */ | 819 | /* Emit 'mov eax, dword ptr [rax+0x14]' */ |
| 852 | if (is_ereg(dst_reg) || is_ereg(src_reg)) | 820 | if (is_ereg(dst_reg) || is_ereg(src_reg)) |
| 853 | EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); | 821 | EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); |
| 854 | else | 822 | else |
| 855 | EMIT1(0x8B); | 823 | EMIT1(0x8B); |
| 856 | goto ldx; | 824 | goto ldx; |
| 857 | case BPF_LDX | BPF_MEM | BPF_DW: | 825 | case BPF_LDX | BPF_MEM | BPF_DW: |
| 858 | /* emit 'mov rax, qword ptr [rax+0x14]' */ | 826 | /* Emit 'mov rax, qword ptr [rax+0x14]' */ |
| 859 | EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); | 827 | EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); |
| 860 | ldx: /* if insn->off == 0 we can save one extra byte, but | 828 | ldx: /* |
| 861 | * special case of x86 r13 which always needs an offset | 829 | * If insn->off == 0 we can save one extra byte, but |
| 830 | * special case of x86 R13 which always needs an offset | ||
| 862 | * is not worth the hassle | 831 | * is not worth the hassle |
| 863 | */ | 832 | */ |
| 864 | if (is_imm8(insn->off)) | 833 | if (is_imm8(insn->off)) |
| @@ -870,7 +839,7 @@ ldx: /* if insn->off == 0 we can save one extra byte, but | |||
| 870 | 839 | ||
| 871 | /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */ | 840 | /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */ |
| 872 | case BPF_STX | BPF_XADD | BPF_W: | 841 | case BPF_STX | BPF_XADD | BPF_W: |
| 873 | /* emit 'lock add dword ptr [rax + off], eax' */ | 842 | /* Emit 'lock add dword ptr [rax + off], eax' */ |
| 874 | if (is_ereg(dst_reg) || is_ereg(src_reg)) | 843 | if (is_ereg(dst_reg) || is_ereg(src_reg)) |
| 875 | EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01); | 844 | EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01); |
| 876 | else | 845 | else |
| @@ -889,35 +858,12 @@ xadd: if (is_imm8(insn->off)) | |||
| 889 | case BPF_JMP | BPF_CALL: | 858 | case BPF_JMP | BPF_CALL: |
| 890 | func = (u8 *) __bpf_call_base + imm32; | 859 | func = (u8 *) __bpf_call_base + imm32; |
| 891 | jmp_offset = func - (image + addrs[i]); | 860 | jmp_offset = func - (image + addrs[i]); |
| 892 | if (seen_ld_abs) { | ||
| 893 | reload_skb_data = bpf_helper_changes_pkt_data(func); | ||
| 894 | if (reload_skb_data) { | ||
| 895 | EMIT1(0x57); /* push %rdi */ | ||
| 896 | jmp_offset += 22; /* pop, mov, sub, mov */ | ||
| 897 | } else { | ||
| 898 | EMIT2(0x41, 0x52); /* push %r10 */ | ||
| 899 | EMIT2(0x41, 0x51); /* push %r9 */ | ||
| 900 | /* need to adjust jmp offset, since | ||
| 901 | * pop %r9, pop %r10 take 4 bytes after call insn | ||
| 902 | */ | ||
| 903 | jmp_offset += 4; | ||
| 904 | } | ||
| 905 | } | ||
| 906 | if (!imm32 || !is_simm32(jmp_offset)) { | 861 | if (!imm32 || !is_simm32(jmp_offset)) { |
| 907 | pr_err("unsupported bpf func %d addr %p image %p\n", | 862 | pr_err("unsupported BPF func %d addr %p image %p\n", |
| 908 | imm32, func, image); | 863 | imm32, func, image); |
| 909 | return -EINVAL; | 864 | return -EINVAL; |
| 910 | } | 865 | } |
| 911 | EMIT1_off32(0xE8, jmp_offset); | 866 | EMIT1_off32(0xE8, jmp_offset); |
| 912 | if (seen_ld_abs) { | ||
| 913 | if (reload_skb_data) { | ||
| 914 | EMIT1(0x5F); /* pop %rdi */ | ||
| 915 | emit_load_skb_data_hlen(&prog); | ||
| 916 | } else { | ||
| 917 | EMIT2(0x41, 0x59); /* pop %r9 */ | ||
| 918 | EMIT2(0x41, 0x5A); /* pop %r10 */ | ||
| 919 | } | ||
| 920 | } | ||
| 921 | break; | 867 | break; |
| 922 | 868 | ||
| 923 | case BPF_JMP | BPF_TAIL_CALL: | 869 | case BPF_JMP | BPF_TAIL_CALL: |
| @@ -970,7 +916,7 @@ xadd: if (is_imm8(insn->off)) | |||
| 970 | else | 916 | else |
| 971 | EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32); | 917 | EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32); |
| 972 | 918 | ||
| 973 | emit_cond_jmp: /* convert BPF opcode to x86 */ | 919 | emit_cond_jmp: /* Convert BPF opcode to x86 */ |
| 974 | switch (BPF_OP(insn->code)) { | 920 | switch (BPF_OP(insn->code)) { |
| 975 | case BPF_JEQ: | 921 | case BPF_JEQ: |
| 976 | jmp_cond = X86_JE; | 922 | jmp_cond = X86_JE; |
| @@ -996,22 +942,22 @@ emit_cond_jmp: /* convert BPF opcode to x86 */ | |||
| 996 | jmp_cond = X86_JBE; | 942 | jmp_cond = X86_JBE; |
| 997 | break; | 943 | break; |
| 998 | case BPF_JSGT: | 944 | case BPF_JSGT: |
| 999 | /* signed '>', GT in x86 */ | 945 | /* Signed '>', GT in x86 */ |
| 1000 | jmp_cond = X86_JG; | 946 | jmp_cond = X86_JG; |
| 1001 | break; | 947 | break; |
| 1002 | case BPF_JSLT: | 948 | case BPF_JSLT: |
| 1003 | /* signed '<', LT in x86 */ | 949 | /* Signed '<', LT in x86 */ |
| 1004 | jmp_cond = X86_JL; | 950 | jmp_cond = X86_JL; |
| 1005 | break; | 951 | break; |
| 1006 | case BPF_JSGE: | 952 | case BPF_JSGE: |
| 1007 | /* signed '>=', GE in x86 */ | 953 | /* Signed '>=', GE in x86 */ |
| 1008 | jmp_cond = X86_JGE; | 954 | jmp_cond = X86_JGE; |
| 1009 | break; | 955 | break; |
| 1010 | case BPF_JSLE: | 956 | case BPF_JSLE: |
| 1011 | /* signed '<=', LE in x86 */ | 957 | /* Signed '<=', LE in x86 */ |
| 1012 | jmp_cond = X86_JLE; | 958 | jmp_cond = X86_JLE; |
| 1013 | break; | 959 | break; |
| 1014 | default: /* to silence gcc warning */ | 960 | default: /* to silence GCC warning */ |
| 1015 | return -EFAULT; | 961 | return -EFAULT; |
| 1016 | } | 962 | } |
| 1017 | jmp_offset = addrs[i + insn->off] - addrs[i]; | 963 | jmp_offset = addrs[i + insn->off] - addrs[i]; |
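A note on the jump math above: addrs[i] records the byte offset of the *end* of BPF instruction i in the JITed image, so addrs[i + insn->off] - addrs[i] is already the x86 displacement measured from the end of the emitted jump. A minimal standalone sketch (the addrs[] values below are invented for illustration):

    #include <stdio.h>

    int main(void)
    {
            /* addrs[i] = byte offset of the *end* of BPF insn i (invented) */
            int addrs[] = { 5, 9, 14, 16, 23 };
            int i = 1, off = 2;     /* jump emitted at BPF insn 1, off = +2 */
            int jmp_offset = addrs[i + off] - addrs[i];

            printf("rel displacement: %d bytes\n", jmp_offset); /* 16 - 9 = 7 */
            return 0;
    }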
| @@ -1039,7 +985,7 @@ emit_cond_jmp: /* convert BPF opcode to x86 */ | |||
| 1039 | jmp_offset = addrs[i + insn->off] - addrs[i]; | 985 | jmp_offset = addrs[i + insn->off] - addrs[i]; |
| 1040 | 986 | ||
| 1041 | if (!jmp_offset) | 987 | if (!jmp_offset) |
| 1042 | /* optimize out nop jumps */ | 988 | /* Optimize out nop jumps */ |
| 1043 | break; | 989 | break; |
| 1044 | emit_jmp: | 990 | emit_jmp: |
| 1045 | if (is_imm8(jmp_offset)) { | 991 | if (is_imm8(jmp_offset)) { |
| @@ -1052,66 +998,13 @@ emit_jmp: | |||
| 1052 | } | 998 | } |
| 1053 | break; | 999 | break; |
| 1054 | 1000 | ||
| 1055 | case BPF_LD | BPF_IND | BPF_W: | ||
| 1056 | func = sk_load_word; | ||
| 1057 | goto common_load; | ||
| 1058 | case BPF_LD | BPF_ABS | BPF_W: | ||
| 1059 | func = CHOOSE_LOAD_FUNC(imm32, sk_load_word); | ||
| 1060 | common_load: | ||
| 1061 | ctx->seen_ld_abs = seen_ld_abs = true; | ||
| 1062 | jmp_offset = func - (image + addrs[i]); | ||
| 1063 | if (!func || !is_simm32(jmp_offset)) { | ||
| 1064 | pr_err("unsupported bpf func %d addr %p image %p\n", | ||
| 1065 | imm32, func, image); | ||
| 1066 | return -EINVAL; | ||
| 1067 | } | ||
| 1068 | if (BPF_MODE(insn->code) == BPF_ABS) { | ||
| 1069 | /* mov %esi, imm32 */ | ||
| 1070 | EMIT1_off32(0xBE, imm32); | ||
| 1071 | } else { | ||
| 1072 | /* mov %rsi, src_reg */ | ||
| 1073 | EMIT_mov(BPF_REG_2, src_reg); | ||
| 1074 | if (imm32) { | ||
| 1075 | if (is_imm8(imm32)) | ||
| 1076 | /* add %esi, imm8 */ | ||
| 1077 | EMIT3(0x83, 0xC6, imm32); | ||
| 1078 | else | ||
| 1079 | /* add %esi, imm32 */ | ||
| 1080 | EMIT2_off32(0x81, 0xC6, imm32); | ||
| 1081 | } | ||
| 1082 | } | ||
| 1083 | /* skb pointer is in R6 (%rbx), it will be copied into | ||
| 1084 | * %rdi if skb_copy_bits() call is necessary. | ||
| 1085 | * sk_load_* helpers also use %r10 and %r9d. | ||
| 1086 | * See bpf_jit.S | ||
| 1087 | */ | ||
| 1088 | if (seen_ax_reg) | ||
| 1089 | /* r10 = skb->data, mov %r10, off32(%rbx) */ | ||
| 1090 | EMIT3_off32(0x4c, 0x8b, 0x93, | ||
| 1091 | offsetof(struct sk_buff, data)); | ||
| 1092 | EMIT1_off32(0xE8, jmp_offset); /* call */ | ||
| 1093 | break; | ||
| 1094 | |||
| 1095 | case BPF_LD | BPF_IND | BPF_H: | ||
| 1096 | func = sk_load_half; | ||
| 1097 | goto common_load; | ||
| 1098 | case BPF_LD | BPF_ABS | BPF_H: | ||
| 1099 | func = CHOOSE_LOAD_FUNC(imm32, sk_load_half); | ||
| 1100 | goto common_load; | ||
| 1101 | case BPF_LD | BPF_IND | BPF_B: | ||
| 1102 | func = sk_load_byte; | ||
| 1103 | goto common_load; | ||
| 1104 | case BPF_LD | BPF_ABS | BPF_B: | ||
| 1105 | func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte); | ||
| 1106 | goto common_load; | ||
| 1107 | |||
| 1108 | case BPF_JMP | BPF_EXIT: | 1001 | case BPF_JMP | BPF_EXIT: |
| 1109 | if (seen_exit) { | 1002 | if (seen_exit) { |
| 1110 | jmp_offset = ctx->cleanup_addr - addrs[i]; | 1003 | jmp_offset = ctx->cleanup_addr - addrs[i]; |
| 1111 | goto emit_jmp; | 1004 | goto emit_jmp; |
| 1112 | } | 1005 | } |
| 1113 | seen_exit = true; | 1006 | seen_exit = true; |
| 1114 | /* update cleanup_addr */ | 1007 | /* Update cleanup_addr */ |
| 1115 | ctx->cleanup_addr = proglen; | 1008 | ctx->cleanup_addr = proglen; |
| 1116 | /* mov rbx, qword ptr [rbp+0] */ | 1009 | /* mov rbx, qword ptr [rbp+0] */ |
| 1117 | EMIT4(0x48, 0x8B, 0x5D, 0); | 1010 | EMIT4(0x48, 0x8B, 0x5D, 0); |
| @@ -1129,10 +1022,11 @@ common_load: | |||
| 1129 | break; | 1022 | break; |
| 1130 | 1023 | ||
| 1131 | default: | 1024 | default: |
| 1132 | /* By design x64 JIT should support all BPF instructions | 1025 | /* |
| 1026 | * By design x86-64 JIT should support all BPF instructions. | ||
| 1133 | * This error will be seen if new instruction was added | 1027 | * This error will be seen if new instruction was added |
| 1134 | * to interpreter, but not to JIT | 1028 | * to the interpreter, but not to the JIT, or if there is |
| 1135 | * or if there is junk in bpf_prog | 1029 | * junk in bpf_prog. |
| 1136 | */ | 1030 | */ |
| 1137 | pr_err("bpf_jit: unknown opcode %02x\n", insn->code); | 1031 | pr_err("bpf_jit: unknown opcode %02x\n", insn->code); |
| 1138 | return -EINVAL; | 1032 | return -EINVAL; |
| @@ -1184,7 +1078,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) | |||
| 1184 | return orig_prog; | 1078 | return orig_prog; |
| 1185 | 1079 | ||
| 1186 | tmp = bpf_jit_blind_constants(prog); | 1080 | tmp = bpf_jit_blind_constants(prog); |
| 1187 | /* If blinding was requested and we failed during blinding, | 1081 | /* |
| 1082 | * If blinding was requested and we failed during blinding, | ||
| 1188 | * we must fall back to the interpreter. | 1083 | * we must fall back to the interpreter. |
| 1189 | */ | 1084 | */ |
| 1190 | if (IS_ERR(tmp)) | 1085 | if (IS_ERR(tmp)) |
| @@ -1218,8 +1113,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) | |||
| 1218 | goto out_addrs; | 1113 | goto out_addrs; |
| 1219 | } | 1114 | } |
| 1220 | 1115 | ||
| 1221 | /* Before first pass, make a rough estimation of addrs[] | 1116 | /* |
| 1222 | * each bpf instruction is translated to less than 64 bytes | 1117 | * Before first pass, make a rough estimation of addrs[] |
| 1118 | * each BPF instruction is translated to less than 64 bytes | ||
| 1223 | */ | 1119 | */ |
| 1224 | for (proglen = 0, i = 0; i < prog->len; i++) { | 1120 | for (proglen = 0, i = 0; i < prog->len; i++) { |
| 1225 | proglen += 64; | 1121 | proglen += 64; |
| @@ -1228,10 +1124,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) | |||
| 1228 | ctx.cleanup_addr = proglen; | 1124 | ctx.cleanup_addr = proglen; |
| 1229 | skip_init_addrs: | 1125 | skip_init_addrs: |
| 1230 | 1126 | ||
| 1231 | /* JITed image shrinks with every pass and the loop iterates | 1127 | /* |
| 1232 | * until the image stops shrinking. Very large bpf programs | 1128 | * JITed image shrinks with every pass and the loop iterates |
| 1129 | * until the image stops shrinking. Very large BPF programs | ||
| 1233 | * may converge on the last pass. In such case do one more | 1130 | * may converge on the last pass. In such case do one more |
| 1234 | * pass to emit the final image | 1131 | * pass to emit the final image. |
| 1235 | */ | 1132 | */ |
| 1236 | for (pass = 0; pass < 20 || image; pass++) { | 1133 | for (pass = 0; pass < 20 || image; pass++) { |
| 1237 | proglen = do_jit(prog, addrs, image, oldproglen, &ctx); | 1134 | proglen = do_jit(prog, addrs, image, oldproglen, &ctx); |
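The loop above iterates because shrinking jump offsets can flip imm32 encodings to imm8 encodings, which shrinks the image further; a rough model of that fixed-point search (do_pass() below is a stand-in for do_jit(), not the kernel API):

    #include <stdio.h>

    /* Stand-in for do_jit(): pretend each pass shrinks the image by 20
     * bytes until it reaches a 100-byte fixed point.
     */
    static int do_pass(int prev_len)
    {
            return prev_len > 100 ? prev_len - 20 : 100;
    }

    int main(void)
    {
            int oldlen = 160, len, pass;

            for (pass = 0; pass < 20; pass++) {
                    len = do_pass(oldlen);
                    if (len == oldlen) {
                            printf("converged at %d bytes after %d passes\n",
                                   len, pass + 1);
                            break;
                    }
                    oldlen = len;
            }
            return 0;
    }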
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c new file mode 100644 index 000000000000..0cc04e30adc1 --- /dev/null +++ b/arch/x86/net/bpf_jit_comp32.c | |||
| @@ -0,0 +1,2419 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | ||
| 2 | /* | ||
| 3 | * Just-In-Time compiler for eBPF filters on IA32 (32bit x86) | ||
| 4 | * | ||
| 5 | * Author: Wang YanQing (udknight@gmail.com) | ||
| 6 | * The code is based on code and ideas from: | ||
| 7 | * Eric Dumazet (eric.dumazet@gmail.com) | ||
| 8 | * and from: | ||
| 9 | * Shubham Bansal <illusionist.neo@gmail.com> | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/netdevice.h> | ||
| 13 | #include <linux/filter.h> | ||
| 14 | #include <linux/if_vlan.h> | ||
| 15 | #include <asm/cacheflush.h> | ||
| 16 | #include <asm/set_memory.h> | ||
| 17 | #include <asm/nospec-branch.h> | ||
| 18 | #include <linux/bpf.h> | ||
| 19 | |||
| 20 | /* | ||
| 21 | * eBPF prog stack layout: | ||
| 22 | * | ||
| 23 | * high | ||
| 24 | * original ESP => +-----+ | ||
| 25 | * | | callee saved registers | ||
| 26 | * +-----+ | ||
| 27 | * | ... | eBPF JIT scratch space | ||
| 28 | * BPF_FP,IA32_EBP => +-----+ | ||
| 29 | * | ... | eBPF prog stack | ||
| 30 | * +-----+ | ||
| 31 | * |RSVD | JIT scratchpad | ||
| 32 | * current ESP => +-----+ | ||
| 33 | * | | | ||
| 34 | * | ... | Function call stack | ||
| 35 | * | | | ||
| 36 | * +-----+ | ||
| 37 | * low | ||
| 38 | * | ||
| 39 | * The callee saved registers: | ||
| 40 | * | ||
| 41 | * high | ||
| 42 | * original ESP => +------------------+ \ | ||
| 43 | * | ebp | | | ||
| 44 | * current EBP => +------------------+ } callee saved registers | ||
| 45 | * | ebx,esi,edi | | | ||
| 46 | * +------------------+ / | ||
| 47 | * low | ||
| 48 | */ | ||
| 49 | |||
| 50 | static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) | ||
| 51 | { | ||
| 52 | if (len == 1) | ||
| 53 | *ptr = bytes; | ||
| 54 | else if (len == 2) | ||
| 55 | *(u16 *)ptr = bytes; | ||
| 56 | else { | ||
| 57 | *(u32 *)ptr = bytes; | ||
| 58 | barrier(); | ||
| 59 | } | ||
| 60 | return ptr + len; | ||
| 61 | } | ||
| 62 | |||
| 63 | #define EMIT(bytes, len) \ | ||
| 64 | do { prog = emit_code(prog, bytes, len); cnt += len; } while (0) | ||
| 65 | |||
| 66 | #define EMIT1(b1) EMIT(b1, 1) | ||
| 67 | #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) | ||
| 68 | #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) | ||
| 69 | #define EMIT4(b1, b2, b3, b4) \ | ||
| 70 | EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) | ||
| 71 | |||
| 72 | #define EMIT1_off32(b1, off) \ | ||
| 73 | do { EMIT1(b1); EMIT(off, 4); } while (0) | ||
| 74 | #define EMIT2_off32(b1, b2, off) \ | ||
| 75 | do { EMIT2(b1, b2); EMIT(off, 4); } while (0) | ||
| 76 | #define EMIT3_off32(b1, b2, b3, off) \ | ||
| 77 | do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) | ||
| 78 | #define EMIT4_off32(b1, b2, b3, b4, off) \ | ||
| 79 | do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) | ||
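The emit_code() store above is little-endian, which is why EMIT2(b1, b2) packs b1 + (b2 << 8): the opcode byte b1 lands first in memory, followed by b2 (safe here because this JIT only ever runs on x86). A standalone sketch of the packing (emit_code() is re-declared locally and simplified to a memcpy):

    #include <stdio.h>
    #include <string.h>

    typedef unsigned char u8;
    typedef unsigned int u32;

    /* Simplified local copy: a little-endian store of up to 4 bytes */
    static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
    {
            memcpy(ptr, &bytes, len);
            return ptr + len;
    }

    int main(void)
    {
            u8 buf[4], *prog = buf;

            /* EMIT2(0x31, 0xC0) -> the two bytes of "xor eax,eax" */
            prog = emit_code(prog, 0x31 + (0xC0 << 8), 2);
            printf("%02x %02x\n", buf[0], buf[1]);  /* prints: 31 c0 */
            return 0;
    }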
| 80 | |||
| 81 | #define jmp_label(label, jmp_insn_len) (label - cnt - jmp_insn_len) | ||
| 82 | |||
| 83 | static bool is_imm8(int value) | ||
| 84 | { | ||
| 85 | return value <= 127 && value >= -128; | ||
| 86 | } | ||
| 87 | |||
| 88 | static bool is_simm32(s64 value) | ||
| 89 | { | ||
| 90 | return value == (s64) (s32) value; | ||
| 91 | } | ||
| 92 | |||
| 93 | #define STACK_OFFSET(k) (k) | ||
| 94 | #define TCALL_CNT (MAX_BPF_JIT_REG + 0) /* Tail Call Count */ | ||
| 95 | |||
| 96 | #define IA32_EAX (0x0) | ||
| 97 | #define IA32_EBX (0x3) | ||
| 98 | #define IA32_ECX (0x1) | ||
| 99 | #define IA32_EDX (0x2) | ||
| 100 | #define IA32_ESI (0x6) | ||
| 101 | #define IA32_EDI (0x7) | ||
| 102 | #define IA32_EBP (0x5) | ||
| 103 | #define IA32_ESP (0x4) | ||
| 104 | |||
| 105 | /* | ||
| 106 | * List of x86 conditional jump opcodes (. + s8) | ||
| 107 | * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) | ||
| 108 | */ | ||
| 109 | #define IA32_JB 0x72 | ||
| 110 | #define IA32_JAE 0x73 | ||
| 111 | #define IA32_JE 0x74 | ||
| 112 | #define IA32_JNE 0x75 | ||
| 113 | #define IA32_JBE 0x76 | ||
| 114 | #define IA32_JA 0x77 | ||
| 115 | #define IA32_JL 0x7C | ||
| 116 | #define IA32_JGE 0x7D | ||
| 117 | #define IA32_JLE 0x7E | ||
| 118 | #define IA32_JG 0x7F | ||
| 119 | |||
| 120 | /* | ||
| 121 | * Map eBPF registers to IA32 32bit registers or stack scratch space. | ||
| 122 | * | ||
| 123 | * 1. All the registers, R0-R10, are mapped to scratch space on stack. | ||
| 124 | * 2. We need two 64 bit temp registers to do complex operations on eBPF | ||
| 125 | * registers. | ||
| 126 | * 3. For performance reasons, BPF_REG_AX, used for blinding constants, is | ||
| 127 | * mapped to a real hardware register pair, IA32_ESI and IA32_EDI. | ||
| 128 | * | ||
| 129 | * As the eBPF registers are all 64-bit and IA32 has only 32-bit registers, | ||
| 130 | * we have to map each eBPF register to two IA32 32-bit registers or to | ||
| 131 | * scratch memory, and build each 64-bit eBPF register from those halves. | ||
| 132 | * | ||
| 133 | * We use IA32_EAX, IA32_EDX, IA32_ECX, IA32_EBX as temporary registers. | ||
| 134 | */ | ||
| 135 | static const u8 bpf2ia32[][2] = { | ||
| 136 | /* Return value from in-kernel function, and exit value from eBPF */ | ||
| 137 | [BPF_REG_0] = {STACK_OFFSET(0), STACK_OFFSET(4)}, | ||
| 138 | |||
| 139 | /* The arguments from eBPF program to in-kernel function */ | ||
| 140 | /* Stored on stack scratch space */ | ||
| 141 | [BPF_REG_1] = {STACK_OFFSET(8), STACK_OFFSET(12)}, | ||
| 142 | [BPF_REG_2] = {STACK_OFFSET(16), STACK_OFFSET(20)}, | ||
| 143 | [BPF_REG_3] = {STACK_OFFSET(24), STACK_OFFSET(28)}, | ||
| 144 | [BPF_REG_4] = {STACK_OFFSET(32), STACK_OFFSET(36)}, | ||
| 145 | [BPF_REG_5] = {STACK_OFFSET(40), STACK_OFFSET(44)}, | ||
| 146 | |||
| 147 | /* Callee saved registers that in-kernel function will preserve */ | ||
| 148 | /* Stored on stack scratch space */ | ||
| 149 | [BPF_REG_6] = {STACK_OFFSET(48), STACK_OFFSET(52)}, | ||
| 150 | [BPF_REG_7] = {STACK_OFFSET(56), STACK_OFFSET(60)}, | ||
| 151 | [BPF_REG_8] = {STACK_OFFSET(64), STACK_OFFSET(68)}, | ||
| 152 | [BPF_REG_9] = {STACK_OFFSET(72), STACK_OFFSET(76)}, | ||
| 153 | |||
| 154 | /* Read-only frame pointer used to access the stack */ | ||
| 155 | [BPF_REG_FP] = {STACK_OFFSET(80), STACK_OFFSET(84)}, | ||
| 156 | |||
| 157 | /* Temporary register for blinding constants. */ | ||
| 158 | [BPF_REG_AX] = {IA32_ESI, IA32_EDI}, | ||
| 159 | |||
| 160 | /* Tail call count. Stored on stack scratch space. */ | ||
| 161 | [TCALL_CNT] = {STACK_OFFSET(88), STACK_OFFSET(92)}, | ||
| 162 | }; | ||
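With this mapping, a 64-bit eBPF register is normally a pair of 4-byte stack slots addressed off IA32_EBP; per the table, BPF_REG_1 lives at [ebp+8] (low word) and [ebp+12] (high word). A tiny lookup sketch mirroring the table (the array literal below is a trimmed illustrative copy, not the kernel's):

    #include <stdio.h>

    int main(void)
    {
            /* Trimmed copy of the table: BPF_REG_1 -> two stack slots */
            unsigned char bpf2ia32[][2] = {
                    [1] = { 8, 12 },        /* lo, hi offsets from ebp */
            };

            printf("BPF_REG_1: lo at [ebp+%d], hi at [ebp+%d]\n",
                   bpf2ia32[1][0], bpf2ia32[1][1]);
            return 0;
    }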
| 163 | |||
| 164 | #define dst_lo dst[0] | ||
| 165 | #define dst_hi dst[1] | ||
| 166 | #define src_lo src[0] | ||
| 167 | #define src_hi src[1] | ||
| 168 | |||
| 169 | #define STACK_ALIGNMENT 8 | ||
| 170 | /* | ||
| 171 | * Stack space for BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, | ||
| 172 | * BPF_REG_5, BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9, | ||
| 173 | * BPF_REG_FP, BPF_REG_AX and the tail call count. | ||
| 174 | */ | ||
| 175 | #define SCRATCH_SIZE 96 | ||
| 176 | |||
| 177 | /* Total stack size used in JITed code */ | ||
| 178 | #define _STACK_SIZE (stack_depth + SCRATCH_SIZE) | ||
| 179 | |||
| 180 | #define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT) | ||
| 181 | |||
| 182 | /* Get the offset of an eBPF register stored in the scratch space. */ | ||
| 183 | #define STACK_VAR(off) (off) | ||
| 184 | |||
| 185 | /* Encode 'dst_reg' register into IA32 opcode 'byte' */ | ||
| 186 | static u8 add_1reg(u8 byte, u32 dst_reg) | ||
| 187 | { | ||
| 188 | return byte + dst_reg; | ||
| 189 | } | ||
| 190 | |||
| 191 | /* Encode 'dst_reg' and 'src_reg' registers into IA32 opcode 'byte' */ | ||
| 192 | static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) | ||
| 193 | { | ||
| 194 | return byte + dst_reg + (src_reg << 3); | ||
| 195 | } | ||
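The arithmetic in add_2reg() is plain ModRM assembly: the incoming 'byte' carries the mod bits (0xC0 register-direct, 0x40 register + disp8), the destination register number fills the r/m field, and the source is shifted into the reg field (bits 3-5). A sketch with concrete numbers (register encodings as in the defines above):

    #include <stdio.h>

    static unsigned char add_2reg(unsigned char byte, unsigned int dst,
                                  unsigned int src)
    {
            return byte + dst + (src << 3);
    }

    int main(void)
    {
            /* 0x89 /r = mov r/m32,r32; IA32_EBX = 3, IA32_EAX = 0 */
            printf("mov ebx,eax -> 89 %02x\n", add_2reg(0xC0, 3, 0));
            return 0;       /* prints: mov ebx,eax -> 89 c3 */
    }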
| 196 | |||
| 197 | static void jit_fill_hole(void *area, unsigned int size) | ||
| 198 | { | ||
| 199 | /* Fill whole space with int3 instructions */ | ||
| 200 | memset(area, 0xcc, size); | ||
| 201 | } | ||
| 202 | |||
| 203 | static inline void emit_ia32_mov_i(const u8 dst, const u32 val, bool dstk, | ||
| 204 | u8 **pprog) | ||
| 205 | { | ||
| 206 | u8 *prog = *pprog; | ||
| 207 | int cnt = 0; | ||
| 208 | |||
| 209 | if (dstk) { | ||
| 210 | if (val == 0) { | ||
| 211 | /* xor eax,eax */ | ||
| 212 | EMIT2(0x33, add_2reg(0xC0, IA32_EAX, IA32_EAX)); | ||
| 213 | /* mov dword ptr [ebp+off],eax */ | ||
| 214 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 215 | STACK_VAR(dst)); | ||
| 216 | } else { | ||
| 217 | EMIT3_off32(0xC7, add_1reg(0x40, IA32_EBP), | ||
| 218 | STACK_VAR(dst), val); | ||
| 219 | } | ||
| 220 | } else { | ||
| 221 | if (val == 0) | ||
| 222 | EMIT2(0x33, add_2reg(0xC0, dst, dst)); | ||
| 223 | else | ||
| 224 | EMIT2_off32(0xC7, add_1reg(0xC0, dst), | ||
| 225 | val); | ||
| 226 | } | ||
| 227 | *pprog = prog; | ||
| 228 | } | ||
| 229 | |||
| 230 | /* dst = src (4 bytes) */ | ||
| 231 | static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk, | ||
| 232 | bool sstk, u8 **pprog) | ||
| 233 | { | ||
| 234 | u8 *prog = *pprog; | ||
| 235 | int cnt = 0; | ||
| 236 | u8 sreg = sstk ? IA32_EAX : src; | ||
| 237 | |||
| 238 | if (sstk) | ||
| 239 | /* mov eax,dword ptr [ebp+off] */ | ||
| 240 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src)); | ||
| 241 | if (dstk) | ||
| 242 | /* mov dword ptr [ebp+off],eax */ | ||
| 243 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, sreg), STACK_VAR(dst)); | ||
| 244 | else | ||
| 245 | /* mov dst,sreg */ | ||
| 246 | EMIT2(0x89, add_2reg(0xC0, dst, sreg)); | ||
| 247 | |||
| 248 | *pprog = prog; | ||
| 249 | } | ||
| 250 | |||
| 251 | /* dst = src */ | ||
| 252 | static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[], | ||
| 253 | const u8 src[], bool dstk, | ||
| 254 | bool sstk, u8 **pprog) | ||
| 255 | { | ||
| 256 | emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog); | ||
| 257 | if (is64) | ||
| 258 | /* complete 8 byte move */ | ||
| 259 | emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog); | ||
| 260 | else | ||
| 261 | /* zero out high 4 bytes */ | ||
| 262 | emit_ia32_mov_i(dst_hi, 0, dstk, pprog); | ||
| 263 | } | ||
| 264 | |||
| 265 | /* Sign extended move */ | ||
| 266 | static inline void emit_ia32_mov_i64(const bool is64, const u8 dst[], | ||
| 267 | const u32 val, bool dstk, u8 **pprog) | ||
| 268 | { | ||
| 269 | u32 hi = 0; | ||
| 270 | |||
| 271 | if (is64 && (val & (1<<31))) | ||
| 272 | hi = (u32)~0; | ||
| 273 | emit_ia32_mov_i(dst_lo, val, dstk, pprog); | ||
| 274 | emit_ia32_mov_i(dst_hi, hi, dstk, pprog); | ||
| 275 | } | ||
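emit_ia32_mov_i64() extends the 32-bit immediate by replicating its sign bit into the high word. A one-liner C model of that decision (the value below is invented):

    #include <stdio.h>

    int main(void)
    {
            unsigned int val = (unsigned int)-2;    /* 0xFFFFFFFE */
            unsigned int hi = (val & (1u << 31)) ? ~0u : 0;

            printf("lo=0x%08x hi=0x%08x\n", val, hi);
            return 0;       /* lo=0xfffffffe hi=0xffffffff */
    }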
| 276 | |||
| 277 | /* | ||
| 278 | * ALU operation (32 bit) | ||
| 279 | * dst = dst * src | ||
| 280 | */ | ||
| 281 | static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk, | ||
| 282 | bool sstk, u8 **pprog) | ||
| 283 | { | ||
| 284 | u8 *prog = *pprog; | ||
| 285 | int cnt = 0; | ||
| 286 | u8 sreg = sstk ? IA32_ECX : src; | ||
| 287 | |||
| 288 | if (sstk) | ||
| 289 | /* mov ecx,dword ptr [ebp+off] */ | ||
| 290 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src)); | ||
| 291 | |||
| 292 | if (dstk) | ||
| 293 | /* mov eax,dword ptr [ebp+off] */ | ||
| 294 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst)); | ||
| 295 | else | ||
| 296 | /* mov eax,dst */ | ||
| 297 | EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX)); | ||
| 298 | |||
| 299 | /* mul sreg: edx:eax = eax * sreg; only the low half is kept */ | ||
| 300 | EMIT2(0xF7, add_1reg(0xE0, sreg)); | ||
| 301 | |||
| 302 | if (dstk) | ||
| 303 | /* mov dword ptr [ebp+off],eax */ | ||
| 304 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 305 | STACK_VAR(dst)); | ||
| 306 | else | ||
| 307 | /* mov dst,eax */ | ||
| 308 | EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX)); | ||
| 309 | |||
| 310 | *pprog = prog; | ||
| 311 | } | ||
| 312 | |||
| 313 | static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val, | ||
| 314 | bool dstk, u8 **pprog) | ||
| 315 | { | ||
| 316 | u8 *prog = *pprog; | ||
| 317 | int cnt = 0; | ||
| 318 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 319 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 320 | |||
| 321 | if (dstk && val != 64) { | ||
| 322 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 323 | STACK_VAR(dst_lo)); | ||
| 324 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 325 | STACK_VAR(dst_hi)); | ||
| 326 | } | ||
| 327 | switch (val) { | ||
| 328 | case 16: | ||
| 329 | /* | ||
| 330 | * Emit 'movzwl eax,ax' to zero extend 16-bit | ||
| 331 | * into 64 bit | ||
| 332 | */ | ||
| 333 | EMIT2(0x0F, 0xB7); | ||
| 334 | EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
| 335 | /* xor dreg_hi,dreg_hi */ | ||
| 336 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
| 337 | break; | ||
| 338 | case 32: | ||
| 339 | /* xor dreg_hi,dreg_hi */ | ||
| 340 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
| 341 | break; | ||
| 342 | case 64: | ||
| 343 | /* nop */ | ||
| 344 | break; | ||
| 345 | } | ||
| 346 | |||
| 347 | if (dstk && val != 64) { | ||
| 348 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
| 349 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
| 350 | STACK_VAR(dst_lo)); | ||
| 351 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
| 352 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
| 353 | STACK_VAR(dst_hi)); | ||
| 354 | } | ||
| 355 | *pprog = prog; | ||
| 356 | } | ||
| 357 | |||
| 358 | static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val, | ||
| 359 | bool dstk, u8 **pprog) | ||
| 360 | { | ||
| 361 | u8 *prog = *pprog; | ||
| 362 | int cnt = 0; | ||
| 363 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 364 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 365 | |||
| 366 | if (dstk) { | ||
| 367 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 368 | STACK_VAR(dst_lo)); | ||
| 369 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 370 | STACK_VAR(dst_hi)); | ||
| 371 | } | ||
| 372 | switch (val) { | ||
| 373 | case 16: | ||
| 374 | /* Emit 'ror %ax, 8' to swap lower 2 bytes */ | ||
| 375 | EMIT1(0x66); | ||
| 376 | EMIT3(0xC1, add_1reg(0xC8, dreg_lo), 8); | ||
| 377 | |||
| 378 | EMIT2(0x0F, 0xB7); | ||
| 379 | EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
| 380 | |||
| 381 | /* xor dreg_hi,dreg_hi */ | ||
| 382 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
| 383 | break; | ||
| 384 | case 32: | ||
| 385 | /* Emit 'bswap eax' to swap lower 4 bytes */ | ||
| 386 | EMIT1(0x0F); | ||
| 387 | EMIT1(add_1reg(0xC8, dreg_lo)); | ||
| 388 | |||
| 389 | /* xor dreg_hi,dreg_hi */ | ||
| 390 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
| 391 | break; | ||
| 392 | case 64: | ||
| 393 | /* Emit 'bswap eax' to swap lower 4 bytes */ | ||
| 394 | EMIT1(0x0F); | ||
| 395 | EMIT1(add_1reg(0xC8, dreg_lo)); | ||
| 396 | |||
| 397 | /* Emit 'bswap edx' to swap lower 4 bytes */ | ||
| 398 | EMIT1(0x0F); | ||
| 399 | EMIT1(add_1reg(0xC8, dreg_hi)); | ||
| 400 | |||
| 401 | /* mov ecx,dreg_hi */ | ||
| 402 | EMIT2(0x89, add_2reg(0xC0, IA32_ECX, dreg_hi)); | ||
| 403 | /* mov dreg_hi,dreg_lo */ | ||
| 404 | EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo)); | ||
| 405 | /* mov dreg_lo,ecx */ | ||
| 406 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX)); | ||
| 407 | |||
| 408 | break; | ||
| 409 | } | ||
| 410 | if (dstk) { | ||
| 411 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
| 412 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
| 413 | STACK_VAR(dst_lo)); | ||
| 414 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
| 415 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
| 416 | STACK_VAR(dst_hi)); | ||
| 417 | } | ||
| 418 | *pprog = prog; | ||
| 419 | } | ||
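The 64 case above is a 64-bit byte swap composed from 32-bit pieces: bswap each half, then exchange the halves (the JITed code uses ecx as scratch for the exchange). A C model of the same composition (bswap32() is a local helper, not a kernel function; values invented):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t bswap32(uint32_t x)
    {
            return (x >> 24) | ((x >> 8) & 0xFF00) |
                   ((x << 8) & 0xFF0000) | (x << 24);
    }

    int main(void)
    {
            uint32_t lo = 0x11223344, hi = 0x55667788;
            uint32_t new_lo = bswap32(hi), new_hi = bswap32(lo);

            printf("%08x%08x\n", new_hi, new_lo);   /* 4433221188776655 */
            return 0;
    }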
| 420 | |||
| 421 | /* | ||
| 422 | * ALU operation (32 bit) | ||
| 423 | * dst = dst (div|mod) src | ||
| 424 | */ | ||
| 425 | static inline void emit_ia32_div_mod_r(const u8 op, const u8 dst, const u8 src, | ||
| 426 | bool dstk, bool sstk, u8 **pprog) | ||
| 427 | { | ||
| 428 | u8 *prog = *pprog; | ||
| 429 | int cnt = 0; | ||
| 430 | |||
| 431 | if (sstk) | ||
| 432 | /* mov ecx,dword ptr [ebp+off] */ | ||
| 433 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
| 434 | STACK_VAR(src)); | ||
| 435 | else if (src != IA32_ECX) | ||
| 436 | /* mov ecx,src */ | ||
| 437 | EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX)); | ||
| 438 | |||
| 439 | if (dstk) | ||
| 440 | /* mov eax,dword ptr [ebp+off] */ | ||
| 441 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 442 | STACK_VAR(dst)); | ||
| 443 | else | ||
| 444 | /* mov eax,dst */ | ||
| 445 | EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX)); | ||
| 446 | |||
| 447 | /* xor edx,edx */ | ||
| 448 | EMIT2(0x31, add_2reg(0xC0, IA32_EDX, IA32_EDX)); | ||
| 449 | /* div ecx */ | ||
| 450 | EMIT2(0xF7, add_1reg(0xF0, IA32_ECX)); | ||
| 451 | |||
| 452 | if (op == BPF_MOD) { | ||
| 453 | if (dstk) | ||
| 454 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 455 | STACK_VAR(dst)); | ||
| 456 | else | ||
| 457 | EMIT2(0x89, add_2reg(0xC0, dst, IA32_EDX)); | ||
| 458 | } else { | ||
| 459 | if (dstk) | ||
| 460 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 461 | STACK_VAR(dst)); | ||
| 462 | else | ||
| 463 | EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX)); | ||
| 464 | } | ||
| 465 | *pprog = prog; | ||
| 466 | } | ||
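The lowering above relies on the x86 one-operand div: with edx zeroed, 'div ecx' divides edx:eax by ecx, leaving the quotient in eax and the remainder in edx; BPF_DIV stores eax back, BPF_MOD stores edx. The C operation being implemented is simply unsigned / and %:

    #include <stdio.h>

    int main(void)
    {
            unsigned int dst = 17, src = 5;

            printf("div: %u, mod: %u\n", dst / src, dst % src); /* 3, 2 */
            return 0;
    }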
| 467 | |||
| 468 | /* | ||
| 469 | * ALU operation (32 bit) | ||
| 470 | * dst = dst (shift) src | ||
| 471 | */ | ||
| 472 | static inline void emit_ia32_shift_r(const u8 op, const u8 dst, const u8 src, | ||
| 473 | bool dstk, bool sstk, u8 **pprog) | ||
| 474 | { | ||
| 475 | u8 *prog = *pprog; | ||
| 476 | int cnt = 0; | ||
| 477 | u8 dreg = dstk ? IA32_EAX : dst; | ||
| 478 | u8 b2; | ||
| 479 | |||
| 480 | if (dstk) | ||
| 481 | /* mov eax,dword ptr [ebp+off] */ | ||
| 482 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst)); | ||
| 483 | |||
| 484 | if (sstk) | ||
| 485 | /* mov ecx,dword ptr [ebp+off] */ | ||
| 486 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src)); | ||
| 487 | else if (src != IA32_ECX) | ||
| 488 | /* mov ecx,src */ | ||
| 489 | EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX)); | ||
| 490 | |||
| 491 | switch (op) { | ||
| 492 | case BPF_LSH: | ||
| 493 | b2 = 0xE0; break; | ||
| 494 | case BPF_RSH: | ||
| 495 | b2 = 0xE8; break; | ||
| 496 | case BPF_ARSH: | ||
| 497 | b2 = 0xF8; break; | ||
| 498 | default: | ||
| 499 | return; | ||
| 500 | } | ||
| 501 | EMIT2(0xD3, add_1reg(b2, dreg)); | ||
| 502 | |||
| 503 | if (dstk) | ||
| 504 | /* mov dword ptr [ebp+off],dreg */ | ||
| 505 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), STACK_VAR(dst)); | ||
| 506 | *pprog = prog; | ||
| 507 | } | ||
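The b2 values above index the 0xD3 shift-by-cl opcode group: the ModRM reg field selects the operation, /4 (base 0xE0) shl, /5 (0xE8) shr, /7 (0xF8) sar. A sketch printing the resulting encodings (grp[] is a local illustration; register numbers as in the defines above):

    #include <stdio.h>

    int main(void)
    {
            struct { const char *op; unsigned char base; } grp[] = {
                    { "shl", 0xE0 }, { "shr", 0xE8 }, { "sar", 0xF8 },
            };
            unsigned int ecx = 1, i;        /* IA32_ECX */

            for (i = 0; i < 3; i++)
                    printf("%s ecx,cl -> d3 %02x\n", grp[i].op,
                           grp[i].base + ecx);
            return 0;       /* d3 e1, d3 e9, d3 f9 */
    }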
| 508 | |||
| 509 | /* | ||
| 510 | * ALU operation (32 bit) | ||
| 511 | * dst = dst (op) src | ||
| 512 | */ | ||
| 513 | static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op, | ||
| 514 | const u8 dst, const u8 src, bool dstk, | ||
| 515 | bool sstk, u8 **pprog) | ||
| 516 | { | ||
| 517 | u8 *prog = *pprog; | ||
| 518 | int cnt = 0; | ||
| 519 | u8 sreg = sstk ? IA32_EAX : src; | ||
| 520 | u8 dreg = dstk ? IA32_EDX : dst; | ||
| 521 | |||
| 522 | if (sstk) | ||
| 523 | /* mov eax,dword ptr [ebp+off] */ | ||
| 524 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src)); | ||
| 525 | |||
| 526 | if (dstk) | ||
| 527 | /* mov edx,dword ptr [ebp+off] */ | ||
| 528 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst)); | ||
| 529 | |||
| 530 | switch (BPF_OP(op)) { | ||
| 531 | /* dst = dst + src */ | ||
| 532 | case BPF_ADD: | ||
| 533 | if (hi && is64) | ||
| 534 | EMIT2(0x11, add_2reg(0xC0, dreg, sreg)); | ||
| 535 | else | ||
| 536 | EMIT2(0x01, add_2reg(0xC0, dreg, sreg)); | ||
| 537 | break; | ||
| 538 | /* dst = dst - src */ | ||
| 539 | case BPF_SUB: | ||
| 540 | if (hi && is64) | ||
| 541 | EMIT2(0x19, add_2reg(0xC0, dreg, sreg)); | ||
| 542 | else | ||
| 543 | EMIT2(0x29, add_2reg(0xC0, dreg, sreg)); | ||
| 544 | break; | ||
| 545 | /* dst = dst | src */ | ||
| 546 | case BPF_OR: | ||
| 547 | EMIT2(0x09, add_2reg(0xC0, dreg, sreg)); | ||
| 548 | break; | ||
| 549 | /* dst = dst & src */ | ||
| 550 | case BPF_AND: | ||
| 551 | EMIT2(0x21, add_2reg(0xC0, dreg, sreg)); | ||
| 552 | break; | ||
| 553 | /* dst = dst ^ src */ | ||
| 554 | case BPF_XOR: | ||
| 555 | EMIT2(0x31, add_2reg(0xC0, dreg, sreg)); | ||
| 556 | break; | ||
| 557 | } | ||
| 558 | |||
| 559 | if (dstk) | ||
| 560 | /* mov dword ptr [ebp+off],dreg */ | ||
| 561 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), | ||
| 562 | STACK_VAR(dst)); | ||
| 563 | *pprog = prog; | ||
| 564 | } | ||
| 565 | |||
| 566 | /* ALU operation (64 bit) */ | ||
| 567 | static inline void emit_ia32_alu_r64(const bool is64, const u8 op, | ||
| 568 | const u8 dst[], const u8 src[], | ||
| 569 | bool dstk, bool sstk, | ||
| 570 | u8 **pprog) | ||
| 571 | { | ||
| 572 | u8 *prog = *pprog; | ||
| 573 | |||
| 574 | emit_ia32_alu_r(is64, false, op, dst_lo, src_lo, dstk, sstk, &prog); | ||
| 575 | if (is64) | ||
| 576 | emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk, | ||
| 577 | &prog); | ||
| 578 | else | ||
| 579 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
| 580 | *pprog = prog; | ||
| 581 | } | ||
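For the 64-bit case this splits one ALU op into a low-word op plus a carry-propagating high-word op (add/adc and sub/sbb, per the 'hi && is64' branches in emit_ia32_alu_r()). A C model of the add case (values invented):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t dst_lo = 0xFFFFFFFF, dst_hi = 0, src_lo = 1, src_hi = 0;
            uint32_t lo = dst_lo + src_lo;
            uint32_t carry = lo < dst_lo;           /* CF of the low add */
            uint32_t hi = dst_hi + src_hi + carry;  /* the adc step */

            printf("0x%08x%08x\n", hi, lo);         /* 0x0000000100000000 */
            return 0;
    }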
| 582 | |||
| 583 | /* | ||
| 584 | * ALU operation (32 bit) | ||
| 585 | * dst = dst (op) val | ||
| 586 | */ | ||
| 587 | static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op, | ||
| 588 | const u8 dst, const s32 val, bool dstk, | ||
| 589 | u8 **pprog) | ||
| 590 | { | ||
| 591 | u8 *prog = *pprog; | ||
| 592 | int cnt = 0; | ||
| 593 | u8 dreg = dstk ? IA32_EAX : dst; | ||
| 594 | u8 sreg = IA32_EDX; | ||
| 595 | |||
| 596 | if (dstk) | ||
| 597 | /* mov eax,dword ptr [ebp+off] */ | ||
| 598 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst)); | ||
| 599 | |||
| 600 | if (!is_imm8(val)) | ||
| 601 | /* mov edx,imm32 */ | ||
| 602 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EDX), val); | ||
| 603 | |||
| 604 | switch (op) { | ||
| 605 | /* dst = dst + val */ | ||
| 606 | case BPF_ADD: | ||
| 607 | if (hi && is64) { | ||
| 608 | if (is_imm8(val)) | ||
| 609 | EMIT3(0x83, add_1reg(0xD0, dreg), val); | ||
| 610 | else | ||
| 611 | EMIT2(0x11, add_2reg(0xC0, dreg, sreg)); | ||
| 612 | } else { | ||
| 613 | if (is_imm8(val)) | ||
| 614 | EMIT3(0x83, add_1reg(0xC0, dreg), val); | ||
| 615 | else | ||
| 616 | EMIT2(0x01, add_2reg(0xC0, dreg, sreg)); | ||
| 617 | } | ||
| 618 | break; | ||
| 619 | /* dst = dst - val */ | ||
| 620 | case BPF_SUB: | ||
| 621 | if (hi && is64) { | ||
| 622 | if (is_imm8(val)) | ||
| 623 | EMIT3(0x83, add_1reg(0xD8, dreg), val); | ||
| 624 | else | ||
| 625 | EMIT2(0x19, add_2reg(0xC0, dreg, sreg)); | ||
| 626 | } else { | ||
| 627 | if (is_imm8(val)) | ||
| 628 | EMIT3(0x83, add_1reg(0xE8, dreg), val); | ||
| 629 | else | ||
| 630 | EMIT2(0x29, add_2reg(0xC0, dreg, sreg)); | ||
| 631 | } | ||
| 632 | break; | ||
| 633 | /* dst = dst | val */ | ||
| 634 | case BPF_OR: | ||
| 635 | if (is_imm8(val)) | ||
| 636 | EMIT3(0x83, add_1reg(0xC8, dreg), val); | ||
| 637 | else | ||
| 638 | EMIT2(0x09, add_2reg(0xC0, dreg, sreg)); | ||
| 639 | break; | ||
| 640 | /* dst = dst & val */ | ||
| 641 | case BPF_AND: | ||
| 642 | if (is_imm8(val)) | ||
| 643 | EMIT3(0x83, add_1reg(0xE0, dreg), val); | ||
| 644 | else | ||
| 645 | EMIT2(0x21, add_2reg(0xC0, dreg, sreg)); | ||
| 646 | break; | ||
| 647 | /* dst = dst ^ val */ | ||
| 648 | case BPF_XOR: | ||
| 649 | if (is_imm8(val)) | ||
| 650 | EMIT3(0x83, add_1reg(0xF0, dreg), val); | ||
| 651 | else | ||
| 652 | EMIT2(0x31, add_2reg(0xC0, dreg, sreg)); | ||
| 653 | break; | ||
| 654 | case BPF_NEG: | ||
| 655 | EMIT2(0xF7, add_1reg(0xD8, dreg)); | ||
| 656 | break; | ||
| 657 | } | ||
| 658 | |||
| 659 | if (dstk) | ||
| 660 | /* mov dword ptr [ebp+off],dreg */ | ||
| 661 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), | ||
| 662 | STACK_VAR(dst)); | ||
| 663 | *pprog = prog; | ||
| 664 | } | ||
| 665 | |||
| 666 | /* ALU operation (64 bit) */ | ||
| 667 | static inline void emit_ia32_alu_i64(const bool is64, const u8 op, | ||
| 668 | const u8 dst[], const u32 val, | ||
| 669 | bool dstk, u8 **pprog) | ||
| 670 | { | ||
| 671 | u8 *prog = *pprog; | ||
| 672 | u32 hi = 0; | ||
| 673 | |||
| 674 | if (is64 && (val & (1<<31))) | ||
| 675 | hi = (u32)~0; | ||
| 676 | |||
| 677 | emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog); | ||
| 678 | if (is64) | ||
| 679 | emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog); | ||
| 680 | else | ||
| 681 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
| 682 | |||
| 683 | *pprog = prog; | ||
| 684 | } | ||
| 685 | |||
| 686 | /* dst = -dst (64 bit) */ | ||
| 687 | static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog) | ||
| 688 | { | ||
| 689 | u8 *prog = *pprog; | ||
| 690 | int cnt = 0; | ||
| 691 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 692 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 693 | |||
| 694 | if (dstk) { | ||
| 695 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 696 | STACK_VAR(dst_lo)); | ||
| 697 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 698 | STACK_VAR(dst_hi)); | ||
| 699 | } | ||
| 700 | |||
| 701 | /* neg dreg_lo; sets CF when dreg_lo != 0 */ | ||
| 702 | EMIT2(0xF7, add_1reg(0xD8, dreg_lo)); | ||
| 703 | |||
| 704 | /* adc dreg_hi,0x0; fold the borrow into the high word */ | ||
| 705 | EMIT3(0x83, add_1reg(0xD0, dreg_hi), 0x00); | ||
| 706 | /* neg dreg_hi */ | ||
| 707 | EMIT2(0xF7, add_1reg(0xD8, dreg_hi)); | ||
| 714 | |||
| 715 | if (dstk) { | ||
| 716 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
| 717 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
| 718 | STACK_VAR(dst_lo)); | ||
| 719 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
| 720 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
| 721 | STACK_VAR(dst_hi)); | ||
| 722 | } | ||
| 723 | *pprog = prog; | ||
| 724 | } | ||
| 725 | |||
| 726 | /* dst = dst << src */ | ||
| 727 | static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[], | ||
| 728 | bool dstk, bool sstk, u8 **pprog) | ||
| 729 | { | ||
| 730 | u8 *prog = *pprog; | ||
| 731 | int cnt = 0; | ||
| 732 | static int jmp_label1 = -1; | ||
| 733 | static int jmp_label2 = -1; | ||
| 734 | static int jmp_label3 = -1; | ||
| 735 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 736 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 737 | |||
| 738 | if (dstk) { | ||
| 739 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 740 | STACK_VAR(dst_lo)); | ||
| 741 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 742 | STACK_VAR(dst_hi)); | ||
| 743 | } | ||
| 744 | |||
| 745 | if (sstk) | ||
| 746 | /* mov ecx,dword ptr [ebp+off] */ | ||
| 747 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
| 748 | STACK_VAR(src_lo)); | ||
| 749 | else | ||
| 750 | /* mov ecx,src_lo */ | ||
| 751 | EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); | ||
| 752 | |||
| 753 | /* cmp ecx,32 */ | ||
| 754 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); | ||
| 755 | /* Jumps when >= 32 */ | ||
| 756 | if (is_imm8(jmp_label(jmp_label1, 2))) | ||
| 757 | EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); | ||
| 758 | else | ||
| 759 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); | ||
| 760 | |||
| 761 | /* < 32 */ | ||
| 762 | /* shl dreg_hi,cl */ | ||
| 763 | EMIT2(0xD3, add_1reg(0xE0, dreg_hi)); | ||
| 764 | /* mov ebx,dreg_lo */ | ||
| 765 | EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
| 766 | /* shl dreg_lo,cl */ | ||
| 767 | EMIT2(0xD3, add_1reg(0xE0, dreg_lo)); | ||
| 768 | |||
| 769 | /* IA32_ECX = -IA32_ECX + 32 */ | ||
| 770 | /* neg ecx */ | ||
| 771 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
| 772 | /* add ecx,32 */ | ||
| 773 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
| 774 | |||
| 775 | /* shr ebx,cl */ | ||
| 776 | EMIT2(0xD3, add_1reg(0xE8, IA32_EBX)); | ||
| 777 | /* or dreg_hi,ebx */ | ||
| 778 | EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
| 779 | |||
| 780 | /* goto out; */ | ||
| 781 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
| 782 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
| 783 | else | ||
| 784 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
| 785 | |||
| 786 | /* >= 32 */ | ||
| 787 | if (jmp_label1 == -1) | ||
| 788 | jmp_label1 = cnt; | ||
| 789 | |||
| 790 | /* cmp ecx,64 */ | ||
| 791 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); | ||
| 792 | /* Jumps when >= 64 */ | ||
| 793 | if (is_imm8(jmp_label(jmp_label2, 2))) | ||
| 794 | EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); | ||
| 795 | else | ||
| 796 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); | ||
| 797 | |||
| 798 | /* >= 32 && < 64 */ | ||
| 799 | /* sub ecx,32 */ | ||
| 800 | EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); | ||
| 801 | /* shl dreg_lo,cl */ | ||
| 802 | EMIT2(0xD3, add_1reg(0xE0, dreg_lo)); | ||
| 803 | /* mov dreg_hi,dreg_lo */ | ||
| 804 | EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo)); | ||
| 805 | |||
| 806 | /* xor dreg_lo,dreg_lo */ | ||
| 807 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
| 808 | |||
| 809 | /* goto out; */ | ||
| 810 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
| 811 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
| 812 | else | ||
| 813 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
| 814 | |||
| 815 | /* >= 64 */ | ||
| 816 | if (jmp_label2 == -1) | ||
| 817 | jmp_label2 = cnt; | ||
| 818 | /* xor dreg_lo,dreg_lo */ | ||
| 819 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
| 820 | /* xor dreg_hi,dreg_hi */ | ||
| 821 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
| 822 | |||
| 823 | if (jmp_label3 == -1) | ||
| 824 | jmp_label3 = cnt; | ||
| 825 | |||
| 826 | if (dstk) { | ||
| 827 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
| 828 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
| 829 | STACK_VAR(dst_lo)); | ||
| 830 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
| 831 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
| 832 | STACK_VAR(dst_hi)); | ||
| 833 | } | ||
| 834 | /* out: */ | ||
| 835 | *pprog = prog; | ||
| 836 | } | ||
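The helper above composes a 64-bit left shift from 32-bit shifts across three count ranges. A C model of the intended semantics (lsh64() is a local sketch; note this helper explicitly zeroes the result for counts >= 64 rather than masking the count):

    #include <stdint.h>
    #include <stdio.h>

    static void lsh64(uint32_t *lo, uint32_t *hi, unsigned int n)
    {
            if (n < 32) {
                    *hi = (*hi << n) | (n ? *lo >> (32 - n) : 0);
                    *lo <<= n;
            } else if (n < 64) {
                    *hi = *lo << (n - 32);
                    *lo = 0;
            } else {
                    *lo = *hi = 0;
            }
    }

    int main(void)
    {
            uint32_t lo = 0x80000001, hi = 0;

            lsh64(&lo, &hi, 4);
            printf("0x%08x%08x\n", hi, lo); /* 0x0000000800000010 */
            return 0;
    }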
| 837 | |||
| 838 | /* dst = dst >> src (signed) */ | ||
| 839 | static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[], | ||
| 840 | bool dstk, bool sstk, u8 **pprog) | ||
| 841 | { | ||
| 842 | u8 *prog = *pprog; | ||
| 843 | int cnt = 0; | ||
| 844 | static int jmp_label1 = -1; | ||
| 845 | static int jmp_label2 = -1; | ||
| 846 | static int jmp_label3 = -1; | ||
| 847 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 848 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 849 | |||
| 850 | if (dstk) { | ||
| 851 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 852 | STACK_VAR(dst_lo)); | ||
| 853 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 854 | STACK_VAR(dst_hi)); | ||
| 855 | } | ||
| 856 | |||
| 857 | if (sstk) | ||
| 858 | /* mov ecx,dword ptr [ebp+off] */ | ||
| 859 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
| 860 | STACK_VAR(src_lo)); | ||
| 861 | else | ||
| 862 | /* mov ecx,src_lo */ | ||
| 863 | EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); | ||
| 864 | |||
| 865 | /* cmp ecx,32 */ | ||
| 866 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); | ||
| 867 | /* Jumps when >= 32 */ | ||
| 868 | if (is_imm8(jmp_label(jmp_label1, 2))) | ||
| 869 | EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); | ||
| 870 | else | ||
| 871 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); | ||
| 872 | |||
| 873 | /* < 32 */ | ||
| 874 | /* shr dreg_lo,cl */ | ||
| 875 | EMIT2(0xD3, add_1reg(0xE8, dreg_lo)); | ||
| 876 | /* mov ebx,dreg_hi */ | ||
| 877 | EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
| 878 | /* ashr dreg_hi,cl */ | ||
| 879 | EMIT2(0xD3, add_1reg(0xF8, dreg_hi)); | ||
| 880 | |||
| 881 | /* IA32_ECX = -IA32_ECX + 32 */ | ||
| 882 | /* neg ecx */ | ||
| 883 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
| 884 | /* add ecx,32 */ | ||
| 885 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
| 886 | |||
| 887 | /* shl ebx,cl */ | ||
| 888 | EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); | ||
| 889 | /* or dreg_lo,ebx */ | ||
| 890 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
| 891 | |||
| 892 | /* goto out; */ | ||
| 893 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
| 894 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
| 895 | else | ||
| 896 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
| 897 | |||
| 898 | /* >= 32 */ | ||
| 899 | if (jmp_label1 == -1) | ||
| 900 | jmp_label1 = cnt; | ||
| 901 | |||
| 902 | /* cmp ecx,64 */ | ||
| 903 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); | ||
| 904 | /* Jumps when >= 64 */ | ||
| 905 | if (is_imm8(jmp_label(jmp_label2, 2))) | ||
| 906 | EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); | ||
| 907 | else | ||
| 908 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); | ||
| 909 | |||
| 910 | /* >= 32 && < 64 */ | ||
| 911 | /* sub ecx,32 */ | ||
| 912 | EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); | ||
| 913 | /* ashr dreg_hi,cl */ | ||
| 914 | EMIT2(0xD3, add_1reg(0xF8, dreg_hi)); | ||
| 915 | /* mov dreg_lo,dreg_hi */ | ||
| 916 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
| 917 | |||
| 918 | /* ashr dreg_hi,imm8 */ | ||
| 919 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); | ||
| 920 | |||
| 921 | /* goto out; */ | ||
| 922 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
| 923 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
| 924 | else | ||
| 925 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
| 926 | |||
| 927 | /* >= 64 */ | ||
| 928 | if (jmp_label2 == -1) | ||
| 929 | jmp_label2 = cnt; | ||
| 930 | /* ashr dreg_hi,imm8 */ | ||
| 931 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); | ||
| 932 | /* mov dreg_lo,dreg_hi */ | ||
| 933 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
| 934 | |||
| 935 | if (jmp_label3 == -1) | ||
| 936 | jmp_label3 = cnt; | ||
| 937 | |||
| 938 | if (dstk) { | ||
| 939 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
| 940 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
| 941 | STACK_VAR(dst_lo)); | ||
| 942 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
| 943 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
| 944 | STACK_VAR(dst_hi)); | ||
| 945 | } | ||
| 946 | /* out: */ | ||
| 947 | *pprog = prog; | ||
| 948 | } | ||
| 949 | |||
| 950 | /* dst = dst >> src */ | ||
| 951 | static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk, | ||
| 952 | bool sstk, u8 **pprog) | ||
| 953 | { | ||
| 954 | u8 *prog = *pprog; | ||
| 955 | int cnt = 0; | ||
| 956 | static int jmp_label1 = -1; | ||
| 957 | static int jmp_label2 = -1; | ||
| 958 | static int jmp_label3 = -1; | ||
| 959 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 960 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 961 | |||
| 962 | if (dstk) { | ||
| 963 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 964 | STACK_VAR(dst_lo)); | ||
| 965 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 966 | STACK_VAR(dst_hi)); | ||
| 967 | } | ||
| 968 | |||
| 969 | if (sstk) | ||
| 970 | /* mov ecx,dword ptr [ebp+off] */ | ||
| 971 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
| 972 | STACK_VAR(src_lo)); | ||
| 973 | else | ||
| 974 | /* mov ecx,src_lo */ | ||
| 975 | EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); | ||
| 976 | |||
| 977 | /* cmp ecx,32 */ | ||
| 978 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); | ||
| 979 | /* Jumps when >= 32 */ | ||
| 980 | if (is_imm8(jmp_label(jmp_label1, 2))) | ||
| 981 | EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); | ||
| 982 | else | ||
| 983 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); | ||
| 984 | |||
| 985 | /* < 32 */ | ||
| 986 | /* shr dreg_lo,cl */ | ||
| 987 | EMIT2(0xD3, add_1reg(0xE8, dreg_lo)); | ||
| 988 | /* mov ebx,dreg_hi */ | ||
| 989 | EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
| 990 | /* shr dreg_hi,cl */ | ||
| 991 | EMIT2(0xD3, add_1reg(0xE8, dreg_hi)); | ||
| 992 | |||
| 993 | /* IA32_ECX = -IA32_ECX + 32 */ | ||
| 994 | /* neg ecx */ | ||
| 995 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
| 996 | /* add ecx,32 */ | ||
| 997 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
| 998 | |||
| 999 | /* shl ebx,cl */ | ||
| 1000 | EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); | ||
| 1001 | /* or dreg_lo,ebx */ | ||
| 1002 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
| 1003 | |||
| 1004 | /* goto out; */ | ||
| 1005 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
| 1006 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
| 1007 | else | ||
| 1008 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
| 1009 | |||
| 1010 | /* >= 32 */ | ||
| 1011 | if (jmp_label1 == -1) | ||
| 1012 | jmp_label1 = cnt; | ||
| 1013 | /* cmp ecx,64 */ | ||
| 1014 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); | ||
| 1015 | /* Jumps when >= 64 */ | ||
| 1016 | if (is_imm8(jmp_label(jmp_label2, 2))) | ||
| 1017 | EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); | ||
| 1018 | else | ||
| 1019 | EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); | ||
| 1020 | |||
| 1021 | /* >= 32 && < 64 */ | ||
| 1022 | /* sub ecx,32 */ | ||
| 1023 | EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); | ||
| 1024 | /* shr dreg_hi,cl */ | ||
| 1025 | EMIT2(0xD3, add_1reg(0xE8, dreg_hi)); | ||
| 1026 | /* mov dreg_lo,dreg_hi */ | ||
| 1027 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
| 1028 | /* xor dreg_hi,dreg_hi */ | ||
| 1029 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
| 1030 | |||
| 1031 | /* goto out; */ | ||
| 1032 | if (is_imm8(jmp_label(jmp_label3, 2))) | ||
| 1033 | EMIT2(0xEB, jmp_label(jmp_label3, 2)); | ||
| 1034 | else | ||
| 1035 | EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); | ||
| 1036 | |||
| 1037 | /* >= 64 */ | ||
| 1038 | if (jmp_label2 == -1) | ||
| 1039 | jmp_label2 = cnt; | ||
| 1040 | /* xor dreg_lo,dreg_lo */ | ||
| 1041 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
| 1042 | /* xor dreg_hi,dreg_hi */ | ||
| 1043 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
| 1044 | |||
| 1045 | if (jmp_label3 == -1) | ||
| 1046 | jmp_label3 = cnt; | ||
| 1047 | |||
| 1048 | if (dstk) { | ||
| 1049 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
| 1050 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
| 1051 | STACK_VAR(dst_lo)); | ||
| 1052 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
| 1053 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
| 1054 | STACK_VAR(dst_hi)); | ||
| 1055 | } | ||
| 1056 | /* out: */ | ||
| 1057 | *pprog = prog; | ||
| 1058 | } | ||
| 1059 | |||
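The two variable-count right-shift helpers above share the same three-way split on the shift amount. As a reference, here is a minimal C model of the value-level transformation the emitted code performs (illustrative only, not part of the patch; the function name is invented, and the `n == 0` guard sidesteps the 32-bit shift that is undefined in C but handled on x86 by count masking):

```c
#include <stdint.h>

/*
 * Value-level model of emit_ia32_rsh_r64()/emit_ia32_arsh_r64():
 * a 64-bit right shift on a (lo, hi) pair of 32-bit halves.
 * 'arith' selects the sign-filling (arithmetic) variant. Right
 * shifts of negative signed values are implementation-defined in C;
 * two's-complement arithmetic shift is assumed (gcc/clang behavior).
 */
static void shift_right_64(uint32_t *lo, uint32_t *hi, uint32_t n, int arith)
{
	uint32_t fill = arith ? (uint32_t)((int32_t)*hi >> 31) : 0;

	if (n < 32) {
		/* bits shifted out of hi move into the top of lo */
		*lo = (*lo >> n) | (n ? *hi << (32 - n) : 0);
		*hi = arith ? (uint32_t)((int32_t)*hi >> n) : *hi >> n;
	} else if (n < 64) {
		/* hi slides into lo; hi becomes all fill bits */
		*lo = arith ? (uint32_t)((int32_t)*hi >> (n - 32))
			    : *hi >> (n - 32);
		*hi = fill;
	} else {
		/* shifting by 64 or more leaves only fill bits */
		*lo = fill;
		*hi = fill;
	}
}
```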
| 1060 | /* dst = dst << val */ | ||
| 1061 | static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val, | ||
| 1062 | bool dstk, u8 **pprog) | ||
| 1063 | { | ||
| 1064 | u8 *prog = *pprog; | ||
| 1065 | int cnt = 0; | ||
| 1066 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 1067 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 1068 | |||
| 1069 | if (dstk) { | ||
| 1070 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1071 | STACK_VAR(dst_lo)); | ||
| 1072 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 1073 | STACK_VAR(dst_hi)); | ||
| 1074 | } | ||
| 1075 | /* Do LSH operation */ | ||
| 1076 | if (val < 32) { | ||
| 1077 | /* shl dreg_hi,imm8 */ | ||
| 1078 | EMIT3(0xC1, add_1reg(0xE0, dreg_hi), val); | ||
| 1079 | /* mov ebx,dreg_lo */ | ||
| 1080 | EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
| 1081 | /* shl dreg_lo,imm8 */ | ||
| 1082 | EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val); | ||
| 1083 | |||
| 1084 | /* IA32_ECX = 32 - val */ | ||
| 1085 | /* mov cl,val */ | ||
| 1086 | EMIT2(0xB1, val); | ||
| 1087 | /* movzx ecx,cl */ | ||
| 1088 | EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX)); | ||
| 1089 | /* neg ecx */ | ||
| 1090 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
| 1091 | /* add ecx,32 */ | ||
| 1092 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
| 1093 | |||
| 1094 | /* shr ebx,cl */ | ||
| 1095 | EMIT2(0xD3, add_1reg(0xE8, IA32_EBX)); | ||
| 1096 | /* or dreg_hi,ebx */ | ||
| 1097 | EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
| 1098 | } else if (val >= 32 && val < 64) { | ||
| 1099 | u32 value = val - 32; | ||
| 1100 | |||
| 1101 | /* shl dreg_lo,imm8 */ | ||
| 1102 | EMIT3(0xC1, add_1reg(0xE0, dreg_lo), value); | ||
| 1103 | /* mov dreg_hi,dreg_lo */ | ||
| 1104 | EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo)); | ||
| 1105 | /* xor dreg_lo,dreg_lo */ | ||
| 1106 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
| 1107 | } else { | ||
| 1108 | /* xor dreg_lo,dreg_lo */ | ||
| 1109 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
| 1110 | /* xor dreg_hi,dreg_hi */ | ||
| 1111 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
| 1112 | } | ||
| 1113 | |||
| 1114 | if (dstk) { | ||
| 1115 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
| 1116 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
| 1117 | STACK_VAR(dst_lo)); | ||
| 1118 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
| 1119 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
| 1120 | STACK_VAR(dst_hi)); | ||
| 1121 | } | ||
| 1122 | *pprog = prog; | ||
| 1123 | } | ||
| 1124 | |||
| 1125 | /* dst = dst >> val */ | ||
| 1126 | static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val, | ||
| 1127 | bool dstk, u8 **pprog) | ||
| 1128 | { | ||
| 1129 | u8 *prog = *pprog; | ||
| 1130 | int cnt = 0; | ||
| 1131 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 1132 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 1133 | |||
| 1134 | if (dstk) { | ||
| 1135 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1136 | STACK_VAR(dst_lo)); | ||
| 1137 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 1138 | STACK_VAR(dst_hi)); | ||
| 1139 | } | ||
| 1140 | |||
| 1141 | /* Do RSH operation */ | ||
| 1142 | if (val < 32) { | ||
| 1143 | /* shr dreg_lo,imm8 */ | ||
| 1144 | EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val); | ||
| 1145 | /* mov ebx,dreg_hi */ | ||
| 1146 | EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
| 1147 | /* shr dreg_hi,imm8 */ | ||
| 1148 | EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val); | ||
| 1149 | |||
| 1150 | /* IA32_ECX = 32 - val */ | ||
| 1151 | /* mov cl,val */ | ||
| 1152 | EMIT2(0xB1, val); | ||
| 1153 | /* movzx ecx,cl */ | ||
| 1154 | EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX)); | ||
| 1155 | /* neg ecx */ | ||
| 1156 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
| 1157 | /* add ecx,32 */ | ||
| 1158 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
| 1159 | |||
| 1160 | /* shl ebx,cl */ | ||
| 1161 | EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); | ||
| 1162 | /* or dreg_lo,ebx */ | ||
| 1163 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
| 1164 | } else if (val >= 32 && val < 64) { | ||
| 1165 | u32 value = val - 32; | ||
| 1166 | |||
| 1167 | /* shr dreg_hi,imm8 */ | ||
| 1168 | EMIT3(0xC1, add_1reg(0xE8, dreg_hi), value); | ||
| 1169 | /* mov dreg_lo,dreg_hi */ | ||
| 1170 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
| 1171 | /* xor dreg_hi,dreg_hi */ | ||
| 1172 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
| 1173 | } else { | ||
| 1174 | /* xor dreg_lo,dreg_lo */ | ||
| 1175 | EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); | ||
| 1176 | /* xor dreg_hi,dreg_hi */ | ||
| 1177 | EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); | ||
| 1178 | } | ||
| 1179 | |||
| 1180 | if (dstk) { | ||
| 1181 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
| 1182 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
| 1183 | STACK_VAR(dst_lo)); | ||
| 1184 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
| 1185 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
| 1186 | STACK_VAR(dst_hi)); | ||
| 1187 | } | ||
| 1188 | *pprog = prog; | ||
| 1189 | } | ||
| 1190 | |||
| 1191 | /* dst = dst >> val (signed) */ | ||
| 1192 | static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val, | ||
| 1193 | bool dstk, u8 **pprog) | ||
| 1194 | { | ||
| 1195 | u8 *prog = *pprog; | ||
| 1196 | int cnt = 0; | ||
| 1197 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 1198 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 1199 | |||
| 1200 | if (dstk) { | ||
| 1201 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1202 | STACK_VAR(dst_lo)); | ||
| 1203 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 1204 | STACK_VAR(dst_hi)); | ||
| 1205 | } | ||
| 1206 | /* Do RSH operation */ | ||
| 1207 | if (val < 32) { | ||
| 1208 | /* shr dreg_lo,imm8 */ | ||
| 1209 | EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val); | ||
| 1210 | /* mov ebx,dreg_hi */ | ||
| 1211 | EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); | ||
| 1212 | /* ashr dreg_hi,imm8 */ | ||
| 1213 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val); | ||
| 1214 | |||
| 1215 | /* IA32_ECX = 32 - val */ | ||
| 1216 | /* mov cl,val */ | ||
| 1217 | EMIT2(0xB1, val); | ||
| 1218 | /* movzx ecx,cl */ | ||
| 1219 | EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX)); | ||
| 1220 | /* neg ecx */ | ||
| 1221 | EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); | ||
| 1222 | /* add ecx,32 */ | ||
| 1223 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); | ||
| 1224 | |||
| 1225 | /* shl ebx,cl */ | ||
| 1226 | EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); | ||
| 1227 | /* or dreg_lo,ebx */ | ||
| 1228 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); | ||
| 1229 | } else if (val >= 32 && val < 64) { | ||
| 1230 | u32 value = val - 32; | ||
| 1231 | |||
| 1232 | /* ashr dreg_hi,imm8 */ | ||
| 1233 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), value); | ||
| 1234 | /* mov dreg_lo,dreg_hi */ | ||
| 1235 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
| 1236 | |||
| 1237 | /* ashr dreg_hi,imm8 */ | ||
| 1238 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); | ||
| 1239 | } else { | ||
| 1240 | /* ashr dreg_hi,imm8 */ | ||
| 1241 | EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); | ||
| 1242 | /* mov dreg_lo,dreg_hi */ | ||
| 1243 | EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
| 1244 | } | ||
| 1245 | |||
| 1246 | if (dstk) { | ||
| 1247 | /* mov dword ptr [ebp+off],dreg_lo */ | ||
| 1248 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), | ||
| 1249 | STACK_VAR(dst_lo)); | ||
| 1250 | /* mov dword ptr [ebp+off],dreg_hi */ | ||
| 1251 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi), | ||
| 1252 | STACK_VAR(dst_hi)); | ||
| 1253 | } | ||
| 1254 | *pprog = prog; | ||
| 1255 | } | ||
| 1256 | |||
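The three immediate-count shift helpers above do the range split at JIT time, so no compare-and-branch code needs to be emitted. A compact C model of the left-shift case (illustrative only; the helper name is invented, and the `val > 63` branch is unreachable because do_jit() rejects such immediates with -EINVAL):

```c
#include <stdint.h>

/* Value-level model of emit_ia32_lsh_i64(); the rsh/arsh immediate
 * helpers mirror it with the roles of lo and hi swapped (and sign
 * fill instead of zeros for the arithmetic variant).
 */
static void lsh_imm_64(uint32_t *lo, uint32_t *hi, uint32_t val)
{
	if (val < 32) {
		/* bits shifted out of lo move into the bottom of hi */
		*hi = (*hi << val) | (val ? *lo >> (32 - val) : 0);
		*lo <<= val;
	} else if (val < 64) {
		*hi = *lo << (val - 32);
		*lo = 0;
	} else {
		/* unreachable: do_jit() bounds-checks the immediate */
		*lo = 0;
		*hi = 0;
	}
}
```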
| 1257 | static inline void emit_ia32_mul_r64(const u8 dst[], const u8 src[], bool dstk, | ||
| 1258 | bool sstk, u8 **pprog) | ||
| 1259 | { | ||
| 1260 | u8 *prog = *pprog; | ||
| 1261 | int cnt = 0; | ||
| 1262 | |||
| 1263 | if (dstk) | ||
| 1264 | /* mov eax,dword ptr [ebp+off] */ | ||
| 1265 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1266 | STACK_VAR(dst_hi)); | ||
| 1267 | else | ||
| 1268 | /* mov eax,dst_hi */ | ||
| 1269 | EMIT2(0x8B, add_2reg(0xC0, dst_hi, IA32_EAX)); | ||
| 1270 | |||
| 1271 | if (sstk) | ||
| 1272 | /* mul dword ptr [ebp+off] */ | ||
| 1273 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo)); | ||
| 1274 | else | ||
| 1275 | /* mul src_lo */ | ||
| 1276 | EMIT2(0xF7, add_1reg(0xE0, src_lo)); | ||
| 1277 | |||
| 1278 | /* mov ecx,eax */ | ||
| 1279 | EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX)); | ||
| 1280 | |||
| 1281 | if (dstk) | ||
| 1282 | /* mov eax,dword ptr [ebp+off] */ | ||
| 1283 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1284 | STACK_VAR(dst_lo)); | ||
| 1285 | else | ||
| 1286 | /* mov eax,dst_lo */ | ||
| 1287 | EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
| 1288 | |||
| 1289 | if (sstk) | ||
| 1290 | /* mul dword ptr [ebp+off] */ | ||
| 1291 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_hi)); | ||
| 1292 | else | ||
| 1293 | /* mul src_hi */ | ||
| 1294 | EMIT2(0xF7, add_1reg(0xE0, src_hi)); | ||
| 1295 | |||
| 1296 | /* add ecx,eax */ | ||
| 1297 | EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX)); | ||
| 1298 | |||
| 1299 | if (dstk) | ||
| 1300 | /* mov eax,dword ptr [ebp+off] */ | ||
| 1301 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1302 | STACK_VAR(dst_lo)); | ||
| 1303 | else | ||
| 1304 | /* mov eax,dst_lo */ | ||
| 1305 | EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
| 1306 | |||
| 1307 | if (sstk) | ||
| 1308 | /* mul dword ptr [ebp+off] */ | ||
| 1309 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo)); | ||
| 1310 | else | ||
| 1311 | /* mul src_lo */ | ||
| 1312 | EMIT2(0xF7, add_1reg(0xE0, src_lo)); | ||
| 1313 | |||
| 1314 | /* add ecx,edx */ | ||
| 1315 | EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX)); | ||
| 1316 | |||
| 1317 | if (dstk) { | ||
| 1318 | /* mov dword ptr [ebp+off],eax */ | ||
| 1319 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1320 | STACK_VAR(dst_lo)); | ||
| 1321 | /* mov dword ptr [ebp+off],ecx */ | ||
| 1322 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
| 1323 | STACK_VAR(dst_hi)); | ||
| 1324 | } else { | ||
| 1325 | /* mov dst_lo,eax */ | ||
| 1326 | EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
| 1327 | /* mov dst_hi,ecx */ | ||
| 1328 | EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX)); | ||
| 1329 | } | ||
| 1330 | |||
| 1331 | *pprog = prog; | ||
| 1332 | } | ||
| 1333 | |||
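emit_ia32_mul_r64() builds the low 64 bits of a 64x64 product out of three 32x32 multiplies; the dst_hi * src_hi term is dropped because it only contributes to bits 64 and above. A C model of what the emitted sequence computes (illustrative only; the function name is invented):

```c
#include <stdint.h>

/* result_lo comes straight from dst_lo * src_lo, while result_hi
 * accumulates the two cross products (low 32 bits only, wrapping
 * mod 2^32) plus the carry-out of the low product.
 */
static void mul64_lo(uint32_t *dlo, uint32_t *dhi, uint32_t slo, uint32_t shi)
{
	uint64_t low = (uint64_t)*dlo * slo;		/* full 32x32->64 product */
	uint32_t cross = *dhi * slo + *dlo * shi;	/* wraps mod 2^32 */

	*dhi = cross + (uint32_t)(low >> 32);
	*dlo = (uint32_t)low;
}
```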
| 1334 | static inline void emit_ia32_mul_i64(const u8 dst[], const u32 val, | ||
| 1335 | bool dstk, u8 **pprog) | ||
| 1336 | { | ||
| 1337 | u8 *prog = *pprog; | ||
| 1338 | int cnt = 0; | ||
| 1339 | u32 hi; | ||
| 1340 | |||
| 1341 | hi = val & (1<<31) ? (u32)~0 : 0; | ||
| 1342 | /* mov eax,imm32 */ | ||
| 1343 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val); | ||
| 1344 | if (dstk) | ||
| 1345 | /* mul dword ptr [ebp+off] */ | ||
| 1346 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_hi)); | ||
| 1347 | else | ||
| 1348 | /* mul dst_hi */ | ||
| 1349 | EMIT2(0xF7, add_1reg(0xE0, dst_hi)); | ||
| 1350 | |||
| 1351 | /* mov ecx,eax */ | ||
| 1352 | EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX)); | ||
| 1353 | |||
| 1354 | /* mov eax,imm32 */ | ||
| 1355 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), hi); | ||
| 1356 | if (dstk) | ||
| 1357 | /* mul dword ptr [ebp+off] */ | ||
| 1358 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo)); | ||
| 1359 | else | ||
| 1360 | /* mul dst_lo */ | ||
| 1361 | EMIT2(0xF7, add_1reg(0xE0, dst_lo)); | ||
| 1362 | /* add ecx,eax */ | ||
| 1363 | EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX)); | ||
| 1364 | |||
| 1365 | /* mov eax,imm32 */ | ||
| 1366 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val); | ||
| 1367 | if (dstk) | ||
| 1368 | /* mul dword ptr [ebp+off] */ | ||
| 1369 | EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo)); | ||
| 1370 | else | ||
| 1371 | /* mul dst_lo */ | ||
| 1372 | EMIT2(0xF7, add_1reg(0xE0, dst_lo)); | ||
| 1373 | |||
| 1374 | /* add ecx,edx */ | ||
| 1375 | EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX)); | ||
| 1376 | |||
| 1377 | if (dstk) { | ||
| 1378 | /* mov dword ptr [ebp+off],eax */ | ||
| 1379 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1380 | STACK_VAR(dst_lo)); | ||
| 1381 | /* mov dword ptr [ebp+off],ecx */ | ||
| 1382 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
| 1383 | STACK_VAR(dst_hi)); | ||
| 1384 | } else { | ||
| 1385 | /* mov dst_lo,eax */ | ||
| 1386 | EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
| 1387 | /* mov dst_hi,ecx */ | ||
| 1388 | EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX)); | ||
| 1389 | } | ||
| 1390 | |||
| 1391 | *pprog = prog; | ||
| 1392 | } | ||
| 1393 | |||
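The immediate variant above reuses the same three-product scheme; the only extra step is sign-extending the 32-bit immediate to 64 bits, which the `hi = val & (1<<31) ? (u32)~0 : 0` test does by replicating the sign bit into the high word. Equivalent C for that step (illustrative only; the function name is invented):

```c
#include <stdint.h>

/* Sign-extension of a BPF imm32 into the (lo, hi) halves used by
 * emit_ia32_mul_i64(): hi is all ones exactly when bit 31 is set.
 */
static void imm_to_64(int32_t val, uint32_t *lo, uint32_t *hi)
{
	*lo = (uint32_t)val;
	*hi = (uint32_t)((int64_t)val >> 32);	/* same as the JIT's sign test */
}
```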
| 1394 | static int bpf_size_to_x86_bytes(int bpf_size) | ||
| 1395 | { | ||
| 1396 | if (bpf_size == BPF_W) | ||
| 1397 | return 4; | ||
| 1398 | else if (bpf_size == BPF_H) | ||
| 1399 | return 2; | ||
| 1400 | else if (bpf_size == BPF_B) | ||
| 1401 | return 1; | ||
| 1402 | else if (bpf_size == BPF_DW) | ||
| 1403 | return 4; /* imm32 */ | ||
| 1404 | else | ||
| 1405 | return 0; | ||
| 1406 | } | ||
| 1407 | |||
| 1408 | struct jit_context { | ||
| 1409 | int cleanup_addr; /* Epilogue code offset */ | ||
| 1410 | }; | ||
| 1411 | |||
| 1412 | /* Maximum number of bytes emitted while JITing one eBPF insn */ | ||
| 1413 | #define BPF_MAX_INSN_SIZE 128 | ||
| 1414 | #define BPF_INSN_SAFETY 64 | ||
| 1415 | |||
| 1416 | #define PROLOGUE_SIZE 35 | ||
| 1417 | |||
| 1418 | /* | ||
| 1419 | * Emit prologue code for BPF program and check its size. | ||
| 1420 | * bpf_tail_call helper will skip it while jumping into another program. | ||
| 1421 | */ | ||
| 1422 | static void emit_prologue(u8 **pprog, u32 stack_depth) | ||
| 1423 | { | ||
| 1424 | u8 *prog = *pprog; | ||
| 1425 | int cnt = 0; | ||
| 1426 | const u8 *r1 = bpf2ia32[BPF_REG_1]; | ||
| 1427 | const u8 fplo = bpf2ia32[BPF_REG_FP][0]; | ||
| 1428 | const u8 fphi = bpf2ia32[BPF_REG_FP][1]; | ||
| 1429 | const u8 *tcc = bpf2ia32[TCALL_CNT]; | ||
| 1430 | |||
| 1431 | /* push ebp */ | ||
| 1432 | EMIT1(0x55); | ||
| 1433 | /* mov ebp,esp */ | ||
| 1434 | EMIT2(0x89, 0xE5); | ||
| 1435 | /* push edi */ | ||
| 1436 | EMIT1(0x57); | ||
| 1437 | /* push esi */ | ||
| 1438 | EMIT1(0x56); | ||
| 1439 | /* push ebx */ | ||
| 1440 | EMIT1(0x53); | ||
| 1441 | |||
| 1442 | /* sub esp,STACK_SIZE */ | ||
| 1443 | EMIT2_off32(0x81, 0xEC, STACK_SIZE); | ||
| 1444 | /* sub ebp,SCRATCH_SIZE + 4 + 12 */ | ||
| 1445 | EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 16); | ||
| 1446 | /* xor ebx,ebx */ | ||
| 1447 | EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX)); | ||
| 1448 | |||
| 1449 | /* Set up BPF prog stack base register */ | ||
| 1450 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBP), STACK_VAR(fplo)); | ||
| 1451 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(fphi)); | ||
| 1452 | |||
| 1453 | /* Move BPF_CTX (EAX) to BPF_REG_R1 */ | ||
| 1454 | /* mov dword ptr [ebp+off],eax */ | ||
| 1455 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0])); | ||
| 1456 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(r1[1])); | ||
| 1457 | |||
| 1458 | /* Initialize Tail Count */ | ||
| 1459 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[0])); | ||
| 1460 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1])); | ||
| 1461 | |||
| 1462 | BUILD_BUG_ON(cnt != PROLOGUE_SIZE); | ||
| 1463 | *pprog = prog; | ||
| 1464 | } | ||
| 1465 | |||
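Taken together, the prologue establishes the frame sketched below before lowering EBP so that STACK_VAR() offsets address the scratch slots. The layout is inferred from the emitted pushes and the matching restores in emit_epilogue(); treat it as a sketch, not a documented ABI:

```c
/*
 * Frame right after the prologue (higher addresses on top):
 *
 *   [frame_ebp     ]  saved caller ebp   <- ebp before the final sub
 *   [frame_ebp -  4]  saved edi
 *   [frame_ebp -  8]  saved esi
 *   [frame_ebp - 12]  saved ebx
 *   [     ...      ]  STACK_SIZE bytes: BPF stack plus register scratch
 *
 * ebp is then dropped by SCRATCH_SIZE + 16 so that the 64-bit BPF
 * register slots sit at small positive STACK_VAR() displacements;
 * the epilogue adds the same constant back before popping registers.
 */
```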
| 1466 | /* Emit epilogue code for BPF program */ | ||
| 1467 | static void emit_epilogue(u8 **pprog, u32 stack_depth) | ||
| 1468 | { | ||
| 1469 | u8 *prog = *pprog; | ||
| 1470 | const u8 *r0 = bpf2ia32[BPF_REG_0]; | ||
| 1471 | int cnt = 0; | ||
| 1472 | |||
| 1473 | /* mov eax,dword ptr [ebp+off]*/ | ||
| 1474 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r0[0])); | ||
| 1475 | /* mov edx,dword ptr [ebp+off]*/ | ||
| 1476 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1])); | ||
| 1477 | |||
| 1478 | /* add ebp,SCRATCH_SIZE + 4 + 12 */ | ||
| 1479 | EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 16); | ||
| 1480 | |||
| 1481 | /* mov ebx,dword ptr [ebp-12]*/ | ||
| 1482 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12); | ||
| 1483 | /* mov esi,dword ptr [ebp-8]*/ | ||
| 1484 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ESI), -8); | ||
| 1485 | /* mov edi,dword ptr [ebp-4]*/ | ||
| 1486 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDI), -4); | ||
| 1487 | |||
| 1488 | EMIT1(0xC9); /* leave */ | ||
| 1489 | EMIT1(0xC3); /* ret */ | ||
| 1490 | *pprog = prog; | ||
| 1491 | } | ||
| 1492 | |||
| 1493 | /* | ||
| 1494 | * Generate the following code: | ||
| 1495 | * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... | ||
| 1496 | * if (index >= array->map.max_entries) | ||
| 1497 | * goto out; | ||
| 1498 | * if (++tail_call_cnt > MAX_TAIL_CALL_CNT) | ||
| 1499 | * goto out; | ||
| 1500 | * prog = array->ptrs[index]; | ||
| 1501 | * if (prog == NULL) | ||
| 1502 | * goto out; | ||
| 1503 | * goto *(prog->bpf_func + prologue_size); | ||
| 1504 | * out: | ||
| 1505 | */ | ||
| 1506 | static void emit_bpf_tail_call(u8 **pprog) | ||
| 1507 | { | ||
| 1508 | u8 *prog = *pprog; | ||
| 1509 | int cnt = 0; | ||
| 1510 | const u8 *r1 = bpf2ia32[BPF_REG_1]; | ||
| 1511 | const u8 *r2 = bpf2ia32[BPF_REG_2]; | ||
| 1512 | const u8 *r3 = bpf2ia32[BPF_REG_3]; | ||
| 1513 | const u8 *tcc = bpf2ia32[TCALL_CNT]; | ||
| 1514 | u32 lo, hi; | ||
| 1515 | static int jmp_label1 = -1; | ||
| 1516 | |||
| 1517 | /* | ||
| 1518 | * if (index >= array->map.max_entries) | ||
| 1519 | * goto out; | ||
| 1520 | */ | ||
| 1521 | /* mov eax,dword ptr [ebp+off] */ | ||
| 1522 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r2[0])); | ||
| 1523 | /* mov edx,dword ptr [ebp+off] */ | ||
| 1524 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r3[0])); | ||
| 1525 | |||
| 1526 | /* cmp dword ptr [eax+off],edx */ | ||
| 1527 | EMIT3(0x39, add_2reg(0x40, IA32_EAX, IA32_EDX), | ||
| 1528 | offsetof(struct bpf_array, map.max_entries)); | ||
| 1529 | /* jbe out */ | ||
| 1530 | EMIT2(IA32_JBE, jmp_label(jmp_label1, 2)); | ||
| 1531 | |||
| 1532 | /* | ||
| 1533 | * if (tail_call_cnt > MAX_TAIL_CALL_CNT) | ||
| 1534 | * goto out; | ||
| 1535 | */ | ||
| 1536 | lo = (u32)MAX_TAIL_CALL_CNT; | ||
| 1537 | hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32); | ||
| 1538 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0])); | ||
| 1539 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1])); | ||
| 1540 | |||
| 1541 | /* cmp ebx,hi */ | ||
| 1542 | EMIT3(0x83, add_1reg(0xF8, IA32_EBX), hi); | ||
| 1543 | EMIT2(IA32_JNE, 3); | ||
| 1544 | /* cmp ecx,lo */ | ||
| 1545 | EMIT3(0x83, add_1reg(0xF8, IA32_ECX), lo); | ||
| 1546 | |||
| 1547 | /* jae out */ | ||
| 1548 | EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); | ||
| 1549 | |||
| 1550 | /* add ecx,0x1 */ | ||
| 1551 | EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 0x01); | ||
| 1552 | /* adc ebx,0x0 */ | ||
| 1553 | EMIT3(0x83, add_1reg(0xD0, IA32_EBX), 0x00); | ||
| 1554 | |||
| 1555 | /* mov dword ptr [ebp+off],ecx */ | ||
| 1556 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0])); | ||
| 1557 | /* mov dword ptr [ebp+off],ebx */ | ||
| 1558 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1])); | ||
| 1559 | |||
| 1560 | /* prog = array->ptrs[index]; */ | ||
| 1561 | /* mov edx, [eax + edx * 4 + offsetof(...)] */ | ||
| 1562 | EMIT3_off32(0x8B, 0x94, 0x90, offsetof(struct bpf_array, ptrs)); | ||
| 1563 | |||
| 1564 | /* | ||
| 1565 | * if (prog == NULL) | ||
| 1566 | * goto out; | ||
| 1567 | */ | ||
| 1568 | /* test edx,edx */ | ||
| 1569 | EMIT2(0x85, add_2reg(0xC0, IA32_EDX, IA32_EDX)); | ||
| 1570 | /* je out */ | ||
| 1571 | EMIT2(IA32_JE, jmp_label(jmp_label1, 2)); | ||
| 1572 | |||
| 1573 | /* goto *(prog->bpf_func + prologue_size); */ | ||
| 1574 | /* mov edx, dword ptr [edx + offsetof(struct bpf_prog, bpf_func)] */ | ||
| 1575 | EMIT3(0x8B, add_2reg(0x40, IA32_EDX, IA32_EDX), | ||
| 1576 | offsetof(struct bpf_prog, bpf_func)); | ||
| 1577 | /* add edx,prologue_size */ | ||
| 1578 | EMIT3(0x83, add_1reg(0xC0, IA32_EDX), PROLOGUE_SIZE); | ||
| 1579 | |||
| 1580 | /* mov eax,dword ptr [ebp+off] */ | ||
| 1581 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0])); | ||
| 1582 | |||
| 1583 | /* | ||
| 1584 | * Now we're ready to jump into the next BPF program: | ||
| 1585 | * eax == ctx (1st arg) | ||
| 1586 | * edx == prog->bpf_func + prologue_size | ||
| 1587 | */ | ||
| 1588 | RETPOLINE_EDX_BPF_JIT(); | ||
| 1589 | |||
| 1590 | if (jmp_label1 == -1) | ||
| 1591 | jmp_label1 = cnt; | ||
| 1592 | |||
| 1593 | /* out: */ | ||
| 1594 | *pprog = prog; | ||
| 1595 | } | ||
| 1596 | |||
| 1597 | /* Push a 64-bit BPF register's scratch stack slots onto the stack. */ | ||
| 1598 | static inline void emit_push_r64(const u8 src[], u8 **pprog) | ||
| 1599 | { | ||
| 1600 | u8 *prog = *pprog; | ||
| 1601 | int cnt = 0; | ||
| 1602 | |||
| 1603 | /* mov ecx,dword ptr [ebp+off] */ | ||
| 1604 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_hi)); | ||
| 1605 | /* push ecx */ | ||
| 1606 | EMIT1(0x51); | ||
| 1607 | |||
| 1608 | /* mov ecx,dword ptr [ebp+off] */ | ||
| 1609 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo)); | ||
| 1610 | /* push ecx */ | ||
| 1611 | EMIT1(0x51); | ||
| 1612 | |||
| 1613 | *pprog = prog; | ||
| 1614 | } | ||
| 1615 | |||
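emit_push_r64() is the building block for the helper-call sequence in do_jit() below: R1 travels in eax:edx while R2..R5 are pushed high word first, so each 64-bit argument ends up little-endian on the stack. The resulting picture just before the emitted call is sketched here (the eax:edx pairing for the first u64 argument is an assumption about the kernel's regparm(3) x86-32 calling convention):

```c
/*
 * Stack just before the call emitted for BPF_JMP | BPF_CALL:
 *
 *   esp +  0: R2 low    <- pushed last
 *   esp +  4: R2 high
 *   esp +  8: R3 low
 *   esp + 12: R3 high
 *   esp + 16: R4 low
 *   esp + 20: R4 high
 *   esp + 24: R5 low
 *   esp + 28: R5 high   <- pushed first
 *
 * 32 bytes in total, released again by the 'add esp,32' after the
 * call; R1 is loaded into eax:edx and the u64 result returns there.
 */
```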
| 1616 | static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | ||
| 1617 | int oldproglen, struct jit_context *ctx) | ||
| 1618 | { | ||
| 1619 | struct bpf_insn *insn = bpf_prog->insnsi; | ||
| 1620 | int insn_cnt = bpf_prog->len; | ||
| 1621 | bool seen_exit = false; | ||
| 1622 | u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; | ||
| 1623 | int i, cnt = 0; | ||
| 1624 | int proglen = 0; | ||
| 1625 | u8 *prog = temp; | ||
| 1626 | |||
| 1627 | emit_prologue(&prog, bpf_prog->aux->stack_depth); | ||
| 1628 | |||
| 1629 | for (i = 0; i < insn_cnt; i++, insn++) { | ||
| 1630 | const s32 imm32 = insn->imm; | ||
| 1631 | const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; | ||
| 1632 | const bool dstk = insn->dst_reg == BPF_REG_AX ? false : true; | ||
| 1633 | const bool sstk = insn->src_reg == BPF_REG_AX ? false : true; | ||
| 1634 | const u8 code = insn->code; | ||
| 1635 | const u8 *dst = bpf2ia32[insn->dst_reg]; | ||
| 1636 | const u8 *src = bpf2ia32[insn->src_reg]; | ||
| 1637 | const u8 *r0 = bpf2ia32[BPF_REG_0]; | ||
| 1638 | s64 jmp_offset; | ||
| 1639 | u8 jmp_cond; | ||
| 1640 | int ilen; | ||
| 1641 | u8 *func; | ||
| 1642 | |||
| 1643 | switch (code) { | ||
| 1644 | /* ALU operations */ | ||
| 1645 | /* dst = src */ | ||
| 1646 | case BPF_ALU | BPF_MOV | BPF_K: | ||
| 1647 | case BPF_ALU | BPF_MOV | BPF_X: | ||
| 1648 | case BPF_ALU64 | BPF_MOV | BPF_K: | ||
| 1649 | case BPF_ALU64 | BPF_MOV | BPF_X: | ||
| 1650 | switch (BPF_SRC(code)) { | ||
| 1651 | case BPF_X: | ||
| 1652 | emit_ia32_mov_r64(is64, dst, src, dstk, | ||
| 1653 | sstk, &prog); | ||
| 1654 | break; | ||
| 1655 | case BPF_K: | ||
| 1656 | /* Sign-extend immediate value to dst reg */ | ||
| 1657 | emit_ia32_mov_i64(is64, dst, imm32, | ||
| 1658 | dstk, &prog); | ||
| 1659 | break; | ||
| 1660 | } | ||
| 1661 | break; | ||
| 1662 | /* dst = dst + src/imm */ | ||
| 1663 | /* dst = dst - src/imm */ | ||
| 1664 | /* dst = dst | src/imm */ | ||
| 1665 | /* dst = dst & src/imm */ | ||
| 1666 | /* dst = dst ^ src/imm */ | ||
| 1667 | /* dst = dst * src/imm */ | ||
| 1668 | /* dst = dst << src */ | ||
| 1669 | /* dst = dst >> src */ | ||
| 1670 | case BPF_ALU | BPF_ADD | BPF_K: | ||
| 1671 | case BPF_ALU | BPF_ADD | BPF_X: | ||
| 1672 | case BPF_ALU | BPF_SUB | BPF_K: | ||
| 1673 | case BPF_ALU | BPF_SUB | BPF_X: | ||
| 1674 | case BPF_ALU | BPF_OR | BPF_K: | ||
| 1675 | case BPF_ALU | BPF_OR | BPF_X: | ||
| 1676 | case BPF_ALU | BPF_AND | BPF_K: | ||
| 1677 | case BPF_ALU | BPF_AND | BPF_X: | ||
| 1678 | case BPF_ALU | BPF_XOR | BPF_K: | ||
| 1679 | case BPF_ALU | BPF_XOR | BPF_X: | ||
| 1680 | case BPF_ALU64 | BPF_ADD | BPF_K: | ||
| 1681 | case BPF_ALU64 | BPF_ADD | BPF_X: | ||
| 1682 | case BPF_ALU64 | BPF_SUB | BPF_K: | ||
| 1683 | case BPF_ALU64 | BPF_SUB | BPF_X: | ||
| 1684 | case BPF_ALU64 | BPF_OR | BPF_K: | ||
| 1685 | case BPF_ALU64 | BPF_OR | BPF_X: | ||
| 1686 | case BPF_ALU64 | BPF_AND | BPF_K: | ||
| 1687 | case BPF_ALU64 | BPF_AND | BPF_X: | ||
| 1688 | case BPF_ALU64 | BPF_XOR | BPF_K: | ||
| 1689 | case BPF_ALU64 | BPF_XOR | BPF_X: | ||
| 1690 | switch (BPF_SRC(code)) { | ||
| 1691 | case BPF_X: | ||
| 1692 | emit_ia32_alu_r64(is64, BPF_OP(code), dst, | ||
| 1693 | src, dstk, sstk, &prog); | ||
| 1694 | break; | ||
| 1695 | case BPF_K: | ||
| 1696 | emit_ia32_alu_i64(is64, BPF_OP(code), dst, | ||
| 1697 | imm32, dstk, &prog); | ||
| 1698 | break; | ||
| 1699 | } | ||
| 1700 | break; | ||
| 1701 | case BPF_ALU | BPF_MUL | BPF_K: | ||
| 1702 | case BPF_ALU | BPF_MUL | BPF_X: | ||
| 1703 | switch (BPF_SRC(code)) { | ||
| 1704 | case BPF_X: | ||
| 1705 | emit_ia32_mul_r(dst_lo, src_lo, dstk, | ||
| 1706 | sstk, &prog); | ||
| 1707 | break; | ||
| 1708 | case BPF_K: | ||
| 1709 | /* mov ecx,imm32 */ | ||
| 1710 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), | ||
| 1711 | imm32); | ||
| 1712 | emit_ia32_mul_r(dst_lo, IA32_ECX, dstk, | ||
| 1713 | false, &prog); | ||
| 1714 | break; | ||
| 1715 | } | ||
| 1716 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
| 1717 | break; | ||
| 1718 | case BPF_ALU | BPF_LSH | BPF_X: | ||
| 1719 | case BPF_ALU | BPF_RSH | BPF_X: | ||
| 1720 | case BPF_ALU | BPF_ARSH | BPF_K: | ||
| 1721 | case BPF_ALU | BPF_ARSH | BPF_X: | ||
| 1722 | switch (BPF_SRC(code)) { | ||
| 1723 | case BPF_X: | ||
| 1724 | emit_ia32_shift_r(BPF_OP(code), dst_lo, src_lo, | ||
| 1725 | dstk, sstk, &prog); | ||
| 1726 | break; | ||
| 1727 | case BPF_K: | ||
| 1728 | /* mov ecx,imm32 */ | ||
| 1729 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), | ||
| 1730 | imm32); | ||
| 1731 | emit_ia32_shift_r(BPF_OP(code), dst_lo, | ||
| 1732 | IA32_ECX, dstk, false, | ||
| 1733 | &prog); | ||
| 1734 | break; | ||
| 1735 | } | ||
| 1736 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
| 1737 | break; | ||
| 1738 | /* dst = dst / src(imm) */ | ||
| 1739 | /* dst = dst % src(imm) */ | ||
| 1740 | case BPF_ALU | BPF_DIV | BPF_K: | ||
| 1741 | case BPF_ALU | BPF_DIV | BPF_X: | ||
| 1742 | case BPF_ALU | BPF_MOD | BPF_K: | ||
| 1743 | case BPF_ALU | BPF_MOD | BPF_X: | ||
| 1744 | switch (BPF_SRC(code)) { | ||
| 1745 | case BPF_X: | ||
| 1746 | emit_ia32_div_mod_r(BPF_OP(code), dst_lo, | ||
| 1747 | src_lo, dstk, sstk, &prog); | ||
| 1748 | break; | ||
| 1749 | case BPF_K: | ||
| 1750 | /* mov ecx,imm32 */ | ||
| 1751 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), | ||
| 1752 | imm32); | ||
| 1753 | emit_ia32_div_mod_r(BPF_OP(code), dst_lo, | ||
| 1754 | IA32_ECX, dstk, false, | ||
| 1755 | &prog); | ||
| 1756 | break; | ||
| 1757 | } | ||
| 1758 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
| 1759 | break; | ||
| 1760 | case BPF_ALU64 | BPF_DIV | BPF_K: | ||
| 1761 | case BPF_ALU64 | BPF_DIV | BPF_X: | ||
| 1762 | case BPF_ALU64 | BPF_MOD | BPF_K: | ||
| 1763 | case BPF_ALU64 | BPF_MOD | BPF_X: | ||
| 1764 | goto notyet; | ||
| 1765 | /* dst = dst >> imm */ | ||
| 1766 | /* dst = dst << imm */ | ||
| 1767 | case BPF_ALU | BPF_RSH | BPF_K: | ||
| 1768 | case BPF_ALU | BPF_LSH | BPF_K: | ||
| 1769 | if (unlikely(imm32 > 31)) | ||
| 1770 | return -EINVAL; | ||
| 1771 | /* mov ecx,imm32 */ | ||
| 1772 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); | ||
| 1773 | emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk, | ||
| 1774 | false, &prog); | ||
| 1775 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
| 1776 | break; | ||
| 1777 | /* dst = dst << imm */ | ||
| 1778 | case BPF_ALU64 | BPF_LSH | BPF_K: | ||
| 1779 | if (unlikely(imm32 > 63)) | ||
| 1780 | return -EINVAL; | ||
| 1781 | emit_ia32_lsh_i64(dst, imm32, dstk, &prog); | ||
| 1782 | break; | ||
| 1783 | /* dst = dst >> imm */ | ||
| 1784 | case BPF_ALU64 | BPF_RSH | BPF_K: | ||
| 1785 | if (unlikely(imm32 > 63)) | ||
| 1786 | return -EINVAL; | ||
| 1787 | emit_ia32_rsh_i64(dst, imm32, dstk, &prog); | ||
| 1788 | break; | ||
| 1789 | /* dst = dst << src */ | ||
| 1790 | case BPF_ALU64 | BPF_LSH | BPF_X: | ||
| 1791 | emit_ia32_lsh_r64(dst, src, dstk, sstk, &prog); | ||
| 1792 | break; | ||
| 1793 | /* dst = dst >> src */ | ||
| 1794 | case BPF_ALU64 | BPF_RSH | BPF_X: | ||
| 1795 | emit_ia32_rsh_r64(dst, src, dstk, sstk, &prog); | ||
| 1796 | break; | ||
| 1797 | /* dst = dst >> src (signed) */ | ||
| 1798 | case BPF_ALU64 | BPF_ARSH | BPF_X: | ||
| 1799 | emit_ia32_arsh_r64(dst, src, dstk, sstk, &prog); | ||
| 1800 | break; | ||
| 1801 | /* dst = dst >> imm (signed) */ | ||
| 1802 | case BPF_ALU64 | BPF_ARSH | BPF_K: | ||
| 1803 | if (unlikely(imm32 > 63)) | ||
| 1804 | return -EINVAL; | ||
| 1805 | emit_ia32_arsh_i64(dst, imm32, dstk, &prog); | ||
| 1806 | break; | ||
| 1807 | /* dst = -dst */ | ||
| 1808 | case BPF_ALU | BPF_NEG: | ||
| 1809 | emit_ia32_alu_i(is64, false, BPF_OP(code), | ||
| 1810 | dst_lo, 0, dstk, &prog); | ||
| 1811 | emit_ia32_mov_i(dst_hi, 0, dstk, &prog); | ||
| 1812 | break; | ||
| 1813 | /* dst = -dst (64 bit) */ | ||
| 1814 | case BPF_ALU64 | BPF_NEG: | ||
| 1815 | emit_ia32_neg64(dst, dstk, &prog); | ||
| 1816 | break; | ||
| 1817 | /* dst = dst * src/imm */ | ||
| 1818 | case BPF_ALU64 | BPF_MUL | BPF_X: | ||
| 1819 | case BPF_ALU64 | BPF_MUL | BPF_K: | ||
| 1820 | switch (BPF_SRC(code)) { | ||
| 1821 | case BPF_X: | ||
| 1822 | emit_ia32_mul_r64(dst, src, dstk, sstk, &prog); | ||
| 1823 | break; | ||
| 1824 | case BPF_K: | ||
| 1825 | emit_ia32_mul_i64(dst, imm32, dstk, &prog); | ||
| 1826 | break; | ||
| 1827 | } | ||
| 1828 | break; | ||
| 1829 | /* dst = htole(dst) */ | ||
| 1830 | case BPF_ALU | BPF_END | BPF_FROM_LE: | ||
| 1831 | emit_ia32_to_le_r64(dst, imm32, dstk, &prog); | ||
| 1832 | break; | ||
| 1833 | /* dst = htobe(dst) */ | ||
| 1834 | case BPF_ALU | BPF_END | BPF_FROM_BE: | ||
| 1835 | emit_ia32_to_be_r64(dst, imm32, dstk, &prog); | ||
| 1836 | break; | ||
| 1837 | /* dst = imm64 */ | ||
| 1838 | case BPF_LD | BPF_IMM | BPF_DW: { | ||
| 1839 | s32 hi, lo = imm32; | ||
| 1840 | |||
| 1841 | hi = insn[1].imm; | ||
| 1842 | emit_ia32_mov_i(dst_lo, lo, dstk, &prog); | ||
| 1843 | emit_ia32_mov_i(dst_hi, hi, dstk, &prog); | ||
| 1844 | insn++; | ||
| 1845 | i++; | ||
| 1846 | break; | ||
| 1847 | } | ||
| 1848 | /* ST: *(u8*)(dst_reg + off) = imm */ | ||
| 1849 | case BPF_ST | BPF_MEM | BPF_H: | ||
| 1850 | case BPF_ST | BPF_MEM | BPF_B: | ||
| 1851 | case BPF_ST | BPF_MEM | BPF_W: | ||
| 1852 | case BPF_ST | BPF_MEM | BPF_DW: | ||
| 1853 | if (dstk) | ||
| 1854 | /* mov eax,dword ptr [ebp+off] */ | ||
| 1855 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1856 | STACK_VAR(dst_lo)); | ||
| 1857 | else | ||
| 1858 | /* mov eax,dst_lo */ | ||
| 1859 | EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
| 1860 | |||
| 1861 | switch (BPF_SIZE(code)) { | ||
| 1862 | case BPF_B: | ||
| 1863 | EMIT(0xC6, 1); break; | ||
| 1864 | case BPF_H: | ||
| 1865 | EMIT2(0x66, 0xC7); break; | ||
| 1866 | case BPF_W: | ||
| 1867 | case BPF_DW: | ||
| 1868 | EMIT(0xC7, 1); break; | ||
| 1869 | } | ||
| 1870 | |||
| 1871 | if (is_imm8(insn->off)) | ||
| 1872 | EMIT2(add_1reg(0x40, IA32_EAX), insn->off); | ||
| 1873 | else | ||
| 1874 | EMIT1_off32(add_1reg(0x80, IA32_EAX), | ||
| 1875 | insn->off); | ||
| 1876 | EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(code))); | ||
| 1877 | |||
| 1878 | if (BPF_SIZE(code) == BPF_DW) { | ||
| 1879 | u32 hi; | ||
| 1880 | |||
| 1881 | hi = imm32 & (1<<31) ? (u32)~0 : 0; | ||
| 1882 | EMIT2_off32(0xC7, add_1reg(0x80, IA32_EAX), | ||
| 1883 | insn->off + 4); | ||
| 1884 | EMIT(hi, 4); | ||
| 1885 | } | ||
| 1886 | break; | ||
| 1887 | |||
| 1888 | /* STX: *(u8*)(dst_reg + off) = src_reg */ | ||
| 1889 | case BPF_STX | BPF_MEM | BPF_B: | ||
| 1890 | case BPF_STX | BPF_MEM | BPF_H: | ||
| 1891 | case BPF_STX | BPF_MEM | BPF_W: | ||
| 1892 | case BPF_STX | BPF_MEM | BPF_DW: | ||
| 1893 | if (dstk) | ||
| 1894 | /* mov eax,dword ptr [ebp+off] */ | ||
| 1895 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1896 | STACK_VAR(dst_lo)); | ||
| 1897 | else | ||
| 1898 | /* mov eax,dst_lo */ | ||
| 1899 | EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX)); | ||
| 1900 | |||
| 1901 | if (sstk) | ||
| 1902 | /* mov edx,dword ptr [ebp+off] */ | ||
| 1903 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 1904 | STACK_VAR(src_lo)); | ||
| 1905 | else | ||
| 1906 | /* mov edx,src_lo */ | ||
| 1907 | EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EDX)); | ||
| 1908 | |||
| 1909 | switch (BPF_SIZE(code)) { | ||
| 1910 | case BPF_B: | ||
| 1911 | EMIT(0x88, 1); break; | ||
| 1912 | case BPF_H: | ||
| 1913 | EMIT2(0x66, 0x89); break; | ||
| 1914 | case BPF_W: | ||
| 1915 | case BPF_DW: | ||
| 1916 | EMIT(0x89, 1); break; | ||
| 1917 | } | ||
| 1918 | |||
| 1919 | if (is_imm8(insn->off)) | ||
| 1920 | EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX), | ||
| 1921 | insn->off); | ||
| 1922 | else | ||
| 1923 | EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX), | ||
| 1924 | insn->off); | ||
| 1925 | |||
| 1926 | if (BPF_SIZE(code) == BPF_DW) { | ||
| 1927 | if (sstk) | ||
| 1928 | /* mov edx,dword ptr [ebp+off] */ | ||
| 1929 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, | ||
| 1930 | IA32_EDX), | ||
| 1931 | STACK_VAR(src_hi)); | ||
| 1932 | else | ||
| 1933 | /* mov edx,src_hi */ | ||
| 1934 | EMIT2(0x8B, add_2reg(0xC0, src_hi, | ||
| 1935 | IA32_EDX)); | ||
| 1936 | EMIT1(0x89); | ||
| 1937 | if (is_imm8(insn->off + 4)) { | ||
| 1938 | EMIT2(add_2reg(0x40, IA32_EAX, | ||
| 1939 | IA32_EDX), | ||
| 1940 | insn->off + 4); | ||
| 1941 | } else { | ||
| 1942 | EMIT1(add_2reg(0x80, IA32_EAX, | ||
| 1943 | IA32_EDX)); | ||
| 1944 | EMIT(insn->off + 4, 4); | ||
| 1945 | } | ||
| 1946 | } | ||
| 1947 | break; | ||
| 1948 | |||
| 1949 | /* LDX: dst_reg = *(u8*)(src_reg + off) */ | ||
| 1950 | case BPF_LDX | BPF_MEM | BPF_B: | ||
| 1951 | case BPF_LDX | BPF_MEM | BPF_H: | ||
| 1952 | case BPF_LDX | BPF_MEM | BPF_W: | ||
| 1953 | case BPF_LDX | BPF_MEM | BPF_DW: | ||
| 1954 | if (sstk) | ||
| 1955 | /* mov eax,dword ptr [ebp+off] */ | ||
| 1956 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 1957 | STACK_VAR(src_lo)); | ||
| 1958 | else | ||
| 1959 | /* mov eax,src_lo */ | ||
| 1960 | EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EAX)); | ||
| 1961 | |||
| 1962 | switch (BPF_SIZE(code)) { | ||
| 1963 | case BPF_B: | ||
| 1964 | EMIT2(0x0F, 0xB6); break; | ||
| 1965 | case BPF_H: | ||
| 1966 | EMIT2(0x0F, 0xB7); break; | ||
| 1967 | case BPF_W: | ||
| 1968 | case BPF_DW: | ||
| 1969 | EMIT(0x8B, 1); break; | ||
| 1970 | } | ||
| 1971 | |||
| 1972 | if (is_imm8(insn->off)) | ||
| 1973 | EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX), | ||
| 1974 | insn->off); | ||
| 1975 | else | ||
| 1976 | EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX), | ||
| 1977 | insn->off); | ||
| 1978 | |||
| 1979 | if (dstk) | ||
| 1980 | /* mov dword ptr [ebp+off],edx */ | ||
| 1981 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 1982 | STACK_VAR(dst_lo)); | ||
| 1983 | else | ||
| 1984 | /* mov dst_lo,edx */ | ||
| 1985 | EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EDX)); | ||
| 1986 | switch (BPF_SIZE(code)) { | ||
| 1987 | case BPF_B: | ||
| 1988 | case BPF_H: | ||
| 1989 | case BPF_W: | ||
| 1990 | if (dstk) { | ||
| 1991 | EMIT3(0xC7, add_1reg(0x40, IA32_EBP), | ||
| 1992 | STACK_VAR(dst_hi)); | ||
| 1993 | EMIT(0x0, 4); | ||
| 1994 | } else { | ||
| 1995 | EMIT3(0xC7, add_1reg(0xC0, dst_hi), 0); | ||
| 1996 | } | ||
| 1997 | break; | ||
| 1998 | case BPF_DW: | ||
| 1999 | EMIT2_off32(0x8B, | ||
| 2000 | add_2reg(0x80, IA32_EAX, IA32_EDX), | ||
| 2001 | insn->off + 4); | ||
| 2002 | if (dstk) | ||
| 2003 | EMIT3(0x89, | ||
| 2004 | add_2reg(0x40, IA32_EBP, | ||
| 2005 | IA32_EDX), | ||
| 2006 | STACK_VAR(dst_hi)); | ||
| 2007 | else | ||
| 2008 | EMIT2(0x89, | ||
| 2009 | add_2reg(0xC0, dst_hi, IA32_EDX)); | ||
| 2010 | break; | ||
| 2011 | default: | ||
| 2012 | break; | ||
| 2013 | } | ||
| 2014 | break; | ||
| 2015 | /* call */ | ||
| 2016 | case BPF_JMP | BPF_CALL: | ||
| 2017 | { | ||
| 2018 | const u8 *r1 = bpf2ia32[BPF_REG_1]; | ||
| 2019 | const u8 *r2 = bpf2ia32[BPF_REG_2]; | ||
| 2020 | const u8 *r3 = bpf2ia32[BPF_REG_3]; | ||
| 2021 | const u8 *r4 = bpf2ia32[BPF_REG_4]; | ||
| 2022 | const u8 *r5 = bpf2ia32[BPF_REG_5]; | ||
| 2023 | |||
| 2024 | if (insn->src_reg == BPF_PSEUDO_CALL) | ||
| 2025 | goto notyet; | ||
| 2026 | |||
| 2027 | func = (u8 *) __bpf_call_base + imm32; | ||
| 2028 | jmp_offset = func - (image + addrs[i]); | ||
| 2029 | |||
| 2030 | if (!imm32 || !is_simm32(jmp_offset)) { | ||
| 2031 | pr_err("unsupported BPF func %d addr %p image %p\n", | ||
| 2032 | imm32, func, image); | ||
| 2033 | return -EINVAL; | ||
| 2034 | } | ||
| 2035 | |||
| 2036 | /* mov eax,dword ptr [ebp+off] */ | ||
| 2037 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 2038 | STACK_VAR(r1[0])); | ||
| 2039 | /* mov edx,dword ptr [ebp+off] */ | ||
| 2040 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 2041 | STACK_VAR(r1[1])); | ||
| 2042 | |||
| 2043 | emit_push_r64(r5, &prog); | ||
| 2044 | emit_push_r64(r4, &prog); | ||
| 2045 | emit_push_r64(r3, &prog); | ||
| 2046 | emit_push_r64(r2, &prog); | ||
| 2047 | |||
| 2048 | EMIT1_off32(0xE8, jmp_offset + 9); | ||
| 2049 | |||
| 2050 | /* mov dword ptr [ebp+off],eax */ | ||
| 2051 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 2052 | STACK_VAR(r0[0])); | ||
| 2053 | /* mov dword ptr [ebp+off],edx */ | ||
| 2054 | EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 2055 | STACK_VAR(r0[1])); | ||
| 2056 | |||
| 2057 | /* add esp,32 */ | ||
| 2058 | EMIT3(0x83, add_1reg(0xC0, IA32_ESP), 32); | ||
| 2059 | break; | ||
| 2060 | } | ||
| 2061 | case BPF_JMP | BPF_TAIL_CALL: | ||
| 2062 | emit_bpf_tail_call(&prog); | ||
| 2063 | break; | ||
| 2064 | |||
| 2065 | /* cond jump */ | ||
| 2066 | case BPF_JMP | BPF_JEQ | BPF_X: | ||
| 2067 | case BPF_JMP | BPF_JNE | BPF_X: | ||
| 2068 | case BPF_JMP | BPF_JGT | BPF_X: | ||
| 2069 | case BPF_JMP | BPF_JLT | BPF_X: | ||
| 2070 | case BPF_JMP | BPF_JGE | BPF_X: | ||
| 2071 | case BPF_JMP | BPF_JLE | BPF_X: | ||
| 2072 | case BPF_JMP | BPF_JSGT | BPF_X: | ||
| 2073 | case BPF_JMP | BPF_JSLE | BPF_X: | ||
| 2074 | case BPF_JMP | BPF_JSLT | BPF_X: | ||
| 2075 | case BPF_JMP | BPF_JSGE | BPF_X: { | ||
| 2076 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 2077 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 2078 | u8 sreg_lo = sstk ? IA32_ECX : src_lo; | ||
| 2079 | u8 sreg_hi = sstk ? IA32_EBX : src_hi; | ||
| 2080 | |||
| 2081 | if (dstk) { | ||
| 2082 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 2083 | STACK_VAR(dst_lo)); | ||
| 2084 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 2085 | STACK_VAR(dst_hi)); | ||
| 2086 | } | ||
| 2087 | |||
| 2088 | if (sstk) { | ||
| 2089 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
| 2090 | STACK_VAR(src_lo)); | ||
| 2091 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), | ||
| 2092 | STACK_VAR(src_hi)); | ||
| 2093 | } | ||
| 2094 | |||
| 2095 | /* cmp dreg_hi,sreg_hi */ | ||
| 2096 | EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); | ||
| 2097 | EMIT2(IA32_JNE, 2); | ||
| 2098 | /* cmp dreg_lo,sreg_lo */ | ||
| 2099 | EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); | ||
| 2100 | goto emit_cond_jmp; | ||
| 2101 | } | ||
| 2102 | case BPF_JMP | BPF_JSET | BPF_X: { | ||
| 2103 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 2104 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 2105 | u8 sreg_lo = sstk ? IA32_ECX : src_lo; | ||
| 2106 | u8 sreg_hi = sstk ? IA32_EBX : src_hi; | ||
| 2107 | |||
| 2108 | if (dstk) { | ||
| 2109 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 2110 | STACK_VAR(dst_lo)); | ||
| 2111 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 2112 | STACK_VAR(dst_hi)); | ||
| 2113 | } | ||
| 2114 | |||
| 2115 | if (sstk) { | ||
| 2116 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), | ||
| 2117 | STACK_VAR(src_lo)); | ||
| 2118 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), | ||
| 2119 | STACK_VAR(src_hi)); | ||
| 2120 | } | ||
| 2121 | /* and dreg_lo,sreg_lo */ | ||
| 2122 | EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo)); | ||
| 2123 | /* and dreg_hi,sreg_hi */ | ||
| 2124 | EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi)); | ||
| 2125 | /* or dreg_lo,dreg_hi */ | ||
| 2126 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
| 2127 | goto emit_cond_jmp; | ||
| 2128 | } | ||
| 2129 | case BPF_JMP | BPF_JSET | BPF_K: { | ||
| 2130 | u32 hi; | ||
| 2131 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 2132 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 2133 | u8 sreg_lo = IA32_ECX; | ||
| 2134 | u8 sreg_hi = IA32_EBX; | ||
| 2135 | |||
| 2136 | if (dstk) { | ||
| 2137 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 2138 | STACK_VAR(dst_lo)); | ||
| 2139 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 2140 | STACK_VAR(dst_hi)); | ||
| 2141 | } | ||
| 2142 | hi = imm32 & (1<<31) ? (u32)~0 : 0; | ||
| 2143 | |||
| 2144 | /* mov ecx,imm32 */ | ||
| 2145 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); | ||
| 2146 | /* mov ebx,imm32 */ | ||
| 2147 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi); | ||
| 2148 | |||
| 2149 | /* and dreg_lo,sreg_lo */ | ||
| 2150 | EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo)); | ||
| 2151 | /* and dreg_hi,sreg_hi */ | ||
| 2152 | EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi)); | ||
| 2153 | /* or dreg_lo,dreg_hi */ | ||
| 2154 | EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); | ||
| 2155 | goto emit_cond_jmp; | ||
| 2156 | } | ||
| 2157 | case BPF_JMP | BPF_JEQ | BPF_K: | ||
| 2158 | case BPF_JMP | BPF_JNE | BPF_K: | ||
| 2159 | case BPF_JMP | BPF_JGT | BPF_K: | ||
| 2160 | case BPF_JMP | BPF_JLT | BPF_K: | ||
| 2161 | case BPF_JMP | BPF_JGE | BPF_K: | ||
| 2162 | case BPF_JMP | BPF_JLE | BPF_K: | ||
| 2163 | case BPF_JMP | BPF_JSGT | BPF_K: | ||
| 2164 | case BPF_JMP | BPF_JSLE | BPF_K: | ||
| 2165 | case BPF_JMP | BPF_JSLT | BPF_K: | ||
| 2166 | case BPF_JMP | BPF_JSGE | BPF_K: { | ||
| 2167 | u32 hi; | ||
| 2168 | u8 dreg_lo = dstk ? IA32_EAX : dst_lo; | ||
| 2169 | u8 dreg_hi = dstk ? IA32_EDX : dst_hi; | ||
| 2170 | u8 sreg_lo = IA32_ECX; | ||
| 2171 | u8 sreg_hi = IA32_EBX; | ||
| 2172 | |||
| 2173 | if (dstk) { | ||
| 2174 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), | ||
| 2175 | STACK_VAR(dst_lo)); | ||
| 2176 | EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), | ||
| 2177 | STACK_VAR(dst_hi)); | ||
| 2178 | } | ||
| 2179 | |||
| 2180 | hi = imm32 & (1<<31) ? (u32)~0 : 0; | ||
| 2181 | /* mov ecx,imm32 */ | ||
| 2182 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); | ||
| 2183 | /* mov ebx,imm32 */ | ||
| 2184 | EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi); | ||
| 2185 | |||
| 2186 | /* cmp dreg_hi,sreg_hi */ | ||
| 2187 | EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); | ||
| 2188 | EMIT2(IA32_JNE, 2); | ||
| 2189 | /* cmp dreg_lo,sreg_lo */ | ||
| 2190 | EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); | ||
| 2191 | |||
| 2192 | emit_cond_jmp: /* Convert BPF opcode to x86 */ | ||
| 2193 | switch (BPF_OP(code)) { | ||
| 2194 | case BPF_JEQ: | ||
| 2195 | jmp_cond = IA32_JE; | ||
| 2196 | break; | ||
| 2197 | case BPF_JSET: | ||
| 2198 | case BPF_JNE: | ||
| 2199 | jmp_cond = IA32_JNE; | ||
| 2200 | break; | ||
| 2201 | case BPF_JGT: | ||
| 2202 | /* GT is unsigned '>', JA in x86 */ | ||
| 2203 | jmp_cond = IA32_JA; | ||
| 2204 | break; | ||
| 2205 | case BPF_JLT: | ||
| 2206 | /* LT is unsigned '<', JB in x86 */ | ||
| 2207 | jmp_cond = IA32_JB; | ||
| 2208 | break; | ||
| 2209 | case BPF_JGE: | ||
| 2210 | /* GE is unsigned '>=', JAE in x86 */ | ||
| 2211 | jmp_cond = IA32_JAE; | ||
| 2212 | break; | ||
| 2213 | case BPF_JLE: | ||
| 2214 | /* LE is unsigned '<=', JBE in x86 */ | ||
| 2215 | jmp_cond = IA32_JBE; | ||
| 2216 | break; | ||
| 2217 | case BPF_JSGT: | ||
| 2218 | /* Signed '>', GT in x86 */ | ||
| 2219 | jmp_cond = IA32_JG; | ||
| 2220 | break; | ||
| 2221 | case BPF_JSLT: | ||
| 2222 | /* Signed '<', LT in x86 */ | ||
| 2223 | jmp_cond = IA32_JL; | ||
| 2224 | break; | ||
| 2225 | case BPF_JSGE: | ||
| 2226 | /* Signed '>=', GE in x86 */ | ||
| 2227 | jmp_cond = IA32_JGE; | ||
| 2228 | break; | ||
| 2229 | case BPF_JSLE: | ||
| 2230 | /* Signed '<=', LE in x86 */ | ||
| 2231 | jmp_cond = IA32_JLE; | ||
| 2232 | break; | ||
| 2233 | default: /* to silence GCC warning */ | ||
| 2234 | return -EFAULT; | ||
| 2235 | } | ||
| 2236 | jmp_offset = addrs[i + insn->off] - addrs[i]; | ||
| 2237 | if (is_imm8(jmp_offset)) { | ||
| 2238 | EMIT2(jmp_cond, jmp_offset); | ||
| 2239 | } else if (is_simm32(jmp_offset)) { | ||
| 2240 | EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset); | ||
| 2241 | } else { | ||
| 2242 | pr_err("cond_jmp gen bug %llx\n", jmp_offset); | ||
| 2243 | return -EFAULT; | ||
| 2244 | } | ||
| 2245 | |||
| 2246 | break; | ||
| 2247 | } | ||
| 2248 | case BPF_JMP | BPF_JA: | ||
| 2249 | if (insn->off == -1) | ||
| 2250 | /* -1 jmp instructions will always jump | ||
| 2251 | * backwards two bytes. Explicitly handling | ||
| 2252 | * this case avoids wasting too many passes | ||
| 2253 | * when there are long sequences of replaced | ||
| 2254 | * dead code. | ||
| 2255 | */ | ||
| 2256 | jmp_offset = -2; | ||
| 2257 | else | ||
| 2258 | jmp_offset = addrs[i + insn->off] - addrs[i]; | ||
| 2259 | |||
| 2260 | if (!jmp_offset) | ||
| 2261 | /* Optimize out nop jumps */ | ||
| 2262 | break; | ||
| 2263 | emit_jmp: | ||
| 2264 | if (is_imm8(jmp_offset)) { | ||
| 2265 | EMIT2(0xEB, jmp_offset); | ||
| 2266 | } else if (is_simm32(jmp_offset)) { | ||
| 2267 | EMIT1_off32(0xE9, jmp_offset); | ||
| 2268 | } else { | ||
| 2269 | pr_err("jmp gen bug %llx\n", jmp_offset); | ||
| 2270 | return -EFAULT; | ||
| 2271 | } | ||
| 2272 | break; | ||
| 2273 | /* STX XADD: lock *(u32 *)(dst + off) += src */ | ||
| 2274 | case BPF_STX | BPF_XADD | BPF_W: | ||
| 2275 | /* STX XADD: lock *(u64 *)(dst + off) += src */ | ||
| 2276 | case BPF_STX | BPF_XADD | BPF_DW: | ||
| 2277 | goto notyet; | ||
| 2278 | case BPF_JMP | BPF_EXIT: | ||
| 2279 | if (seen_exit) { | ||
| 2280 | jmp_offset = ctx->cleanup_addr - addrs[i]; | ||
| 2281 | goto emit_jmp; | ||
| 2282 | } | ||
| 2283 | seen_exit = true; | ||
| 2284 | /* Update cleanup_addr */ | ||
| 2285 | ctx->cleanup_addr = proglen; | ||
| 2286 | emit_epilogue(&prog, bpf_prog->aux->stack_depth); | ||
| 2287 | break; | ||
| 2288 | notyet: | ||
| 2289 | pr_info_once("*** NOT YET: opcode %02x ***\n", code); | ||
| 2290 | return -EFAULT; | ||
| 2291 | default: | ||
| 2292 | /* | ||
| 2293 | * This error will be seen if a new instruction was added | ||
| 2294 | * to the interpreter but not to the JIT, or if there is | ||
| 2295 | * junk in bpf_prog. | ||
| 2296 | */ | ||
| 2297 | pr_err("bpf_jit: unknown opcode %02x\n", code); | ||
| 2298 | return -EINVAL; | ||
| 2299 | } | ||
| 2300 | |||
| 2301 | ilen = prog - temp; | ||
| 2302 | if (ilen > BPF_MAX_INSN_SIZE) { | ||
| 2303 | pr_err("bpf_jit: fatal insn size error\n"); | ||
| 2304 | return -EFAULT; | ||
| 2305 | } | ||
| 2306 | |||
| 2307 | if (image) { | ||
| 2308 | if (unlikely(proglen + ilen > oldproglen)) { | ||
| 2309 | pr_err("bpf_jit: fatal error\n"); | ||
| 2310 | return -EFAULT; | ||
| 2311 | } | ||
| 2312 | memcpy(image + proglen, temp, ilen); | ||
| 2313 | } | ||
| 2314 | proglen += ilen; | ||
| 2315 | addrs[i] = proglen; | ||
| 2316 | prog = temp; | ||
| 2317 | } | ||
| 2318 | return proglen; | ||
| 2319 | } | ||
| 2320 | |||
| 2321 | struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) | ||
| 2322 | { | ||
| 2323 | struct bpf_binary_header *header = NULL; | ||
| 2324 | struct bpf_prog *tmp, *orig_prog = prog; | ||
| 2325 | int proglen, oldproglen = 0; | ||
| 2326 | struct jit_context ctx = {}; | ||
| 2327 | bool tmp_blinded = false; | ||
| 2328 | u8 *image = NULL; | ||
| 2329 | int *addrs; | ||
| 2330 | int pass; | ||
| 2331 | int i; | ||
| 2332 | |||
| 2333 | if (!prog->jit_requested) | ||
| 2334 | return orig_prog; | ||
| 2335 | |||
| 2336 | tmp = bpf_jit_blind_constants(prog); | ||
| 2337 | /* | ||
| 2338 | * If blinding was requested and we failed during blinding, | ||
| 2339 | * we must fall back to the interpreter. | ||
| 2340 | */ | ||
| 2341 | if (IS_ERR(tmp)) | ||
| 2342 | return orig_prog; | ||
| 2343 | if (tmp != prog) { | ||
| 2344 | tmp_blinded = true; | ||
| 2345 | prog = tmp; | ||
| 2346 | } | ||
| 2347 | |||
| 2348 | addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL); | ||
| 2349 | if (!addrs) { | ||
| 2350 | prog = orig_prog; | ||
| 2351 | goto out; | ||
| 2352 | } | ||
| 2353 | |||
| 2354 | /* | ||
| 2355 | * Before the first pass, make a rough estimate of addrs[]: | ||
| 2356 | * each BPF instruction is translated to less than 64 bytes | ||
| 2357 | */ | ||
| 2358 | for (proglen = 0, i = 0; i < prog->len; i++) { | ||
| 2359 | proglen += 64; | ||
| 2360 | addrs[i] = proglen; | ||
| 2361 | } | ||
| 2362 | ctx.cleanup_addr = proglen; | ||
| 2363 | |||
| 2364 | /* | ||
| 2365 | * The JITed image shrinks with every pass and the loop iterates | ||
| 2366 | * until the image stops shrinking. Very large BPF programs | ||
| 2367 | * may only converge on the last pass. In such a case, do one | ||
| 2368 | * more pass to emit the final image. | ||
| 2369 | */ | ||
| 2370 | for (pass = 0; pass < 20 || image; pass++) { | ||
| 2371 | proglen = do_jit(prog, addrs, image, oldproglen, &ctx); | ||
| 2372 | if (proglen <= 0) { | ||
| 2373 | out_image: | ||
| 2374 | image = NULL; | ||
| 2375 | if (header) | ||
| 2376 | bpf_jit_binary_free(header); | ||
| 2377 | prog = orig_prog; | ||
| 2378 | goto out_addrs; | ||
| 2379 | } | ||
| 2380 | if (image) { | ||
| 2381 | if (proglen != oldproglen) { | ||
| 2382 | pr_err("bpf_jit: proglen=%d != oldproglen=%d\n", | ||
| 2383 | proglen, oldproglen); | ||
| 2384 | goto out_image; | ||
| 2385 | } | ||
| 2386 | break; | ||
| 2387 | } | ||
| 2388 | if (proglen == oldproglen) { | ||
| 2389 | header = bpf_jit_binary_alloc(proglen, &image, | ||
| 2390 | 1, jit_fill_hole); | ||
| 2391 | if (!header) { | ||
| 2392 | prog = orig_prog; | ||
| 2393 | goto out_addrs; | ||
| 2394 | } | ||
| 2395 | } | ||
| 2396 | oldproglen = proglen; | ||
| 2397 | cond_resched(); | ||
| 2398 | } | ||
| 2399 | |||
| 2400 | if (bpf_jit_enable > 1) | ||
| 2401 | bpf_jit_dump(prog->len, proglen, pass + 1, image); | ||
| 2402 | |||
| 2403 | if (image) { | ||
| 2404 | bpf_jit_binary_lock_ro(header); | ||
| 2405 | prog->bpf_func = (void *)image; | ||
| 2406 | prog->jited = 1; | ||
| 2407 | prog->jited_len = proglen; | ||
| 2408 | } else { | ||
| 2409 | prog = orig_prog; | ||
| 2410 | } | ||
| 2411 | |||
| 2412 | out_addrs: | ||
| 2413 | kfree(addrs); | ||
| 2414 | out: | ||
| 2415 | if (tmp_blinded) | ||
| 2416 | bpf_jit_prog_release_other(prog, prog == orig_prog ? | ||
| 2417 | tmp : orig_prog); | ||
| 2418 | return prog; | ||
| 2419 | } | ||
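The pass loop converges because the first-pass addrs[] estimate (64 bytes per instruction) only ever shrinks: once a jump target moves close enough, the jump is re-encoded in its rel8 form, which can in turn pull further jumps into range. A self-contained toy model of that fixed-point iteration (illustrative only; real instruction sizes differ):

```c
#include <stdio.h>

/* Toy model of bpf_int_jit_compile()'s shrink-until-stable loop:
 * 40 consecutive jumps all target the end of the program. A jump
 * costs 2 bytes when its forward offset fits in rel8, else 6 bytes,
 * so shrinking one jump can shrink others on the next pass.
 */
#define N 40

int main(void)
{
	int size[N], addr[N + 1], proglen = 0, old = -1, pass, i;

	for (i = 0; i < N; i++)
		size[i] = 6;			/* worst-case first estimate */

	for (pass = 0; proglen != old; pass++) {
		old = proglen;
		addr[0] = 0;
		for (i = 0; i < N; i++)		/* lay out with current sizes */
			addr[i + 1] = addr[i] + size[i];
		proglen = addr[N];
		for (i = 0; i < N; i++)		/* re-encode every jump */
			size[i] = (proglen - addr[i + 1] <= 127) ? 2 : 6;
	}
	printf("converged to %d bytes after %d passes\n", proglen, pass);
	return 0;
}
```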
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c index 7e298148ca26..cb87fccb9f6a 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2017-2018 Netronome Systems, Inc. |
| 3 | * | 3 | * |
| 4 | * This software is dual licensed under the GNU General License Version 2, | 4 | * This software is dual licensed under the GNU General License Version 2, |
| 5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
| @@ -102,6 +102,15 @@ nfp_bpf_cmsg_map_req_alloc(struct nfp_app_bpf *bpf, unsigned int n) | |||
| 102 | return nfp_bpf_cmsg_alloc(bpf, size); | 102 | return nfp_bpf_cmsg_alloc(bpf, size); |
| 103 | } | 103 | } |
| 104 | 104 | ||
| 105 | static u8 nfp_bpf_cmsg_get_type(struct sk_buff *skb) | ||
| 106 | { | ||
| 107 | struct cmsg_hdr *hdr; | ||
| 108 | |||
| 109 | hdr = (struct cmsg_hdr *)skb->data; | ||
| 110 | |||
| 111 | return hdr->type; | ||
| 112 | } | ||
| 113 | |||
| 105 | static unsigned int nfp_bpf_cmsg_get_tag(struct sk_buff *skb) | 114 | static unsigned int nfp_bpf_cmsg_get_tag(struct sk_buff *skb) |
| 106 | { | 115 | { |
| 107 | struct cmsg_hdr *hdr; | 116 | struct cmsg_hdr *hdr; |
| @@ -431,6 +440,11 @@ void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb) | |||
| 431 | goto err_free; | 440 | goto err_free; |
| 432 | } | 441 | } |
| 433 | 442 | ||
| 443 | if (nfp_bpf_cmsg_get_type(skb) == CMSG_TYPE_BPF_EVENT) { | ||
| 444 | nfp_bpf_event_output(bpf, skb); | ||
| 445 | return; | ||
| 446 | } | ||
| 447 | |||
| 434 | nfp_ctrl_lock(bpf->app->ctrl); | 448 | nfp_ctrl_lock(bpf->app->ctrl); |
| 435 | 449 | ||
| 436 | tag = nfp_bpf_cmsg_get_tag(skb); | 450 | tag = nfp_bpf_cmsg_get_tag(skb); |
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h index 39639ac28b01..3dbc21653ce5 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2017-2018 Netronome Systems, Inc. |
| 3 | * | 3 | * |
| 4 | * This software is dual licensed under the GNU General License Version 2, | 4 | * This software is dual licensed under the GNU General License Version 2, |
| 5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
| @@ -37,6 +37,14 @@ | |||
| 37 | #include <linux/bitops.h> | 37 | #include <linux/bitops.h> |
| 38 | #include <linux/types.h> | 38 | #include <linux/types.h> |
| 39 | 39 | ||
| 40 | /* Kernel's enum bpf_reg_type is not uABI so people may change it breaking | ||
| 41 | * our FW ABI. In that case we will do translation in the driver. | ||
| 42 | */ | ||
| 43 | #define NFP_BPF_SCALAR_VALUE 1 | ||
| 44 | #define NFP_BPF_MAP_VALUE 4 | ||
| 45 | #define NFP_BPF_STACK 6 | ||
| 46 | #define NFP_BPF_PACKET_DATA 8 | ||
| 47 | |||
| 40 | enum bpf_cap_tlv_type { | 48 | enum bpf_cap_tlv_type { |
| 41 | NFP_BPF_CAP_TYPE_FUNC = 1, | 49 | NFP_BPF_CAP_TYPE_FUNC = 1, |
| 42 | NFP_BPF_CAP_TYPE_ADJUST_HEAD = 2, | 50 | NFP_BPF_CAP_TYPE_ADJUST_HEAD = 2, |
| @@ -81,6 +89,7 @@ enum nfp_bpf_cmsg_type { | |||
| 81 | CMSG_TYPE_MAP_DELETE = 5, | 89 | CMSG_TYPE_MAP_DELETE = 5, |
| 82 | CMSG_TYPE_MAP_GETNEXT = 6, | 90 | CMSG_TYPE_MAP_GETNEXT = 6, |
| 83 | CMSG_TYPE_MAP_GETFIRST = 7, | 91 | CMSG_TYPE_MAP_GETFIRST = 7, |
| 92 | CMSG_TYPE_BPF_EVENT = 8, | ||
| 84 | __CMSG_TYPE_MAP_MAX, | 93 | __CMSG_TYPE_MAP_MAX, |
| 85 | }; | 94 | }; |
| 86 | 95 | ||
| @@ -155,4 +164,13 @@ struct cmsg_reply_map_op { | |||
| 155 | __be32 resv; | 164 | __be32 resv; |
| 156 | struct cmsg_key_value_pair elem[0]; | 165 | struct cmsg_key_value_pair elem[0]; |
| 157 | }; | 166 | }; |
| 167 | |||
| 168 | struct cmsg_bpf_event { | ||
| 169 | struct cmsg_hdr hdr; | ||
| 170 | __be32 cpu_id; | ||
| 171 | __be64 map_ptr; | ||
| 172 | __be32 data_size; | ||
| 173 | __be32 pkt_size; | ||
| 174 | u8 data[0]; | ||
| 175 | }; | ||
| 158 | #endif | 176 | #endif |
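struct cmsg_bpf_event is the wire format for those notifications: every multi-byte field is big-endian, and the flexible data[] member carries pkt_size bytes of packet followed, at the next 4-byte boundary, by data_size bytes of metadata (see the offload.c hunk further down). Below is a hedged user-space parser under those assumptions; HDR_LEN = 4 assumes struct cmsg_hdr is four bytes and the structure has no padding.

    #include <arpa/inet.h>   /* ntohl() */
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define HDR_LEN 4        /* assumed sizeof(struct cmsg_hdr) */

    static uint64_t be64_to_host(const uint8_t *p)
    {
        uint64_t v = 0;

        for (int i = 0; i < 8; i++)
            v = (v << 8) | p[i];
        return v;
    }

    static int parse_bpf_event(const uint8_t *buf, size_t len)
    {
        uint32_t cpu_id, data_size, pkt_size;
        uint64_t map_ptr;

        if (len < HDR_LEN + 20)          /* fixed part of cmsg_bpf_event */
            return -1;

        memcpy(&cpu_id, buf + HDR_LEN, 4);
        cpu_id = ntohl(cpu_id);
        map_ptr = be64_to_host(buf + HDR_LEN + 4);
        memcpy(&data_size, buf + HDR_LEN + 12, 4);
        data_size = ntohl(data_size);
        memcpy(&pkt_size, buf + HDR_LEN + 16, 4);
        pkt_size = ntohl(pkt_size);

        /* data[] carries pkt_size packet bytes; the metadata starts at
         * the next 4-byte boundary, i.e. data + round_up(pkt_size, 4).
         */
        if (len < HDR_LEN + 20 + (size_t)pkt_size + data_size)
            return -1;

        printf("event: cpu %u map 0x%llx pkt %u bytes meta %u bytes\n",
               cpu_id, (unsigned long long)map_ptr, pkt_size, data_size);
        return 0;
    }

    int main(void)
    {
        uint8_t msg[HDR_LEN + 20] = { 8 /* CMSG_TYPE_BPF_EVENT */ };

        return parse_bpf_event(msg, sizeof(msg));
    }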
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 65f0791cae0c..326a2085d650 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2016-2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2016-2018 Netronome Systems, Inc. |
| 3 | * | 3 | * |
| 4 | * This software is dual licensed under the GNU General License Version 2, | 4 | * This software is dual licensed under the GNU General License Version 2, |
| 5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
| @@ -1395,15 +1395,9 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |||
| 1395 | static int | 1395 | static int |
| 1396 | map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | 1396 | map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
| 1397 | { | 1397 | { |
| 1398 | struct bpf_offloaded_map *offmap; | ||
| 1399 | struct nfp_bpf_map *nfp_map; | ||
| 1400 | bool load_lm_ptr; | 1398 | bool load_lm_ptr; |
| 1401 | u32 ret_tgt; | 1399 | u32 ret_tgt; |
| 1402 | s64 lm_off; | 1400 | s64 lm_off; |
| 1403 | swreg tid; | ||
| 1404 | |||
| 1405 | offmap = (struct bpf_offloaded_map *)meta->arg1.map_ptr; | ||
| 1406 | nfp_map = offmap->dev_priv; | ||
| 1407 | 1401 | ||
| 1408 | /* We only have to reload LM0 if the key is not at start of stack */ | 1402 | /* We only have to reload LM0 if the key is not at start of stack */ |
| 1409 | lm_off = nfp_prog->stack_depth; | 1403 | lm_off = nfp_prog->stack_depth; |
| @@ -1416,17 +1410,12 @@ map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |||
| 1416 | if (meta->func_id == BPF_FUNC_map_update_elem) | 1410 | if (meta->func_id == BPF_FUNC_map_update_elem) |
| 1417 | emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2); | 1411 | emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2); |
| 1418 | 1412 | ||
| 1419 | /* Load map ID into a register, it should actually fit as an immediate | ||
| 1420 | * but in case it doesn't deal with it here, not in the delay slots. | ||
| 1421 | */ | ||
| 1422 | tid = ur_load_imm_any(nfp_prog, nfp_map->tid, imm_a(nfp_prog)); | ||
| 1423 | |||
| 1424 | emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id, | 1413 | emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id, |
| 1425 | 2, RELO_BR_HELPER); | 1414 | 2, RELO_BR_HELPER); |
| 1426 | ret_tgt = nfp_prog_current_offset(nfp_prog) + 2; | 1415 | ret_tgt = nfp_prog_current_offset(nfp_prog) + 2; |
| 1427 | 1416 | ||
| 1428 | /* Load map ID into A0 */ | 1417 | /* Load map ID into A0 */ |
| 1429 | wrp_mov(nfp_prog, reg_a(0), tid); | 1418 | wrp_mov(nfp_prog, reg_a(0), reg_a(2)); |
| 1430 | 1419 | ||
| 1431 | /* Load the return address into B0 */ | 1420 | /* Load the return address into B0 */ |
| 1432 | wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL); | 1421 | wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL); |
| @@ -1456,6 +1445,31 @@ nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |||
| 1456 | return 0; | 1445 | return 0; |
| 1457 | } | 1446 | } |
| 1458 | 1447 | ||
| 1448 | static int | ||
| 1449 | nfp_perf_event_output(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | ||
| 1450 | { | ||
| 1451 | swreg ptr_type; | ||
| 1452 | u32 ret_tgt; | ||
| 1453 | |||
| 1454 | ptr_type = ur_load_imm_any(nfp_prog, meta->arg1.type, imm_a(nfp_prog)); | ||
| 1455 | |||
| 1456 | ret_tgt = nfp_prog_current_offset(nfp_prog) + 3; | ||
| 1457 | |||
| 1458 | emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id, | ||
| 1459 | 2, RELO_BR_HELPER); | ||
| 1460 | |||
| 1461 | /* Load ptr type into A1 */ | ||
| 1462 | wrp_mov(nfp_prog, reg_a(1), ptr_type); | ||
| 1463 | |||
| 1464 | /* Load the return address into B0 */ | ||
| 1465 | wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL); | ||
| 1466 | |||
| 1467 | if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt)) | ||
| 1468 | return -EINVAL; | ||
| 1469 | |||
| 1470 | return 0; | ||
| 1471 | } | ||
| 1472 | |||
| 1459 | /* --- Callbacks --- */ | 1473 | /* --- Callbacks --- */ |
| 1460 | static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | 1474 | static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
| 1461 | { | 1475 | { |
| @@ -2411,6 +2425,8 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |||
| 2411 | return map_call_stack_common(nfp_prog, meta); | 2425 | return map_call_stack_common(nfp_prog, meta); |
| 2412 | case BPF_FUNC_get_prandom_u32: | 2426 | case BPF_FUNC_get_prandom_u32: |
| 2413 | return nfp_get_prandom_u32(nfp_prog, meta); | 2427 | return nfp_get_prandom_u32(nfp_prog, meta); |
| 2428 | case BPF_FUNC_perf_event_output: | ||
| 2429 | return nfp_perf_event_output(nfp_prog, meta); | ||
| 2414 | default: | 2430 | default: |
| 2415 | WARN_ONCE(1, "verifier allowed unsupported function\n"); | 2431 | WARN_ONCE(1, "verifier allowed unsupported function\n"); |
| 2416 | return -EOPNOTSUPP; | 2432 | return -EOPNOTSUPP; |
| @@ -3227,6 +3243,33 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) | |||
| 3227 | return 0; | 3243 | return 0; |
| 3228 | } | 3244 | } |
| 3229 | 3245 | ||
| 3246 | static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog) | ||
| 3247 | { | ||
| 3248 | struct nfp_insn_meta *meta1, *meta2; | ||
| 3249 | struct nfp_bpf_map *nfp_map; | ||
| 3250 | struct bpf_map *map; | ||
| 3251 | |||
| 3252 | nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { | ||
| 3253 | if (meta1->skip || meta2->skip) | ||
| 3254 | continue; | ||
| 3255 | |||
| 3256 | if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) || | ||
| 3257 | meta1->insn.src_reg != BPF_PSEUDO_MAP_FD) | ||
| 3258 | continue; | ||
| 3259 | |||
| 3260 | map = (void *)(unsigned long)((u32)meta1->insn.imm | | ||
| 3261 | (u64)meta2->insn.imm << 32); | ||
| 3262 | if (bpf_map_offload_neutral(map)) | ||
| 3263 | continue; | ||
| 3264 | nfp_map = map_to_offmap(map)->dev_priv; | ||
| 3265 | |||
| 3266 | meta1->insn.imm = nfp_map->tid; | ||
| 3267 | meta2->insn.imm = 0; | ||
| 3268 | } | ||
| 3269 | |||
| 3270 | return 0; | ||
| 3271 | } | ||
| 3272 | |||
| 3230 | static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len) | 3273 | static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len) |
| 3231 | { | 3274 | { |
| 3232 | __le64 *ustore = (__force __le64 *)prog; | 3275 | __le64 *ustore = (__force __le64 *)prog; |
| @@ -3263,6 +3306,10 @@ int nfp_bpf_jit(struct nfp_prog *nfp_prog) | |||
| 3263 | { | 3306 | { |
| 3264 | int ret; | 3307 | int ret; |
| 3265 | 3308 | ||
| 3309 | ret = nfp_bpf_replace_map_ptrs(nfp_prog); | ||
| 3310 | if (ret) | ||
| 3311 | return ret; | ||
| 3312 | |||
| 3266 | ret = nfp_bpf_optimize(nfp_prog); | 3313 | ret = nfp_bpf_optimize(nfp_prog); |
| 3267 | if (ret) | 3314 | if (ret) |
| 3268 | return ret; | 3315 | return ret; |
| @@ -3353,6 +3400,9 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv) | |||
| 3353 | case BPF_FUNC_map_delete_elem: | 3400 | case BPF_FUNC_map_delete_elem: |
| 3354 | val = nfp_prog->bpf->helpers.map_delete; | 3401 | val = nfp_prog->bpf->helpers.map_delete; |
| 3355 | break; | 3402 | break; |
| 3403 | case BPF_FUNC_perf_event_output: | ||
| 3404 | val = nfp_prog->bpf->helpers.perf_event_output; | ||
| 3405 | break; | ||
| 3356 | default: | 3406 | default: |
| 3357 | pr_err("relocation of unknown helper %d\n", | 3407 | pr_err("relocation of unknown helper %d\n", |
| 3358 | val); | 3408 | val); |
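The jit.c changes move map-pointer handling out of the call emitters: nfp_bpf_replace_map_ptrs() walks the program before code generation, recombines each BPF_LD | BPF_IMM | BPF_DW pseudo map-fd load (low 32 bits in the first instruction's imm, high 32 bits in the second's), and rewrites it to carry the device-side table id instead, so map_call_stack_common() can simply take the id from a register. A small sketch of the split-and-rewrite arithmetic, with a hypothetical table id:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* A 64-bit map pointer as carried by a BPF_LD | BPF_IMM | BPF_DW
         * instruction pair: low word in insn[0].imm, high in insn[1].imm.
         */
        uint64_t map_ptr = 0xffff8800deadbeefULL;
        int32_t imm_lo = (int32_t)(uint32_t)map_ptr;
        int32_t imm_hi = (int32_t)(uint32_t)(map_ptr >> 32);

        /* Reconstruction, as in nfp_bpf_replace_map_ptrs() */
        uint64_t back = (uint32_t)imm_lo | ((uint64_t)(uint32_t)imm_hi << 32);
        printf("roundtrip ok: %d\n", back == map_ptr);

        /* The rewrite then stores the device-side table id in the low
         * word and zeroes the high word.
         */
        uint32_t tid = 3;          /* hypothetical table id */
        imm_lo = (int32_t)tid;
        imm_hi = 0;
        printf("insn imms now: %d %d\n", imm_lo, imm_hi);
        return 0;
    }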
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 1dc424685f4e..d72f9e7f42da 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2017-2018 Netronome Systems, Inc. |
| 3 | * | 3 | * |
| 4 | * This software is dual licensed under the GNU General License Version 2, | 4 | * This software is dual licensed under the GNU General License Version 2, |
| 5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
| @@ -43,6 +43,14 @@ | |||
| 43 | #include "fw.h" | 43 | #include "fw.h" |
| 44 | #include "main.h" | 44 | #include "main.h" |
| 45 | 45 | ||
| 46 | const struct rhashtable_params nfp_bpf_maps_neutral_params = { | ||
| 47 | .nelem_hint = 4, | ||
| 48 | .key_len = FIELD_SIZEOF(struct nfp_bpf_neutral_map, ptr), | ||
| 49 | .key_offset = offsetof(struct nfp_bpf_neutral_map, ptr), | ||
| 50 | .head_offset = offsetof(struct nfp_bpf_neutral_map, l), | ||
| 51 | .automatic_shrinking = true, | ||
| 52 | }; | ||
| 53 | |||
| 46 | static bool nfp_net_ebpf_capable(struct nfp_net *nn) | 54 | static bool nfp_net_ebpf_capable(struct nfp_net *nn) |
| 47 | { | 55 | { |
| 48 | #ifdef __LITTLE_ENDIAN | 56 | #ifdef __LITTLE_ENDIAN |
| @@ -290,6 +298,9 @@ nfp_bpf_parse_cap_func(struct nfp_app_bpf *bpf, void __iomem *value, u32 length) | |||
| 290 | case BPF_FUNC_map_delete_elem: | 298 | case BPF_FUNC_map_delete_elem: |
| 291 | bpf->helpers.map_delete = readl(&cap->func_addr); | 299 | bpf->helpers.map_delete = readl(&cap->func_addr); |
| 292 | break; | 300 | break; |
| 301 | case BPF_FUNC_perf_event_output: | ||
| 302 | bpf->helpers.perf_event_output = readl(&cap->func_addr); | ||
| 303 | break; | ||
| 293 | } | 304 | } |
| 294 | 305 | ||
| 295 | return 0; | 306 | return 0; |
| @@ -401,17 +412,28 @@ static int nfp_bpf_init(struct nfp_app *app) | |||
| 401 | init_waitqueue_head(&bpf->cmsg_wq); | 412 | init_waitqueue_head(&bpf->cmsg_wq); |
| 402 | INIT_LIST_HEAD(&bpf->map_list); | 413 | INIT_LIST_HEAD(&bpf->map_list); |
| 403 | 414 | ||
| 404 | err = nfp_bpf_parse_capabilities(app); | 415 | err = rhashtable_init(&bpf->maps_neutral, &nfp_bpf_maps_neutral_params); |
| 405 | if (err) | 416 | if (err) |
| 406 | goto err_free_bpf; | 417 | goto err_free_bpf; |
| 407 | 418 | ||
| 419 | err = nfp_bpf_parse_capabilities(app); | ||
| 420 | if (err) | ||
| 421 | goto err_free_neutral_maps; | ||
| 422 | |||
| 408 | return 0; | 423 | return 0; |
| 409 | 424 | ||
| 425 | err_free_neutral_maps: | ||
| 426 | rhashtable_destroy(&bpf->maps_neutral); | ||
| 410 | err_free_bpf: | 427 | err_free_bpf: |
| 411 | kfree(bpf); | 428 | kfree(bpf); |
| 412 | return err; | 429 | return err; |
| 413 | } | 430 | } |
| 414 | 431 | ||
| 432 | static void nfp_check_rhashtable_empty(void *ptr, void *arg) | ||
| 433 | { | ||
| 434 | WARN_ON_ONCE(1); | ||
| 435 | } | ||
| 436 | |||
| 415 | static void nfp_bpf_clean(struct nfp_app *app) | 437 | static void nfp_bpf_clean(struct nfp_app *app) |
| 416 | { | 438 | { |
| 417 | struct nfp_app_bpf *bpf = app->priv; | 439 | struct nfp_app_bpf *bpf = app->priv; |
| @@ -419,6 +441,8 @@ static void nfp_bpf_clean(struct nfp_app *app) | |||
| 419 | WARN_ON(!skb_queue_empty(&bpf->cmsg_replies)); | 441 | WARN_ON(!skb_queue_empty(&bpf->cmsg_replies)); |
| 420 | WARN_ON(!list_empty(&bpf->map_list)); | 442 | WARN_ON(!list_empty(&bpf->map_list)); |
| 421 | WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use); | 443 | WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use); |
| 444 | rhashtable_free_and_destroy(&bpf->maps_neutral, | ||
| 445 | nfp_check_rhashtable_empty, NULL); | ||
| 422 | kfree(bpf); | 446 | kfree(bpf); |
| 423 | } | 447 | } |
| 424 | 448 | ||
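nfp_bpf_init() now creates the neutral-maps hash table before parsing capabilities and grows a matching unwind label, keeping the error path a mirror image of the setup order. The same goto-unwind idiom in a self-contained sketch (the struct and allocations are stand-ins, not the driver's types):

    #include <stdio.h>
    #include <stdlib.h>

    struct app { int *neutral_maps; int *caps; };

    static int app_init(struct app *a)
    {
        a->neutral_maps = malloc(sizeof(*a->neutral_maps));
        if (!a->neutral_maps)
            goto err_out;

        a->caps = malloc(sizeof(*a->caps));     /* parse capabilities */
        if (!a->caps)
            goto err_free_neutral_maps;         /* unwind in reverse order */

        return 0;

    err_free_neutral_maps:
        free(a->neutral_maps);
    err_out:
        return -1;
    }

    int main(void)
    {
        struct app a;

        if (app_init(&a))
            return 1;
        free(a.caps);
        free(a.neutral_maps);
        return 0;
    }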
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 68b5d326483d..82682378d57f 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2016-2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2016-2018 Netronome Systems, Inc. |
| 3 | * | 3 | * |
| 4 | * This software is dual licensed under the GNU General License Version 2, | 4 | * This software is dual licensed under the GNU General License Version 2, |
| 5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
| @@ -39,6 +39,7 @@ | |||
| 39 | #include <linux/bpf_verifier.h> | 39 | #include <linux/bpf_verifier.h> |
| 40 | #include <linux/kernel.h> | 40 | #include <linux/kernel.h> |
| 41 | #include <linux/list.h> | 41 | #include <linux/list.h> |
| 42 | #include <linux/rhashtable.h> | ||
| 42 | #include <linux/skbuff.h> | 43 | #include <linux/skbuff.h> |
| 43 | #include <linux/types.h> | 44 | #include <linux/types.h> |
| 44 | #include <linux/wait.h> | 45 | #include <linux/wait.h> |
| @@ -114,6 +115,8 @@ enum pkt_vec { | |||
| 114 | * @maps_in_use: number of currently offloaded maps | 115 | * @maps_in_use: number of currently offloaded maps |
| 115 | * @map_elems_in_use: number of elements allocated to offloaded maps | 116 | * @map_elems_in_use: number of elements allocated to offloaded maps |
| 116 | * | 117 | * |
| 118 | * @maps_neutral: hash table of offload-neutral maps (keyed on the map pointer) | ||
| 119 | * | ||
| 117 | * @adjust_head: adjust head capability | 120 | * @adjust_head: adjust head capability |
| 118 | * @adjust_head.flags: extra flags for adjust head | 121 | * @adjust_head.flags: extra flags for adjust head |
| 119 | * @adjust_head.off_min: minimal packet offset within buffer required | 122 | * @adjust_head.off_min: minimal packet offset within buffer required |
| @@ -133,6 +136,7 @@ enum pkt_vec { | |||
| 133 | * @helpers.map_lookup: map lookup helper address | 136 | * @helpers.map_lookup: map lookup helper address |
| 134 | * @helpers.map_update: map update helper address | 137 | * @helpers.map_update: map update helper address |
| 135 | * @helpers.map_delete: map delete helper address | 138 | * @helpers.map_delete: map delete helper address |
| 139 | * @helpers.perf_event_output: output perf event to a ring buffer | ||
| 136 | * | 140 | * |
| 137 | * @pseudo_random: FW initialized the pseudo-random machinery (CSRs) | 141 | * @pseudo_random: FW initialized the pseudo-random machinery (CSRs) |
| 138 | */ | 142 | */ |
| @@ -150,6 +154,8 @@ struct nfp_app_bpf { | |||
| 150 | unsigned int maps_in_use; | 154 | unsigned int maps_in_use; |
| 151 | unsigned int map_elems_in_use; | 155 | unsigned int map_elems_in_use; |
| 152 | 156 | ||
| 157 | struct rhashtable maps_neutral; | ||
| 158 | |||
| 153 | struct nfp_bpf_cap_adjust_head { | 159 | struct nfp_bpf_cap_adjust_head { |
| 154 | u32 flags; | 160 | u32 flags; |
| 155 | int off_min; | 161 | int off_min; |
| @@ -171,6 +177,7 @@ struct nfp_app_bpf { | |||
| 171 | u32 map_lookup; | 177 | u32 map_lookup; |
| 172 | u32 map_update; | 178 | u32 map_update; |
| 173 | u32 map_delete; | 179 | u32 map_delete; |
| 180 | u32 perf_event_output; | ||
| 174 | } helpers; | 181 | } helpers; |
| 175 | 182 | ||
| 176 | bool pseudo_random; | 183 | bool pseudo_random; |
| @@ -199,6 +206,14 @@ struct nfp_bpf_map { | |||
| 199 | enum nfp_bpf_map_use use_map[]; | 206 | enum nfp_bpf_map_use use_map[]; |
| 200 | }; | 207 | }; |
| 201 | 208 | ||
| 209 | struct nfp_bpf_neutral_map { | ||
| 210 | struct rhash_head l; | ||
| 211 | struct bpf_map *ptr; | ||
| 212 | u32 count; | ||
| 213 | }; | ||
| 214 | |||
| 215 | extern const struct rhashtable_params nfp_bpf_maps_neutral_params; | ||
| 216 | |||
| 202 | struct nfp_prog; | 217 | struct nfp_prog; |
| 203 | struct nfp_insn_meta; | 218 | struct nfp_insn_meta; |
| 204 | typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); | 219 | typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); |
| @@ -367,6 +382,8 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta) | |||
| 367 | * @error: error code if something went wrong | 382 | * @error: error code if something went wrong |
| 368 | * @stack_depth: max stack depth from the verifier | 383 | * @stack_depth: max stack depth from the verifier |
| 369 | * @adjust_head_location: if program has single adjust head call - the insn no. | 384 | * @adjust_head_location: if program has single adjust head call - the insn no. |
| 385 | * @map_records_cnt: the number of map pointers recorded for this prog | ||
| 386 | * @map_records: the map record pointers from bpf->maps_neutral | ||
| 370 | * @insns: list of BPF instruction wrappers (struct nfp_insn_meta) | 387 | * @insns: list of BPF instruction wrappers (struct nfp_insn_meta) |
| 371 | */ | 388 | */ |
| 372 | struct nfp_prog { | 389 | struct nfp_prog { |
| @@ -390,6 +407,9 @@ struct nfp_prog { | |||
| 390 | unsigned int stack_depth; | 407 | unsigned int stack_depth; |
| 391 | unsigned int adjust_head_location; | 408 | unsigned int adjust_head_location; |
| 392 | 409 | ||
| 410 | unsigned int map_records_cnt; | ||
| 411 | struct nfp_bpf_neutral_map **map_records; | ||
| 412 | |||
| 393 | struct list_head insns; | 413 | struct list_head insns; |
| 394 | }; | 414 | }; |
| 395 | 415 | ||
| @@ -440,5 +460,7 @@ int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap, | |||
| 440 | int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap, | 460 | int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap, |
| 441 | void *key, void *next_key); | 461 | void *key, void *next_key); |
| 442 | 462 | ||
| 463 | int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb); | ||
| 464 | |||
| 443 | void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb); | 465 | void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb); |
| 444 | #endif | 466 | #endif |
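struct nfp_bpf_neutral_map is the record type behind bpf->maps_neutral: the key is the struct bpf_map pointer value itself, which is why nfp_bpf_maps_neutral_params derives key_len and key_offset from the ptr member, and why lookups pass &map (a pointer to the pointer). The sizeof/offsetof arithmetic mirrored in plain C, with a demo struct standing in for the real one:

    #include <stddef.h>
    #include <stdio.h>

    struct neutral_map_demo {
        void *hash_head;   /* stands in for struct rhash_head l */
        void *ptr;         /* the key: the bpf_map pointer itself */
        unsigned int count;
    };

    #define FIELD_SIZEOF_DEMO(t, f) (sizeof(((t *)0)->f))

    int main(void)
    {
        printf("key_len     = %zu\n",
               FIELD_SIZEOF_DEMO(struct neutral_map_demo, ptr));
        printf("key_offset  = %zu\n",
               offsetof(struct neutral_map_demo, ptr));
        printf("head_offset = %zu\n",
               offsetof(struct neutral_map_demo, hash_head));
        return 0;
    }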
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 42d98792bd25..4db0ac1e42a8 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2016-2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2016-2018 Netronome Systems, Inc. |
| 3 | * | 3 | * |
| 4 | * This software is dual licensed under the GNU General License Version 2, | 4 | * This software is dual licensed under the GNU General License Version 2, |
| 5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
| @@ -57,6 +57,126 @@ | |||
| 57 | #include "../nfp_net.h" | 57 | #include "../nfp_net.h" |
| 58 | 58 | ||
| 59 | static int | 59 | static int |
| 60 | nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, | ||
| 61 | struct bpf_map *map) | ||
| 62 | { | ||
| 63 | struct nfp_bpf_neutral_map *record; | ||
| 64 | int err; | ||
| 65 | |||
| 66 | /* Map record paths are entered via ndo, so the update side is protected. */ | ||
| 67 | ASSERT_RTNL(); | ||
| 68 | |||
| 69 | /* Reuse path - another offloaded program is already tracking this map. */ | ||
| 70 | record = rhashtable_lookup_fast(&bpf->maps_neutral, &map, | ||
| 71 | nfp_bpf_maps_neutral_params); | ||
| 72 | if (record) { | ||
| 73 | nfp_prog->map_records[nfp_prog->map_records_cnt++] = record; | ||
| 74 | record->count++; | ||
| 75 | return 0; | ||
| 76 | } | ||
| 77 | |||
| 78 | /* Grab a single ref to the map for our record. The prog destroy ndo | ||
| 79 | * happens after free_used_maps(). | ||
| 80 | */ | ||
| 81 | map = bpf_map_inc(map, false); | ||
| 82 | if (IS_ERR(map)) | ||
| 83 | return PTR_ERR(map); | ||
| 84 | |||
| 85 | record = kmalloc(sizeof(*record), GFP_KERNEL); | ||
| 86 | if (!record) { | ||
| 87 | err = -ENOMEM; | ||
| 88 | goto err_map_put; | ||
| 89 | } | ||
| 90 | |||
| 91 | record->ptr = map; | ||
| 92 | record->count = 1; | ||
| 93 | |||
| 94 | err = rhashtable_insert_fast(&bpf->maps_neutral, &record->l, | ||
| 95 | nfp_bpf_maps_neutral_params); | ||
| 96 | if (err) | ||
| 97 | goto err_free_rec; | ||
| 98 | |||
| 99 | nfp_prog->map_records[nfp_prog->map_records_cnt++] = record; | ||
| 100 | |||
| 101 | return 0; | ||
| 102 | |||
| 103 | err_free_rec: | ||
| 104 | kfree(record); | ||
| 105 | err_map_put: | ||
| 106 | bpf_map_put(map); | ||
| 107 | return err; | ||
| 108 | } | ||
| 109 | |||
| 110 | static void | ||
| 111 | nfp_map_ptrs_forget(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog) | ||
| 112 | { | ||
| 113 | bool freed = false; | ||
| 114 | int i; | ||
| 115 | |||
| 116 | ASSERT_RTNL(); | ||
| 117 | |||
| 118 | for (i = 0; i < nfp_prog->map_records_cnt; i++) { | ||
| 119 | if (--nfp_prog->map_records[i]->count) { | ||
| 120 | nfp_prog->map_records[i] = NULL; | ||
| 121 | continue; | ||
| 122 | } | ||
| 123 | |||
| 124 | WARN_ON(rhashtable_remove_fast(&bpf->maps_neutral, | ||
| 125 | &nfp_prog->map_records[i]->l, | ||
| 126 | nfp_bpf_maps_neutral_params)); | ||
| 127 | freed = true; | ||
| 128 | } | ||
| 129 | |||
| 130 | if (freed) { | ||
| 131 | synchronize_rcu(); | ||
| 132 | |||
| 133 | for (i = 0; i < nfp_prog->map_records_cnt; i++) | ||
| 134 | if (nfp_prog->map_records[i]) { | ||
| 135 | bpf_map_put(nfp_prog->map_records[i]->ptr); | ||
| 136 | kfree(nfp_prog->map_records[i]); | ||
| 137 | } | ||
| 138 | } | ||
| 139 | |||
| 140 | kfree(nfp_prog->map_records); | ||
| 141 | nfp_prog->map_records = NULL; | ||
| 142 | nfp_prog->map_records_cnt = 0; | ||
| 143 | } | ||
| 144 | |||
| 145 | static int | ||
| 146 | nfp_map_ptrs_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, | ||
| 147 | struct bpf_prog *prog) | ||
| 148 | { | ||
| 149 | int i, cnt, err; | ||
| 150 | |||
| 151 | /* Quickly count the maps we will have to remember */ | ||
| 152 | cnt = 0; | ||
| 153 | for (i = 0; i < prog->aux->used_map_cnt; i++) | ||
| 154 | if (bpf_map_offload_neutral(prog->aux->used_maps[i])) | ||
| 155 | cnt++; | ||
| 156 | if (!cnt) | ||
| 157 | return 0; | ||
| 158 | |||
| 159 | nfp_prog->map_records = kmalloc_array(cnt, | ||
| 160 | sizeof(nfp_prog->map_records[0]), | ||
| 161 | GFP_KERNEL); | ||
| 162 | if (!nfp_prog->map_records) | ||
| 163 | return -ENOMEM; | ||
| 164 | |||
| 165 | for (i = 0; i < prog->aux->used_map_cnt; i++) | ||
| 166 | if (bpf_map_offload_neutral(prog->aux->used_maps[i])) { | ||
| 167 | err = nfp_map_ptr_record(bpf, nfp_prog, | ||
| 168 | prog->aux->used_maps[i]); | ||
| 169 | if (err) { | ||
| 170 | nfp_map_ptrs_forget(bpf, nfp_prog); | ||
| 171 | return err; | ||
| 172 | } | ||
| 173 | } | ||
| 174 | WARN_ON(cnt != nfp_prog->map_records_cnt); | ||
| 175 | |||
| 176 | return 0; | ||
| 177 | } | ||
| 178 | |||
| 179 | static int | ||
| 60 | nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, | 180 | nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, |
| 61 | unsigned int cnt) | 181 | unsigned int cnt) |
| 62 | { | 182 | { |
| @@ -151,7 +271,7 @@ static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog) | |||
| 151 | prog->aux->offload->jited_len = nfp_prog->prog_len * sizeof(u64); | 271 | prog->aux->offload->jited_len = nfp_prog->prog_len * sizeof(u64); |
| 152 | prog->aux->offload->jited_image = nfp_prog->prog; | 272 | prog->aux->offload->jited_image = nfp_prog->prog; |
| 153 | 273 | ||
| 154 | return 0; | 274 | return nfp_map_ptrs_record(nfp_prog->bpf, nfp_prog, prog); |
| 155 | } | 275 | } |
| 156 | 276 | ||
| 157 | static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog) | 277 | static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog) |
| @@ -159,6 +279,7 @@ static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog) | |||
| 159 | struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; | 279 | struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; |
| 160 | 280 | ||
| 161 | kvfree(nfp_prog->prog); | 281 | kvfree(nfp_prog->prog); |
| 282 | nfp_map_ptrs_forget(nfp_prog->bpf, nfp_prog); | ||
| 162 | nfp_prog_free(nfp_prog); | 283 | nfp_prog_free(nfp_prog); |
| 163 | 284 | ||
| 164 | return 0; | 285 | return 0; |
| @@ -320,6 +441,53 @@ int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf) | |||
| 320 | } | 441 | } |
| 321 | } | 442 | } |
| 322 | 443 | ||
| 444 | static unsigned long | ||
| 445 | nfp_bpf_perf_event_copy(void *dst, const void *src, | ||
| 446 | unsigned long off, unsigned long len) | ||
| 447 | { | ||
| 448 | memcpy(dst, src + off, len); | ||
| 449 | return 0; | ||
| 450 | } | ||
| 451 | |||
| 452 | int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb) | ||
| 453 | { | ||
| 454 | struct cmsg_bpf_event *cbe = (void *)skb->data; | ||
| 455 | u32 pkt_size, data_size; | ||
| 456 | struct bpf_map *map; | ||
| 457 | |||
| 458 | if (skb->len < sizeof(struct cmsg_bpf_event)) | ||
| 459 | goto err_drop; | ||
| 460 | |||
| 461 | pkt_size = be32_to_cpu(cbe->pkt_size); | ||
| 462 | data_size = be32_to_cpu(cbe->data_size); | ||
| 463 | map = (void *)(unsigned long)be64_to_cpu(cbe->map_ptr); | ||
| 464 | |||
| 465 | if (skb->len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size) | ||
| 466 | goto err_drop; | ||
| 467 | if (cbe->hdr.ver != CMSG_MAP_ABI_VERSION) | ||
| 468 | goto err_drop; | ||
| 469 | |||
| 470 | rcu_read_lock(); | ||
| 471 | if (!rhashtable_lookup_fast(&bpf->maps_neutral, &map, | ||
| 472 | nfp_bpf_maps_neutral_params)) { | ||
| 473 | rcu_read_unlock(); | ||
| 474 | pr_warn("perf event: dest map pointer %px not recognized, dropping event\n", | ||
| 475 | map); | ||
| 476 | goto err_drop; | ||
| 477 | } | ||
| 478 | |||
| 479 | bpf_event_output(map, be32_to_cpu(cbe->cpu_id), | ||
| 480 | &cbe->data[round_up(pkt_size, 4)], data_size, | ||
| 481 | cbe->data, pkt_size, nfp_bpf_perf_event_copy); | ||
| 482 | rcu_read_unlock(); | ||
| 483 | |||
| 484 | dev_consume_skb_any(skb); | ||
| 485 | return 0; | ||
| 486 | err_drop: | ||
| 487 | dev_kfree_skb_any(skb); | ||
| 488 | return -EINVAL; | ||
| 489 | } | ||
| 490 | |||
| 323 | static int | 491 | static int |
| 324 | nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog, | 492 | nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog, |
| 325 | struct netlink_ext_ack *extack) | 493 | struct netlink_ext_ack *extack) |
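nfp_map_ptr_record() and nfp_map_ptrs_forget() implement a refcounted lookup-or-create on that table: the first program using a given perf event map takes a single bpf_map reference and inserts a record, later programs merely bump the count, and the last user removes the record, with an RCU grace period before the free since the event RX path does lockless lookups. A much-simplified user-space analogue of the counting logic, with a flat array standing in for the rhashtable and no RCU or map references:

    #include <stdio.h>
    #include <stdlib.h>

    struct record { const void *ptr; unsigned int count; };

    #define MAX_RECORDS 16
    static struct record *table[MAX_RECORDS];

    static struct record *record_get(const void *map)
    {
        int i, slot = -1;

        for (i = 0; i < MAX_RECORDS; i++) {
            if (table[i] && table[i]->ptr == map) {
                table[i]->count++;          /* reuse path */
                return table[i];
            }
            if (!table[i] && slot < 0)
                slot = i;
        }
        if (slot < 0)
            return NULL;

        table[slot] = malloc(sizeof(*table[slot]));
        if (!table[slot])
            return NULL;
        table[slot]->ptr = map;
        table[slot]->count = 1;             /* first user takes the ref */
        return table[slot];
    }

    static void record_put(struct record *r)
    {
        int i;

        if (--r->count)
            return;
        for (i = 0; i < MAX_RECORDS; i++)
            if (table[i] == r)
                table[i] = NULL;
        free(r);
    }

    int main(void)
    {
        int dummy;
        struct record *a = record_get(&dummy);
        struct record *b = record_get(&dummy);   /* same record as a */

        printf("shared record, count %u\n", a->count);   /* prints 2 */
        record_put(b);
        record_put(a);
        return 0;
    }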
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 06ad53ce4ad9..e163f3cfa47d 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2016-2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2016-2018 Netronome Systems, Inc. |
| 3 | * | 3 | * |
| 4 | * This software is dual licensed under the GNU General License Version 2, | 4 | * This software is dual licensed under the GNU General License Version 2, |
| 5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
| @@ -36,6 +36,8 @@ | |||
| 36 | #include <linux/kernel.h> | 36 | #include <linux/kernel.h> |
| 37 | #include <linux/pkt_cls.h> | 37 | #include <linux/pkt_cls.h> |
| 38 | 38 | ||
| 39 | #include "../nfp_app.h" | ||
| 40 | #include "../nfp_main.h" | ||
| 39 | #include "fw.h" | 41 | #include "fw.h" |
| 40 | #include "main.h" | 42 | #include "main.h" |
| 41 | 43 | ||
| @@ -149,15 +151,6 @@ nfp_bpf_map_call_ok(const char *fname, struct bpf_verifier_env *env, | |||
| 149 | return false; | 151 | return false; |
| 150 | } | 152 | } |
| 151 | 153 | ||
| 152 | /* Rest of the checks is only if we re-parse the same insn */ | ||
| 153 | if (!meta->func_id) | ||
| 154 | return true; | ||
| 155 | |||
| 156 | if (meta->arg1.map_ptr != reg1->map_ptr) { | ||
| 157 | pr_vlog(env, "%s: called for different map\n", fname); | ||
| 158 | return false; | ||
| 159 | } | ||
| 160 | |||
| 161 | return true; | 154 | return true; |
| 162 | } | 155 | } |
| 163 | 156 | ||
| @@ -216,6 +209,71 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env, | |||
| 216 | pr_vlog(env, "bpf_get_prandom_u32(): FW doesn't support random number generation\n"); | 209 | pr_vlog(env, "bpf_get_prandom_u32(): FW doesn't support random number generation\n"); |
| 217 | return -EOPNOTSUPP; | 210 | return -EOPNOTSUPP; |
| 218 | 211 | ||
| 212 | case BPF_FUNC_perf_event_output: | ||
| 213 | BUILD_BUG_ON(NFP_BPF_SCALAR_VALUE != SCALAR_VALUE || | ||
| 214 | NFP_BPF_MAP_VALUE != PTR_TO_MAP_VALUE || | ||
| 215 | NFP_BPF_STACK != PTR_TO_STACK || | ||
| 216 | NFP_BPF_PACKET_DATA != PTR_TO_PACKET); | ||
| 217 | |||
| 218 | if (!bpf->helpers.perf_event_output) { | ||
| 219 | pr_vlog(env, "event_output: not supported by FW\n"); | ||
| 220 | return -EOPNOTSUPP; | ||
| 221 | } | ||
| 222 | |||
| 223 | /* Force BPF_F_CURRENT_CPU to make sure the event can be reported | ||
| 224 | * on whichever CPU receives the control message from the FW. | ||
| 225 | */ | ||
| 226 | if (reg3->var_off.mask & BPF_F_INDEX_MASK || | ||
| 227 | (reg3->var_off.value & BPF_F_INDEX_MASK) != | ||
| 228 | BPF_F_CURRENT_CPU) { | ||
| 229 | char tn_buf[48]; | ||
| 230 | |||
| 231 | tnum_strn(tn_buf, sizeof(tn_buf), reg3->var_off); | ||
| 232 | pr_vlog(env, "event_output: must use BPF_F_CURRENT_CPU, var_off: %s\n", | ||
| 233 | tn_buf); | ||
| 234 | return -EOPNOTSUPP; | ||
| 235 | } | ||
| 236 | |||
| 237 | /* To save space in meta we only track the 4th argument; | ||
| 238 | * shove it into arg1. | ||
| 239 | */ | ||
| 240 | reg1 = cur_regs(env) + BPF_REG_4; | ||
| 241 | |||
| 242 | if (reg1->type != SCALAR_VALUE /* NULL ptr */ && | ||
| 243 | reg1->type != PTR_TO_STACK && | ||
| 244 | reg1->type != PTR_TO_MAP_VALUE && | ||
| 245 | reg1->type != PTR_TO_PACKET) { | ||
| 246 | pr_vlog(env, "event_output: unsupported ptr type: %d\n", | ||
| 247 | reg1->type); | ||
| 248 | return -EOPNOTSUPP; | ||
| 249 | } | ||
| 250 | |||
| 251 | if (reg1->type == PTR_TO_STACK && | ||
| 252 | !nfp_bpf_stack_arg_ok("event_output", env, reg1, NULL)) | ||
| 253 | return -EOPNOTSUPP; | ||
| 254 | |||
| 255 | /* Warn the user that on offload the NFP may return success even if | ||
| 256 | * the map is not going to accept the event, since the event output | ||
| 257 | * is fully async and the device won't know the state of the map. | ||
| 258 | * There is also a FW limitation on the event length. | ||
| 259 | * | ||
| 260 | * Lost events will not show up on the perf ring; the driver | ||
| 261 | * won't see them at all. Events may also get reordered. | ||
| 262 | */ | ||
| 263 | dev_warn_once(&nfp_prog->bpf->app->pf->pdev->dev, | ||
| 264 | "bpf: note: return codes and behavior of bpf_event_output() helper differs for offloaded programs!\n"); | ||
| 265 | pr_vlog(env, "warning: return codes and behavior of event_output helper differ for offload!\n"); | ||
| 266 | |||
| 267 | if (!meta->func_id) | ||
| 268 | break; | ||
| 269 | |||
| 270 | if (reg1->type != meta->arg1.type) { | ||
| 271 | pr_vlog(env, "event_output: ptr type changed: %d %d\n", | ||
| 272 | meta->arg1.type, reg1->type); | ||
| 273 | return -EINVAL; | ||
| 274 | } | ||
| 275 | break; | ||
| 276 | |||
| 219 | default: | 277 | default: |
| 220 | pr_vlog(env, "unsupported function id: %d\n", func_id); | 278 | pr_vlog(env, "unsupported function id: %d\n", func_id); |
| 221 | return -EOPNOTSUPP; | 279 | return -EOPNOTSUPP; |
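From the program author's side, the verifier hunk means an offloaded program may only call bpf_perf_event_output() with flags exactly equal to BPF_F_CURRENT_CPU (a CPU index is meaningless on the NIC; events are attributed to whichever CPU receives the FW message) and with a data pointer whose type stays stable across verifier passes. A sketch of a conforming XDP program, assuming the samples-style "bpf_helpers.h" of this era for SEC(), struct bpf_map_def and the helper stubs:

    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    struct bpf_map_def SEC("maps") events = {
        .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, /* offload-neutral, see the
                                                * bpf.h hunk below */
        .key_size = sizeof(int),
        .value_size = sizeof(__u32),
        .max_entries = 64,
    };

    SEC("xdp")
    int xdp_report(struct xdp_md *ctx)
    {
        __u32 len = ctx->data_end - ctx->data;

        /* Flags must be exactly BPF_F_CURRENT_CPU for offload */
        bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
                              &len, sizeof(len));
        return XDP_PASS;
    }

    char _license[] SEC("license") = "GPL";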
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c index 6aedef0ad433..0e0253c7e17b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2017-2018 Netronome Systems, Inc. |
| 3 | * | 3 | * |
| 4 | * This software is dual licensed under the GNU General License Version 2, | 4 | * This software is dual licensed under the GNU General License Version 2, |
| 5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 38ebbc61ed99..321969da67b7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h | |||
| @@ -110,6 +110,11 @@ static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map) | |||
| 110 | return container_of(map, struct bpf_offloaded_map, map); | 110 | return container_of(map, struct bpf_offloaded_map, map); |
| 111 | } | 111 | } |
| 112 | 112 | ||
| 113 | static inline bool bpf_map_offload_neutral(const struct bpf_map *map) | ||
| 114 | { | ||
| 115 | return map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; | ||
| 116 | } | ||
| 117 | |||
| 113 | static inline bool bpf_map_support_seq_show(const struct bpf_map *map) | 118 | static inline bool bpf_map_support_seq_show(const struct bpf_map *map) |
| 114 | { | 119 | { |
| 115 | return map->ops->map_seq_show_elem && map->ops->map_check_btf; | 120 | return map->ops->map_seq_show_elem && map->ops->map_check_btf; |
| @@ -235,6 +240,8 @@ struct bpf_verifier_ops { | |||
| 235 | struct bpf_insn_access_aux *info); | 240 | struct bpf_insn_access_aux *info); |
| 236 | int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, | 241 | int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, |
| 237 | const struct bpf_prog *prog); | 242 | const struct bpf_prog *prog); |
| 243 | int (*gen_ld_abs)(const struct bpf_insn *orig, | ||
| 244 | struct bpf_insn *insn_buf); | ||
| 238 | u32 (*convert_ctx_access)(enum bpf_access_type type, | 245 | u32 (*convert_ctx_access)(enum bpf_access_type type, |
| 239 | const struct bpf_insn *src, | 246 | const struct bpf_insn *src, |
| 240 | struct bpf_insn *dst, | 247 | struct bpf_insn *dst, |
| @@ -676,6 +683,31 @@ static inline int sock_map_prog(struct bpf_map *map, | |||
| 676 | } | 683 | } |
| 677 | #endif | 684 | #endif |
| 678 | 685 | ||
| 686 | #if defined(CONFIG_XDP_SOCKETS) | ||
| 687 | struct xdp_sock; | ||
| 688 | struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key); | ||
| 689 | int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp, | ||
| 690 | struct xdp_sock *xs); | ||
| 691 | void __xsk_map_flush(struct bpf_map *map); | ||
| 692 | #else | ||
| 693 | struct xdp_sock; | ||
| 694 | static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, | ||
| 695 | u32 key) | ||
| 696 | { | ||
| 697 | return NULL; | ||
| 698 | } | ||
| 699 | |||
| 700 | static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp, | ||
| 701 | struct xdp_sock *xs) | ||
| 702 | { | ||
| 703 | return -EOPNOTSUPP; | ||
| 704 | } | ||
| 705 | |||
| 706 | static inline void __xsk_map_flush(struct bpf_map *map) | ||
| 707 | { | ||
| 708 | } | ||
| 709 | #endif | ||
| 710 | |||
| 679 | /* verifier prototypes for helper functions called from eBPF programs */ | 711 | /* verifier prototypes for helper functions called from eBPF programs */ |
| 680 | extern const struct bpf_func_proto bpf_map_lookup_elem_proto; | 712 | extern const struct bpf_func_proto bpf_map_lookup_elem_proto; |
| 681 | extern const struct bpf_func_proto bpf_map_update_elem_proto; | 713 | extern const struct bpf_func_proto bpf_map_update_elem_proto; |
| @@ -689,9 +721,8 @@ extern const struct bpf_func_proto bpf_ktime_get_ns_proto; | |||
| 689 | extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; | 721 | extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; |
| 690 | extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; | 722 | extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; |
| 691 | extern const struct bpf_func_proto bpf_get_current_comm_proto; | 723 | extern const struct bpf_func_proto bpf_get_current_comm_proto; |
| 692 | extern const struct bpf_func_proto bpf_skb_vlan_push_proto; | ||
| 693 | extern const struct bpf_func_proto bpf_skb_vlan_pop_proto; | ||
| 694 | extern const struct bpf_func_proto bpf_get_stackid_proto; | 724 | extern const struct bpf_func_proto bpf_get_stackid_proto; |
| 725 | extern const struct bpf_func_proto bpf_get_stack_proto; | ||
| 695 | extern const struct bpf_func_proto bpf_sock_map_update_proto; | 726 | extern const struct bpf_func_proto bpf_sock_map_update_proto; |
| 696 | 727 | ||
| 697 | /* Shared helpers among cBPF and eBPF. */ | 728 | /* Shared helpers among cBPF and eBPF. */ |
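The new __xsk_map_* declarations follow the usual compiled-out stub convention: when CONFIG_XDP_SOCKETS is off, static inline stubs keep the callers (the XDP redirect core) compiling while failing gracefully at run time. The same pattern in a standalone form, with a made-up feature flag:

    #include <stdio.h>

    /* #define HAVE_FEATURE 1 */   /* flip to build the real implementation */

    #ifdef HAVE_FEATURE
    int feature_op(int x) { return x * 2; }
    #else
    /* Compiled-out stub: callers still build, the call fails at run time */
    static inline int feature_op(int x)
    {
        (void)x;
        return -95;   /* -EOPNOTSUPP, mirroring __xsk_map_redirect()'s stub */
    }
    #endif

    int main(void)
    {
        printf("feature_op(21) = %d\n", feature_op(21));
        return 0;
    }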
diff --git a/include/linux/bpf_trace.h b/include/linux/bpf_trace.h index e6fe98ae3794..ddf896abcfb6 100644 --- a/include/linux/bpf_trace.h +++ b/include/linux/bpf_trace.h | |||
| @@ -2,7 +2,6 @@ | |||
| 2 | #ifndef __LINUX_BPF_TRACE_H__ | 2 | #ifndef __LINUX_BPF_TRACE_H__ |
| 3 | #define __LINUX_BPF_TRACE_H__ | 3 | #define __LINUX_BPF_TRACE_H__ |
| 4 | 4 | ||
| 5 | #include <trace/events/bpf.h> | ||
| 6 | #include <trace/events/xdp.h> | 5 | #include <trace/events/xdp.h> |
| 7 | 6 | ||
| 8 | #endif /* __LINUX_BPF_TRACE_H__ */ | 7 | #endif /* __LINUX_BPF_TRACE_H__ */ |
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 2b28fcf6f6ae..d7df1b323082 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h | |||
| @@ -49,4 +49,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) | |||
| 49 | BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) | 49 | BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) |
| 50 | #endif | 50 | #endif |
| 51 | BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) | 51 | BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) |
| 52 | #if defined(CONFIG_XDP_SOCKETS) | ||
| 53 | BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) | ||
| 54 | #endif | ||
| 52 | #endif | 55 | #endif |
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7e61c395fddf..8f70dc181e23 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h | |||
| @@ -173,6 +173,11 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) | |||
| 173 | 173 | ||
| 174 | #define BPF_MAX_SUBPROGS 256 | 174 | #define BPF_MAX_SUBPROGS 256 |
| 175 | 175 | ||
| 176 | struct bpf_subprog_info { | ||
| 177 | u32 start; /* insn idx of function entry point */ | ||
| 178 | u16 stack_depth; /* max. stack depth used by this function */ | ||
| 179 | }; | ||
| 180 | |||
| 176 | /* single container for all structs | 181 | /* single container for all structs |
| 177 | * one verifier_env per bpf_check() call | 182 | * one verifier_env per bpf_check() call |
| 178 | */ | 183 | */ |
| @@ -191,9 +196,7 @@ struct bpf_verifier_env { | |||
| 191 | bool seen_direct_write; | 196 | bool seen_direct_write; |
| 192 | struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ | 197 | struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ |
| 193 | struct bpf_verifier_log log; | 198 | struct bpf_verifier_log log; |
| 194 | u32 subprog_starts[BPF_MAX_SUBPROGS]; | 199 | struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1]; |
| 195 | /* computes the stack depth of each bpf function */ | ||
| 196 | u16 subprog_stack_depth[BPF_MAX_SUBPROGS + 1]; | ||
| 197 | u32 subprog_cnt; | 200 | u32 subprog_cnt; |
| 198 | }; | 201 | }; |
| 199 | 202 | ||
diff --git a/include/linux/filter.h b/include/linux/filter.h index 4da8b2308174..da7e16523128 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h | |||
| @@ -47,7 +47,9 @@ struct xdp_buff; | |||
| 47 | /* Additional register mappings for converted user programs. */ | 47 | /* Additional register mappings for converted user programs. */ |
| 48 | #define BPF_REG_A BPF_REG_0 | 48 | #define BPF_REG_A BPF_REG_0 |
| 49 | #define BPF_REG_X BPF_REG_7 | 49 | #define BPF_REG_X BPF_REG_7 |
| 50 | #define BPF_REG_TMP BPF_REG_8 | 50 | #define BPF_REG_TMP BPF_REG_2 /* scratch reg */ |
| 51 | #define BPF_REG_D BPF_REG_8 /* data, callee-saved */ | ||
| 52 | #define BPF_REG_H BPF_REG_9 /* hlen, callee-saved */ | ||
| 51 | 53 | ||
| 52 | /* Kernel hidden auxiliary/helper register for hardening step. | 54 | /* Kernel hidden auxiliary/helper register for hardening step. |
| 53 | * Only used by eBPF JITs. It's nothing more than a temporary | 55 | * Only used by eBPF JITs. It's nothing more than a temporary |
| @@ -468,7 +470,8 @@ struct bpf_prog { | |||
| 468 | dst_needed:1, /* Do we need dst entry? */ | 470 | dst_needed:1, /* Do we need dst entry? */ |
| 469 | blinded:1, /* Was blinded */ | 471 | blinded:1, /* Was blinded */ |
| 470 | is_func:1, /* program is a bpf function */ | 472 | is_func:1, /* program is a bpf function */ |
| 471 | kprobe_override:1; /* Do we override a kprobe? */ | 473 | kprobe_override:1, /* Do we override a kprobe? */ |
| 474 | has_callchain_buf:1; /* callchain buffer allocated? */ | ||
| 472 | enum bpf_prog_type type; /* Type of BPF program */ | 475 | enum bpf_prog_type type; /* Type of BPF program */ |
| 473 | enum bpf_attach_type expected_attach_type; /* For some prog types */ | 476 | enum bpf_attach_type expected_attach_type; /* For some prog types */ |
| 474 | u32 len; /* Number of filter blocks */ | 477 | u32 len; /* Number of filter blocks */ |
| @@ -759,7 +762,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, | |||
| 759 | * This does not appear to be a real limitation for existing software. | 762 | * This does not appear to be a real limitation for existing software. |
| 760 | */ | 763 | */ |
| 761 | int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, | 764 | int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, |
| 762 | struct bpf_prog *prog); | 765 | struct xdp_buff *xdp, struct bpf_prog *prog); |
| 763 | int xdp_do_redirect(struct net_device *dev, | 766 | int xdp_do_redirect(struct net_device *dev, |
| 764 | struct xdp_buff *xdp, | 767 | struct xdp_buff *xdp, |
| 765 | struct bpf_prog *prog); | 768 | struct bpf_prog *prog); |
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 46dcb5f7522f..03ed492c4e14 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
| @@ -2510,6 +2510,7 @@ void dev_disable_lro(struct net_device *dev); | |||
| 2510 | int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); | 2510 | int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); |
| 2511 | int dev_queue_xmit(struct sk_buff *skb); | 2511 | int dev_queue_xmit(struct sk_buff *skb); |
| 2512 | int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv); | 2512 | int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv); |
| 2513 | int dev_direct_xmit(struct sk_buff *skb, u16 queue_id); | ||
| 2513 | int register_netdevice(struct net_device *dev); | 2514 | int register_netdevice(struct net_device *dev); |
| 2514 | void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); | 2515 | void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); |
| 2515 | void unregister_netdevice_many(struct list_head *head); | 2516 | void unregister_netdevice_many(struct list_head *head); |
diff --git a/include/linux/socket.h b/include/linux/socket.h index ea50f4a65816..7ed4713d5337 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h | |||
| @@ -207,8 +207,9 @@ struct ucred { | |||
| 207 | * PF_SMC protocol family that | 207 | * PF_SMC protocol family that |
| 208 | * reuses AF_INET address family | 208 | * reuses AF_INET address family |
| 209 | */ | 209 | */ |
| 210 | #define AF_XDP 44 /* XDP sockets */ | ||
| 210 | 211 | ||
| 211 | #define AF_MAX 44 /* For now.. */ | 212 | #define AF_MAX 45 /* For now.. */ |
| 212 | 213 | ||
| 213 | /* Protocol families, same as address families. */ | 214 | /* Protocol families, same as address families. */ |
| 214 | #define PF_UNSPEC AF_UNSPEC | 215 | #define PF_UNSPEC AF_UNSPEC |
| @@ -257,6 +258,7 @@ struct ucred { | |||
| 257 | #define PF_KCM AF_KCM | 258 | #define PF_KCM AF_KCM |
| 258 | #define PF_QIPCRTR AF_QIPCRTR | 259 | #define PF_QIPCRTR AF_QIPCRTR |
| 259 | #define PF_SMC AF_SMC | 260 | #define PF_SMC AF_SMC |
| 261 | #define PF_XDP AF_XDP | ||
| 260 | #define PF_MAX AF_MAX | 262 | #define PF_MAX AF_MAX |
| 261 | 263 | ||
| 262 | /* Maximum queue length specifiable by listen. */ | 264 | /* Maximum queue length specifiable by listen. */ |
| @@ -338,6 +340,7 @@ struct ucred { | |||
| 338 | #define SOL_NFC 280 | 340 | #define SOL_NFC 280 |
| 339 | #define SOL_KCM 281 | 341 | #define SOL_KCM 281 |
| 340 | #define SOL_TLS 282 | 342 | #define SOL_TLS 282 |
| 343 | #define SOL_XDP 283 | ||
| 341 | 344 | ||
| 342 | /* IPX options */ | 345 | /* IPX options */ |
| 343 | #define IPX_TYPE 1 | 346 | #define IPX_TYPE 1 |
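socket.h allocates the user-visible numbers: address family 44 (AF_XDP/PF_XDP) and socket option level 283 (SOL_XDP), bumping AF_MAX to 45. Opening such a socket from user space is then a one-liner; below is a hedged probe that degrades gracefully on kernels built without XDP socket support:

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>

    #ifndef AF_XDP
    #define AF_XDP 44      /* value added by this patch set */
    #endif

    int main(void)
    {
        int fd = socket(AF_XDP, SOCK_RAW, 0);

        if (fd < 0) {
            /* Older kernels (or CONFIG_XDP_SOCKETS=n) refuse the family */
            printf("AF_XDP unavailable: %s\n", strerror(errno));
            return 1;
        }
        printf("AF_XDP socket created, fd %d\n", fd);
        return 0;
    }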
diff --git a/include/linux/tnum.h b/include/linux/tnum.h index 0d2d3da46139..c7dc2b5902c0 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h | |||
| @@ -23,8 +23,10 @@ struct tnum tnum_range(u64 min, u64 max); | |||
| 23 | /* Arithmetic and logical ops */ | 23 | /* Arithmetic and logical ops */ |
| 24 | /* Shift a tnum left (by a fixed shift) */ | 24 | /* Shift a tnum left (by a fixed shift) */ |
| 25 | struct tnum tnum_lshift(struct tnum a, u8 shift); | 25 | struct tnum tnum_lshift(struct tnum a, u8 shift); |
| 26 | /* Shift a tnum right (by a fixed shift) */ | 26 | /* Shift (rsh) a tnum right (by a fixed shift) */ |
| 27 | struct tnum tnum_rshift(struct tnum a, u8 shift); | 27 | struct tnum tnum_rshift(struct tnum a, u8 shift); |
| 28 | /* Shift (arsh) a tnum right (by a fixed min_shift) */ | ||
| 29 | struct tnum tnum_arshift(struct tnum a, u8 min_shift); | ||
| 28 | /* Add two tnums, return @a + @b */ | 30 | /* Add two tnums, return @a + @b */ |
| 29 | struct tnum tnum_add(struct tnum a, struct tnum b); | 31 | struct tnum tnum_add(struct tnum a, struct tnum b); |
| 30 | /* Subtract two tnums, return @a - @b */ | 32 | /* Subtract two tnums, return @a - @b */ |
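tnum_arshift() extends the verifier's tracked-number algebra to arithmetic right shifts: a tnum is a (value, mask) pair where mask bits mark unknown positions, and an arithmetic shift replicates the sign bit, so an unknown sign bit must stay unknown in every position it is smeared into. A simplified model of the two right shifts (struct tnum_demo is a stand-in, not the kernel type):

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified model of the verifier's tnum: bits set in mask are
     * unknown; the remaining bits equal the corresponding value bits.
     */
    struct tnum_demo { uint64_t value; uint64_t mask; };

    static struct tnum_demo t_rshift(struct tnum_demo a, uint8_t shift)
    {
        /* Logical shift: the incoming top bits are known zeros */
        return (struct tnum_demo){ a.value >> shift, a.mask >> shift };
    }

    static struct tnum_demo t_arshift(struct tnum_demo a, uint8_t min_shift)
    {
        /* Arithmetic shift replicates the sign bit of both words
         * (implementation-defined in ISO C, arithmetic on the compilers
         * the kernel supports), so an unknown sign bit stays unknown.
         */
        return (struct tnum_demo){
            (uint64_t)((int64_t)a.value >> min_shift),
            (uint64_t)((int64_t)a.mask >> min_shift),
        };
    }

    int main(void)
    {
        struct tnum_demo a = { 0, 0x8000000000000000ULL }; /* sign unknown */
        struct tnum_demo r = t_rshift(a, 4);
        struct tnum_demo s = t_arshift(a, 4);

        printf("rsh 4:  mask %016llx\n", (unsigned long long)r.mask);
        printf("arsh 4: mask %016llx\n", (unsigned long long)s.mask);
        return 0;
    }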
diff --git a/include/net/xdp.h b/include/net/xdp.h index 137ad5f9f40f..0b689cf561c7 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h | |||
| @@ -104,6 +104,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp) | |||
| 104 | } | 104 | } |
| 105 | 105 | ||
| 106 | void xdp_return_frame(struct xdp_frame *xdpf); | 106 | void xdp_return_frame(struct xdp_frame *xdpf); |
| 107 | void xdp_return_buff(struct xdp_buff *xdp); | ||
| 107 | 108 | ||
| 108 | int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, | 109 | int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, |
| 109 | struct net_device *dev, u32 queue_index); | 110 | struct net_device *dev, u32 queue_index); |
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h new file mode 100644 index 000000000000..185f4928fbda --- /dev/null +++ b/include/net/xdp_sock.h | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 | ||
| 2 | * AF_XDP internal functions | ||
| 3 | * Copyright(c) 2018 Intel Corporation. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms and conditions of the GNU General Public License, | ||
| 7 | * version 2, as published by the Free Software Foundation. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 12 | * more details. | ||
| 13 | */ | ||
| 14 | |||
| 15 | #ifndef _LINUX_XDP_SOCK_H | ||
| 16 | #define _LINUX_XDP_SOCK_H | ||
| 17 | |||
| 18 | #include <linux/mutex.h> | ||
| 19 | #include <net/sock.h> | ||
| 20 | |||
| 21 | struct net_device; | ||
| 22 | struct xsk_queue; | ||
| 23 | struct xdp_umem; | ||
| 24 | |||
| 25 | struct xdp_sock { | ||
| 26 | /* struct sock must be the first member of struct xdp_sock */ | ||
| 27 | struct sock sk; | ||
| 28 | struct xsk_queue *rx; | ||
| 29 | struct net_device *dev; | ||
| 30 | struct xdp_umem *umem; | ||
| 31 | struct list_head flush_node; | ||
| 32 | u16 queue_id; | ||
| 33 | struct xsk_queue *tx ____cacheline_aligned_in_smp; | ||
| 34 | /* Protects multiple processes in the control path */ | ||
| 35 | struct mutex mutex; | ||
| 36 | u64 rx_dropped; | ||
| 37 | }; | ||
| 38 | |||
| 39 | struct xdp_buff; | ||
| 40 | #ifdef CONFIG_XDP_SOCKETS | ||
| 41 | int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); | ||
| 42 | int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); | ||
| 43 | void xsk_flush(struct xdp_sock *xs); | ||
| 44 | bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs); | ||
| 45 | #else | ||
| 46 | static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) | ||
| 47 | { | ||
| 48 | return -ENOTSUPP; | ||
| 49 | } | ||
| 50 | |||
| 51 | static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) | ||
| 52 | { | ||
| 53 | return -ENOTSUPP; | ||
| 54 | } | ||
| 55 | |||
| 56 | static inline void xsk_flush(struct xdp_sock *xs) | ||
| 57 | { | ||
| 58 | } | ||
| 59 | |||
| 60 | static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) | ||
| 61 | { | ||
| 62 | return false; | ||
| 63 | } | ||
| 64 | #endif /* CONFIG_XDP_SOCKETS */ | ||
| 65 | |||
| 66 | #endif /* _LINUX_XDP_SOCK_H */ | ||
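In struct xdp_sock the TX queue pointer is annotated ____cacheline_aligned_in_smp, pushing the TX-side fields onto their own cache line so RX-path and TX-path CPUs do not false-share. The equivalent effect in standard C11, assuming a 64-byte line (the field grouping is illustrative, not the exact driver layout):

    #include <stdalign.h>
    #include <stddef.h>
    #include <stdio.h>

    #define CACHELINE 64   /* assumed L1 line size, typical on x86_64 */

    struct xsk_demo {
        void *rx;                        /* RX-path fields first */
        unsigned long rx_dropped;
        alignas(CACHELINE) void *tx;     /* TX path starts on a fresh line */
    };

    int main(void)
    {
        printf("tx offset: %zu (a multiple of %d)\n",
               offsetof(struct xsk_demo, tx), CACHELINE);
        return 0;
    }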
diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h deleted file mode 100644 index 150185647e6b..000000000000 --- a/include/trace/events/bpf.h +++ /dev/null | |||
| @@ -1,355 +0,0 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
| 2 | #undef TRACE_SYSTEM | ||
| 3 | #define TRACE_SYSTEM bpf | ||
| 4 | |||
| 5 | #if !defined(_TRACE_BPF_H) || defined(TRACE_HEADER_MULTI_READ) | ||
| 6 | #define _TRACE_BPF_H | ||
| 7 | |||
| 8 | /* These are only used within the BPF_SYSCALL code */ | ||
| 9 | #ifdef CONFIG_BPF_SYSCALL | ||
| 10 | |||
| 11 | #include <linux/filter.h> | ||
| 12 | #include <linux/bpf.h> | ||
| 13 | #include <linux/fs.h> | ||
| 14 | #include <linux/tracepoint.h> | ||
| 15 | |||
| 16 | #define __PROG_TYPE_MAP(FN) \ | ||
| 17 | FN(SOCKET_FILTER) \ | ||
| 18 | FN(KPROBE) \ | ||
| 19 | FN(SCHED_CLS) \ | ||
| 20 | FN(SCHED_ACT) \ | ||
| 21 | FN(TRACEPOINT) \ | ||
| 22 | FN(XDP) \ | ||
| 23 | FN(PERF_EVENT) \ | ||
| 24 | FN(CGROUP_SKB) \ | ||
| 25 | FN(CGROUP_SOCK) \ | ||
| 26 | FN(LWT_IN) \ | ||
| 27 | FN(LWT_OUT) \ | ||
| 28 | FN(LWT_XMIT) | ||
| 29 | |||
| 30 | #define __MAP_TYPE_MAP(FN) \ | ||
| 31 | FN(HASH) \ | ||
| 32 | FN(ARRAY) \ | ||
| 33 | FN(PROG_ARRAY) \ | ||
| 34 | FN(PERF_EVENT_ARRAY) \ | ||
| 35 | FN(PERCPU_HASH) \ | ||
| 36 | FN(PERCPU_ARRAY) \ | ||
| 37 | FN(STACK_TRACE) \ | ||
| 38 | FN(CGROUP_ARRAY) \ | ||
| 39 | FN(LRU_HASH) \ | ||
| 40 | FN(LRU_PERCPU_HASH) \ | ||
| 41 | FN(LPM_TRIE) | ||
| 42 | |||
| 43 | #define __PROG_TYPE_TP_FN(x) \ | ||
| 44 | TRACE_DEFINE_ENUM(BPF_PROG_TYPE_##x); | ||
| 45 | #define __PROG_TYPE_SYM_FN(x) \ | ||
| 46 | { BPF_PROG_TYPE_##x, #x }, | ||
| 47 | #define __PROG_TYPE_SYM_TAB \ | ||
| 48 | __PROG_TYPE_MAP(__PROG_TYPE_SYM_FN) { -1, 0 } | ||
| 49 | __PROG_TYPE_MAP(__PROG_TYPE_TP_FN) | ||
| 50 | |||
| 51 | #define __MAP_TYPE_TP_FN(x) \ | ||
| 52 | TRACE_DEFINE_ENUM(BPF_MAP_TYPE_##x); | ||
| 53 | #define __MAP_TYPE_SYM_FN(x) \ | ||
| 54 | { BPF_MAP_TYPE_##x, #x }, | ||
| 55 | #define __MAP_TYPE_SYM_TAB \ | ||
| 56 | __MAP_TYPE_MAP(__MAP_TYPE_SYM_FN) { -1, 0 } | ||
| 57 | __MAP_TYPE_MAP(__MAP_TYPE_TP_FN) | ||
| 58 | |||
| 59 | DECLARE_EVENT_CLASS(bpf_prog_event, | ||
| 60 | |||
| 61 | TP_PROTO(const struct bpf_prog *prg), | ||
| 62 | |||
| 63 | TP_ARGS(prg), | ||
| 64 | |||
| 65 | TP_STRUCT__entry( | ||
| 66 | __array(u8, prog_tag, 8) | ||
| 67 | __field(u32, type) | ||
| 68 | ), | ||
| 69 | |||
| 70 | TP_fast_assign( | ||
| 71 | BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag)); | ||
| 72 | memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag)); | ||
| 73 | __entry->type = prg->type; | ||
| 74 | ), | ||
| 75 | |||
| 76 | TP_printk("prog=%s type=%s", | ||
| 77 | __print_hex_str(__entry->prog_tag, 8), | ||
| 78 | __print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB)) | ||
| 79 | ); | ||
| 80 | |||
| 81 | DEFINE_EVENT(bpf_prog_event, bpf_prog_get_type, | ||
| 82 | |||
| 83 | TP_PROTO(const struct bpf_prog *prg), | ||
| 84 | |||
| 85 | TP_ARGS(prg) | ||
| 86 | ); | ||
| 87 | |||
| 88 | DEFINE_EVENT(bpf_prog_event, bpf_prog_put_rcu, | ||
| 89 | |||
| 90 | TP_PROTO(const struct bpf_prog *prg), | ||
| 91 | |||
| 92 | TP_ARGS(prg) | ||
| 93 | ); | ||
| 94 | |||
| 95 | TRACE_EVENT(bpf_prog_load, | ||
| 96 | |||
| 97 | TP_PROTO(const struct bpf_prog *prg, int ufd), | ||
| 98 | |||
| 99 | TP_ARGS(prg, ufd), | ||
| 100 | |||
| 101 | TP_STRUCT__entry( | ||
| 102 | __array(u8, prog_tag, 8) | ||
| 103 | __field(u32, type) | ||
| 104 | __field(int, ufd) | ||
| 105 | ), | ||
| 106 | |||
| 107 | TP_fast_assign( | ||
| 108 | BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag)); | ||
| 109 | memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag)); | ||
| 110 | __entry->type = prg->type; | ||
| 111 | __entry->ufd = ufd; | ||
| 112 | ), | ||
| 113 | |||
| 114 | TP_printk("prog=%s type=%s ufd=%d", | ||
| 115 | __print_hex_str(__entry->prog_tag, 8), | ||
| 116 | __print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB), | ||
| 117 | __entry->ufd) | ||
| 118 | ); | ||
| 119 | |||
| 120 | TRACE_EVENT(bpf_map_create, | ||
| 121 | |||
| 122 | TP_PROTO(const struct bpf_map *map, int ufd), | ||
| 123 | |||
| 124 | TP_ARGS(map, ufd), | ||
| 125 | |||
| 126 | TP_STRUCT__entry( | ||
| 127 | __field(u32, type) | ||
| 128 | __field(u32, size_key) | ||
| 129 | __field(u32, size_value) | ||
| 130 | __field(u32, max_entries) | ||
| 131 | __field(u32, flags) | ||
| 132 | __field(int, ufd) | ||
| 133 | ), | ||
| 134 | |||
| 135 | TP_fast_assign( | ||
| 136 | __entry->type = map->map_type; | ||
| 137 | __entry->size_key = map->key_size; | ||
| 138 | __entry->size_value = map->value_size; | ||
| 139 | __entry->max_entries = map->max_entries; | ||
| 140 | __entry->flags = map->map_flags; | ||
| 141 | __entry->ufd = ufd; | ||
| 142 | ), | ||
| 143 | |||
| 144 | TP_printk("map type=%s ufd=%d key=%u val=%u max=%u flags=%x", | ||
| 145 | __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), | ||
| 146 | __entry->ufd, __entry->size_key, __entry->size_value, | ||
| 147 | __entry->max_entries, __entry->flags) | ||
| 148 | ); | ||
| 149 | |||
| 150 | DECLARE_EVENT_CLASS(bpf_obj_prog, | ||
| 151 | |||
| 152 | TP_PROTO(const struct bpf_prog *prg, int ufd, | ||
| 153 | const struct filename *pname), | ||
| 154 | |||
| 155 | TP_ARGS(prg, ufd, pname), | ||
| 156 | |||
| 157 | TP_STRUCT__entry( | ||
| 158 | __array(u8, prog_tag, 8) | ||
| 159 | __field(int, ufd) | ||
| 160 | __string(path, pname->name) | ||
| 161 | ), | ||
| 162 | |||
| 163 | TP_fast_assign( | ||
| 164 | BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag)); | ||
| 165 | memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag)); | ||
| 166 | __assign_str(path, pname->name); | ||
| 167 | __entry->ufd = ufd; | ||
| 168 | ), | ||
| 169 | |||
| 170 | TP_printk("prog=%s path=%s ufd=%d", | ||
| 171 | __print_hex_str(__entry->prog_tag, 8), | ||
| 172 | __get_str(path), __entry->ufd) | ||
| 173 | ); | ||
| 174 | |||
| 175 | DEFINE_EVENT(bpf_obj_prog, bpf_obj_pin_prog, | ||
| 176 | |||
| 177 | TP_PROTO(const struct bpf_prog *prg, int ufd, | ||
| 178 | const struct filename *pname), | ||
| 179 | |||
| 180 | TP_ARGS(prg, ufd, pname) | ||
| 181 | ); | ||
| 182 | |||
| 183 | DEFINE_EVENT(bpf_obj_prog, bpf_obj_get_prog, | ||
| 184 | |||
| 185 | TP_PROTO(const struct bpf_prog *prg, int ufd, | ||
| 186 | const struct filename *pname), | ||
| 187 | |||
| 188 | TP_ARGS(prg, ufd, pname) | ||
| 189 | ); | ||
| 190 | |||
| 191 | DECLARE_EVENT_CLASS(bpf_obj_map, | ||
| 192 | |||
| 193 | TP_PROTO(const struct bpf_map *map, int ufd, | ||
| 194 | const struct filename *pname), | ||
| 195 | |||
| 196 | TP_ARGS(map, ufd, pname), | ||
| 197 | |||
| 198 | TP_STRUCT__entry( | ||
| 199 | __field(u32, type) | ||
| 200 | __field(int, ufd) | ||
| 201 | __string(path, pname->name) | ||
| 202 | ), | ||
| 203 | |||
| 204 | TP_fast_assign( | ||
| 205 | __assign_str(path, pname->name); | ||
| 206 | __entry->type = map->map_type; | ||
| 207 | __entry->ufd = ufd; | ||
| 208 | ), | ||
| 209 | |||
| 210 | TP_printk("map type=%s ufd=%d path=%s", | ||
| 211 | __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), | ||
| 212 | __entry->ufd, __get_str(path)) | ||
| 213 | ); | ||
| 214 | |||
| 215 | DEFINE_EVENT(bpf_obj_map, bpf_obj_pin_map, | ||
| 216 | |||
| 217 | TP_PROTO(const struct bpf_map *map, int ufd, | ||
| 218 | const struct filename *pname), | ||
| 219 | |||
| 220 | TP_ARGS(map, ufd, pname) | ||
| 221 | ); | ||
| 222 | |||
| 223 | DEFINE_EVENT(bpf_obj_map, bpf_obj_get_map, | ||
| 224 | |||
| 225 | TP_PROTO(const struct bpf_map *map, int ufd, | ||
| 226 | const struct filename *pname), | ||
| 227 | |||
| 228 | TP_ARGS(map, ufd, pname) | ||
| 229 | ); | ||
| 230 | |||
| 231 | DECLARE_EVENT_CLASS(bpf_map_keyval, | ||
| 232 | |||
| 233 | TP_PROTO(const struct bpf_map *map, int ufd, | ||
| 234 | const void *key, const void *val), | ||
| 235 | |||
| 236 | TP_ARGS(map, ufd, key, val), | ||
| 237 | |||
| 238 | TP_STRUCT__entry( | ||
| 239 | __field(u32, type) | ||
| 240 | __field(u32, key_len) | ||
| 241 | __dynamic_array(u8, key, map->key_size) | ||
| 242 | __field(bool, key_trunc) | ||
| 243 | __field(u32, val_len) | ||
| 244 | __dynamic_array(u8, val, map->value_size) | ||
| 245 | __field(bool, val_trunc) | ||
| 246 | __field(int, ufd) | ||
| 247 | ), | ||
| 248 | |||
| 249 | TP_fast_assign( | ||
| 250 | memcpy(__get_dynamic_array(key), key, map->key_size); | ||
| 251 | memcpy(__get_dynamic_array(val), val, map->value_size); | ||
| 252 | __entry->type = map->map_type; | ||
| 253 | __entry->key_len = min(map->key_size, 16U); | ||
| 254 | __entry->key_trunc = map->key_size != __entry->key_len; | ||
| 255 | __entry->val_len = min(map->value_size, 16U); | ||
| 256 | __entry->val_trunc = map->value_size != __entry->val_len; | ||
| 257 | __entry->ufd = ufd; | ||
| 258 | ), | ||
| 259 | |||
| 260 | TP_printk("map type=%s ufd=%d key=[%s%s] val=[%s%s]", | ||
| 261 | __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), | ||
| 262 | __entry->ufd, | ||
| 263 | __print_hex(__get_dynamic_array(key), __entry->key_len), | ||
| 264 | __entry->key_trunc ? " ..." : "", | ||
| 265 | __print_hex(__get_dynamic_array(val), __entry->val_len), | ||
| 266 | __entry->val_trunc ? " ..." : "") | ||
| 267 | ); | ||
| 268 | |||
| 269 | DEFINE_EVENT(bpf_map_keyval, bpf_map_lookup_elem, | ||
| 270 | |||
| 271 | TP_PROTO(const struct bpf_map *map, int ufd, | ||
| 272 | const void *key, const void *val), | ||
| 273 | |||
| 274 | TP_ARGS(map, ufd, key, val) | ||
| 275 | ); | ||
| 276 | |||
| 277 | DEFINE_EVENT(bpf_map_keyval, bpf_map_update_elem, | ||
| 278 | |||
| 279 | TP_PROTO(const struct bpf_map *map, int ufd, | ||
| 280 | const void *key, const void *val), | ||
| 281 | |||
| 282 | TP_ARGS(map, ufd, key, val) | ||
| 283 | ); | ||
| 284 | |||
| 285 | TRACE_EVENT(bpf_map_delete_elem, | ||
| 286 | |||
| 287 | TP_PROTO(const struct bpf_map *map, int ufd, | ||
| 288 | const void *key), | ||
| 289 | |||
| 290 | TP_ARGS(map, ufd, key), | ||
| 291 | |||
| 292 | TP_STRUCT__entry( | ||
| 293 | __field(u32, type) | ||
| 294 | __field(u32, key_len) | ||
| 295 | __dynamic_array(u8, key, map->key_size) | ||
| 296 | __field(bool, key_trunc) | ||
| 297 | __field(int, ufd) | ||
| 298 | ), | ||
| 299 | |||
| 300 | TP_fast_assign( | ||
| 301 | memcpy(__get_dynamic_array(key), key, map->key_size); | ||
| 302 | __entry->type = map->map_type; | ||
| 303 | __entry->key_len = min(map->key_size, 16U); | ||
| 304 | __entry->key_trunc = map->key_size != __entry->key_len; | ||
| 305 | __entry->ufd = ufd; | ||
| 306 | ), | ||
| 307 | |||
| 308 | TP_printk("map type=%s ufd=%d key=[%s%s]", | ||
| 309 | __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), | ||
| 310 | __entry->ufd, | ||
| 311 | __print_hex(__get_dynamic_array(key), __entry->key_len), | ||
| 312 | __entry->key_trunc ? " ..." : "") | ||
| 313 | ); | ||
| 314 | |||
| 315 | TRACE_EVENT(bpf_map_next_key, | ||
| 316 | |||
| 317 | TP_PROTO(const struct bpf_map *map, int ufd, | ||
| 318 | const void *key, const void *key_next), | ||
| 319 | |||
| 320 | TP_ARGS(map, ufd, key, key_next), | ||
| 321 | |||
| 322 | TP_STRUCT__entry( | ||
| 323 | __field(u32, type) | ||
| 324 | __field(u32, key_len) | ||
| 325 | __dynamic_array(u8, key, map->key_size) | ||
| 326 | __dynamic_array(u8, nxt, map->key_size) | ||
| 327 | __field(bool, key_trunc) | ||
| 328 | __field(bool, key_null) | ||
| 329 | __field(int, ufd) | ||
| 330 | ), | ||
| 331 | |||
| 332 | TP_fast_assign( | ||
| 333 | if (key) | ||
| 334 | memcpy(__get_dynamic_array(key), key, map->key_size); | ||
| 335 | __entry->key_null = !key; | ||
| 336 | memcpy(__get_dynamic_array(nxt), key_next, map->key_size); | ||
| 337 | __entry->type = map->map_type; | ||
| 338 | __entry->key_len = min(map->key_size, 16U); | ||
| 339 | __entry->key_trunc = map->key_size != __entry->key_len; | ||
| 340 | __entry->ufd = ufd; | ||
| 341 | ), | ||
| 342 | |||
| 343 | TP_printk("map type=%s ufd=%d key=[%s%s] next=[%s%s]", | ||
| 344 | __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), | ||
| 345 | __entry->ufd, | ||
| 346 | __entry->key_null ? "NULL" : __print_hex(__get_dynamic_array(key), | ||
| 347 | __entry->key_len), | ||
| 348 | __entry->key_trunc && !__entry->key_null ? " ..." : "", | ||
| 349 | __print_hex(__get_dynamic_array(nxt), __entry->key_len), | ||
| 350 | __entry->key_trunc ? " ..." : "") | ||
| 351 | ); | ||
| 352 | #endif /* CONFIG_BPF_SYSCALL */ | ||
| 353 | #endif /* _TRACE_BPF_H */ | ||
| 354 | |||
| 355 | #include <trace/define_trace.h> | ||
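These tracepoint definitions go away in this merge, and the corresponding trace_* call sites are removed from kernel/bpf/inode.c and kernel/bpf/syscall.c further down. For reference, a hedged sketch of how one of these events was emitted and rendered; the call matches the one removed from map_create() below, while the sample output line is only an illustration derived from the TP_printk format above:

	/* formerly in map_create(), kernel/bpf/syscall.c: */
	trace_bpf_map_create(map, err);	/* err holds the new map fd on success */

	/* which, per the TP_printk above, rendered roughly as:
	 *   map type=ARRAY ufd=3 key=4 val=8 max=128 flags=0
	 */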
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index da77a9388947..93d5a4eeec2a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h | |||
| @@ -116,6 +116,7 @@ enum bpf_map_type { | |||
| 116 | BPF_MAP_TYPE_DEVMAP, | 116 | BPF_MAP_TYPE_DEVMAP, |
| 117 | BPF_MAP_TYPE_SOCKMAP, | 117 | BPF_MAP_TYPE_SOCKMAP, |
| 118 | BPF_MAP_TYPE_CPUMAP, | 118 | BPF_MAP_TYPE_CPUMAP, |
| 119 | BPF_MAP_TYPE_XSKMAP, | ||
| 119 | }; | 120 | }; |
| 120 | 121 | ||
| 121 | enum bpf_prog_type { | 122 | enum bpf_prog_type { |
| @@ -828,12 +829,12 @@ union bpf_attr { | |||
| 828 | * | 829 | * |
| 829 | * Also, be aware that the newer helper | 830 | * Also, be aware that the newer helper |
| 830 | * **bpf_perf_event_read_value**\ () is recommended over | 831 | * **bpf_perf_event_read_value**\ () is recommended over |
| 831 | * **bpf_perf_event_read*\ () in general. The latter has some ABI | 832 | * **bpf_perf_event_read**\ () in general. The latter has some ABI |
| 832 | * quirks where error and counter value are used as a return code | 833 | * quirks where error and counter value are used as a return code |
| 833 | * (which is wrong to do since ranges may overlap). This issue is | 834 | * (which is wrong to do since ranges may overlap). This issue is |
| 834 | * fixed with bpf_perf_event_read_value(), which at the same time | 835 | * fixed with **bpf_perf_event_read_value**\ (), which at the same |
| 835 | * provides more features over the **bpf_perf_event_read**\ () | 836 | * time provides more features over the **bpf_perf_event_read**\ |
| 836 | * interface. Please refer to the description of | 837 | * () interface. Please refer to the description of |
| 837 | * **bpf_perf_event_read_value**\ () for details. | 838 | * **bpf_perf_event_read_value**\ () for details. |
| 838 | * Return | 839 | * Return |
| 839 | * The value of the perf event counter read from the map, or a | 840 | * The value of the perf event counter read from the map, or a |
| @@ -1361,7 +1362,7 @@ union bpf_attr { | |||
| 1361 | * Return | 1362 | * Return |
| 1362 | * 0 | 1363 | * 0 |
| 1363 | * | 1364 | * |
| 1364 | * int bpf_setsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen) | 1365 | * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) |
| 1365 | * Description | 1366 | * Description |
| 1366 | * Emulate a call to **setsockopt()** on the socket associated to | 1367 | * Emulate a call to **setsockopt()** on the socket associated to |
| 1367 | * *bpf_socket*, which must be a full socket. The *level* at | 1368 | * *bpf_socket*, which must be a full socket. The *level* at |
| @@ -1435,7 +1436,7 @@ union bpf_attr { | |||
| 1435 | * Return | 1436 | * Return |
| 1436 | * **SK_PASS** on success, or **SK_DROP** on error. | 1437 | * **SK_PASS** on success, or **SK_DROP** on error. |
| 1437 | * | 1438 | * |
| 1438 | * int bpf_sock_map_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) | 1439 | * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) |
| 1439 | * Description | 1440 | * Description |
| 1440 | * Add an entry to, or update a *map* referencing sockets. The | 1441 | * Add an entry to, or update a *map* referencing sockets. The |
| 1441 | * *skops* is used as a new value for the entry associated to | 1442 | * *skops* is used as a new value for the entry associated to |
| @@ -1533,7 +1534,7 @@ union bpf_attr { | |||
| 1533 | * Return | 1534 | * Return |
| 1534 | * 0 on success, or a negative error in case of failure. | 1535 | * 0 on success, or a negative error in case of failure. |
| 1535 | * | 1536 | * |
| 1536 | * int bpf_perf_prog_read_value(struct bpf_perf_event_data_kern *ctx, struct bpf_perf_event_value *buf, u32 buf_size) | 1537 | * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) |
| 1537 | * Description | 1538 | * Description |
| 1538 | * For an eBPF program attached to a perf event, retrieve the | 1539 |
| 1539 | * value of the event counter associated to *ctx* and store it in | 1540 | * value of the event counter associated to *ctx* and store it in |
| @@ -1544,7 +1545,7 @@ union bpf_attr { | |||
| 1544 | * Return | 1545 | * Return |
| 1545 | * 0 on success, or a negative error in case of failure. | 1546 | * 0 on success, or a negative error in case of failure. |
| 1546 | * | 1547 | * |
| 1547 | * int bpf_getsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen) | 1548 | * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) |
| 1548 | * Description | 1549 | * Description |
| 1549 | * Emulate a call to **getsockopt()** on the socket associated to | 1550 | * Emulate a call to **getsockopt()** on the socket associated to |
| 1550 | * *bpf_socket*, which must be a full socket. The *level* at | 1551 | * *bpf_socket*, which must be a full socket. The *level* at |
| @@ -1588,7 +1589,7 @@ union bpf_attr { | |||
| 1588 | * Return | 1589 | * Return |
| 1589 | * 0 | 1590 | * 0 |
| 1590 | * | 1591 | * |
| 1591 | * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops_kern *bpf_sock, int argval) | 1592 | * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) |
| 1592 | * Description | 1593 | * Description |
| 1593 | * Attempt to set the value of the **bpf_sock_ops_cb_flags** field | 1594 | * Attempt to set the value of the **bpf_sock_ops_cb_flags** field |
| 1594 | * for the full TCP socket associated to *bpf_sock_ops* to | 1595 | * for the full TCP socket associated to *bpf_sock_ops* to |
| @@ -1721,7 +1722,7 @@ union bpf_attr { | |||
| 1721 | * Return | 1722 | * Return |
| 1722 | * 0 on success, or a negative error in case of failure. | 1723 | * 0 on success, or a negative error in case of failure. |
| 1723 | * | 1724 | * |
| 1724 | * int bpf_bind(struct bpf_sock_addr_kern *ctx, struct sockaddr *addr, int addr_len) | 1725 | * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) |
| 1725 | * Description | 1726 | * Description |
| 1726 | * Bind the socket associated to *ctx* to the address pointed by | 1727 | * Bind the socket associated to *ctx* to the address pointed by |
| 1727 | * *addr*, of length *addr_len*. This allows for making outgoing | 1728 | * *addr*, of length *addr_len*. This allows for making outgoing |
| @@ -1767,6 +1768,64 @@ union bpf_attr { | |||
| 1767 | * **CONFIG_XFRM** configuration option. | 1768 | * **CONFIG_XFRM** configuration option. |
| 1768 | * Return | 1769 | * Return |
| 1769 | * 0 on success, or a negative error in case of failure. | 1770 | * 0 on success, or a negative error in case of failure. |
| 1771 | * | ||
| 1772 | * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags) | ||
| 1773 | * Description | ||
| 1774 | * Return a user or kernel stack in a buffer provided by the bpf | ||
| 1775 | * program. To achieve this, the helper needs *regs*, which is a | ||
| 1776 | * pointer to the context on which the tracing program is executed. | ||
| 1777 | * To store the stack trace, the bpf program provides *buf* with | ||
| 1778 | * a nonnegative *size*. | ||
| 1779 | * | ||
| 1780 | * The last argument, *flags*, holds the number of stack frames to | ||
| 1781 | * skip (from 0 to 255), masked with | ||
| 1782 | * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set | ||
| 1783 | * the following flags: | ||
| 1784 | * | ||
| 1785 | * **BPF_F_USER_STACK** | ||
| 1786 | * Collect a user space stack instead of a kernel stack. | ||
| 1787 | * **BPF_F_USER_BUILD_ID** | ||
| 1788 | * Collect buildid+offset instead of ips for user stack, | ||
| 1789 | * only valid if **BPF_F_USER_STACK** is also specified. | ||
| 1790 | * | ||
| 1791 | * **bpf_get_stack**\ () can collect up to | ||
| 1792 | * **PERF_MAX_STACK_DEPTH** kernel and user frames, provided | ||
| 1793 | * the buffer is sufficiently large. Note that | ||
| 1794 | * this limit can be controlled with the **sysctl** program, and | ||
| 1795 | * that it should be manually increased in order to profile long | ||
| 1796 | * user stacks (such as stacks for Java programs). To do so, use: | ||
| 1797 | * | ||
| 1798 | * :: | ||
| 1799 | * | ||
| 1800 | * # sysctl kernel.perf_event_max_stack=<new value> | ||
| 1801 | * | ||
| 1802 | * Return | ||
| 1803 | * A non-negative value equal to or less than *size* on success, | ||
| 1804 | * or a negative error in case of failure. | ||
| 1805 | * | ||
| 1806 | * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header) | ||
| 1807 | * Description | ||
| 1808 | * This helper is similar to **bpf_skb_load_bytes**\ () in that | ||
| 1809 | * it provides an easy way to load *len* bytes, starting at | ||
| 1810 | * *offset*, from the packet associated to *skb* into the buffer pointed | ||
| 1811 | * by *to*. The difference to **bpf_skb_load_bytes**\ () is that | ||
| 1812 | * a fifth argument *start_header* exists in order to select a | ||
| 1813 | * base offset to start from. *start_header* can be one of: | ||
| 1814 | * | ||
| 1815 | * **BPF_HDR_START_MAC** | ||
| 1816 | * Base offset to load data from is *skb*'s mac header. | ||
| 1817 | * **BPF_HDR_START_NET** | ||
| 1818 | * Base offset to load data from is *skb*'s network header. | ||
| 1819 | * | ||
| 1820 | * In general, "direct packet access" is the preferred method for | ||
| 1821 | * accessing packet data; this helper is particularly useful | ||
| 1822 | * in socket filters, where *skb*\ **->data** does not always point | ||
| 1823 | * to the start of the mac header and where "direct packet access" | ||
| 1824 | * is not available. | ||
| 1825 | * | ||
| 1826 | * Return | ||
| 1827 | * 0 on success, or a negative error in case of failure. | ||
| 1828 | * | ||
| 1770 | */ | 1829 | */ |
| 1771 | #define __BPF_FUNC_MAPPER(FN) \ | 1830 | #define __BPF_FUNC_MAPPER(FN) \ |
| 1772 | FN(unspec), \ | 1831 | FN(unspec), \ |
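To make the bpf_get_stack() contract documented above concrete, here is a minimal, hypothetical tracing program. It assumes selftests-style bpf_helpers.h wrappers and an illustrative attach point, so treat it as a sketch rather than canonical usage:

	#include <linux/bpf.h>
	#include <linux/ptrace.h>
	#include "bpf_helpers.h"	/* assumed: SEC() and helper wrappers */

	#define MAX_STACK_DEPTH 32

	struct stack_sample {
		__u64 ips[MAX_STACK_DEPTH];
	};

	SEC("kprobe/urandom_read")	/* hypothetical attach point */
	int sample_user_stack(struct pt_regs *ctx)
	{
		struct stack_sample sample = {};
		int len;

		/* size must be a multiple of 8 here (or of
		 * sizeof(struct bpf_stack_build_id) when BPF_F_USER_BUILD_ID
		 * is set); the return value is the byte count written, <= size.
		 */
		len = bpf_get_stack(ctx, sample.ips, sizeof(sample.ips),
				    BPF_F_USER_STACK);
		if (len < 0)
			return 0;

		/* ... forward 'sample' via a perf event array, a map, etc. */
		return 0;
	}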
| @@ -1835,7 +1894,9 @@ union bpf_attr { | |||
| 1835 | FN(msg_pull_data), \ | 1894 | FN(msg_pull_data), \ |
| 1836 | FN(bind), \ | 1895 | FN(bind), \ |
| 1837 | FN(xdp_adjust_tail), \ | 1896 | FN(xdp_adjust_tail), \ |
| 1838 | FN(skb_get_xfrm_state), | 1897 | FN(skb_get_xfrm_state), \ |
| 1898 | FN(get_stack), \ | ||
| 1899 | FN(skb_load_bytes_relative), | ||
| 1839 | 1900 | ||
| 1840 | /* integer value in 'imm' field of BPF_CALL instruction selects which helper | 1901 | /* integer value in 'imm' field of BPF_CALL instruction selects which helper |
| 1841 | * function eBPF program intends to call | 1902 | * function eBPF program intends to call |
| @@ -1869,11 +1930,14 @@ enum bpf_func_id { | |||
| 1869 | /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ | 1930 | /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ |
| 1870 | #define BPF_F_TUNINFO_IPV6 (1ULL << 0) | 1931 | #define BPF_F_TUNINFO_IPV6 (1ULL << 0) |
| 1871 | 1932 | ||
| 1872 | /* BPF_FUNC_get_stackid flags. */ | 1933 | /* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ |
| 1873 | #define BPF_F_SKIP_FIELD_MASK 0xffULL | 1934 | #define BPF_F_SKIP_FIELD_MASK 0xffULL |
| 1874 | #define BPF_F_USER_STACK (1ULL << 8) | 1935 | #define BPF_F_USER_STACK (1ULL << 8) |
| 1936 | /* flags used by BPF_FUNC_get_stackid only. */ | ||
| 1875 | #define BPF_F_FAST_STACK_CMP (1ULL << 9) | 1937 | #define BPF_F_FAST_STACK_CMP (1ULL << 9) |
| 1876 | #define BPF_F_REUSE_STACKID (1ULL << 10) | 1938 | #define BPF_F_REUSE_STACKID (1ULL << 10) |
| 1939 | /* flags used by BPF_FUNC_get_stack only. */ | ||
| 1940 | #define BPF_F_USER_BUILD_ID (1ULL << 11) | ||
| 1877 | 1941 | ||
| 1878 | /* BPF_FUNC_skb_set_tunnel_key flags. */ | 1942 | /* BPF_FUNC_skb_set_tunnel_key flags. */ |
| 1879 | #define BPF_F_ZERO_CSUM_TX (1ULL << 1) | 1943 | #define BPF_F_ZERO_CSUM_TX (1ULL << 1) |
| @@ -1893,6 +1957,12 @@ enum bpf_adj_room_mode { | |||
| 1893 | BPF_ADJ_ROOM_NET, | 1957 | BPF_ADJ_ROOM_NET, |
| 1894 | }; | 1958 | }; |
| 1895 | 1959 | ||
| 1960 | /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ | ||
| 1961 | enum bpf_hdr_start_off { | ||
| 1962 | BPF_HDR_START_MAC, | ||
| 1963 | BPF_HDR_START_NET, | ||
| 1964 | }; | ||
| 1965 | |||
| 1896 | /* user accessible mirror of in-kernel sk_buff. | 1966 | /* user accessible mirror of in-kernel sk_buff. |
| 1897 | * new fields can only be added to the end of this structure | 1967 | * new fields can only be added to the end of this structure |
| 1898 | */ | 1968 | */ |
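The new BPF_HDR_START_* modes are easiest to see in a socket filter, the use case called out in the helper description above. A hedged sketch follows; the wrapper availability and the section name are assumptions:

	#include <linux/bpf.h>
	#include <linux/ip.h>
	#include "bpf_helpers.h"	/* assumed wrappers, as above */

	SEC("socket")
	int keep_only_ipv4(struct __sk_buff *skb)
	{
		struct iphdr iph;

		/* read from the network header, wherever skb->data happens
		 * to point in this socket-filter context */
		if (bpf_skb_load_bytes_relative(skb, 0, &iph, sizeof(iph),
						BPF_HDR_START_NET))
			return 0;			/* drop on error */

		return iph.version == 4 ? skb->len : 0;	/* keep IPv4 only */
	}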
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h new file mode 100644 index 000000000000..77b88c4efe98 --- /dev/null +++ b/include/uapi/linux/if_xdp.h | |||
| @@ -0,0 +1,87 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note | ||
| 2 | * | ||
| 3 | * if_xdp: XDP socket user-space interface | ||
| 4 | * Copyright(c) 2018 Intel Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * Author(s): Björn Töpel <bjorn.topel@intel.com> | ||
| 16 | * Magnus Karlsson <magnus.karlsson@intel.com> | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef _LINUX_IF_XDP_H | ||
| 20 | #define _LINUX_IF_XDP_H | ||
| 21 | |||
| 22 | #include <linux/types.h> | ||
| 23 | |||
| 24 | /* Options for the sxdp_flags field */ | ||
| 25 | #define XDP_SHARED_UMEM 1 | ||
| 26 | |||
| 27 | struct sockaddr_xdp { | ||
| 28 | __u16 sxdp_family; | ||
| 29 | __u32 sxdp_ifindex; | ||
| 30 | __u32 sxdp_queue_id; | ||
| 31 | __u32 sxdp_shared_umem_fd; | ||
| 32 | __u16 sxdp_flags; | ||
| 33 | }; | ||
| 34 | |||
| 35 | /* XDP socket options */ | ||
| 36 | #define XDP_RX_RING 1 | ||
| 37 | #define XDP_TX_RING 2 | ||
| 38 | #define XDP_UMEM_REG 3 | ||
| 39 | #define XDP_UMEM_FILL_RING 4 | ||
| 40 | #define XDP_UMEM_COMPLETION_RING 5 | ||
| 41 | #define XDP_STATISTICS 6 | ||
| 42 | |||
| 43 | struct xdp_umem_reg { | ||
| 44 | __u64 addr; /* Start of packet data area */ | ||
| 45 | __u64 len; /* Length of packet data area */ | ||
| 46 | __u32 frame_size; /* Frame size */ | ||
| 47 | __u32 frame_headroom; /* Frame head room */ | ||
| 48 | }; | ||
| 49 | |||
| 50 | struct xdp_statistics { | ||
| 51 | __u64 rx_dropped; /* Dropped for reasons other than invalid desc */ | ||
| 52 | __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */ | ||
| 53 | __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */ | ||
| 54 | }; | ||
| 55 | |||
| 56 | /* Pgoff for mmapping the rings */ | ||
| 57 | #define XDP_PGOFF_RX_RING 0 | ||
| 58 | #define XDP_PGOFF_TX_RING 0x80000000 | ||
| 59 | #define XDP_UMEM_PGOFF_FILL_RING 0x100000000 | ||
| 60 | #define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000 | ||
| 61 | |||
| 62 | struct xdp_desc { | ||
| 63 | __u32 idx; | ||
| 64 | __u32 len; | ||
| 65 | __u16 offset; | ||
| 66 | __u8 flags; | ||
| 67 | __u8 padding[5]; | ||
| 68 | }; | ||
| 69 | |||
| 70 | struct xdp_ring { | ||
| 71 | __u32 producer __attribute__((aligned(64))); | ||
| 72 | __u32 consumer __attribute__((aligned(64))); | ||
| 73 | }; | ||
| 74 | |||
| 75 | /* Used for the RX and TX queues for packets */ | ||
| 76 | struct xdp_rxtx_ring { | ||
| 77 | struct xdp_ring ptrs; | ||
| 78 | struct xdp_desc desc[0] __attribute__((aligned(64))); | ||
| 79 | }; | ||
| 80 | |||
| 81 | /* Used for the fill and completion queues for buffers */ | ||
| 82 | struct xdp_umem_ring { | ||
| 83 | struct xdp_ring ptrs; | ||
| 84 | __u32 desc[0] __attribute__((aligned(64))); | ||
| 85 | }; | ||
| 86 | |||
| 87 | #endif /* _LINUX_IF_XDP_H */ | ||
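Putting the new UAPI together, a minimal user-space sketch of XSK setup. The AF_XDP and SOL_XDP constants are not part of this header; their values below are assumptions, and error handling beyond the basics is elided:

	#include <linux/if_xdp.h>
	#include <sys/socket.h>
	#include <stdlib.h>
	#include <unistd.h>

	#ifndef AF_XDP
	#define AF_XDP 44	/* assumption: supplied by the AF_XDP patches */
	#endif
	#ifndef SOL_XDP
	#define SOL_XDP 283	/* assumption, as above */
	#endif

	static int xsk_create(unsigned int ifindex)
	{
		int fd, ring_sz = 1024;	/* descriptor count, per setsockopt */
		struct xdp_umem_reg ureg;
		struct sockaddr_xdp sxdp = {};
		void *umem;

		fd = socket(AF_XDP, SOCK_RAW, 0);
		if (fd < 0)
			return -1;

		/* UMEM: one contiguous area of equally sized frames */
		umem = aligned_alloc(getpagesize(), 1 << 20);
		ureg.addr = (__u64)(unsigned long)umem;
		ureg.len = 1 << 20;
		ureg.frame_size = 2048;
		ureg.frame_headroom = 0;
		setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &ureg, sizeof(ureg));

		/* size the kernel rings, then bind to a specific queue */
		setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, &ring_sz, sizeof(ring_sz));
		setsockopt(fd, SOL_XDP, XDP_RX_RING, &ring_sz, sizeof(ring_sz));

		sxdp.sxdp_family = AF_XDP;
		sxdp.sxdp_ifindex = ifindex;
		sxdp.sxdp_queue_id = 0;
		return bind(fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) ? -1 : fd;
	}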
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 35c485fa9ea3..f27f5496d6fe 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile | |||
| @@ -8,6 +8,9 @@ obj-$(CONFIG_BPF_SYSCALL) += btf.o | |||
| 8 | ifeq ($(CONFIG_NET),y) | 8 | ifeq ($(CONFIG_NET),y) |
| 9 | obj-$(CONFIG_BPF_SYSCALL) += devmap.o | 9 | obj-$(CONFIG_BPF_SYSCALL) += devmap.o |
| 10 | obj-$(CONFIG_BPF_SYSCALL) += cpumap.o | 10 | obj-$(CONFIG_BPF_SYSCALL) += cpumap.o |
| 11 | ifeq ($(CONFIG_XDP_SOCKETS),y) | ||
| 12 | obj-$(CONFIG_BPF_SYSCALL) += xskmap.o | ||
| 13 | endif | ||
| 11 | obj-$(CONFIG_BPF_SYSCALL) += offload.o | 14 | obj-$(CONFIG_BPF_SYSCALL) += offload.o |
| 12 | ifeq ($(CONFIG_STREAM_PARSER),y) | 15 | ifeq ($(CONFIG_STREAM_PARSER),y) |
| 13 | ifeq ($(CONFIG_INET),y) | 16 | ifeq ($(CONFIG_INET),y) |
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ba03ec39efb3..d0d7d9462368 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c | |||
| @@ -31,6 +31,7 @@ | |||
| 31 | #include <linux/rbtree_latch.h> | 31 | #include <linux/rbtree_latch.h> |
| 32 | #include <linux/kallsyms.h> | 32 | #include <linux/kallsyms.h> |
| 33 | #include <linux/rcupdate.h> | 33 | #include <linux/rcupdate.h> |
| 34 | #include <linux/perf_event.h> | ||
| 34 | 35 | ||
| 35 | #include <asm/unaligned.h> | 36 | #include <asm/unaligned.h> |
| 36 | 37 | ||
| @@ -633,23 +634,6 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, | |||
| 633 | *to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off); | 634 | *to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off); |
| 634 | break; | 635 | break; |
| 635 | 636 | ||
| 636 | case BPF_LD | BPF_ABS | BPF_W: | ||
| 637 | case BPF_LD | BPF_ABS | BPF_H: | ||
| 638 | case BPF_LD | BPF_ABS | BPF_B: | ||
| 639 | *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); | ||
| 640 | *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); | ||
| 641 | *to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0); | ||
| 642 | break; | ||
| 643 | |||
| 644 | case BPF_LD | BPF_IND | BPF_W: | ||
| 645 | case BPF_LD | BPF_IND | BPF_H: | ||
| 646 | case BPF_LD | BPF_IND | BPF_B: | ||
| 647 | *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); | ||
| 648 | *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); | ||
| 649 | *to++ = BPF_ALU32_REG(BPF_ADD, BPF_REG_AX, from->src_reg); | ||
| 650 | *to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0); | ||
| 651 | break; | ||
| 652 | |||
| 653 | case BPF_LD | BPF_IMM | BPF_DW: | 637 | case BPF_LD | BPF_IMM | BPF_DW: |
| 654 | *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm); | 638 | *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm); |
| 655 | *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); | 639 | *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); |
| @@ -890,14 +874,7 @@ EXPORT_SYMBOL_GPL(__bpf_call_base); | |||
| 890 | INSN_3(LDX, MEM, W), \ | 874 | INSN_3(LDX, MEM, W), \ |
| 891 | INSN_3(LDX, MEM, DW), \ | 875 | INSN_3(LDX, MEM, DW), \ |
| 892 | /* Immediate based. */ \ | 876 | /* Immediate based. */ \ |
| 893 | INSN_3(LD, IMM, DW), \ | 877 | INSN_3(LD, IMM, DW) |
| 894 | /* Misc (old cBPF carry-over). */ \ | ||
| 895 | INSN_3(LD, ABS, B), \ | ||
| 896 | INSN_3(LD, ABS, H), \ | ||
| 897 | INSN_3(LD, ABS, W), \ | ||
| 898 | INSN_3(LD, IND, B), \ | ||
| 899 | INSN_3(LD, IND, H), \ | ||
| 900 | INSN_3(LD, IND, W) | ||
| 901 | 878 | ||
| 902 | bool bpf_opcode_in_insntable(u8 code) | 879 | bool bpf_opcode_in_insntable(u8 code) |
| 903 | { | 880 | { |
| @@ -907,6 +884,13 @@ bool bpf_opcode_in_insntable(u8 code) | |||
| 907 | [0 ... 255] = false, | 884 | [0 ... 255] = false, |
| 908 | /* Now overwrite non-defaults ... */ | 885 | /* Now overwrite non-defaults ... */ |
| 909 | BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL), | 886 | BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL), |
| 887 | /* UAPI exposed, but rewritten opcodes. cBPF carry-over. */ | ||
| 888 | [BPF_LD | BPF_ABS | BPF_B] = true, | ||
| 889 | [BPF_LD | BPF_ABS | BPF_H] = true, | ||
| 890 | [BPF_LD | BPF_ABS | BPF_W] = true, | ||
| 891 | [BPF_LD | BPF_IND | BPF_B] = true, | ||
| 892 | [BPF_LD | BPF_IND | BPF_H] = true, | ||
| 893 | [BPF_LD | BPF_IND | BPF_W] = true, | ||
| 910 | }; | 894 | }; |
| 911 | #undef BPF_INSN_3_TBL | 895 | #undef BPF_INSN_3_TBL |
| 912 | #undef BPF_INSN_2_TBL | 896 | #undef BPF_INSN_2_TBL |
| @@ -937,8 +921,6 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) | |||
| 937 | #undef BPF_INSN_3_LBL | 921 | #undef BPF_INSN_3_LBL |
| 938 | #undef BPF_INSN_2_LBL | 922 | #undef BPF_INSN_2_LBL |
| 939 | u32 tail_call_cnt = 0; | 923 | u32 tail_call_cnt = 0; |
| 940 | void *ptr; | ||
| 941 | int off; | ||
| 942 | 924 | ||
| 943 | #define CONT ({ insn++; goto select_insn; }) | 925 | #define CONT ({ insn++; goto select_insn; }) |
| 944 | #define CONT_JMP ({ insn++; goto select_insn; }) | 926 | #define CONT_JMP ({ insn++; goto select_insn; }) |
| @@ -1265,67 +1247,6 @@ out: | |||
| 1265 | atomic64_add((u64) SRC, (atomic64_t *)(unsigned long) | 1247 | atomic64_add((u64) SRC, (atomic64_t *)(unsigned long) |
| 1266 | (DST + insn->off)); | 1248 | (DST + insn->off)); |
| 1267 | CONT; | 1249 | CONT; |
| 1268 | LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */ | ||
| 1269 | off = IMM; | ||
| 1270 | load_word: | ||
| 1271 | /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only | ||
| 1272 | * appearing in the programs where ctx == skb | ||
| 1273 | * (see may_access_skb() in the verifier). All programs | ||
| 1274 | * keep 'ctx' in regs[BPF_REG_CTX] == BPF_R6, | ||
| 1275 | * bpf_convert_filter() saves it in BPF_R6, internal BPF | ||
| 1276 | * verifier will check that BPF_R6 == ctx. | ||
| 1277 | * | ||
| 1278 | * BPF_ABS and BPF_IND are wrappers of function calls, | ||
| 1279 | * so they scratch BPF_R1-BPF_R5 registers, preserve | ||
| 1280 | * BPF_R6-BPF_R9, and store return value into BPF_R0. | ||
| 1281 | * | ||
| 1282 | * Implicit input: | ||
| 1283 | * ctx == skb == BPF_R6 == CTX | ||
| 1284 | * | ||
| 1285 | * Explicit input: | ||
| 1286 | * SRC == any register | ||
| 1287 | * IMM == 32-bit immediate | ||
| 1288 | * | ||
| 1289 | * Output: | ||
| 1290 | * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness | ||
| 1291 | */ | ||
| 1292 | |||
| 1293 | ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp); | ||
| 1294 | if (likely(ptr != NULL)) { | ||
| 1295 | BPF_R0 = get_unaligned_be32(ptr); | ||
| 1296 | CONT; | ||
| 1297 | } | ||
| 1298 | |||
| 1299 | return 0; | ||
| 1300 | LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */ | ||
| 1301 | off = IMM; | ||
| 1302 | load_half: | ||
| 1303 | ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp); | ||
| 1304 | if (likely(ptr != NULL)) { | ||
| 1305 | BPF_R0 = get_unaligned_be16(ptr); | ||
| 1306 | CONT; | ||
| 1307 | } | ||
| 1308 | |||
| 1309 | return 0; | ||
| 1310 | LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */ | ||
| 1311 | off = IMM; | ||
| 1312 | load_byte: | ||
| 1313 | ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp); | ||
| 1314 | if (likely(ptr != NULL)) { | ||
| 1315 | BPF_R0 = *(u8 *)ptr; | ||
| 1316 | CONT; | ||
| 1317 | } | ||
| 1318 | |||
| 1319 | return 0; | ||
| 1320 | LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */ | ||
| 1321 | off = IMM + SRC; | ||
| 1322 | goto load_word; | ||
| 1323 | LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */ | ||
| 1324 | off = IMM + SRC; | ||
| 1325 | goto load_half; | ||
| 1326 | LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */ | ||
| 1327 | off = IMM + SRC; | ||
| 1328 | goto load_byte; | ||
| 1329 | 1250 | ||
| 1330 | default_label: | 1251 | default_label: |
| 1331 | /* If we ever reach this, we have a bug somewhere. Die hard here | 1252 | /* If we ever reach this, we have a bug somewhere. Die hard here |
| @@ -1722,6 +1643,10 @@ static void bpf_prog_free_deferred(struct work_struct *work) | |||
| 1722 | aux = container_of(work, struct bpf_prog_aux, work); | 1643 | aux = container_of(work, struct bpf_prog_aux, work); |
| 1723 | if (bpf_prog_is_dev_bound(aux)) | 1644 | if (bpf_prog_is_dev_bound(aux)) |
| 1724 | bpf_prog_offload_destroy(aux->prog); | 1645 | bpf_prog_offload_destroy(aux->prog); |
| 1646 | #ifdef CONFIG_PERF_EVENTS | ||
| 1647 | if (aux->prog->has_callchain_buf) | ||
| 1648 | put_callchain_buffers(); | ||
| 1649 | #endif | ||
| 1725 | for (i = 0; i < aux->func_cnt; i++) | 1650 | for (i = 0; i < aux->func_cnt; i++) |
| 1726 | bpf_jit_free(aux->func[i]); | 1651 | bpf_jit_free(aux->func[i]); |
| 1727 | if (aux->func_cnt) { | 1652 | if (aux->func_cnt) { |
| @@ -1794,6 +1719,7 @@ bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, | |||
| 1794 | { | 1719 | { |
| 1795 | return -ENOTSUPP; | 1720 | return -ENOTSUPP; |
| 1796 | } | 1721 | } |
| 1722 | EXPORT_SYMBOL_GPL(bpf_event_output); | ||
| 1797 | 1723 | ||
| 1798 | /* Always built-in helper functions. */ | 1724 | /* Always built-in helper functions. */ |
| 1799 | const struct bpf_func_proto bpf_tail_call_proto = { | 1725 | const struct bpf_func_proto bpf_tail_call_proto = { |
| @@ -1840,9 +1766,3 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to, | |||
| 1840 | #include <linux/bpf_trace.h> | 1766 | #include <linux/bpf_trace.h> |
| 1841 | 1767 | ||
| 1842 | EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception); | 1768 | EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception); |
| 1843 | |||
| 1844 | /* These are only used within the BPF_SYSCALL code */ | ||
| 1845 | #ifdef CONFIG_BPF_SYSCALL | ||
| 1846 | EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_get_type); | ||
| 1847 | EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_put_rcu); | ||
| 1848 | #endif | ||
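With the interpreter entries gone, BPF_LD | BPF_ABS and BPF_LD | BPF_IND remain legal only as UAPI opcodes that get rewritten into native eBPF sequences before execution, as the new insn-table comment says. A small sketch of the encoding that is still accepted, using the kernel's internal insn macros (an illustration, not part of this diff):

	#include <linux/filter.h>	/* kernel-internal BPF insn macros */

	/* R0 = ntohl(*(u32 *)(skb->data + 14)), per the interpreter comment
	 * removed above; still accepted from user space, but no longer
	 * executed directly by ___bpf_prog_run(). */
	struct bpf_insn ld_abs_w = BPF_LD_ABS(BPF_W, 14);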
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index a41343009ccc..ed13645bd80c 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c | |||
| @@ -429,13 +429,6 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname) | |||
| 429 | ret = bpf_obj_do_pin(pname, raw, type); | 429 | ret = bpf_obj_do_pin(pname, raw, type); |
| 430 | if (ret != 0) | 430 | if (ret != 0) |
| 431 | bpf_any_put(raw, type); | 431 | bpf_any_put(raw, type); |
| 432 | if ((trace_bpf_obj_pin_prog_enabled() || | ||
| 433 | trace_bpf_obj_pin_map_enabled()) && !ret) { | ||
| 434 | if (type == BPF_TYPE_PROG) | ||
| 435 | trace_bpf_obj_pin_prog(raw, ufd, pname); | ||
| 436 | if (type == BPF_TYPE_MAP) | ||
| 437 | trace_bpf_obj_pin_map(raw, ufd, pname); | ||
| 438 | } | ||
| 439 | out: | 432 | out: |
| 440 | putname(pname); | 433 | putname(pname); |
| 441 | return ret; | 434 | return ret; |
| @@ -502,15 +495,8 @@ int bpf_obj_get_user(const char __user *pathname, int flags) | |||
| 502 | else | 495 | else |
| 503 | goto out; | 496 | goto out; |
| 504 | 497 | ||
| 505 | if (ret < 0) { | 498 | if (ret < 0) |
| 506 | bpf_any_put(raw, type); | 499 | bpf_any_put(raw, type); |
| 507 | } else if (trace_bpf_obj_get_prog_enabled() || | ||
| 508 | trace_bpf_obj_get_map_enabled()) { | ||
| 509 | if (type == BPF_TYPE_PROG) | ||
| 510 | trace_bpf_obj_get_prog(raw, ret, pname); | ||
| 511 | if (type == BPF_TYPE_MAP) | ||
| 512 | trace_bpf_obj_get_map(raw, ret, pname); | ||
| 513 | } | ||
| 514 | out: | 500 | out: |
| 515 | putname(pname); | 501 | putname(pname); |
| 516 | return ret; | 502 | return ret; |
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index c9401075b58c..ac747d5cf7c6 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2017-2018 Netronome Systems, Inc. |
| 3 | * | 3 | * |
| 4 | * This software is licensed under the GNU General License Version 2, | 4 | * This software is licensed under the GNU General License Version 2, |
| 5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
| @@ -474,8 +474,10 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map) | |||
| 474 | struct bpf_prog_offload *offload; | 474 | struct bpf_prog_offload *offload; |
| 475 | bool ret; | 475 | bool ret; |
| 476 | 476 | ||
| 477 | if (!bpf_prog_is_dev_bound(prog->aux) || !bpf_map_is_dev_bound(map)) | 477 | if (!bpf_prog_is_dev_bound(prog->aux)) |
| 478 | return false; | 478 | return false; |
| 479 | if (!bpf_map_is_dev_bound(map)) | ||
| 480 | return bpf_map_offload_neutral(map); | ||
| 479 | 481 | ||
| 480 | down_read(&bpf_devs_lock); | 482 | down_read(&bpf_devs_lock); |
| 481 | offload = prog->aux->offload; | 483 | offload = prog->aux->offload; |
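The net effect of the bpf_offload_dev_match() change, spelled out as a sketch; which map types count as offload-neutral is defined elsewhere in this series, so the parenthetical is an assumption:

	/* decision order after this change: */
	if (!bpf_prog_is_dev_bound(prog->aux))
		return false;				/* host program */
	if (!bpf_map_is_dev_bound(map))
		return bpf_map_offload_neutral(map);	/* host map: only if neutral */
	/* both device-bound: fall through to the same-netdev comparison
	 * (reportedly covering e.g. the perf event array behind
	 * bpf_event_output(), which gains an export in core.c above) */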
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 57eeb1234b67..3ba102b41512 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c | |||
| @@ -262,16 +262,11 @@ out: | |||
| 262 | return ret; | 262 | return ret; |
| 263 | } | 263 | } |
| 264 | 264 | ||
| 265 | static void stack_map_get_build_id_offset(struct bpf_map *map, | 265 | static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, |
| 266 | struct stack_map_bucket *bucket, | ||
| 267 | u64 *ips, u32 trace_nr, bool user) | 266 | u64 *ips, u32 trace_nr, bool user) |
| 268 | { | 267 | { |
| 269 | int i; | 268 | int i; |
| 270 | struct vm_area_struct *vma; | 269 | struct vm_area_struct *vma; |
| 271 | struct bpf_stack_build_id *id_offs; | ||
| 272 | |||
| 273 | bucket->nr = trace_nr; | ||
| 274 | id_offs = (struct bpf_stack_build_id *)bucket->data; | ||
| 275 | 270 | ||
| 276 | /* | 271 | /* |
| 277 | * We cannot do up_read() in nmi context, so build_id lookup is | 272 | * We cannot do up_read() in nmi context, so build_id lookup is |
| @@ -361,8 +356,10 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, | |||
| 361 | pcpu_freelist_pop(&smap->freelist); | 356 | pcpu_freelist_pop(&smap->freelist); |
| 362 | if (unlikely(!new_bucket)) | 357 | if (unlikely(!new_bucket)) |
| 363 | return -ENOMEM; | 358 | return -ENOMEM; |
| 364 | stack_map_get_build_id_offset(map, new_bucket, ips, | 359 | new_bucket->nr = trace_nr; |
| 365 | trace_nr, user); | 360 | stack_map_get_build_id_offset( |
| 361 | (struct bpf_stack_build_id *)new_bucket->data, | ||
| 362 | ips, trace_nr, user); | ||
| 366 | trace_len = trace_nr * sizeof(struct bpf_stack_build_id); | 363 | trace_len = trace_nr * sizeof(struct bpf_stack_build_id); |
| 367 | if (hash_matches && bucket->nr == trace_nr && | 364 | if (hash_matches && bucket->nr == trace_nr && |
| 368 | memcmp(bucket->data, new_bucket->data, trace_len) == 0) { | 365 | memcmp(bucket->data, new_bucket->data, trace_len) == 0) { |
| @@ -405,6 +402,73 @@ const struct bpf_func_proto bpf_get_stackid_proto = { | |||
| 405 | .arg3_type = ARG_ANYTHING, | 402 | .arg3_type = ARG_ANYTHING, |
| 406 | }; | 403 | }; |
| 407 | 404 | ||
| 405 | BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size, | ||
| 406 | u64, flags) | ||
| 407 | { | ||
| 408 | u32 init_nr, trace_nr, copy_len, elem_size, num_elem; | ||
| 409 | bool user_build_id = flags & BPF_F_USER_BUILD_ID; | ||
| 410 | u32 skip = flags & BPF_F_SKIP_FIELD_MASK; | ||
| 411 | bool user = flags & BPF_F_USER_STACK; | ||
| 412 | struct perf_callchain_entry *trace; | ||
| 413 | bool kernel = !user; | ||
| 414 | int err = -EINVAL; | ||
| 415 | u64 *ips; | ||
| 416 | |||
| 417 | if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | | ||
| 418 | BPF_F_USER_BUILD_ID))) | ||
| 419 | goto clear; | ||
| 420 | if (kernel && user_build_id) | ||
| 421 | goto clear; | ||
| 422 | |||
| 423 | elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id) | ||
| 424 | : sizeof(u64); | ||
| 425 | if (unlikely(size % elem_size)) | ||
| 426 | goto clear; | ||
| 427 | |||
| 428 | num_elem = size / elem_size; | ||
| 429 | if (sysctl_perf_event_max_stack < num_elem) | ||
| 430 | init_nr = 0; | ||
| 431 | else | ||
| 432 | init_nr = sysctl_perf_event_max_stack - num_elem; | ||
| 433 | trace = get_perf_callchain(regs, init_nr, kernel, user, | ||
| 434 | sysctl_perf_event_max_stack, false, false); | ||
| 435 | if (unlikely(!trace)) | ||
| 436 | goto err_fault; | ||
| 437 | |||
| 438 | trace_nr = trace->nr - init_nr; | ||
| 439 | if (trace_nr < skip) | ||
| 440 | goto err_fault; | ||
| 441 | |||
| 442 | trace_nr -= skip; | ||
| 443 | trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem; | ||
| 444 | copy_len = trace_nr * elem_size; | ||
| 445 | ips = trace->ip + skip + init_nr; | ||
| 446 | if (user && user_build_id) | ||
| 447 | stack_map_get_build_id_offset(buf, ips, trace_nr, user); | ||
| 448 | else | ||
| 449 | memcpy(buf, ips, copy_len); | ||
| 450 | |||
| 451 | if (size > copy_len) | ||
| 452 | memset(buf + copy_len, 0, size - copy_len); | ||
| 453 | return copy_len; | ||
| 454 | |||
| 455 | err_fault: | ||
| 456 | err = -EFAULT; | ||
| 457 | clear: | ||
| 458 | memset(buf, 0, size); | ||
| 459 | return err; | ||
| 460 | } | ||
| 461 | |||
| 462 | const struct bpf_func_proto bpf_get_stack_proto = { | ||
| 463 | .func = bpf_get_stack, | ||
| 464 | .gpl_only = true, | ||
| 465 | .ret_type = RET_INTEGER, | ||
| 466 | .arg1_type = ARG_PTR_TO_CTX, | ||
| 467 | .arg2_type = ARG_PTR_TO_UNINIT_MEM, | ||
| 468 | .arg3_type = ARG_CONST_SIZE_OR_ZERO, | ||
| 469 | .arg4_type = ARG_ANYTHING, | ||
| 470 | }; | ||
| 471 | |||
| 408 | /* Called from eBPF program */ | 472 | /* Called from eBPF program */ |
| 409 | static void *stack_map_lookup_elem(struct bpf_map *map, void *key) | 473 | static void *stack_map_lookup_elem(struct bpf_map *map, void *key) |
| 410 | { | 474 | { |
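The buffer-sizing rules enforced by bpf_get_stack() above, restated as a small helper sketch (illustrative only):

	#include <linux/types.h>
	#include <linux/errno.h>
	#include <uapi/linux/bpf.h>	/* struct bpf_stack_build_id */

	static int stack_buf_elems(__u32 size, bool build_id)
	{
		__u32 elem = build_id ? sizeof(struct bpf_stack_build_id)
				      : sizeof(__u64);

		if (size % elem)	/* rejected with -EINVAL by the helper */
			return -EINVAL;
		return size / elem;	/* num_elem, as computed above */
	}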
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0bd2944eafb9..9b87198deea2 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c | |||
| @@ -282,6 +282,7 @@ void bpf_map_put(struct bpf_map *map) | |||
| 282 | { | 282 | { |
| 283 | __bpf_map_put(map, true); | 283 | __bpf_map_put(map, true); |
| 284 | } | 284 | } |
| 285 | EXPORT_SYMBOL_GPL(bpf_map_put); | ||
| 285 | 286 | ||
| 286 | void bpf_map_put_with_uref(struct bpf_map *map) | 287 | void bpf_map_put_with_uref(struct bpf_map *map) |
| 287 | { | 288 | { |
| @@ -503,7 +504,6 @@ static int map_create(union bpf_attr *attr) | |||
| 503 | return err; | 504 | return err; |
| 504 | } | 505 | } |
| 505 | 506 | ||
| 506 | trace_bpf_map_create(map, err); | ||
| 507 | return err; | 507 | return err; |
| 508 | 508 | ||
| 509 | free_map: | 509 | free_map: |
| @@ -544,6 +544,7 @@ struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref) | |||
| 544 | atomic_inc(&map->usercnt); | 544 | atomic_inc(&map->usercnt); |
| 545 | return map; | 545 | return map; |
| 546 | } | 546 | } |
| 547 | EXPORT_SYMBOL_GPL(bpf_map_inc); | ||
| 547 | 548 | ||
| 548 | struct bpf_map *bpf_map_get_with_uref(u32 ufd) | 549 | struct bpf_map *bpf_map_get_with_uref(u32 ufd) |
| 549 | { | 550 | { |
| @@ -663,7 +664,6 @@ static int map_lookup_elem(union bpf_attr *attr) | |||
| 663 | if (copy_to_user(uvalue, value, value_size) != 0) | 664 | if (copy_to_user(uvalue, value, value_size) != 0) |
| 664 | goto free_value; | 665 | goto free_value; |
| 665 | 666 | ||
| 666 | trace_bpf_map_lookup_elem(map, ufd, key, value); | ||
| 667 | err = 0; | 667 | err = 0; |
| 668 | 668 | ||
| 669 | free_value: | 669 | free_value: |
| @@ -760,8 +760,6 @@ static int map_update_elem(union bpf_attr *attr) | |||
| 760 | __this_cpu_dec(bpf_prog_active); | 760 | __this_cpu_dec(bpf_prog_active); |
| 761 | preempt_enable(); | 761 | preempt_enable(); |
| 762 | out: | 762 | out: |
| 763 | if (!err) | ||
| 764 | trace_bpf_map_update_elem(map, ufd, key, value); | ||
| 765 | free_value: | 763 | free_value: |
| 766 | kfree(value); | 764 | kfree(value); |
| 767 | free_key: | 765 | free_key: |
| @@ -814,8 +812,6 @@ static int map_delete_elem(union bpf_attr *attr) | |||
| 814 | __this_cpu_dec(bpf_prog_active); | 812 | __this_cpu_dec(bpf_prog_active); |
| 815 | preempt_enable(); | 813 | preempt_enable(); |
| 816 | out: | 814 | out: |
| 817 | if (!err) | ||
| 818 | trace_bpf_map_delete_elem(map, ufd, key); | ||
| 819 | kfree(key); | 815 | kfree(key); |
| 820 | err_put: | 816 | err_put: |
| 821 | fdput(f); | 817 | fdput(f); |
| @@ -879,7 +875,6 @@ out: | |||
| 879 | if (copy_to_user(unext_key, next_key, map->key_size) != 0) | 875 | if (copy_to_user(unext_key, next_key, map->key_size) != 0) |
| 880 | goto free_next_key; | 876 | goto free_next_key; |
| 881 | 877 | ||
| 882 | trace_bpf_map_next_key(map, ufd, key, next_key); | ||
| 883 | err = 0; | 878 | err = 0; |
| 884 | 879 | ||
| 885 | free_next_key: | 880 | free_next_key: |
| @@ -1027,7 +1022,6 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) | |||
| 1027 | if (atomic_dec_and_test(&prog->aux->refcnt)) { | 1022 | if (atomic_dec_and_test(&prog->aux->refcnt)) { |
| 1028 | int i; | 1023 | int i; |
| 1029 | 1024 | ||
| 1030 | trace_bpf_prog_put_rcu(prog); | ||
| 1031 | /* bpf_prog_free_id() must be called first */ | 1025 | /* bpf_prog_free_id() must be called first */ |
| 1032 | bpf_prog_free_id(prog, do_idr_lock); | 1026 | bpf_prog_free_id(prog, do_idr_lock); |
| 1033 | 1027 | ||
| @@ -1194,11 +1188,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd) | |||
| 1194 | struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, | 1188 | struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, |
| 1195 | bool attach_drv) | 1189 | bool attach_drv) |
| 1196 | { | 1190 | { |
| 1197 | struct bpf_prog *prog = __bpf_prog_get(ufd, &type, attach_drv); | 1191 | return __bpf_prog_get(ufd, &type, attach_drv); |
| 1198 | |||
| 1199 | if (!IS_ERR(prog)) | ||
| 1200 | trace_bpf_prog_get_type(prog); | ||
| 1201 | return prog; | ||
| 1202 | } | 1192 | } |
| 1203 | EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); | 1193 | EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); |
| 1204 | 1194 | ||
| @@ -1373,7 +1363,6 @@ static int bpf_prog_load(union bpf_attr *attr) | |||
| 1373 | } | 1363 | } |
| 1374 | 1364 | ||
| 1375 | bpf_prog_kallsyms_add(prog); | 1365 | bpf_prog_kallsyms_add(prog); |
| 1376 | trace_bpf_prog_load(prog, err); | ||
| 1377 | return err; | 1366 | return err; |
| 1378 | 1367 | ||
| 1379 | free_used_maps: | 1368 | free_used_maps: |
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c index 1f4bf68c12db..938d41211be7 100644 --- a/kernel/bpf/tnum.c +++ b/kernel/bpf/tnum.c | |||
| @@ -43,6 +43,16 @@ struct tnum tnum_rshift(struct tnum a, u8 shift) | |||
| 43 | return TNUM(a.value >> shift, a.mask >> shift); | 43 | return TNUM(a.value >> shift, a.mask >> shift); |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | struct tnum tnum_arshift(struct tnum a, u8 min_shift) | ||
| 47 | { | ||
| 48 | /* if a.value is negative, arithmetic shifting by minimum shift | ||
| 49 | * will have larger negative offset compared to more shifting. | ||
| 50 | * If a.value is nonnegative, arithmetic shifting by minimum shift | ||
| 51 | * will have larger positive offset compared to more shifting. | ||
| 52 | */ | ||
| 53 | return TNUM((s64)a.value >> min_shift, (s64)a.mask >> min_shift); | ||
| 54 | } | ||
| 55 | |||
| 46 | struct tnum tnum_add(struct tnum a, struct tnum b) | 56 | struct tnum tnum_add(struct tnum a, struct tnum b) |
| 47 | { | 57 | { |
| 48 | u64 sm, sv, sigma, chi, mu; | 58 | u64 sm, sv, sigma, chi, mu; |
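A worked example of the new tnum_arshift(), using the TNUM() initializer from this file; the values are verified by hand but purely illustrative:

	/* a tnum tracks known bits in .value and unknown bits in .mask;
	 * here the sign bit is known-set and bit 0 is unknown: */
	struct tnum a = TNUM(0x8000000000000000ULL, 0x1ULL);
	struct tnum r = tnum_arshift(a, 60);
	/* r = TNUM(0xfffffffffffffff8, 0x0): the known sign bit is
	 * replicated into the upper bits, and the unknown low bit is
	 * shifted out entirely, leaving the result fully known. */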
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index eb1a596aebd3..d5e1a6c4165d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <linux/stringify.h> | 22 | #include <linux/stringify.h> |
| 23 | #include <linux/bsearch.h> | 23 | #include <linux/bsearch.h> |
| 24 | #include <linux/sort.h> | 24 | #include <linux/sort.h> |
| 25 | #include <linux/perf_event.h> | ||
| 25 | 26 | ||
| 26 | #include "disasm.h" | 27 | #include "disasm.h" |
| 27 | 28 | ||
| @@ -164,6 +165,8 @@ struct bpf_call_arg_meta { | |||
| 164 | bool pkt_access; | 165 | bool pkt_access; |
| 165 | int regno; | 166 | int regno; |
| 166 | int access_size; | 167 | int access_size; |
| 168 | s64 msize_smax_value; | ||
| 169 | u64 msize_umax_value; | ||
| 167 | }; | 170 | }; |
| 168 | 171 | ||
| 169 | static DEFINE_MUTEX(bpf_verifier_lock); | 172 | static DEFINE_MUTEX(bpf_verifier_lock); |
| @@ -738,18 +741,19 @@ enum reg_arg_type { | |||
| 738 | 741 | ||
| 739 | static int cmp_subprogs(const void *a, const void *b) | 742 | static int cmp_subprogs(const void *a, const void *b) |
| 740 | { | 743 | { |
| 741 | return *(int *)a - *(int *)b; | 744 | return ((struct bpf_subprog_info *)a)->start - |
| 745 | ((struct bpf_subprog_info *)b)->start; | ||
| 742 | } | 746 | } |
| 743 | 747 | ||
| 744 | static int find_subprog(struct bpf_verifier_env *env, int off) | 748 | static int find_subprog(struct bpf_verifier_env *env, int off) |
| 745 | { | 749 | { |
| 746 | u32 *p; | 750 | struct bpf_subprog_info *p; |
| 747 | 751 | ||
| 748 | p = bsearch(&off, env->subprog_starts, env->subprog_cnt, | 752 | p = bsearch(&off, env->subprog_info, env->subprog_cnt, |
| 749 | sizeof(env->subprog_starts[0]), cmp_subprogs); | 753 | sizeof(env->subprog_info[0]), cmp_subprogs); |
| 750 | if (!p) | 754 | if (!p) |
| 751 | return -ENOENT; | 755 | return -ENOENT; |
| 752 | return p - env->subprog_starts; | 756 | return p - env->subprog_info; |
| 753 | 757 | ||
| 754 | } | 758 | } |
| 755 | 759 | ||
| @@ -769,18 +773,24 @@ static int add_subprog(struct bpf_verifier_env *env, int off) | |||
| 769 | verbose(env, "too many subprograms\n"); | 773 | verbose(env, "too many subprograms\n"); |
| 770 | return -E2BIG; | 774 | return -E2BIG; |
| 771 | } | 775 | } |
| 772 | env->subprog_starts[env->subprog_cnt++] = off; | 776 | env->subprog_info[env->subprog_cnt++].start = off; |
| 773 | sort(env->subprog_starts, env->subprog_cnt, | 777 | sort(env->subprog_info, env->subprog_cnt, |
| 774 | sizeof(env->subprog_starts[0]), cmp_subprogs, NULL); | 778 | sizeof(env->subprog_info[0]), cmp_subprogs, NULL); |
| 775 | return 0; | 779 | return 0; |
| 776 | } | 780 | } |
| 777 | 781 | ||
| 778 | static int check_subprogs(struct bpf_verifier_env *env) | 782 | static int check_subprogs(struct bpf_verifier_env *env) |
| 779 | { | 783 | { |
| 780 | int i, ret, subprog_start, subprog_end, off, cur_subprog = 0; | 784 | int i, ret, subprog_start, subprog_end, off, cur_subprog = 0; |
| 785 | struct bpf_subprog_info *subprog = env->subprog_info; | ||
| 781 | struct bpf_insn *insn = env->prog->insnsi; | 786 | struct bpf_insn *insn = env->prog->insnsi; |
| 782 | int insn_cnt = env->prog->len; | 787 | int insn_cnt = env->prog->len; |
| 783 | 788 | ||
| 789 | /* Add entry function. */ | ||
| 790 | ret = add_subprog(env, 0); | ||
| 791 | if (ret < 0) | ||
| 792 | return ret; | ||
| 793 | |||
| 784 | /* determine subprog starts. The end is one before the next starts */ | 794 | /* determine subprog starts. The end is one before the next starts */ |
| 785 | for (i = 0; i < insn_cnt; i++) { | 795 | for (i = 0; i < insn_cnt; i++) { |
| 786 | if (insn[i].code != (BPF_JMP | BPF_CALL)) | 796 | if (insn[i].code != (BPF_JMP | BPF_CALL)) |
| @@ -800,16 +810,18 @@ static int check_subprogs(struct bpf_verifier_env *env) | |||
| 800 | return ret; | 810 | return ret; |
| 801 | } | 811 | } |
| 802 | 812 | ||
| 813 | /* Add a fake 'exit' subprog which could simplify subprog iteration | ||
| 814 | * logic. 'subprog_cnt' should not be increased. | ||
| 815 | */ | ||
| 816 | subprog[env->subprog_cnt].start = insn_cnt; | ||
| 817 | |||
| 803 | if (env->log.level > 1) | 818 | if (env->log.level > 1) |
| 804 | for (i = 0; i < env->subprog_cnt; i++) | 819 | for (i = 0; i < env->subprog_cnt; i++) |
| 805 | verbose(env, "func#%d @%d\n", i, env->subprog_starts[i]); | 820 | verbose(env, "func#%d @%d\n", i, subprog[i].start); |
| 806 | 821 | ||
| 807 | /* now check that all jumps are within the same subprog */ | 822 | /* now check that all jumps are within the same subprog */ |
| 808 | subprog_start = 0; | 823 | subprog_start = subprog[cur_subprog].start; |
| 809 | if (env->subprog_cnt == cur_subprog) | 824 | subprog_end = subprog[cur_subprog + 1].start; |
| 810 | subprog_end = insn_cnt; | ||
| 811 | else | ||
| 812 | subprog_end = env->subprog_starts[cur_subprog++]; | ||
| 813 | for (i = 0; i < insn_cnt; i++) { | 825 | for (i = 0; i < insn_cnt; i++) { |
| 814 | u8 code = insn[i].code; | 826 | u8 code = insn[i].code; |
| 815 | 827 | ||
| @@ -834,10 +846,9 @@ next: | |||
| 834 | return -EINVAL; | 846 | return -EINVAL; |
| 835 | } | 847 | } |
| 836 | subprog_start = subprog_end; | 848 | subprog_start = subprog_end; |
| 837 | if (env->subprog_cnt == cur_subprog) | 849 | cur_subprog++; |
| 838 | subprog_end = insn_cnt; | 850 | if (cur_subprog < env->subprog_cnt) |
| 839 | else | 851 | subprog_end = subprog[cur_subprog + 1].start; |
| 840 | subprog_end = env->subprog_starts[cur_subprog++]; | ||
| 841 | } | 852 | } |
| 842 | } | 853 | } |
| 843 | return 0; | 854 | return 0; |
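The reworked bookkeeping is easiest to see on a concrete layout, here a hypothetical 20-insn program with one bpf-to-bpf callee starting at insn 10:

	/* subprog_info[0].start = 0	(entry, now added explicitly)
	 * subprog_info[1].start = 10	(callee)
	 * subprog_info[2].start = 20	(fake 'exit' subprog == insn_cnt)
	 *
	 * subprog_cnt stays 2, so subprog[i + 1].start is always a valid
	 * end bound for subprog i, which is what lets the jump-range and
	 * stack-depth walks below drop their insn_cnt special cases. */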
| @@ -1470,13 +1481,13 @@ static int update_stack_depth(struct bpf_verifier_env *env, | |||
| 1470 | const struct bpf_func_state *func, | 1481 | const struct bpf_func_state *func, |
| 1471 | int off) | 1482 | int off) |
| 1472 | { | 1483 | { |
| 1473 | u16 stack = env->subprog_stack_depth[func->subprogno]; | 1484 | u16 stack = env->subprog_info[func->subprogno].stack_depth; |
| 1474 | 1485 | ||
| 1475 | if (stack >= -off) | 1486 | if (stack >= -off) |
| 1476 | return 0; | 1487 | return 0; |
| 1477 | 1488 | ||
| 1478 | /* update known max for given subprogram */ | 1489 | /* update known max for given subprogram */ |
| 1479 | env->subprog_stack_depth[func->subprogno] = -off; | 1490 | env->subprog_info[func->subprogno].stack_depth = -off; |
| 1480 | return 0; | 1491 | return 0; |
| 1481 | } | 1492 | } |
| 1482 | 1493 | ||
| @@ -1488,9 +1499,9 @@ static int update_stack_depth(struct bpf_verifier_env *env, | |||
| 1488 | */ | 1499 | */ |
| 1489 | static int check_max_stack_depth(struct bpf_verifier_env *env) | 1500 | static int check_max_stack_depth(struct bpf_verifier_env *env) |
| 1490 | { | 1501 | { |
| 1491 | int depth = 0, frame = 0, subprog = 0, i = 0, subprog_end; | 1502 | int depth = 0, frame = 0, idx = 0, i = 0, subprog_end; |
| 1503 | struct bpf_subprog_info *subprog = env->subprog_info; | ||
| 1492 | struct bpf_insn *insn = env->prog->insnsi; | 1504 | struct bpf_insn *insn = env->prog->insnsi; |
| 1493 | int insn_cnt = env->prog->len; | ||
| 1494 | int ret_insn[MAX_CALL_FRAMES]; | 1505 | int ret_insn[MAX_CALL_FRAMES]; |
| 1495 | int ret_prog[MAX_CALL_FRAMES]; | 1506 | int ret_prog[MAX_CALL_FRAMES]; |
| 1496 | 1507 | ||
| @@ -1498,17 +1509,14 @@ process_func: | |||
| 1498 | /* round up to 32-bytes, since this is granularity | 1509 | /* round up to 32-bytes, since this is granularity |
| 1499 | * of interpreter stack size | 1510 | * of interpreter stack size |
| 1500 | */ | 1511 | */ |
| 1501 | depth += round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32); | 1512 | depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32); |
| 1502 | if (depth > MAX_BPF_STACK) { | 1513 | if (depth > MAX_BPF_STACK) { |
| 1503 | verbose(env, "combined stack size of %d calls is %d. Too large\n", | 1514 | verbose(env, "combined stack size of %d calls is %d. Too large\n", |
| 1504 | frame + 1, depth); | 1515 | frame + 1, depth); |
| 1505 | return -EACCES; | 1516 | return -EACCES; |
| 1506 | } | 1517 | } |
| 1507 | continue_func: | 1518 | continue_func: |
| 1508 | if (env->subprog_cnt == subprog) | 1519 | subprog_end = subprog[idx + 1].start; |
| 1509 | subprog_end = insn_cnt; | ||
| 1510 | else | ||
| 1511 | subprog_end = env->subprog_starts[subprog]; | ||
| 1512 | for (; i < subprog_end; i++) { | 1520 | for (; i < subprog_end; i++) { |
| 1513 | if (insn[i].code != (BPF_JMP | BPF_CALL)) | 1521 | if (insn[i].code != (BPF_JMP | BPF_CALL)) |
| 1514 | continue; | 1522 | continue; |
| @@ -1516,17 +1524,16 @@ continue_func: | |||
| 1516 | continue; | 1524 | continue; |
| 1517 | /* remember insn and function to return to */ | 1525 | /* remember insn and function to return to */ |
| 1518 | ret_insn[frame] = i + 1; | 1526 | ret_insn[frame] = i + 1; |
| 1519 | ret_prog[frame] = subprog; | 1527 | ret_prog[frame] = idx; |
| 1520 | 1528 | ||
| 1521 | /* find the callee */ | 1529 | /* find the callee */ |
| 1522 | i = i + insn[i].imm + 1; | 1530 | i = i + insn[i].imm + 1; |
| 1523 | subprog = find_subprog(env, i); | 1531 | idx = find_subprog(env, i); |
| 1524 | if (subprog < 0) { | 1532 | if (idx < 0) { |
| 1525 | WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", | 1533 | WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", |
| 1526 | i); | 1534 | i); |
| 1527 | return -EFAULT; | 1535 | return -EFAULT; |
| 1528 | } | 1536 | } |
| 1529 | subprog++; | ||
| 1530 | frame++; | 1537 | frame++; |
| 1531 | if (frame >= MAX_CALL_FRAMES) { | 1538 | if (frame >= MAX_CALL_FRAMES) { |
| 1532 | WARN_ONCE(1, "verifier bug. Call stack is too deep\n"); | 1539 | WARN_ONCE(1, "verifier bug. Call stack is too deep\n"); |
| @@ -1539,10 +1546,10 @@ continue_func: | |||
| 1539 | */ | 1546 | */ |
| 1540 | if (frame == 0) | 1547 | if (frame == 0) |
| 1541 | return 0; | 1548 | return 0; |
| 1542 | depth -= round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32); | 1549 | depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32); |
| 1543 | frame--; | 1550 | frame--; |
| 1544 | i = ret_insn[frame]; | 1551 | i = ret_insn[frame]; |
| 1545 | subprog = ret_prog[frame]; | 1552 | idx = ret_prog[frame]; |
| 1546 | goto continue_func; | 1553 | goto continue_func; |
| 1547 | } | 1554 | } |
| 1548 | 1555 | ||
| @@ -1558,8 +1565,7 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env, | |||
| 1558 | start); | 1565 | start); |
| 1559 | return -EFAULT; | 1566 | return -EFAULT; |
| 1560 | } | 1567 | } |
| 1561 | subprog++; | 1568 | return env->subprog_info[subprog].stack_depth; |
| 1562 | return env->subprog_stack_depth[subprog]; | ||
| 1563 | } | 1569 | } |
| 1564 | #endif | 1570 | #endif |
| 1565 | 1571 | ||
| @@ -1984,6 +1990,12 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, | |||
| 1984 | } else if (arg_type_is_mem_size(arg_type)) { | 1990 | } else if (arg_type_is_mem_size(arg_type)) { |
| 1985 | bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); | 1991 | bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); |
| 1986 | 1992 | ||
| 1993 | /* remember the mem_size which may be used later | ||
| 1994 | * to refine return values. | ||
| 1995 | */ | ||
| 1996 | meta->msize_smax_value = reg->smax_value; | ||
| 1997 | meta->msize_umax_value = reg->umax_value; | ||
| 1998 | |||
| 1987 | /* The register is SCALAR_VALUE; the access check | 1999 | /* The register is SCALAR_VALUE; the access check |
| 1988 | * happens using its boundaries. | 2000 | * happens using its boundaries. |
| 1989 | */ | 2001 | */ |
| @@ -2061,8 +2073,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, | |||
| 2061 | if (func_id != BPF_FUNC_redirect_map) | 2073 | if (func_id != BPF_FUNC_redirect_map) |
| 2062 | goto error; | 2074 | goto error; |
| 2063 | break; | 2075 | break; |
| 2064 | /* Restrict bpf side of cpumap, open when use-cases appear */ | 2076 | /* Restrict bpf side of cpumap and xskmap, open when use-cases |
| 2077 | * appear. | ||
| 2078 | */ | ||
| 2065 | case BPF_MAP_TYPE_CPUMAP: | 2079 | case BPF_MAP_TYPE_CPUMAP: |
| 2080 | case BPF_MAP_TYPE_XSKMAP: | ||
| 2066 | if (func_id != BPF_FUNC_redirect_map) | 2081 | if (func_id != BPF_FUNC_redirect_map) |
| 2067 | goto error; | 2082 | goto error; |
| 2068 | break; | 2083 | break; |
| @@ -2087,7 +2102,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, | |||
| 2087 | case BPF_FUNC_tail_call: | 2102 | case BPF_FUNC_tail_call: |
| 2088 | if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) | 2103 | if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) |
| 2089 | goto error; | 2104 | goto error; |
| 2090 | if (env->subprog_cnt) { | 2105 | if (env->subprog_cnt > 1) { |
| 2091 | verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n"); | 2106 | verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n"); |
| 2092 | return -EINVAL; | 2107 | return -EINVAL; |
| 2093 | } | 2108 | } |
| @@ -2109,7 +2124,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, | |||
| 2109 | break; | 2124 | break; |
| 2110 | case BPF_FUNC_redirect_map: | 2125 | case BPF_FUNC_redirect_map: |
| 2111 | if (map->map_type != BPF_MAP_TYPE_DEVMAP && | 2126 | if (map->map_type != BPF_MAP_TYPE_DEVMAP && |
| 2112 | map->map_type != BPF_MAP_TYPE_CPUMAP) | 2127 | map->map_type != BPF_MAP_TYPE_CPUMAP && |
| 2128 | map->map_type != BPF_MAP_TYPE_XSKMAP) | ||
| 2113 | goto error; | 2129 | goto error; |
| 2114 | break; | 2130 | break; |
| 2115 | case BPF_FUNC_sk_redirect_map: | 2131 | case BPF_FUNC_sk_redirect_map: |
| @@ -2259,7 +2275,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, | |||
| 2259 | /* remember the callsite, it will be used by bpf_exit */ | 2275 | /* remember the callsite, it will be used by bpf_exit */ |
| 2260 | *insn_idx /* callsite */, | 2276 | *insn_idx /* callsite */, |
| 2261 | state->curframe + 1 /* frameno within this callchain */, | 2277 | state->curframe + 1 /* frameno within this callchain */, |
| 2262 | subprog + 1 /* subprog number within this prog */); | 2278 | subprog /* subprog number within this prog */); |
| 2263 | 2279 | ||
| 2264 | /* copy r1 - r5 args that callee can access */ | 2280 | /* copy r1 - r5 args that callee can access */ |
| 2265 | for (i = BPF_REG_1; i <= BPF_REG_5; i++) | 2281 | for (i = BPF_REG_1; i <= BPF_REG_5; i++) |
| @@ -2323,6 +2339,23 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) | |||
| 2323 | return 0; | 2339 | return 0; |
| 2324 | } | 2340 | } |
| 2325 | 2341 | ||
| 2342 | static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, | ||
| 2343 | int func_id, | ||
| 2344 | struct bpf_call_arg_meta *meta) | ||
| 2345 | { | ||
| 2346 | struct bpf_reg_state *ret_reg = ®s[BPF_REG_0]; | ||
| 2347 | |||
| 2348 | if (ret_type != RET_INTEGER || | ||
| 2349 | (func_id != BPF_FUNC_get_stack && | ||
| 2350 | func_id != BPF_FUNC_probe_read_str)) | ||
| 2351 | return; | ||
| 2352 | |||
| 2353 | ret_reg->smax_value = meta->msize_smax_value; | ||
| 2354 | ret_reg->umax_value = meta->msize_umax_value; | ||
| 2355 | __reg_deduce_bounds(ret_reg); | ||
| 2356 | __reg_bound_offset(ret_reg); | ||
| 2357 | } | ||
| 2358 | |||
| 2326 | static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) | 2359 | static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) |
| 2327 | { | 2360 | { |
| 2328 | const struct bpf_func_proto *fn = NULL; | 2361 | const struct bpf_func_proto *fn = NULL; |
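do_refine_retval_range() consumes the msize_smax_value/msize_umax_value bounds stashed by check_func_arg() earlier in this series: for bpf_get_stack() and bpf_probe_read_str(), R0 can never exceed the buffer size the program passed in. A toy model of the clamp, with illustrative numbers:

```c
#include <stdio.h>
#include <stdint.h>

struct reg_bounds {
	int64_t smax_value;
	uint64_t umax_value;
};

int main(void)
{
	/* a RET_INTEGER return value starts out unbounded */
	struct reg_bounds ret_reg = { INT64_MAX, UINT64_MAX };
	/* the helper was called with a 64-byte buffer, so the size
	 * argument's bounds (recorded in bpf_call_arg_meta) are 64 */
	int64_t msize_smax_value = 64;
	uint64_t msize_umax_value = 64;

	ret_reg.smax_value = msize_smax_value;
	ret_reg.umax_value = msize_umax_value;

	printf("r0 after refinement: smax=%lld umax=%llu\n",
	       (long long)ret_reg.smax_value,
	       (unsigned long long)ret_reg.umax_value);
	return 0;
}
```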
| @@ -2446,10 +2479,30 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn | |||
| 2446 | return -EINVAL; | 2479 | return -EINVAL; |
| 2447 | } | 2480 | } |
| 2448 | 2481 | ||
| 2482 | do_refine_retval_range(regs, fn->ret_type, func_id, &meta); | ||
| 2483 | |||
| 2449 | err = check_map_func_compatibility(env, meta.map_ptr, func_id); | 2484 | err = check_map_func_compatibility(env, meta.map_ptr, func_id); |
| 2450 | if (err) | 2485 | if (err) |
| 2451 | return err; | 2486 | return err; |
| 2452 | 2487 | ||
| 2488 | if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) { | ||
| 2489 | const char *err_str; | ||
| 2490 | |||
| 2491 | #ifdef CONFIG_PERF_EVENTS | ||
| 2492 | err = get_callchain_buffers(sysctl_perf_event_max_stack); | ||
| 2493 | err_str = "cannot get callchain buffer for func %s#%d\n"; | ||
| 2494 | #else | ||
| 2495 | err = -ENOTSUPP; | ||
| 2496 | err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n"; | ||
| 2497 | #endif | ||
| 2498 | if (err) { | ||
| 2499 | verbose(env, err_str, func_id_name(func_id), func_id); | ||
| 2500 | return err; | ||
| 2501 | } | ||
| 2502 | |||
| 2503 | env->prog->has_callchain_buf = true; | ||
| 2504 | } | ||
| 2505 | |||
| 2453 | if (changes_data) | 2506 | if (changes_data) |
| 2454 | clear_all_pkt_pointers(env); | 2507 | clear_all_pkt_pointers(env); |
| 2455 | return 0; | 2508 | return 0; |
| @@ -2894,10 +2947,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, | |||
| 2894 | dst_reg->umin_value <<= umin_val; | 2947 | dst_reg->umin_value <<= umin_val; |
| 2895 | dst_reg->umax_value <<= umax_val; | 2948 | dst_reg->umax_value <<= umax_val; |
| 2896 | } | 2949 | } |
| 2897 | if (src_known) | 2950 | dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val); |
| 2898 | dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val); | ||
| 2899 | else | ||
| 2900 | dst_reg->var_off = tnum_lshift(tnum_unknown, umin_val); | ||
| 2901 | /* We may learn something more from the var_off */ | 2951 | /* We may learn something more from the var_off */ |
| 2902 | __update_reg_bounds(dst_reg); | 2952 | __update_reg_bounds(dst_reg); |
| 2903 | break; | 2953 | break; |
| @@ -2925,16 +2975,35 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, | |||
| 2925 | */ | 2975 | */ |
| 2926 | dst_reg->smin_value = S64_MIN; | 2976 | dst_reg->smin_value = S64_MIN; |
| 2927 | dst_reg->smax_value = S64_MAX; | 2977 | dst_reg->smax_value = S64_MAX; |
| 2928 | if (src_known) | 2978 | dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val); |
| 2929 | dst_reg->var_off = tnum_rshift(dst_reg->var_off, | ||
| 2930 | umin_val); | ||
| 2931 | else | ||
| 2932 | dst_reg->var_off = tnum_rshift(tnum_unknown, umin_val); | ||
| 2933 | dst_reg->umin_value >>= umax_val; | 2979 | dst_reg->umin_value >>= umax_val; |
| 2934 | dst_reg->umax_value >>= umin_val; | 2980 | dst_reg->umax_value >>= umin_val; |
| 2935 | /* We may learn something more from the var_off */ | 2981 | /* We may learn something more from the var_off */ |
| 2936 | __update_reg_bounds(dst_reg); | 2982 | __update_reg_bounds(dst_reg); |
| 2937 | break; | 2983 | break; |
| 2984 | case BPF_ARSH: | ||
| 2985 | if (umax_val >= insn_bitness) { | ||
| 2986 | /* Shifts greater than 31 or 63 are undefined. | ||
| 2987 | * This includes shifts by a negative number. | ||
| 2988 | */ | ||
| 2989 | mark_reg_unknown(env, regs, insn->dst_reg); | ||
| 2990 | break; | ||
| 2991 | } | ||
| 2992 | |||
| 2993 | /* Upon reaching here, src_known is true and | ||
| 2994 | * umax_val is equal to umin_val. | ||
| 2995 | */ | ||
| 2996 | dst_reg->smin_value >>= umin_val; | ||
| 2997 | dst_reg->smax_value >>= umin_val; | ||
| 2998 | dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val); | ||
| 2999 | |||
| 3000 | /* blow away the dst_reg umin_value/umax_value and rely on | ||
| 3001 | * dst_reg var_off to refine the result. | ||
| 3002 | */ | ||
| 3003 | dst_reg->umin_value = 0; | ||
| 3004 | dst_reg->umax_value = U64_MAX; | ||
| 3005 | __update_reg_bounds(dst_reg); | ||
| 3006 | break; | ||
| 2938 | default: | 3007 | default: |
| 2939 | mark_reg_unknown(env, regs, insn->dst_reg); | 3008 | mark_reg_unknown(env, regs, insn->dst_reg); |
| 2940 | break; | 3009 | break; |
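The new BPF_ARSH case is only reached with a known shift amount (src_known holds, so umax_val equals umin_val), which makes the signed bounds easy: both ends of the range shift right together, while the unsigned bounds are blown away and re-derived from var_off. A user-space illustration of the signed part (on the usual compilers `>>` of a negative operand is arithmetic, which is what BPF_ARSH specifies):

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int64_t smin_value = -1024;	/* illustrative input bounds */
	int64_t smax_value = 4096;
	uint32_t umin_val = 4;		/* known shift: umin == umax */

	smin_value >>= umin_val;
	smax_value >>= umin_val;

	/* prints smin=-64 smax=256 */
	printf("after arsh %u: smin=%lld smax=%lld\n", umin_val,
	       (long long)smin_value, (long long)smax_value);
	return 0;
}
```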
| @@ -3818,7 +3887,12 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) | |||
| 3818 | return -EINVAL; | 3887 | return -EINVAL; |
| 3819 | } | 3888 | } |
| 3820 | 3889 | ||
| 3821 | if (env->subprog_cnt) { | 3890 | if (!env->ops->gen_ld_abs) { |
| 3891 | verbose(env, "bpf verifier is misconfigured\n"); | ||
| 3892 | return -EINVAL; | ||
| 3893 | } | ||
| 3894 | |||
| 3895 | if (env->subprog_cnt > 1) { | ||
| 3822 | /* when program has LD_ABS insn JITs and interpreter assume | 3896 | /* when program has LD_ABS insn JITs and interpreter assume |
| 3823 | * that r1 == ctx == skb which is not the case for callees | 3897 | * that r1 == ctx == skb which is not the case for callees |
| 3824 | * that can have arbitrary arguments. It's problematic | 3898 | * that can have arbitrary arguments. It's problematic |
| @@ -4849,15 +4923,15 @@ process_bpf_exit: | |||
| 4849 | 4923 | ||
| 4850 | verbose(env, "processed %d insns (limit %d), stack depth ", | 4924 | verbose(env, "processed %d insns (limit %d), stack depth ", |
| 4851 | insn_processed, BPF_COMPLEXITY_LIMIT_INSNS); | 4925 | insn_processed, BPF_COMPLEXITY_LIMIT_INSNS); |
| 4852 | for (i = 0; i < env->subprog_cnt + 1; i++) { | 4926 | for (i = 0; i < env->subprog_cnt; i++) { |
| 4853 | u32 depth = env->subprog_stack_depth[i]; | 4927 | u32 depth = env->subprog_info[i].stack_depth; |
| 4854 | 4928 | ||
| 4855 | verbose(env, "%d", depth); | 4929 | verbose(env, "%d", depth); |
| 4856 | if (i + 1 < env->subprog_cnt + 1) | 4930 | if (i + 1 < env->subprog_cnt) |
| 4857 | verbose(env, "+"); | 4931 | verbose(env, "+"); |
| 4858 | } | 4932 | } |
| 4859 | verbose(env, "\n"); | 4933 | verbose(env, "\n"); |
| 4860 | env->prog->aux->stack_depth = env->subprog_stack_depth[0]; | 4934 | env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; |
| 4861 | return 0; | 4935 | return 0; |
| 4862 | } | 4936 | } |
| 4863 | 4937 | ||
| @@ -4981,7 +5055,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) | |||
| 4981 | /* hold the map. If the program is rejected by verifier, | 5055 | /* hold the map. If the program is rejected by verifier, |
| 4982 | * the map will be released by release_maps() or it | 5056 | * the map will be released by release_maps() or it |
| 4983 | * will be used by the valid program until it's unloaded | 5057 | * will be used by the valid program until it's unloaded |
| 4984 | * and all maps are released in free_bpf_prog_info() | 5058 | * and all maps are released in free_used_maps() |
| 4985 | */ | 5059 | */ |
| 4986 | map = bpf_map_inc(map, false); | 5060 | map = bpf_map_inc(map, false); |
| 4987 | if (IS_ERR(map)) { | 5061 | if (IS_ERR(map)) { |
| @@ -5063,10 +5137,11 @@ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len | |||
| 5063 | 5137 | ||
| 5064 | if (len == 1) | 5138 | if (len == 1) |
| 5065 | return; | 5139 | return; |
| 5066 | for (i = 0; i < env->subprog_cnt; i++) { | 5140 | /* NOTE: fake 'exit' subprog should be updated as well. */ |
| 5067 | if (env->subprog_starts[i] < off) | 5141 | for (i = 0; i <= env->subprog_cnt; i++) { |
| 5142 | if (env->subprog_info[i].start < off) | ||
| 5068 | continue; | 5143 | continue; |
| 5069 | env->subprog_starts[i] += len - 1; | 5144 | env->subprog_info[i].start += len - 1; |
| 5070 | } | 5145 | } |
| 5071 | } | 5146 | } |
| 5072 | 5147 | ||
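With the main program now tracked as subprog 0 and a fake 'exit' entry at index subprog_cnt whose start equals the program length, insn patching has to shift that fake entry too — hence the `<=` loop bound above. A toy model with made-up offsets:

```c
#include <stdio.h>

struct subprog_info { int start; };

static void adjust_subprog_starts(struct subprog_info *si, int cnt,
				  int off, int len)
{
	int i;

	/* NOTE: '<=' so the fake 'exit' subprog is updated as well */
	for (i = 0; i <= cnt; i++) {
		if (si[i].start < off)
			continue;
		si[i].start += len - 1;
	}
}

int main(void)
{
	/* two real subprogs plus the fake exit entry at prog len 25 */
	struct subprog_info si[] = { { 0 }, { 10 }, { 25 } };
	int i;

	/* the insn at offset 5 was expanded into 3 insns */
	adjust_subprog_starts(si, 2, 5, 3);

	for (i = 0; i <= 2; i++)
		printf("subprog_info[%d].start = %d\n", i, si[i].start);
	return 0;
}
```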
| @@ -5230,7 +5305,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) | |||
| 5230 | void *old_bpf_func; | 5305 | void *old_bpf_func; |
| 5231 | int err = -ENOMEM; | 5306 | int err = -ENOMEM; |
| 5232 | 5307 | ||
| 5233 | if (env->subprog_cnt == 0) | 5308 | if (env->subprog_cnt <= 1) |
| 5234 | return 0; | 5309 | return 0; |
| 5235 | 5310 | ||
| 5236 | for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { | 5311 | for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { |
| @@ -5246,7 +5321,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) | |||
| 5246 | /* temporarily remember subprog id inside insn instead of | 5321 | /* temporarily remember subprog id inside insn instead of |
| 5247 | * aux_data, since next loop will split up all insns into funcs | 5322 | * aux_data, since next loop will split up all insns into funcs |
| 5248 | */ | 5323 | */ |
| 5249 | insn->off = subprog + 1; | 5324 | insn->off = subprog; |
| 5250 | /* remember original imm in case JIT fails and fallback | 5325 | /* remember original imm in case JIT fails and fallback |
| 5251 | * to interpreter will be needed | 5326 | * to interpreter will be needed |
| 5252 | */ | 5327 | */ |
| @@ -5255,16 +5330,13 @@ static int jit_subprogs(struct bpf_verifier_env *env) | |||
| 5255 | insn->imm = 1; | 5330 | insn->imm = 1; |
| 5256 | } | 5331 | } |
| 5257 | 5332 | ||
| 5258 | func = kzalloc(sizeof(prog) * (env->subprog_cnt + 1), GFP_KERNEL); | 5333 | func = kzalloc(sizeof(prog) * env->subprog_cnt, GFP_KERNEL); |
| 5259 | if (!func) | 5334 | if (!func) |
| 5260 | return -ENOMEM; | 5335 | return -ENOMEM; |
| 5261 | 5336 | ||
| 5262 | for (i = 0; i <= env->subprog_cnt; i++) { | 5337 | for (i = 0; i < env->subprog_cnt; i++) { |
| 5263 | subprog_start = subprog_end; | 5338 | subprog_start = subprog_end; |
| 5264 | if (env->subprog_cnt == i) | 5339 | subprog_end = env->subprog_info[i + 1].start; |
| 5265 | subprog_end = prog->len; | ||
| 5266 | else | ||
| 5267 | subprog_end = env->subprog_starts[i]; | ||
| 5268 | 5340 | ||
| 5269 | len = subprog_end - subprog_start; | 5341 | len = subprog_end - subprog_start; |
| 5270 | func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER); | 5342 | func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER); |
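The fake exit record also simplifies the split loop just above: since `subprog_info[subprog_cnt].start` is the program length, each function body is simply `[start[i], start[i+1])` with no special case for the last subprog. With the same toy numbers as before:

```c
#include <stdio.h>

int main(void)
{
	/* two subprogs plus the fake exit record at prog len 25 */
	int start[] = { 0, 10, 25 };
	int subprog_cnt = 2;
	int i;

	for (i = 0; i < subprog_cnt; i++)
		printf("func[%d]: insns %d..%d (len %d)\n",
		       i, start[i], start[i + 1] - 1,
		       start[i + 1] - start[i]);
	return 0;
}
```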
| @@ -5281,7 +5353,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) | |||
| 5281 | * Long term would need debug info to populate names | 5353 | * Long term would need debug info to populate names |
| 5282 | */ | 5354 | */ |
| 5283 | func[i]->aux->name[0] = 'F'; | 5355 | func[i]->aux->name[0] = 'F'; |
| 5284 | func[i]->aux->stack_depth = env->subprog_stack_depth[i]; | 5356 | func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; |
| 5285 | func[i]->jit_requested = 1; | 5357 | func[i]->jit_requested = 1; |
| 5286 | func[i] = bpf_int_jit_compile(func[i]); | 5358 | func[i] = bpf_int_jit_compile(func[i]); |
| 5287 | if (!func[i]->jited) { | 5359 | if (!func[i]->jited) { |
| @@ -5294,7 +5366,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) | |||
| 5294 | * now populate all bpf_calls with correct addresses and | 5366 | * now populate all bpf_calls with correct addresses and |
| 5295 | * run last pass of JIT | 5367 | * run last pass of JIT |
| 5296 | */ | 5368 | */ |
| 5297 | for (i = 0; i <= env->subprog_cnt; i++) { | 5369 | for (i = 0; i < env->subprog_cnt; i++) { |
| 5298 | insn = func[i]->insnsi; | 5370 | insn = func[i]->insnsi; |
| 5299 | for (j = 0; j < func[i]->len; j++, insn++) { | 5371 | for (j = 0; j < func[i]->len; j++, insn++) { |
| 5300 | if (insn->code != (BPF_JMP | BPF_CALL) || | 5372 | if (insn->code != (BPF_JMP | BPF_CALL) || |
| @@ -5307,7 +5379,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) | |||
| 5307 | __bpf_call_base; | 5379 | __bpf_call_base; |
| 5308 | } | 5380 | } |
| 5309 | } | 5381 | } |
| 5310 | for (i = 0; i <= env->subprog_cnt; i++) { | 5382 | for (i = 0; i < env->subprog_cnt; i++) { |
| 5311 | old_bpf_func = func[i]->bpf_func; | 5383 | old_bpf_func = func[i]->bpf_func; |
| 5312 | tmp = bpf_int_jit_compile(func[i]); | 5384 | tmp = bpf_int_jit_compile(func[i]); |
| 5313 | if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) { | 5385 | if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) { |
| @@ -5321,7 +5393,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) | |||
| 5321 | /* finally lock prog and jit images for all functions and | 5393 | /* finally lock prog and jit images for all functions and |
| 5322 | * populate kallsyms | 5394 | * populate kallsyms |

| 5323 | */ | 5395 | */ |
| 5324 | for (i = 0; i <= env->subprog_cnt; i++) { | 5396 | for (i = 0; i < env->subprog_cnt; i++) { |
| 5325 | bpf_prog_lock_ro(func[i]); | 5397 | bpf_prog_lock_ro(func[i]); |
| 5326 | bpf_prog_kallsyms_add(func[i]); | 5398 | bpf_prog_kallsyms_add(func[i]); |
| 5327 | } | 5399 | } |
| @@ -5338,7 +5410,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) | |||
| 5338 | continue; | 5410 | continue; |
| 5339 | insn->off = env->insn_aux_data[i].call_imm; | 5411 | insn->off = env->insn_aux_data[i].call_imm; |
| 5340 | subprog = find_subprog(env, i + insn->off + 1); | 5412 | subprog = find_subprog(env, i + insn->off + 1); |
| 5341 | addr = (unsigned long)func[subprog + 1]->bpf_func; | 5413 | addr = (unsigned long)func[subprog]->bpf_func; |
| 5342 | addr &= PAGE_MASK; | 5414 | addr &= PAGE_MASK; |
| 5343 | insn->imm = (u64 (*)(u64, u64, u64, u64, u64)) | 5415 | insn->imm = (u64 (*)(u64, u64, u64, u64, u64)) |
| 5344 | addr - __bpf_call_base; | 5416 | addr - __bpf_call_base; |
| @@ -5347,10 +5419,10 @@ static int jit_subprogs(struct bpf_verifier_env *env) | |||
| 5347 | prog->jited = 1; | 5419 | prog->jited = 1; |
| 5348 | prog->bpf_func = func[0]->bpf_func; | 5420 | prog->bpf_func = func[0]->bpf_func; |
| 5349 | prog->aux->func = func; | 5421 | prog->aux->func = func; |
| 5350 | prog->aux->func_cnt = env->subprog_cnt + 1; | 5422 | prog->aux->func_cnt = env->subprog_cnt; |
| 5351 | return 0; | 5423 | return 0; |
| 5352 | out_free: | 5424 | out_free: |
| 5353 | for (i = 0; i <= env->subprog_cnt; i++) | 5425 | for (i = 0; i < env->subprog_cnt; i++) |
| 5354 | if (func[i]) | 5426 | if (func[i]) |
| 5355 | bpf_jit_free(func[i]); | 5427 | bpf_jit_free(func[i]); |
| 5356 | kfree(func); | 5428 | kfree(func); |
| @@ -5453,6 +5525,25 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) | |||
| 5453 | continue; | 5525 | continue; |
| 5454 | } | 5526 | } |
| 5455 | 5527 | ||
| 5528 | if (BPF_CLASS(insn->code) == BPF_LD && | ||
| 5529 | (BPF_MODE(insn->code) == BPF_ABS || | ||
| 5530 | BPF_MODE(insn->code) == BPF_IND)) { | ||
| 5531 | cnt = env->ops->gen_ld_abs(insn, insn_buf); | ||
| 5532 | if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { | ||
| 5533 | verbose(env, "bpf verifier is misconfigured\n"); | ||
| 5534 | return -EINVAL; | ||
| 5535 | } | ||
| 5536 | |||
| 5537 | new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); | ||
| 5538 | if (!new_prog) | ||
| 5539 | return -ENOMEM; | ||
| 5540 | |||
| 5541 | delta += cnt - 1; | ||
| 5542 | env->prog = prog = new_prog; | ||
| 5543 | insn = new_prog->insnsi + i + delta; | ||
| 5544 | continue; | ||
| 5545 | } | ||
| 5546 | |||
| 5456 | if (insn->code != (BPF_JMP | BPF_CALL)) | 5547 | if (insn->code != (BPF_JMP | BPF_CALL)) |
| 5457 | continue; | 5548 | continue; |
| 5458 | if (insn->src_reg == BPF_PSEUDO_CALL) | 5549 | if (insn->src_reg == BPF_PSEUDO_CALL) |
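fixup_bpf_calls() now inlines LD_ABS/LD_IND by asking the program type's gen_ld_abs() callback for a replacement sequence and splicing it in with bpf_patch_insn_data(). The `delta` bookkeeping maps original insn indexes to their position in the rewritten program; a toy model of just that arithmetic, with made-up patch sites:

```c
#include <stdio.h>

int main(void)
{
	int patch_at[] = { 2, 5 };	/* original insn indexes to expand */
	int cnt[]      = { 3, 2 };	/* each becomes this many insns */
	int delta = 0;
	int k;

	for (k = 0; k < 2; k++) {
		int i = patch_at[k];

		/* the patched copy of original insn i lives at i + delta */
		printf("orig insn %d -> new index %d, expands to %d insns\n",
		       i, i + delta, cnt[k]);
		delta += cnt[k] - 1;
	}
	return 0;
}
```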
| @@ -5650,16 +5741,16 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) | |||
| 5650 | if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) | 5741 | if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) |
| 5651 | env->strict_alignment = true; | 5742 | env->strict_alignment = true; |
| 5652 | 5743 | ||
| 5744 | ret = replace_map_fd_with_map_ptr(env); | ||
| 5745 | if (ret < 0) | ||
| 5746 | goto skip_full_check; | ||
| 5747 | |||
| 5653 | if (bpf_prog_is_dev_bound(env->prog->aux)) { | 5748 | if (bpf_prog_is_dev_bound(env->prog->aux)) { |
| 5654 | ret = bpf_prog_offload_verifier_prep(env); | 5749 | ret = bpf_prog_offload_verifier_prep(env); |
| 5655 | if (ret) | 5750 | if (ret) |
| 5656 | goto err_unlock; | 5751 | goto skip_full_check; |
| 5657 | } | 5752 | } |
| 5658 | 5753 | ||
| 5659 | ret = replace_map_fd_with_map_ptr(env); | ||
| 5660 | if (ret < 0) | ||
| 5661 | goto skip_full_check; | ||
| 5662 | |||
| 5663 | env->explored_states = kcalloc(env->prog->len, | 5754 | env->explored_states = kcalloc(env->prog->len, |
| 5664 | sizeof(struct bpf_verifier_state_list *), | 5755 | sizeof(struct bpf_verifier_state_list *), |
| 5665 | GFP_USER); | 5756 | GFP_USER); |
| @@ -5730,7 +5821,7 @@ skip_full_check: | |||
| 5730 | err_release_maps: | 5821 | err_release_maps: |
| 5731 | if (!env->prog->aux->used_maps) | 5822 | if (!env->prog->aux->used_maps) |
| 5732 | /* if we didn't copy map pointers into bpf_prog_info, release | 5823 | /* if we didn't copy map pointers into bpf_prog_info, release |
| 5733 | * them now. Otherwise free_bpf_prog_info() will release them. | 5824 | * them now. Otherwise free_used_maps() will release them. |
| 5734 | */ | 5825 | */ |
| 5735 | release_maps(env); | 5826 | release_maps(env); |
| 5736 | *prog = env->prog; | 5827 | *prog = env->prog; |
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c new file mode 100644 index 000000000000..cb3a12137404 --- /dev/null +++ b/kernel/bpf/xskmap.c | |||
| @@ -0,0 +1,241 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | ||
| 2 | /* XSKMAP used for AF_XDP sockets | ||
| 3 | * Copyright(c) 2018 Intel Corporation. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms and conditions of the GNU General Public License, | ||
| 7 | * version 2, as published by the Free Software Foundation. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 12 | * more details. | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include <linux/bpf.h> | ||
| 16 | #include <linux/capability.h> | ||
| 17 | #include <net/xdp_sock.h> | ||
| 18 | #include <linux/slab.h> | ||
| 19 | #include <linux/sched.h> | ||
| 20 | |||
| 21 | struct xsk_map { | ||
| 22 | struct bpf_map map; | ||
| 23 | struct xdp_sock **xsk_map; | ||
| 24 | struct list_head __percpu *flush_list; | ||
| 25 | }; | ||
| 26 | |||
| 27 | static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) | ||
| 28 | { | ||
| 29 | int cpu, err = -EINVAL; | ||
| 30 | struct xsk_map *m; | ||
| 31 | u64 cost; | ||
| 32 | |||
| 33 | if (!capable(CAP_NET_ADMIN)) | ||
| 34 | return ERR_PTR(-EPERM); | ||
| 35 | |||
| 36 | if (attr->max_entries == 0 || attr->key_size != 4 || | ||
| 37 | attr->value_size != 4 || | ||
| 38 | attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)) | ||
| 39 | return ERR_PTR(-EINVAL); | ||
| 40 | |||
| 41 | m = kzalloc(sizeof(*m), GFP_USER); | ||
| 42 | if (!m) | ||
| 43 | return ERR_PTR(-ENOMEM); | ||
| 44 | |||
| 45 | bpf_map_init_from_attr(&m->map, attr); | ||
| 46 | |||
| 47 | cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *); | ||
| 48 | cost += sizeof(struct list_head) * num_possible_cpus(); | ||
| 49 | if (cost >= U32_MAX - PAGE_SIZE) | ||
| 50 | goto free_m; | ||
| 51 | |||
| 52 | m->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; | ||
| 53 | |||
| 54 | /* Note: returns -EPERM if map size is larger than memlock limit */ | ||
| 55 | err = bpf_map_precharge_memlock(m->map.pages); | ||
| 56 | if (err) | ||
| 57 | goto free_m; | ||
| 58 | |||
| 59 | err = -ENOMEM; | ||
| 60 | |||
| 61 | m->flush_list = alloc_percpu(struct list_head); | ||
| 62 | if (!m->flush_list) | ||
| 63 | goto free_m; | ||
| 64 | |||
| 65 | for_each_possible_cpu(cpu) | ||
| 66 | INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu)); | ||
| 67 | |||
| 68 | m->xsk_map = bpf_map_area_alloc(m->map.max_entries * | ||
| 69 | sizeof(struct xdp_sock *), | ||
| 70 | m->map.numa_node); | ||
| 71 | if (!m->xsk_map) | ||
| 72 | goto free_percpu; | ||
| 73 | return &m->map; | ||
| 74 | |||
| 75 | free_percpu: | ||
| 76 | free_percpu(m->flush_list); | ||
| 77 | free_m: | ||
| 78 | kfree(m); | ||
| 79 | return ERR_PTR(err); | ||
| 80 | } | ||
| 81 | |||
| 82 | static void xsk_map_free(struct bpf_map *map) | ||
| 83 | { | ||
| 84 | struct xsk_map *m = container_of(map, struct xsk_map, map); | ||
| 85 | int i; | ||
| 86 | |||
| 87 | synchronize_net(); | ||
| 88 | |||
| 89 | for (i = 0; i < map->max_entries; i++) { | ||
| 90 | struct xdp_sock *xs; | ||
| 91 | |||
| 92 | xs = m->xsk_map[i]; | ||
| 93 | if (!xs) | ||
| 94 | continue; | ||
| 95 | |||
| 96 | sock_put((struct sock *)xs); | ||
| 97 | } | ||
| 98 | |||
| 99 | free_percpu(m->flush_list); | ||
| 100 | bpf_map_area_free(m->xsk_map); | ||
| 101 | kfree(m); | ||
| 102 | } | ||
| 103 | |||
| 104 | static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key) | ||
| 105 | { | ||
| 106 | struct xsk_map *m = container_of(map, struct xsk_map, map); | ||
| 107 | u32 index = key ? *(u32 *)key : U32_MAX; | ||
| 108 | u32 *next = next_key; | ||
| 109 | |||
| 110 | if (index >= m->map.max_entries) { | ||
| 111 | *next = 0; | ||
| 112 | return 0; | ||
| 113 | } | ||
| 114 | |||
| 115 | if (index == m->map.max_entries - 1) | ||
| 116 | return -ENOENT; | ||
| 117 | *next = index + 1; | ||
| 118 | return 0; | ||
| 119 | } | ||
| 120 | |||
| 121 | struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key) | ||
| 122 | { | ||
| 123 | struct xsk_map *m = container_of(map, struct xsk_map, map); | ||
| 124 | struct xdp_sock *xs; | ||
| 125 | |||
| 126 | if (key >= map->max_entries) | ||
| 127 | return NULL; | ||
| 128 | |||
| 129 | xs = READ_ONCE(m->xsk_map[key]); | ||
| 130 | return xs; | ||
| 131 | } | ||
| 132 | |||
| 133 | int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp, | ||
| 134 | struct xdp_sock *xs) | ||
| 135 | { | ||
| 136 | struct xsk_map *m = container_of(map, struct xsk_map, map); | ||
| 137 | struct list_head *flush_list = this_cpu_ptr(m->flush_list); | ||
| 138 | int err; | ||
| 139 | |||
| 140 | err = xsk_rcv(xs, xdp); | ||
| 141 | if (err) | ||
| 142 | return err; | ||
| 143 | |||
| 144 | if (!xs->flush_node.prev) | ||
| 145 | list_add(&xs->flush_node, flush_list); | ||
| 146 | |||
| 147 | return 0; | ||
| 148 | } | ||
| 149 | |||
| 150 | void __xsk_map_flush(struct bpf_map *map) | ||
| 151 | { | ||
| 152 | struct xsk_map *m = container_of(map, struct xsk_map, map); | ||
| 153 | struct list_head *flush_list = this_cpu_ptr(m->flush_list); | ||
| 154 | struct xdp_sock *xs, *tmp; | ||
| 155 | |||
| 156 | list_for_each_entry_safe(xs, tmp, flush_list, flush_node) { | ||
| 157 | xsk_flush(xs); | ||
| 158 | __list_del(xs->flush_node.prev, xs->flush_node.next); | ||
| 159 | xs->flush_node.prev = NULL; | ||
| 160 | } | ||
| 161 | } | ||
| 162 | |||
| 163 | static void *xsk_map_lookup_elem(struct bpf_map *map, void *key) | ||
| 164 | { | ||
| 165 | return NULL; | ||
| 166 | } | ||
| 167 | |||
| 168 | static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, | ||
| 169 | u64 map_flags) | ||
| 170 | { | ||
| 171 | struct xsk_map *m = container_of(map, struct xsk_map, map); | ||
| 172 | u32 i = *(u32 *)key, fd = *(u32 *)value; | ||
| 173 | struct xdp_sock *xs, *old_xs; | ||
| 174 | struct socket *sock; | ||
| 175 | int err; | ||
| 176 | |||
| 177 | if (unlikely(map_flags > BPF_EXIST)) | ||
| 178 | return -EINVAL; | ||
| 179 | if (unlikely(i >= m->map.max_entries)) | ||
| 180 | return -E2BIG; | ||
| 181 | if (unlikely(map_flags == BPF_NOEXIST)) | ||
| 182 | return -EEXIST; | ||
| 183 | |||
| 184 | sock = sockfd_lookup(fd, &err); | ||
| 185 | if (!sock) | ||
| 186 | return err; | ||
| 187 | |||
| 188 | if (sock->sk->sk_family != PF_XDP) { | ||
| 189 | sockfd_put(sock); | ||
| 190 | return -EOPNOTSUPP; | ||
| 191 | } | ||
| 192 | |||
| 193 | xs = (struct xdp_sock *)sock->sk; | ||
| 194 | |||
| 195 | if (!xsk_is_setup_for_bpf_map(xs)) { | ||
| 196 | sockfd_put(sock); | ||
| 197 | return -EOPNOTSUPP; | ||
| 198 | } | ||
| 199 | |||
| 200 | sock_hold(sock->sk); | ||
| 201 | |||
| 202 | old_xs = xchg(&m->xsk_map[i], xs); | ||
| 203 | if (old_xs) { | ||
| 204 | /* Make sure we've flushed everything. */ | ||
| 205 | synchronize_net(); | ||
| 206 | sock_put((struct sock *)old_xs); | ||
| 207 | } | ||
| 208 | |||
| 209 | sockfd_put(sock); | ||
| 210 | return 0; | ||
| 211 | } | ||
| 212 | |||
| 213 | static int xsk_map_delete_elem(struct bpf_map *map, void *key) | ||
| 214 | { | ||
| 215 | struct xsk_map *m = container_of(map, struct xsk_map, map); | ||
| 216 | struct xdp_sock *old_xs; | ||
| 217 | int k = *(u32 *)key; | ||
| 218 | |||
| 219 | if (k >= map->max_entries) | ||
| 220 | return -EINVAL; | ||
| 221 | |||
| 222 | old_xs = xchg(&m->xsk_map[k], NULL); | ||
| 223 | if (old_xs) { | ||
| 224 | /* Make sure we've flushed everything. */ | ||
| 225 | synchronize_net(); | ||
| 226 | sock_put((struct sock *)old_xs); | ||
| 227 | } | ||
| 228 | |||
| 229 | return 0; | ||
| 230 | } | ||
| 231 | |||
| 232 | const struct bpf_map_ops xsk_map_ops = { | ||
| 233 | .map_alloc = xsk_map_alloc, | ||
| 234 | .map_free = xsk_map_free, | ||
| 235 | .map_get_next_key = xsk_map_get_next_key, | ||
| 236 | .map_lookup_elem = xsk_map_lookup_elem, | ||
| 237 | .map_update_elem = xsk_map_update_elem, | ||
| 238 | .map_delete_elem = xsk_map_delete_elem, | ||
| 239 | }; | ||
| 240 | |||
| 241 | |||
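To show where the new map type fits, here is a hedged sketch of the intended BPF-side usage, in the style of contemporary samples: user space creates AF_XDP sockets and inserts their fds into the map (the 4-byte value that xsk_map_update_elem() resolves via sockfd_lookup()), and the XDP program steers packets with bpf_redirect_map(). The helper-macro header path and the queue-index keying are assumptions, not mandated by this patch:

```c
#include <linux/bpf.h>
#include "bpf_helpers.h"	/* helper macros as in samples/bpf (path assumed) */

/* key: rx queue index (illustrative), value: an AF_XDP socket fd */
struct bpf_map_def SEC("maps") xsks_map = {
	.type		= BPF_MAP_TYPE_XSKMAP,
	.key_size	= sizeof(int),	/* must be 4, per xsk_map_alloc() */
	.value_size	= sizeof(int),	/* must be 4 */
	.max_entries	= 4,
};

SEC("xdp_sock")
int xdp_sock_prog(struct xdp_md *ctx)
{
	int index = ctx->rx_queue_index;

	/* returns XDP_REDIRECT on success; the redirect fails at
	 * runtime if no socket is installed at this index */
	return bpf_redirect_map(&xsks_map, index, 0);
}

char _license[] SEC("license") = "GPL";
```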
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 56ba0f2a01db..ce2cbbff27e4 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "trace.h" | 20 | #include "trace.h" |
| 21 | 21 | ||
| 22 | u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); | 22 | u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); |
| 23 | u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); | ||
| 23 | 24 | ||
| 24 | /** | 25 | /** |
| 25 | * trace_call_bpf - invoke BPF program | 26 | * trace_call_bpf - invoke BPF program |
| @@ -474,8 +475,6 @@ BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx) | |||
| 474 | struct bpf_array *array = container_of(map, struct bpf_array, map); | 475 | struct bpf_array *array = container_of(map, struct bpf_array, map); |
| 475 | struct cgroup *cgrp; | 476 | struct cgroup *cgrp; |
| 476 | 477 | ||
| 477 | if (unlikely(in_interrupt())) | ||
| 478 | return -EINVAL; | ||
| 479 | if (unlikely(idx >= array->map.max_entries)) | 478 | if (unlikely(idx >= array->map.max_entries)) |
| 480 | return -E2BIG; | 479 | return -E2BIG; |
| 481 | 480 | ||
| @@ -577,6 +576,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |||
| 577 | return &bpf_perf_event_output_proto; | 576 | return &bpf_perf_event_output_proto; |
| 578 | case BPF_FUNC_get_stackid: | 577 | case BPF_FUNC_get_stackid: |
| 579 | return &bpf_get_stackid_proto; | 578 | return &bpf_get_stackid_proto; |
| 579 | case BPF_FUNC_get_stack: | ||
| 580 | return &bpf_get_stack_proto; | ||
| 580 | case BPF_FUNC_perf_event_read_value: | 581 | case BPF_FUNC_perf_event_read_value: |
| 581 | return &bpf_perf_event_read_value_proto; | 582 | return &bpf_perf_event_read_value_proto; |
| 582 | #ifdef CONFIG_BPF_KPROBE_OVERRIDE | 583 | #ifdef CONFIG_BPF_KPROBE_OVERRIDE |
| @@ -664,6 +665,25 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = { | |||
| 664 | .arg3_type = ARG_ANYTHING, | 665 | .arg3_type = ARG_ANYTHING, |
| 665 | }; | 666 | }; |
| 666 | 667 | ||
| 668 | BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size, | ||
| 669 | u64, flags) | ||
| 670 | { | ||
| 671 | struct pt_regs *regs = *(struct pt_regs **)tp_buff; | ||
| 672 | |||
| 673 | return bpf_get_stack((unsigned long) regs, (unsigned long) buf, | ||
| 674 | (unsigned long) size, flags, 0); | ||
| 675 | } | ||
| 676 | |||
| 677 | static const struct bpf_func_proto bpf_get_stack_proto_tp = { | ||
| 678 | .func = bpf_get_stack_tp, | ||
| 679 | .gpl_only = true, | ||
| 680 | .ret_type = RET_INTEGER, | ||
| 681 | .arg1_type = ARG_PTR_TO_CTX, | ||
| 682 | .arg2_type = ARG_PTR_TO_UNINIT_MEM, | ||
| 683 | .arg3_type = ARG_CONST_SIZE_OR_ZERO, | ||
| 684 | .arg4_type = ARG_ANYTHING, | ||
| 685 | }; | ||
| 686 | |||
| 667 | static const struct bpf_func_proto * | 687 | static const struct bpf_func_proto * |
| 668 | tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | 688 | tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) |
| 669 | { | 689 | { |
| @@ -672,6 +692,8 @@ tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |||
| 672 | return &bpf_perf_event_output_proto_tp; | 692 | return &bpf_perf_event_output_proto_tp; |
| 673 | case BPF_FUNC_get_stackid: | 693 | case BPF_FUNC_get_stackid: |
| 674 | return &bpf_get_stackid_proto_tp; | 694 | return &bpf_get_stackid_proto_tp; |
| 695 | case BPF_FUNC_get_stack: | ||
| 696 | return &bpf_get_stack_proto_tp; | ||
| 675 | default: | 697 | default: |
| 676 | return tracing_func_proto(func_id, prog); | 698 | return tracing_func_proto(func_id, prog); |
| 677 | } | 699 | } |
| @@ -734,6 +756,8 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |||
| 734 | return &bpf_perf_event_output_proto_tp; | 756 | return &bpf_perf_event_output_proto_tp; |
| 735 | case BPF_FUNC_get_stackid: | 757 | case BPF_FUNC_get_stackid: |
| 736 | return &bpf_get_stackid_proto_tp; | 758 | return &bpf_get_stackid_proto_tp; |
| 759 | case BPF_FUNC_get_stack: | ||
| 760 | return &bpf_get_stack_proto_tp; | ||
| 737 | case BPF_FUNC_perf_prog_read_value: | 761 | case BPF_FUNC_perf_prog_read_value: |
| 738 | return &bpf_perf_prog_read_value_proto; | 762 | return &bpf_perf_prog_read_value_proto; |
| 739 | default: | 763 | default: |
| @@ -744,7 +768,7 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |||
| 744 | /* | 768 | /* |
| 745 | * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp | 769 | * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp |
| 746 | * to avoid potential recursive reuse issue when/if tracepoints are added | 770 | * to avoid potential recursive reuse issue when/if tracepoints are added |
| 747 | * inside bpf_*_event_output and/or bpf_get_stack_id | 771 | * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack |
| 748 | */ | 772 | */ |
| 749 | static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs); | 773 | static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs); |
| 750 | BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args, | 774 | BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args, |
| @@ -787,6 +811,26 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = { | |||
| 787 | .arg3_type = ARG_ANYTHING, | 811 | .arg3_type = ARG_ANYTHING, |
| 788 | }; | 812 | }; |
| 789 | 813 | ||
| 814 | BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args, | ||
| 815 | void *, buf, u32, size, u64, flags) | ||
| 816 | { | ||
| 817 | struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs); | ||
| 818 | |||
| 819 | perf_fetch_caller_regs(regs); | ||
| 820 | return bpf_get_stack((unsigned long) regs, (unsigned long) buf, | ||
| 821 | (unsigned long) size, flags, 0); | ||
| 822 | } | ||
| 823 | |||
| 824 | static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = { | ||
| 825 | .func = bpf_get_stack_raw_tp, | ||
| 826 | .gpl_only = true, | ||
| 827 | .ret_type = RET_INTEGER, | ||
| 828 | .arg1_type = ARG_PTR_TO_CTX, | ||
| 829 | .arg2_type = ARG_PTR_TO_MEM, | ||
| 830 | .arg3_type = ARG_CONST_SIZE_OR_ZERO, | ||
| 831 | .arg4_type = ARG_ANYTHING, | ||
| 832 | }; | ||
| 833 | |||
| 790 | static const struct bpf_func_proto * | 834 | static const struct bpf_func_proto * |
| 791 | raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | 835 | raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) |
| 792 | { | 836 | { |
| @@ -795,6 +839,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |||
| 795 | return &bpf_perf_event_output_proto_raw_tp; | 839 | return &bpf_perf_event_output_proto_raw_tp; |
| 796 | case BPF_FUNC_get_stackid: | 840 | case BPF_FUNC_get_stackid: |
| 797 | return &bpf_get_stackid_proto_raw_tp; | 841 | return &bpf_get_stackid_proto_raw_tp; |
| 842 | case BPF_FUNC_get_stack: | ||
| 843 | return &bpf_get_stack_proto_raw_tp; | ||
| 798 | default: | 844 | default: |
| 799 | return tracing_func_proto(func_id, prog); | 845 | return tracing_func_proto(func_id, prog); |
| 800 | } | 846 | } |
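With the proto wired up for kprobe, tracepoint, perf-event and raw-tracepoint programs, a program can now dump a call chain into its own buffer instead of going through a stackmap id. A hedged kprobe sketch — the attach point and helper-macro header are illustrative, and note the proto is gpl_only:

```c
#include <linux/ptrace.h>
#include <linux/bpf.h>
#include "bpf_helpers.h"	/* helper macros as in samples/bpf (path assumed) */

SEC("kprobe/do_sys_open")	/* attach point is illustrative */
int dump_stack(struct pt_regs *ctx)
{
	char buf[64];
	long ret = bpf_get_stack(ctx, buf, sizeof(buf), 0);

	if (ret <= 0)
		return 0;
	/* thanks to do_refine_retval_range(), the verifier knows
	 * 0 < ret <= 64 here, so this variable offset is provably
	 * within buf */
	return buf[ret - 1];
}

char _license[] SEC("license") = "GPL";
```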
diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 8e157806df7a..317f231462d4 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c | |||
| @@ -386,116 +386,6 @@ static int bpf_fill_ld_abs_get_processor_id(struct bpf_test *self) | |||
| 386 | return 0; | 386 | return 0; |
| 387 | } | 387 | } |
| 388 | 388 | ||
| 389 | #define PUSH_CNT 68 | ||
| 390 | /* test: {skb->data[0], vlan_push} x 68 + {skb->data[0], vlan_pop} x 68 */ | ||
| 391 | static int bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self) | ||
| 392 | { | ||
| 393 | unsigned int len = BPF_MAXINSNS; | ||
| 394 | struct bpf_insn *insn; | ||
| 395 | int i = 0, j, k = 0; | ||
| 396 | |||
| 397 | insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); | ||
| 398 | if (!insn) | ||
| 399 | return -ENOMEM; | ||
| 400 | |||
| 401 | insn[i++] = BPF_MOV64_REG(R6, R1); | ||
| 402 | loop: | ||
| 403 | for (j = 0; j < PUSH_CNT; j++) { | ||
| 404 | insn[i++] = BPF_LD_ABS(BPF_B, 0); | ||
| 405 | insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0x34, len - i - 2); | ||
| 406 | i++; | ||
| 407 | insn[i++] = BPF_MOV64_REG(R1, R6); | ||
| 408 | insn[i++] = BPF_MOV64_IMM(R2, 1); | ||
| 409 | insn[i++] = BPF_MOV64_IMM(R3, 2); | ||
| 410 | insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
| 411 | bpf_skb_vlan_push_proto.func - __bpf_call_base); | ||
| 412 | insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0, len - i - 2); | ||
| 413 | i++; | ||
| 414 | } | ||
| 415 | |||
| 416 | for (j = 0; j < PUSH_CNT; j++) { | ||
| 417 | insn[i++] = BPF_LD_ABS(BPF_B, 0); | ||
| 418 | insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0x34, len - i - 2); | ||
| 419 | i++; | ||
| 420 | insn[i++] = BPF_MOV64_REG(R1, R6); | ||
| 421 | insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
| 422 | bpf_skb_vlan_pop_proto.func - __bpf_call_base); | ||
| 423 | insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0, len - i - 2); | ||
| 424 | i++; | ||
| 425 | } | ||
| 426 | if (++k < 5) | ||
| 427 | goto loop; | ||
| 428 | |||
| 429 | for (; i < len - 1; i++) | ||
| 430 | insn[i] = BPF_ALU32_IMM(BPF_MOV, R0, 0xbef); | ||
| 431 | |||
| 432 | insn[len - 1] = BPF_EXIT_INSN(); | ||
| 433 | |||
| 434 | self->u.ptr.insns = insn; | ||
| 435 | self->u.ptr.len = len; | ||
| 436 | |||
| 437 | return 0; | ||
| 438 | } | ||
| 439 | |||
| 440 | static int bpf_fill_ld_abs_vlan_push_pop2(struct bpf_test *self) | ||
| 441 | { | ||
| 442 | struct bpf_insn *insn; | ||
| 443 | |||
| 444 | insn = kmalloc_array(16, sizeof(*insn), GFP_KERNEL); | ||
| 445 | if (!insn) | ||
| 446 | return -ENOMEM; | ||
| 447 | |||
| 448 | /* Due to func address being non-const, we need to | ||
| 449 | * assemble this here. | ||
| 450 | */ | ||
| 451 | insn[0] = BPF_MOV64_REG(R6, R1); | ||
| 452 | insn[1] = BPF_LD_ABS(BPF_B, 0); | ||
| 453 | insn[2] = BPF_LD_ABS(BPF_H, 0); | ||
| 454 | insn[3] = BPF_LD_ABS(BPF_W, 0); | ||
| 455 | insn[4] = BPF_MOV64_REG(R7, R6); | ||
| 456 | insn[5] = BPF_MOV64_IMM(R6, 0); | ||
| 457 | insn[6] = BPF_MOV64_REG(R1, R7); | ||
| 458 | insn[7] = BPF_MOV64_IMM(R2, 1); | ||
| 459 | insn[8] = BPF_MOV64_IMM(R3, 2); | ||
| 460 | insn[9] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
| 461 | bpf_skb_vlan_push_proto.func - __bpf_call_base); | ||
| 462 | insn[10] = BPF_MOV64_REG(R6, R7); | ||
| 463 | insn[11] = BPF_LD_ABS(BPF_B, 0); | ||
| 464 | insn[12] = BPF_LD_ABS(BPF_H, 0); | ||
| 465 | insn[13] = BPF_LD_ABS(BPF_W, 0); | ||
| 466 | insn[14] = BPF_MOV64_IMM(R0, 42); | ||
| 467 | insn[15] = BPF_EXIT_INSN(); | ||
| 468 | |||
| 469 | self->u.ptr.insns = insn; | ||
| 470 | self->u.ptr.len = 16; | ||
| 471 | |||
| 472 | return 0; | ||
| 473 | } | ||
| 474 | |||
| 475 | static int bpf_fill_jump_around_ld_abs(struct bpf_test *self) | ||
| 476 | { | ||
| 477 | unsigned int len = BPF_MAXINSNS; | ||
| 478 | struct bpf_insn *insn; | ||
| 479 | int i = 0; | ||
| 480 | |||
| 481 | insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); | ||
| 482 | if (!insn) | ||
| 483 | return -ENOMEM; | ||
| 484 | |||
| 485 | insn[i++] = BPF_MOV64_REG(R6, R1); | ||
| 486 | insn[i++] = BPF_LD_ABS(BPF_B, 0); | ||
| 487 | insn[i] = BPF_JMP_IMM(BPF_JEQ, R0, 10, len - i - 2); | ||
| 488 | i++; | ||
| 489 | while (i < len - 1) | ||
| 490 | insn[i++] = BPF_LD_ABS(BPF_B, 1); | ||
| 491 | insn[i] = BPF_EXIT_INSN(); | ||
| 492 | |||
| 493 | self->u.ptr.insns = insn; | ||
| 494 | self->u.ptr.len = len; | ||
| 495 | |||
| 496 | return 0; | ||
| 497 | } | ||
| 498 | |||
| 499 | static int __bpf_fill_stxdw(struct bpf_test *self, int size) | 389 | static int __bpf_fill_stxdw(struct bpf_test *self, int size) |
| 500 | { | 390 | { |
| 501 | unsigned int len = BPF_MAXINSNS; | 391 | unsigned int len = BPF_MAXINSNS; |
| @@ -1988,40 +1878,6 @@ static struct bpf_test tests[] = { | |||
| 1988 | { { 0, -1 } } | 1878 | { { 0, -1 } } |
| 1989 | }, | 1879 | }, |
| 1990 | { | 1880 | { |
| 1991 | "INT: DIV + ABS", | ||
| 1992 | .u.insns_int = { | ||
| 1993 | BPF_ALU64_REG(BPF_MOV, R6, R1), | ||
| 1994 | BPF_LD_ABS(BPF_B, 3), | ||
| 1995 | BPF_ALU64_IMM(BPF_MOV, R2, 2), | ||
| 1996 | BPF_ALU32_REG(BPF_DIV, R0, R2), | ||
| 1997 | BPF_ALU64_REG(BPF_MOV, R8, R0), | ||
| 1998 | BPF_LD_ABS(BPF_B, 4), | ||
| 1999 | BPF_ALU64_REG(BPF_ADD, R8, R0), | ||
| 2000 | BPF_LD_IND(BPF_B, R8, -70), | ||
| 2001 | BPF_EXIT_INSN(), | ||
| 2002 | }, | ||
| 2003 | INTERNAL, | ||
| 2004 | { 10, 20, 30, 40, 50 }, | ||
| 2005 | { { 4, 0 }, { 5, 10 } } | ||
| 2006 | }, | ||
| 2007 | { | ||
| 2008 | /* This one doesn't go through verifier, but is just raw insn | ||
| 2009 | * as opposed to cBPF tests from here. Thus div by 0 tests are | ||
| 2010 | * done in test_verifier in BPF kselftests. | ||
| 2011 | */ | ||
| 2012 | "INT: DIV by -1", | ||
| 2013 | .u.insns_int = { | ||
| 2014 | BPF_ALU64_REG(BPF_MOV, R6, R1), | ||
| 2015 | BPF_ALU64_IMM(BPF_MOV, R7, -1), | ||
| 2016 | BPF_LD_ABS(BPF_B, 3), | ||
| 2017 | BPF_ALU32_REG(BPF_DIV, R0, R7), | ||
| 2018 | BPF_EXIT_INSN(), | ||
| 2019 | }, | ||
| 2020 | INTERNAL, | ||
| 2021 | { 10, 20, 30, 40, 50 }, | ||
| 2022 | { { 3, 0 }, { 4, 0 } } | ||
| 2023 | }, | ||
| 2024 | { | ||
| 2025 | "check: missing ret", | 1881 | "check: missing ret", |
| 2026 | .u.insns = { | 1882 | .u.insns = { |
| 2027 | BPF_STMT(BPF_LD | BPF_IMM, 1), | 1883 | BPF_STMT(BPF_LD | BPF_IMM, 1), |
| @@ -2383,50 +2239,6 @@ static struct bpf_test tests[] = { | |||
| 2383 | { }, | 2239 | { }, |
| 2384 | { { 0, 1 } } | 2240 | { { 0, 1 } } |
| 2385 | }, | 2241 | }, |
| 2386 | { | ||
| 2387 | "nmap reduced", | ||
| 2388 | .u.insns_int = { | ||
| 2389 | BPF_MOV64_REG(R6, R1), | ||
| 2390 | BPF_LD_ABS(BPF_H, 12), | ||
| 2391 | BPF_JMP_IMM(BPF_JNE, R0, 0x806, 28), | ||
| 2392 | BPF_LD_ABS(BPF_H, 12), | ||
| 2393 | BPF_JMP_IMM(BPF_JNE, R0, 0x806, 26), | ||
| 2394 | BPF_MOV32_IMM(R0, 18), | ||
| 2395 | BPF_STX_MEM(BPF_W, R10, R0, -64), | ||
| 2396 | BPF_LDX_MEM(BPF_W, R7, R10, -64), | ||
| 2397 | BPF_LD_IND(BPF_W, R7, 14), | ||
| 2398 | BPF_STX_MEM(BPF_W, R10, R0, -60), | ||
| 2399 | BPF_MOV32_IMM(R0, 280971478), | ||
| 2400 | BPF_STX_MEM(BPF_W, R10, R0, -56), | ||
| 2401 | BPF_LDX_MEM(BPF_W, R7, R10, -56), | ||
| 2402 | BPF_LDX_MEM(BPF_W, R0, R10, -60), | ||
| 2403 | BPF_ALU32_REG(BPF_SUB, R0, R7), | ||
| 2404 | BPF_JMP_IMM(BPF_JNE, R0, 0, 15), | ||
| 2405 | BPF_LD_ABS(BPF_H, 12), | ||
| 2406 | BPF_JMP_IMM(BPF_JNE, R0, 0x806, 13), | ||
| 2407 | BPF_MOV32_IMM(R0, 22), | ||
| 2408 | BPF_STX_MEM(BPF_W, R10, R0, -56), | ||
| 2409 | BPF_LDX_MEM(BPF_W, R7, R10, -56), | ||
| 2410 | BPF_LD_IND(BPF_H, R7, 14), | ||
| 2411 | BPF_STX_MEM(BPF_W, R10, R0, -52), | ||
| 2412 | BPF_MOV32_IMM(R0, 17366), | ||
| 2413 | BPF_STX_MEM(BPF_W, R10, R0, -48), | ||
| 2414 | BPF_LDX_MEM(BPF_W, R7, R10, -48), | ||
| 2415 | BPF_LDX_MEM(BPF_W, R0, R10, -52), | ||
| 2416 | BPF_ALU32_REG(BPF_SUB, R0, R7), | ||
| 2417 | BPF_JMP_IMM(BPF_JNE, R0, 0, 2), | ||
| 2418 | BPF_MOV32_IMM(R0, 256), | ||
| 2419 | BPF_EXIT_INSN(), | ||
| 2420 | BPF_MOV32_IMM(R0, 0), | ||
| 2421 | BPF_EXIT_INSN(), | ||
| 2422 | }, | ||
| 2423 | INTERNAL, | ||
| 2424 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0, 0, | ||
| 2425 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 2426 | 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6}, | ||
| 2427 | { { 38, 256 } }, | ||
| 2428 | .stack_depth = 64, | ||
| 2429 | }, | ||
| 2430 | /* BPF_ALU | BPF_MOV | BPF_X */ | 2242 | /* BPF_ALU | BPF_MOV | BPF_X */ |
| 2431 | { | 2243 | { |
| 2432 | "ALU_MOV_X: dst = 2", | 2244 | "ALU_MOV_X: dst = 2", |
| @@ -5485,22 +5297,6 @@ static struct bpf_test tests[] = { | |||
| 5485 | { { 1, 0xbee } }, | 5297 | { { 1, 0xbee } }, |
| 5486 | .fill_helper = bpf_fill_ld_abs_get_processor_id, | 5298 | .fill_helper = bpf_fill_ld_abs_get_processor_id, |
| 5487 | }, | 5299 | }, |
| 5488 | { | ||
| 5489 | "BPF_MAXINSNS: ld_abs+vlan_push/pop", | ||
| 5490 | { }, | ||
| 5491 | INTERNAL, | ||
| 5492 | { 0x34 }, | ||
| 5493 | { { ETH_HLEN, 0xbef } }, | ||
| 5494 | .fill_helper = bpf_fill_ld_abs_vlan_push_pop, | ||
| 5495 | }, | ||
| 5496 | { | ||
| 5497 | "BPF_MAXINSNS: jump around ld_abs", | ||
| 5498 | { }, | ||
| 5499 | INTERNAL, | ||
| 5500 | { 10, 11 }, | ||
| 5501 | { { 2, 10 } }, | ||
| 5502 | .fill_helper = bpf_fill_jump_around_ld_abs, | ||
| 5503 | }, | ||
| 5504 | /* | 5300 | /* |
| 5505 | * LD_IND / LD_ABS on fragmented SKBs | 5301 | * LD_IND / LD_ABS on fragmented SKBs |
| 5506 | */ | 5302 | */ |
| @@ -5683,6 +5479,53 @@ static struct bpf_test tests[] = { | |||
| 5683 | { {0x40, 0x05 } }, | 5479 | { {0x40, 0x05 } }, |
| 5684 | }, | 5480 | }, |
| 5685 | { | 5481 | { |
| 5482 | "LD_IND byte positive offset, all ff", | ||
| 5483 | .u.insns = { | ||
| 5484 | BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), | ||
| 5485 | BPF_STMT(BPF_LD | BPF_IND | BPF_B, 0x1), | ||
| 5486 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5487 | }, | ||
| 5488 | CLASSIC, | ||
| 5489 | { [0x3c] = 0xff, [0x3d] = 0xff, [0x3e] = 0xff, [0x3f] = 0xff }, | ||
| 5490 | { {0x40, 0xff } }, | ||
| 5491 | }, | ||
| 5492 | { | ||
| 5493 | "LD_IND byte positive offset, out of bounds", | ||
| 5494 | .u.insns = { | ||
| 5495 | BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), | ||
| 5496 | BPF_STMT(BPF_LD | BPF_IND | BPF_B, 0x1), | ||
| 5497 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5498 | }, | ||
| 5499 | CLASSIC, | ||
| 5500 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 5501 | { {0x3f, 0 }, }, | ||
| 5502 | }, | ||
| 5503 | { | ||
| 5504 | "LD_IND byte negative offset, out of bounds", | ||
| 5505 | .u.insns = { | ||
| 5506 | BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), | ||
| 5507 | BPF_STMT(BPF_LD | BPF_IND | BPF_B, -0x3f), | ||
| 5508 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5509 | }, | ||
| 5510 | CLASSIC, | ||
| 5511 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 5512 | { {0x3f, 0 } }, | ||
| 5513 | }, | ||
| 5514 | { | ||
| 5515 | "LD_IND byte negative offset, multiple calls", | ||
| 5516 | .u.insns = { | ||
| 5517 | BPF_STMT(BPF_LDX | BPF_IMM, 0x3b), | ||
| 5518 | BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 1), | ||
| 5519 | BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 2), | ||
| 5520 | BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 3), | ||
| 5521 | BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 4), | ||
| 5522 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5523 | }, | ||
| 5524 | CLASSIC, | ||
| 5525 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 5526 | { {0x40, 0x82 }, }, | ||
| 5527 | }, | ||
| 5528 | { | ||
| 5686 | "LD_IND halfword positive offset", | 5529 | "LD_IND halfword positive offset", |
| 5687 | .u.insns = { | 5530 | .u.insns = { |
| 5688 | BPF_STMT(BPF_LDX | BPF_IMM, 0x20), | 5531 | BPF_STMT(BPF_LDX | BPF_IMM, 0x20), |
| @@ -5731,6 +5574,39 @@ static struct bpf_test tests[] = { | |||
| 5731 | { {0x40, 0x66cc } }, | 5574 | { {0x40, 0x66cc } }, |
| 5732 | }, | 5575 | }, |
| 5733 | { | 5576 | { |
| 5577 | "LD_IND halfword positive offset, all ff", | ||
| 5578 | .u.insns = { | ||
| 5579 | BPF_STMT(BPF_LDX | BPF_IMM, 0x3d), | ||
| 5580 | BPF_STMT(BPF_LD | BPF_IND | BPF_H, 0x1), | ||
| 5581 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5582 | }, | ||
| 5583 | CLASSIC, | ||
| 5584 | { [0x3c] = 0xff, [0x3d] = 0xff, [0x3e] = 0xff, [0x3f] = 0xff }, | ||
| 5585 | { {0x40, 0xffff } }, | ||
| 5586 | }, | ||
| 5587 | { | ||
| 5588 | "LD_IND halfword positive offset, out of bounds", | ||
| 5589 | .u.insns = { | ||
| 5590 | BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), | ||
| 5591 | BPF_STMT(BPF_LD | BPF_IND | BPF_H, 0x1), | ||
| 5592 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5593 | }, | ||
| 5594 | CLASSIC, | ||
| 5595 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 5596 | { {0x3f, 0 }, }, | ||
| 5597 | }, | ||
| 5598 | { | ||
| 5599 | "LD_IND halfword negative offset, out of bounds", | ||
| 5600 | .u.insns = { | ||
| 5601 | BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), | ||
| 5602 | BPF_STMT(BPF_LD | BPF_IND | BPF_H, -0x3f), | ||
| 5603 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5604 | }, | ||
| 5605 | CLASSIC, | ||
| 5606 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 5607 | { {0x3f, 0 } }, | ||
| 5608 | }, | ||
| 5609 | { | ||
| 5734 | "LD_IND word positive offset", | 5610 | "LD_IND word positive offset", |
| 5735 | .u.insns = { | 5611 | .u.insns = { |
| 5736 | BPF_STMT(BPF_LDX | BPF_IMM, 0x20), | 5612 | BPF_STMT(BPF_LDX | BPF_IMM, 0x20), |
| @@ -5821,6 +5697,39 @@ static struct bpf_test tests[] = { | |||
| 5821 | { {0x40, 0x66cc77dd } }, | 5697 | { {0x40, 0x66cc77dd } }, |
| 5822 | }, | 5698 | }, |
| 5823 | { | 5699 | { |
| 5700 | "LD_IND word positive offset, all ff", | ||
| 5701 | .u.insns = { | ||
| 5702 | BPF_STMT(BPF_LDX | BPF_IMM, 0x3b), | ||
| 5703 | BPF_STMT(BPF_LD | BPF_IND | BPF_W, 0x1), | ||
| 5704 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5705 | }, | ||
| 5706 | CLASSIC, | ||
| 5707 | { [0x3c] = 0xff, [0x3d] = 0xff, [0x3e] = 0xff, [0x3f] = 0xff }, | ||
| 5708 | { {0x40, 0xffffffff } }, | ||
| 5709 | }, | ||
| 5710 | { | ||
| 5711 | "LD_IND word positive offset, out of bounds", | ||
| 5712 | .u.insns = { | ||
| 5713 | BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), | ||
| 5714 | BPF_STMT(BPF_LD | BPF_IND | BPF_W, 0x1), | ||
| 5715 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5716 | }, | ||
| 5717 | CLASSIC, | ||
| 5718 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 5719 | { {0x3f, 0 }, }, | ||
| 5720 | }, | ||
| 5721 | { | ||
| 5722 | "LD_IND word negative offset, out of bounds", | ||
| 5723 | .u.insns = { | ||
| 5724 | BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), | ||
| 5725 | BPF_STMT(BPF_LD | BPF_IND | BPF_W, -0x3f), | ||
| 5726 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5727 | }, | ||
| 5728 | CLASSIC, | ||
| 5729 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 5730 | { {0x3f, 0 } }, | ||
| 5731 | }, | ||
| 5732 | { | ||
| 5824 | "LD_ABS byte", | 5733 | "LD_ABS byte", |
| 5825 | .u.insns = { | 5734 | .u.insns = { |
| 5826 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x20), | 5735 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x20), |
| @@ -5838,6 +5747,68 @@ static struct bpf_test tests[] = { | |||
| 5838 | { {0x40, 0xcc } }, | 5747 | { {0x40, 0xcc } }, |
| 5839 | }, | 5748 | }, |
| 5840 | { | 5749 | { |
| 5750 | "LD_ABS byte positive offset, all ff", | ||
| 5751 | .u.insns = { | ||
| 5752 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x3f), | ||
| 5753 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5754 | }, | ||
| 5755 | CLASSIC, | ||
| 5756 | { [0x3c] = 0xff, [0x3d] = 0xff, [0x3e] = 0xff, [0x3f] = 0xff }, | ||
| 5757 | { {0x40, 0xff } }, | ||
| 5758 | }, | ||
| 5759 | { | ||
| 5760 | "LD_ABS byte positive offset, out of bounds", | ||
| 5761 | .u.insns = { | ||
| 5762 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x3f), | ||
| 5763 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5764 | }, | ||
| 5765 | CLASSIC, | ||
| 5766 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 5767 | { {0x3f, 0 }, }, | ||
| 5768 | }, | ||
| 5769 | { | ||
| 5770 | "LD_ABS byte negative offset, out of bounds load", | ||
| 5771 | .u.insns = { | ||
| 5772 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, -1), | ||
| 5773 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5774 | }, | ||
| 5775 | CLASSIC | FLAG_EXPECTED_FAIL, | ||
| 5776 | .expected_errcode = -EINVAL, | ||
| 5777 | }, | ||
| 5778 | { | ||
| 5779 | "LD_ABS byte negative offset, in bounds", | ||
| 5780 | .u.insns = { | ||
| 5781 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3f), | ||
| 5782 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5783 | }, | ||
| 5784 | CLASSIC, | ||
| 5785 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 5786 | { {0x40, 0x82 }, }, | ||
| 5787 | }, | ||
| 5788 | { | ||
| 5789 | "LD_ABS byte negative offset, out of bounds", | ||
| 5790 | .u.insns = { | ||
| 5791 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3f), | ||
| 5792 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5793 | }, | ||
| 5794 | CLASSIC, | ||
| 5795 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 5796 | { {0x3f, 0 }, }, | ||
| 5797 | }, | ||
| 5798 | { | ||
| 5799 | "LD_ABS byte negative offset, multiple calls", | ||
| 5800 | .u.insns = { | ||
| 5801 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3c), | ||
| 5802 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3d), | ||
| 5803 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3e), | ||
| 5804 | BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3f), | ||
| 5805 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5806 | }, | ||
| 5807 | CLASSIC, | ||
| 5808 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 5809 | { {0x40, 0x82 }, }, | ||
| 5810 | }, | ||
| 5811 | { | ||
| 5841 | "LD_ABS halfword", | 5812 | "LD_ABS halfword", |
| 5842 | .u.insns = { | 5813 | .u.insns = { |
| 5843 | BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x22), | 5814 | BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x22), |
| @@ -5872,6 +5843,55 @@ static struct bpf_test tests[] = { | |||
| 5872 | { {0x40, 0x99ff } }, | 5843 | { {0x40, 0x99ff } }, |
| 5873 | }, | 5844 | }, |
| 5874 | { | 5845 | { |
| 5846 | "LD_ABS halfword positive offset, all ff", | ||
| 5847 | .u.insns = { | ||
| 5848 | BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x3e), | ||
| 5849 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5850 | }, | ||
| 5851 | CLASSIC, | ||
| 5852 | { [0x3c] = 0xff, [0x3d] = 0xff, [0x3e] = 0xff, [0x3f] = 0xff }, | ||
| 5853 | { {0x40, 0xffff } }, | ||
| 5854 | }, | ||
| 5855 | { | ||
| 5856 | "LD_ABS halfword positive offset, out of bounds", | ||
| 5857 | .u.insns = { | ||
| 5858 | BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x3f), | ||
| 5859 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5860 | }, | ||
| 5861 | CLASSIC, | ||
| 5862 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 5863 | { {0x3f, 0 }, }, | ||
| 5864 | }, | ||
| 5865 | { | ||
| 5866 | "LD_ABS halfword negative offset, out of bounds load", | ||
| 5867 | .u.insns = { | ||
| 5868 | BPF_STMT(BPF_LD | BPF_ABS | BPF_H, -1), | ||
| 5869 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5870 | }, | ||
| 5871 | CLASSIC | FLAG_EXPECTED_FAIL, | ||
| 5872 | .expected_errcode = -EINVAL, | ||
| 5873 | }, | ||
| 5874 | { | ||
| 5875 | "LD_ABS halfword negative offset, in bounds", | ||
| 5876 | .u.insns = { | ||
| 5877 | BPF_STMT(BPF_LD | BPF_ABS | BPF_H, SKF_LL_OFF + 0x3e), | ||
| 5878 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5879 | }, | ||
| 5880 | CLASSIC, | ||
| 5881 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 5882 | { {0x40, 0x1982 }, }, | ||
| 5883 | }, | ||
| 5884 | { | ||
| 5885 | "LD_ABS halfword negative offset, out of bounds", | ||
| 5886 | .u.insns = { | ||
| 5887 | BPF_STMT(BPF_LD | BPF_ABS | BPF_H, SKF_LL_OFF + 0x3e), | ||
| 5888 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5889 | }, | ||
| 5890 | CLASSIC, | ||
| 5891 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 5892 | { {0x3f, 0 }, }, | ||
| 5893 | }, | ||
| 5894 | { | ||
| 5875 | "LD_ABS word", | 5895 | "LD_ABS word", |
| 5876 | .u.insns = { | 5896 | .u.insns = { |
| 5877 | BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x1c), | 5897 | BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x1c), |
| @@ -5939,6 +5959,140 @@ static struct bpf_test tests[] = { | |||
| 5939 | }, | 5959 | }, |
| 5940 | { {0x40, 0x88ee99ff } }, | 5960 | { {0x40, 0x88ee99ff } }, |
| 5941 | }, | 5961 | }, |
| 5962 | { | ||
| 5963 | "LD_ABS word positive offset, all ff", | ||
| 5964 | .u.insns = { | ||
| 5965 | BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x3c), | ||
| 5966 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5967 | }, | ||
| 5968 | CLASSIC, | ||
| 5969 | { [0x3c] = 0xff, [0x3d] = 0xff, [0x3e] = 0xff, [0x3f] = 0xff }, | ||
| 5970 | { {0x40, 0xffffffff } }, | ||
| 5971 | }, | ||
| 5972 | { | ||
| 5973 | "LD_ABS word positive offset, out of bounds", | ||
| 5974 | .u.insns = { | ||
| 5975 | BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x3f), | ||
| 5976 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5977 | }, | ||
| 5978 | CLASSIC, | ||
| 5979 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 5980 | { {0x3f, 0 }, }, | ||
| 5981 | }, | ||
| 5982 | { | ||
| 5983 | "LD_ABS word negative offset, out of bounds load", | ||
| 5984 | .u.insns = { | ||
| 5985 | BPF_STMT(BPF_LD | BPF_ABS | BPF_W, -1), | ||
| 5986 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5987 | }, | ||
| 5988 | CLASSIC | FLAG_EXPECTED_FAIL, | ||
| 5989 | .expected_errcode = -EINVAL, | ||
| 5990 | }, | ||
| 5991 | { | ||
| 5992 | "LD_ABS word negative offset, in bounds", | ||
| 5993 | .u.insns = { | ||
| 5994 | BPF_STMT(BPF_LD | BPF_ABS | BPF_W, SKF_LL_OFF + 0x3c), | ||
| 5995 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 5996 | }, | ||
| 5997 | CLASSIC, | ||
| 5998 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 5999 | { {0x40, 0x25051982 }, }, | ||
| 6000 | }, | ||
| 6001 | { | ||
| 6002 | "LD_ABS word negative offset, out of bounds", | ||
| 6003 | .u.insns = { | ||
| 6004 | BPF_STMT(BPF_LD | BPF_ABS | BPF_W, SKF_LL_OFF + 0x3c), | ||
| 6005 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 6006 | }, | ||
| 6007 | CLASSIC, | ||
| 6008 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 6009 | { {0x3f, 0 }, }, | ||
| 6010 | }, | ||
| 6011 | { | ||
| 6012 | "LDX_MSH standalone, preserved A", | ||
| 6013 | .u.insns = { | ||
| 6014 | BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa), | ||
| 6015 | BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3c), | ||
| 6016 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 6017 | }, | ||
| 6018 | CLASSIC, | ||
| 6019 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 6020 | { {0x40, 0xffeebbaa }, }, | ||
| 6021 | }, | ||
| 6022 | { | ||
| 6023 | "LDX_MSH standalone, preserved A 2", | ||
| 6024 | .u.insns = { | ||
| 6025 | BPF_STMT(BPF_LD | BPF_IMM, 0x175e9d63), | ||
| 6026 | BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3c), | ||
| 6027 | BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3d), | ||
| 6028 | BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3e), | ||
| 6029 | BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3f), | ||
| 6030 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 6031 | }, | ||
| 6032 | CLASSIC, | ||
| 6033 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 6034 | { {0x40, 0x175e9d63 }, }, | ||
| 6035 | }, | ||
| 6036 | { | ||
| 6037 | "LDX_MSH standalone, test result 1", | ||
| 6038 | .u.insns = { | ||
| 6039 | BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa), | ||
| 6040 | BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3c), | ||
| 6041 | BPF_STMT(BPF_MISC | BPF_TXA, 0), | ||
| 6042 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 6043 | }, | ||
| 6044 | CLASSIC, | ||
| 6045 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 6046 | { {0x40, 0x14 }, }, | ||
| 6047 | }, | ||
| 6048 | { | ||
| 6049 | "LDX_MSH standalone, test result 2", | ||
| 6050 | .u.insns = { | ||
| 6051 | BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa), | ||
| 6052 | BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3e), | ||
| 6053 | BPF_STMT(BPF_MISC | BPF_TXA, 0), | ||
| 6054 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 6055 | }, | ||
| 6056 | CLASSIC, | ||
| 6057 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 6058 | { {0x40, 0x24 }, }, | ||
| 6059 | }, | ||
| 6060 | { | ||
| 6061 | "LDX_MSH standalone, negative offset", | ||
| 6062 | .u.insns = { | ||
| 6063 | BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa), | ||
| 6064 | BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, -1), | ||
| 6065 | BPF_STMT(BPF_MISC | BPF_TXA, 0), | ||
| 6066 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 6067 | }, | ||
| 6068 | CLASSIC, | ||
| 6069 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 6070 | { {0x40, 0 }, }, | ||
| 6071 | }, | ||
| 6072 | { | ||
| 6073 | "LDX_MSH standalone, negative offset 2", | ||
| 6074 | .u.insns = { | ||
| 6075 | BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa), | ||
| 6076 | BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, SKF_LL_OFF + 0x3e), | ||
| 6077 | BPF_STMT(BPF_MISC | BPF_TXA, 0), | ||
| 6078 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 6079 | }, | ||
| 6080 | CLASSIC, | ||
| 6081 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 6082 | { {0x40, 0x24 }, }, | ||
| 6083 | }, | ||
| 6084 | { | ||
| 6085 | "LDX_MSH standalone, out of bounds", | ||
| 6086 | .u.insns = { | ||
| 6087 | BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa), | ||
| 6088 | BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x40), | ||
| 6089 | BPF_STMT(BPF_MISC | BPF_TXA, 0), | ||
| 6090 | BPF_STMT(BPF_RET | BPF_A, 0x0), | ||
| 6091 | }, | ||
| 6092 | CLASSIC, | ||
| 6093 | { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, | ||
| 6094 | { {0x40, 0 }, }, | ||
| 6095 | }, | ||
| 5942 | /* | 6096 | /* |
| 5943 | * verify that the interpreter or JIT correctly sets A and X | 6097 | * verify that the interpreter or JIT correctly sets A and X |
| 5944 | * to 0. | 6098 | * to 0. |
| @@ -6127,14 +6281,6 @@ static struct bpf_test tests[] = { | |||
| 6127 | {}, | 6281 | {}, |
| 6128 | { {0x1, 0x42 } }, | 6282 | { {0x1, 0x42 } }, |
| 6129 | }, | 6283 | }, |
| 6130 | { | ||
| 6131 | "LD_ABS with helper changing skb data", | ||
| 6132 | { }, | ||
| 6133 | INTERNAL, | ||
| 6134 | { 0x34 }, | ||
| 6135 | { { ETH_HLEN, 42 } }, | ||
| 6136 | .fill_helper = bpf_fill_ld_abs_vlan_push_pop2, | ||
| 6137 | }, | ||
| 6138 | /* Checking interpreter vs JIT wrt signed extended imms. */ | 6284 | /* Checking interpreter vs JIT wrt signed extended imms. */ |
| 6139 | { | 6285 | { |
| 6140 | "JNE signed compare, test 1", | 6286 | "JNE signed compare, test 1", |
diff --git a/net/Kconfig b/net/Kconfig index b62089fb1332..df8d45ef47d8 100644 --- a/net/Kconfig +++ b/net/Kconfig | |||
| @@ -59,6 +59,7 @@ source "net/tls/Kconfig" | |||
| 59 | source "net/xfrm/Kconfig" | 59 | source "net/xfrm/Kconfig" |
| 60 | source "net/iucv/Kconfig" | 60 | source "net/iucv/Kconfig" |
| 61 | source "net/smc/Kconfig" | 61 | source "net/smc/Kconfig" |
| 62 | source "net/xdp/Kconfig" | ||
| 62 | 63 | ||
| 63 | config INET | 64 | config INET |
| 64 | bool "TCP/IP networking" | 65 | bool "TCP/IP networking" |
diff --git a/net/Makefile b/net/Makefile index a6147c61b174..77aaddedbd29 100644 --- a/net/Makefile +++ b/net/Makefile | |||
| @@ -85,3 +85,4 @@ obj-y += l3mdev/ | |||
| 85 | endif | 85 | endif |
| 86 | obj-$(CONFIG_QRTR) += qrtr/ | 86 | obj-$(CONFIG_QRTR) += qrtr/ |
| 87 | obj-$(CONFIG_NET_NCSI) += ncsi/ | 87 | obj-$(CONFIG_NET_NCSI) += ncsi/ |
| 88 | obj-$(CONFIG_XDP_SOCKETS) += xdp/ | ||
diff --git a/net/core/dev.c b/net/core/dev.c index bb81a6e1d354..29bf39174900 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
| @@ -3627,6 +3627,44 @@ int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv) | |||
| 3627 | } | 3627 | } |
| 3628 | EXPORT_SYMBOL(dev_queue_xmit_accel); | 3628 | EXPORT_SYMBOL(dev_queue_xmit_accel); |
| 3629 | 3629 | ||
| 3630 | int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) | ||
| 3631 | { | ||
| 3632 | struct net_device *dev = skb->dev; | ||
| 3633 | struct sk_buff *orig_skb = skb; | ||
| 3634 | struct netdev_queue *txq; | ||
| 3635 | int ret = NETDEV_TX_BUSY; | ||
| 3636 | bool again = false; | ||
| 3637 | |||
| 3638 | if (unlikely(!netif_running(dev) || | ||
| 3639 | !netif_carrier_ok(dev))) | ||
| 3640 | goto drop; | ||
| 3641 | |||
| 3642 | skb = validate_xmit_skb_list(skb, dev, &again); | ||
| 3643 | if (skb != orig_skb) | ||
| 3644 | goto drop; | ||
| 3645 | |||
| 3646 | skb_set_queue_mapping(skb, queue_id); | ||
| 3647 | txq = skb_get_tx_queue(dev, skb); | ||
| 3648 | |||
| 3649 | local_bh_disable(); | ||
| 3650 | |||
| 3651 | HARD_TX_LOCK(dev, txq, smp_processor_id()); | ||
| 3652 | if (!netif_xmit_frozen_or_drv_stopped(txq)) | ||
| 3653 | ret = netdev_start_xmit(skb, dev, txq, false); | ||
| 3654 | HARD_TX_UNLOCK(dev, txq); | ||
| 3655 | |||
| 3656 | local_bh_enable(); | ||
| 3657 | |||
| 3658 | if (!dev_xmit_complete(ret)) | ||
| 3659 | kfree_skb(skb); | ||
| 3660 | |||
| 3661 | return ret; | ||
| 3662 | drop: | ||
| 3663 | atomic_long_inc(&dev->tx_dropped); | ||
| 3664 | kfree_skb_list(skb); | ||
| 3665 | return NET_XMIT_DROP; | ||
| 3666 | } | ||
| 3667 | EXPORT_SYMBOL(dev_direct_xmit); | ||
| 3630 | 3668 | ||
| 3631 | /************************************************************************* | 3669 | /************************************************************************* |
| 3632 | * Receiver routines | 3670 | * Receiver routines |
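The new dev_direct_xmit() lifts the queue-locked transmit fast path out of af_packet (see the net/packet/af_packet.c hunk below) so the AF_XDP TX path can share it. A hypothetical in-kernel caller, not from this commit, reduces to queue selection plus the call:

```c
/* Sketch of a dev_direct_xmit() user: skb->dev must already be set;
 * the function consumes the skb on failure and returns NET_XMIT_DROP
 * when the device is down or the TX queue is frozen.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/smp.h>

static int example_direct_send(struct sk_buff *skb)
{
	/* trivial queue pick; real callers map rings or flows to queues */
	u16 queue_id = raw_smp_processor_id() %
		       skb->dev->real_num_tx_queues;

	return dev_direct_xmit(skb, queue_id);
}
```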
| @@ -3996,12 +4034,12 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb) | |||
| 3996 | } | 4034 | } |
| 3997 | 4035 | ||
| 3998 | static u32 netif_receive_generic_xdp(struct sk_buff *skb, | 4036 | static u32 netif_receive_generic_xdp(struct sk_buff *skb, |
| 4037 | struct xdp_buff *xdp, | ||
| 3999 | struct bpf_prog *xdp_prog) | 4038 | struct bpf_prog *xdp_prog) |
| 4000 | { | 4039 | { |
| 4001 | struct netdev_rx_queue *rxqueue; | 4040 | struct netdev_rx_queue *rxqueue; |
| 4002 | void *orig_data, *orig_data_end; | 4041 | void *orig_data, *orig_data_end; |
| 4003 | u32 metalen, act = XDP_DROP; | 4042 | u32 metalen, act = XDP_DROP; |
| 4004 | struct xdp_buff xdp; | ||
| 4005 | int hlen, off; | 4043 | int hlen, off; |
| 4006 | u32 mac_len; | 4044 | u32 mac_len; |
| 4007 | 4045 | ||
| @@ -4036,19 +4074,19 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, | |||
| 4036 | */ | 4074 | */ |
| 4037 | mac_len = skb->data - skb_mac_header(skb); | 4075 | mac_len = skb->data - skb_mac_header(skb); |
| 4038 | hlen = skb_headlen(skb) + mac_len; | 4076 | hlen = skb_headlen(skb) + mac_len; |
| 4039 | xdp.data = skb->data - mac_len; | 4077 | xdp->data = skb->data - mac_len; |
| 4040 | xdp.data_meta = xdp.data; | 4078 | xdp->data_meta = xdp->data; |
| 4041 | xdp.data_end = xdp.data + hlen; | 4079 | xdp->data_end = xdp->data + hlen; |
| 4042 | xdp.data_hard_start = skb->data - skb_headroom(skb); | 4080 | xdp->data_hard_start = skb->data - skb_headroom(skb); |
| 4043 | orig_data_end = xdp.data_end; | 4081 | orig_data_end = xdp->data_end; |
| 4044 | orig_data = xdp.data; | 4082 | orig_data = xdp->data; |
| 4045 | 4083 | ||
| 4046 | rxqueue = netif_get_rxqueue(skb); | 4084 | rxqueue = netif_get_rxqueue(skb); |
| 4047 | xdp.rxq = &rxqueue->xdp_rxq; | 4085 | xdp->rxq = &rxqueue->xdp_rxq; |
| 4048 | 4086 | ||
| 4049 | act = bpf_prog_run_xdp(xdp_prog, &xdp); | 4087 | act = bpf_prog_run_xdp(xdp_prog, xdp); |
| 4050 | 4088 | ||
| 4051 | off = xdp.data - orig_data; | 4089 | off = xdp->data - orig_data; |
| 4052 | if (off > 0) | 4090 | if (off > 0) |
| 4053 | __skb_pull(skb, off); | 4091 | __skb_pull(skb, off); |
| 4054 | else if (off < 0) | 4092 | else if (off < 0) |
| @@ -4058,10 +4096,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, | |||
| 4058 | /* check if bpf_xdp_adjust_tail was used. it can only "shrink" | 4096 | /* check if bpf_xdp_adjust_tail was used. it can only "shrink" |
| 4059 | * packet. | 4097 | * packet. |
| 4060 | */ | 4098 | */ |
| 4061 | off = orig_data_end - xdp.data_end; | 4099 | off = orig_data_end - xdp->data_end; |
| 4062 | if (off != 0) { | 4100 | if (off != 0) { |
| 4063 | skb_set_tail_pointer(skb, xdp.data_end - xdp.data); | 4101 | skb_set_tail_pointer(skb, xdp->data_end - xdp->data); |
| 4064 | skb->len -= off; | 4102 | skb->len -= off; |
| 4103 | |||
| 4065 | } | 4104 | } |
| 4066 | 4105 | ||
| 4067 | switch (act) { | 4106 | switch (act) { |
| @@ -4070,7 +4109,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, | |||
| 4070 | __skb_push(skb, mac_len); | 4109 | __skb_push(skb, mac_len); |
| 4071 | break; | 4110 | break; |
| 4072 | case XDP_PASS: | 4111 | case XDP_PASS: |
| 4073 | metalen = xdp.data - xdp.data_meta; | 4112 | metalen = xdp->data - xdp->data_meta; |
| 4074 | if (metalen) | 4113 | if (metalen) |
| 4075 | skb_metadata_set(skb, metalen); | 4114 | skb_metadata_set(skb, metalen); |
| 4076 | break; | 4115 | break; |
| @@ -4120,17 +4159,19 @@ static struct static_key generic_xdp_needed __read_mostly; | |||
| 4120 | int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) | 4159 | int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) |
| 4121 | { | 4160 | { |
| 4122 | if (xdp_prog) { | 4161 | if (xdp_prog) { |
| 4123 | u32 act = netif_receive_generic_xdp(skb, xdp_prog); | 4162 | struct xdp_buff xdp; |
| 4163 | u32 act; | ||
| 4124 | int err; | 4164 | int err; |
| 4125 | 4165 | ||
| 4166 | act = netif_receive_generic_xdp(skb, &xdp, xdp_prog); | ||
| 4126 | if (act != XDP_PASS) { | 4167 | if (act != XDP_PASS) { |
| 4127 | switch (act) { | 4168 | switch (act) { |
| 4128 | case XDP_REDIRECT: | 4169 | case XDP_REDIRECT: |
| 4129 | err = xdp_do_generic_redirect(skb->dev, skb, | 4170 | err = xdp_do_generic_redirect(skb->dev, skb, |
| 4130 | xdp_prog); | 4171 | &xdp, xdp_prog); |
| 4131 | if (err) | 4172 | if (err) |
| 4132 | goto out_redir; | 4173 | goto out_redir; |
| 4133 | /* fallthru to submit skb */ | 4174 | break; |
| 4134 | case XDP_TX: | 4175 | case XDP_TX: |
| 4135 | generic_xdp_tx(skb, xdp_prog); | 4176 | generic_xdp_tx(skb, xdp_prog); |
| 4136 | break; | 4177 | break; |
diff --git a/net/core/filter.c b/net/core/filter.c index d3781daa26ab..6877426c23a6 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
| @@ -59,6 +59,7 @@ | |||
| 59 | #include <net/tcp.h> | 59 | #include <net/tcp.h> |
| 60 | #include <net/xfrm.h> | 60 | #include <net/xfrm.h> |
| 61 | #include <linux/bpf_trace.h> | 61 | #include <linux/bpf_trace.h> |
| 62 | #include <net/xdp_sock.h> | ||
| 62 | 63 | ||
| 63 | /** | 64 | /** |
| 64 | * sk_filter_trim_cap - run a packet through a socket filter | 65 | * sk_filter_trim_cap - run a packet through a socket filter |
| @@ -112,12 +113,12 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap) | |||
| 112 | } | 113 | } |
| 113 | EXPORT_SYMBOL(sk_filter_trim_cap); | 114 | EXPORT_SYMBOL(sk_filter_trim_cap); |
| 114 | 115 | ||
| 115 | BPF_CALL_1(__skb_get_pay_offset, struct sk_buff *, skb) | 116 | BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb) |
| 116 | { | 117 | { |
| 117 | return skb_get_poff(skb); | 118 | return skb_get_poff(skb); |
| 118 | } | 119 | } |
| 119 | 120 | ||
| 120 | BPF_CALL_3(__skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x) | 121 | BPF_CALL_3(bpf_skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x) |
| 121 | { | 122 | { |
| 122 | struct nlattr *nla; | 123 | struct nlattr *nla; |
| 123 | 124 | ||
| @@ -137,7 +138,7 @@ BPF_CALL_3(__skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x) | |||
| 137 | return 0; | 138 | return 0; |
| 138 | } | 139 | } |
| 139 | 140 | ||
| 140 | BPF_CALL_3(__skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x) | 141 | BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x) |
| 141 | { | 142 | { |
| 142 | struct nlattr *nla; | 143 | struct nlattr *nla; |
| 143 | 144 | ||
| @@ -161,13 +162,94 @@ BPF_CALL_3(__skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x) | |||
| 161 | return 0; | 162 | return 0; |
| 162 | } | 163 | } |
| 163 | 164 | ||
| 164 | BPF_CALL_0(__get_raw_cpu_id) | 165 | BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *, |
| 166 | data, int, headlen, int, offset) | ||
| 167 | { | ||
| 168 | u8 tmp, *ptr; | ||
| 169 | const int len = sizeof(tmp); | ||
| 170 | |||
| 171 | if (offset >= 0) { | ||
| 172 | if (headlen - offset >= len) | ||
| 173 | return *(u8 *)(data + offset); | ||
| 174 | if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) | ||
| 175 | return tmp; | ||
| 176 | } else { | ||
| 177 | ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); | ||
| 178 | if (likely(ptr)) | ||
| 179 | return *(u8 *)ptr; | ||
| 180 | } | ||
| 181 | |||
| 182 | return -EFAULT; | ||
| 183 | } | ||
| 184 | |||
| 185 | BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb, | ||
| 186 | int, offset) | ||
| 187 | { | ||
| 188 | return ____bpf_skb_load_helper_8(skb, skb->data, skb->len - skb->data_len, | ||
| 189 | offset); | ||
| 190 | } | ||
| 191 | |||
| 192 | BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *, | ||
| 193 | data, int, headlen, int, offset) | ||
| 194 | { | ||
| 195 | u16 tmp, *ptr; | ||
| 196 | const int len = sizeof(tmp); | ||
| 197 | |||
| 198 | if (offset >= 0) { | ||
| 199 | if (headlen - offset >= len) | ||
| 200 | return get_unaligned_be16(data + offset); | ||
| 201 | if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) | ||
| 202 | return be16_to_cpu(tmp); | ||
| 203 | } else { | ||
| 204 | ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); | ||
| 205 | if (likely(ptr)) | ||
| 206 | return get_unaligned_be16(ptr); | ||
| 207 | } | ||
| 208 | |||
| 209 | return -EFAULT; | ||
| 210 | } | ||
| 211 | |||
| 212 | BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb, | ||
| 213 | int, offset) | ||
| 214 | { | ||
| 215 | return ____bpf_skb_load_helper_16(skb, skb->data, skb->len - skb->data_len, | ||
| 216 | offset); | ||
| 217 | } | ||
| 218 | |||
| 219 | BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *, | ||
| 220 | data, int, headlen, int, offset) | ||
| 221 | { | ||
| 222 | u32 tmp, *ptr; | ||
| 223 | const int len = sizeof(tmp); | ||
| 224 | |||
| 225 | if (likely(offset >= 0)) { | ||
| 226 | if (headlen - offset >= len) | ||
| 227 | return get_unaligned_be32(data + offset); | ||
| 228 | if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) | ||
| 229 | return be32_to_cpu(tmp); | ||
| 230 | } else { | ||
| 231 | ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); | ||
| 232 | if (likely(ptr)) | ||
| 233 | return get_unaligned_be32(ptr); | ||
| 234 | } | ||
| 235 | |||
| 236 | return -EFAULT; | ||
| 237 | } | ||
| 238 | |||
| 239 | BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb, | ||
| 240 | int, offset) | ||
| 241 | { | ||
| 242 | return ____bpf_skb_load_helper_32(skb, skb->data, skb->len - skb->data_len, | ||
| 243 | offset); | ||
| 244 | } | ||
| 245 | |||
| 246 | BPF_CALL_0(bpf_get_raw_cpu_id) | ||
| 165 | { | 247 | { |
| 166 | return raw_smp_processor_id(); | 248 | return raw_smp_processor_id(); |
| 167 | } | 249 | } |
| 168 | 250 | ||
| 169 | static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = { | 251 | static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = { |
| 170 | .func = __get_raw_cpu_id, | 252 | .func = bpf_get_raw_cpu_id, |
| 171 | .gpl_only = false, | 253 | .gpl_only = false, |
| 172 | .ret_type = RET_INTEGER, | 254 | .ret_type = RET_INTEGER, |
| 173 | }; | 255 | }; |
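The three bpf_skb_load_helper_{8,16,32}() variants added above share one shape: read straight from the linear head when the offset fits, fall back to skb_copy_bits() for paged data, route negative offsets through bpf_internal_load_pointer_neg_helper(), and return -EFAULT on failure, which the converted program detects with the BPF_JSGE branch emitted further down. A user-space model of the 32-bit fast path, assuming a little-endian host:

```c
/* Model of the load helper's direct-read case; a sketch, not the
 * kernel function. A negative return means failure, mirroring the
 * BPF_JSGE check the converter emits after each helper call.
 */
#include <errno.h>
#include <stdint.h>
#include <string.h>

static int64_t load_be32(const uint8_t *head, int headlen, int offset)
{
	uint32_t v;

	if (offset < 0 || headlen - offset < (int)sizeof(v))
		return -EFAULT;	/* kernel would try skb_copy_bits() */
	memcpy(&v, head + offset, sizeof(v));	/* unaligned-safe */
	return __builtin_bswap32(v);		/* network to host order */
}
```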
| @@ -317,16 +399,16 @@ static bool convert_bpf_extensions(struct sock_filter *fp, | |||
| 317 | /* Emit call(arg1=CTX, arg2=A, arg3=X) */ | 399 | /* Emit call(arg1=CTX, arg2=A, arg3=X) */ |
| 318 | switch (fp->k) { | 400 | switch (fp->k) { |
| 319 | case SKF_AD_OFF + SKF_AD_PAY_OFFSET: | 401 | case SKF_AD_OFF + SKF_AD_PAY_OFFSET: |
| 320 | *insn = BPF_EMIT_CALL(__skb_get_pay_offset); | 402 | *insn = BPF_EMIT_CALL(bpf_skb_get_pay_offset); |
| 321 | break; | 403 | break; |
| 322 | case SKF_AD_OFF + SKF_AD_NLATTR: | 404 | case SKF_AD_OFF + SKF_AD_NLATTR: |
| 323 | *insn = BPF_EMIT_CALL(__skb_get_nlattr); | 405 | *insn = BPF_EMIT_CALL(bpf_skb_get_nlattr); |
| 324 | break; | 406 | break; |
| 325 | case SKF_AD_OFF + SKF_AD_NLATTR_NEST: | 407 | case SKF_AD_OFF + SKF_AD_NLATTR_NEST: |
| 326 | *insn = BPF_EMIT_CALL(__skb_get_nlattr_nest); | 408 | *insn = BPF_EMIT_CALL(bpf_skb_get_nlattr_nest); |
| 327 | break; | 409 | break; |
| 328 | case SKF_AD_OFF + SKF_AD_CPU: | 410 | case SKF_AD_OFF + SKF_AD_CPU: |
| 329 | *insn = BPF_EMIT_CALL(__get_raw_cpu_id); | 411 | *insn = BPF_EMIT_CALL(bpf_get_raw_cpu_id); |
| 330 | break; | 412 | break; |
| 331 | case SKF_AD_OFF + SKF_AD_RANDOM: | 413 | case SKF_AD_OFF + SKF_AD_RANDOM: |
| 332 | *insn = BPF_EMIT_CALL(bpf_user_rnd_u32); | 414 | *insn = BPF_EMIT_CALL(bpf_user_rnd_u32); |
| @@ -353,26 +435,87 @@ static bool convert_bpf_extensions(struct sock_filter *fp, | |||
| 353 | return true; | 435 | return true; |
| 354 | } | 436 | } |
| 355 | 437 | ||
| 438 | static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp) | ||
| 439 | { | ||
| 440 | const bool unaligned_ok = IS_BUILTIN(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS); | ||
| 441 | int size = bpf_size_to_bytes(BPF_SIZE(fp->code)); | ||
| 442 | bool endian = BPF_SIZE(fp->code) == BPF_H || | ||
| 443 | BPF_SIZE(fp->code) == BPF_W; | ||
| 444 | bool indirect = BPF_MODE(fp->code) == BPF_IND; | ||
| 445 | const int ip_align = NET_IP_ALIGN; | ||
| 446 | struct bpf_insn *insn = *insnp; | ||
| 447 | int offset = fp->k; | ||
| 448 | |||
| 449 | if (!indirect && | ||
| 450 | ((unaligned_ok && offset >= 0) || | ||
| 451 | (!unaligned_ok && offset >= 0 && | ||
| 452 | offset + ip_align >= 0 && | ||
| 453 | offset + ip_align % size == 0))) { | ||
| 454 | *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H); | ||
| 455 | *insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset); | ||
| 456 | *insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP, size, 2 + endian); | ||
| 457 | *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A, BPF_REG_D, | ||
| 458 | offset); | ||
| 459 | if (endian) | ||
| 460 | *insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8); | ||
| 461 | *insn++ = BPF_JMP_A(8); | ||
| 462 | } | ||
| 463 | |||
| 464 | *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX); | ||
| 465 | *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_D); | ||
| 466 | *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_H); | ||
| 467 | if (!indirect) { | ||
| 468 | *insn++ = BPF_MOV64_IMM(BPF_REG_ARG4, offset); | ||
| 469 | } else { | ||
| 470 | *insn++ = BPF_MOV64_REG(BPF_REG_ARG4, BPF_REG_X); | ||
| 471 | if (fp->k) | ||
| 472 | *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG4, offset); | ||
| 473 | } | ||
| 474 | |||
| 475 | switch (BPF_SIZE(fp->code)) { | ||
| 476 | case BPF_B: | ||
| 477 | *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8); | ||
| 478 | break; | ||
| 479 | case BPF_H: | ||
| 480 | *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16); | ||
| 481 | break; | ||
| 482 | case BPF_W: | ||
| 483 | *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32); | ||
| 484 | break; | ||
| 485 | default: | ||
| 486 | return false; | ||
| 487 | } | ||
| 488 | |||
| 489 | *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_A, 0, 2); | ||
| 490 | *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); | ||
| 491 | *insn = BPF_EXIT_INSN(); | ||
| 492 | |||
| 493 | *insnp = insn; | ||
| 494 | return true; | ||
| 495 | } | ||
| 496 | |||
| 356 | /** | 497 | /** |
| 357 | * bpf_convert_filter - convert filter program | 498 | * bpf_convert_filter - convert filter program |
| 358 | * @prog: the user passed filter program | 499 | * @prog: the user passed filter program |
| 359 | * @len: the length of the user passed filter program | 500 | * @len: the length of the user passed filter program |
| 360 | * @new_prog: allocated 'struct bpf_prog' or NULL | 501 | * @new_prog: allocated 'struct bpf_prog' or NULL |
| 361 | * @new_len: pointer to store length of converted program | 502 | * @new_len: pointer to store length of converted program |
| 503 | * @seen_ld_abs: bool whether we've seen ld_abs/ind | ||
| 362 | * | 504 | * |
| 363 | * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn' | 505 | * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn' |
| 364 | * style extended BPF (eBPF). | 506 | * style extended BPF (eBPF). |
| 365 | * Conversion workflow: | 507 | * Conversion workflow: |
| 366 | * | 508 | * |
| 367 | * 1) First pass for calculating the new program length: | 509 | * 1) First pass for calculating the new program length: |
| 368 | * bpf_convert_filter(old_prog, old_len, NULL, &new_len) | 510 | * bpf_convert_filter(old_prog, old_len, NULL, &new_len, &seen_ld_abs) |
| 369 | * | 511 | * |
| 370 | * 2) 2nd pass to remap in two passes: 1st pass finds new | 512 | * 2) 2nd pass to remap in two passes: 1st pass finds new |
| 371 | * jump offsets, 2nd pass remapping: | 513 | * jump offsets, 2nd pass remapping: |
| 372 | * bpf_convert_filter(old_prog, old_len, new_prog, &new_len); | 514 | * bpf_convert_filter(old_prog, old_len, new_prog, &new_len, &seen_ld_abs) |
| 373 | */ | 515 | */ |
| 374 | static int bpf_convert_filter(struct sock_filter *prog, int len, | 516 | static int bpf_convert_filter(struct sock_filter *prog, int len, |
| 375 | struct bpf_prog *new_prog, int *new_len) | 517 | struct bpf_prog *new_prog, int *new_len, |
| 518 | bool *seen_ld_abs) | ||
| 376 | { | 519 | { |
| 377 | int new_flen = 0, pass = 0, target, i, stack_off; | 520 | int new_flen = 0, pass = 0, target, i, stack_off; |
| 378 | struct bpf_insn *new_insn, *first_insn = NULL; | 521 | struct bpf_insn *new_insn, *first_insn = NULL; |
| @@ -411,12 +554,27 @@ do_pass: | |||
| 411 | * do this ourselves. Initial CTX is present in BPF_REG_ARG1. | 554 | * do this ourselves. Initial CTX is present in BPF_REG_ARG1. |
| 412 | */ | 555 | */ |
| 413 | *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); | 556 | *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); |
| 557 | if (*seen_ld_abs) { | ||
| 558 | /* For packet access in classic BPF, cache skb->data | ||
| 559 | * in callee-saved BPF R8 and skb->len - skb->data_len | ||
| 560 | * (headlen) in BPF R9. Since classic BPF is read-only | ||
| 561 | * on CTX, we only need to cache it once. | ||
| 562 | */ | ||
| 563 | *new_insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), | ||
| 564 | BPF_REG_D, BPF_REG_CTX, | ||
| 565 | offsetof(struct sk_buff, data)); | ||
| 566 | *new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_H, BPF_REG_CTX, | ||
| 567 | offsetof(struct sk_buff, len)); | ||
| 568 | *new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_TMP, BPF_REG_CTX, | ||
| 569 | offsetof(struct sk_buff, data_len)); | ||
| 570 | *new_insn++ = BPF_ALU32_REG(BPF_SUB, BPF_REG_H, BPF_REG_TMP); | ||
| 571 | } | ||
| 414 | } else { | 572 | } else { |
| 415 | new_insn += 3; | 573 | new_insn += 3; |
| 416 | } | 574 | } |
| 417 | 575 | ||
| 418 | for (i = 0; i < len; fp++, i++) { | 576 | for (i = 0; i < len; fp++, i++) { |
| 419 | struct bpf_insn tmp_insns[6] = { }; | 577 | struct bpf_insn tmp_insns[32] = { }; |
| 420 | struct bpf_insn *insn = tmp_insns; | 578 | struct bpf_insn *insn = tmp_insns; |
| 421 | 579 | ||
| 422 | if (addrs) | 580 | if (addrs) |
| @@ -459,6 +617,11 @@ do_pass: | |||
| 459 | BPF_MODE(fp->code) == BPF_ABS && | 617 | BPF_MODE(fp->code) == BPF_ABS && |
| 460 | convert_bpf_extensions(fp, &insn)) | 618 | convert_bpf_extensions(fp, &insn)) |
| 461 | break; | 619 | break; |
| 620 | if (BPF_CLASS(fp->code) == BPF_LD && | ||
| 621 | convert_bpf_ld_abs(fp, &insn)) { | ||
| 622 | *seen_ld_abs = true; | ||
| 623 | break; | ||
| 624 | } | ||
| 462 | 625 | ||
| 463 | if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) || | 626 | if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) || |
| 464 | fp->code == (BPF_ALU | BPF_MOD | BPF_X)) { | 627 | fp->code == (BPF_ALU | BPF_MOD | BPF_X)) { |
| @@ -561,21 +724,31 @@ jmp_rest: | |||
| 561 | break; | 724 | break; |
| 562 | 725 | ||
| 563 | /* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */ | 726 | /* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */ |
| 564 | case BPF_LDX | BPF_MSH | BPF_B: | 727 | case BPF_LDX | BPF_MSH | BPF_B: { |
| 565 | /* tmp = A */ | 728 | struct sock_filter tmp = { |
| 566 | *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A); | 729 | .code = BPF_LD | BPF_ABS | BPF_B, |
| 730 | .k = fp->k, | ||
| 731 | }; | ||
| 732 | |||
| 733 | *seen_ld_abs = true; | ||
| 734 | |||
| 735 | /* X = A */ | ||
| 736 | *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); | ||
| 567 | /* A = BPF_R0 = *(u8 *) (skb->data + K) */ | 737 | /* A = BPF_R0 = *(u8 *) (skb->data + K) */ |
| 568 | *insn++ = BPF_LD_ABS(BPF_B, fp->k); | 738 | convert_bpf_ld_abs(&tmp, &insn); |
| 739 | insn++; | ||
| 569 | /* A &= 0xf */ | 740 | /* A &= 0xf */ |
| 570 | *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf); | 741 | *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf); |
| 571 | /* A <<= 2 */ | 742 | /* A <<= 2 */ |
| 572 | *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2); | 743 | *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2); |
| 744 | /* tmp = X */ | ||
| 745 | *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_X); | ||
| 573 | /* X = A */ | 746 | /* X = A */ |
| 574 | *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); | 747 | *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); |
| 575 | /* A = tmp */ | 748 | /* A = tmp */ |
| 576 | *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); | 749 | *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); |
| 577 | break; | 750 | break; |
| 578 | 751 | } | |
| 579 | /* RET_K is remapped into 2 insns. RET_A case doesn't need an | 752 | /* RET_K is remapped into 2 insns. RET_A case doesn't need an |
| 580 | * extra mov as BPF_REG_0 is already mapped into BPF_REG_A. | 753 | * extra mov as BPF_REG_0 is already mapped into BPF_REG_A. |
| 581 | */ | 754 | */ |
| @@ -657,6 +830,8 @@ jmp_rest: | |||
| 657 | if (!new_prog) { | 830 | if (!new_prog) { |
| 658 | /* Only calculating new length. */ | 831 | /* Only calculating new length. */ |
| 659 | *new_len = new_insn - first_insn; | 832 | *new_len = new_insn - first_insn; |
| 833 | if (*seen_ld_abs) | ||
| 834 | *new_len += 4; /* Prologue bits. */ | ||
| 660 | return 0; | 835 | return 0; |
| 661 | } | 836 | } |
| 662 | 837 | ||
| @@ -1018,6 +1193,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) | |||
| 1018 | struct sock_filter *old_prog; | 1193 | struct sock_filter *old_prog; |
| 1019 | struct bpf_prog *old_fp; | 1194 | struct bpf_prog *old_fp; |
| 1020 | int err, new_len, old_len = fp->len; | 1195 | int err, new_len, old_len = fp->len; |
| 1196 | bool seen_ld_abs = false; | ||
| 1021 | 1197 | ||
| 1022 | /* We are free to overwrite insns et al right here as it | 1198 | /* We are free to overwrite insns et al right here as it |
| 1023 | * won't be used at this point in time anymore internally | 1199 | * won't be used at this point in time anymore internally |
| @@ -1039,7 +1215,8 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) | |||
| 1039 | } | 1215 | } |
| 1040 | 1216 | ||
| 1041 | /* 1st pass: calculate the new program length. */ | 1217 | /* 1st pass: calculate the new program length. */ |
| 1042 | err = bpf_convert_filter(old_prog, old_len, NULL, &new_len); | 1218 | err = bpf_convert_filter(old_prog, old_len, NULL, &new_len, |
| 1219 | &seen_ld_abs); | ||
| 1043 | if (err) | 1220 | if (err) |
| 1044 | goto out_err_free; | 1221 | goto out_err_free; |
| 1045 | 1222 | ||
| @@ -1058,7 +1235,8 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) | |||
| 1058 | fp->len = new_len; | 1235 | fp->len = new_len; |
| 1059 | 1236 | ||
| 1060 | /* 2nd pass: remap sock_filter insns into bpf_insn insns. */ | 1237 | /* 2nd pass: remap sock_filter insns into bpf_insn insns. */ |
| 1061 | err = bpf_convert_filter(old_prog, old_len, fp, &new_len); | 1238 | err = bpf_convert_filter(old_prog, old_len, fp, &new_len, |
| 1239 | &seen_ld_abs); | ||
| 1062 | if (err) | 1240 | if (err) |
| 1063 | /* 2nd bpf_convert_filter() can fail only if it fails | 1241 | /* 2nd bpf_convert_filter() can fail only if it fails |
| 1064 | * to allocate memory, remapping must succeed. Note, | 1242 | * to allocate memory, remapping must succeed. Note, |
| @@ -1506,6 +1684,47 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { | |||
| 1506 | .arg4_type = ARG_CONST_SIZE, | 1684 | .arg4_type = ARG_CONST_SIZE, |
| 1507 | }; | 1685 | }; |
| 1508 | 1686 | ||
| 1687 | BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb, | ||
| 1688 | u32, offset, void *, to, u32, len, u32, start_header) | ||
| 1689 | { | ||
| 1690 | u8 *ptr; | ||
| 1691 | |||
| 1692 | if (unlikely(offset > 0xffff || len > skb_headlen(skb))) | ||
| 1693 | goto err_clear; | ||
| 1694 | |||
| 1695 | switch (start_header) { | ||
| 1696 | case BPF_HDR_START_MAC: | ||
| 1697 | ptr = skb_mac_header(skb) + offset; | ||
| 1698 | break; | ||
| 1699 | case BPF_HDR_START_NET: | ||
| 1700 | ptr = skb_network_header(skb) + offset; | ||
| 1701 | break; | ||
| 1702 | default: | ||
| 1703 | goto err_clear; | ||
| 1704 | } | ||
| 1705 | |||
| 1706 | if (likely(ptr >= skb_mac_header(skb) && | ||
| 1707 | ptr + len <= skb_tail_pointer(skb))) { | ||
| 1708 | memcpy(to, ptr, len); | ||
| 1709 | return 0; | ||
| 1710 | } | ||
| 1711 | |||
| 1712 | err_clear: | ||
| 1713 | memset(to, 0, len); | ||
| 1714 | return -EFAULT; | ||
| 1715 | } | ||
| 1716 | |||
| 1717 | static const struct bpf_func_proto bpf_skb_load_bytes_relative_proto = { | ||
| 1718 | .func = bpf_skb_load_bytes_relative, | ||
| 1719 | .gpl_only = false, | ||
| 1720 | .ret_type = RET_INTEGER, | ||
| 1721 | .arg1_type = ARG_PTR_TO_CTX, | ||
| 1722 | .arg2_type = ARG_ANYTHING, | ||
| 1723 | .arg3_type = ARG_PTR_TO_UNINIT_MEM, | ||
| 1724 | .arg4_type = ARG_CONST_SIZE, | ||
| 1725 | .arg5_type = ARG_ANYTHING, | ||
| 1726 | }; | ||
| 1727 | |||
| 1509 | BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len) | 1728 | BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len) |
| 1510 | { | 1729 | { |
| 1511 | /* Idea is the following: should the needed direct read/write | 1730 | /* Idea is the following: should the needed direct read/write |
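The new bpf_skb_load_bytes_relative() reads relative to the MAC or network header instead of skb->data, so a program stays correct however many VLAN or tunnel bytes precede the payload. A sketch of a native tc classifier using it; the section name and the libbpf-style bpf_helpers.h include are assumptions, not part of this commit:

```c
#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("classifier")
int drop_low_ttl(struct __sk_buff *skb)
{
	struct iphdr iph;

	/* read the IPv4 header at offset 0 from the network header */
	if (bpf_skb_load_bytes_relative(skb, 0, &iph, sizeof(iph),
					BPF_HDR_START_NET))
		return TC_ACT_OK;	/* out of bounds: leave it alone */
	return iph.ttl > 1 ? TC_ACT_OK : TC_ACT_SHOT;
}

char _license[] SEC("license") = "GPL";
```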
| @@ -2180,7 +2399,7 @@ BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto, | |||
| 2180 | return ret; | 2399 | return ret; |
| 2181 | } | 2400 | } |
| 2182 | 2401 | ||
| 2183 | const struct bpf_func_proto bpf_skb_vlan_push_proto = { | 2402 | static const struct bpf_func_proto bpf_skb_vlan_push_proto = { |
| 2184 | .func = bpf_skb_vlan_push, | 2403 | .func = bpf_skb_vlan_push, |
| 2185 | .gpl_only = false, | 2404 | .gpl_only = false, |
| 2186 | .ret_type = RET_INTEGER, | 2405 | .ret_type = RET_INTEGER, |
| @@ -2188,7 +2407,6 @@ const struct bpf_func_proto bpf_skb_vlan_push_proto = { | |||
| 2188 | .arg2_type = ARG_ANYTHING, | 2407 | .arg2_type = ARG_ANYTHING, |
| 2189 | .arg3_type = ARG_ANYTHING, | 2408 | .arg3_type = ARG_ANYTHING, |
| 2190 | }; | 2409 | }; |
| 2191 | EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto); | ||
| 2192 | 2410 | ||
| 2193 | BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb) | 2411 | BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb) |
| 2194 | { | 2412 | { |
| @@ -2202,13 +2420,12 @@ BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb) | |||
| 2202 | return ret; | 2420 | return ret; |
| 2203 | } | 2421 | } |
| 2204 | 2422 | ||
| 2205 | const struct bpf_func_proto bpf_skb_vlan_pop_proto = { | 2423 | static const struct bpf_func_proto bpf_skb_vlan_pop_proto = { |
| 2206 | .func = bpf_skb_vlan_pop, | 2424 | .func = bpf_skb_vlan_pop, |
| 2207 | .gpl_only = false, | 2425 | .gpl_only = false, |
| 2208 | .ret_type = RET_INTEGER, | 2426 | .ret_type = RET_INTEGER, |
| 2209 | .arg1_type = ARG_PTR_TO_CTX, | 2427 | .arg1_type = ARG_PTR_TO_CTX, |
| 2210 | }; | 2428 | }; |
| 2211 | EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto); | ||
| 2212 | 2429 | ||
| 2213 | static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) | 2430 | static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) |
| 2214 | { | 2431 | { |
| @@ -2801,7 +3018,8 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, | |||
| 2801 | { | 3018 | { |
| 2802 | int err; | 3019 | int err; |
| 2803 | 3020 | ||
| 2804 | if (map->map_type == BPF_MAP_TYPE_DEVMAP) { | 3021 | switch (map->map_type) { |
| 3022 | case BPF_MAP_TYPE_DEVMAP: { | ||
| 2805 | struct net_device *dev = fwd; | 3023 | struct net_device *dev = fwd; |
| 2806 | struct xdp_frame *xdpf; | 3024 | struct xdp_frame *xdpf; |
| 2807 | 3025 | ||
| @@ -2819,14 +3037,25 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, | |||
| 2819 | if (err) | 3037 | if (err) |
| 2820 | return err; | 3038 | return err; |
| 2821 | __dev_map_insert_ctx(map, index); | 3039 | __dev_map_insert_ctx(map, index); |
| 2822 | 3040 | break; | |
| 2823 | } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) { | 3041 | } |
| 3042 | case BPF_MAP_TYPE_CPUMAP: { | ||
| 2824 | struct bpf_cpu_map_entry *rcpu = fwd; | 3043 | struct bpf_cpu_map_entry *rcpu = fwd; |
| 2825 | 3044 | ||
| 2826 | err = cpu_map_enqueue(rcpu, xdp, dev_rx); | 3045 | err = cpu_map_enqueue(rcpu, xdp, dev_rx); |
| 2827 | if (err) | 3046 | if (err) |
| 2828 | return err; | 3047 | return err; |
| 2829 | __cpu_map_insert_ctx(map, index); | 3048 | __cpu_map_insert_ctx(map, index); |
| 3049 | break; | ||
| 3050 | } | ||
| 3051 | case BPF_MAP_TYPE_XSKMAP: { | ||
| 3052 | struct xdp_sock *xs = fwd; | ||
| 3053 | |||
| 3054 | err = __xsk_map_redirect(map, xdp, xs); | ||
| 3055 | return err; | ||
| 3056 | } | ||
| 3057 | default: | ||
| 3058 | break; | ||
| 2830 | } | 3059 | } |
| 2831 | return 0; | 3060 | return 0; |
| 2832 | } | 3061 | } |
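With BPF_MAP_TYPE_XSKMAP wired into the redirect paths here, an XDP program steers frames to AF_XDP sockets through an ordinary bpf_redirect_map() call. A sketch keyed by RX queue index, in the style of the xdpsock sample; map name and size are illustrative:

```c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct bpf_map_def SEC("maps") xsks_map = {
	.type		= BPF_MAP_TYPE_XSKMAP,
	.key_size	= sizeof(int),
	.value_size	= sizeof(int),
	.max_entries	= 4,
};

SEC("xdp")
int xdp_redirect_xsk(struct xdp_md *ctx)
{
	/* redirect to the socket bound at this RX queue, if any;
	 * flags must be zero at this point in the API
	 */
	return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, 0);
}

char _license[] SEC("license") = "GPL";
```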
| @@ -2845,6 +3074,9 @@ void xdp_do_flush_map(void) | |||
| 2845 | case BPF_MAP_TYPE_CPUMAP: | 3074 | case BPF_MAP_TYPE_CPUMAP: |
| 2846 | __cpu_map_flush(map); | 3075 | __cpu_map_flush(map); |
| 2847 | break; | 3076 | break; |
| 3077 | case BPF_MAP_TYPE_XSKMAP: | ||
| 3078 | __xsk_map_flush(map); | ||
| 3079 | break; | ||
| 2848 | default: | 3080 | default: |
| 2849 | break; | 3081 | break; |
| 2850 | } | 3082 | } |
| @@ -2859,6 +3091,8 @@ static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) | |||
| 2859 | return __dev_map_lookup_elem(map, index); | 3091 | return __dev_map_lookup_elem(map, index); |
| 2860 | case BPF_MAP_TYPE_CPUMAP: | 3092 | case BPF_MAP_TYPE_CPUMAP: |
| 2861 | return __cpu_map_lookup_elem(map, index); | 3093 | return __cpu_map_lookup_elem(map, index); |
| 3094 | case BPF_MAP_TYPE_XSKMAP: | ||
| 3095 | return __xsk_map_lookup_elem(map, index); | ||
| 2862 | default: | 3096 | default: |
| 2863 | return NULL; | 3097 | return NULL; |
| 2864 | } | 3098 | } |
| @@ -2956,13 +3190,14 @@ static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd) | |||
| 2956 | 3190 | ||
| 2957 | static int xdp_do_generic_redirect_map(struct net_device *dev, | 3191 | static int xdp_do_generic_redirect_map(struct net_device *dev, |
| 2958 | struct sk_buff *skb, | 3192 | struct sk_buff *skb, |
| 3193 | struct xdp_buff *xdp, | ||
| 2959 | struct bpf_prog *xdp_prog) | 3194 | struct bpf_prog *xdp_prog) |
| 2960 | { | 3195 | { |
| 2961 | struct redirect_info *ri = this_cpu_ptr(&redirect_info); | 3196 | struct redirect_info *ri = this_cpu_ptr(&redirect_info); |
| 2962 | unsigned long map_owner = ri->map_owner; | 3197 | unsigned long map_owner = ri->map_owner; |
| 2963 | struct bpf_map *map = ri->map; | 3198 | struct bpf_map *map = ri->map; |
| 2964 | struct net_device *fwd = NULL; | ||
| 2965 | u32 index = ri->ifindex; | 3199 | u32 index = ri->ifindex; |
| 3200 | void *fwd = NULL; | ||
| 2966 | int err = 0; | 3201 | int err = 0; |
| 2967 | 3202 | ||
| 2968 | ri->ifindex = 0; | 3203 | ri->ifindex = 0; |
| @@ -2984,6 +3219,14 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, | |||
| 2984 | if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) | 3219 | if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) |
| 2985 | goto err; | 3220 | goto err; |
| 2986 | skb->dev = fwd; | 3221 | skb->dev = fwd; |
| 3222 | generic_xdp_tx(skb, xdp_prog); | ||
| 3223 | } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { | ||
| 3224 | struct xdp_sock *xs = fwd; | ||
| 3225 | |||
| 3226 | err = xsk_generic_rcv(xs, xdp); | ||
| 3227 | if (err) | ||
| 3228 | goto err; | ||
| 3229 | consume_skb(skb); | ||
| 2987 | } else { | 3230 | } else { |
| 2988 | /* TODO: Handle BPF_MAP_TYPE_CPUMAP */ | 3231 | /* TODO: Handle BPF_MAP_TYPE_CPUMAP */ |
| 2989 | err = -EBADRQC; | 3232 | err = -EBADRQC; |
| @@ -2998,7 +3241,7 @@ err: | |||
| 2998 | } | 3241 | } |
| 2999 | 3242 | ||
| 3000 | int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, | 3243 | int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, |
| 3001 | struct bpf_prog *xdp_prog) | 3244 | struct xdp_buff *xdp, struct bpf_prog *xdp_prog) |
| 3002 | { | 3245 | { |
| 3003 | struct redirect_info *ri = this_cpu_ptr(&redirect_info); | 3246 | struct redirect_info *ri = this_cpu_ptr(&redirect_info); |
| 3004 | u32 index = ri->ifindex; | 3247 | u32 index = ri->ifindex; |
| @@ -3006,7 +3249,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, | |||
| 3006 | int err = 0; | 3249 | int err = 0; |
| 3007 | 3250 | ||
| 3008 | if (ri->map) | 3251 | if (ri->map) |
| 3009 | return xdp_do_generic_redirect_map(dev, skb, xdp_prog); | 3252 | return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog); |
| 3010 | 3253 | ||
| 3011 | ri->ifindex = 0; | 3254 | ri->ifindex = 0; |
| 3012 | fwd = dev_get_by_index_rcu(dev_net(dev), index); | 3255 | fwd = dev_get_by_index_rcu(dev_net(dev), index); |
| @@ -3020,6 +3263,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, | |||
| 3020 | 3263 | ||
| 3021 | skb->dev = fwd; | 3264 | skb->dev = fwd; |
| 3022 | _trace_xdp_redirect(dev, xdp_prog, index); | 3265 | _trace_xdp_redirect(dev, xdp_prog, index); |
| 3266 | generic_xdp_tx(skb, xdp_prog); | ||
| 3023 | return 0; | 3267 | return 0; |
| 3024 | err: | 3268 | err: |
| 3025 | _trace_xdp_redirect_err(dev, xdp_prog, index, err); | 3269 | _trace_xdp_redirect_err(dev, xdp_prog, index, err); |
| @@ -3858,6 +4102,8 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |||
| 3858 | switch (func_id) { | 4102 | switch (func_id) { |
| 3859 | case BPF_FUNC_skb_load_bytes: | 4103 | case BPF_FUNC_skb_load_bytes: |
| 3860 | return &bpf_skb_load_bytes_proto; | 4104 | return &bpf_skb_load_bytes_proto; |
| 4105 | case BPF_FUNC_skb_load_bytes_relative: | ||
| 4106 | return &bpf_skb_load_bytes_relative_proto; | ||
| 3861 | case BPF_FUNC_get_socket_cookie: | 4107 | case BPF_FUNC_get_socket_cookie: |
| 3862 | return &bpf_get_socket_cookie_proto; | 4108 | return &bpf_get_socket_cookie_proto; |
| 3863 | case BPF_FUNC_get_socket_uid: | 4109 | case BPF_FUNC_get_socket_uid: |
| @@ -3875,6 +4121,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |||
| 3875 | return &bpf_skb_store_bytes_proto; | 4121 | return &bpf_skb_store_bytes_proto; |
| 3876 | case BPF_FUNC_skb_load_bytes: | 4122 | case BPF_FUNC_skb_load_bytes: |
| 3877 | return &bpf_skb_load_bytes_proto; | 4123 | return &bpf_skb_load_bytes_proto; |
| 4124 | case BPF_FUNC_skb_load_bytes_relative: | ||
| 4125 | return &bpf_skb_load_bytes_relative_proto; | ||
| 3878 | case BPF_FUNC_skb_pull_data: | 4126 | case BPF_FUNC_skb_pull_data: |
| 3879 | return &bpf_skb_pull_data_proto; | 4127 | return &bpf_skb_pull_data_proto; |
| 3880 | case BPF_FUNC_csum_diff: | 4128 | case BPF_FUNC_csum_diff: |
| @@ -4304,6 +4552,41 @@ static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write, | |||
| 4304 | return insn - insn_buf; | 4552 | return insn - insn_buf; |
| 4305 | } | 4553 | } |
| 4306 | 4554 | ||
| 4555 | static int bpf_gen_ld_abs(const struct bpf_insn *orig, | ||
| 4556 | struct bpf_insn *insn_buf) | ||
| 4557 | { | ||
| 4558 | bool indirect = BPF_MODE(orig->code) == BPF_IND; | ||
| 4559 | struct bpf_insn *insn = insn_buf; | ||
| 4560 | |||
| 4561 | /* We're guaranteed here that CTX is in R6. */ | ||
| 4562 | *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX); | ||
| 4563 | if (!indirect) { | ||
| 4564 | *insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm); | ||
| 4565 | } else { | ||
| 4566 | *insn++ = BPF_MOV64_REG(BPF_REG_2, orig->src_reg); | ||
| 4567 | if (orig->imm) | ||
| 4568 | *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm); | ||
| 4569 | } | ||
| 4570 | |||
| 4571 | switch (BPF_SIZE(orig->code)) { | ||
| 4572 | case BPF_B: | ||
| 4573 | *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8_no_cache); | ||
| 4574 | break; | ||
| 4575 | case BPF_H: | ||
| 4576 | *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16_no_cache); | ||
| 4577 | break; | ||
| 4578 | case BPF_W: | ||
| 4579 | *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32_no_cache); | ||
| 4580 | break; | ||
| 4581 | } | ||
| 4582 | |||
| 4583 | *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 2); | ||
| 4584 | *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0); | ||
| 4585 | *insn++ = BPF_EXIT_INSN(); | ||
| 4586 | |||
| 4587 | return insn - insn_buf; | ||
| 4588 | } | ||
| 4589 | |||
| 4307 | static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, | 4590 | static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, |
| 4308 | const struct bpf_prog *prog) | 4591 | const struct bpf_prog *prog) |
| 4309 | { | 4592 | { |
| @@ -5573,6 +5856,7 @@ const struct bpf_verifier_ops sk_filter_verifier_ops = { | |||
| 5573 | .get_func_proto = sk_filter_func_proto, | 5856 | .get_func_proto = sk_filter_func_proto, |
| 5574 | .is_valid_access = sk_filter_is_valid_access, | 5857 | .is_valid_access = sk_filter_is_valid_access, |
| 5575 | .convert_ctx_access = bpf_convert_ctx_access, | 5858 | .convert_ctx_access = bpf_convert_ctx_access, |
| 5859 | .gen_ld_abs = bpf_gen_ld_abs, | ||
| 5576 | }; | 5860 | }; |
| 5577 | 5861 | ||
| 5578 | const struct bpf_prog_ops sk_filter_prog_ops = { | 5862 | const struct bpf_prog_ops sk_filter_prog_ops = { |
| @@ -5584,6 +5868,7 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = { | |||
| 5584 | .is_valid_access = tc_cls_act_is_valid_access, | 5868 | .is_valid_access = tc_cls_act_is_valid_access, |
| 5585 | .convert_ctx_access = tc_cls_act_convert_ctx_access, | 5869 | .convert_ctx_access = tc_cls_act_convert_ctx_access, |
| 5586 | .gen_prologue = tc_cls_act_prologue, | 5870 | .gen_prologue = tc_cls_act_prologue, |
| 5871 | .gen_ld_abs = bpf_gen_ld_abs, | ||
| 5587 | }; | 5872 | }; |
| 5588 | 5873 | ||
| 5589 | const struct bpf_prog_ops tc_cls_act_prog_ops = { | 5874 | const struct bpf_prog_ops tc_cls_act_prog_ops = { |
diff --git a/net/core/sock.c b/net/core/sock.c index b2c3db169ca1..e7d8b6c955c6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
| @@ -226,7 +226,8 @@ static struct lock_class_key af_family_kern_slock_keys[AF_MAX]; | |||
| 226 | x "AF_RXRPC" , x "AF_ISDN" , x "AF_PHONET" , \ | 226 | x "AF_RXRPC" , x "AF_ISDN" , x "AF_PHONET" , \ |
| 227 | x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \ | 227 | x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \ |
| 228 | x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \ | 228 | x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \ |
| 229 | x "AF_QIPCRTR", x "AF_SMC" , x "AF_MAX" | 229 | x "AF_QIPCRTR", x "AF_SMC" , x "AF_XDP" , \ |
| 230 | x "AF_MAX" | ||
| 230 | 231 | ||
| 231 | static const char *const af_family_key_strings[AF_MAX+1] = { | 232 | static const char *const af_family_key_strings[AF_MAX+1] = { |
| 232 | _sock_locks("sk_lock-") | 233 | _sock_locks("sk_lock-") |
| @@ -262,7 +263,8 @@ static const char *const af_family_rlock_key_strings[AF_MAX+1] = { | |||
| 262 | "rlock-AF_RXRPC" , "rlock-AF_ISDN" , "rlock-AF_PHONET" , | 263 | "rlock-AF_RXRPC" , "rlock-AF_ISDN" , "rlock-AF_PHONET" , |
| 263 | "rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG" , | 264 | "rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG" , |
| 264 | "rlock-AF_NFC" , "rlock-AF_VSOCK" , "rlock-AF_KCM" , | 265 | "rlock-AF_NFC" , "rlock-AF_VSOCK" , "rlock-AF_KCM" , |
| 265 | "rlock-AF_QIPCRTR", "rlock-AF_SMC" , "rlock-AF_MAX" | 266 | "rlock-AF_QIPCRTR", "rlock-AF_SMC" , "rlock-AF_XDP" , |
| 267 | "rlock-AF_MAX" | ||
| 266 | }; | 268 | }; |
| 267 | static const char *const af_family_wlock_key_strings[AF_MAX+1] = { | 269 | static const char *const af_family_wlock_key_strings[AF_MAX+1] = { |
| 268 | "wlock-AF_UNSPEC", "wlock-AF_UNIX" , "wlock-AF_INET" , | 270 | "wlock-AF_UNSPEC", "wlock-AF_UNIX" , "wlock-AF_INET" , |
| @@ -279,7 +281,8 @@ static const char *const af_family_wlock_key_strings[AF_MAX+1] = { | |||
| 279 | "wlock-AF_RXRPC" , "wlock-AF_ISDN" , "wlock-AF_PHONET" , | 281 | "wlock-AF_RXRPC" , "wlock-AF_ISDN" , "wlock-AF_PHONET" , |
| 280 | "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG" , | 282 | "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG" , |
| 281 | "wlock-AF_NFC" , "wlock-AF_VSOCK" , "wlock-AF_KCM" , | 283 | "wlock-AF_NFC" , "wlock-AF_VSOCK" , "wlock-AF_KCM" , |
| 282 | "wlock-AF_QIPCRTR", "wlock-AF_SMC" , "wlock-AF_MAX" | 284 | "wlock-AF_QIPCRTR", "wlock-AF_SMC" , "wlock-AF_XDP" , |
| 285 | "wlock-AF_MAX" | ||
| 283 | }; | 286 | }; |
| 284 | static const char *const af_family_elock_key_strings[AF_MAX+1] = { | 287 | static const char *const af_family_elock_key_strings[AF_MAX+1] = { |
| 285 | "elock-AF_UNSPEC", "elock-AF_UNIX" , "elock-AF_INET" , | 288 | "elock-AF_UNSPEC", "elock-AF_UNIX" , "elock-AF_INET" , |
| @@ -296,7 +299,8 @@ static const char *const af_family_elock_key_strings[AF_MAX+1] = { | |||
| 296 | "elock-AF_RXRPC" , "elock-AF_ISDN" , "elock-AF_PHONET" , | 299 | "elock-AF_RXRPC" , "elock-AF_ISDN" , "elock-AF_PHONET" , |
| 297 | "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG" , | 300 | "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG" , |
| 298 | "elock-AF_NFC" , "elock-AF_VSOCK" , "elock-AF_KCM" , | 301 | "elock-AF_NFC" , "elock-AF_VSOCK" , "elock-AF_KCM" , |
| 299 | "elock-AF_QIPCRTR", "elock-AF_SMC" , "elock-AF_MAX" | 302 | "elock-AF_QIPCRTR", "elock-AF_SMC" , "elock-AF_XDP" , |
| 303 | "elock-AF_MAX" | ||
| 300 | }; | 304 | }; |
| 301 | 305 | ||
| 302 | /* | 306 | /* |
diff --git a/net/core/xdp.c b/net/core/xdp.c index 0c86b53a3a63..bf6758f74339 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c | |||
| @@ -308,11 +308,9 @@ err: | |||
| 308 | } | 308 | } |
| 309 | EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); | 309 | EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); |
| 310 | 310 | ||
| 311 | void xdp_return_frame(struct xdp_frame *xdpf) | 311 | static void xdp_return(void *data, struct xdp_mem_info *mem) |
| 312 | { | 312 | { |
| 313 | struct xdp_mem_info *mem = &xdpf->mem; | ||
| 314 | struct xdp_mem_allocator *xa; | 313 | struct xdp_mem_allocator *xa; |
| 315 | void *data = xdpf->data; | ||
| 316 | struct page *page; | 314 | struct page *page; |
| 317 | 315 | ||
| 318 | switch (mem->type) { | 316 | switch (mem->type) { |
| @@ -339,4 +337,15 @@ void xdp_return_frame(struct xdp_frame *xdpf) | |||
| 339 | break; | 337 | break; |
| 340 | } | 338 | } |
| 341 | } | 339 | } |
| 340 | |||
| 341 | void xdp_return_frame(struct xdp_frame *xdpf) | ||
| 342 | { | ||
| 343 | xdp_return(xdpf->data, &xdpf->mem); | ||
| 344 | } | ||
| 342 | EXPORT_SYMBOL_GPL(xdp_return_frame); | 345 | EXPORT_SYMBOL_GPL(xdp_return_frame); |
| 346 | |||
| 347 | void xdp_return_buff(struct xdp_buff *xdp) | ||
| 348 | { | ||
| 349 | xdp_return(xdp->data, &xdp->rxq->mem); | ||
| 350 | } | ||
| 351 | EXPORT_SYMBOL_GPL(xdp_return_buff); | ||
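The new xdp_return_buff() lets a consumer of a redirected frame hand the whole xdp_buff back to whatever memory model its RX queue registered, just as xdp_return_frame() already did for converted frames. A hypothetical error-path user, not from this commit:

```c
#include <linux/errno.h>
#include <net/xdp.h>

/* Sketch: a receive handler that cannot queue the buffer returns it;
 * xdp_return_buff() frees according to xdp->rxq->mem.type.
 */
static int example_rcv(struct xdp_buff *xdp, bool queue_full)
{
	if (queue_full) {
		xdp_return_buff(xdp);
		return -ENOSPC;
	}
	/* ... otherwise hand the buffer on to the consumer ... */
	return 0;
}
```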
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 01f3515cada0..611a26d5235c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
| @@ -209,7 +209,7 @@ static void prb_clear_rxhash(struct tpacket_kbdq_core *, | |||
| 209 | static void prb_fill_vlan_info(struct tpacket_kbdq_core *, | 209 | static void prb_fill_vlan_info(struct tpacket_kbdq_core *, |
| 210 | struct tpacket3_hdr *); | 210 | struct tpacket3_hdr *); |
| 211 | static void packet_flush_mclist(struct sock *sk); | 211 | static void packet_flush_mclist(struct sock *sk); |
| 212 | static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb); | 212 | static u16 packet_pick_tx_queue(struct sk_buff *skb); |
| 213 | 213 | ||
| 214 | struct packet_skb_cb { | 214 | struct packet_skb_cb { |
| 215 | union { | 215 | union { |
| @@ -243,40 +243,7 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); | |||
| 243 | 243 | ||
| 244 | static int packet_direct_xmit(struct sk_buff *skb) | 244 | static int packet_direct_xmit(struct sk_buff *skb) |
| 245 | { | 245 | { |
| 246 | struct net_device *dev = skb->dev; | 246 | return dev_direct_xmit(skb, packet_pick_tx_queue(skb)); |
| 247 | struct sk_buff *orig_skb = skb; | ||
| 248 | struct netdev_queue *txq; | ||
| 249 | int ret = NETDEV_TX_BUSY; | ||
| 250 | bool again = false; | ||
| 251 | |||
| 252 | if (unlikely(!netif_running(dev) || | ||
| 253 | !netif_carrier_ok(dev))) | ||
| 254 | goto drop; | ||
| 255 | |||
| 256 | skb = validate_xmit_skb_list(skb, dev, &again); | ||
| 257 | if (skb != orig_skb) | ||
| 258 | goto drop; | ||
| 259 | |||
| 260 | packet_pick_tx_queue(dev, skb); | ||
| 261 | txq = skb_get_tx_queue(dev, skb); | ||
| 262 | |||
| 263 | local_bh_disable(); | ||
| 264 | |||
| 265 | HARD_TX_LOCK(dev, txq, smp_processor_id()); | ||
| 266 | if (!netif_xmit_frozen_or_drv_stopped(txq)) | ||
| 267 | ret = netdev_start_xmit(skb, dev, txq, false); | ||
| 268 | HARD_TX_UNLOCK(dev, txq); | ||
| 269 | |||
| 270 | local_bh_enable(); | ||
| 271 | |||
| 272 | if (!dev_xmit_complete(ret)) | ||
| 273 | kfree_skb(skb); | ||
| 274 | |||
| 275 | return ret; | ||
| 276 | drop: | ||
| 277 | atomic_long_inc(&dev->tx_dropped); | ||
| 278 | kfree_skb_list(skb); | ||
| 279 | return NET_XMIT_DROP; | ||
| 280 | } | 247 | } |
| 281 | 248 | ||
| 282 | static struct net_device *packet_cached_dev_get(struct packet_sock *po) | 249 | static struct net_device *packet_cached_dev_get(struct packet_sock *po) |
| @@ -313,8 +280,9 @@ static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) | |||
| 313 | return (u16) raw_smp_processor_id() % dev->real_num_tx_queues; | 280 | return (u16) raw_smp_processor_id() % dev->real_num_tx_queues; |
| 314 | } | 281 | } |
| 315 | 282 | ||
| 316 | static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) | 283 | static u16 packet_pick_tx_queue(struct sk_buff *skb) |
| 317 | { | 284 | { |
| 285 | struct net_device *dev = skb->dev; | ||
| 318 | const struct net_device_ops *ops = dev->netdev_ops; | 286 | const struct net_device_ops *ops = dev->netdev_ops; |
| 319 | u16 queue_index; | 287 | u16 queue_index; |
| 320 | 288 | ||
| @@ -326,7 +294,7 @@ static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) | |||
| 326 | queue_index = __packet_pick_tx_queue(dev, skb); | 294 | queue_index = __packet_pick_tx_queue(dev, skb); |
| 327 | } | 295 | } |
| 328 | 296 | ||
| 329 | skb_set_queue_mapping(skb, queue_index); | 297 | return queue_index; |
| 330 | } | 298 | } |
| 331 | 299 | ||
| 332 | /* __register_prot_hook must be invoked through register_prot_hook | 300 | /* __register_prot_hook must be invoked through register_prot_hook |
diff --git a/net/xdp/Kconfig b/net/xdp/Kconfig new file mode 100644 index 000000000000..90e4a7152854 --- /dev/null +++ b/net/xdp/Kconfig | |||
| @@ -0,0 +1,7 @@ | |||
| 1 | config XDP_SOCKETS | ||
| 2 | bool "XDP sockets" | ||
| 3 | depends on BPF_SYSCALL | ||
| 4 | default n | ||
| 5 | help | ||
| 6 | XDP sockets provide a channel between XDP programs and | ||
| 7 | userspace applications. | ||
diff --git a/net/xdp/Makefile b/net/xdp/Makefile new file mode 100644 index 000000000000..074fb2b2d51c --- /dev/null +++ b/net/xdp/Makefile | |||
| @@ -0,0 +1,2 @@ | |||
| 1 | obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o | ||
| 2 | |||
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c new file mode 100644 index 000000000000..881dfdefe235 --- /dev/null +++ b/net/xdp/xdp_umem.c | |||
| @@ -0,0 +1,260 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | ||
| 2 | /* XDP user-space packet buffer | ||
| 3 | * Copyright(c) 2018 Intel Corporation. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms and conditions of the GNU General Public License, | ||
| 7 | * version 2, as published by the Free Software Foundation. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 12 | * more details. | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include <linux/init.h> | ||
| 16 | #include <linux/sched/mm.h> | ||
| 17 | #include <linux/sched/signal.h> | ||
| 18 | #include <linux/sched/task.h> | ||
| 19 | #include <linux/uaccess.h> | ||
| 20 | #include <linux/slab.h> | ||
| 21 | #include <linux/bpf.h> | ||
| 22 | #include <linux/mm.h> | ||
| 23 | |||
| 24 | #include "xdp_umem.h" | ||
| 25 | |||
| 26 | #define XDP_UMEM_MIN_FRAME_SIZE 2048 | ||
| 27 | |||
| 28 | int xdp_umem_create(struct xdp_umem **umem) | ||
| 29 | { | ||
| 30 | *umem = kzalloc(sizeof(**umem), GFP_KERNEL); | ||
| 31 | |||
| 32 | if (!(*umem)) | ||
| 33 | return -ENOMEM; | ||
| 34 | |||
| 35 | return 0; | ||
| 36 | } | ||
| 37 | |||
| 38 | static void xdp_umem_unpin_pages(struct xdp_umem *umem) | ||
| 39 | { | ||
| 40 | unsigned int i; | ||
| 41 | |||
| 42 | if (umem->pgs) { | ||
| 43 | for (i = 0; i < umem->npgs; i++) { | ||
| 44 | struct page *page = umem->pgs[i]; | ||
| 45 | |||
| 46 | set_page_dirty_lock(page); | ||
| 47 | put_page(page); | ||
| 48 | } | ||
| 49 | |||
| 50 | kfree(umem->pgs); | ||
| 51 | umem->pgs = NULL; | ||
| 52 | } | ||
| 53 | } | ||
| 54 | |||
| 55 | static void xdp_umem_unaccount_pages(struct xdp_umem *umem) | ||
| 56 | { | ||
| 57 | if (umem->user) { | ||
| 58 | atomic_long_sub(umem->npgs, &umem->user->locked_vm); | ||
| 59 | free_uid(umem->user); | ||
| 60 | } | ||
| 61 | } | ||
| 62 | |||
| 63 | static void xdp_umem_release(struct xdp_umem *umem) | ||
| 64 | { | ||
| 65 | struct task_struct *task; | ||
| 66 | struct mm_struct *mm; | ||
| 67 | |||
| 68 | if (umem->fq) { | ||
| 69 | xskq_destroy(umem->fq); | ||
| 70 | umem->fq = NULL; | ||
| 71 | } | ||
| 72 | |||
| 73 | if (umem->cq) { | ||
| 74 | xskq_destroy(umem->cq); | ||
| 75 | umem->cq = NULL; | ||
| 76 | } | ||
| 77 | |||
| 78 | if (umem->pgs) { | ||
| 79 | xdp_umem_unpin_pages(umem); | ||
| 80 | |||
| 81 | task = get_pid_task(umem->pid, PIDTYPE_PID); | ||
| 82 | put_pid(umem->pid); | ||
| 83 | if (!task) | ||
| 84 | goto out; | ||
| 85 | mm = get_task_mm(task); | ||
| 86 | put_task_struct(task); | ||
| 87 | if (!mm) | ||
| 88 | goto out; | ||
| 89 | |||
| 90 | mmput(mm); | ||
| 91 | umem->pgs = NULL; | ||
| 92 | } | ||
| 93 | |||
| 94 | xdp_umem_unaccount_pages(umem); | ||
| 95 | out: | ||
| 96 | kfree(umem); | ||
| 97 | } | ||
| 98 | |||
| 99 | static void xdp_umem_release_deferred(struct work_struct *work) | ||
| 100 | { | ||
| 101 | struct xdp_umem *umem = container_of(work, struct xdp_umem, work); | ||
| 102 | |||
| 103 | xdp_umem_release(umem); | ||
| 104 | } | ||
| 105 | |||
| 106 | void xdp_get_umem(struct xdp_umem *umem) | ||
| 107 | { | ||
| 108 | atomic_inc(&umem->users); | ||
| 109 | } | ||
| 110 | |||
| 111 | void xdp_put_umem(struct xdp_umem *umem) | ||
| 112 | { | ||
| 113 | if (!umem) | ||
| 114 | return; | ||
| 115 | |||
| 116 | if (atomic_dec_and_test(&umem->users)) { | ||
| 117 | INIT_WORK(&umem->work, xdp_umem_release_deferred); | ||
| 118 | schedule_work(&umem->work); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | static int xdp_umem_pin_pages(struct xdp_umem *umem) | ||
| 123 | { | ||
| 124 | unsigned int gup_flags = FOLL_WRITE; | ||
| 125 | long npgs; | ||
| 126 | int err; | ||
| 127 | |||
| 128 | umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL); | ||
| 129 | if (!umem->pgs) | ||
| 130 | return -ENOMEM; | ||
| 131 | |||
| 132 | down_write(¤t->mm->mmap_sem); | ||
| 133 | npgs = get_user_pages(umem->address, umem->npgs, | ||
| 134 | gup_flags, &umem->pgs[0], NULL); | ||
| 135 | up_write(¤t->mm->mmap_sem); | ||
| 136 | |||
| 137 | if (npgs != umem->npgs) { | ||
| 138 | if (npgs >= 0) { | ||
| 139 | umem->npgs = npgs; | ||
| 140 | err = -ENOMEM; | ||
| 141 | goto out_pin; | ||
| 142 | } | ||
| 143 | err = npgs; | ||
| 144 | goto out_pgs; | ||
| 145 | } | ||
| 146 | return 0; | ||
| 147 | |||
| 148 | out_pin: | ||
| 149 | xdp_umem_unpin_pages(umem); | ||
| 150 | out_pgs: | ||
| 151 | kfree(umem->pgs); | ||
| 152 | umem->pgs = NULL; | ||
| 153 | return err; | ||
| 154 | } | ||
| 155 | |||
| 156 | static int xdp_umem_account_pages(struct xdp_umem *umem) | ||
| 157 | { | ||
| 158 | unsigned long lock_limit, new_npgs, old_npgs; | ||
| 159 | |||
| 160 | if (capable(CAP_IPC_LOCK)) | ||
| 161 | return 0; | ||
| 162 | |||
| 163 | lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; | ||
| 164 | umem->user = get_uid(current_user()); | ||
| 165 | |||
| 166 | do { | ||
| 167 | old_npgs = atomic_long_read(&umem->user->locked_vm); | ||
| 168 | new_npgs = old_npgs + umem->npgs; | ||
| 169 | if (new_npgs > lock_limit) { | ||
| 170 | free_uid(umem->user); | ||
| 171 | umem->user = NULL; | ||
| 172 | return -ENOBUFS; | ||
| 173 | } | ||
| 174 | } while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs, | ||
| 175 | new_npgs) != old_npgs); | ||
| 176 | return 0; | ||
| 177 | } | ||
| 178 | |||
| 179 | int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) | ||
| 180 | { | ||
| 181 | u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom; | ||
| 182 | u64 addr = mr->addr, size = mr->len, nframes; | ||
| 183 | unsigned int nfpp; | ||
| 184 | int size_chk, err; | ||
| 185 | |||
| 186 | if (!umem) | ||
| 187 | return -EINVAL; | ||
| 188 | |||
| 189 | if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) { | ||
| 190 | /* Strictly speaking we could support this, if: | ||
| 191 | * - huge pages, or | ||
| 192 | * - using an IOMMU, or | ||
| 193 | * - making sure the memory area is consecutive | ||
| 194 | * but for now, we simply say "computer says no". | ||
| 195 | */ | ||
| 196 | return -EINVAL; | ||
| 197 | } | ||
| 198 | |||
| 199 | if (!is_power_of_2(frame_size)) | ||
| 200 | return -EINVAL; | ||
| 201 | |||
| 202 | if (!PAGE_ALIGNED(addr)) { | ||
| 203 | /* Memory area has to be page size aligned. This | ||
| 204 | * restriction is for simplicity and might change. | ||
| 205 | */ | ||
| 206 | return -EINVAL; | ||
| 207 | } | ||
| 208 | |||
| 209 | if ((addr + size) < addr) | ||
| 210 | return -EINVAL; | ||
| 211 | |||
| 212 | nframes = size / frame_size; | ||
| 213 | if (nframes == 0 || nframes > UINT_MAX) | ||
| 214 | return -EINVAL; | ||
| 215 | |||
| 216 | nfpp = PAGE_SIZE / frame_size; | ||
| 217 | if (nframes < nfpp || nframes % nfpp) | ||
| 218 | return -EINVAL; | ||
| 219 | |||
| 220 | frame_headroom = ALIGN(frame_headroom, 64); | ||
| 221 | |||
| 222 | size_chk = frame_size - frame_headroom - XDP_PACKET_HEADROOM; | ||
| 223 | if (size_chk < 0) | ||
| 224 | return -EINVAL; | ||
| 225 | |||
| 226 | umem->pid = get_task_pid(current, PIDTYPE_PID); | ||
| 227 | umem->size = (size_t)size; | ||
| 228 | umem->address = (unsigned long)addr; | ||
| 229 | umem->props.frame_size = frame_size; | ||
| 230 | umem->props.nframes = nframes; | ||
| 231 | umem->frame_headroom = frame_headroom; | ||
| 232 | umem->npgs = size / PAGE_SIZE; | ||
| 233 | umem->pgs = NULL; | ||
| 234 | umem->user = NULL; | ||
| 235 | |||
| 236 | umem->frame_size_log2 = ilog2(frame_size); | ||
| 237 | umem->nfpp_mask = nfpp - 1; | ||
| 238 | umem->nfpplog2 = ilog2(nfpp); | ||
| 239 | atomic_set(&umem->users, 1); | ||
| 240 | |||
| 241 | err = xdp_umem_account_pages(umem); | ||
| 242 | if (err) | ||
| 243 | goto out; | ||
| 244 | |||
| 245 | err = xdp_umem_pin_pages(umem); | ||
| 246 | if (err) | ||
| 247 | goto out_account; | ||
| 248 | return 0; | ||
| 249 | |||
| 250 | out_account: | ||
| 251 | xdp_umem_unaccount_pages(umem); | ||
| 252 | out: | ||
| 253 | put_pid(umem->pid); | ||
| 254 | return err; | ||
| 255 | } | ||
| 256 | |||
| 257 | bool xdp_umem_validate_queues(struct xdp_umem *umem) | ||
| 258 | { | ||
| 259 | return (umem->fq && umem->cq); | ||
| 260 | } | ||
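xdp_umem_reg() above accepts only a page-aligned area whose frame size is a power of two between XDP_UMEM_MIN_FRAME_SIZE (2048) and PAGE_SIZE, with a whole number of frames per page. A sketch of a registration that satisfies those checks, using struct xdp_umem_reg from this series' uapi if_xdp.h; the register_umem() helper and the NUM_FRAMES/FRAME_SIZE values are illustrative, not part of the patch:

```c
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_xdp.h>	/* struct xdp_umem_reg, XDP_UMEM_REG, SOL_XDP */

#define NUM_FRAMES	1024
#define FRAME_SIZE	2048	/* power of two, >= XDP_UMEM_MIN_FRAME_SIZE */

/* Illustrative helper: register a freshly allocated UMEM on socket fd. */
static int register_umem(int fd, void **umem_area)
{
	struct xdp_umem_reg mr;
	void *bufs;

	/* posix_memalign() provides the page alignment xdp_umem_reg() demands */
	if (posix_memalign(&bufs, (size_t)getpagesize(),
			   NUM_FRAMES * FRAME_SIZE))
		return -1;

	memset(&mr, 0, sizeof(mr));
	mr.addr = (unsigned long)bufs;
	mr.len = NUM_FRAMES * FRAME_SIZE;
	mr.frame_size = FRAME_SIZE;
	mr.frame_headroom = 0;

	if (setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr))) {
		free(bufs);
		return -1;
	}
	*umem_area = bufs;
	return 0;
}
```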
diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h new file mode 100644 index 000000000000..7e0b2fab8522 --- /dev/null +++ b/net/xdp/xdp_umem.h | |||
| @@ -0,0 +1,67 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 | ||
| 2 | * XDP user-space packet buffer | ||
| 3 | * Copyright(c) 2018 Intel Corporation. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms and conditions of the GNU General Public License, | ||
| 7 | * version 2, as published by the Free Software Foundation. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 12 | * more details. | ||
| 13 | */ | ||
| 14 | |||
| 15 | #ifndef XDP_UMEM_H_ | ||
| 16 | #define XDP_UMEM_H_ | ||
| 17 | |||
| 18 | #include <linux/mm.h> | ||
| 19 | #include <linux/if_xdp.h> | ||
| 20 | #include <linux/workqueue.h> | ||
| 21 | |||
| 22 | #include "xsk_queue.h" | ||
| 23 | #include "xdp_umem_props.h" | ||
| 24 | |||
| 25 | struct xdp_umem { | ||
| 26 | struct xsk_queue *fq; | ||
| 27 | struct xsk_queue *cq; | ||
| 28 | struct page **pgs; | ||
| 29 | struct xdp_umem_props props; | ||
| 30 | u32 npgs; | ||
| 31 | u32 frame_headroom; | ||
| 32 | u32 nfpp_mask; | ||
| 33 | u32 nfpplog2; | ||
| 34 | u32 frame_size_log2; | ||
| 35 | struct user_struct *user; | ||
| 36 | struct pid *pid; | ||
| 37 | unsigned long address; | ||
| 38 | size_t size; | ||
| 39 | atomic_t users; | ||
| 40 | struct work_struct work; | ||
| 41 | }; | ||
| 42 | |||
| 43 | static inline char *xdp_umem_get_data(struct xdp_umem *umem, u32 idx) | ||
| 44 | { | ||
| 45 | u64 pg, off; | ||
| 46 | char *data; | ||
| 47 | |||
| 48 | pg = idx >> umem->nfpplog2; | ||
| 49 | off = (idx & umem->nfpp_mask) << umem->frame_size_log2; | ||
| 50 | |||
| 51 | data = page_address(umem->pgs[pg]); | ||
| 52 | return data + off; | ||
| 53 | } | ||
| 54 | |||
| 55 | static inline char *xdp_umem_get_data_with_headroom(struct xdp_umem *umem, | ||
| 56 | u32 idx) | ||
| 57 | { | ||
| 58 | return xdp_umem_get_data(umem, idx) + umem->frame_headroom; | ||
| 59 | } | ||
| 60 | |||
| 61 | bool xdp_umem_validate_queues(struct xdp_umem *umem); | ||
| 62 | int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr); | ||
| 63 | void xdp_get_umem(struct xdp_umem *umem); | ||
| 64 | void xdp_put_umem(struct xdp_umem *umem); | ||
| 65 | int xdp_umem_create(struct xdp_umem **umem); | ||
| 66 | |||
| 67 | #endif /* XDP_UMEM_H_ */ | ||
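xdp_umem_get_data() resolves a frame id through the pinned page array using the cached log2/mask fields. No page indirection is needed in user space, where the registered area is one contiguous mapping; assuming the same frame_size and headroom passed at registration, the equivalent lookup is plain arithmetic (umem_frame() is an illustrative name):

```c
#include <stdint.h>

/* User-space counterpart of xdp_umem_get_data_with_headroom():
 * frame id -> pointer into the contiguous registered area.
 */
static inline char *umem_frame(void *umem_area, uint32_t idx,
			       uint32_t frame_size, uint32_t headroom)
{
	return (char *)umem_area + (uint64_t)idx * frame_size + headroom;
}
```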
diff --git a/net/xdp/xdp_umem_props.h b/net/xdp/xdp_umem_props.h new file mode 100644 index 000000000000..77fb5daf29f3 --- /dev/null +++ b/net/xdp/xdp_umem_props.h | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 | ||
| 2 | * XDP user-space packet buffer | ||
| 3 | * Copyright(c) 2018 Intel Corporation. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms and conditions of the GNU General Public License, | ||
| 7 | * version 2, as published by the Free Software Foundation. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 12 | * more details. | ||
| 13 | */ | ||
| 14 | |||
| 15 | #ifndef XDP_UMEM_PROPS_H_ | ||
| 16 | #define XDP_UMEM_PROPS_H_ | ||
| 17 | |||
| 18 | struct xdp_umem_props { | ||
| 19 | u32 frame_size; | ||
| 20 | u32 nframes; | ||
| 21 | }; | ||
| 22 | |||
| 23 | #endif /* XDP_UMEM_PROPS_H_ */ | ||
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c new file mode 100644 index 000000000000..009c5af5bba5 --- /dev/null +++ b/net/xdp/xsk.c | |||
| @@ -0,0 +1,656 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | ||
| 2 | /* XDP sockets | ||
| 3 | * | ||
| 4 | * AF_XDP sockets allow a channel between XDP programs and userspace | ||
| 5 | * applications. | ||
| 6 | * Copyright(c) 2018 Intel Corporation. | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or modify it | ||
| 9 | * under the terms and conditions of the GNU General Public License, | ||
| 10 | * version 2, as published by the Free Software Foundation. | ||
| 11 | * | ||
| 12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 15 | * more details. | ||
| 16 | * | ||
| 17 | * Author(s): Björn Töpel <bjorn.topel@intel.com> | ||
| 18 | * Magnus Karlsson <magnus.karlsson@intel.com> | ||
| 19 | */ | ||
| 20 | |||
| 21 | #define pr_fmt(fmt) "AF_XDP: %s: " fmt, __func__ | ||
| 22 | |||
| 23 | #include <linux/if_xdp.h> | ||
| 24 | #include <linux/init.h> | ||
| 25 | #include <linux/sched/mm.h> | ||
| 26 | #include <linux/sched/signal.h> | ||
| 27 | #include <linux/sched/task.h> | ||
| 28 | #include <linux/socket.h> | ||
| 29 | #include <linux/file.h> | ||
| 30 | #include <linux/uaccess.h> | ||
| 31 | #include <linux/net.h> | ||
| 32 | #include <linux/netdevice.h> | ||
| 33 | #include <net/xdp_sock.h> | ||
| 34 | #include <net/xdp.h> | ||
| 35 | |||
| 36 | #include "xsk_queue.h" | ||
| 37 | #include "xdp_umem.h" | ||
| 38 | |||
| 39 | #define TX_BATCH_SIZE 16 | ||
| 40 | |||
| 41 | static struct xdp_sock *xdp_sk(struct sock *sk) | ||
| 42 | { | ||
| 43 | return (struct xdp_sock *)sk; | ||
| 44 | } | ||
| 45 | |||
| 46 | bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) | ||
| 47 | { | ||
| 48 | return !!xs->rx; | ||
| 49 | } | ||
| 50 | |||
| 51 | static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) | ||
| 52 | { | ||
| 53 | u32 *id, len = xdp->data_end - xdp->data; | ||
| 54 | void *buffer; | ||
| 55 | int err = 0; | ||
| 56 | |||
| 57 | if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) | ||
| 58 | return -EINVAL; | ||
| 59 | |||
| 60 | id = xskq_peek_id(xs->umem->fq); | ||
| 61 | if (!id) | ||
| 62 | return -ENOSPC; | ||
| 63 | |||
| 64 | buffer = xdp_umem_get_data_with_headroom(xs->umem, *id); | ||
| 65 | memcpy(buffer, xdp->data, len); | ||
| 66 | err = xskq_produce_batch_desc(xs->rx, *id, len, | ||
| 67 | xs->umem->frame_headroom); | ||
| 68 | if (!err) | ||
| 69 | xskq_discard_id(xs->umem->fq); | ||
| 70 | |||
| 71 | return err; | ||
| 72 | } | ||
| 73 | |||
| 74 | int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) | ||
| 75 | { | ||
| 76 | int err; | ||
| 77 | |||
| 78 | err = __xsk_rcv(xs, xdp); | ||
| 79 | if (likely(!err)) | ||
| 80 | xdp_return_buff(xdp); | ||
| 81 | else | ||
| 82 | xs->rx_dropped++; | ||
| 83 | |||
| 84 | return err; | ||
| 85 | } | ||
| 86 | |||
| 87 | void xsk_flush(struct xdp_sock *xs) | ||
| 88 | { | ||
| 89 | xskq_produce_flush_desc(xs->rx); | ||
| 90 | xs->sk.sk_data_ready(&xs->sk); | ||
| 91 | } | ||
| 92 | |||
| 93 | int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) | ||
| 94 | { | ||
| 95 | int err; | ||
| 96 | |||
| 97 | err = __xsk_rcv(xs, xdp); | ||
| 98 | if (!err) | ||
| 99 | xsk_flush(xs); | ||
| 100 | else | ||
| 101 | xs->rx_dropped++; | ||
| 102 | |||
| 103 | return err; | ||
| 104 | } | ||
| 105 | |||
| 106 | static void xsk_destruct_skb(struct sk_buff *skb) | ||
| 107 | { | ||
| 108 | u32 id = (u32)(long)skb_shinfo(skb)->destructor_arg; | ||
| 109 | struct xdp_sock *xs = xdp_sk(skb->sk); | ||
| 110 | |||
| 111 | WARN_ON_ONCE(xskq_produce_id(xs->umem->cq, id)); | ||
| 112 | |||
| 113 | sock_wfree(skb); | ||
| 114 | } | ||
| 115 | |||
| 116 | static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, | ||
| 117 | size_t total_len) | ||
| 118 | { | ||
| 119 | bool need_wait = !(m->msg_flags & MSG_DONTWAIT); | ||
| 120 | u32 max_batch = TX_BATCH_SIZE; | ||
| 121 | struct xdp_sock *xs = xdp_sk(sk); | ||
| 122 | bool sent_frame = false; | ||
| 123 | struct xdp_desc desc; | ||
| 124 | struct sk_buff *skb; | ||
| 125 | int err = 0; | ||
| 126 | |||
| 127 | if (unlikely(!xs->tx)) | ||
| 128 | return -ENOBUFS; | ||
| 129 | if (need_wait) | ||
| 130 | return -EOPNOTSUPP; | ||
| 131 | |||
| 132 | mutex_lock(&xs->mutex); | ||
| 133 | |||
| 134 | while (xskq_peek_desc(xs->tx, &desc)) { | ||
| 135 | char *buffer; | ||
| 136 | u32 id, len; | ||
| 137 | |||
| 138 | if (max_batch-- == 0) { | ||
| 139 | err = -EAGAIN; | ||
| 140 | goto out; | ||
| 141 | } | ||
| 142 | |||
| 143 | if (xskq_reserve_id(xs->umem->cq)) { | ||
| 144 | err = -EAGAIN; | ||
| 145 | goto out; | ||
| 146 | } | ||
| 147 | |||
| 148 | len = desc.len; | ||
| 149 | if (unlikely(len > xs->dev->mtu)) { | ||
| 150 | err = -EMSGSIZE; | ||
| 151 | goto out; | ||
| 152 | } | ||
| 153 | |||
| 154 | skb = sock_alloc_send_skb(sk, len, !need_wait, &err); | ||
| 155 | if (unlikely(!skb)) { | ||
| 156 | err = -EAGAIN; | ||
| 157 | goto out; | ||
| 158 | } | ||
| 159 | |||
| 160 | skb_put(skb, len); | ||
| 161 | id = desc.idx; | ||
| 162 | buffer = xdp_umem_get_data(xs->umem, id) + desc.offset; | ||
| 163 | err = skb_store_bits(skb, 0, buffer, len); | ||
| 164 | if (unlikely(err)) { | ||
| 165 | kfree_skb(skb); | ||
| 166 | goto out; | ||
| 167 | } | ||
| 168 | |||
| 169 | skb->dev = xs->dev; | ||
| 170 | skb->priority = sk->sk_priority; | ||
| 171 | skb->mark = sk->sk_mark; | ||
| 172 | skb_shinfo(skb)->destructor_arg = (void *)(long)id; | ||
| 173 | skb->destructor = xsk_destruct_skb; | ||
| 174 | |||
| 175 | err = dev_direct_xmit(skb, xs->queue_id); | ||
| 176 | /* Ignore NET_XMIT_CN as packet might have been sent */ | ||
| 177 | if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) { | ||
| 178 | err = -EAGAIN; | ||
| 179 | /* SKB consumed by dev_direct_xmit() */ | ||
| 180 | goto out; | ||
| 181 | } | ||
| 182 | |||
| 183 | sent_frame = true; | ||
| 184 | xskq_discard_desc(xs->tx); | ||
| 185 | } | ||
| 186 | |||
| 187 | out: | ||
| 188 | if (sent_frame) | ||
| 189 | sk->sk_write_space(sk); | ||
| 190 | |||
| 191 | mutex_unlock(&xs->mutex); | ||
| 192 | return err; | ||
| 193 | } | ||
| 194 | |||
| 195 | static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) | ||
| 196 | { | ||
| 197 | struct sock *sk = sock->sk; | ||
| 198 | struct xdp_sock *xs = xdp_sk(sk); | ||
| 199 | |||
| 200 | if (unlikely(!xs->dev)) | ||
| 201 | return -ENXIO; | ||
| 202 | if (unlikely(!(xs->dev->flags & IFF_UP))) | ||
| 203 | return -ENETDOWN; | ||
| 204 | |||
| 205 | return xsk_generic_xmit(sk, m, total_len); | ||
| 206 | } | ||
| 207 | |||
| 208 | static unsigned int xsk_poll(struct file *file, struct socket *sock, | ||
| 209 | struct poll_table_struct *wait) | ||
| 210 | { | ||
| 211 | unsigned int mask = datagram_poll(file, sock, wait); | ||
| 212 | struct sock *sk = sock->sk; | ||
| 213 | struct xdp_sock *xs = xdp_sk(sk); | ||
| 214 | |||
| 215 | if (xs->rx && !xskq_empty_desc(xs->rx)) | ||
| 216 | mask |= POLLIN | POLLRDNORM; | ||
| 217 | if (xs->tx && !xskq_full_desc(xs->tx)) | ||
| 218 | mask |= POLLOUT | POLLWRNORM; | ||
| 219 | |||
| 220 | return mask; | ||
| 221 | } | ||
| 222 | |||
| 223 | static int xsk_init_queue(u32 entries, struct xsk_queue **queue, | ||
| 224 | bool umem_queue) | ||
| 225 | { | ||
| 226 | struct xsk_queue *q; | ||
| 227 | |||
| 228 | if (entries == 0 || *queue || !is_power_of_2(entries)) | ||
| 229 | return -EINVAL; | ||
| 230 | |||
| 231 | q = xskq_create(entries, umem_queue); | ||
| 232 | if (!q) | ||
| 233 | return -ENOMEM; | ||
| 234 | |||
| 235 | *queue = q; | ||
| 236 | return 0; | ||
| 237 | } | ||
| 238 | |||
| 239 | static void __xsk_release(struct xdp_sock *xs) | ||
| 240 | { | ||
| 241 | /* Wait for driver to stop using the xdp socket. */ | ||
| 242 | synchronize_net(); | ||
| 243 | |||
| 244 | dev_put(xs->dev); | ||
| 245 | } | ||
| 246 | |||
| 247 | static int xsk_release(struct socket *sock) | ||
| 248 | { | ||
| 249 | struct sock *sk = sock->sk; | ||
| 250 | struct xdp_sock *xs = xdp_sk(sk); | ||
| 251 | struct net *net; | ||
| 252 | |||
| 253 | if (!sk) | ||
| 254 | return 0; | ||
| 255 | |||
| 256 | net = sock_net(sk); | ||
| 257 | |||
| 258 | local_bh_disable(); | ||
| 259 | sock_prot_inuse_add(net, sk->sk_prot, -1); | ||
| 260 | local_bh_enable(); | ||
| 261 | |||
| 262 | if (xs->dev) { | ||
| 263 | __xsk_release(xs); | ||
| 264 | xs->dev = NULL; | ||
| 265 | } | ||
| 266 | |||
| 267 | sock_orphan(sk); | ||
| 268 | sock->sk = NULL; | ||
| 269 | |||
| 270 | sk_refcnt_debug_release(sk); | ||
| 271 | sock_put(sk); | ||
| 272 | |||
| 273 | return 0; | ||
| 274 | } | ||
| 275 | |||
| 276 | static struct socket *xsk_lookup_xsk_from_fd(int fd) | ||
| 277 | { | ||
| 278 | struct socket *sock; | ||
| 279 | int err; | ||
| 280 | |||
| 281 | sock = sockfd_lookup(fd, &err); | ||
| 282 | if (!sock) | ||
| 283 | return ERR_PTR(-ENOTSOCK); | ||
| 284 | |||
| 285 | if (sock->sk->sk_family != PF_XDP) { | ||
| 286 | sockfd_put(sock); | ||
| 287 | return ERR_PTR(-ENOPROTOOPT); | ||
| 288 | } | ||
| 289 | |||
| 290 | return sock; | ||
| 291 | } | ||
| 292 | |||
| 293 | static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) | ||
| 294 | { | ||
| 295 | struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr; | ||
| 296 | struct sock *sk = sock->sk; | ||
| 297 | struct net_device *dev, *dev_curr; | ||
| 298 | struct xdp_sock *xs = xdp_sk(sk); | ||
| 299 | struct xdp_umem *old_umem = NULL; | ||
| 300 | int err = 0; | ||
| 301 | |||
| 302 | if (addr_len < sizeof(struct sockaddr_xdp)) | ||
| 303 | return -EINVAL; | ||
| 304 | if (sxdp->sxdp_family != AF_XDP) | ||
| 305 | return -EINVAL; | ||
| 306 | |||
| 307 | mutex_lock(&xs->mutex); | ||
| 308 | dev_curr = xs->dev; | ||
| 309 | dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex); | ||
| 310 | if (!dev) { | ||
| 311 | err = -ENODEV; | ||
| 312 | goto out_release; | ||
| 313 | } | ||
| 314 | |||
| 315 | if (!xs->rx && !xs->tx) { | ||
| 316 | err = -EINVAL; | ||
| 317 | goto out_unlock; | ||
| 318 | } | ||
| 319 | |||
| 320 | if (sxdp->sxdp_queue_id >= dev->num_rx_queues) { | ||
| 321 | err = -EINVAL; | ||
| 322 | goto out_unlock; | ||
| 323 | } | ||
| 324 | |||
| 325 | if (sxdp->sxdp_flags & XDP_SHARED_UMEM) { | ||
| 326 | struct xdp_sock *umem_xs; | ||
| 327 | struct socket *sock; | ||
| 328 | |||
| 329 | if (xs->umem) { | ||
| 330 | /* We already have our own. */ | ||
| 331 | err = -EINVAL; | ||
| 332 | goto out_unlock; | ||
| 333 | } | ||
| 334 | |||
| 335 | sock = xsk_lookup_xsk_from_fd(sxdp->sxdp_shared_umem_fd); | ||
| 336 | if (IS_ERR(sock)) { | ||
| 337 | err = PTR_ERR(sock); | ||
| 338 | goto out_unlock; | ||
| 339 | } | ||
| 340 | |||
| 341 | umem_xs = xdp_sk(sock->sk); | ||
| 342 | if (!umem_xs->umem) { | ||
| 343 | /* No umem to inherit. */ | ||
| 344 | err = -EBADF; | ||
| 345 | sockfd_put(sock); | ||
| 346 | goto out_unlock; | ||
| 347 | } else if (umem_xs->dev != dev || | ||
| 348 | umem_xs->queue_id != sxdp->sxdp_queue_id) { | ||
| 349 | err = -EINVAL; | ||
| 350 | sockfd_put(sock); | ||
| 351 | goto out_unlock; | ||
| 352 | } | ||
| 353 | |||
| 354 | xdp_get_umem(umem_xs->umem); | ||
| 355 | old_umem = xs->umem; | ||
| 356 | xs->umem = umem_xs->umem; | ||
| 357 | sockfd_put(sock); | ||
| 358 | } else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) { | ||
| 359 | err = -EINVAL; | ||
| 360 | goto out_unlock; | ||
| 361 | } else { | ||
| 362 | /* This xsk has its own umem. */ | ||
| 363 | xskq_set_umem(xs->umem->fq, &xs->umem->props); | ||
| 364 | xskq_set_umem(xs->umem->cq, &xs->umem->props); | ||
| 365 | } | ||
| 366 | |||
| 367 | /* Rebind? */ | ||
| 368 | if (dev_curr && (dev_curr != dev || | ||
| 369 | xs->queue_id != sxdp->sxdp_queue_id)) { | ||
| 370 | __xsk_release(xs); | ||
| 371 | if (old_umem) | ||
| 372 | xdp_put_umem(old_umem); | ||
| 373 | } | ||
| 374 | |||
| 375 | xs->dev = dev; | ||
| 376 | xs->queue_id = sxdp->sxdp_queue_id; | ||
| 377 | |||
| 378 | xskq_set_umem(xs->rx, &xs->umem->props); | ||
| 379 | xskq_set_umem(xs->tx, &xs->umem->props); | ||
| 380 | |||
| 381 | out_unlock: | ||
| 382 | if (err) | ||
| 383 | dev_put(dev); | ||
| 384 | out_release: | ||
| 385 | mutex_unlock(&xs->mutex); | ||
| 386 | return err; | ||
| 387 | } | ||
| 388 | |||
| 389 | static int xsk_setsockopt(struct socket *sock, int level, int optname, | ||
| 390 | char __user *optval, unsigned int optlen) | ||
| 391 | { | ||
| 392 | struct sock *sk = sock->sk; | ||
| 393 | struct xdp_sock *xs = xdp_sk(sk); | ||
| 394 | int err; | ||
| 395 | |||
| 396 | if (level != SOL_XDP) | ||
| 397 | return -ENOPROTOOPT; | ||
| 398 | |||
| 399 | switch (optname) { | ||
| 400 | case XDP_RX_RING: | ||
| 401 | case XDP_TX_RING: | ||
| 402 | { | ||
| 403 | struct xsk_queue **q; | ||
| 404 | int entries; | ||
| 405 | |||
| 406 | if (optlen < sizeof(entries)) | ||
| 407 | return -EINVAL; | ||
| 408 | if (copy_from_user(&entries, optval, sizeof(entries))) | ||
| 409 | return -EFAULT; | ||
| 410 | |||
| 411 | mutex_lock(&xs->mutex); | ||
| 412 | q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx; | ||
| 413 | err = xsk_init_queue(entries, q, false); | ||
| 414 | mutex_unlock(&xs->mutex); | ||
| 415 | return err; | ||
| 416 | } | ||
| 417 | case XDP_UMEM_REG: | ||
| 418 | { | ||
| 419 | struct xdp_umem_reg mr; | ||
| 420 | struct xdp_umem *umem; | ||
| 421 | |||
| 422 | if (xs->umem) | ||
| 423 | return -EBUSY; | ||
| 424 | |||
| 425 | if (copy_from_user(&mr, optval, sizeof(mr))) | ||
| 426 | return -EFAULT; | ||
| 427 | |||
| 428 | mutex_lock(&xs->mutex); | ||
| 429 | err = xdp_umem_create(&umem); | ||
| 430 | if (!err) | ||
| 431 | err = xdp_umem_reg(umem, &mr); | ||
| 432 | if (err) { | ||
| 433 | kfree(umem); | ||
| 434 | mutex_unlock(&xs->mutex); | ||
| 435 | return err; | ||
| 436 | } | ||
| 437 | |||
| 438 | /* Make sure umem is ready before it can be seen by others */ | ||
| 439 | smp_wmb(); | ||
| 440 | |||
| 441 | xs->umem = umem; | ||
| 442 | mutex_unlock(&xs->mutex); | ||
| 443 | return 0; | ||
| 444 | } | ||
| 445 | case XDP_UMEM_FILL_RING: | ||
| 446 | case XDP_UMEM_COMPLETION_RING: | ||
| 447 | { | ||
| 448 | struct xsk_queue **q; | ||
| 449 | int entries; | ||
| 450 | |||
| 451 | if (!xs->umem) | ||
| 452 | return -EINVAL; | ||
| 453 | |||
| 454 | if (copy_from_user(&entries, optval, sizeof(entries))) | ||
| 455 | return -EFAULT; | ||
| 456 | |||
| 457 | mutex_lock(&xs->mutex); | ||
| 458 | q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq : | ||
| 459 | &xs->umem->cq; | ||
| 460 | err = xsk_init_queue(entries, q, true); | ||
| 461 | mutex_unlock(&xs->mutex); | ||
| 462 | return err; | ||
| 463 | } | ||
| 464 | default: | ||
| 465 | break; | ||
| 466 | } | ||
| 467 | |||
| 468 | return -ENOPROTOOPT; | ||
| 469 | } | ||
| 470 | |||
| 471 | static int xsk_getsockopt(struct socket *sock, int level, int optname, | ||
| 472 | char __user *optval, int __user *optlen) | ||
| 473 | { | ||
| 474 | struct sock *sk = sock->sk; | ||
| 475 | struct xdp_sock *xs = xdp_sk(sk); | ||
| 476 | int len; | ||
| 477 | |||
| 478 | if (level != SOL_XDP) | ||
| 479 | return -ENOPROTOOPT; | ||
| 480 | |||
| 481 | if (get_user(len, optlen)) | ||
| 482 | return -EFAULT; | ||
| 483 | if (len < 0) | ||
| 484 | return -EINVAL; | ||
| 485 | |||
| 486 | switch (optname) { | ||
| 487 | case XDP_STATISTICS: | ||
| 488 | { | ||
| 489 | struct xdp_statistics stats; | ||
| 490 | |||
| 491 | if (len < sizeof(stats)) | ||
| 492 | return -EINVAL; | ||
| 493 | |||
| 494 | mutex_lock(&xs->mutex); | ||
| 495 | stats.rx_dropped = xs->rx_dropped; | ||
| 496 | stats.rx_invalid_descs = xskq_nb_invalid_descs(xs->rx); | ||
| 497 | stats.tx_invalid_descs = xskq_nb_invalid_descs(xs->tx); | ||
| 498 | mutex_unlock(&xs->mutex); | ||
| 499 | |||
| 500 | if (copy_to_user(optval, &stats, sizeof(stats))) | ||
| 501 | return -EFAULT; | ||
| 502 | if (put_user(sizeof(stats), optlen)) | ||
| 503 | return -EFAULT; | ||
| 504 | |||
| 505 | return 0; | ||
| 506 | } | ||
| 507 | default: | ||
| 508 | break; | ||
| 509 | } | ||
| 510 | |||
| 511 | return -EOPNOTSUPP; | ||
| 512 | } | ||
| 513 | |||
| 514 | static int xsk_mmap(struct file *file, struct socket *sock, | ||
| 515 | struct vm_area_struct *vma) | ||
| 516 | { | ||
| 517 | unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; | ||
| 518 | unsigned long size = vma->vm_end - vma->vm_start; | ||
| 519 | struct xdp_sock *xs = xdp_sk(sock->sk); | ||
| 520 | struct xsk_queue *q = NULL; | ||
| 521 | unsigned long pfn; | ||
| 522 | struct page *qpg; | ||
| 523 | |||
| 524 | if (offset == XDP_PGOFF_RX_RING) { | ||
| 525 | q = xs->rx; | ||
| 526 | } else if (offset == XDP_PGOFF_TX_RING) { | ||
| 527 | q = xs->tx; | ||
| 528 | } else { | ||
| 529 | if (!xs->umem) | ||
| 530 | return -EINVAL; | ||
| 531 | |||
| 532 | if (offset == XDP_UMEM_PGOFF_FILL_RING) | ||
| 533 | q = xs->umem->fq; | ||
| 534 | else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING) | ||
| 535 | q = xs->umem->cq; | ||
| 536 | } | ||
| 537 | |||
| 538 | if (!q) | ||
| 539 | return -EINVAL; | ||
| 540 | |||
| 541 | qpg = virt_to_head_page(q->ring); | ||
| 542 | if (size > (PAGE_SIZE << compound_order(qpg))) | ||
| 543 | return -EINVAL; | ||
| 544 | |||
| 545 | pfn = virt_to_phys(q->ring) >> PAGE_SHIFT; | ||
| 546 | return remap_pfn_range(vma, vma->vm_start, pfn, | ||
| 547 | size, vma->vm_page_prot); | ||
| 548 | } | ||
| 549 | |||
| 550 | static struct proto xsk_proto = { | ||
| 551 | .name = "XDP", | ||
| 552 | .owner = THIS_MODULE, | ||
| 553 | .obj_size = sizeof(struct xdp_sock), | ||
| 554 | }; | ||
| 555 | |||
| 556 | static const struct proto_ops xsk_proto_ops = { | ||
| 557 | .family = PF_XDP, | ||
| 558 | .owner = THIS_MODULE, | ||
| 559 | .release = xsk_release, | ||
| 560 | .bind = xsk_bind, | ||
| 561 | .connect = sock_no_connect, | ||
| 562 | .socketpair = sock_no_socketpair, | ||
| 563 | .accept = sock_no_accept, | ||
| 564 | .getname = sock_no_getname, | ||
| 565 | .poll = xsk_poll, | ||
| 566 | .ioctl = sock_no_ioctl, | ||
| 567 | .listen = sock_no_listen, | ||
| 568 | .shutdown = sock_no_shutdown, | ||
| 569 | .setsockopt = xsk_setsockopt, | ||
| 570 | .getsockopt = xsk_getsockopt, | ||
| 571 | .sendmsg = xsk_sendmsg, | ||
| 572 | .recvmsg = sock_no_recvmsg, | ||
| 573 | .mmap = xsk_mmap, | ||
| 574 | .sendpage = sock_no_sendpage, | ||
| 575 | }; | ||
| 576 | |||
| 577 | static void xsk_destruct(struct sock *sk) | ||
| 578 | { | ||
| 579 | struct xdp_sock *xs = xdp_sk(sk); | ||
| 580 | |||
| 581 | if (!sock_flag(sk, SOCK_DEAD)) | ||
| 582 | return; | ||
| 583 | |||
| 584 | xskq_destroy(xs->rx); | ||
| 585 | xskq_destroy(xs->tx); | ||
| 586 | xdp_put_umem(xs->umem); | ||
| 587 | |||
| 588 | sk_refcnt_debug_dec(sk); | ||
| 589 | } | ||
| 590 | |||
| 591 | static int xsk_create(struct net *net, struct socket *sock, int protocol, | ||
| 592 | int kern) | ||
| 593 | { | ||
| 594 | struct sock *sk; | ||
| 595 | struct xdp_sock *xs; | ||
| 596 | |||
| 597 | if (!ns_capable(net->user_ns, CAP_NET_RAW)) | ||
| 598 | return -EPERM; | ||
| 599 | if (sock->type != SOCK_RAW) | ||
| 600 | return -ESOCKTNOSUPPORT; | ||
| 601 | |||
| 602 | if (protocol) | ||
| 603 | return -EPROTONOSUPPORT; | ||
| 604 | |||
| 605 | sock->state = SS_UNCONNECTED; | ||
| 606 | |||
| 607 | sk = sk_alloc(net, PF_XDP, GFP_KERNEL, &xsk_proto, kern); | ||
| 608 | if (!sk) | ||
| 609 | return -ENOBUFS; | ||
| 610 | |||
| 611 | sock->ops = &xsk_proto_ops; | ||
| 612 | |||
| 613 | sock_init_data(sock, sk); | ||
| 614 | |||
| 615 | sk->sk_family = PF_XDP; | ||
| 616 | |||
| 617 | sk->sk_destruct = xsk_destruct; | ||
| 618 | sk_refcnt_debug_inc(sk); | ||
| 619 | |||
| 620 | xs = xdp_sk(sk); | ||
| 621 | mutex_init(&xs->mutex); | ||
| 622 | |||
| 623 | local_bh_disable(); | ||
| 624 | sock_prot_inuse_add(net, &xsk_proto, 1); | ||
| 625 | local_bh_enable(); | ||
| 626 | |||
| 627 | return 0; | ||
| 628 | } | ||
| 629 | |||
| 630 | static const struct net_proto_family xsk_family_ops = { | ||
| 631 | .family = PF_XDP, | ||
| 632 | .create = xsk_create, | ||
| 633 | .owner = THIS_MODULE, | ||
| 634 | }; | ||
| 635 | |||
| 636 | static int __init xsk_init(void) | ||
| 637 | { | ||
| 638 | int err; | ||
| 639 | |||
| 640 | err = proto_register(&xsk_proto, 0 /* no slab */); | ||
| 641 | if (err) | ||
| 642 | goto out; | ||
| 643 | |||
| 644 | err = sock_register(&xsk_family_ops); | ||
| 645 | if (err) | ||
| 646 | goto out_proto; | ||
| 647 | |||
| 648 | return 0; | ||
| 649 | |||
| 650 | out_proto: | ||
| 651 | proto_unregister(&xsk_proto); | ||
| 652 | out: | ||
| 653 | return err; | ||
| 654 | } | ||
| 655 | |||
| 656 | fs_initcall(xsk_init); | ||
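Tying the socket paths together: rings are sized with setsockopt(), mapped at the fixed page offsets xsk_mmap() dispatches on, and the socket is then bound to a device/queue pair. A sketch under the assumption that the ring struct layouts (struct xdp_rxtx_ring with a leading struct xdp_ring named ptrs) match this series' uapi if_xdp.h; error handling is trimmed and the helper names are illustrative:

```c
#include <stdint.h>
#include <string.h>
#include <net/if.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <linux/if_xdp.h>

/* Size and map the RX descriptor ring; entries must be a power of two
 * or xsk_init_queue() rejects the setsockopt().
 */
static struct xdp_rxtx_ring *setup_rx_ring(int fd, int ndescs)
{
	struct xdp_rxtx_ring *rx;

	if (setsockopt(fd, SOL_XDP, XDP_RX_RING, &ndescs, sizeof(ndescs)))
		return NULL;

	rx = mmap(NULL, sizeof(*rx) + ndescs * sizeof(struct xdp_desc),
		  PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
		  fd, XDP_PGOFF_RX_RING);
	return rx == MAP_FAILED ? NULL : rx;
}

static int bind_xsk(int fd, const char *ifname, uint32_t queue_id)
{
	struct sockaddr_xdp sxdp;

	memset(&sxdp, 0, sizeof(sxdp));
	sxdp.sxdp_family = AF_XDP;
	sxdp.sxdp_ifindex = if_nametoindex(ifname);
	sxdp.sxdp_queue_id = queue_id;

	/* xsk_bind() fails unless an RX or TX ring exists and, for a
	 * private UMEM, both FILL and COMPLETION rings are configured.
	 */
	return bind(fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
}
```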
diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c new file mode 100644 index 000000000000..d012e5e23591 --- /dev/null +++ b/net/xdp/xsk_queue.c | |||
| @@ -0,0 +1,73 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | ||
| 2 | /* XDP user-space ring structure | ||
| 3 | * Copyright(c) 2018 Intel Corporation. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms and conditions of the GNU General Public License, | ||
| 7 | * version 2, as published by the Free Software Foundation. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 12 | * more details. | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include <linux/slab.h> | ||
| 16 | |||
| 17 | #include "xsk_queue.h" | ||
| 18 | |||
| 19 | void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props) | ||
| 20 | { | ||
| 21 | if (!q) | ||
| 22 | return; | ||
| 23 | |||
| 24 | q->umem_props = *umem_props; | ||
| 25 | } | ||
| 26 | |||
| 27 | static u32 xskq_umem_get_ring_size(struct xsk_queue *q) | ||
| 28 | { | ||
| 29 | return sizeof(struct xdp_umem_ring) + q->nentries * sizeof(u32); | ||
| 30 | } | ||
| 31 | |||
| 32 | static u32 xskq_rxtx_get_ring_size(struct xsk_queue *q) | ||
| 33 | { | ||
| 34 | return (sizeof(struct xdp_ring) + | ||
| 35 | q->nentries * sizeof(struct xdp_desc)); | ||
| 36 | } | ||
| 37 | |||
| 38 | struct xsk_queue *xskq_create(u32 nentries, bool umem_queue) | ||
| 39 | { | ||
| 40 | struct xsk_queue *q; | ||
| 41 | gfp_t gfp_flags; | ||
| 42 | size_t size; | ||
| 43 | |||
| 44 | q = kzalloc(sizeof(*q), GFP_KERNEL); | ||
| 45 | if (!q) | ||
| 46 | return NULL; | ||
| 47 | |||
| 48 | q->nentries = nentries; | ||
| 49 | q->ring_mask = nentries - 1; | ||
| 50 | |||
| 51 | gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | | ||
| 52 | __GFP_COMP | __GFP_NORETRY; | ||
| 53 | size = umem_queue ? xskq_umem_get_ring_size(q) : | ||
| 54 | xskq_rxtx_get_ring_size(q); | ||
| 55 | |||
| 56 | q->ring = (struct xdp_ring *)__get_free_pages(gfp_flags, | ||
| 57 | get_order(size)); | ||
| 58 | if (!q->ring) { | ||
| 59 | kfree(q); | ||
| 60 | return NULL; | ||
| 61 | } | ||
| 62 | |||
| 63 | return q; | ||
| 64 | } | ||
| 65 | |||
| 66 | void xskq_destroy(struct xsk_queue *q) | ||
| 67 | { | ||
| 68 | if (!q) | ||
| 69 | return; | ||
| 70 | |||
| 71 | page_frag_free(q->ring); | ||
| 72 | kfree(q); | ||
| 73 | } | ||
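xskq_create() above backs a umem queue with a page-order allocation of struct xdp_umem_ring followed by nentries u32 ids, which is what user space maps through XDP_UMEM_PGOFF_FILL_RING. A sketch of handing every frame id to the kernel through the FILL ring; the ptrs member name is taken from this series' uapi header and populate_fill_ring() is an illustrative helper (64-bit off_t assumed, since the mmap offset exceeds 32 bits):

```c
#include <sys/mman.h>
#include <sys/socket.h>
#include <linux/if_xdp.h>

static int populate_fill_ring(int fd, unsigned int ndescs)
{
	struct xdp_umem_ring *fq;
	unsigned int i;

	if (setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
		       &ndescs, sizeof(ndescs)))
		return -1;

	fq = mmap(NULL, sizeof(*fq) + ndescs * sizeof(__u32),
		  PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
		  fd, XDP_UMEM_PGOFF_FILL_RING);
	if (fq == MAP_FAILED)
		return -1;

	/* Assumes the UMEM holds at least ndescs frames: id i in slot i */
	for (i = 0; i < ndescs; i++)
		fq->desc[i] = i;

	/* Publish: ids must be visible before the producer index moves,
	 * mirroring the smp_wmb() in xskq_produce_id().
	 */
	__sync_synchronize();
	*(volatile __u32 *)&fq->ptrs.producer = ndescs;
	return 0;
}
```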
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h new file mode 100644 index 000000000000..7aa9a535db0e --- /dev/null +++ b/net/xdp/xsk_queue.h | |||
| @@ -0,0 +1,247 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 | ||
| 2 | * XDP user-space ring structure | ||
| 3 | * Copyright(c) 2018 Intel Corporation. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms and conditions of the GNU General Public License, | ||
| 7 | * version 2, as published by the Free Software Foundation. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 12 | * more details. | ||
| 13 | */ | ||
| 14 | |||
| 15 | #ifndef _LINUX_XSK_QUEUE_H | ||
| 16 | #define _LINUX_XSK_QUEUE_H | ||
| 17 | |||
| 18 | #include <linux/types.h> | ||
| 19 | #include <linux/if_xdp.h> | ||
| 20 | |||
| 21 | #include "xdp_umem_props.h" | ||
| 22 | |||
| 23 | #define RX_BATCH_SIZE 16 | ||
| 24 | |||
| 25 | struct xsk_queue { | ||
| 26 | struct xdp_umem_props umem_props; | ||
| 27 | u32 ring_mask; | ||
| 28 | u32 nentries; | ||
| 29 | u32 prod_head; | ||
| 30 | u32 prod_tail; | ||
| 31 | u32 cons_head; | ||
| 32 | u32 cons_tail; | ||
| 33 | struct xdp_ring *ring; | ||
| 34 | u64 invalid_descs; | ||
| 35 | }; | ||
| 36 | |||
| 37 | /* Common functions operating on both RXTX and umem queues */ | ||
| 38 | |||
| 39 | static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q) | ||
| 40 | { | ||
| 41 | return q ? q->invalid_descs : 0; | ||
| 42 | } | ||
| 43 | |||
| 44 | static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt) | ||
| 45 | { | ||
| 46 | u32 entries = q->prod_tail - q->cons_tail; | ||
| 47 | |||
| 48 | if (entries == 0) { | ||
| 49 | /* Refresh the local pointer */ | ||
| 50 | q->prod_tail = READ_ONCE(q->ring->producer); | ||
| 51 | entries = q->prod_tail - q->cons_tail; | ||
| 52 | } | ||
| 53 | |||
| 54 | return (entries > dcnt) ? dcnt : entries; | ||
| 55 | } | ||
| 56 | |||
| 57 | static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt) | ||
| 58 | { | ||
| 59 | u32 free_entries = q->nentries - (producer - q->cons_tail); | ||
| 60 | |||
| 61 | if (free_entries >= dcnt) | ||
| 62 | return free_entries; | ||
| 63 | |||
| 64 | /* Refresh the local tail pointer */ | ||
| 65 | q->cons_tail = READ_ONCE(q->ring->consumer); | ||
| 66 | return q->nentries - (producer - q->cons_tail); | ||
| 67 | } | ||
| 68 | |||
| 69 | /* UMEM queue */ | ||
| 70 | |||
| 71 | static inline bool xskq_is_valid_id(struct xsk_queue *q, u32 idx) | ||
| 72 | { | ||
| 73 | if (unlikely(idx >= q->umem_props.nframes)) { | ||
| 74 | q->invalid_descs++; | ||
| 75 | return false; | ||
| 76 | } | ||
| 77 | return true; | ||
| 78 | } | ||
| 79 | |||
| 80 | static inline u32 *xskq_validate_id(struct xsk_queue *q) | ||
| 81 | { | ||
| 82 | while (q->cons_tail != q->cons_head) { | ||
| 83 | struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; | ||
| 84 | unsigned int idx = q->cons_tail & q->ring_mask; | ||
| 85 | |||
| 86 | if (xskq_is_valid_id(q, ring->desc[idx])) | ||
| 87 | return &ring->desc[idx]; | ||
| 88 | |||
| 89 | q->cons_tail++; | ||
| 90 | } | ||
| 91 | |||
| 92 | return NULL; | ||
| 93 | } | ||
| 94 | |||
| 95 | static inline u32 *xskq_peek_id(struct xsk_queue *q) | ||
| 96 | { | ||
| 97 | struct xdp_umem_ring *ring; | ||
| 98 | |||
| 99 | if (q->cons_tail == q->cons_head) { | ||
| 100 | WRITE_ONCE(q->ring->consumer, q->cons_tail); | ||
| 101 | q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE); | ||
| 102 | |||
| 103 | /* Order consumer and data */ | ||
| 104 | smp_rmb(); | ||
| 105 | |||
| 106 | return xskq_validate_id(q); | ||
| 107 | } | ||
| 108 | |||
| 109 | ring = (struct xdp_umem_ring *)q->ring; | ||
| 110 | return &ring->desc[q->cons_tail & q->ring_mask]; | ||
| 111 | } | ||
| 112 | |||
| 113 | static inline void xskq_discard_id(struct xsk_queue *q) | ||
| 114 | { | ||
| 115 | q->cons_tail++; | ||
| 116 | (void)xskq_validate_id(q); | ||
| 117 | } | ||
| 118 | |||
| 119 | static inline int xskq_produce_id(struct xsk_queue *q, u32 id) | ||
| 120 | { | ||
| 121 | struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; | ||
| 122 | |||
| 123 | ring->desc[q->prod_tail++ & q->ring_mask] = id; | ||
| 124 | |||
| 125 | /* Order producer and data */ | ||
| 126 | smp_wmb(); | ||
| 127 | |||
| 128 | WRITE_ONCE(q->ring->producer, q->prod_tail); | ||
| 129 | return 0; | ||
| 130 | } | ||
| 131 | |||
| 132 | static inline int xskq_reserve_id(struct xsk_queue *q) | ||
| 133 | { | ||
| 134 | if (xskq_nb_free(q, q->prod_head, 1) == 0) | ||
| 135 | return -ENOSPC; | ||
| 136 | |||
| 137 | q->prod_head++; | ||
| 138 | return 0; | ||
| 139 | } | ||
| 140 | |||
| 141 | /* Rx/Tx queue */ | ||
| 142 | |||
| 143 | static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d) | ||
| 144 | { | ||
| 145 | u32 buff_len; | ||
| 146 | |||
| 147 | if (unlikely(d->idx >= q->umem_props.nframes)) { | ||
| 148 | q->invalid_descs++; | ||
| 149 | return false; | ||
| 150 | } | ||
| 151 | |||
| 152 | buff_len = q->umem_props.frame_size; | ||
| 153 | if (unlikely(d->len > buff_len || d->len == 0 || | ||
| 154 | d->offset > buff_len || d->offset + d->len > buff_len)) { | ||
| 155 | q->invalid_descs++; | ||
| 156 | return false; | ||
| 157 | } | ||
| 158 | |||
| 159 | return true; | ||
| 160 | } | ||
| 161 | |||
| 162 | static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q, | ||
| 163 | struct xdp_desc *desc) | ||
| 164 | { | ||
| 165 | while (q->cons_tail != q->cons_head) { | ||
| 166 | struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; | ||
| 167 | unsigned int idx = q->cons_tail & q->ring_mask; | ||
| 168 | |||
| 169 | if (xskq_is_valid_desc(q, &ring->desc[idx])) { | ||
| 170 | if (desc) | ||
| 171 | *desc = ring->desc[idx]; | ||
| 172 | return desc; | ||
| 173 | } | ||
| 174 | |||
| 175 | q->cons_tail++; | ||
| 176 | } | ||
| 177 | |||
| 178 | return NULL; | ||
| 179 | } | ||
| 180 | |||
| 181 | static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q, | ||
| 182 | struct xdp_desc *desc) | ||
| 183 | { | ||
| 184 | struct xdp_rxtx_ring *ring; | ||
| 185 | |||
| 186 | if (q->cons_tail == q->cons_head) { | ||
| 187 | WRITE_ONCE(q->ring->consumer, q->cons_tail); | ||
| 188 | q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE); | ||
| 189 | |||
| 190 | /* Order consumer and data */ | ||
| 191 | smp_rmb(); | ||
| 192 | |||
| 193 | return xskq_validate_desc(q, desc); | ||
| 194 | } | ||
| 195 | |||
| 196 | ring = (struct xdp_rxtx_ring *)q->ring; | ||
| 197 | *desc = ring->desc[q->cons_tail & q->ring_mask]; | ||
| 198 | return desc; | ||
| 199 | } | ||
| 200 | |||
| 201 | static inline void xskq_discard_desc(struct xsk_queue *q) | ||
| 202 | { | ||
| 203 | q->cons_tail++; | ||
| 204 | (void)xskq_validate_desc(q, NULL); | ||
| 205 | } | ||
| 206 | |||
| 207 | static inline int xskq_produce_batch_desc(struct xsk_queue *q, | ||
| 208 | u32 id, u32 len, u16 offset) | ||
| 209 | { | ||
| 210 | struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; | ||
| 211 | unsigned int idx; | ||
| 212 | |||
| 213 | if (xskq_nb_free(q, q->prod_head, 1) == 0) | ||
| 214 | return -ENOSPC; | ||
| 215 | |||
| 216 | idx = (q->prod_head++) & q->ring_mask; | ||
| 217 | ring->desc[idx].idx = id; | ||
| 218 | ring->desc[idx].len = len; | ||
| 219 | ring->desc[idx].offset = offset; | ||
| 220 | |||
| 221 | return 0; | ||
| 222 | } | ||
| 223 | |||
| 224 | static inline void xskq_produce_flush_desc(struct xsk_queue *q) | ||
| 225 | { | ||
| 226 | /* Order producer and data */ | ||
| 227 | smp_wmb(); | ||
| 228 | |||
| 229 | q->prod_tail = q->prod_head; | ||
| 230 | WRITE_ONCE(q->ring->producer, q->prod_tail); | ||
| 231 | } | ||
| 232 | |||
| 233 | static inline bool xskq_full_desc(struct xsk_queue *q) | ||
| 234 | { | ||
| 235 | return (xskq_nb_avail(q, q->nentries) == q->nentries); | ||
| 236 | } | ||
| 237 | |||
| 238 | static inline bool xskq_empty_desc(struct xsk_queue *q) | ||
| 239 | { | ||
| 240 | return (xskq_nb_free(q, q->prod_tail, 1) == q->nentries); | ||
| 241 | } | ||
| 242 | |||
| 243 | void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props); | ||
| 244 | struct xsk_queue *xskq_create(u32 nentries, bool umem_queue); | ||
| 245 | void xskq_destroy(struct xsk_queue *q_ops); | ||
| 246 | |||
| 247 | #endif /* _LINUX_XSK_QUEUE_H */ | ||
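The peek/discard helpers above encode the single-producer/single-consumer contract: refresh the cached producer index, fence, consume, then publish the consumer index. The user-space side of an RX ring follows the same shape; a sketch with GCC builtins standing in for the kernel barriers (drain_rx_ring() and its index-caching scheme are illustrative, and nentries must be the power of two used at ring setup):

```c
#include <linux/if_xdp.h>

static unsigned int drain_rx_ring(struct xdp_rxtx_ring *rx, __u32 nentries,
				  __u32 *cached_cons,
				  void (*handle)(const struct xdp_desc *))
{
	__u32 prod = *(volatile __u32 *)&rx->ptrs.producer;
	unsigned int cnt = 0;

	/* Pairs with xskq_produce_flush_desc()'s smp_wmb(): descriptors
	 * are stable once the producer index has been observed.
	 */
	__sync_synchronize();

	while (*cached_cons != prod) {
		handle(&rx->desc[*cached_cons & (nentries - 1)]);
		(*cached_cons)++;
		cnt++;
	}

	/* Publish consumption so the kernel's xskq_nb_free() sees it */
	__sync_synchronize();
	*(volatile __u32 *)&rx->ptrs.consumer = *cached_cons;
	return cnt;
}
```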
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index b853581592fd..8e0c7fb6d7cc 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile | |||
| @@ -45,10 +45,12 @@ hostprogs-y += xdp_rxq_info | |||
| 45 | hostprogs-y += syscall_tp | 45 | hostprogs-y += syscall_tp |
| 46 | hostprogs-y += cpustat | 46 | hostprogs-y += cpustat |
| 47 | hostprogs-y += xdp_adjust_tail | 47 | hostprogs-y += xdp_adjust_tail |
| 48 | hostprogs-y += xdpsock | ||
| 48 | 49 | ||
| 49 | # Libbpf dependencies | 50 | # Libbpf dependencies |
| 50 | LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o | 51 | LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o |
| 51 | CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o | 52 | CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o |
| 53 | TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o | ||
| 52 | 54 | ||
| 53 | test_lru_dist-objs := test_lru_dist.o $(LIBBPF) | 55 | test_lru_dist-objs := test_lru_dist.o $(LIBBPF) |
| 54 | sock_example-objs := sock_example.o $(LIBBPF) | 56 | sock_example-objs := sock_example.o $(LIBBPF) |
| @@ -65,10 +67,10 @@ tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o | |||
| 65 | tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o | 67 | tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o |
| 66 | load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o | 68 | load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o |
| 67 | test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o | 69 | test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o |
| 68 | trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o | 70 | trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o $(TRACE_HELPERS) |
| 69 | lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o | 71 | lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o |
| 70 | offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o | 72 | offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o $(TRACE_HELPERS) |
| 71 | spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o | 73 | spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o $(TRACE_HELPERS) |
| 72 | map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o | 74 | map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o |
| 73 | test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o | 75 | test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o |
| 74 | test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o | 76 | test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o |
| @@ -82,8 +84,8 @@ xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o | |||
| 82 | xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o | 84 | xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o |
| 83 | test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \ | 85 | test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \ |
| 84 | test_current_task_under_cgroup_user.o | 86 | test_current_task_under_cgroup_user.o |
| 85 | trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o | 87 | trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o $(TRACE_HELPERS) |
| 86 | sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o | 88 | sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o $(TRACE_HELPERS) |
| 87 | tc_l2_redirect-objs := bpf_load.o $(LIBBPF) tc_l2_redirect_user.o | 89 | tc_l2_redirect-objs := bpf_load.o $(LIBBPF) tc_l2_redirect_user.o |
| 88 | lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o | 90 | lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o |
| 89 | xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o | 91 | xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o |
| @@ -97,6 +99,7 @@ xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o | |||
| 97 | syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o | 99 | syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o |
| 98 | cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o | 100 | cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o |
| 99 | xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o | 101 | xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o |
| 102 | xdpsock-objs := bpf_load.o $(LIBBPF) xdpsock_user.o | ||
| 100 | 103 | ||
| 101 | # Tell kbuild to always build the programs | 104 | # Tell kbuild to always build the programs |
| 102 | always := $(hostprogs-y) | 105 | always := $(hostprogs-y) |
| @@ -150,6 +153,7 @@ always += xdp2skb_meta_kern.o | |||
| 150 | always += syscall_tp_kern.o | 153 | always += syscall_tp_kern.o |
| 151 | always += cpustat_kern.o | 154 | always += cpustat_kern.o |
| 152 | always += xdp_adjust_tail_kern.o | 155 | always += xdp_adjust_tail_kern.o |
| 156 | always += xdpsock_kern.o | ||
| 153 | 157 | ||
| 154 | HOSTCFLAGS += -I$(objtree)/usr/include | 158 | HOSTCFLAGS += -I$(objtree)/usr/include |
| 155 | HOSTCFLAGS += -I$(srctree)/tools/lib/ | 159 | HOSTCFLAGS += -I$(srctree)/tools/lib/ |
| @@ -196,6 +200,7 @@ HOSTLOADLIBES_xdp_rxq_info += -lelf | |||
| 196 | HOSTLOADLIBES_syscall_tp += -lelf | 200 | HOSTLOADLIBES_syscall_tp += -lelf |
| 197 | HOSTLOADLIBES_cpustat += -lelf | 201 | HOSTLOADLIBES_cpustat += -lelf |
| 198 | HOSTLOADLIBES_xdp_adjust_tail += -lelf | 202 | HOSTLOADLIBES_xdp_adjust_tail += -lelf |
| 203 | HOSTLOADLIBES_xdpsock += -lelf -pthread | ||
| 199 | 204 | ||
| 200 | # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: | 205 | # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: |
| 201 | # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang | 206 | # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang |
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index feca497d6afd..da9bccfaf391 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c | |||
| @@ -145,6 +145,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) | |||
| 145 | } | 145 | } |
| 146 | 146 | ||
| 147 | if (is_kprobe || is_kretprobe) { | 147 | if (is_kprobe || is_kretprobe) { |
| 148 | bool need_normal_check = true; | ||
| 149 | const char *event_prefix = ""; | ||
| 150 | |||
| 148 | if (is_kprobe) | 151 | if (is_kprobe) |
| 149 | event += 7; | 152 | event += 7; |
| 150 | else | 153 | else |
| @@ -158,18 +161,33 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) | |||
| 158 | if (isdigit(*event)) | 161 | if (isdigit(*event)) |
| 159 | return populate_prog_array(event, fd); | 162 | return populate_prog_array(event, fd); |
| 160 | 163 | ||
| 161 | snprintf(buf, sizeof(buf), | 164 | #ifdef __x86_64__ |
| 162 | "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events", | 165 | if (strncmp(event, "sys_", 4) == 0) { |
| 163 | is_kprobe ? 'p' : 'r', event, event); | 166 | snprintf(buf, sizeof(buf), |
| 164 | err = system(buf); | 167 | "echo '%c:__x64_%s __x64_%s' >> /sys/kernel/debug/tracing/kprobe_events", |
| 165 | if (err < 0) { | 168 | is_kprobe ? 'p' : 'r', event, event); |
| 166 | printf("failed to create kprobe '%s' error '%s'\n", | 169 | err = system(buf); |
| 167 | event, strerror(errno)); | 170 | if (err >= 0) { |
| 168 | return -1; | 171 | need_normal_check = false; |
| 172 | event_prefix = "__x64_"; | ||
| 173 | } | ||
| 174 | } | ||
| 175 | #endif | ||
| 176 | if (need_normal_check) { | ||
| 177 | snprintf(buf, sizeof(buf), | ||
| 178 | "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events", | ||
| 179 | is_kprobe ? 'p' : 'r', event, event); | ||
| 180 | err = system(buf); | ||
| 181 | if (err < 0) { | ||
| 182 | printf("failed to create kprobe '%s' error '%s'\n", | ||
| 183 | event, strerror(errno)); | ||
| 184 | return -1; | ||
| 185 | } | ||
| 169 | } | 186 | } |
| 170 | 187 | ||
| 171 | strcpy(buf, DEBUGFS); | 188 | strcpy(buf, DEBUGFS); |
| 172 | strcat(buf, "events/kprobes/"); | 189 | strcat(buf, "events/kprobes/"); |
| 190 | strcat(buf, event_prefix); | ||
| 173 | strcat(buf, event); | 191 | strcat(buf, event); |
| 174 | strcat(buf, "/id"); | 192 | strcat(buf, "/id"); |
| 175 | } else if (is_tracepoint) { | 193 | } else if (is_tracepoint) { |
| @@ -648,66 +666,3 @@ void read_trace_pipe(void) | |||
| 648 | } | 666 | } |
| 649 | } | 667 | } |
| 650 | } | 668 | } |
| 651 | |||
| 652 | #define MAX_SYMS 300000 | ||
| 653 | static struct ksym syms[MAX_SYMS]; | ||
| 654 | static int sym_cnt; | ||
| 655 | |||
| 656 | static int ksym_cmp(const void *p1, const void *p2) | ||
| 657 | { | ||
| 658 | return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr; | ||
| 659 | } | ||
| 660 | |||
| 661 | int load_kallsyms(void) | ||
| 662 | { | ||
| 663 | FILE *f = fopen("/proc/kallsyms", "r"); | ||
| 664 | char func[256], buf[256]; | ||
| 665 | char symbol; | ||
| 666 | void *addr; | ||
| 667 | int i = 0; | ||
| 668 | |||
| 669 | if (!f) | ||
| 670 | return -ENOENT; | ||
| 671 | |||
| 672 | while (!feof(f)) { | ||
| 673 | if (!fgets(buf, sizeof(buf), f)) | ||
| 674 | break; | ||
| 675 | if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3) | ||
| 676 | break; | ||
| 677 | if (!addr) | ||
| 678 | continue; | ||
| 679 | syms[i].addr = (long) addr; | ||
| 680 | syms[i].name = strdup(func); | ||
| 681 | i++; | ||
| 682 | } | ||
| 683 | sym_cnt = i; | ||
| 684 | qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp); | ||
| 685 | return 0; | ||
| 686 | } | ||
| 687 | |||
| 688 | struct ksym *ksym_search(long key) | ||
| 689 | { | ||
| 690 | int start = 0, end = sym_cnt; | ||
| 691 | int result; | ||
| 692 | |||
| 693 | while (start < end) { | ||
| 694 | size_t mid = start + (end - start) / 2; | ||
| 695 | |||
| 696 | result = key - syms[mid].addr; | ||
| 697 | if (result < 0) | ||
| 698 | end = mid; | ||
| 699 | else if (result > 0) | ||
| 700 | start = mid + 1; | ||
| 701 | else | ||
| 702 | return &syms[mid]; | ||
| 703 | } | ||
| 704 | |||
| 705 | if (start >= 1 && syms[start - 1].addr < key && | ||
| 706 | key < syms[start].addr) | ||
| 707 | /* valid ksym */ | ||
| 708 | return &syms[start - 1]; | ||
| 709 | |||
| 710 | /* out of range. return _stext */ | ||
| 711 | return &syms[0]; | ||
| 712 | } | ||
| 713 | |||
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h index 453c200b389b..2c3d0b448632 100644 --- a/samples/bpf/bpf_load.h +++ b/samples/bpf/bpf_load.h | |||
| @@ -54,12 +54,5 @@ int load_bpf_file(char *path); | |||
| 54 | int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map); | 54 | int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map); |
| 55 | 55 | ||
| 56 | void read_trace_pipe(void); | 56 | void read_trace_pipe(void); |
| 57 | struct ksym { | ||
| 58 | long addr; | ||
| 59 | char *name; | ||
| 60 | }; | ||
| 61 | |||
| 62 | int load_kallsyms(void); | ||
| 63 | struct ksym *ksym_search(long key); | ||
| 64 | int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); | 57 | int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); |
| 65 | #endif | 58 | #endif |
diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c index 512f87a5fd20..f06063af9fcb 100644 --- a/samples/bpf/offwaketime_user.c +++ b/samples/bpf/offwaketime_user.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <sys/resource.h> | 17 | #include <sys/resource.h> |
| 18 | #include "libbpf.h" | 18 | #include "libbpf.h" |
| 19 | #include "bpf_load.h" | 19 | #include "bpf_load.h" |
| 20 | #include "trace_helpers.h" | ||
| 20 | 21 | ||
| 21 | #define PRINT_RAW_ADDR 0 | 22 | #define PRINT_RAW_ADDR 0 |
| 22 | 23 | ||
diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c index 4ed690b907ff..60c2b73d1b4d 100644 --- a/samples/bpf/sampleip_user.c +++ b/samples/bpf/sampleip_user.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include "libbpf.h" | 22 | #include "libbpf.h" |
| 23 | #include "bpf_load.h" | 23 | #include "bpf_load.h" |
| 24 | #include "perf-sys.h" | 24 | #include "perf-sys.h" |
| 25 | #include "trace_helpers.h" | ||
| 25 | 26 | ||
| 26 | #define DEFAULT_FREQ 99 | 27 | #define DEFAULT_FREQ 99 |
| 27 | #define DEFAULT_SECS 5 | 28 | #define DEFAULT_SECS 5 |
diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c index 3d736219a31c..8d3e9cfa1909 100644 --- a/samples/bpf/spintest_user.c +++ b/samples/bpf/spintest_user.c | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <sys/resource.h> | 7 | #include <sys/resource.h> |
| 8 | #include "libbpf.h" | 8 | #include "libbpf.h" |
| 9 | #include "bpf_load.h" | 9 | #include "bpf_load.h" |
| 10 | #include "trace_helpers.h" | ||
| 10 | 11 | ||
| 11 | int main(int ac, char **argv) | 12 | int main(int ac, char **argv) |
| 12 | { | 13 | { |
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c index 56f7a259a7c9..1fa1becfa641 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include "libbpf.h" | 21 | #include "libbpf.h" |
| 22 | #include "bpf_load.h" | 22 | #include "bpf_load.h" |
| 23 | #include "perf-sys.h" | 23 | #include "perf-sys.h" |
| 24 | #include "trace_helpers.h" | ||
| 24 | 25 | ||
| 25 | #define SAMPLE_FREQ 50 | 26 | #define SAMPLE_FREQ 50 |
| 26 | 27 | ||
diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c index ccca1e348017..5e78c2ecd08d 100644 --- a/samples/bpf/trace_output_user.c +++ b/samples/bpf/trace_output_user.c | |||
| @@ -21,100 +21,10 @@ | |||
| 21 | #include "libbpf.h" | 21 | #include "libbpf.h" |
| 22 | #include "bpf_load.h" | 22 | #include "bpf_load.h" |
| 23 | #include "perf-sys.h" | 23 | #include "perf-sys.h" |
| 24 | #include "trace_helpers.h" | ||
| 24 | 25 | ||
| 25 | static int pmu_fd; | 26 | static int pmu_fd; |
| 26 | 27 | ||
| 27 | int page_size; | ||
| 28 | int page_cnt = 8; | ||
| 29 | volatile struct perf_event_mmap_page *header; | ||
| 30 | |||
| 31 | typedef void (*print_fn)(void *data, int size); | ||
| 32 | |||
| 33 | static int perf_event_mmap(int fd) | ||
| 34 | { | ||
| 35 | void *base; | ||
| 36 | int mmap_size; | ||
| 37 | |||
| 38 | page_size = getpagesize(); | ||
| 39 | mmap_size = page_size * (page_cnt + 1); | ||
| 40 | |||
| 41 | base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); | ||
| 42 | if (base == MAP_FAILED) { | ||
| 43 | printf("mmap err\n"); | ||
| 44 | return -1; | ||
| 45 | } | ||
| 46 | |||
| 47 | header = base; | ||
| 48 | return 0; | ||
| 49 | } | ||
| 50 | |||
| 51 | static int perf_event_poll(int fd) | ||
| 52 | { | ||
| 53 | struct pollfd pfd = { .fd = fd, .events = POLLIN }; | ||
| 54 | |||
| 55 | return poll(&pfd, 1, 1000); | ||
| 56 | } | ||
| 57 | |||
| 58 | struct perf_event_sample { | ||
| 59 | struct perf_event_header header; | ||
| 60 | __u32 size; | ||
| 61 | char data[]; | ||
| 62 | }; | ||
| 63 | |||
| 64 | static void perf_event_read(print_fn fn) | ||
| 65 | { | ||
| 66 | __u64 data_tail = header->data_tail; | ||
| 67 | __u64 data_head = header->data_head; | ||
| 68 | __u64 buffer_size = page_cnt * page_size; | ||
| 69 | void *base, *begin, *end; | ||
| 70 | char buf[256]; | ||
| 71 | |||
| 72 | asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ | ||
| 73 | if (data_head == data_tail) | ||
| 74 | return; | ||
| 75 | |||
| 76 | base = ((char *)header) + page_size; | ||
| 77 | |||
| 78 | begin = base + data_tail % buffer_size; | ||
| 79 | end = base + data_head % buffer_size; | ||
| 80 | |||
| 81 | while (begin != end) { | ||
| 82 | struct perf_event_sample *e; | ||
| 83 | |||
| 84 | e = begin; | ||
| 85 | if (begin + e->header.size > base + buffer_size) { | ||
| 86 | long len = base + buffer_size - begin; | ||
| 87 | |||
| 88 | assert(len < e->header.size); | ||
| 89 | memcpy(buf, begin, len); | ||
| 90 | memcpy(buf + len, base, e->header.size - len); | ||
| 91 | e = (void *) buf; | ||
| 92 | begin = base + e->header.size - len; | ||
| 93 | } else if (begin + e->header.size == base + buffer_size) { | ||
| 94 | begin = base; | ||
| 95 | } else { | ||
| 96 | begin += e->header.size; | ||
| 97 | } | ||
| 98 | |||
| 99 | if (e->header.type == PERF_RECORD_SAMPLE) { | ||
| 100 | fn(e->data, e->size); | ||
| 101 | } else if (e->header.type == PERF_RECORD_LOST) { | ||
| 102 | struct { | ||
| 103 | struct perf_event_header header; | ||
| 104 | __u64 id; | ||
| 105 | __u64 lost; | ||
| 106 | } *lost = (void *) e; | ||
| 107 | printf("lost %lld events\n", lost->lost); | ||
| 108 | } else { | ||
| 109 | printf("unknown event type=%d size=%d\n", | ||
| 110 | e->header.type, e->header.size); | ||
| 111 | } | ||
| 112 | } | ||
| 113 | |||
| 114 | __sync_synchronize(); /* smp_mb() */ | ||
| 115 | header->data_tail = data_head; | ||
| 116 | } | ||
| 117 | |||
| 118 | static __u64 time_get_ns(void) | 28 | static __u64 time_get_ns(void) |
| 119 | { | 29 | { |
| 120 | struct timespec ts; | 30 | struct timespec ts; |
| @@ -127,7 +37,7 @@ static __u64 start_time; | |||
| 127 | 37 | ||
| 128 | #define MAX_CNT 100000ll | 38 | #define MAX_CNT 100000ll |
| 129 | 39 | ||
| 130 | static void print_bpf_output(void *data, int size) | 40 | static int print_bpf_output(void *data, int size) |
| 131 | { | 41 | { |
| 132 | static __u64 cnt; | 42 | static __u64 cnt; |
| 133 | struct { | 43 | struct { |
| @@ -138,7 +48,7 @@ static void print_bpf_output(void *data, int size) | |||
| 138 | if (e->cookie != 0x12345678) { | 48 | if (e->cookie != 0x12345678) { |
| 139 | printf("BUG pid %llx cookie %llx sized %d\n", | 49 | printf("BUG pid %llx cookie %llx sized %d\n", |
| 140 | e->pid, e->cookie, size); | 50 | e->pid, e->cookie, size); |
| 141 | kill(0, SIGINT); | 51 | return PERF_EVENT_ERROR; |
| 142 | } | 52 | } |
| 143 | 53 | ||
| 144 | cnt++; | 54 | cnt++; |
| @@ -146,8 +56,10 @@ static void print_bpf_output(void *data, int size) | |||
| 146 | if (cnt == MAX_CNT) { | 56 | if (cnt == MAX_CNT) { |
| 147 | printf("recv %lld events per sec\n", | 57 | printf("recv %lld events per sec\n", |
| 148 | MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); | 58 | MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); |
| 149 | kill(0, SIGINT); | 59 | return PERF_EVENT_DONE; |
| 150 | } | 60 | } |
| 61 | |||
| 62 | return PERF_EVENT_CONT; | ||
| 151 | } | 63 | } |
| 152 | 64 | ||
| 153 | static void test_bpf_perf_event(void) | 65 | static void test_bpf_perf_event(void) |
| @@ -170,6 +82,7 @@ int main(int argc, char **argv) | |||
| 170 | { | 82 | { |
| 171 | char filename[256]; | 83 | char filename[256]; |
| 172 | FILE *f; | 84 | FILE *f; |
| 85 | int ret; | ||
| 173 | 86 | ||
| 174 | snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); | 87 | snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); |
| 175 | 88 | ||
| @@ -187,10 +100,7 @@ int main(int argc, char **argv) | |||
| 187 | (void) f; | 100 | (void) f; |
| 188 | 101 | ||
| 189 | start_time = time_get_ns(); | 102 | start_time = time_get_ns(); |
| 190 | for (;;) { | 103 | ret = perf_event_poller(pmu_fd, print_bpf_output); |
| 191 | perf_event_poll(pmu_fd); | 104 | kill(0, SIGINT); |
| 192 | perf_event_read(print_bpf_output); | 105 | return ret; |
| 193 | } | ||
| 194 | |||
| 195 | return 0; | ||
| 196 | } | 106 | } |
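The hunk above replaces trace_output_user.c's open-coded mmap/poll/read machinery with the shared perf_event_poller() from trace_helpers, and changes the print callback to return a status: PERF_EVENT_CONT to keep polling, PERF_EVENT_DONE to stop cleanly, PERF_EVENT_ERROR on a bad record. A hedged sketch of the loop this contract implies, reconstructed from the removed code; the real trace_helpers implementation may differ in detail, and the PERF_EVENT_* values below are assumptions:

```c
/* Sketch of a perf_event_poller()-style loop, pieced together from the
 * removed code above. Assumes perf_event_read() was changed to
 * propagate the callback's PERF_EVENT_* return value.
 */
#include <poll.h>

#define PERF_EVENT_DONE		0	/* assumed values; the real ones */
#define PERF_EVENT_ERROR	-1	/* live in trace_helpers.h       */
#define PERF_EVENT_CONT		-2

typedef int (*perf_event_print_fn)(void *data, int size);

static int poller_sketch(int fd, perf_event_print_fn fn)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	int ret = PERF_EVENT_CONT;

	while (ret == PERF_EVENT_CONT) {
		poll(&pfd, 1, 1000);	   /* wait up to 1s for samples */
		ret = perf_event_read(fn); /* drain ring, call fn per record */
	}
	return ret;
}
```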
diff --git a/samples/bpf/xdpsock.h b/samples/bpf/xdpsock.h new file mode 100644 index 000000000000..533ab81adfa1 --- /dev/null +++ b/samples/bpf/xdpsock.h | |||
| @@ -0,0 +1,11 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
| 2 | #ifndef XDPSOCK_H_ | ||
| 3 | #define XDPSOCK_H_ | ||
| 4 | |||
| 5 | /* Power-of-2 number of sockets */ | ||
| 6 | #define MAX_SOCKS 4 | ||
| 7 | |||
| 8 | /* Round-robin receive */ | ||
| 9 | #define RR_LB 0 | ||
| 10 | |||
| 11 | #endif /* XDPSOCK_H_ */ | ||
diff --git a/samples/bpf/xdpsock_kern.c b/samples/bpf/xdpsock_kern.c new file mode 100644 index 000000000000..d8806c41362e --- /dev/null +++ b/samples/bpf/xdpsock_kern.c | |||
| @@ -0,0 +1,56 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | ||
| 2 | #define KBUILD_MODNAME "foo" | ||
| 3 | #include <uapi/linux/bpf.h> | ||
| 4 | #include "bpf_helpers.h" | ||
| 5 | |||
| 6 | #include "xdpsock.h" | ||
| 7 | |||
| 8 | struct bpf_map_def SEC("maps") qidconf_map = { | ||
| 9 | .type = BPF_MAP_TYPE_ARRAY, | ||
| 10 | .key_size = sizeof(int), | ||
| 11 | .value_size = sizeof(int), | ||
| 12 | .max_entries = 1, | ||
| 13 | }; | ||
| 14 | |||
| 15 | struct bpf_map_def SEC("maps") xsks_map = { | ||
| 16 | .type = BPF_MAP_TYPE_XSKMAP, | ||
| 17 | .key_size = sizeof(int), | ||
| 18 | .value_size = sizeof(int), | ||
| 19 | .max_entries = 4, | ||
| 20 | }; | ||
| 21 | |||
| 22 | struct bpf_map_def SEC("maps") rr_map = { | ||
| 23 | .type = BPF_MAP_TYPE_PERCPU_ARRAY, | ||
| 24 | .key_size = sizeof(int), | ||
| 25 | .value_size = sizeof(unsigned int), | ||
| 26 | .max_entries = 1, | ||
| 27 | }; | ||
| 28 | |||
| 29 | SEC("xdp_sock") | ||
| 30 | int xdp_sock_prog(struct xdp_md *ctx) | ||
| 31 | { | ||
| 32 | int *qidconf, key = 0, idx; | ||
| 33 | unsigned int *rr; | ||
| 34 | |||
| 35 | qidconf = bpf_map_lookup_elem(&qidconf_map, &key); | ||
| 36 | if (!qidconf) | ||
| 37 | return XDP_ABORTED; | ||
| 38 | |||
| 39 | if (*qidconf != ctx->rx_queue_index) | ||
| 40 | return XDP_PASS; | ||
| 41 | |||
| 42 | #if RR_LB /* NB! RR_LB is configured in xdpsock.h */ | ||
| 43 | rr = bpf_map_lookup_elem(&rr_map, &key); | ||
| 44 | if (!rr) | ||
| 45 | return XDP_ABORTED; | ||
| 46 | |||
| 47 | *rr = (*rr + 1) & (MAX_SOCKS - 1); | ||
| 48 | idx = *rr; | ||
| 49 | #else | ||
| 50 | idx = 0; | ||
| 51 | #endif | ||
| 52 | |||
| 53 | return bpf_redirect_map(&xsks_map, idx, 0); | ||
| 54 | } | ||
| 55 | |||
| 56 | char _license[] SEC("license") = "GPL"; | ||
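The RR_LB branch above advances the per-CPU counter with `*rr = (*rr + 1) & (MAX_SOCKS - 1)`, which is why xdpsock.h insists that MAX_SOCKS be a power of two: for N = 2^k, masking with N - 1 equals taking the value modulo N, but costs a single AND and gives the verifier a provable bound on the map index. A standalone check of that identity (illustrative only, not part of the sample):

```c
#include <assert.h>

int main(void)
{
	const unsigned int N = 4;	/* MAX_SOCKS from xdpsock.h */
	unsigned int x;

	/* For any power-of-two N, x & (N - 1) == x % N. */
	for (x = 0; x < 100000; x++)
		assert((x & (N - 1)) == x % N);
	return 0;
}
```

This is also why MAX_SOCKS may only ever grow in powers of two; any other value would make the mask fold indices incorrectly.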
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c new file mode 100644 index 000000000000..4b8a7cf3e63b --- /dev/null +++ b/samples/bpf/xdpsock_user.c | |||
| @@ -0,0 +1,948 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | ||
| 2 | /* Copyright(c) 2017 - 2018 Intel Corporation. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <assert.h> | ||
| 15 | #include <errno.h> | ||
| 16 | #include <getopt.h> | ||
| 17 | #include <libgen.h> | ||
| 18 | #include <linux/bpf.h> | ||
| 19 | #include <linux/if_link.h> | ||
| 20 | #include <linux/if_xdp.h> | ||
| 21 | #include <linux/if_ether.h> | ||
| 22 | #include <net/if.h> | ||
| 23 | #include <signal.h> | ||
| 24 | #include <stdbool.h> | ||
| 25 | #include <stdio.h> | ||
| 26 | #include <stdlib.h> | ||
| 27 | #include <string.h> | ||
| 28 | #include <net/ethernet.h> | ||
| 29 | #include <sys/resource.h> | ||
| 30 | #include <sys/socket.h> | ||
| 31 | #include <sys/mman.h> | ||
| 32 | #include <time.h> | ||
| 33 | #include <unistd.h> | ||
| 34 | #include <pthread.h> | ||
| 35 | #include <locale.h> | ||
| 36 | #include <sys/types.h> | ||
| 37 | #include <poll.h> | ||
| 38 | |||
| 39 | #include "bpf_load.h" | ||
| 40 | #include "bpf_util.h" | ||
| 41 | #include "libbpf.h" | ||
| 42 | |||
| 43 | #include "xdpsock.h" | ||
| 44 | |||
| 45 | #ifndef SOL_XDP | ||
| 46 | #define SOL_XDP 283 | ||
| 47 | #endif | ||
| 48 | |||
| 49 | #ifndef AF_XDP | ||
| 50 | #define AF_XDP 44 | ||
| 51 | #endif | ||
| 52 | |||
| 53 | #ifndef PF_XDP | ||
| 54 | #define PF_XDP AF_XDP | ||
| 55 | #endif | ||
| 56 | |||
| 57 | #define NUM_FRAMES 131072 | ||
| 58 | #define FRAME_HEADROOM 0 | ||
| 59 | #define FRAME_SIZE 2048 | ||
| 60 | #define NUM_DESCS 1024 | ||
| 61 | #define BATCH_SIZE 16 | ||
| 62 | |||
| 63 | #define FQ_NUM_DESCS 1024 | ||
| 64 | #define CQ_NUM_DESCS 1024 | ||
| 65 | |||
| 66 | #define DEBUG_HEXDUMP 0 | ||
| 67 | |||
| 68 | typedef __u32 u32; | ||
| 69 | |||
| 70 | static unsigned long prev_time; | ||
| 71 | |||
| 72 | enum benchmark_type { | ||
| 73 | BENCH_RXDROP = 0, | ||
| 74 | BENCH_TXONLY = 1, | ||
| 75 | BENCH_L2FWD = 2, | ||
| 76 | }; | ||
| 77 | |||
| 78 | static enum benchmark_type opt_bench = BENCH_RXDROP; | ||
| 79 | static u32 opt_xdp_flags; | ||
| 80 | static const char *opt_if = ""; | ||
| 81 | static int opt_ifindex; | ||
| 82 | static int opt_queue; | ||
| 83 | static int opt_poll; | ||
| 84 | static int opt_shared_packet_buffer; | ||
| 85 | static int opt_interval = 1; | ||
| 86 | |||
| 87 | struct xdp_umem_uqueue { | ||
| 88 | u32 cached_prod; | ||
| 89 | u32 cached_cons; | ||
| 90 | u32 mask; | ||
| 91 | u32 size; | ||
| 92 | struct xdp_umem_ring *ring; | ||
| 93 | }; | ||
| 94 | |||
| 95 | struct xdp_umem { | ||
| 96 | char (*frames)[FRAME_SIZE]; | ||
| 97 | struct xdp_umem_uqueue fq; | ||
| 98 | struct xdp_umem_uqueue cq; | ||
| 99 | int fd; | ||
| 100 | }; | ||
| 101 | |||
| 102 | struct xdp_uqueue { | ||
| 103 | u32 cached_prod; | ||
| 104 | u32 cached_cons; | ||
| 105 | u32 mask; | ||
| 106 | u32 size; | ||
| 107 | struct xdp_rxtx_ring *ring; | ||
| 108 | }; | ||
| 109 | |||
| 110 | struct xdpsock { | ||
| 111 | struct xdp_uqueue rx; | ||
| 112 | struct xdp_uqueue tx; | ||
| 113 | int sfd; | ||
| 114 | struct xdp_umem *umem; | ||
| 115 | u32 outstanding_tx; | ||
| 116 | unsigned long rx_npkts; | ||
| 117 | unsigned long tx_npkts; | ||
| 118 | unsigned long prev_rx_npkts; | ||
| 119 | unsigned long prev_tx_npkts; | ||
| 120 | }; | ||
| 121 | |||
| 122 | #define MAX_SOCKS 4 | ||
| 123 | static int num_socks; | ||
| 124 | struct xdpsock *xsks[MAX_SOCKS]; | ||
| 125 | |||
| 126 | static unsigned long get_nsecs(void) | ||
| 127 | { | ||
| 128 | struct timespec ts; | ||
| 129 | |||
| 130 | clock_gettime(CLOCK_MONOTONIC, &ts); | ||
| 131 | return ts.tv_sec * 1000000000UL + ts.tv_nsec; | ||
| 132 | } | ||
| 133 | |||
| 134 | static void dump_stats(void); | ||
| 135 | |||
| 136 | #define lassert(expr) \ | ||
| 137 | do { \ | ||
| 138 | if (!(expr)) { \ | ||
| 139 | fprintf(stderr, "%s:%s:%i: Assertion failed: " \ | ||
| 140 | #expr ": errno: %d/\"%s\"\n", \ | ||
| 141 | __FILE__, __func__, __LINE__, \ | ||
| 142 | errno, strerror(errno)); \ | ||
| 143 | dump_stats(); \ | ||
| 144 | exit(EXIT_FAILURE); \ | ||
| 145 | } \ | ||
| 146 | } while (0) | ||
| 147 | |||
| 148 | #define barrier() __asm__ __volatile__("": : :"memory") | ||
| 149 | #define u_smp_rmb() barrier() | ||
| 150 | #define u_smp_wmb() barrier() | ||
| 151 | #define likely(x) __builtin_expect(!!(x), 1) | ||
| 152 | #define unlikely(x) __builtin_expect(!!(x), 0) | ||
| 153 | |||
| 154 | static const char pkt_data[] = | ||
| 155 | "\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00" | ||
| 156 | "\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14" | ||
| 157 | "\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b" | ||
| 158 | "\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa"; | ||
| 159 | |||
| 160 | static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb) | ||
| 161 | { | ||
| 162 | u32 free_entries = q->size - (q->cached_prod - q->cached_cons); | ||
| 163 | |||
| 164 | if (free_entries >= nb) | ||
| 165 | return free_entries; | ||
| 166 | |||
| 167 | /* Refresh the local tail pointer */ | ||
| 168 | q->cached_cons = q->ring->ptrs.consumer; | ||
| 169 | |||
| 170 | return q->size - (q->cached_prod - q->cached_cons); | ||
| 171 | } | ||
| 172 | |||
| 173 | static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs) | ||
| 174 | { | ||
| 175 | u32 free_entries = q->cached_cons - q->cached_prod; | ||
| 176 | |||
| 177 | if (free_entries >= ndescs) | ||
| 178 | return free_entries; | ||
| 179 | |||
| 180 | /* Refresh the local tail pointer */ | ||
| 181 | q->cached_cons = q->ring->ptrs.consumer + q->size; | ||
| 182 | return q->cached_cons - q->cached_prod; | ||
| 183 | } | ||
| 184 | |||
| 185 | static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb) | ||
| 186 | { | ||
| 187 | u32 entries = q->cached_prod - q->cached_cons; | ||
| 188 | |||
| 189 | if (entries == 0) { | ||
| 190 | q->cached_prod = q->ring->ptrs.producer; | ||
| 191 | entries = q->cached_prod - q->cached_cons; | ||
| 192 | } | ||
| 193 | |||
| 194 | return (entries > nb) ? nb : entries; | ||
| 195 | } | ||
| 196 | |||
| 197 | static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs) | ||
| 198 | { | ||
| 199 | u32 entries = q->cached_prod - q->cached_cons; | ||
| 200 | |||
| 201 | if (entries == 0) { | ||
| 202 | q->cached_prod = q->ring->ptrs.producer; | ||
| 203 | entries = q->cached_prod - q->cached_cons; | ||
| 204 | } | ||
| 205 | |||
| 206 | return (entries > ndescs) ? ndescs : entries; | ||
| 207 | } | ||
| 208 | |||
| 209 | static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq, | ||
| 210 | struct xdp_desc *d, | ||
| 211 | size_t nb) | ||
| 212 | { | ||
| 213 | u32 i; | ||
| 214 | |||
| 215 | if (umem_nb_free(fq, nb) < nb) | ||
| 216 | return -ENOSPC; | ||
| 217 | |||
| 218 | for (i = 0; i < nb; i++) { | ||
| 219 | u32 idx = fq->cached_prod++ & fq->mask; | ||
| 220 | |||
| 221 | fq->ring->desc[idx] = d[i].idx; | ||
| 222 | } | ||
| 223 | |||
| 224 | u_smp_wmb(); | ||
| 225 | |||
| 226 | fq->ring->ptrs.producer = fq->cached_prod; | ||
| 227 | |||
| 228 | return 0; | ||
| 229 | } | ||
| 230 | |||
| 231 | static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u32 *d, | ||
| 232 | size_t nb) | ||
| 233 | { | ||
| 234 | u32 i; | ||
| 235 | |||
| 236 | if (umem_nb_free(fq, nb) < nb) | ||
| 237 | return -ENOSPC; | ||
| 238 | |||
| 239 | for (i = 0; i < nb; i++) { | ||
| 240 | u32 idx = fq->cached_prod++ & fq->mask; | ||
| 241 | |||
| 242 | fq->ring->desc[idx] = d[i]; | ||
| 243 | } | ||
| 244 | |||
| 245 | u_smp_wmb(); | ||
| 246 | |||
| 247 | fq->ring->ptrs.producer = fq->cached_prod; | ||
| 248 | |||
| 249 | return 0; | ||
| 250 | } | ||
| 251 | |||
| 252 | static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq, | ||
| 253 | u32 *d, size_t nb) | ||
| 254 | { | ||
| 255 | u32 idx, i, entries = umem_nb_avail(cq, nb); | ||
| 256 | |||
| 257 | u_smp_rmb(); | ||
| 258 | |||
| 259 | for (i = 0; i < entries; i++) { | ||
| 260 | idx = cq->cached_cons++ & cq->mask; | ||
| 261 | d[i] = cq->ring->desc[idx]; | ||
| 262 | } | ||
| 263 | |||
| 264 | if (entries > 0) { | ||
| 265 | u_smp_wmb(); | ||
| 266 | |||
| 267 | cq->ring->ptrs.consumer = cq->cached_cons; | ||
| 268 | } | ||
| 269 | |||
| 270 | return entries; | ||
| 271 | } | ||
| 272 | |||
| 273 | static inline void *xq_get_data(struct xdpsock *xsk, __u32 idx, __u32 off) | ||
| 274 | { | ||
| 275 | lassert(idx < NUM_FRAMES); | ||
| 276 | return &xsk->umem->frames[idx][off]; | ||
| 277 | } | ||
| 278 | |||
| 279 | static inline int xq_enq(struct xdp_uqueue *uq, | ||
| 280 | const struct xdp_desc *descs, | ||
| 281 | unsigned int ndescs) | ||
| 282 | { | ||
| 283 | struct xdp_rxtx_ring *r = uq->ring; | ||
| 284 | unsigned int i; | ||
| 285 | |||
| 286 | if (xq_nb_free(uq, ndescs) < ndescs) | ||
| 287 | return -ENOSPC; | ||
| 288 | |||
| 289 | for (i = 0; i < ndescs; i++) { | ||
| 290 | u32 idx = uq->cached_prod++ & uq->mask; | ||
| 291 | |||
| 292 | r->desc[idx].idx = descs[i].idx; | ||
| 293 | r->desc[idx].len = descs[i].len; | ||
| 294 | r->desc[idx].offset = descs[i].offset; | ||
| 295 | } | ||
| 296 | |||
| 297 | u_smp_wmb(); | ||
| 298 | |||
| 299 | r->ptrs.producer = uq->cached_prod; | ||
| 300 | return 0; | ||
| 301 | } | ||
| 302 | |||
| 303 | static inline int xq_enq_tx_only(struct xdp_uqueue *uq, | ||
| 304 | __u32 idx, unsigned int ndescs) | ||
| 305 | { | ||
| 306 | struct xdp_rxtx_ring *q = uq->ring; | ||
| 307 | unsigned int i; | ||
| 308 | |||
| 309 | if (xq_nb_free(uq, ndescs) < ndescs) | ||
| 310 | return -ENOSPC; | ||
| 311 | |||
| 312 | for (i = 0; i < ndescs; i++) { | ||
| 313 | u32 slot = uq->cached_prod++ & uq->mask; | ||
| 314 | |||
| 315 | q->desc[slot].idx = idx + i; /* frame index, not the ring slot */ | ||
| 316 | q->desc[slot].len = sizeof(pkt_data) - 1; | ||
| 317 | q->desc[slot].offset = 0; | ||
| 318 | } | ||
| 319 | |||
| 320 | u_smp_wmb(); | ||
| 321 | |||
| 322 | q->ptrs.producer = uq->cached_prod; | ||
| 323 | return 0; | ||
| 324 | } | ||
| 325 | |||
| 326 | static inline int xq_deq(struct xdp_uqueue *uq, | ||
| 327 | struct xdp_desc *descs, | ||
| 328 | int ndescs) | ||
| 329 | { | ||
| 330 | struct xdp_rxtx_ring *r = uq->ring; | ||
| 331 | unsigned int idx; | ||
| 332 | int i, entries; | ||
| 333 | |||
| 334 | entries = xq_nb_avail(uq, ndescs); | ||
| 335 | |||
| 336 | u_smp_rmb(); | ||
| 337 | |||
| 338 | for (i = 0; i < entries; i++) { | ||
| 339 | idx = uq->cached_cons++ & uq->mask; | ||
| 340 | descs[i] = r->desc[idx]; | ||
| 341 | } | ||
| 342 | |||
| 343 | if (entries > 0) { | ||
| 344 | u_smp_wmb(); | ||
| 345 | |||
| 346 | r->ptrs.consumer = uq->cached_cons; | ||
| 347 | } | ||
| 348 | |||
| 349 | return entries; | ||
| 350 | } | ||
| 351 | |||
| 352 | static void swap_mac_addresses(void *data) | ||
| 353 | { | ||
| 354 | struct ether_header *eth = (struct ether_header *)data; | ||
| 355 | struct ether_addr *src_addr = (struct ether_addr *)ð->ether_shost; | ||
| 356 | struct ether_addr *dst_addr = (struct ether_addr *)ð->ether_dhost; | ||
| 357 | struct ether_addr tmp; | ||
| 358 | |||
| 359 | tmp = *src_addr; | ||
| 360 | *src_addr = *dst_addr; | ||
| 361 | *dst_addr = tmp; | ||
| 362 | } | ||
| 363 | |||
| 364 | #if DEBUG_HEXDUMP | ||
| 365 | static void hex_dump(void *pkt, size_t length, const char *prefix) | ||
| 366 | { | ||
| 367 | int i = 0; | ||
| 368 | const unsigned char *address = (unsigned char *)pkt; | ||
| 369 | const unsigned char *line = address; | ||
| 370 | size_t line_size = 32; | ||
| 371 | unsigned char c; | ||
| 372 | |||
| 373 | printf("length = %zu\n", length); | ||
| 374 | printf("%s | ", prefix); | ||
| 375 | while (length-- > 0) { | ||
| 376 | printf("%02X ", *address++); | ||
| 377 | if (!(++i % line_size) || (length == 0 && i % line_size)) { | ||
| 378 | if (length == 0) { | ||
| 379 | while (i++ % line_size) | ||
| 380 | printf("__ "); | ||
| 381 | } | ||
| 382 | printf(" | "); /* right close */ | ||
| 383 | while (line < address) { | ||
| 384 | c = *line++; | ||
| 385 | printf("%c", (c < 33 || c == 255) ? 0x2E : c); | ||
| 386 | } | ||
| 387 | printf("\n"); | ||
| 388 | if (length > 0) | ||
| 389 | printf("%s | ", prefix); | ||
| 390 | } | ||
| 391 | } | ||
| 392 | printf("\n"); | ||
| 393 | } | ||
| 394 | #endif | ||
| 395 | |||
| 396 | static size_t gen_eth_frame(char *frame) | ||
| 397 | { | ||
| 398 | memcpy(frame, pkt_data, sizeof(pkt_data) - 1); | ||
| 399 | return sizeof(pkt_data) - 1; | ||
| 400 | } | ||
| 401 | |||
| 402 | static struct xdp_umem *xdp_umem_configure(int sfd) | ||
| 403 | { | ||
| 404 | int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS; | ||
| 405 | struct xdp_umem_reg mr; | ||
| 406 | struct xdp_umem *umem; | ||
| 407 | void *bufs; | ||
| 408 | |||
| 409 | umem = calloc(1, sizeof(*umem)); | ||
| 410 | lassert(umem); | ||
| 411 | |||
| 412 | lassert(posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */ | ||
| 413 | NUM_FRAMES * FRAME_SIZE) == 0); | ||
| 414 | |||
| 415 | mr.addr = (__u64)bufs; | ||
| 416 | mr.len = NUM_FRAMES * FRAME_SIZE; | ||
| 417 | mr.frame_size = FRAME_SIZE; | ||
| 418 | mr.frame_headroom = FRAME_HEADROOM; | ||
| 419 | |||
| 420 | lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)) == 0); | ||
| 421 | lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_FILL_RING, &fq_size, | ||
| 422 | sizeof(int)) == 0); | ||
| 423 | lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size, | ||
| 424 | sizeof(int)) == 0); | ||
| 425 | |||
| 426 | umem->fq.ring = mmap(0, sizeof(struct xdp_umem_ring) + | ||
| 427 | FQ_NUM_DESCS * sizeof(u32), | ||
| 428 | PROT_READ | PROT_WRITE, | ||
| 429 | MAP_SHARED | MAP_POPULATE, sfd, | ||
| 430 | XDP_UMEM_PGOFF_FILL_RING); | ||
| 431 | lassert(umem->fq.ring != MAP_FAILED); | ||
| 432 | |||
| 433 | umem->fq.mask = FQ_NUM_DESCS - 1; | ||
| 434 | umem->fq.size = FQ_NUM_DESCS; | ||
| 435 | |||
| 436 | umem->cq.ring = mmap(0, sizeof(struct xdp_umem_ring) + | ||
| 437 | CQ_NUM_DESCS * sizeof(u32), | ||
| 438 | PROT_READ | PROT_WRITE, | ||
| 439 | MAP_SHARED | MAP_POPULATE, sfd, | ||
| 440 | XDP_UMEM_PGOFF_COMPLETION_RING); | ||
| 441 | lassert(umem->cq.ring != MAP_FAILED); | ||
| 442 | |||
| 443 | umem->cq.mask = CQ_NUM_DESCS - 1; | ||
| 444 | umem->cq.size = CQ_NUM_DESCS; | ||
| 445 | |||
| 446 | umem->frames = (char (*)[FRAME_SIZE])bufs; | ||
| 447 | umem->fd = sfd; | ||
| 448 | |||
| 449 | if (opt_bench == BENCH_TXONLY) { | ||
| 450 | int i; | ||
| 451 | |||
| 452 | for (i = 0; i < NUM_FRAMES; i++) | ||
| 453 | (void)gen_eth_frame(&umem->frames[i][0]); | ||
| 454 | } | ||
| 455 | |||
| 456 | return umem; | ||
| 457 | } | ||
| 458 | |||
| 459 | static struct xdpsock *xsk_configure(struct xdp_umem *umem) | ||
| 460 | { | ||
| 461 | struct sockaddr_xdp sxdp = {}; | ||
| 462 | int sfd, ndescs = NUM_DESCS; | ||
| 463 | struct xdpsock *xsk; | ||
| 464 | bool shared = true; | ||
| 465 | u32 i; | ||
| 466 | |||
| 467 | sfd = socket(PF_XDP, SOCK_RAW, 0); | ||
| 468 | lassert(sfd >= 0); | ||
| 469 | |||
| 470 | xsk = calloc(1, sizeof(*xsk)); | ||
| 471 | lassert(xsk); | ||
| 472 | |||
| 473 | xsk->sfd = sfd; | ||
| 474 | xsk->outstanding_tx = 0; | ||
| 475 | |||
| 476 | if (!umem) { | ||
| 477 | shared = false; | ||
| 478 | xsk->umem = xdp_umem_configure(sfd); | ||
| 479 | } else { | ||
| 480 | xsk->umem = umem; | ||
| 481 | } | ||
| 482 | |||
| 483 | lassert(setsockopt(sfd, SOL_XDP, XDP_RX_RING, | ||
| 484 | &ndescs, sizeof(int)) == 0); | ||
| 485 | lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING, | ||
| 486 | &ndescs, sizeof(int)) == 0); | ||
| 487 | |||
| 488 | /* Rx */ | ||
| 489 | xsk->rx.ring = mmap(NULL, | ||
| 490 | sizeof(struct xdp_ring) + | ||
| 491 | NUM_DESCS * sizeof(struct xdp_desc), | ||
| 492 | PROT_READ | PROT_WRITE, | ||
| 493 | MAP_SHARED | MAP_POPULATE, sfd, | ||
| 494 | XDP_PGOFF_RX_RING); | ||
| 495 | lassert(xsk->rx.ring != MAP_FAILED); | ||
| 496 | |||
| 497 | if (!shared) { | ||
| 498 | for (i = 0; i < NUM_DESCS / 2; i++) | ||
| 499 | lassert(umem_fill_to_kernel(&xsk->umem->fq, &i, 1) | ||
| 500 | == 0); | ||
| 501 | } | ||
| 502 | |||
| 503 | /* Tx */ | ||
| 504 | xsk->tx.ring = mmap(NULL, | ||
| 505 | sizeof(struct xdp_ring) + | ||
| 506 | NUM_DESCS * sizeof(struct xdp_desc), | ||
| 507 | PROT_READ | PROT_WRITE, | ||
| 508 | MAP_SHARED | MAP_POPULATE, sfd, | ||
| 509 | XDP_PGOFF_TX_RING); | ||
| 510 | lassert(xsk->tx.ring != MAP_FAILED); | ||
| 511 | |||
| 512 | xsk->rx.mask = NUM_DESCS - 1; | ||
| 513 | xsk->rx.size = NUM_DESCS; | ||
| 514 | |||
| 515 | xsk->tx.mask = NUM_DESCS - 1; | ||
| 516 | xsk->tx.size = NUM_DESCS; | ||
| 517 | |||
| 518 | sxdp.sxdp_family = PF_XDP; | ||
| 519 | sxdp.sxdp_ifindex = opt_ifindex; | ||
| 520 | sxdp.sxdp_queue_id = opt_queue; | ||
| 521 | if (shared) { | ||
| 522 | sxdp.sxdp_flags = XDP_SHARED_UMEM; | ||
| 523 | sxdp.sxdp_shared_umem_fd = umem->fd; | ||
| 524 | } | ||
| 525 | |||
| 526 | lassert(bind(sfd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0); | ||
| 527 | |||
| 528 | return xsk; | ||
| 529 | } | ||
| 530 | |||
| 531 | static void print_benchmark(bool running) | ||
| 532 | { | ||
| 533 | const char *bench_str = "INVALID"; | ||
| 534 | |||
| 535 | if (opt_bench == BENCH_RXDROP) | ||
| 536 | bench_str = "rxdrop"; | ||
| 537 | else if (opt_bench == BENCH_TXONLY) | ||
| 538 | bench_str = "txonly"; | ||
| 539 | else if (opt_bench == BENCH_L2FWD) | ||
| 540 | bench_str = "l2fwd"; | ||
| 541 | |||
| 542 | printf("%s:%d %s ", opt_if, opt_queue, bench_str); | ||
| 543 | if (opt_xdp_flags & XDP_FLAGS_SKB_MODE) | ||
| 544 | printf("xdp-skb "); | ||
| 545 | else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE) | ||
| 546 | printf("xdp-drv "); | ||
| 547 | else | ||
| 548 | printf(" "); | ||
| 549 | |||
| 550 | if (opt_poll) | ||
| 551 | printf("poll() "); | ||
| 552 | |||
| 553 | if (running) { | ||
| 554 | printf("running..."); | ||
| 555 | fflush(stdout); | ||
| 556 | } | ||
| 557 | } | ||
| 558 | |||
| 559 | static void dump_stats(void) | ||
| 560 | { | ||
| 561 | unsigned long now = get_nsecs(); | ||
| 562 | long dt = now - prev_time; | ||
| 563 | int i; | ||
| 564 | |||
| 565 | prev_time = now; | ||
| 566 | |||
| 567 | for (i = 0; i < num_socks; i++) { | ||
| 568 | char *fmt = "%-15s %'-11.0f %'-11lu\n"; | ||
| 569 | double rx_pps, tx_pps; | ||
| 570 | |||
| 571 | rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) * | ||
| 572 | 1000000000. / dt; | ||
| 573 | tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) * | ||
| 574 | 1000000000. / dt; | ||
| 575 | |||
| 576 | printf("\n sock%d@", i); | ||
| 577 | print_benchmark(false); | ||
| 578 | printf("\n"); | ||
| 579 | |||
| 580 | printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts", | ||
| 581 | dt / 1000000000.); | ||
| 582 | printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts); | ||
| 583 | printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts); | ||
| 584 | |||
| 585 | xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts; | ||
| 586 | xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts; | ||
| 587 | } | ||
| 588 | } | ||
| 589 | |||
| 590 | static void *poller(void *arg) | ||
| 591 | { | ||
| 592 | (void)arg; | ||
| 593 | for (;;) { | ||
| 594 | sleep(opt_interval); | ||
| 595 | dump_stats(); | ||
| 596 | } | ||
| 597 | |||
| 598 | return NULL; | ||
| 599 | } | ||
| 600 | |||
| 601 | static void int_exit(int sig) | ||
| 602 | { | ||
| 603 | (void)sig; | ||
| 604 | dump_stats(); | ||
| 605 | bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); | ||
| 606 | exit(EXIT_SUCCESS); | ||
| 607 | } | ||
| 608 | |||
| 609 | static struct option long_options[] = { | ||
| 610 | {"rxdrop", no_argument, 0, 'r'}, | ||
| 611 | {"txonly", no_argument, 0, 't'}, | ||
| 612 | {"l2fwd", no_argument, 0, 'l'}, | ||
| 613 | {"interface", required_argument, 0, 'i'}, | ||
| 614 | {"queue", required_argument, 0, 'q'}, | ||
| 615 | {"poll", no_argument, 0, 'p'}, | ||
| 616 | {"shared-buffer", no_argument, 0, 's'}, | ||
| 617 | {"xdp-skb", no_argument, 0, 'S'}, | ||
| 618 | {"xdp-native", no_argument, 0, 'N'}, | ||
| 619 | {"interval", required_argument, 0, 'n'}, | ||
| 620 | {0, 0, 0, 0} | ||
| 621 | }; | ||
| 622 | |||
| 623 | static void usage(const char *prog) | ||
| 624 | { | ||
| 625 | const char *str = | ||
| 626 | " Usage: %s [OPTIONS]\n" | ||
| 627 | " Options:\n" | ||
| 628 | " -r, --rxdrop Discard all incoming packets (default)\n" | ||
| 629 | " -t, --txonly Only send packets\n" | ||
| 630 | " -l, --l2fwd MAC swap L2 forwarding\n" | ||
| 631 | " -i, --interface=n Run on interface n\n" | ||
| 632 | " -q, --queue=n Use queue n (default 0)\n" | ||
| 633 | " -p, --poll Use poll syscall\n" | ||
| 634 | " -s, --shared-buffer Use shared packet buffer\n" | ||
| 635 | " -S, --xdp-skb=n Use XDP skb-mod\n" | ||
| 636 | " -N, --xdp-native=n Enfore XDP native mode\n" | ||
| 637 | " -n, --interval=n Specify statistics update interval (default 1 sec).\n" | ||
| 638 | "\n"; | ||
| 639 | fprintf(stderr, str, prog); | ||
| 640 | exit(EXIT_FAILURE); | ||
| 641 | } | ||
| 642 | |||
| 643 | static void parse_command_line(int argc, char **argv) | ||
| 644 | { | ||
| 645 | int option_index, c; | ||
| 646 | |||
| 647 | opterr = 0; | ||
| 648 | |||
| 649 | for (;;) { | ||
| 650 | c = getopt_long(argc, argv, "rtli:q:psSNn:", long_options, | ||
| 651 | &option_index); | ||
| 652 | if (c == -1) | ||
| 653 | break; | ||
| 654 | |||
| 655 | switch (c) { | ||
| 656 | case 'r': | ||
| 657 | opt_bench = BENCH_RXDROP; | ||
| 658 | break; | ||
| 659 | case 't': | ||
| 660 | opt_bench = BENCH_TXONLY; | ||
| 661 | break; | ||
| 662 | case 'l': | ||
| 663 | opt_bench = BENCH_L2FWD; | ||
| 664 | break; | ||
| 665 | case 'i': | ||
| 666 | opt_if = optarg; | ||
| 667 | break; | ||
| 668 | case 'q': | ||
| 669 | opt_queue = atoi(optarg); | ||
| 670 | break; | ||
| 671 | case 's': | ||
| 672 | opt_shared_packet_buffer = 1; | ||
| 673 | break; | ||
| 674 | case 'p': | ||
| 675 | opt_poll = 1; | ||
| 676 | break; | ||
| 677 | case 'S': | ||
| 678 | opt_xdp_flags |= XDP_FLAGS_SKB_MODE; | ||
| 679 | break; | ||
| 680 | case 'N': | ||
| 681 | opt_xdp_flags |= XDP_FLAGS_DRV_MODE; | ||
| 682 | break; | ||
| 683 | case 'n': | ||
| 684 | opt_interval = atoi(optarg); | ||
| 685 | break; | ||
| 686 | default: | ||
| 687 | usage(basename(argv[0])); | ||
| 688 | } | ||
| 689 | } | ||
| 690 | |||
| 691 | opt_ifindex = if_nametoindex(opt_if); | ||
| 692 | if (!opt_ifindex) { | ||
| 693 | fprintf(stderr, "ERROR: interface \"%s\" does not exist\n", | ||
| 694 | opt_if); | ||
| 695 | usage(basename(argv[0])); | ||
| 696 | } | ||
| 697 | } | ||
| 698 | |||
| 699 | static void kick_tx(int fd) | ||
| 700 | { | ||
| 701 | int ret; | ||
| 702 | |||
| 703 | ret = sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0); | ||
| 704 | if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN) | ||
| 705 | return; | ||
| 706 | lassert(0); | ||
| 707 | } | ||
| 708 | |||
| 709 | static inline void complete_tx_l2fwd(struct xdpsock *xsk) | ||
| 710 | { | ||
| 711 | u32 descs[BATCH_SIZE]; | ||
| 712 | unsigned int rcvd; | ||
| 713 | size_t ndescs; | ||
| 714 | |||
| 715 | if (!xsk->outstanding_tx) | ||
| 716 | return; | ||
| 717 | |||
| 718 | kick_tx(xsk->sfd); | ||
| 719 | ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE : | ||
| 720 | xsk->outstanding_tx; | ||
| 721 | |||
| 722 | /* re-add completed Tx buffers */ | ||
| 723 | rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, ndescs); | ||
| 724 | if (rcvd > 0) { | ||
| 725 | umem_fill_to_kernel(&xsk->umem->fq, descs, rcvd); | ||
| 726 | xsk->outstanding_tx -= rcvd; | ||
| 727 | xsk->tx_npkts += rcvd; | ||
| 728 | } | ||
| 729 | } | ||
| 730 | |||
| 731 | static inline void complete_tx_only(struct xdpsock *xsk) | ||
| 732 | { | ||
| 733 | u32 descs[BATCH_SIZE]; | ||
| 734 | unsigned int rcvd; | ||
| 735 | |||
| 736 | if (!xsk->outstanding_tx) | ||
| 737 | return; | ||
| 738 | |||
| 739 | kick_tx(xsk->sfd); | ||
| 740 | |||
| 741 | rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, BATCH_SIZE); | ||
| 742 | if (rcvd > 0) { | ||
| 743 | xsk->outstanding_tx -= rcvd; | ||
| 744 | xsk->tx_npkts += rcvd; | ||
| 745 | } | ||
| 746 | } | ||
| 747 | |||
| 748 | static void rx_drop(struct xdpsock *xsk) | ||
| 749 | { | ||
| 750 | struct xdp_desc descs[BATCH_SIZE]; | ||
| 751 | unsigned int rcvd, i; | ||
| 752 | |||
| 753 | rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE); | ||
| 754 | if (!rcvd) | ||
| 755 | return; | ||
| 756 | |||
| 757 | for (i = 0; i < rcvd; i++) { | ||
| 758 | u32 idx = descs[i].idx; | ||
| 759 | |||
| 760 | lassert(idx < NUM_FRAMES); | ||
| 761 | #if DEBUG_HEXDUMP | ||
| 762 | char *pkt; | ||
| 763 | char buf[32]; | ||
| 764 | |||
| 765 | pkt = xq_get_data(xsk, idx, descs[i].offset); | ||
| 766 | sprintf(buf, "idx=%d", idx); | ||
| 767 | hex_dump(pkt, descs[i].len, buf); | ||
| 768 | #endif | ||
| 769 | } | ||
| 770 | |||
| 771 | xsk->rx_npkts += rcvd; | ||
| 772 | |||
| 773 | umem_fill_to_kernel_ex(&xsk->umem->fq, descs, rcvd); | ||
| 774 | } | ||
| 775 | |||
| 776 | static void rx_drop_all(void) | ||
| 777 | { | ||
| 778 | struct pollfd fds[MAX_SOCKS + 1]; | ||
| 779 | int i, ret, timeout, nfds = 1; | ||
| 780 | |||
| 781 | memset(fds, 0, sizeof(fds)); | ||
| 782 | |||
| 783 | for (i = 0; i < num_socks; i++) { | ||
| 784 | fds[i].fd = xsks[i]->sfd; | ||
| 785 | fds[i].events = POLLIN; | ||
| 786 | timeout = 1000; /* 1 second */ | ||
| 787 | } | ||
| 788 | |||
| 789 | for (;;) { | ||
| 790 | if (opt_poll) { | ||
| 791 | ret = poll(fds, nfds, timeout); | ||
| 792 | if (ret <= 0) | ||
| 793 | continue; | ||
| 794 | } | ||
| 795 | |||
| 796 | for (i = 0; i < num_socks; i++) | ||
| 797 | rx_drop(xsks[i]); | ||
| 798 | } | ||
| 799 | } | ||
| 800 | |||
| 801 | static void tx_only(struct xdpsock *xsk) | ||
| 802 | { | ||
| 803 | int timeout, ret, nfds = 1; | ||
| 804 | struct pollfd fds[nfds + 1]; | ||
| 805 | unsigned int idx = 0; | ||
| 806 | |||
| 807 | memset(fds, 0, sizeof(fds)); | ||
| 808 | fds[0].fd = xsk->sfd; | ||
| 809 | fds[0].events = POLLOUT; | ||
| 810 | timeout = 1000; /* 1 second */ | ||
| 811 | |||
| 812 | for (;;) { | ||
| 813 | if (opt_poll) { | ||
| 814 | ret = poll(fds, nfds, timeout); | ||
| 815 | if (ret <= 0) | ||
| 816 | continue; | ||
| 817 | |||
| 818 | if (fds[0].fd != xsk->sfd || | ||
| 819 | !(fds[0].revents & POLLOUT)) | ||
| 820 | continue; | ||
| 821 | } | ||
| 822 | |||
| 823 | if (xq_nb_free(&xsk->tx, BATCH_SIZE) >= BATCH_SIZE) { | ||
| 824 | lassert(xq_enq_tx_only(&xsk->tx, idx, BATCH_SIZE) == 0); | ||
| 825 | |||
| 826 | xsk->outstanding_tx += BATCH_SIZE; | ||
| 827 | idx += BATCH_SIZE; | ||
| 828 | idx %= NUM_FRAMES; | ||
| 829 | } | ||
| 830 | |||
| 831 | complete_tx_only(xsk); | ||
| 832 | } | ||
| 833 | } | ||
| 834 | |||
| 835 | static void l2fwd(struct xdpsock *xsk) | ||
| 836 | { | ||
| 837 | for (;;) { | ||
| 838 | struct xdp_desc descs[BATCH_SIZE]; | ||
| 839 | unsigned int rcvd, i; | ||
| 840 | int ret; | ||
| 841 | |||
| 842 | for (;;) { | ||
| 843 | complete_tx_l2fwd(xsk); | ||
| 844 | |||
| 845 | rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE); | ||
| 846 | if (rcvd > 0) | ||
| 847 | break; | ||
| 848 | } | ||
| 849 | |||
| 850 | for (i = 0; i < rcvd; i++) { | ||
| 851 | char *pkt = xq_get_data(xsk, descs[i].idx, | ||
| 852 | descs[i].offset); | ||
| 853 | |||
| 854 | swap_mac_addresses(pkt); | ||
| 855 | #if DEBUG_HEXDUMP | ||
| 856 | char buf[32]; | ||
| 857 | u32 idx = descs[i].idx; | ||
| 858 | |||
| 859 | sprintf(buf, "idx=%d", idx); | ||
| 860 | hex_dump(pkt, descs[i].len, buf); | ||
| 861 | #endif | ||
| 862 | } | ||
| 863 | |||
| 864 | xsk->rx_npkts += rcvd; | ||
| 865 | |||
| 866 | ret = xq_enq(&xsk->tx, descs, rcvd); | ||
| 867 | lassert(ret == 0); | ||
| 868 | xsk->outstanding_tx += rcvd; | ||
| 869 | } | ||
| 870 | } | ||
| 871 | |||
| 872 | int main(int argc, char **argv) | ||
| 873 | { | ||
| 874 | struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; | ||
| 875 | char xdp_filename[256]; | ||
| 876 | int i, ret, key = 0; | ||
| 877 | pthread_t pt; | ||
| 878 | |||
| 879 | parse_command_line(argc, argv); | ||
| 880 | |||
| 881 | if (setrlimit(RLIMIT_MEMLOCK, &r)) { | ||
| 882 | fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n", | ||
| 883 | strerror(errno)); | ||
| 884 | exit(EXIT_FAILURE); | ||
| 885 | } | ||
| 886 | |||
| 887 | snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]); | ||
| 888 | |||
| 889 | if (load_bpf_file(xdp_filename)) { | ||
| 890 | fprintf(stderr, "ERROR: load_bpf_file %s\n", bpf_log_buf); | ||
| 891 | exit(EXIT_FAILURE); | ||
| 892 | } | ||
| 893 | |||
| 894 | if (!prog_fd[0]) { | ||
| 895 | fprintf(stderr, "ERROR: load_bpf_file: \"%s\"\n", | ||
| 896 | strerror(errno)); | ||
| 897 | exit(EXIT_FAILURE); | ||
| 898 | } | ||
| 899 | |||
| 900 | if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd[0], opt_xdp_flags) < 0) { | ||
| 901 | fprintf(stderr, "ERROR: link set xdp fd failed\n"); | ||
| 902 | exit(EXIT_FAILURE); | ||
| 903 | } | ||
| 904 | |||
| 905 | ret = bpf_map_update_elem(map_fd[0], &key, &opt_queue, 0); | ||
| 906 | if (ret) { | ||
| 907 | fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n"); | ||
| 908 | exit(EXIT_FAILURE); | ||
| 909 | } | ||
| 910 | |||
| 911 | /* Create sockets... */ | ||
| 912 | xsks[num_socks++] = xsk_configure(NULL); | ||
| 913 | |||
| 914 | #if RR_LB | ||
| 915 | for (i = 0; i < MAX_SOCKS - 1; i++) | ||
| 916 | xsks[num_socks++] = xsk_configure(xsks[0]->umem); | ||
| 917 | #endif | ||
| 918 | |||
| 919 | /* ...and insert them into the map. */ | ||
| 920 | for (i = 0; i < num_socks; i++) { | ||
| 921 | key = i; | ||
| 922 | ret = bpf_map_update_elem(map_fd[1], &key, &xsks[i]->sfd, 0); | ||
| 923 | if (ret) { | ||
| 924 | fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i); | ||
| 925 | exit(EXIT_FAILURE); | ||
| 926 | } | ||
| 927 | } | ||
| 928 | |||
| 929 | signal(SIGINT, int_exit); | ||
| 930 | signal(SIGTERM, int_exit); | ||
| 931 | signal(SIGABRT, int_exit); | ||
| 932 | |||
| 933 | setlocale(LC_ALL, ""); | ||
| 934 | |||
| 935 | ret = pthread_create(&pt, NULL, poller, NULL); | ||
| 936 | lassert(ret == 0); | ||
| 937 | |||
| 938 | prev_time = get_nsecs(); | ||
| 939 | |||
| 940 | if (opt_bench == BENCH_RXDROP) | ||
| 941 | rx_drop_all(); | ||
| 942 | else if (opt_bench == BENCH_TXONLY) | ||
| 943 | tx_only(xsks[0]); | ||
| 944 | else | ||
| 945 | l2fwd(xsks[0]); | ||
| 946 | |||
| 947 | return 0; | ||
| 948 | } | ||
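The uqueue helpers above all lean on unsigned wraparound: producer and consumer indices grow monotonically as u32, so `cached_prod - cached_cons` counts in-flight entries correctly even after either counter wraps, and `& mask` folds the monotonic index into the power-of-two ring. A standalone check of the wrap-safety (illustrative only, not part of the sample):

```c
#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* Consumer index sits just below the u32 wrap point and the
	 * producer has advanced past it.
	 */
	uint32_t cons = UINT32_MAX - 2;
	uint32_t prod = cons + 5;		/* wraps around to 2 */
	uint32_t size = 1024;			/* ring size, power of two */

	assert(prod - cons == 5);		/* entries in flight */
	assert(size - (prod - cons) == 1019);	/* free entries */
	return 0;
}
```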
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 30ba0fee36e4..8f59897fbda1 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py | |||
| @@ -39,9 +39,9 @@ class Helper(object): | |||
| 39 | Break down the helper function prototype into smaller chunks: return type, | 39 | Break down the helper function prototype into smaller chunks: return type, |
| 40 | name, distinct arguments. | 40 | name, distinct arguments. |
| 41 | """ | 41 | """ |
| 42 | arg_re = re.compile('^((const )?(struct )?(\w+|...))( (\**)(\w+))?$') | 42 | arg_re = re.compile('((const )?(struct )?(\w+|...))( (\**)(\w+))?$') |
| 43 | res = {} | 43 | res = {} |
| 44 | proto_re = re.compile('^(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$') | 44 | proto_re = re.compile('(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$') |
| 45 | 45 | ||
| 46 | capture = proto_re.match(self.proto) | 46 | capture = proto_re.match(self.proto) |
| 47 | res['ret_type'] = capture.group(1) | 47 | res['ret_type'] = capture.group(1) |
| @@ -87,7 +87,7 @@ class HeaderParser(object): | |||
| 87 | # - Same as above, with "const" and/or "struct" in front of type | 87 | # - Same as above, with "const" and/or "struct" in front of type |
| 88 | # - "..." (undefined number of arguments, for bpf_trace_printk()) | 88 | # - "..." (undefined number of arguments, for bpf_trace_printk()) |
| 89 | # There is at least one term ("void"), and at most five arguments. | 89 | # There is at least one term ("void"), and at most five arguments. |
| 90 | p = re.compile('^ \* ((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$') | 90 | p = re.compile(' \* ?((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$') |
| 91 | capture = p.match(self.line) | 91 | capture = p.match(self.line) |
| 92 | if not capture: | 92 | if not capture: |
| 93 | raise NoHelperFound | 93 | raise NoHelperFound |
| @@ -95,7 +95,7 @@ class HeaderParser(object): | |||
| 95 | return capture.group(1) | 95 | return capture.group(1) |
| 96 | 96 | ||
| 97 | def parse_desc(self): | 97 | def parse_desc(self): |
| 98 | p = re.compile('^ \* \tDescription$') | 98 | p = re.compile(' \* ?(?:\t| {6,8})Description$') |
| 99 | capture = p.match(self.line) | 99 | capture = p.match(self.line) |
| 100 | if not capture: | 100 | if not capture: |
| 101 | # Helper can have empty description and we might be parsing another | 101 | # Helper can have empty description and we might be parsing another |
| @@ -109,7 +109,7 @@ class HeaderParser(object): | |||
| 109 | if self.line == ' *\n': | 109 | if self.line == ' *\n': |
| 110 | desc += '\n' | 110 | desc += '\n' |
| 111 | else: | 111 | else: |
| 112 | p = re.compile('^ \* \t\t(.*)') | 112 | p = re.compile(' \* ?(?:\t| {6,8})(?:\t| {8})(.*)') |
| 113 | capture = p.match(self.line) | 113 | capture = p.match(self.line) |
| 114 | if capture: | 114 | if capture: |
| 115 | desc += capture.group(1) + '\n' | 115 | desc += capture.group(1) + '\n' |
| @@ -118,7 +118,7 @@ class HeaderParser(object): | |||
| 118 | return desc | 118 | return desc |
| 119 | 119 | ||
| 120 | def parse_ret(self): | 120 | def parse_ret(self): |
| 121 | p = re.compile('^ \* \tReturn$') | 121 | p = re.compile(' \* ?(?:\t| {6,8})Return$') |
| 122 | capture = p.match(self.line) | 122 | capture = p.match(self.line) |
| 123 | if not capture: | 123 | if not capture: |
| 124 | # Helper can have empty retval and we might be parsing another | 124 | # Helper can have empty retval and we might be parsing another |
| @@ -132,7 +132,7 @@ class HeaderParser(object): | |||
| 132 | if self.line == ' *\n': | 132 | if self.line == ' *\n': |
| 133 | ret += '\n' | 133 | ret += '\n' |
| 134 | else: | 134 | else: |
| 135 | p = re.compile('^ \* \t\t(.*)') | 135 | p = re.compile(' \* ?(?:\t| {6,8})(?:\t| {8})(.*)') |
| 136 | capture = p.match(self.line) | 136 | capture = p.match(self.line) |
| 137 | if capture: | 137 | if capture: |
| 138 | ret += capture.group(1) + '\n' | 138 | ret += capture.group(1) + '\n' |
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 4cafe6a19167..5c508d26b367 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c | |||
| @@ -1471,7 +1471,9 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc | |||
| 1471 | return SECCLASS_QIPCRTR_SOCKET; | 1471 | return SECCLASS_QIPCRTR_SOCKET; |
| 1472 | case PF_SMC: | 1472 | case PF_SMC: |
| 1473 | return SECCLASS_SMC_SOCKET; | 1473 | return SECCLASS_SMC_SOCKET; |
| 1474 | #if PF_MAX > 44 | 1474 | case PF_XDP: |
| 1475 | return SECCLASS_XDP_SOCKET; | ||
| 1476 | #if PF_MAX > 45 | ||
| 1475 | #error New address family defined, please update this function. | 1477 | #error New address family defined, please update this function. |
| 1476 | #endif | 1478 | #endif |
| 1477 | } | 1479 | } |
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index 7f0372426494..bd5fe0d3204a 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h | |||
| @@ -240,9 +240,11 @@ struct security_class_mapping secclass_map[] = { | |||
| 240 | { "manage_subnet", NULL } }, | 240 | { "manage_subnet", NULL } }, |
| 241 | { "bpf", | 241 | { "bpf", |
| 242 | {"map_create", "map_read", "map_write", "prog_load", "prog_run"} }, | 242 | {"map_create", "map_read", "map_write", "prog_load", "prog_run"} }, |
| 243 | { "xdp_socket", | ||
| 244 | { COMMON_SOCK_PERMS, NULL } }, | ||
| 243 | { NULL } | 245 | { NULL } |
| 244 | }; | 246 | }; |
| 245 | 247 | ||
| 246 | #if PF_MAX > 44 | 248 | #if PF_MAX > 45 |
| 247 | #error New address family defined, please update secclass_map. | 249 | #error New address family defined, please update secclass_map. |
| 248 | #endif | 250 | #endif |
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 5f512b14bff9..a6258bc8ec4f 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst | |||
| @@ -22,17 +22,19 @@ MAP COMMANDS | |||
| 22 | ============= | 22 | ============= |
| 23 | 23 | ||
| 24 | | **bpftool** **map { show | list }** [*MAP*] | 24 | | **bpftool** **map { show | list }** [*MAP*] |
| 25 | | **bpftool** **map dump** *MAP* | 25 | | **bpftool** **map dump** *MAP* |
| 26 | | **bpftool** **map update** *MAP* **key** [**hex**] *BYTES* **value** [**hex**] *VALUE* [*UPDATE_FLAGS*] | 26 | | **bpftool** **map update** *MAP* **key** *DATA* **value** *VALUE* [*UPDATE_FLAGS*] |
| 27 | | **bpftool** **map lookup** *MAP* **key** [**hex**] *BYTES* | 27 | | **bpftool** **map lookup** *MAP* **key** *DATA* |
| 28 | | **bpftool** **map getnext** *MAP* [**key** [**hex**] *BYTES*] | 28 | | **bpftool** **map getnext** *MAP* [**key** *DATA*] |
| 29 | | **bpftool** **map delete** *MAP* **key** [**hex**] *BYTES* | 29 | | **bpftool** **map delete** *MAP* **key** *DATA* |
| 30 | | **bpftool** **map pin** *MAP* *FILE* | 30 | | **bpftool** **map pin** *MAP* *FILE* |
| 31 | | **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*] | ||
| 31 | | **bpftool** **map help** | 32 | | **bpftool** **map help** |
| 32 | | | 33 | | |
| 33 | | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } | 34 | | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } |
| 35 | | *DATA* := { [**hex**] *BYTES* } | ||
| 34 | | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } | 36 | | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } |
| 35 | | *VALUE* := { *BYTES* | *MAP* | *PROG* } | 37 | | *VALUE* := { *DATA* | *MAP* | *PROG* } |
| 36 | | *UPDATE_FLAGS* := { **any** | **exist** | **noexist** } | 38 | | *UPDATE_FLAGS* := { **any** | **exist** | **noexist** } |
| 37 | 39 | ||
| 38 | DESCRIPTION | 40 | DESCRIPTION |
| @@ -48,7 +50,7 @@ DESCRIPTION | |||
| 48 | **bpftool map dump** *MAP* | 50 | **bpftool map dump** *MAP* |
| 49 | Dump all entries in a given *MAP*. | 51 | Dump all entries in a given *MAP*. |
| 50 | 52 | ||
| 51 | **bpftool map update** *MAP* **key** [**hex**] *BYTES* **value** [**hex**] *VALUE* [*UPDATE_FLAGS*] | 53 | **bpftool map update** *MAP* **key** *DATA* **value** *VALUE* [*UPDATE_FLAGS*] |
| 52 | Update map entry for a given *KEY*. | 54 | Update map entry for a given *KEY*. |
| 53 | 55 | ||
| 54 | *UPDATE_FLAGS* can be one of: **any** update existing entry | 56 | *UPDATE_FLAGS* can be one of: **any** update existing entry |
| @@ -61,13 +63,13 @@ DESCRIPTION | |||
| 61 | the bytes are parsed as decimal values, unless a "0x" prefix | 63 | the bytes are parsed as decimal values, unless a "0x" prefix |
| 62 | (for hexadecimal) or a "0" prefix (for octal) is provided. | 64 | (for hexadecimal) or a "0" prefix (for octal) is provided. |
| 63 | 65 | ||
| 64 | **bpftool map lookup** *MAP* **key** [**hex**] *BYTES* | 66 | **bpftool map lookup** *MAP* **key** *DATA* |
| 65 | Look up **key** in the map. | 67 | Look up **key** in the map. |
| 66 | 68 | ||
| 67 | **bpftool map getnext** *MAP* [**key** [**hex**] *BYTES*] | 69 | **bpftool map getnext** *MAP* [**key** *DATA*] |
| 68 | Get next key. If *key* is not specified, get first key. | 70 | Get next key. If *key* is not specified, get first key. |
| 69 | 71 | ||
| 70 | **bpftool map delete** *MAP* **key** [**hex**] *BYTES* | 72 | **bpftool map delete** *MAP* **key** *DATA* |
| 71 | Remove entry from the map. | 73 | Remove entry from the map. |
| 72 | 74 | ||
| 73 | **bpftool map pin** *MAP* *FILE* | 75 | **bpftool map pin** *MAP* *FILE* |
| @@ -75,6 +77,22 @@ DESCRIPTION | |||
| 75 | 77 | ||
| 76 | Note: *FILE* must be located in *bpffs* mount. | 78 | Note: *FILE* must be located in *bpffs* mount. |
| 77 | 79 | ||
| 80 | **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*] | ||
| 81 | Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map. | ||
| 82 | |||
| 83 | Install perf rings into a perf event array map and dump | ||
| 84 | output of any bpf_perf_event_output() call in the kernel. | ||
| 85 | By default, read the number of possible CPUs on the system | ||
| 86 | and install a perf ring for each CPU at the corresponding | ||
| 87 | index in the array. | ||
| 88 | |||
| 89 | If **cpu** and **index** are specified, install a single | ||
| 90 | perf ring for the given **cpu** at **index** in the array. | ||
| 91 | |||
| 92 | Note that installing a perf ring into an array will silently | ||
| 93 | replace any existing ring, so any other application that | ||
| 94 | installed its rings earlier will stop receiving events. | ||
| 95 | |||
| 78 | **bpftool map help** | 96 | **bpftool map help** |
| 79 | Print short help message. | 97 | Print short help message. |
| 80 | 98 | ||
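The event_pipe command documented above only consumes events; something in the kernel has to produce them with bpf_perf_event_output() into a BPF_MAP_TYPE_PERF_EVENT_ARRAY. A minimal kernel-side sketch in the style of the samples earlier in this diff; the map name, probe point, and payload are illustrative, not from this patch set:

```c
// SPDX-License-Identifier: GPL-2.0
#include <uapi/linux/bpf.h>
#include <uapi/linux/ptrace.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") my_perf_map = {
	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	.key_size = sizeof(int),
	.value_size = sizeof(__u32),
	.max_entries = 64,	/* should cover all possible CPUs */
};

SEC("kprobe/sys_write")
int emit_event(struct pt_regs *ctx)
{
	__u64 pid_tgid = bpf_get_current_pid_tgid();

	/* Emit one record on the current CPU's ring; "bpftool map
	 * event_pipe id <ID>" on this map would then print it.
	 */
	bpf_perf_event_output(ctx, &my_perf_map, BPF_F_CURRENT_CPU,
			      &pid_tgid, sizeof(pid_tgid));
	return 0;
}

char _license[] SEC("license") = "GPL";
```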
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 20689a321ffe..564cb0d9692b 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst | |||
| @@ -23,7 +23,7 @@ SYNOPSIS | |||
| 23 | 23 | ||
| 24 | *MAP-COMMANDS* := | 24 | *MAP-COMMANDS* := |
| 25 | { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete** | 25 | { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete** |
| 26 | | **pin** | **help** } | 26 | | **pin** | **event_pipe** | **help** } |
| 27 | 27 | ||
| 28 | *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** | 28 | *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** |
| 29 | | **load** | **help** } | 29 | | **load** | **help** } |
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 4e69782c4a79..892dbf095bff 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile | |||
| @@ -39,7 +39,12 @@ CC = gcc | |||
| 39 | 39 | ||
| 40 | CFLAGS += -O2 | 40 | CFLAGS += -O2 |
| 41 | CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers | 41 | CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers |
| 42 | CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/ | 42 | CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ |
| 43 | -I$(srctree)/kernel/bpf/ \ | ||
| 44 | -I$(srctree)/tools/include \ | ||
| 45 | -I$(srctree)/tools/include/uapi \ | ||
| 46 | -I$(srctree)/tools/lib/bpf \ | ||
| 47 | -I$(srctree)/tools/perf | ||
| 43 | CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' | 48 | CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' |
| 44 | LIBS = -lelf -lbfd -lopcodes $(LIBBPF) | 49 | LIBS = -lelf -lbfd -lopcodes $(LIBBPF) |
| 45 | 50 | ||
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 852d84a98acd..b301c9b315f1 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | # bpftool(8) bash completion -*- shell-script -*- | 1 | # bpftool(8) bash completion -*- shell-script -*- |
| 2 | # | 2 | # |
| 3 | # Copyright (C) 2017 Netronome Systems, Inc. | 3 | # Copyright (C) 2017-2018 Netronome Systems, Inc. |
| 4 | # | 4 | # |
| 5 | # This software is dual licensed under the GNU General License | 5 | # This software is dual licensed under the GNU General License |
| 6 | # Version 2, June 1991 as shown in the file COPYING in the top-level | 6 | # Version 2, June 1991 as shown in the file COPYING in the top-level |
| @@ -79,6 +79,14 @@ _bpftool_get_map_ids() | |||
| 79 | command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) ) | 79 | command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) ) |
| 80 | } | 80 | } |
| 81 | 81 | ||
| 82 | _bpftool_get_perf_map_ids() | ||
| 83 | { | ||
| 84 | COMPREPLY+=( $( compgen -W "$( bpftool -jp map 2>&1 | \ | ||
| 85 | command grep -C2 perf_event_array | \ | ||
| 86 | command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) ) | ||
| 87 | } | ||
| 88 | |||
| 89 | |||
| 82 | _bpftool_get_prog_ids() | 90 | _bpftool_get_prog_ids() |
| 83 | { | 91 | { |
| 84 | COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \ | 92 | COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \ |
| @@ -359,10 +367,34 @@ _bpftool() | |||
| 359 | fi | 367 | fi |
| 360 | return 0 | 368 | return 0 |
| 361 | ;; | 369 | ;; |
| 370 | event_pipe) | ||
| 371 | case $prev in | ||
| 372 | $command) | ||
| 373 | COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) | ||
| 374 | return 0 | ||
| 375 | ;; | ||
| 376 | id) | ||
| 377 | _bpftool_get_perf_map_ids | ||
| 378 | return 0 | ||
| 379 | ;; | ||
| 380 | cpu) | ||
| 381 | return 0 | ||
| 382 | ;; | ||
| 383 | index) | ||
| 384 | return 0 | ||
| 385 | ;; | ||
| 386 | *) | ||
| 387 | _bpftool_once_attr 'cpu' | ||
| 388 | _bpftool_once_attr 'index' | ||
| 389 | return 0 | ||
| 390 | ;; | ||
| 391 | esac | ||
| 392 | ;; | ||
| 362 | *) | 393 | *) |
| 363 | [[ $prev == $object ]] && \ | 394 | [[ $prev == $object ]] && \ |
| 364 | COMPREPLY=( $( compgen -W 'delete dump getnext help \ | 395 | COMPREPLY=( $( compgen -W 'delete dump getnext help \ |
| 365 | lookup pin show list update' -- "$cur" ) ) | 396 | lookup pin event_pipe show list update' -- \ |
| 397 | "$cur" ) ) | ||
| 366 | ;; | 398 | ;; |
| 367 | esac | 399 | esac |
| 368 | ;; | 400 | ;; |
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 465995281dcd..32f9e397a6c0 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2017-2018 Netronome Systems, Inc. |
| 3 | * | 3 | * |
| 4 | * This software is dual licensed under the GNU General License Version 2, | 4 | * This software is dual licensed under the GNU General License Version 2, |
| 5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
| @@ -33,6 +33,7 @@ | |||
| 33 | 33 | ||
| 34 | /* Author: Jakub Kicinski <kubakici@wp.pl> */ | 34 | /* Author: Jakub Kicinski <kubakici@wp.pl> */ |
| 35 | 35 | ||
| 36 | #include <ctype.h> | ||
| 36 | #include <errno.h> | 37 | #include <errno.h> |
| 37 | #include <fcntl.h> | 38 | #include <fcntl.h> |
| 38 | #include <fts.h> | 39 | #include <fts.h> |
| @@ -330,6 +331,16 @@ char *get_fdinfo(int fd, const char *key) | |||
| 330 | return NULL; | 331 | return NULL; |
| 331 | } | 332 | } |
| 332 | 333 | ||
| 334 | void print_data_json(uint8_t *data, size_t len) | ||
| 335 | { | ||
| 336 | unsigned int i; | ||
| 337 | |||
| 338 | jsonw_start_array(json_wtr); | ||
| 339 | for (i = 0; i < len; i++) | ||
| 340 | jsonw_printf(json_wtr, "%d", data[i]); | ||
| 341 | jsonw_end_array(json_wtr); | ||
| 342 | } | ||
| 343 | |||
| 333 | void print_hex_data_json(uint8_t *data, size_t len) | 344 | void print_hex_data_json(uint8_t *data, size_t len) |
| 334 | { | 345 | { |
| 335 | unsigned int i; | 346 | unsigned int i; |
| @@ -420,6 +431,70 @@ void delete_pinned_obj_table(struct pinned_obj_table *tab) | |||
| 420 | } | 431 | } |
| 421 | } | 432 | } |
| 422 | 433 | ||
| 434 | unsigned int get_page_size(void) | ||
| 435 | { | ||
| 436 | static int result; | ||
| 437 | |||
| 438 | if (!result) | ||
| 439 | result = getpagesize(); | ||
| 440 | return result; | ||
| 441 | } | ||
| 442 | |||
| 443 | unsigned int get_possible_cpus(void) | ||
| 444 | { | ||
| 445 | static unsigned int result; | ||
| 446 | char buf[128]; | ||
| 447 | long int n; | ||
| 448 | char *ptr; | ||
| 449 | int fd; | ||
| 450 | |||
| 451 | if (result) | ||
| 452 | return result; | ||
| 453 | |||
| 454 | fd = open("/sys/devices/system/cpu/possible", O_RDONLY); | ||
| 455 | if (fd < 0) { | ||
| 456 | p_err("can't open sysfs possible cpus"); | ||
| 457 | exit(-1); | ||
| 458 | } | ||
| 459 | |||
| 460 | n = read(fd, buf, sizeof(buf)); | ||
| 461 | if (n < 2) { | ||
| 462 | p_err("can't read sysfs possible cpus"); | ||
| 463 | exit(-1); | ||
| 464 | } | ||
| 465 | close(fd); | ||
| 466 | |||
| 467 | if (n == sizeof(buf)) { | ||
| 468 | p_err("read sysfs possible cpus overflow"); | ||
| 469 | exit(-1); | ||
| 470 | } | ||
| 471 | |||
| 472 | ptr = buf; | ||
| 473 | n = 0; | ||
| 474 | while (*ptr && *ptr != '\n') { | ||
| 475 | unsigned int a, b; | ||
| 476 | |||
| 477 | if (sscanf(ptr, "%u-%u", &a, &b) == 2) { | ||
| 478 | n += b - a + 1; | ||
| 479 | |||
| 480 | ptr = strchr(ptr, '-') + 1; | ||
| 481 | } else if (sscanf(ptr, "%u", &a) == 1) { | ||
| 482 | n++; | ||
| 483 | } else { | ||
| 484 | assert(0); | ||
| 485 | } | ||
| 486 | |||
| 487 | while (isdigit(*ptr)) | ||
| 488 | ptr++; | ||
| 489 | if (*ptr == ',') | ||
| 490 | ptr++; | ||
| 491 | } | ||
| 492 | |||
| 493 | result = n; | ||
| 494 | |||
| 495 | return result; | ||
| 496 | } | ||
| 497 | |||
| 423 | static char * | 498 | static char * |
| 424 | ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf) | 499 | ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf) |
| 425 | { | 500 | { |
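Moving get_possible_cpus() into common.c lets both the map code and the new perf ring reader size per-CPU buffers from one place. A minimal sketch of the usual caller pattern, assuming the kernel's rounding of each per-CPU value slice up to 8 bytes; the function below is illustrative, not part of this patch::

	#include <stdlib.h>
	#include <linux/bpf.h>	/* struct bpf_map_info */
	#include "main.h"	/* get_possible_cpus() */

	/* Allocate a lookup buffer for one element of a per-CPU map.
	 * The kernel returns one value slice per possible CPU, each
	 * rounded up to 8 bytes.
	 */
	static void *alloc_percpu_value(const struct bpf_map_info *info)
	{
		unsigned int ncpus = get_possible_cpus();
		size_t slice = (info->value_size + 7) / 8 * 8;

		return malloc((size_t)ncpus * slice);
	}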
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index b8e9584d6246..6173cd997e7a 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2017-2018 Netronome Systems, Inc. |
| 3 | * | 3 | * |
| 4 | * This software is dual licensed under the GNU General Public License Version 2, | 4 | * This software is dual licensed under the GNU General Public License Version 2, |
| 5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
| @@ -117,14 +117,19 @@ int do_pin_fd(int fd, const char *name); | |||
| 117 | 117 | ||
| 118 | int do_prog(int argc, char **arg); | 118 | int do_prog(int argc, char **arg); |
| 119 | int do_map(int argc, char **arg); | 119 | int do_map(int argc, char **arg); |
| 120 | int do_event_pipe(int argc, char **argv); | ||
| 120 | int do_cgroup(int argc, char **arg); | 121 | int do_cgroup(int argc, char **arg); |
| 121 | 122 | ||
| 122 | int prog_parse_fd(int *argc, char ***argv); | 123 | int prog_parse_fd(int *argc, char ***argv); |
| 124 | int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); | ||
| 123 | 125 | ||
| 124 | void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, | 126 | void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, |
| 125 | const char *arch); | 127 | const char *arch); |
| 128 | void print_data_json(uint8_t *data, size_t len); | ||
| 126 | void print_hex_data_json(uint8_t *data, size_t len); | 129 | void print_hex_data_json(uint8_t *data, size_t len); |
| 127 | 130 | ||
| 131 | unsigned int get_page_size(void); | ||
| 132 | unsigned int get_possible_cpus(void); | ||
| 128 | const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino); | 133 | const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino); |
| 129 | 134 | ||
| 130 | #endif | 135 | #endif |
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index a6cdb640a0d7..af6766e956ba 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2017 Netronome Systems, Inc. | 2 | * Copyright (C) 2017-2018 Netronome Systems, Inc. |
| 3 | * | 3 | * |
| 4 | * This software is dual licensed under the GNU General Public License Version 2, | 4 | * This software is dual licensed under the GNU General Public License Version 2, |
| 5 | * June 1991 as shown in the file COPYING in the top-level directory of this | 5 | * June 1991 as shown in the file COPYING in the top-level directory of this |
| @@ -34,7 +34,6 @@ | |||
| 34 | /* Author: Jakub Kicinski <kubakici@wp.pl> */ | 34 | /* Author: Jakub Kicinski <kubakici@wp.pl> */ |
| 35 | 35 | ||
| 36 | #include <assert.h> | 36 | #include <assert.h> |
| 37 | #include <ctype.h> | ||
| 38 | #include <errno.h> | 37 | #include <errno.h> |
| 39 | #include <fcntl.h> | 38 | #include <fcntl.h> |
| 40 | #include <stdbool.h> | 39 | #include <stdbool.h> |
| @@ -69,61 +68,6 @@ static const char * const map_type_name[] = { | |||
| 69 | [BPF_MAP_TYPE_CPUMAP] = "cpumap", | 68 | [BPF_MAP_TYPE_CPUMAP] = "cpumap", |
| 70 | }; | 69 | }; |
| 71 | 70 | ||
| 72 | static unsigned int get_possible_cpus(void) | ||
| 73 | { | ||
| 74 | static unsigned int result; | ||
| 75 | char buf[128]; | ||
| 76 | long int n; | ||
| 77 | char *ptr; | ||
| 78 | int fd; | ||
| 79 | |||
| 80 | if (result) | ||
| 81 | return result; | ||
| 82 | |||
| 83 | fd = open("/sys/devices/system/cpu/possible", O_RDONLY); | ||
| 84 | if (fd < 0) { | ||
| 85 | p_err("can't open sysfs possible cpus"); | ||
| 86 | exit(-1); | ||
| 87 | } | ||
| 88 | |||
| 89 | n = read(fd, buf, sizeof(buf)); | ||
| 90 | if (n < 2) { | ||
| 91 | p_err("can't read sysfs possible cpus"); | ||
| 92 | exit(-1); | ||
| 93 | } | ||
| 94 | close(fd); | ||
| 95 | |||
| 96 | if (n == sizeof(buf)) { | ||
| 97 | p_err("read sysfs possible cpus overflow"); | ||
| 98 | exit(-1); | ||
| 99 | } | ||
| 100 | |||
| 101 | ptr = buf; | ||
| 102 | n = 0; | ||
| 103 | while (*ptr && *ptr != '\n') { | ||
| 104 | unsigned int a, b; | ||
| 105 | |||
| 106 | if (sscanf(ptr, "%u-%u", &a, &b) == 2) { | ||
| 107 | n += b - a + 1; | ||
| 108 | |||
| 109 | ptr = strchr(ptr, '-') + 1; | ||
| 110 | } else if (sscanf(ptr, "%u", &a) == 1) { | ||
| 111 | n++; | ||
| 112 | } else { | ||
| 113 | assert(0); | ||
| 114 | } | ||
| 115 | |||
| 116 | while (isdigit(*ptr)) | ||
| 117 | ptr++; | ||
| 118 | if (*ptr == ',') | ||
| 119 | ptr++; | ||
| 120 | } | ||
| 121 | |||
| 122 | result = n; | ||
| 123 | |||
| 124 | return result; | ||
| 125 | } | ||
| 126 | |||
| 127 | static bool map_is_per_cpu(__u32 type) | 71 | static bool map_is_per_cpu(__u32 type) |
| 128 | { | 72 | { |
| 129 | return type == BPF_MAP_TYPE_PERCPU_HASH || | 73 | return type == BPF_MAP_TYPE_PERCPU_HASH || |
| @@ -186,8 +130,7 @@ static int map_parse_fd(int *argc, char ***argv) | |||
| 186 | return -1; | 130 | return -1; |
| 187 | } | 131 | } |
| 188 | 132 | ||
| 189 | static int | 133 | int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) |
| 190 | map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) | ||
| 191 | { | 134 | { |
| 192 | int err; | 135 | int err; |
| 193 | int fd; | 136 | int fd; |
| @@ -873,23 +816,25 @@ static int do_help(int argc, char **argv) | |||
| 873 | 816 | ||
| 874 | fprintf(stderr, | 817 | fprintf(stderr, |
| 875 | "Usage: %s %s { show | list } [MAP]\n" | 818 | "Usage: %s %s { show | list } [MAP]\n" |
| 876 | " %s %s dump MAP\n" | 819 | " %s %s dump MAP\n" |
| 877 | " %s %s update MAP key [hex] BYTES value [hex] VALUE [UPDATE_FLAGS]\n" | 820 | " %s %s update MAP key DATA value VALUE [UPDATE_FLAGS]\n" |
| 878 | " %s %s lookup MAP key [hex] BYTES\n" | 821 | " %s %s lookup MAP key DATA\n" |
| 879 | " %s %s getnext MAP [key [hex] BYTES]\n" | 822 | " %s %s getnext MAP [key DATA]\n" |
| 880 | " %s %s delete MAP key [hex] BYTES\n" | 823 | " %s %s delete MAP key DATA\n" |
| 881 | " %s %s pin MAP FILE\n" | 824 | " %s %s pin MAP FILE\n" |
| 825 | " %s %s event_pipe MAP [cpu N index M]\n" | ||
| 882 | " %s %s help\n" | 826 | " %s %s help\n" |
| 883 | "\n" | 827 | "\n" |
| 884 | " MAP := { id MAP_ID | pinned FILE }\n" | 828 | " MAP := { id MAP_ID | pinned FILE }\n" |
| 829 | " DATA := { [hex] BYTES }\n" | ||
| 885 | " " HELP_SPEC_PROGRAM "\n" | 830 | " " HELP_SPEC_PROGRAM "\n" |
| 886 | " VALUE := { BYTES | MAP | PROG }\n" | 831 | " VALUE := { DATA | MAP | PROG }\n" |
| 887 | " UPDATE_FLAGS := { any | exist | noexist }\n" | 832 | " UPDATE_FLAGS := { any | exist | noexist }\n" |
| 888 | " " HELP_SPEC_OPTIONS "\n" | 833 | " " HELP_SPEC_OPTIONS "\n" |
| 889 | "", | 834 | "", |
| 890 | bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], | 835 | bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], |
| 891 | bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], | 836 | bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], |
| 892 | bin_name, argv[-2], bin_name, argv[-2]); | 837 | bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]); |
| 893 | 838 | ||
| 894 | return 0; | 839 | return 0; |
| 895 | } | 840 | } |
| @@ -904,6 +849,7 @@ static const struct cmd cmds[] = { | |||
| 904 | { "getnext", do_getnext }, | 849 | { "getnext", do_getnext }, |
| 905 | { "delete", do_delete }, | 850 | { "delete", do_delete }, |
| 906 | { "pin", do_pin }, | 851 | { "pin", do_pin }, |
| 852 | { "event_pipe", do_event_pipe }, | ||
| 907 | { 0 } | 853 | { 0 } |
| 908 | }; | 854 | }; |
| 909 | 855 | ||
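The new DATA grammar and the UPDATE_FLAGS keywords in the help text correspond directly to the flags accepted by bpf_map_update_elem() in tools/lib/bpf. A hedged sketch of that keyword-to-flag mapping; the function is illustrative and the tool's actual parsing may differ::

	#include <string.h>
	#include <linux/bpf.h>	/* BPF_ANY, BPF_EXIST, BPF_NOEXIST */

	/* Illustrative mapping for "update MAP ... [UPDATE_FLAGS]":
	 * "any" creates or updates, "exist" only updates an existing
	 * entry, "noexist" only creates a new one.
	 */
	static int parse_update_flags(const char *kw, __u64 *flags)
	{
		if (!strcmp(kw, "any"))
			*flags = BPF_ANY;
		else if (!strcmp(kw, "exist"))
			*flags = BPF_EXIST;
		else if (!strcmp(kw, "noexist"))
			*flags = BPF_NOEXIST;
		else
			return -1;
		return 0;
	}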
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c new file mode 100644 index 000000000000..c5a2ced8552d --- /dev/null +++ b/tools/bpf/bpftool/map_perf_ring.c | |||
| @@ -0,0 +1,347 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0-only | ||
| 2 | /* Copyright (C) 2018 Netronome Systems, Inc. */ | ||
| 3 | /* This program is free software; you can redistribute it and/or | ||
| 4 | * modify it under the terms of version 2 of the GNU General Public | ||
| 5 | * License as published by the Free Software Foundation. | ||
| 6 | */ | ||
| 7 | #include <errno.h> | ||
| 8 | #include <fcntl.h> | ||
| 9 | #include <libbpf.h> | ||
| 10 | #include <poll.h> | ||
| 11 | #include <signal.h> | ||
| 12 | #include <stdbool.h> | ||
| 13 | #include <stdio.h> | ||
| 14 | #include <stdlib.h> | ||
| 15 | #include <string.h> | ||
| 16 | #include <time.h> | ||
| 17 | #include <unistd.h> | ||
| 18 | #include <linux/bpf.h> | ||
| 19 | #include <linux/perf_event.h> | ||
| 20 | #include <sys/ioctl.h> | ||
| 21 | #include <sys/mman.h> | ||
| 22 | #include <sys/syscall.h> | ||
| 23 | |||
| 24 | #include <bpf.h> | ||
| 25 | #include <perf-sys.h> | ||
| 26 | |||
| 27 | #include "main.h" | ||
| 28 | |||
| 29 | #define MMAP_PAGE_CNT 16 | ||
| 30 | |||
| 31 | static bool stop; | ||
| 32 | |||
| 33 | struct event_ring_info { | ||
| 34 | int fd; | ||
| 35 | int key; | ||
| 36 | unsigned int cpu; | ||
| 37 | void *mem; | ||
| 38 | }; | ||
| 39 | |||
| 40 | struct perf_event_sample { | ||
| 41 | struct perf_event_header header; | ||
| 42 | __u32 size; | ||
| 43 | unsigned char data[]; | ||
| 44 | }; | ||
| 45 | |||
| 46 | static void int_exit(int signo) | ||
| 47 | { | ||
| 48 | fprintf(stderr, "Stopping...\n"); | ||
| 49 | stop = true; | ||
| 50 | } | ||
| 51 | |||
| 52 | static void | ||
| 53 | print_bpf_output(struct event_ring_info *ring, struct perf_event_sample *e) | ||
| 54 | { | ||
| 55 | struct { | ||
| 56 | struct perf_event_header header; | ||
| 57 | __u64 id; | ||
| 58 | __u64 lost; | ||
| 59 | } *lost = (void *)e; | ||
| 60 | struct timespec ts; | ||
| 61 | |||
| 62 | if (clock_gettime(CLOCK_MONOTONIC, &ts)) { | ||
| 63 | perror("Can't read clock for timestamp"); | ||
| 64 | return; | ||
| 65 | } | ||
| 66 | |||
| 67 | if (json_output) { | ||
| 68 | jsonw_start_object(json_wtr); | ||
| 69 | jsonw_name(json_wtr, "timestamp"); | ||
| 70 | jsonw_uint(json_wtr, ts.tv_sec * 1000000000ull + ts.tv_nsec); | ||
| 71 | jsonw_name(json_wtr, "type"); | ||
| 72 | jsonw_uint(json_wtr, e->header.type); | ||
| 73 | jsonw_name(json_wtr, "cpu"); | ||
| 74 | jsonw_uint(json_wtr, ring->cpu); | ||
| 75 | jsonw_name(json_wtr, "index"); | ||
| 76 | jsonw_uint(json_wtr, ring->key); | ||
| 77 | if (e->header.type == PERF_RECORD_SAMPLE) { | ||
| 78 | jsonw_name(json_wtr, "data"); | ||
| 79 | print_data_json(e->data, e->size); | ||
| 80 | } else if (e->header.type == PERF_RECORD_LOST) { | ||
| 81 | jsonw_name(json_wtr, "lost"); | ||
| 82 | jsonw_start_object(json_wtr); | ||
| 83 | jsonw_name(json_wtr, "id"); | ||
| 84 | jsonw_uint(json_wtr, lost->id); | ||
| 85 | jsonw_name(json_wtr, "count"); | ||
| 86 | jsonw_uint(json_wtr, lost->lost); | ||
| 87 | jsonw_end_object(json_wtr); | ||
| 88 | } | ||
| 89 | jsonw_end_object(json_wtr); | ||
| 90 | } else { | ||
| 91 | if (e->header.type == PERF_RECORD_SAMPLE) { | ||
| 92 | printf("== @%ld.%09ld CPU: %d index: %d =====\n", | ||
| 93 | (long)ts.tv_sec, (long)ts.tv_nsec, | ||
| 94 | ring->cpu, ring->key); | ||
| 95 | fprint_hex(stdout, e->data, e->size, " "); | ||
| 96 | printf("\n"); | ||
| 97 | } else if (e->header.type == PERF_RECORD_LOST) { | ||
| 98 | printf("lost %lld events\n", lost->lost); | ||
| 99 | } else { | ||
| 100 | printf("unknown event type=%d size=%d\n", | ||
| 101 | e->header.type, e->header.size); | ||
| 102 | } | ||
| 103 | } | ||
| 104 | } | ||
| 105 | |||
| 106 | static void | ||
| 107 | perf_event_read(struct event_ring_info *ring, void **buf, size_t *buf_len) | ||
| 108 | { | ||
| 109 | volatile struct perf_event_mmap_page *header = ring->mem; | ||
| 110 | __u64 buffer_size = MMAP_PAGE_CNT * get_page_size(); | ||
| 111 | __u64 data_tail = header->data_tail; | ||
| 112 | __u64 data_head = header->data_head; | ||
| 113 | void *base, *begin, *end; | ||
| 114 | |||
| 115 | asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ | ||
| 116 | if (data_head == data_tail) | ||
| 117 | return; | ||
| 118 | |||
| 119 | base = ((char *)header) + get_page_size(); | ||
| 120 | |||
| 121 | begin = base + data_tail % buffer_size; | ||
| 122 | end = base + data_head % buffer_size; | ||
| 123 | |||
| 124 | while (begin != end) { | ||
| 125 | struct perf_event_sample *e; | ||
| 126 | |||
| 127 | e = begin; | ||
| 128 | if (begin + e->header.size > base + buffer_size) { | ||
| 129 | long len = base + buffer_size - begin; | ||
| 130 | |||
| 131 | if (*buf_len < e->header.size) { | ||
| 132 | free(*buf); | ||
| 133 | *buf = malloc(e->header.size); | ||
| 134 | if (!*buf) { | ||
| 135 | fprintf(stderr, | ||
| 136 | "can't allocate memory"); | ||
| 137 | stop = true; | ||
| 138 | return; | ||
| 139 | } | ||
| 140 | *buf_len = e->header.size; | ||
| 141 | } | ||
| 142 | |||
| 143 | memcpy(*buf, begin, len); | ||
| 144 | memcpy(*buf + len, base, e->header.size - len); | ||
| 145 | e = (void *)*buf; | ||
| 146 | begin = base + e->header.size - len; | ||
| 147 | } else if (begin + e->header.size == base + buffer_size) { | ||
| 148 | begin = base; | ||
| 149 | } else { | ||
| 150 | begin += e->header.size; | ||
| 151 | } | ||
| 152 | |||
| 153 | print_bpf_output(ring, e); | ||
| 154 | } | ||
| 155 | |||
| 156 | __sync_synchronize(); /* smp_mb() */ | ||
| 157 | header->data_tail = data_head; | ||
| 158 | } | ||
| 159 | |||
| 160 | static int perf_mmap_size(void) | ||
| 161 | { | ||
| 162 | return get_page_size() * (MMAP_PAGE_CNT + 1); | ||
| 163 | } | ||
| 164 | |||
| 165 | static void *perf_event_mmap(int fd) | ||
| 166 | { | ||
| 167 | int mmap_size = perf_mmap_size(); | ||
| 168 | void *base; | ||
| 169 | |||
| 170 | base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); | ||
| 171 | if (base == MAP_FAILED) { | ||
| 172 | p_err("event mmap failed: %s\n", strerror(errno)); | ||
| 173 | return NULL; | ||
| 174 | } | ||
| 175 | |||
| 176 | return base; | ||
| 177 | } | ||
| 178 | |||
| 179 | static void perf_event_unmap(void *mem) | ||
| 180 | { | ||
| 181 | if (munmap(mem, perf_mmap_size())) | ||
| 182 | fprintf(stderr, "Can't unmap ring memory!\n"); | ||
| 183 | } | ||
| 184 | |||
| 185 | static int bpf_perf_event_open(int map_fd, int key, int cpu) | ||
| 186 | { | ||
| 187 | struct perf_event_attr attr = { | ||
| 188 | .sample_type = PERF_SAMPLE_RAW, | ||
| 189 | .type = PERF_TYPE_SOFTWARE, | ||
| 190 | .config = PERF_COUNT_SW_BPF_OUTPUT, | ||
| 191 | }; | ||
| 192 | int pmu_fd; | ||
| 193 | |||
| 194 | pmu_fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); | ||
| 195 | if (pmu_fd < 0) { | ||
| 196 | p_err("failed to open perf event %d for CPU %d", key, cpu); | ||
| 197 | return -1; | ||
| 198 | } | ||
| 199 | |||
| 200 | if (bpf_map_update_elem(map_fd, &key, &pmu_fd, BPF_ANY)) { | ||
| 201 | p_err("failed to update map for event %d for CPU %d", key, cpu); | ||
| 202 | goto err_close; | ||
| 203 | } | ||
| 204 | if (ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) { | ||
| 205 | p_err("failed to enable event %d for CPU %d", key, cpu); | ||
| 206 | goto err_close; | ||
| 207 | } | ||
| 208 | |||
| 209 | return pmu_fd; | ||
| 210 | |||
| 211 | err_close: | ||
| 212 | close(pmu_fd); | ||
| 213 | return -1; | ||
| 214 | } | ||
| 215 | |||
| 216 | int do_event_pipe(int argc, char **argv) | ||
| 217 | { | ||
| 218 | int i, nfds, map_fd, index = -1, cpu = -1; | ||
| 219 | struct bpf_map_info map_info = {}; | ||
| 220 | struct event_ring_info *rings; | ||
| 221 | size_t tmp_buf_sz = 0; | ||
| 222 | void *tmp_buf = NULL; | ||
| 223 | struct pollfd *pfds; | ||
| 224 | __u32 map_info_len; | ||
| 225 | bool do_all = true; | ||
| 226 | |||
| 227 | map_info_len = sizeof(map_info); | ||
| 228 | map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len); | ||
| 229 | if (map_fd < 0) | ||
| 230 | return -1; | ||
| 231 | |||
| 232 | if (map_info.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { | ||
| 233 | p_err("map is not a perf event array"); | ||
| 234 | goto err_close_map; | ||
| 235 | } | ||
| 236 | |||
| 237 | while (argc) { | ||
| 238 | if (argc < 2) | ||
| 239 | BAD_ARG(); | ||
| 240 | |||
| 241 | if (is_prefix(*argv, "cpu")) { | ||
| 242 | char *endptr; | ||
| 243 | |||
| 244 | NEXT_ARG(); | ||
| 245 | cpu = strtoul(*argv, &endptr, 0); | ||
| 246 | if (*endptr) { | ||
| 247 | p_err("can't parse %s as CPU ID", *argv); | ||
| 248 | goto err_close_map; | ||
| 249 | } | ||
| 250 | |||
| 251 | NEXT_ARG(); | ||
| 252 | } else if (is_prefix(*argv, "index")) { | ||
| 253 | char *endptr; | ||
| 254 | |||
| 255 | NEXT_ARG(); | ||
| 256 | index = strtoul(*argv, &endptr, 0); | ||
| 257 | if (*endptr) { | ||
| 258 | p_err("can't parse %s as index", *argv); | ||
| 259 | goto err_close_map; | ||
| 260 | } | ||
| 261 | |||
| 262 | NEXT_ARG(); | ||
| 263 | } else { | ||
| 264 | BAD_ARG(); | ||
| 265 | } | ||
| 266 | |||
| 267 | do_all = false; | ||
| 268 | } | ||
| 269 | |||
| 270 | if (!do_all) { | ||
| 271 | if (index == -1 || cpu == -1) { | ||
| 272 | p_err("cpu and index must be specified together"); | ||
| 273 | goto err_close_map; | ||
| 274 | } | ||
| 275 | |||
| 276 | nfds = 1; | ||
| 277 | } else { | ||
| 278 | nfds = min(get_possible_cpus(), map_info.max_entries); | ||
| 279 | cpu = 0; | ||
| 280 | index = 0; | ||
| 281 | } | ||
| 282 | |||
| 283 | rings = calloc(nfds, sizeof(rings[0])); | ||
| 284 | if (!rings) | ||
| 285 | goto err_close_map; | ||
| 286 | |||
| 287 | pfds = calloc(nfds, sizeof(pfds[0])); | ||
| 288 | if (!pfds) | ||
| 289 | goto err_free_rings; | ||
| 290 | |||
| 291 | for (i = 0; i < nfds; i++) { | ||
| 292 | rings[i].cpu = cpu + i; | ||
| 293 | rings[i].key = index + i; | ||
| 294 | |||
| 295 | rings[i].fd = bpf_perf_event_open(map_fd, rings[i].key, | ||
| 296 | rings[i].cpu); | ||
| 297 | if (rings[i].fd < 0) | ||
| 298 | goto err_close_fds_prev; | ||
| 299 | |||
| 300 | rings[i].mem = perf_event_mmap(rings[i].fd); | ||
| 301 | if (!rings[i].mem) | ||
| 302 | goto err_close_fds_current; | ||
| 303 | |||
| 304 | pfds[i].fd = rings[i].fd; | ||
| 305 | pfds[i].events = POLLIN; | ||
| 306 | } | ||
| 307 | |||
| 308 | signal(SIGINT, int_exit); | ||
| 309 | signal(SIGHUP, int_exit); | ||
| 310 | signal(SIGTERM, int_exit); | ||
| 311 | |||
| 312 | if (json_output) | ||
| 313 | jsonw_start_array(json_wtr); | ||
| 314 | |||
| 315 | while (!stop) { | ||
| 316 | poll(pfds, nfds, 200); | ||
| 317 | for (i = 0; i < nfds; i++) | ||
| 318 | perf_event_read(&rings[i], &tmp_buf, &tmp_buf_sz); | ||
| 319 | } | ||
| 320 | free(tmp_buf); | ||
| 321 | |||
| 322 | if (json_output) | ||
| 323 | jsonw_end_array(json_wtr); | ||
| 324 | |||
| 325 | for (i = 0; i < nfds; i++) { | ||
| 326 | perf_event_unmap(rings[i].mem); | ||
| 327 | close(rings[i].fd); | ||
| 328 | } | ||
| 329 | free(pfds); | ||
| 330 | free(rings); | ||
| 331 | close(map_fd); | ||
| 332 | |||
| 333 | return 0; | ||
| 334 | |||
| 335 | err_close_fds_prev: | ||
| 336 | while (i--) { | ||
| 337 | perf_event_unmap(rings[i].mem); | ||
| 338 | err_close_fds_current: | ||
| 339 | close(rings[i].fd); | ||
| 340 | } | ||
| 341 | free(pfds); | ||
| 342 | err_free_rings: | ||
| 343 | free(rings); | ||
| 344 | err_close_map: | ||
| 345 | close(map_fd); | ||
| 346 | return -1; | ||
| 347 | } | ||
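The event_pipe reader above only drains the rings; something must fill them. A minimal, hypothetical BPF-side producer it could consume, in the style of the selftests in this series — map name, section, and payload are made up for illustration::

	#include <linux/bpf.h>
	#include "bpf_helpers.h"

	struct bpf_map_def SEC("maps") my_events = {
		.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
		.key_size = sizeof(int),
		.value_size = sizeof(__u32),
		.max_entries = 64,	/* should cover all possible CPUs */
	};

	SEC("tracepoint/raw_syscalls/sys_enter")
	int emit_sample(void *ctx)
	{
		__u64 pid_tgid = bpf_get_current_pid_tgid();

		/* BPF_F_CURRENT_CPU picks the ring of the executing CPU,
		 * matching the per-CPU readers opened by do_event_pipe().
		 */
		bpf_perf_event_output(ctx, &my_events, BPF_F_CURRENT_CPU,
				      &pid_tgid, sizeof(pid_tgid));
		return 0;
	}

	char _license[] SEC("license") = "GPL";

Each record emitted this way then shows up as a PERF_RECORD_SAMPLE in the tool's output.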
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index e71a0a11afde..9bdfdf2d3fbe 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c | |||
| @@ -96,7 +96,10 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) | |||
| 96 | return; | 96 | return; |
| 97 | } | 97 | } |
| 98 | 98 | ||
| 99 | strftime(buf, size, "%b %d/%H:%M", &load_tm); | 99 | if (json_output) |
| 100 | strftime(buf, size, "%s", &load_tm); | ||
| 101 | else | ||
| 102 | strftime(buf, size, "%FT%T%z", &load_tm); | ||
| 100 | } | 103 | } |
| 101 | 104 | ||
| 102 | static int prog_fd_by_tag(unsigned char *tag) | 105 | static int prog_fd_by_tag(unsigned char *tag) |
| @@ -245,7 +248,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) | |||
| 245 | print_boot_time(info->load_time, buf, sizeof(buf)); | 248 | print_boot_time(info->load_time, buf, sizeof(buf)); |
| 246 | 249 | ||
| 247 | /* Piggy back on load_time, since 0 uid is a valid one */ | 250 | /* Piggy back on load_time, since 0 uid is a valid one */ |
| 248 | jsonw_string_field(json_wtr, "loaded_at", buf); | 251 | jsonw_name(json_wtr, "loaded_at"); |
| 252 | jsonw_printf(json_wtr, "%s", buf); | ||
| 249 | jsonw_uint_field(json_wtr, "uid", info->created_by_uid); | 253 | jsonw_uint_field(json_wtr, "uid", info->created_by_uid); |
| 250 | } | 254 | } |
| 251 | 255 | ||
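The load_time change makes JSON carry raw epoch seconds ("%s" is a glibc strftime extension) while plain output uses an ISO 8601 style stamp. A small standalone illustration; the timestamps in the comments are examples only::

	#include <stdio.h>
	#include <time.h>

	int main(void)
	{
		char buf[32];
		time_t now = time(NULL);
		struct tm tm;

		localtime_r(&now, &tm);
		strftime(buf, sizeof(buf), "%s", &tm);		/* e.g. "1525737600" */
		printf("json:  %s\n", buf);
		strftime(buf, sizeof(buf), "%FT%T%z", &tm);	/* e.g. "2018-05-08T03:20:00-0400" */
		printf("plain: %s\n", buf);
		return 0;
	}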
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index da77a9388947..83a95ae388dd 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h | |||
| @@ -828,12 +828,12 @@ union bpf_attr { | |||
| 828 | * | 828 | * |
| 829 | * Also, be aware that the newer helper | 829 | * Also, be aware that the newer helper |
| 830 | * **bpf_perf_event_read_value**\ () is recommended over | 830 | * **bpf_perf_event_read_value**\ () is recommended over |
| 831 | * **bpf_perf_event_read*\ () in general. The latter has some ABI | 831 | * **bpf_perf_event_read**\ () in general. The latter has some ABI |
| 832 | * quirks where error and counter value are used as a return code | 832 | * quirks where error and counter value are used as a return code |
| 833 | * (which is wrong to do since ranges may overlap). This issue is | 833 | * (which is wrong to do since ranges may overlap). This issue is |
| 834 | * fixed with bpf_perf_event_read_value(), which at the same time | 834 | * fixed with **bpf_perf_event_read_value**\ (), which at the same |
| 835 | * provides more features over the **bpf_perf_event_read**\ () | 835 | * time provides more features over the **bpf_perf_event_read**\ |
| 836 | * interface. Please refer to the description of | 836 | * () interface. Please refer to the description of |
| 837 | * **bpf_perf_event_read_value**\ () for details. | 837 | * **bpf_perf_event_read_value**\ () for details. |
| 838 | * Return | 838 | * Return |
| 839 | * The value of the perf event counter read from the map, or a | 839 | * The value of the perf event counter read from the map, or a |
| @@ -1361,7 +1361,7 @@ union bpf_attr { | |||
| 1361 | * Return | 1361 | * Return |
| 1362 | * 0 | 1362 | * 0 |
| 1363 | * | 1363 | * |
| 1364 | * int bpf_setsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen) | 1364 | * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) |
| 1365 | * Description | 1365 | * Description |
| 1366 | * Emulate a call to **setsockopt()** on the socket associated to | 1366 | * Emulate a call to **setsockopt()** on the socket associated to |
| 1367 | * *bpf_socket*, which must be a full socket. The *level* at | 1367 | * *bpf_socket*, which must be a full socket. The *level* at |
| @@ -1435,7 +1435,7 @@ union bpf_attr { | |||
| 1435 | * Return | 1435 | * Return |
| 1436 | * **SK_PASS** on success, or **SK_DROP** on error. | 1436 | * **SK_PASS** on success, or **SK_DROP** on error. |
| 1437 | * | 1437 | * |
| 1438 | * int bpf_sock_map_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) | 1438 | * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) |
| 1439 | * Description | 1439 | * Description |
| 1440 | * Add an entry to, or update a *map* referencing sockets. The | 1440 | * Add an entry to, or update a *map* referencing sockets. The |
| 1441 | * *skops* is used as a new value for the entry associated to | 1441 | * *skops* is used as a new value for the entry associated to |
| @@ -1533,7 +1533,7 @@ union bpf_attr { | |||
| 1533 | * Return | 1533 | * Return |
| 1534 | * 0 on success, or a negative error in case of failure. | 1534 | * 0 on success, or a negative error in case of failure. |
| 1535 | * | 1535 | * |
| 1536 | * int bpf_perf_prog_read_value(struct bpf_perf_event_data_kern *ctx, struct bpf_perf_event_value *buf, u32 buf_size) | 1536 | * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) |
| 1537 | * Description | 1537 | * Description |
| 1538 | * For an eBPF program attached to a perf event, retrieve the | 1538 | * For an eBPF program attached to a perf event, retrieve the |
| 1539 | * value of the event counter associated to *ctx* and store it in | 1539 | * value of the event counter associated to *ctx* and store it in |
| @@ -1544,7 +1544,7 @@ union bpf_attr { | |||
| 1544 | * Return | 1544 | * Return |
| 1545 | * 0 on success, or a negative error in case of failure. | 1545 | * 0 on success, or a negative error in case of failure. |
| 1546 | * | 1546 | * |
| 1547 | * int bpf_getsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen) | 1547 | * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) |
| 1548 | * Description | 1548 | * Description |
| 1549 | * Emulate a call to **getsockopt()** on the socket associated to | 1549 | * Emulate a call to **getsockopt()** on the socket associated to |
| 1550 | * *bpf_socket*, which must be a full socket. The *level* at | 1550 | * *bpf_socket*, which must be a full socket. The *level* at |
| @@ -1588,7 +1588,7 @@ union bpf_attr { | |||
| 1588 | * Return | 1588 | * Return |
| 1589 | * 0 | 1589 | * 0 |
| 1590 | * | 1590 | * |
| 1591 | * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops_kern *bpf_sock, int argval) | 1591 | * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) |
| 1592 | * Description | 1592 | * Description |
| 1593 | * Attempt to set the value of the **bpf_sock_ops_cb_flags** field | 1593 | * Attempt to set the value of the **bpf_sock_ops_cb_flags** field |
| 1594 | * for the full TCP socket associated to *bpf_sock_ops* to | 1594 | * for the full TCP socket associated to *bpf_sock_ops* to |
| @@ -1721,7 +1721,7 @@ union bpf_attr { | |||
| 1721 | * Return | 1721 | * Return |
| 1722 | * 0 on success, or a negative error in case of failure. | 1722 | * 0 on success, or a negative error in case of failure. |
| 1723 | * | 1723 | * |
| 1724 | * int bpf_bind(struct bpf_sock_addr_kern *ctx, struct sockaddr *addr, int addr_len) | 1724 | * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) |
| 1725 | * Description | 1725 | * Description |
| 1726 | * Bind the socket associated to *ctx* to the address pointed by | 1726 | * Bind the socket associated to *ctx* to the address pointed by |
| 1727 | * *addr*, of length *addr_len*. This allows for making outgoing | 1727 | * *addr*, of length *addr_len*. This allows for making outgoing |
| @@ -1767,6 +1767,64 @@ union bpf_attr { | |||
| 1767 | * **CONFIG_XFRM** configuration option. | 1767 | * **CONFIG_XFRM** configuration option. |
| 1768 | * Return | 1768 | * Return |
| 1769 | * 0 on success, or a negative error in case of failure. | 1769 | * 0 on success, or a negative error in case of failure. |
| 1770 | * | ||
| 1771 | * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags) | ||
| 1772 | * Description | ||
| 1773 | * Return a user or a kernel stack in the buffer provided by the | ||
| 1774 | * bpf program. To achieve this, the helper needs *regs*, which is | ||
| 1775 | * a pointer to the context on which the tracing program is | ||
| 1776 | * executed. To store the stacktrace, the bpf program provides | ||
| 1777 | * *buf* with a non-negative *size*. | ||
| 1778 | * | ||
| 1779 | * The last argument, *flags*, holds the number of stack frames to | ||
| 1780 | * skip (from 0 to 255), masked with | ||
| 1781 | * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set | ||
| 1782 | * the following flags: | ||
| 1783 | * | ||
| 1784 | * **BPF_F_USER_STACK** | ||
| 1785 | * Collect a user space stack instead of a kernel stack. | ||
| 1786 | * **BPF_F_USER_BUILD_ID** | ||
| 1787 | * Collect buildid+offset instead of ips for user stack, | ||
| 1788 | * only valid if **BPF_F_USER_STACK** is also specified. | ||
| 1789 | * | ||
| 1790 | * **bpf_get_stack**\ () can collect up to | ||
| 1791 | * **PERF_MAX_STACK_DEPTH** kernel and user frames, subject | ||
| 1792 | * to a sufficiently large buffer size. Note that | ||
| 1793 | * this limit can be controlled with the **sysctl** program, and | ||
| 1794 | * that it should be manually increased in order to profile long | ||
| 1795 | * user stacks (such as stacks for Java programs). To do so, use: | ||
| 1796 | * | ||
| 1797 | * :: | ||
| 1798 | * | ||
| 1799 | * # sysctl kernel.perf_event_max_stack=<new value> | ||
| 1800 | * | ||
| 1801 | * Return | ||
| 1802 | * A non-negative value equal to or less than *size* on success, or | ||
| 1803 | * a negative error in case of failure. | ||
| 1804 | * | ||
| 1805 | * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header) | ||
| 1806 | * Description | ||
| 1807 | * This helper is similar to **bpf_skb_load_bytes**\ () in that | ||
| 1808 | * it provides an easy way to load *len* bytes from *offset* | ||
| 1809 | * from the packet associated to *skb*, into the buffer pointed | ||
| 1810 | * by *to*. The difference to **bpf_skb_load_bytes**\ () is that | ||
| 1811 | * a fifth argument *start_header* exists in order to select a | ||
| 1812 | * base offset to start from. *start_header* can be one of: | ||
| 1813 | * | ||
| 1814 | * **BPF_HDR_START_MAC** | ||
| 1815 | * Base offset to load data from is *skb*'s mac header. | ||
| 1816 | * **BPF_HDR_START_NET** | ||
| 1817 | * Base offset to load data from is *skb*'s network header. | ||
| 1818 | * | ||
| 1819 | * In general, "direct packet access" is the preferred method to | ||
| 1820 | * access packet data; however, this helper is particularly useful | ||
| 1821 | * in socket filters where *skb*\ **->data** does not always point | ||
| 1822 | * to the start of the mac header and where "direct packet access" | ||
| 1823 | * is not available. | ||
| 1824 | * | ||
| 1825 | * Return | ||
| 1826 | * 0 on success, or a negative error in case of failure. | ||
| 1827 | * | ||
| 1770 | */ | 1828 | */ |
| 1771 | #define __BPF_FUNC_MAPPER(FN) \ | 1829 | #define __BPF_FUNC_MAPPER(FN) \ |
| 1772 | FN(unspec), \ | 1830 | FN(unspec), \ |
| @@ -1835,7 +1893,9 @@ union bpf_attr { | |||
| 1835 | FN(msg_pull_data), \ | 1893 | FN(msg_pull_data), \ |
| 1836 | FN(bind), \ | 1894 | FN(bind), \ |
| 1837 | FN(xdp_adjust_tail), \ | 1895 | FN(xdp_adjust_tail), \ |
| 1838 | FN(skb_get_xfrm_state), | 1896 | FN(skb_get_xfrm_state), \ |
| 1897 | FN(get_stack), \ | ||
| 1898 | FN(skb_load_bytes_relative), | ||
| 1839 | 1899 | ||
| 1840 | /* integer value in 'imm' field of BPF_CALL instruction selects which helper | 1900 | /* integer value in 'imm' field of BPF_CALL instruction selects which helper |
| 1841 | * function eBPF program intends to call | 1901 | * function eBPF program intends to call |
| @@ -1869,11 +1929,14 @@ enum bpf_func_id { | |||
| 1869 | /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ | 1929 | /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ |
| 1870 | #define BPF_F_TUNINFO_IPV6 (1ULL << 0) | 1930 | #define BPF_F_TUNINFO_IPV6 (1ULL << 0) |
| 1871 | 1931 | ||
| 1872 | /* BPF_FUNC_get_stackid flags. */ | 1932 | /* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ |
| 1873 | #define BPF_F_SKIP_FIELD_MASK 0xffULL | 1933 | #define BPF_F_SKIP_FIELD_MASK 0xffULL |
| 1874 | #define BPF_F_USER_STACK (1ULL << 8) | 1934 | #define BPF_F_USER_STACK (1ULL << 8) |
| 1935 | /* flags used by BPF_FUNC_get_stackid only. */ | ||
| 1875 | #define BPF_F_FAST_STACK_CMP (1ULL << 9) | 1936 | #define BPF_F_FAST_STACK_CMP (1ULL << 9) |
| 1876 | #define BPF_F_REUSE_STACKID (1ULL << 10) | 1937 | #define BPF_F_REUSE_STACKID (1ULL << 10) |
| 1938 | /* flags used by BPF_FUNC_get_stack only. */ | ||
| 1939 | #define BPF_F_USER_BUILD_ID (1ULL << 11) | ||
| 1877 | 1940 | ||
| 1878 | /* BPF_FUNC_skb_set_tunnel_key flags. */ | 1941 | /* BPF_FUNC_skb_set_tunnel_key flags. */ |
| 1879 | #define BPF_F_ZERO_CSUM_TX (1ULL << 1) | 1942 | #define BPF_F_ZERO_CSUM_TX (1ULL << 1) |
| @@ -1893,6 +1956,12 @@ enum bpf_adj_room_mode { | |||
| 1893 | BPF_ADJ_ROOM_NET, | 1956 | BPF_ADJ_ROOM_NET, |
| 1894 | }; | 1957 | }; |
| 1895 | 1958 | ||
| 1959 | /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ | ||
| 1960 | enum bpf_hdr_start_off { | ||
| 1961 | BPF_HDR_START_MAC, | ||
| 1962 | BPF_HDR_START_NET, | ||
| 1963 | }; | ||
| 1964 | |||
| 1896 | /* user accessible mirror of in-kernel sk_buff. | 1965 | /* user accessible mirror of in-kernel sk_buff. |
| 1897 | * new fields can only be added to the end of this structure | 1966 | * new fields can only be added to the end of this structure |
| 1898 | */ | 1967 | */ |
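The bpf_skb_load_bytes_relative() documentation above singles out socket filters, where skb->data may not point at the mac header. A hedged sketch of such a filter reading the IPv4 protocol field; the helper wrapper is declared locally because bpf_helpers.h only gains bpf_get_stack in this series, and the program layout is assumed, not taken from this patch::

	#include <linux/bpf.h>
	#include <linux/in.h>
	#include <linux/ip.h>
	#include "bpf_helpers.h"

	/* Wrapper assumed here, analogous to those in bpf_helpers.h. */
	static int (*bpf_skb_load_bytes_relative)(void *ctx, int off, void *to,
						  int len, __u32 start_header) =
		(void *) BPF_FUNC_skb_load_bytes_relative;

	SEC("socket")
	int keep_tcp(struct __sk_buff *skb)
	{
		struct iphdr iph;

		/* Read the IPv4 header from the network header base, valid
		 * even when skb->data does not point at the mac header.
		 */
		if (bpf_skb_load_bytes_relative(skb, 0, &iph, sizeof(iph),
						BPF_HDR_START_NET))
			return 0;	/* drop on error */

		return iph.protocol == IPPROTO_TCP ? skb->len : 0;
	}

	char _license[] SEC("license") = "GPL";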
diff --git a/tools/include/uapi/linux/erspan.h b/tools/include/uapi/linux/erspan.h new file mode 100644 index 000000000000..841573019ae1 --- /dev/null +++ b/tools/include/uapi/linux/erspan.h | |||
| @@ -0,0 +1,52 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ | ||
| 2 | /* | ||
| 3 | * ERSPAN Tunnel Metadata | ||
| 4 | * | ||
| 5 | * Copyright (c) 2018 VMware | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or modify | ||
| 8 | * it under the terms of the GNU General Public License version 2 | ||
| 9 | * as published by the Free Software Foundation. | ||
| 10 | * | ||
| 11 | * Userspace API for metadata mode ERSPAN tunnel | ||
| 12 | */ | ||
| 13 | #ifndef _UAPI_ERSPAN_H | ||
| 14 | #define _UAPI_ERSPAN_H | ||
| 15 | |||
| 16 | #include <linux/types.h> /* For __beXX in userspace */ | ||
| 17 | #include <asm/byteorder.h> | ||
| 18 | |||
| 19 | /* ERSPAN version 2 metadata header */ | ||
| 20 | struct erspan_md2 { | ||
| 21 | __be32 timestamp; | ||
| 22 | __be16 sgt; /* security group tag */ | ||
| 23 | #if defined(__LITTLE_ENDIAN_BITFIELD) | ||
| 24 | __u8 hwid_upper:2, | ||
| 25 | ft:5, | ||
| 26 | p:1; | ||
| 27 | __u8 o:1, | ||
| 28 | gra:2, | ||
| 29 | dir:1, | ||
| 30 | hwid:4; | ||
| 31 | #elif defined(__BIG_ENDIAN_BITFIELD) | ||
| 32 | __u8 p:1, | ||
| 33 | ft:5, | ||
| 34 | hwid_upper:2; | ||
| 35 | __u8 hwid:4, | ||
| 36 | dir:1, | ||
| 37 | gra:2, | ||
| 38 | o:1; | ||
| 39 | #else | ||
| 40 | #error "Please fix <asm/byteorder.h>" | ||
| 41 | #endif | ||
| 42 | }; | ||
| 43 | |||
| 44 | struct erspan_metadata { | ||
| 45 | int version; | ||
| 46 | union { | ||
| 47 | __be32 index; /* Version 1 (type II)*/ | ||
| 48 | struct erspan_md2 md2; /* Version 2 (type III) */ | ||
| 49 | } u; | ||
| 50 | }; | ||
| 51 | |||
| 52 | #endif /* _UAPI_ERSPAN_H */ | ||
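With this header mirrored into tools/include, a BPF program on a collect_md ERSPAN device can pull the metadata via bpf_skb_get_tunnel_opt(), as the tunnel selftests in this series do. A sketch under those assumptions; the section name and the keep/drop policy are illustrative::

	#include <linux/bpf.h>
	#include <linux/erspan.h>
	#include <linux/pkt_cls.h>
	#include "bpf_helpers.h"

	SEC("classifier")
	int erspan_md_filter(struct __sk_buff *skb)
	{
		struct erspan_metadata md;

		/* Only meaningful on a collect_md erspan device. */
		if (bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)) < 0)
			return TC_ACT_SHOT;

		if (md.version == 1)	/* type II: 20-bit index */
			return md.u.index ? TC_ACT_OK : TC_ACT_SHOT;

		/* type III: keep only egress-mirrored frames (dir bit set) */
		return md.u.md2.dir ? TC_ACT_OK : TC_ACT_SHOT;
	}

	char _license[] SEC("license") = "GPL";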
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index b64a7a39cbc8..9d762184b805 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile | |||
| @@ -32,7 +32,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test | |||
| 32 | test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ | 32 | test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ |
| 33 | sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ | 33 | sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ |
| 34 | sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \ | 34 | sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \ |
| 35 | test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o | 35 | test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \ |
| 36 | test_get_stack_rawtp.o | ||
| 36 | 37 | ||
| 37 | # Order correspond to 'make run_tests' order | 38 | # Order correspond to 'make run_tests' order |
| 38 | TEST_PROGS := test_kmod.sh \ | 39 | TEST_PROGS := test_kmod.sh \ |
| @@ -58,6 +59,7 @@ $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c | |||
| 58 | $(OUTPUT)/test_sock: cgroup_helpers.c | 59 | $(OUTPUT)/test_sock: cgroup_helpers.c |
| 59 | $(OUTPUT)/test_sock_addr: cgroup_helpers.c | 60 | $(OUTPUT)/test_sock_addr: cgroup_helpers.c |
| 60 | $(OUTPUT)/test_sockmap: cgroup_helpers.c | 61 | $(OUTPUT)/test_sockmap: cgroup_helpers.c |
| 62 | $(OUTPUT)/test_progs: trace_helpers.c | ||
| 61 | 63 | ||
| 62 | .PHONY: force | 64 | .PHONY: force |
| 63 | 65 | ||
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 69d7b918e66a..265f8e0e8ada 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h | |||
| @@ -101,6 +101,8 @@ static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = | |||
| 101 | static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state, | 101 | static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state, |
| 102 | int size, int flags) = | 102 | int size, int flags) = |
| 103 | (void *) BPF_FUNC_skb_get_xfrm_state; | 103 | (void *) BPF_FUNC_skb_get_xfrm_state; |
| 104 | static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) = | ||
| 105 | (void *) BPF_FUNC_get_stack; | ||
| 104 | 106 | ||
| 105 | /* llvm builtin functions that eBPF C program may use to | 107 | /* llvm builtin functions that eBPF C program may use to |
| 106 | * emit BPF_LD_ABS and BPF_LD_IND instructions | 108 | * emit BPF_LD_ABS and BPF_LD_IND instructions |
diff --git a/tools/testing/selftests/bpf/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/test_get_stack_rawtp.c new file mode 100644 index 000000000000..f6d9f238e00a --- /dev/null +++ b/tools/testing/selftests/bpf/test_get_stack_rawtp.c | |||
| @@ -0,0 +1,102 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | ||
| 2 | |||
| 3 | #include <linux/bpf.h> | ||
| 4 | #include "bpf_helpers.h" | ||
| 5 | |||
| 6 | /* Permit pretty deep stack traces */ | ||
| 7 | #define MAX_STACK_RAWTP 100 | ||
| 8 | struct stack_trace_t { | ||
| 9 | int pid; | ||
| 10 | int kern_stack_size; | ||
| 11 | int user_stack_size; | ||
| 12 | int user_stack_buildid_size; | ||
| 13 | __u64 kern_stack[MAX_STACK_RAWTP]; | ||
| 14 | __u64 user_stack[MAX_STACK_RAWTP]; | ||
| 15 | struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP]; | ||
| 16 | }; | ||
| 17 | |||
| 18 | struct bpf_map_def SEC("maps") perfmap = { | ||
| 19 | .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, | ||
| 20 | .key_size = sizeof(int), | ||
| 21 | .value_size = sizeof(__u32), | ||
| 22 | .max_entries = 2, | ||
| 23 | }; | ||
| 24 | |||
| 25 | struct bpf_map_def SEC("maps") stackdata_map = { | ||
| 26 | .type = BPF_MAP_TYPE_PERCPU_ARRAY, | ||
| 27 | .key_size = sizeof(__u32), | ||
| 28 | .value_size = sizeof(struct stack_trace_t), | ||
| 29 | .max_entries = 1, | ||
| 30 | }; | ||
| 31 | |||
| 32 | /* Allocate per-cpu space twice what is needed. For the code below | ||
| 33 | * usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK); | ||
| 34 | * if (usize < 0) | ||
| 35 | * return 0; | ||
| 36 | * ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0); | ||
| 37 | * | ||
| 38 | * If we have value_size = MAX_STACK_RAWTP * sizeof(__u64), | ||
| 39 | * the verifier will complain that access "raw_data + usize" | ||
| 40 | * with size "max_len - usize" may be out of bounds. | ||
| 41 | * The maximum "raw_data + usize" is "raw_data + max_len" | ||
| 42 | * and the maximum "max_len - usize" is "max_len", so the verifier | ||
| 43 | * concludes that the maximum buffer access range is | ||
| 44 | * "raw_data[0...max_len * 2 - 1]" and hence rejects the program. | ||
| 45 | * | ||
| 46 | * Doubling the to-be-used max buffer size can fix this verifier | ||
| 47 | * issue and avoid complicated massaging of the C code. | ||
| 48 | * This is an acceptable workaround since there is only one entry here. | ||
| 49 | */ | ||
| 50 | struct bpf_map_def SEC("maps") rawdata_map = { | ||
| 51 | .type = BPF_MAP_TYPE_PERCPU_ARRAY, | ||
| 52 | .key_size = sizeof(__u32), | ||
| 53 | .value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2, | ||
| 54 | .max_entries = 1, | ||
| 55 | }; | ||
| 56 | |||
| 57 | SEC("tracepoint/raw_syscalls/sys_enter") | ||
| 58 | int bpf_prog1(void *ctx) | ||
| 59 | { | ||
| 60 | int max_len, max_buildid_len, usize, ksize, total_size; | ||
| 61 | struct stack_trace_t *data; | ||
| 62 | void *raw_data; | ||
| 63 | __u32 key = 0; | ||
| 64 | |||
| 65 | data = bpf_map_lookup_elem(&stackdata_map, &key); | ||
| 66 | if (!data) | ||
| 67 | return 0; | ||
| 68 | |||
| 69 | max_len = MAX_STACK_RAWTP * sizeof(__u64); | ||
| 70 | max_buildid_len = MAX_STACK_RAWTP * sizeof(struct bpf_stack_build_id); | ||
| 71 | data->pid = bpf_get_current_pid_tgid(); | ||
| 72 | data->kern_stack_size = bpf_get_stack(ctx, data->kern_stack, | ||
| 73 | max_len, 0); | ||
| 74 | data->user_stack_size = bpf_get_stack(ctx, data->user_stack, max_len, | ||
| 75 | BPF_F_USER_STACK); | ||
| 76 | data->user_stack_buildid_size = bpf_get_stack( | ||
| 77 | ctx, data->user_stack_buildid, max_buildid_len, | ||
| 78 | BPF_F_USER_STACK | BPF_F_USER_BUILD_ID); | ||
| 79 | bpf_perf_event_output(ctx, &perfmap, 0, data, sizeof(*data)); | ||
| 80 | |||
| 81 | /* write both kernel and user stacks to the same buffer */ | ||
| 82 | raw_data = bpf_map_lookup_elem(&rawdata_map, &key); | ||
| 83 | if (!raw_data) | ||
| 84 | return 0; | ||
| 85 | |||
| 86 | usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK); | ||
| 87 | if (usize < 0) | ||
| 88 | return 0; | ||
| 89 | |||
| 90 | ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0); | ||
| 91 | if (ksize < 0) | ||
| 92 | return 0; | ||
| 93 | |||
| 94 | total_size = usize + ksize; | ||
| 95 | if (total_size > 0 && total_size <= max_len) | ||
| 96 | bpf_perf_event_output(ctx, &perfmap, 0, raw_data, total_size); | ||
| 97 | |||
| 98 | return 0; | ||
| 99 | } | ||
| 100 | |||
| 101 | char _license[] SEC("license") = "GPL"; | ||
| 102 | __u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ | ||
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index fac581f1c57f..ed197eef1cfc 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c | |||
| @@ -38,8 +38,10 @@ typedef __u16 __sum16; | |||
| 38 | #include "bpf_util.h" | 38 | #include "bpf_util.h" |
| 39 | #include "bpf_endian.h" | 39 | #include "bpf_endian.h" |
| 40 | #include "bpf_rlimit.h" | 40 | #include "bpf_rlimit.h" |
| 41 | #include "trace_helpers.h" | ||
| 41 | 42 | ||
| 42 | static int error_cnt, pass_cnt; | 43 | static int error_cnt, pass_cnt; |
| 44 | static bool jit_enabled; | ||
| 43 | 45 | ||
| 44 | #define MAGIC_BYTES 123 | 46 | #define MAGIC_BYTES 123 |
| 45 | 47 | ||
| @@ -391,13 +393,30 @@ static inline __u64 ptr_to_u64(const void *ptr) | |||
| 391 | return (__u64) (unsigned long) ptr; | 393 | return (__u64) (unsigned long) ptr; |
| 392 | } | 394 | } |
| 393 | 395 | ||
| 396 | static bool is_jit_enabled(void) | ||
| 397 | { | ||
| 398 | const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable"; | ||
| 399 | bool enabled = false; | ||
| 400 | int sysctl_fd; | ||
| 401 | |||
| 402 | sysctl_fd = open(jit_sysctl, O_RDONLY); | ||
| 403 | if (sysctl_fd != -1) { | ||
| 404 | char tmpc; | ||
| 405 | |||
| 406 | if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1) | ||
| 407 | enabled = (tmpc != '0'); | ||
| 408 | close(sysctl_fd); | ||
| 409 | } | ||
| 410 | |||
| 411 | return enabled; | ||
| 412 | } | ||
| 413 | |||
| 394 | static void test_bpf_obj_id(void) | 414 | static void test_bpf_obj_id(void) |
| 395 | { | 415 | { |
| 396 | const __u64 array_magic_value = 0xfaceb00c; | 416 | const __u64 array_magic_value = 0xfaceb00c; |
| 397 | const __u32 array_key = 0; | 417 | const __u32 array_key = 0; |
| 398 | const int nr_iters = 2; | 418 | const int nr_iters = 2; |
| 399 | const char *file = "./test_obj_id.o"; | 419 | const char *file = "./test_obj_id.o"; |
| 400 | const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable"; | ||
| 401 | const char *expected_prog_name = "test_obj_id"; | 420 | const char *expected_prog_name = "test_obj_id"; |
| 402 | const char *expected_map_name = "test_map_id"; | 421 | const char *expected_map_name = "test_map_id"; |
| 403 | const __u64 nsec_per_sec = 1000000000; | 422 | const __u64 nsec_per_sec = 1000000000; |
| @@ -414,20 +433,11 @@ static void test_bpf_obj_id(void) | |||
| 414 | char jited_insns[128], xlated_insns[128], zeros[128]; | 433 | char jited_insns[128], xlated_insns[128], zeros[128]; |
| 415 | __u32 i, next_id, info_len, nr_id_found, duration = 0; | 434 | __u32 i, next_id, info_len, nr_id_found, duration = 0; |
| 416 | struct timespec real_time_ts, boot_time_ts; | 435 | struct timespec real_time_ts, boot_time_ts; |
| 417 | int sysctl_fd, jit_enabled = 0, err = 0; | 436 | int err = 0; |
| 418 | __u64 array_value; | 437 | __u64 array_value; |
| 419 | uid_t my_uid = getuid(); | 438 | uid_t my_uid = getuid(); |
| 420 | time_t now, load_time; | 439 | time_t now, load_time; |
| 421 | 440 | ||
| 422 | sysctl_fd = open(jit_sysctl, 0, O_RDONLY); | ||
| 423 | if (sysctl_fd != -1) { | ||
| 424 | char tmpc; | ||
| 425 | |||
| 426 | if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1) | ||
| 427 | jit_enabled = (tmpc != '0'); | ||
| 428 | close(sysctl_fd); | ||
| 429 | } | ||
| 430 | |||
| 431 | err = bpf_prog_get_fd_by_id(0); | 441 | err = bpf_prog_get_fd_by_id(0); |
| 432 | CHECK(err >= 0 || errno != ENOENT, | 442 | CHECK(err >= 0 || errno != ENOENT, |
| 433 | "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno); | 443 | "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno); |
| @@ -896,11 +906,47 @@ static int compare_map_keys(int map1_fd, int map2_fd) | |||
| 896 | return 0; | 906 | return 0; |
| 897 | } | 907 | } |
| 898 | 908 | ||
| 909 | static int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len) | ||
| 910 | { | ||
| 911 | __u32 key, next_key, *cur_key_p, *next_key_p; | ||
| 912 | char *val_buf1, *val_buf2; | ||
| 913 | int i, err = 0; | ||
| 914 | |||
| 915 | val_buf1 = malloc(stack_trace_len); | ||
| 916 | val_buf2 = malloc(stack_trace_len); | ||
| 917 | cur_key_p = NULL; | ||
| 918 | next_key_p = &key; | ||
| 919 | while (bpf_map_get_next_key(smap_fd, cur_key_p, next_key_p) == 0) { | ||
| 920 | err = bpf_map_lookup_elem(smap_fd, next_key_p, val_buf1); | ||
| 921 | if (err) | ||
| 922 | goto out; | ||
| 923 | err = bpf_map_lookup_elem(amap_fd, next_key_p, val_buf2); | ||
| 924 | if (err) | ||
| 925 | goto out; | ||
| 926 | for (i = 0; i < stack_trace_len; i++) { | ||
| 927 | if (val_buf1[i] != val_buf2[i]) { | ||
| 928 | err = -1; | ||
| 929 | goto out; | ||
| 930 | } | ||
| 931 | } | ||
| 932 | key = *next_key_p; | ||
| 933 | cur_key_p = &key; | ||
| 934 | next_key_p = &next_key; | ||
| 935 | } | ||
| 936 | if (errno != ENOENT) | ||
| 937 | err = -1; | ||
| 938 | |||
| 939 | out: | ||
| 940 | free(val_buf1); | ||
| 941 | free(val_buf2); | ||
| 942 | return err; | ||
| 943 | } | ||
| 944 | |||
| 899 | static void test_stacktrace_map() | 945 | static void test_stacktrace_map() |
| 900 | { | 946 | { |
| 901 | int control_map_fd, stackid_hmap_fd, stackmap_fd; | 947 | int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; |
| 902 | const char *file = "./test_stacktrace_map.o"; | 948 | const char *file = "./test_stacktrace_map.o"; |
| 903 | int bytes, efd, err, pmu_fd, prog_fd; | 949 | int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len; |
| 904 | struct perf_event_attr attr = {}; | 950 | struct perf_event_attr attr = {}; |
| 905 | __u32 key, val, duration = 0; | 951 | __u32 key, val, duration = 0; |
| 906 | struct bpf_object *obj; | 952 | struct bpf_object *obj; |
| @@ -956,6 +1002,10 @@ static void test_stacktrace_map() | |||
| 956 | if (stackmap_fd < 0) | 1002 | if (stackmap_fd < 0) |
| 957 | goto disable_pmu; | 1003 | goto disable_pmu; |
| 958 | 1004 | ||
| 1005 | stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap"); | ||
| 1006 | if (stack_amap_fd < 0) | ||
| 1007 | goto disable_pmu; | ||
| 1008 | |||
| 959 | /* give some time for bpf program run */ | 1009 | /* give some time for bpf program run */ |
| 960 | sleep(1); | 1010 | sleep(1); |
| 961 | 1011 | ||
| @@ -977,6 +1027,12 @@ static void test_stacktrace_map() | |||
| 977 | "err %d errno %d\n", err, errno)) | 1027 | "err %d errno %d\n", err, errno)) |
| 978 | goto disable_pmu_noerr; | 1028 | goto disable_pmu_noerr; |
| 979 | 1029 | ||
| 1030 | stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64); | ||
| 1031 | err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len); | ||
| 1032 | if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap", | ||
| 1033 | "err %d errno %d\n", err, errno)) | ||
| 1034 | goto disable_pmu_noerr; | ||
| 1035 | |||
| 980 | goto disable_pmu_noerr; | 1036 | goto disable_pmu_noerr; |
| 981 | disable_pmu: | 1037 | disable_pmu: |
| 982 | error_cnt++; | 1038 | error_cnt++; |
| @@ -1070,9 +1126,9 @@ err: | |||
| 1070 | 1126 | ||
| 1071 | static void test_stacktrace_build_id(void) | 1127 | static void test_stacktrace_build_id(void) |
| 1072 | { | 1128 | { |
| 1073 | int control_map_fd, stackid_hmap_fd, stackmap_fd; | 1129 | int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; |
| 1074 | const char *file = "./test_stacktrace_build_id.o"; | 1130 | const char *file = "./test_stacktrace_build_id.o"; |
| 1075 | int bytes, efd, err, pmu_fd, prog_fd; | 1131 | int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len; |
| 1076 | struct perf_event_attr attr = {}; | 1132 | struct perf_event_attr attr = {}; |
| 1077 | __u32 key, previous_key, val, duration = 0; | 1133 | __u32 key, previous_key, val, duration = 0; |
| 1078 | struct bpf_object *obj; | 1134 | struct bpf_object *obj; |
| @@ -1137,6 +1193,11 @@ static void test_stacktrace_build_id(void) | |||
| 1137 | err, errno)) | 1193 | err, errno)) |
| 1138 | goto disable_pmu; | 1194 | goto disable_pmu; |
| 1139 | 1195 | ||
| 1196 | stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap"); | ||
| 1197 | if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap", | ||
| 1198 | "err %d errno %d\n", err, errno)) | ||
| 1199 | goto disable_pmu; | ||
| 1200 | |||
| 1140 | assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null") | 1201 | assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null") |
| 1141 | == 0); | 1202 | == 0); |
| 1142 | assert(system("./urandom_read") == 0); | 1203 | assert(system("./urandom_read") == 0); |
| @@ -1188,8 +1249,15 @@ static void test_stacktrace_build_id(void) | |||
| 1188 | previous_key = key; | 1249 | previous_key = key; |
| 1189 | } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); | 1250 | } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); |
| 1190 | 1251 | ||
| 1191 | CHECK(build_id_matches < 1, "build id match", | 1252 | if (CHECK(build_id_matches < 1, "build id match", |
| 1192 | "Didn't find expected build ID from the map\n"); | 1253 | "Didn't find expected build ID from the map\n")) |
| 1254 | goto disable_pmu; | ||
| 1255 | |||
| 1256 | stack_trace_len = PERF_MAX_STACK_DEPTH | ||
| 1257 | * sizeof(struct bpf_stack_build_id); | ||
| 1258 | err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len); | ||
| 1259 | CHECK(err, "compare_stack_ips stackmap vs. stack_amap", | ||
| 1260 | "err %d errno %d\n", err, errno); | ||
| 1193 | 1261 | ||
| 1194 | disable_pmu: | 1262 | disable_pmu: |
| 1195 | ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); | 1263 | ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); |
| @@ -1204,8 +1272,147 @@ out: | |||
| 1204 | return; | 1272 | return; |
| 1205 | } | 1273 | } |
| 1206 | 1274 | ||
| 1275 | #define MAX_CNT_RAWTP 10ull | ||
| 1276 | #define MAX_STACK_RAWTP 100 | ||
| 1277 | struct get_stack_trace_t { | ||
| 1278 | int pid; | ||
| 1279 | int kern_stack_size; | ||
| 1280 | int user_stack_size; | ||
| 1281 | int user_stack_buildid_size; | ||
| 1282 | __u64 kern_stack[MAX_STACK_RAWTP]; | ||
| 1283 | __u64 user_stack[MAX_STACK_RAWTP]; | ||
| 1284 | struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP]; | ||
| 1285 | }; | ||
| 1286 | |||
| 1287 | static int get_stack_print_output(void *data, int size) | ||
| 1288 | { | ||
| 1289 | bool good_kern_stack = false, good_user_stack = false; | ||
| 1290 | const char *nonjit_func = "___bpf_prog_run"; | ||
| 1291 | struct get_stack_trace_t *e = data; | ||
| 1292 | int i, num_stack; | ||
| 1293 | static __u64 cnt; | ||
| 1294 | struct ksym *ks; | ||
| 1295 | |||
| 1296 | cnt++; | ||
| 1297 | |||
| 1298 | if (size < sizeof(struct get_stack_trace_t)) { | ||
| 1299 | __u64 *raw_data = data; | ||
| 1300 | bool found = false; | ||
| 1301 | |||
| 1302 | num_stack = size / sizeof(__u64); | ||
| 1303 | /* If jit is enabled, we do not have a good way to | ||
| 1304 | * verify the sanity of the kernel stack. So we | ||
| 1305 | * just assume it is good if the stack is not empty. | ||
| 1306 | * This could be improved in the future. | ||
| 1307 | */ | ||
| 1308 | if (jit_enabled) { | ||
| 1309 | found = num_stack > 0; | ||
| 1310 | } else { | ||
| 1311 | for (i = 0; i < num_stack; i++) { | ||
| 1312 | ks = ksym_search(raw_data[i]); | ||
| 1313 | if (strcmp(ks->name, nonjit_func) == 0) { | ||
| 1314 | found = true; | ||
| 1315 | break; | ||
| 1316 | } | ||
| 1317 | } | ||
| 1318 | } | ||
| 1319 | if (found) { | ||
| 1320 | good_kern_stack = true; | ||
| 1321 | good_user_stack = true; | ||
| 1322 | } | ||
| 1323 | } else { | ||
| 1324 | num_stack = e->kern_stack_size / sizeof(__u64); | ||
| 1325 | if (jit_enabled) { | ||
| 1326 | good_kern_stack = num_stack > 0; | ||
| 1327 | } else { | ||
| 1328 | for (i = 0; i < num_stack; i++) { | ||
| 1329 | ks = ksym_search(e->kern_stack[i]); | ||
| 1330 | if (strcmp(ks->name, nonjit_func) == 0) { | ||
| 1331 | good_kern_stack = true; | ||
| 1332 | break; | ||
| 1333 | } | ||
| 1334 | } | ||
| 1335 | } | ||
| 1336 | if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0) | ||
| 1337 | good_user_stack = true; | ||
| 1338 | } | ||
| 1339 | if (!good_kern_stack || !good_user_stack) | ||
| 1340 | return PERF_EVENT_ERROR; | ||
| 1341 | |||
| 1342 | if (cnt == MAX_CNT_RAWTP) | ||
| 1343 | return PERF_EVENT_DONE; | ||
| 1344 | |||
| 1345 | return PERF_EVENT_CONT; | ||
| 1346 | } | ||
| 1347 | |||
| 1348 | static void test_get_stack_raw_tp(void) | ||
| 1349 | { | ||
| 1350 | const char *file = "./test_get_stack_rawtp.o"; | ||
| 1351 | int i, efd, err, prog_fd, pmu_fd, perfmap_fd; | ||
| 1352 | struct perf_event_attr attr = {}; | ||
| 1353 | struct timespec tv = {0, 10}; | ||
| 1354 | __u32 key = 0, duration = 0; | ||
| 1355 | struct bpf_object *obj; | ||
| 1356 | |||
| 1357 | err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); | ||
| 1358 | if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) | ||
| 1359 | return; | ||
| 1360 | |||
| 1361 | efd = bpf_raw_tracepoint_open("sys_enter", prog_fd); | ||
| 1362 | if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno)) | ||
| 1363 | goto close_prog; | ||
| 1364 | |||
| 1365 | perfmap_fd = bpf_find_map(__func__, obj, "perfmap"); | ||
| 1366 | if (CHECK(perfmap_fd < 0, "bpf_find_map", "err %d errno %d\n", | ||
| 1367 | perfmap_fd, errno)) | ||
| 1368 | goto close_prog; | ||
| 1369 | |||
| 1370 | err = load_kallsyms(); | ||
| 1371 | if (CHECK(err < 0, "load_kallsyms", "err %d errno %d\n", err, errno)) | ||
| 1372 | goto close_prog; | ||
| 1373 | |||
| 1374 | attr.sample_type = PERF_SAMPLE_RAW; | ||
| 1375 | attr.type = PERF_TYPE_SOFTWARE; | ||
| 1376 | attr.config = PERF_COUNT_SW_BPF_OUTPUT; | ||
| 1377 | pmu_fd = syscall(__NR_perf_event_open, &attr, getpid()/*pid*/, -1/*cpu*/, | ||
| 1378 | -1/*group_fd*/, 0); | ||
| 1379 | if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd, | ||
| 1380 | errno)) | ||
| 1381 | goto close_prog; | ||
| 1382 | |||
| 1383 | err = bpf_map_update_elem(perfmap_fd, &key, &pmu_fd, BPF_ANY); | ||
| 1384 | if (CHECK(err < 0, "bpf_map_update_elem", "err %d errno %d\n", err, | ||
| 1385 | errno)) | ||
| 1386 | goto close_prog; | ||
| 1387 | |||
| 1388 | err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); | ||
| 1389 | if (CHECK(err < 0, "ioctl PERF_EVENT_IOC_ENABLE", "err %d errno %d\n", | ||
| 1390 | err, errno)) | ||
| 1391 | goto close_prog; | ||
| 1392 | |||
| 1393 | err = perf_event_mmap(pmu_fd); | ||
| 1394 | if (CHECK(err < 0, "perf_event_mmap", "err %d errno %d\n", err, errno)) | ||
| 1395 | goto close_prog; | ||
| 1396 | |||
| 1397 | /* trigger some syscall action */ | ||
| 1398 | for (i = 0; i < MAX_CNT_RAWTP; i++) | ||
| 1399 | nanosleep(&tv, NULL); | ||
| 1400 | |||
| 1401 | err = perf_event_poller(pmu_fd, get_stack_print_output); | ||
| 1402 | if (CHECK(err < 0, "perf_event_poller", "err %d errno %d\n", err, errno)) | ||
| 1403 | goto close_prog; | ||
| 1404 | |||
| 1405 | goto close_prog_noerr; | ||
| 1406 | close_prog: | ||
| 1407 | error_cnt++; | ||
| 1408 | close_prog_noerr: | ||
| 1409 | bpf_object__close(obj); | ||
| 1410 | } | ||
| 1411 | |||
| 1207 | int main(void) | 1412 | int main(void) |
| 1208 | { | 1413 | { |
| 1414 | jit_enabled = is_jit_enabled(); | ||
| 1415 | |||
| 1209 | test_pkt_access(); | 1416 | test_pkt_access(); |
| 1210 | test_xdp(); | 1417 | test_xdp(); |
| 1211 | test_xdp_adjust_tail(); | 1418 | test_xdp_adjust_tail(); |
| @@ -1219,6 +1426,7 @@ int main(void) | |||
| 1219 | test_stacktrace_map(); | 1426 | test_stacktrace_map(); |
| 1220 | test_stacktrace_build_id(); | 1427 | test_stacktrace_build_id(); |
| 1221 | test_stacktrace_map_raw_tp(); | 1428 | test_stacktrace_map_raw_tp(); |
| 1429 | test_get_stack_raw_tp(); | ||
| 1222 | 1430 | ||
| 1223 | printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); | 1431 | printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); |
| 1224 | return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; | 1432 | return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; |
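The hunks above are the userspace half of the new `bpf_get_stack` test: load a raw-tracepoint program, attach it to `sys_enter`, wire a `PERF_COUNT_SW_BPF_OUTPUT` event into the program's perf event array map, mmap the ring, trigger syscalls with `nanosleep()`, and poll samples through `get_stack_print_output()` at the top of this section. The BPF half lives in `test_get_stack_rawtp.o`; what follows is only a condensed sketch of its shape (the field names mirror the userspace checks, the rest is illustrative, not the literal source from this series):

```c
/* Condensed sketch of the BPF side of test_get_stack_raw_tp(). The
 * sample struct is kept in a per-CPU array map because it is far
 * larger than the 512-byte BPF stack. */
#include <linux/bpf.h>
#include "bpf_helpers.h"

#define MAX_STACK_RAWTP 100

struct stack_trace_t {
	int pid;
	int kern_stack_size;
	int user_stack_size;
	int user_stack_buildid_size;
	__u64 kern_stack[MAX_STACK_RAWTP];
	__u64 user_stack[MAX_STACK_RAWTP];
	struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
};

struct bpf_map_def SEC("maps") perfmap = {
	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	.key_size = sizeof(int),
	.value_size = sizeof(__u32),
	.max_entries = 2,
};

struct bpf_map_def SEC("maps") stackdata_map = {
	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size = sizeof(__u32),
	.value_size = sizeof(struct stack_trace_t),
	.max_entries = 1,
};

SEC("raw_tracepoint/sys_enter")
int bpf_prog1(void *ctx)
{
	int max_len = MAX_STACK_RAWTP * sizeof(__u64);
	int max_buildid_len = MAX_STACK_RAWTP *
			      sizeof(struct bpf_stack_build_id);
	struct stack_trace_t *data;
	__u32 key = 0;

	data = bpf_map_lookup_elem(&stackdata_map, &key);
	if (!data)
		return 0;

	data->pid = bpf_get_current_pid_tgid();
	data->kern_stack_size = bpf_get_stack(ctx, data->kern_stack,
					      max_len, 0);
	data->user_stack_size = bpf_get_stack(ctx, data->user_stack, max_len,
					      BPF_F_USER_STACK);
	data->user_stack_buildid_size =
		bpf_get_stack(ctx, data->user_stack_buildid, max_buildid_len,
			      BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
	bpf_perf_event_output(ctx, &perfmap, 0, data, sizeof(*data));
	return 0;
}

char _license[] SEC("license") = "GPL";
```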
diff --git a/tools/testing/selftests/bpf/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/test_stacktrace_build_id.c index b755bd783ce5..d86c281e957f 100644 --- a/tools/testing/selftests/bpf/test_stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/test_stacktrace_build_id.c | |||
| @@ -19,7 +19,7 @@ struct bpf_map_def SEC("maps") stackid_hmap = { | |||
| 19 | .type = BPF_MAP_TYPE_HASH, | 19 | .type = BPF_MAP_TYPE_HASH, |
| 20 | .key_size = sizeof(__u32), | 20 | .key_size = sizeof(__u32), |
| 21 | .value_size = sizeof(__u32), | 21 | .value_size = sizeof(__u32), |
| 22 | .max_entries = 10000, | 22 | .max_entries = 16384, |
| 23 | }; | 23 | }; |
| 24 | 24 | ||
| 25 | struct bpf_map_def SEC("maps") stackmap = { | 25 | struct bpf_map_def SEC("maps") stackmap = { |
| @@ -31,6 +31,14 @@ struct bpf_map_def SEC("maps") stackmap = { | |||
| 31 | .map_flags = BPF_F_STACK_BUILD_ID, | 31 | .map_flags = BPF_F_STACK_BUILD_ID, |
| 32 | }; | 32 | }; |
| 33 | 33 | ||
| 34 | struct bpf_map_def SEC("maps") stack_amap = { | ||
| 35 | .type = BPF_MAP_TYPE_ARRAY, | ||
| 36 | .key_size = sizeof(__u32), | ||
| 37 | .value_size = sizeof(struct bpf_stack_build_id) | ||
| 38 | * PERF_MAX_STACK_DEPTH, | ||
| 39 | .max_entries = 128, | ||
| 40 | }; | ||
| 41 | |||
| 34 | /* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */ | 42 | /* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */ |
| 35 | struct random_urandom_args { | 43 | struct random_urandom_args { |
| 36 | unsigned long long pad; | 44 | unsigned long long pad; |
| @@ -42,7 +50,10 @@ struct random_urandom_args { | |||
| 42 | SEC("tracepoint/random/urandom_read") | 50 | SEC("tracepoint/random/urandom_read") |
| 43 | int oncpu(struct random_urandom_args *args) | 51 | int oncpu(struct random_urandom_args *args) |
| 44 | { | 52 | { |
| 53 | __u32 max_len = sizeof(struct bpf_stack_build_id) | ||
| 54 | * PERF_MAX_STACK_DEPTH; | ||
| 45 | __u32 key = 0, val = 0, *value_p; | 55 | __u32 key = 0, val = 0, *value_p; |
| 56 | void *stack_p; | ||
| 46 | 57 | ||
| 47 | value_p = bpf_map_lookup_elem(&control_map, &key); | 58 | value_p = bpf_map_lookup_elem(&control_map, &key); |
| 48 | if (value_p && *value_p) | 59 | if (value_p && *value_p) |
| @@ -50,8 +61,13 @@ int oncpu(struct random_urandom_args *args) | |||
| 50 | 61 | ||
| 51 | /* The size of stackmap and stackid_hmap should be the same */ | 62 | /* The size of stackmap and stackid_hmap should be the same */ |
| 52 | key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK); | 63 | key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK); |
| 53 | if ((int)key >= 0) | 64 | if ((int)key >= 0) { |
| 54 | bpf_map_update_elem(&stackid_hmap, &key, &val, 0); | 65 | bpf_map_update_elem(&stackid_hmap, &key, &val, 0); |
| 66 | stack_p = bpf_map_lookup_elem(&stack_amap, &key); | ||
| 67 | if (stack_p) | ||
| 68 | bpf_get_stack(args, stack_p, max_len, | ||
| 69 | BPF_F_USER_STACK | BPF_F_USER_BUILD_ID); | ||
| 70 | } | ||
| 55 | 71 | ||
| 56 | return 0; | 72 | return 0; |
| 57 | } | 73 | } |
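In the hunks above, the build-id test gains a parallel array map, `stack_amap`: for every stack id that `bpf_get_stackid()` stores, the program now also writes the raw `bpf_get_stack()` output (with `BPF_F_USER_BUILD_ID`) into the array slot of the same index, so userspace can cross-check the two helpers against each other. Each slot holds `PERF_MAX_STACK_DEPTH` entries of `struct bpf_stack_build_id` (a status word, a 20-byte build id, and a file offset). A hedged userspace sketch for dumping one such slot follows; `print_build_id_stack()` is illustrative, not part of the selftest:

```c
/* Hypothetical helper: dump the valid build-id frames of one stack
 * stored in stack_amap. Assumes the map layout from the hunk above. */
#include <stdio.h>
#include <linux/bpf.h>
#include <linux/perf_event.h>
#include <bpf/bpf.h>

static void print_build_id_stack(int amap_fd, __u32 stackid)
{
	struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
	int i, j;

	if (bpf_map_lookup_elem(amap_fd, &stackid, id_offs) < 0)
		return;

	for (i = 0; i < PERF_MAX_STACK_DEPTH; i++) {
		if (id_offs[i].status != BPF_STACK_BUILD_ID_VALID)
			continue;
		for (j = 0; j < BPF_BUILD_ID_SIZE; j++)
			printf("%02x", id_offs[i].build_id[j]);
		printf(" +0x%llx\n",
		       (unsigned long long)id_offs[i].offset);
	}
}
```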
diff --git a/tools/testing/selftests/bpf/test_stacktrace_map.c b/tools/testing/selftests/bpf/test_stacktrace_map.c index 76d85c5d08bd..af111af7ca1a 100644 --- a/tools/testing/selftests/bpf/test_stacktrace_map.c +++ b/tools/testing/selftests/bpf/test_stacktrace_map.c | |||
| @@ -19,14 +19,21 @@ struct bpf_map_def SEC("maps") stackid_hmap = { | |||
| 19 | .type = BPF_MAP_TYPE_HASH, | 19 | .type = BPF_MAP_TYPE_HASH, |
| 20 | .key_size = sizeof(__u32), | 20 | .key_size = sizeof(__u32), |
| 21 | .value_size = sizeof(__u32), | 21 | .value_size = sizeof(__u32), |
| 22 | .max_entries = 10000, | 22 | .max_entries = 16384, |
| 23 | }; | 23 | }; |
| 24 | 24 | ||
| 25 | struct bpf_map_def SEC("maps") stackmap = { | 25 | struct bpf_map_def SEC("maps") stackmap = { |
| 26 | .type = BPF_MAP_TYPE_STACK_TRACE, | 26 | .type = BPF_MAP_TYPE_STACK_TRACE, |
| 27 | .key_size = sizeof(__u32), | 27 | .key_size = sizeof(__u32), |
| 28 | .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH, | 28 | .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH, |
| 29 | .max_entries = 10000, | 29 | .max_entries = 16384, |
| 30 | }; | ||
| 31 | |||
| 32 | struct bpf_map_def SEC("maps") stack_amap = { | ||
| 33 | .type = BPF_MAP_TYPE_ARRAY, | ||
| 34 | .key_size = sizeof(__u32), | ||
| 35 | .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH, | ||
| 36 | .max_entries = 16384, | ||
| 30 | }; | 37 | }; |
| 31 | 38 | ||
| 32 | /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ | 39 | /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ |
| @@ -44,7 +51,9 @@ struct sched_switch_args { | |||
| 44 | SEC("tracepoint/sched/sched_switch") | 51 | SEC("tracepoint/sched/sched_switch") |
| 45 | int oncpu(struct sched_switch_args *ctx) | 52 | int oncpu(struct sched_switch_args *ctx) |
| 46 | { | 53 | { |
| 54 | __u32 max_len = PERF_MAX_STACK_DEPTH * sizeof(__u64); | ||
| 47 | __u32 key = 0, val = 0, *value_p; | 55 | __u32 key = 0, val = 0, *value_p; |
| 56 | void *stack_p; | ||
| 48 | 57 | ||
| 49 | value_p = bpf_map_lookup_elem(&control_map, &key); | 58 | value_p = bpf_map_lookup_elem(&control_map, &key); |
| 50 | if (value_p && *value_p) | 59 | if (value_p && *value_p) |
| @@ -52,8 +61,12 @@ int oncpu(struct sched_switch_args *ctx) | |||
| 52 | 61 | ||
| 53 | /* The size of stackmap and stackid_hmap should be the same */ | 62 | /* The size of stackmap and stackid_hmap should be the same */ |
| 54 | key = bpf_get_stackid(ctx, &stackmap, 0); | 63 | key = bpf_get_stackid(ctx, &stackmap, 0); |
| 55 | if ((int)key >= 0) | 64 | if ((int)key >= 0) { |
| 56 | bpf_map_update_elem(&stackid_hmap, &key, &val, 0); | 65 | bpf_map_update_elem(&stackid_hmap, &key, &val, 0); |
| 66 | stack_p = bpf_map_lookup_elem(&stack_amap, &key); | ||
| 67 | if (stack_p) | ||
| 68 | bpf_get_stack(ctx, stack_p, max_len, 0); | ||
| 69 | } | ||
| 57 | 70 | ||
| 58 | return 0; | 71 | return 0; |
| 59 | } | 72 | } |
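`test_stacktrace_map.c` gets the same treatment with plain instruction pointers: alongside each `bpf_get_stackid()` entry, `bpf_get_stack()` writes up to `PERF_MAX_STACK_DEPTH` raw `__u64` addresses into `stack_amap` under the same key. Userspace can then resolve those addresses with the kallsyms helpers this patch adds in `trace_helpers.c` (see below). A hedged sketch; `print_ksym_stack()` is illustrative and assumes `load_kallsyms()` has already been called:

```c
/* Hypothetical helper: symbolize one raw kernel stack from stack_amap
 * via ksym_search() from trace_helpers.h. */
#include <stdio.h>
#include <linux/perf_event.h>
#include <bpf/bpf.h>
#include "trace_helpers.h"

static void print_ksym_stack(int amap_fd, __u32 stackid)
{
	__u64 ip[PERF_MAX_STACK_DEPTH] = {};
	int i;

	if (bpf_map_lookup_elem(amap_fd, &stackid, ip) < 0)
		return;
	for (i = 0; i < PERF_MAX_STACK_DEPTH && ip[i]; i++)
		printf("  0x%llx %s\n", (unsigned long long)ip[i],
		       ksym_search(ip[i])->name);
}
```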
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 165e9ddfa446..275b4570b5b8 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c | |||
| @@ -47,7 +47,7 @@ | |||
| 47 | # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) | 47 | # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) |
| 48 | #endif | 48 | #endif |
| 49 | 49 | ||
| 50 | #define MAX_INSNS 512 | 50 | #define MAX_INSNS BPF_MAXINSNS |
| 51 | #define MAX_FIXUPS 8 | 51 | #define MAX_FIXUPS 8 |
| 52 | #define MAX_NR_MAPS 4 | 52 | #define MAX_NR_MAPS 4 |
| 53 | #define POINTER_VALUE 0xcafe4all | 53 | #define POINTER_VALUE 0xcafe4all |
| @@ -77,6 +77,8 @@ struct bpf_test { | |||
| 77 | } result, result_unpriv; | 77 | } result, result_unpriv; |
| 78 | enum bpf_prog_type prog_type; | 78 | enum bpf_prog_type prog_type; |
| 79 | uint8_t flags; | 79 | uint8_t flags; |
| 80 | __u8 data[TEST_DATA_LEN]; | ||
| 81 | void (*fill_helper)(struct bpf_test *self); | ||
| 80 | }; | 82 | }; |
| 81 | 83 | ||
| 82 | /* Note we want this to be 64 bit aligned so that the end of our array is | 84 | /* Note we want this to be 64 bit aligned so that the end of our array is |
| @@ -94,6 +96,62 @@ struct other_val { | |||
| 94 | long long bar; | 96 | long long bar; |
| 95 | }; | 97 | }; |
| 96 | 98 | ||
| 99 | static void bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self) | ||
| 100 | { | ||
| 101 | /* test: {skb->data[0], vlan_push} x 68 + {skb->data[0], vlan_pop} x 68 */ | ||
| 102 | #define PUSH_CNT 51 | ||
| 103 | unsigned int len = BPF_MAXINSNS; | ||
| 104 | struct bpf_insn *insn = self->insns; | ||
| 105 | int i = 0, j, k = 0; | ||
| 106 | |||
| 107 | insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); | ||
| 108 | loop: | ||
| 109 | for (j = 0; j < PUSH_CNT; j++) { | ||
| 110 | insn[i++] = BPF_LD_ABS(BPF_B, 0); | ||
| 111 | insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2); | ||
| 112 | i++; | ||
| 113 | insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); | ||
| 114 | insn[i++] = BPF_MOV64_IMM(BPF_REG_2, 1); | ||
| 115 | insn[i++] = BPF_MOV64_IMM(BPF_REG_3, 2); | ||
| 116 | insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
| 117 | BPF_FUNC_skb_vlan_push), | ||
| 118 | insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2); | ||
| 119 | i++; | ||
| 120 | } | ||
| 121 | |||
| 122 | for (j = 0; j < PUSH_CNT; j++) { | ||
| 123 | insn[i++] = BPF_LD_ABS(BPF_B, 0); | ||
| 124 | insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2); | ||
| 125 | i++; | ||
| 126 | insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); | ||
| 127 | insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
| 128 | BPF_FUNC_skb_vlan_pop), | ||
| 129 | insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2); | ||
| 130 | i++; | ||
| 131 | } | ||
| 132 | if (++k < 5) | ||
| 133 | goto loop; | ||
| 134 | |||
| 135 | for (; i < len - 1; i++) | ||
| 136 | insn[i] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 0xbef); | ||
| 137 | insn[len - 1] = BPF_EXIT_INSN(); | ||
| 138 | } | ||
| 139 | |||
| 140 | static void bpf_fill_jump_around_ld_abs(struct bpf_test *self) | ||
| 141 | { | ||
| 142 | struct bpf_insn *insn = self->insns; | ||
| 143 | unsigned int len = BPF_MAXINSNS; | ||
| 144 | int i = 0; | ||
| 145 | |||
| 146 | insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); | ||
| 147 | insn[i++] = BPF_LD_ABS(BPF_B, 0); | ||
| 148 | insn[i] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 10, len - i - 2); | ||
| 149 | i++; | ||
| 150 | while (i < len - 1) | ||
| 151 | insn[i++] = BPF_LD_ABS(BPF_B, 1); | ||
| 152 | insn[i] = BPF_EXIT_INSN(); | ||
| 153 | } | ||
| 154 | |||
| 97 | static struct bpf_test tests[] = { | 155 | static struct bpf_test tests[] = { |
| 98 | { | 156 | { |
| 99 | "add+sub+mul", | 157 | "add+sub+mul", |
| @@ -11680,6 +11738,242 @@ static struct bpf_test tests[] = { | |||
| 11680 | .errstr = "BPF_XADD stores into R2 packet", | 11738 | .errstr = "BPF_XADD stores into R2 packet", |
| 11681 | .prog_type = BPF_PROG_TYPE_XDP, | 11739 | .prog_type = BPF_PROG_TYPE_XDP, |
| 11682 | }, | 11740 | }, |
| 11741 | { | ||
| 11742 | "bpf_get_stack return R0 within range", | ||
| 11743 | .insns = { | ||
| 11744 | BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), | ||
| 11745 | BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
| 11746 | BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | ||
| 11747 | BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), | ||
| 11748 | BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
| 11749 | BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
| 11750 | BPF_FUNC_map_lookup_elem), | ||
| 11751 | BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 28), | ||
| 11752 | BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), | ||
| 11753 | BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)), | ||
| 11754 | BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), | ||
| 11755 | BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), | ||
| 11756 | BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)), | ||
| 11757 | BPF_MOV64_IMM(BPF_REG_4, 256), | ||
| 11758 | BPF_EMIT_CALL(BPF_FUNC_get_stack), | ||
| 11759 | BPF_MOV64_IMM(BPF_REG_1, 0), | ||
| 11760 | BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), | ||
| 11761 | BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32), | ||
| 11762 | BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32), | ||
| 11763 | BPF_JMP_REG(BPF_JSLT, BPF_REG_1, BPF_REG_8, 16), | ||
| 11764 | BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8), | ||
| 11765 | BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), | ||
| 11766 | BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_8), | ||
| 11767 | BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), | ||
| 11768 | BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32), | ||
| 11769 | BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 32), | ||
| 11770 | BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), | ||
| 11771 | BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1), | ||
| 11772 | BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), | ||
| 11773 | BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)), | ||
| 11774 | BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_5), | ||
| 11775 | BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 4), | ||
| 11776 | BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), | ||
| 11777 | BPF_MOV64_REG(BPF_REG_3, BPF_REG_9), | ||
| 11778 | BPF_MOV64_IMM(BPF_REG_4, 0), | ||
| 11779 | BPF_EMIT_CALL(BPF_FUNC_get_stack), | ||
| 11780 | BPF_EXIT_INSN(), | ||
| 11781 | }, | ||
| 11782 | .fixup_map2 = { 4 }, | ||
| 11783 | .result = ACCEPT, | ||
| 11784 | .prog_type = BPF_PROG_TYPE_TRACEPOINT, | ||
| 11785 | }, | ||
| 11786 | { | ||
| 11787 | "ld_abs: invalid op 1", | ||
| 11788 | .insns = { | ||
| 11789 | BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), | ||
| 11790 | BPF_LD_ABS(BPF_DW, 0), | ||
| 11791 | BPF_EXIT_INSN(), | ||
| 11792 | }, | ||
| 11793 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
| 11794 | .result = REJECT, | ||
| 11795 | .errstr = "unknown opcode", | ||
| 11796 | }, | ||
| 11797 | { | ||
| 11798 | "ld_abs: invalid op 2", | ||
| 11799 | .insns = { | ||
| 11800 | BPF_MOV32_IMM(BPF_REG_0, 256), | ||
| 11801 | BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), | ||
| 11802 | BPF_LD_IND(BPF_DW, BPF_REG_0, 0), | ||
| 11803 | BPF_EXIT_INSN(), | ||
| 11804 | }, | ||
| 11805 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
| 11806 | .result = REJECT, | ||
| 11807 | .errstr = "unknown opcode", | ||
| 11808 | }, | ||
| 11809 | { | ||
| 11810 | "ld_abs: nmap reduced", | ||
| 11811 | .insns = { | ||
| 11812 | BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), | ||
| 11813 | BPF_LD_ABS(BPF_H, 12), | ||
| 11814 | BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 28), | ||
| 11815 | BPF_LD_ABS(BPF_H, 12), | ||
| 11816 | BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 26), | ||
| 11817 | BPF_MOV32_IMM(BPF_REG_0, 18), | ||
| 11818 | BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -64), | ||
| 11819 | BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -64), | ||
| 11820 | BPF_LD_IND(BPF_W, BPF_REG_7, 14), | ||
| 11821 | BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -60), | ||
| 11822 | BPF_MOV32_IMM(BPF_REG_0, 280971478), | ||
| 11823 | BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56), | ||
| 11824 | BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56), | ||
| 11825 | BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -60), | ||
| 11826 | BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7), | ||
| 11827 | BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 15), | ||
| 11828 | BPF_LD_ABS(BPF_H, 12), | ||
| 11829 | BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 13), | ||
| 11830 | BPF_MOV32_IMM(BPF_REG_0, 22), | ||
| 11831 | BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56), | ||
| 11832 | BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56), | ||
| 11833 | BPF_LD_IND(BPF_H, BPF_REG_7, 14), | ||
| 11834 | BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -52), | ||
| 11835 | BPF_MOV32_IMM(BPF_REG_0, 17366), | ||
| 11836 | BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -48), | ||
| 11837 | BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -48), | ||
| 11838 | BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -52), | ||
| 11839 | BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7), | ||
| 11840 | BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), | ||
| 11841 | BPF_MOV32_IMM(BPF_REG_0, 256), | ||
| 11842 | BPF_EXIT_INSN(), | ||
| 11843 | BPF_MOV32_IMM(BPF_REG_0, 0), | ||
| 11844 | BPF_EXIT_INSN(), | ||
| 11845 | }, | ||
| 11846 | .data = { | ||
| 11847 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0, | ||
| 11848 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 11849 | 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6, | ||
| 11850 | }, | ||
| 11851 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
| 11852 | .result = ACCEPT, | ||
| 11853 | .retval = 256, | ||
| 11854 | }, | ||
| 11855 | { | ||
| 11856 | "ld_abs: div + abs, test 1", | ||
| 11857 | .insns = { | ||
| 11858 | BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), | ||
| 11859 | BPF_LD_ABS(BPF_B, 3), | ||
| 11860 | BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2), | ||
| 11861 | BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2), | ||
| 11862 | BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0), | ||
| 11863 | BPF_LD_ABS(BPF_B, 4), | ||
| 11864 | BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0), | ||
| 11865 | BPF_LD_IND(BPF_B, BPF_REG_8, -70), | ||
| 11866 | BPF_EXIT_INSN(), | ||
| 11867 | }, | ||
| 11868 | .data = { | ||
| 11869 | 10, 20, 30, 40, 50, | ||
| 11870 | }, | ||
| 11871 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
| 11872 | .result = ACCEPT, | ||
| 11873 | .retval = 10, | ||
| 11874 | }, | ||
| 11875 | { | ||
| 11876 | "ld_abs: div + abs, test 2", | ||
| 11877 | .insns = { | ||
| 11878 | BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), | ||
| 11879 | BPF_LD_ABS(BPF_B, 3), | ||
| 11880 | BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2), | ||
| 11881 | BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2), | ||
| 11882 | BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0), | ||
| 11883 | BPF_LD_ABS(BPF_B, 128), | ||
| 11884 | BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0), | ||
| 11885 | BPF_LD_IND(BPF_B, BPF_REG_8, -70), | ||
| 11886 | BPF_EXIT_INSN(), | ||
| 11887 | }, | ||
| 11888 | .data = { | ||
| 11889 | 10, 20, 30, 40, 50, | ||
| 11890 | }, | ||
| 11891 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
| 11892 | .result = ACCEPT, | ||
| 11893 | .retval = 0, | ||
| 11894 | }, | ||
| 11895 | { | ||
| 11896 | "ld_abs: div + abs, test 3", | ||
| 11897 | .insns = { | ||
| 11898 | BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), | ||
| 11899 | BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0), | ||
| 11900 | BPF_LD_ABS(BPF_B, 3), | ||
| 11901 | BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7), | ||
| 11902 | BPF_EXIT_INSN(), | ||
| 11903 | }, | ||
| 11904 | .data = { | ||
| 11905 | 10, 20, 30, 40, 50, | ||
| 11906 | }, | ||
| 11907 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
| 11908 | .result = ACCEPT, | ||
| 11909 | .retval = 0, | ||
| 11910 | }, | ||
| 11911 | { | ||
| 11912 | "ld_abs: div + abs, test 4", | ||
| 11913 | .insns = { | ||
| 11914 | BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), | ||
| 11915 | BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0), | ||
| 11916 | BPF_LD_ABS(BPF_B, 256), | ||
| 11917 | BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7), | ||
| 11918 | BPF_EXIT_INSN(), | ||
| 11919 | }, | ||
| 11920 | .data = { | ||
| 11921 | 10, 20, 30, 40, 50, | ||
| 11922 | }, | ||
| 11923 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
| 11924 | .result = ACCEPT, | ||
| 11925 | .retval = 0, | ||
| 11926 | }, | ||
| 11927 | { | ||
| 11928 | "ld_abs: vlan + abs, test 1", | ||
| 11929 | .insns = { }, | ||
| 11930 | .data = { | ||
| 11931 | 0x34, | ||
| 11932 | }, | ||
| 11933 | .fill_helper = bpf_fill_ld_abs_vlan_push_pop, | ||
| 11934 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
| 11935 | .result = ACCEPT, | ||
| 11936 | .retval = 0xbef, | ||
| 11937 | }, | ||
| 11938 | { | ||
| 11939 | "ld_abs: vlan + abs, test 2", | ||
| 11940 | .insns = { | ||
| 11941 | BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), | ||
| 11942 | BPF_LD_ABS(BPF_B, 0), | ||
| 11943 | BPF_LD_ABS(BPF_H, 0), | ||
| 11944 | BPF_LD_ABS(BPF_W, 0), | ||
| 11945 | BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), | ||
| 11946 | BPF_MOV64_IMM(BPF_REG_6, 0), | ||
| 11947 | BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), | ||
| 11948 | BPF_MOV64_IMM(BPF_REG_2, 1), | ||
| 11949 | BPF_MOV64_IMM(BPF_REG_3, 2), | ||
| 11950 | BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
| 11951 | BPF_FUNC_skb_vlan_push), | ||
| 11952 | BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), | ||
| 11953 | BPF_LD_ABS(BPF_B, 0), | ||
| 11954 | BPF_LD_ABS(BPF_H, 0), | ||
| 11955 | BPF_LD_ABS(BPF_W, 0), | ||
| 11956 | BPF_MOV64_IMM(BPF_REG_0, 42), | ||
| 11957 | BPF_EXIT_INSN(), | ||
| 11958 | }, | ||
| 11959 | .data = { | ||
| 11960 | 0x34, | ||
| 11961 | }, | ||
| 11962 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
| 11963 | .result = ACCEPT, | ||
| 11964 | .retval = 42, | ||
| 11965 | }, | ||
| 11966 | { | ||
| 11967 | "ld_abs: jump around ld_abs", | ||
| 11968 | .insns = { }, | ||
| 11969 | .data = { | ||
| 11970 | 10, 11, | ||
| 11971 | }, | ||
| 11972 | .fill_helper = bpf_fill_jump_around_ld_abs, | ||
| 11973 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
| 11974 | .result = ACCEPT, | ||
| 11975 | .retval = 10, | ||
| 11976 | }, | ||
| 11683 | }; | 11977 | }; |
| 11684 | 11978 | ||
| 11685 | static int probe_filter_length(const struct bpf_insn *fp) | 11979 | static int probe_filter_length(const struct bpf_insn *fp) |
| @@ -11783,7 +12077,7 @@ static int create_map_in_map(void) | |||
| 11783 | return outer_map_fd; | 12077 | return outer_map_fd; |
| 11784 | } | 12078 | } |
| 11785 | 12079 | ||
| 11786 | static char bpf_vlog[32768]; | 12080 | static char bpf_vlog[UINT_MAX >> 8]; |
| 11787 | 12081 | ||
| 11788 | static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, | 12082 | static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, |
| 11789 | int *map_fds) | 12083 | int *map_fds) |
| @@ -11794,6 +12088,9 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, | |||
| 11794 | int *fixup_prog = test->fixup_prog; | 12088 | int *fixup_prog = test->fixup_prog; |
| 11795 | int *fixup_map_in_map = test->fixup_map_in_map; | 12089 | int *fixup_map_in_map = test->fixup_map_in_map; |
| 11796 | 12090 | ||
| 12091 | if (test->fill_helper) | ||
| 12092 | test->fill_helper(test); | ||
| 12093 | |||
| 11797 | /* Allocating HTs with 1 elem is fine here, since we only test | 12094 | /* Allocating HTs with 1 elem is fine here, since we only test |
| 11798 | * for verifier and not do a runtime lookup, so the only thing | 12095 | * for verifier and not do a runtime lookup, so the only thing |
| 11799 | * that really matters is value size in this case. | 12096 | * that really matters is value size in this case. |
| @@ -11843,10 +12140,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv, | |||
| 11843 | int *passes, int *errors) | 12140 | int *passes, int *errors) |
| 11844 | { | 12141 | { |
| 11845 | int fd_prog, expected_ret, reject_from_alignment; | 12142 | int fd_prog, expected_ret, reject_from_alignment; |
| 12143 | int prog_len, prog_type = test->prog_type; | ||
| 11846 | struct bpf_insn *prog = test->insns; | 12144 | struct bpf_insn *prog = test->insns; |
| 11847 | int prog_len = probe_filter_length(prog); | ||
| 11848 | char data_in[TEST_DATA_LEN] = {}; | ||
| 11849 | int prog_type = test->prog_type; | ||
| 11850 | int map_fds[MAX_NR_MAPS]; | 12145 | int map_fds[MAX_NR_MAPS]; |
| 11851 | const char *expected_err; | 12146 | const char *expected_err; |
| 11852 | uint32_t retval; | 12147 | uint32_t retval; |
| @@ -11856,6 +12151,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, | |||
| 11856 | map_fds[i] = -1; | 12151 | map_fds[i] = -1; |
| 11857 | 12152 | ||
| 11858 | do_test_fixup(test, prog, map_fds); | 12153 | do_test_fixup(test, prog, map_fds); |
| 12154 | prog_len = probe_filter_length(prog); | ||
| 11859 | 12155 | ||
| 11860 | fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, | 12156 | fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, |
| 11861 | prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, | 12157 | prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, |
| @@ -11895,8 +12191,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv, | |||
| 11895 | } | 12191 | } |
| 11896 | 12192 | ||
| 11897 | if (fd_prog >= 0) { | 12193 | if (fd_prog >= 0) { |
| 11898 | err = bpf_prog_test_run(fd_prog, 1, data_in, sizeof(data_in), | 12194 | err = bpf_prog_test_run(fd_prog, 1, test->data, |
| 11899 | NULL, NULL, &retval, NULL); | 12195 | sizeof(test->data), NULL, NULL, |
| 12196 | &retval, NULL); | ||
| 11900 | if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { | 12197 | if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { |
| 11901 | printf("Unexpected bpf_prog_test_run error\n"); | 12198 | printf("Unexpected bpf_prog_test_run error\n"); |
| 11902 | goto fail_log; | 12199 | goto fail_log; |
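The `test_verifier.c` changes above all serve the new `BPF_MAXINSNS`-sized tests: `.insns` can now be generated at runtime by a `.fill_helper` callback (invoked from `do_test_fixup()`), a test can carry its own input packet in `.data` instead of a shared zeroed buffer, and the verifier log buffer grows from 32 KiB to `UINT_MAX >> 8` (16 MiB) because near-4096-instruction programs produce far larger logs. Since a fill helper writes the instructions only at fixup time, `probe_filter_length()` must run after `do_test_fixup()` rather than at initialization, which is exactly the reordering in the `do_test_single()` hunk. For reference, the length probe simply trims trailing all-zero instruction slots; a sketch assuming the in-tree helper's usual shape:

```c
/* Sketch of probe_filter_length() semantics (the real helper already
 * exists in test_verifier.c): scan backward past zeroed slots in the
 * fixed-size insns[] array and report the used length. */
static int probe_filter_length(const struct bpf_insn *fp)
{
	int len;

	for (len = MAX_INSNS - 1; len > 0; --len)
		if (fp[len].code != 0 || fp[len].imm != 0)
			break;
	return len + 1;
}
```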
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c new file mode 100644 index 000000000000..ad025bd75f1c --- /dev/null +++ b/tools/testing/selftests/bpf/trace_helpers.c | |||
| @@ -0,0 +1,180 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | ||
| 2 | #include <stdio.h> | ||
| 3 | #include <stdlib.h> | ||
| 4 | #include <string.h> | ||
| 5 | #include <assert.h> | ||
| 6 | #include <errno.h> | ||
| 7 | #include <poll.h> | ||
| 8 | #include <unistd.h> | ||
| 9 | #include <linux/perf_event.h> | ||
| 10 | #include <sys/mman.h> | ||
| 11 | #include "trace_helpers.h" | ||
| 12 | |||
| 13 | #define MAX_SYMS 300000 | ||
| 14 | static struct ksym syms[MAX_SYMS]; | ||
| 15 | static int sym_cnt; | ||
| 16 | |||
| 17 | static int ksym_cmp(const void *p1, const void *p2) | ||
| 18 | { | ||
| 19 | return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr; | ||
| 20 | } | ||
| 21 | |||
| 22 | int load_kallsyms(void) | ||
| 23 | { | ||
| 24 | FILE *f = fopen("/proc/kallsyms", "r"); | ||
| 25 | char func[256], buf[256]; | ||
| 26 | char symbol; | ||
| 27 | void *addr; | ||
| 28 | int i = 0; | ||
| 29 | |||
| 30 | if (!f) | ||
| 31 | return -ENOENT; | ||
| 32 | |||
| 33 | while (!feof(f)) { | ||
| 34 | if (!fgets(buf, sizeof(buf), f)) | ||
| 35 | break; | ||
| 36 | if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3) | ||
| 37 | break; | ||
| 38 | if (!addr) | ||
| 39 | continue; | ||
| 40 | syms[i].addr = (long) addr; | ||
| 41 | syms[i].name = strdup(func); | ||
| 42 | i++; | ||
| 43 | } | ||
| 44 | sym_cnt = i; | ||
| 45 | qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp); | ||
| 46 | return 0; | ||
| 47 | } | ||
| 48 | |||
| 49 | struct ksym *ksym_search(long key) | ||
| 50 | { | ||
| 51 | int start = 0, end = sym_cnt; | ||
| 52 | int result; | ||
| 53 | |||
| 54 | while (start < end) { | ||
| 55 | size_t mid = start + (end - start) / 2; | ||
| 56 | |||
| 57 | result = key - syms[mid].addr; | ||
| 58 | if (result < 0) | ||
| 59 | end = mid; | ||
| 60 | else if (result > 0) | ||
| 61 | start = mid + 1; | ||
| 62 | else | ||
| 63 | return &syms[mid]; | ||
| 64 | } | ||
| 65 | |||
| 66 | if (start >= 1 && syms[start - 1].addr < key && | ||
| 67 | key < syms[start].addr) | ||
| 68 | /* valid ksym */ | ||
| 69 | return &syms[start - 1]; | ||
| 70 | |||
| 71 | /* out of range. return _stext */ | ||
| 72 | return &syms[0]; | ||
| 73 | } | ||
| 74 | |||
| 75 | static int page_size; | ||
| 76 | static int page_cnt = 8; | ||
| 77 | static volatile struct perf_event_mmap_page *header; | ||
| 78 | |||
| 79 | int perf_event_mmap(int fd) | ||
| 80 | { | ||
| 81 | void *base; | ||
| 82 | int mmap_size; | ||
| 83 | |||
| 84 | page_size = getpagesize(); | ||
| 85 | mmap_size = page_size * (page_cnt + 1); | ||
| 86 | |||
| 87 | base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); | ||
| 88 | if (base == MAP_FAILED) { | ||
| 89 | printf("mmap err\n"); | ||
| 90 | return -1; | ||
| 91 | } | ||
| 92 | |||
| 93 | header = base; | ||
| 94 | return 0; | ||
| 95 | } | ||
| 96 | |||
| 97 | static int perf_event_poll(int fd) | ||
| 98 | { | ||
| 99 | struct pollfd pfd = { .fd = fd, .events = POLLIN }; | ||
| 100 | |||
| 101 | return poll(&pfd, 1, 1000); | ||
| 102 | } | ||
| 103 | |||
| 104 | struct perf_event_sample { | ||
| 105 | struct perf_event_header header; | ||
| 106 | __u32 size; | ||
| 107 | char data[]; | ||
| 108 | }; | ||
| 109 | |||
| 110 | static int perf_event_read(perf_event_print_fn fn) | ||
| 111 | { | ||
| 112 | __u64 data_tail = header->data_tail; | ||
| 113 | __u64 data_head = header->data_head; | ||
| 114 | __u64 buffer_size = page_cnt * page_size; | ||
| 115 | void *base, *begin, *end; | ||
| 116 | char buf[256]; | ||
| 117 | int ret; | ||
| 118 | |||
| 119 | asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ | ||
| 120 | if (data_head == data_tail) | ||
| 121 | return PERF_EVENT_CONT; | ||
| 122 | |||
| 123 | base = ((char *)header) + page_size; | ||
| 124 | |||
| 125 | begin = base + data_tail % buffer_size; | ||
| 126 | end = base + data_head % buffer_size; | ||
| 127 | |||
| 128 | while (begin != end) { | ||
| 129 | struct perf_event_sample *e; | ||
| 130 | |||
| 131 | e = begin; | ||
| 132 | if (begin + e->header.size > base + buffer_size) { | ||
| 133 | long len = base + buffer_size - begin; | ||
| 134 | |||
| 135 | assert(len < e->header.size); | ||
| 136 | memcpy(buf, begin, len); | ||
| 137 | memcpy(buf + len, base, e->header.size - len); | ||
| 138 | e = (void *) buf; | ||
| 139 | begin = base + e->header.size - len; | ||
| 140 | } else if (begin + e->header.size == base + buffer_size) { | ||
| 141 | begin = base; | ||
| 142 | } else { | ||
| 143 | begin += e->header.size; | ||
| 144 | } | ||
| 145 | |||
| 146 | if (e->header.type == PERF_RECORD_SAMPLE) { | ||
| 147 | ret = fn(e->data, e->size); | ||
| 148 | if (ret != PERF_EVENT_CONT) | ||
| 149 | return ret; | ||
| 150 | } else if (e->header.type == PERF_RECORD_LOST) { | ||
| 151 | struct { | ||
| 152 | struct perf_event_header header; | ||
| 153 | __u64 id; | ||
| 154 | __u64 lost; | ||
| 155 | } *lost = (void *) e; | ||
| 156 | printf("lost %lld events\n", lost->lost); | ||
| 157 | } else { | ||
| 158 | printf("unknown event type=%d size=%d\n", | ||
| 159 | e->header.type, e->header.size); | ||
| 160 | } | ||
| 161 | } | ||
| 162 | |||
| 163 | __sync_synchronize(); /* smp_mb() */ | ||
| 164 | header->data_tail = data_head; | ||
| 165 | return PERF_EVENT_CONT; | ||
| 166 | } | ||
| 167 | |||
| 168 | int perf_event_poller(int fd, perf_event_print_fn output_fn) | ||
| 169 | { | ||
| 170 | int ret; | ||
| 171 | |||
| 172 | for (;;) { | ||
| 173 | perf_event_poll(fd); | ||
| 174 | ret = perf_event_read(output_fn); | ||
| 175 | if (ret != PERF_EVENT_CONT) | ||
| 176 | return ret; | ||
| 177 | } | ||
| 178 | |||
| 179 | return PERF_EVENT_DONE; | ||
| 180 | } | ||
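`perf_event_read()` above consumes the shared ring between `data_tail` and `data_head`. When a record straddles the end of the ring, its two halves are stitched together in the on-stack `buf` before being handed to the callback; note that `buf` is fixed at 256 bytes, so a wrapped record larger than that would overflow it (the `assert()` only checks that the record did wrap, not that it fits). One possible hardening, not part of this commit, is to size the bounce buffer per record:

```c
/* Hypothetical alternative to the fixed 256-byte bounce buffer:
 * allocate exactly record-sized storage for a wrapped record.
 * The caller frees the returned buffer. */
#include <stdlib.h>
#include <string.h>
#include <linux/types.h>

static void *bounce_copy(void *begin, void *base, __u64 buffer_size,
			 size_t rec_size)
{
	size_t len = (char *)base + buffer_size - (char *)begin;
	void *buf = malloc(rec_size);

	if (!buf)
		return NULL;
	memcpy(buf, begin, len);			 /* tail of the ring */
	memcpy((char *)buf + len, base, rec_size - len); /* wrapped head  */
	return buf;
}
```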
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h new file mode 100644 index 000000000000..fe3eefd21e86 --- /dev/null +++ b/tools/testing/selftests/bpf/trace_helpers.h | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
| 2 | #ifndef __TRACE_HELPER_H | ||
| 3 | #define __TRACE_HELPER_H | ||
| 4 | |||
| 5 | struct ksym { | ||
| 6 | long addr; | ||
| 7 | char *name; | ||
| 8 | }; | ||
| 9 | |||
| 10 | int load_kallsyms(void); | ||
| 11 | struct ksym *ksym_search(long key); | ||
| 12 | |||
| 13 | typedef int (*perf_event_print_fn)(void *data, int size); | ||
| 14 | |||
| 15 | /* return code for perf_event_print_fn */ | ||
| 16 | #define PERF_EVENT_DONE 0 | ||
| 17 | #define PERF_EVENT_ERROR -1 | ||
| 18 | #define PERF_EVENT_CONT -2 | ||
| 19 | |||
| 20 | int perf_event_mmap(int fd); | ||
| 21 | /* return PERF_EVENT_DONE or PERF_EVENT_ERROR */ | ||
| 22 | int perf_event_poller(int fd, perf_event_print_fn output_fn); | ||
| 23 | #endif | ||
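Taken together, `trace_helpers.h` defines a small consumer protocol: the callback returns `PERF_EVENT_CONT` to keep polling, `PERF_EVENT_DONE` to stop cleanly, or `PERF_EVENT_ERROR` to abort, and `perf_event_poller()` loops until one of the latter two. A hedged end-to-end sketch of the API follows; error handling is trimmed, `my_handler` is illustrative, and wiring the fd into a `BPF_MAP_TYPE_PERF_EVENT_ARRAY` plus enabling the event is elided, as done in `test_get_stack_raw_tp()` above:

```c
/* Minimal consumer built on the trace_helpers API; assumes a BPF
 * program elsewhere emits samples into this perf event fd. */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include "trace_helpers.h"

static int my_handler(void *data, int size)
{
	printf("sample: %d bytes\n", size);
	return PERF_EVENT_CONT;	/* keep polling */
}

int main(void)
{
	struct perf_event_attr attr = {
		.sample_type = PERF_SAMPLE_RAW,
		.type = PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_BPF_OUTPUT,
	};
	int fd = syscall(__NR_perf_event_open, &attr, getpid() /* pid */,
			 -1 /* cpu */, -1 /* group_fd */, 0 /* flags */);

	if (fd < 0 || perf_event_mmap(fd) < 0)
		return 1;
	/* ... bpf_map_update_elem(perfmap_fd, &key, &fd, BPF_ANY);
	 * ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); attach the BPF program ... */
	return perf_event_poller(fd, my_handler) == PERF_EVENT_DONE ? 0 : 1;
}
```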
