-rw-r--r--  Documentation/fault-injection/fault-injection.txt | 68
-rw-r--r--  arch/Kconfig | 2
-rw-r--r--  arch/x86/Kconfig | 2
-rw-r--r--  arch/x86/include/asm/error-injection.h | 13
-rw-r--r--  arch/x86/include/asm/kprobes.h | 4
-rw-r--r--  arch/x86/kernel/kprobes/ftrace.c | 14
-rw-r--r--  arch/x86/lib/Makefile | 1
-rw-r--r--  arch/x86/lib/error-inject.c | 19
-rw-r--r--  drivers/net/ethernet/netronome/nfp/Makefile | 1
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/cmsg.c | 446
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/fw.h | 103
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/jit.c | 168
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/main.c | 60
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/main.h | 96
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/offload.c | 106
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 55
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_app.h | 9
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_asm.c | 58
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_asm.h | 4
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net.h | 12
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 7
-rw-r--r--  fs/btrfs/disk-io.c | 4
-rw-r--r--  fs/btrfs/free-space-cache.c | 4
-rw-r--r--  include/asm-generic/error-injection.h | 35
-rw-r--r--  include/asm-generic/vmlinux.lds.h | 14
-rw-r--r--  include/linux/bpf.h | 76
-rw-r--r--  include/linux/error-injection.h | 27
-rw-r--r--  include/linux/kprobes.h | 1
-rw-r--r--  include/linux/module.h | 7
-rw-r--r--  include/linux/netdevice.h | 6
-rw-r--r--  include/uapi/linux/bpf.h | 3
-rw-r--r--  kernel/Makefile | 1
-rw-r--r--  kernel/bpf/cpumap.c | 31
-rw-r--r--  kernel/bpf/devmap.c | 8
-rw-r--r--  kernel/bpf/disasm.h | 4
-rw-r--r--  kernel/bpf/hashtab.c | 103
-rw-r--r--  kernel/bpf/lpm_trie.c | 7
-rw-r--r--  kernel/bpf/offload.c | 222
-rw-r--r--  kernel/bpf/sockmap.c | 8
-rw-r--r--  kernel/bpf/stackmap.c | 6
-rw-r--r--  kernel/bpf/syscall.c | 71
-rw-r--r--  kernel/bpf/verifier.c | 7
-rw-r--r--  kernel/fail_function.c | 349
-rw-r--r--  kernel/kprobes.c | 163
-rw-r--r--  kernel/module.c | 8
-rw-r--r--  kernel/trace/Kconfig | 4
-rw-r--r--  kernel/trace/bpf_trace.c | 11
-rw-r--r--  kernel/trace/trace_kprobe.c | 33
-rw-r--r--  kernel/trace/trace_probe.h | 12
-rw-r--r--  lib/Kconfig.debug | 14
-rw-r--r--  lib/Makefile | 1
-rw-r--r--  lib/error-inject.c | 242
-rw-r--r--  net/core/filter.c | 7
-rw-r--r--  samples/bpf/xdp_monitor_kern.c | 2
-rw-r--r--  tools/bpf/bpftool/Makefile | 2
-rw-r--r--  tools/bpf/bpftool/prog.c | 1
-rw-r--r--  tools/build/feature/Makefile | 2
-rw-r--r--  tools/include/uapi/linux/bpf.h | 1
-rw-r--r--  tools/lib/bpf/Makefile | 20
-rw-r--r--  tools/lib/bpf/libbpf.c | 2
60 files changed, 2365 insertions(+), 402 deletions(-)
diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index 918972babcd8..f4a32463ca48 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -30,6 +30,12 @@ o fail_mmc_request
   injects MMC data errors on devices permitted by setting
   debugfs entries under /sys/kernel/debug/mmc0/fail_mmc_request
 
+o fail_function
+
+  injects error returns into specific functions, which are marked by
+  the ALLOW_ERROR_INJECTION() macro, by setting debugfs entries
+  under /sys/kernel/debug/fail_function. No boot option supported.
+
 Configure fault-injection capabilities behavior
 -----------------------------------------------
 
@@ -123,6 +129,29 @@ configuration of fault-injection capabilities.
 	default is 'N', setting it to 'Y' will disable failure injections
 	when dealing with private (address space) futexes.
 
+- /sys/kernel/debug/fail_function/inject:
+
+	Format: { 'function-name' | '!function-name' | '' }
+	specifies the target function for error injection by name.
+	If the function name is prefixed with '!', the function is
+	removed from the injection list. If nothing is specified (''),
+	the injection list is cleared.
+
+- /sys/kernel/debug/fail_function/injectable:
+
+	(read only) shows the error-injectable functions and what type
+	of error value can be specified. The error type will be one of
+	the following:
+	- NULL:     retval must be 0.
+	- ERRNO:    retval must be -1 to -MAX_ERRNO (-4096).
+	- ERR_NULL: retval must be 0 or -1 to -MAX_ERRNO (-4096).
+
+- /sys/kernel/debug/fail_function/<function-name>/retval:
+
+	specifies the "error" return value to inject for the given
+	function. This file is created when the user adds a new
+	injection entry.
+
 o Boot option
 
 In order to inject faults while debugfs is not available (early boot time),
@@ -268,6 +297,45 @@ trap "echo 0 > /sys/kernel/debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT
 echo "Injecting errors into the module $module... (interrupt to stop)"
 sleep 1000000
 
+------------------------------------------------------------------------------
+
+o Inject open_ctree error while btrfs mount
+
+#!/bin/bash
+
+rm -f testfile.img
+dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1
+DEVICE=$(losetup --show -f testfile.img)
+mkfs.btrfs -f $DEVICE
+mkdir -p tmpmnt
+
+FAILTYPE=fail_function
+FAILFUNC=open_ctree
+echo $FAILFUNC > /sys/kernel/debug/$FAILTYPE/inject
+echo -12 > /sys/kernel/debug/$FAILTYPE/$FAILFUNC/retval
+echo N > /sys/kernel/debug/$FAILTYPE/task-filter
+echo 100 > /sys/kernel/debug/$FAILTYPE/probability
+echo 0 > /sys/kernel/debug/$FAILTYPE/interval
+echo -1 > /sys/kernel/debug/$FAILTYPE/times
+echo 0 > /sys/kernel/debug/$FAILTYPE/space
+echo 1 > /sys/kernel/debug/$FAILTYPE/verbose
+
+mount -t btrfs $DEVICE tmpmnt
+if [ $? -ne 0 ]
+then
+	echo "SUCCESS!"
+else
+	echo "FAILED!"
+	umount tmpmnt
+fi
+
+echo > /sys/kernel/debug/$FAILTYPE/inject
+
+rmdir tmpmnt
+losetup -d $DEVICE
+rm testfile.img
+
+
 Tool to run command with failslab or fail_page_alloc
 ----------------------------------------------------
 In order to make it easier to accomplish the tasks mentioned above, we can use
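
Editor's usage sketch (not part of the patch): the btrfs script above exercises every knob at once; a minimal way to drive the new fail_function debugfs interface documented in this patch looks like the following. The target function is only an example here; any symbol listed in the running kernel's 'injectable' file can be used.

cd /sys/kernel/debug/fail_function
grep open_ctree injectable        # confirm the target and its error type
echo open_ctree > inject          # add the function to the injection list
echo -12 > open_ctree/retval      # inject -ENOMEM (ERRNO type: -1 .. -MAX_ERRNO)
echo 100 > probability            # fail every eligible call
echo -1 > times                   # do not limit the number of failures
# ... trigger the code path under test ...
echo > inject                     # clear the injection list
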
diff --git a/arch/Kconfig b/arch/Kconfig
index d3f4aaf9cb7a..97376accfb14 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -196,7 +196,7 @@ config HAVE_OPTPROBES
 config HAVE_KPROBES_ON_FTRACE
 	bool
 
-config HAVE_KPROBE_OVERRIDE
+config HAVE_FUNCTION_ERROR_INJECTION
 	bool
 
 config HAVE_NMI
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 45dc6233f2b9..366b19cb79b7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -154,7 +154,7 @@ config X86
 	select HAVE_KERNEL_XZ
 	select HAVE_KPROBES
 	select HAVE_KPROBES_ON_FTRACE
-	select HAVE_KPROBE_OVERRIDE
+	select HAVE_FUNCTION_ERROR_INJECTION
 	select HAVE_KRETPROBES
 	select HAVE_KVM
 	select HAVE_LIVEPATCH if X86_64
diff --git a/arch/x86/include/asm/error-injection.h b/arch/x86/include/asm/error-injection.h
new file mode 100644
index 000000000000..47b7a1296245
--- /dev/null
+++ b/arch/x86/include/asm/error-injection.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ERROR_INJECTION_H
+#define _ASM_ERROR_INJECTION_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <asm/ptrace.h>
+#include <asm-generic/error-injection.h>
+
+asmlinkage void just_return_func(void);
+void override_function_with_return(struct pt_regs *regs);
+
+#endif /* _ASM_ERROR_INJECTION_H */
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 36abb23a7a35..367d99cff426 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -67,9 +67,7 @@ extern const int kretprobe_blacklist_size;
 void arch_remove_kprobe(struct kprobe *p);
 asmlinkage void kretprobe_trampoline(void);
 
-#ifdef CONFIG_KPROBES_ON_FTRACE
-extern void arch_ftrace_kprobe_override_function(struct pt_regs *regs);
-#endif
+extern void arch_kprobe_override_function(struct pt_regs *regs);
 
 /* Architecture specific copy of original instruction*/
 struct arch_specific_insn {
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 1ea748d682fd..8dc0161cec8f 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -97,17 +97,3 @@ int arch_prepare_kprobe_ftrace(struct kprobe *p)
 	p->ainsn.boostable = false;
 	return 0;
 }
-
-asmlinkage void override_func(void);
-asm(
-	".type override_func, @function\n"
-	"override_func:\n"
-	"	ret\n"
-	".size override_func, .-override_func\n"
-);
-
-void arch_ftrace_kprobe_override_function(struct pt_regs *regs)
-{
-	regs->ip = (unsigned long)&override_func;
-}
-NOKPROBE_SYMBOL(arch_ftrace_kprobe_override_function);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 7b181b61170e..171377b83be1 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -26,6 +26,7 @@ lib-y += memcpy_$(BITS).o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
 
 obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
 
diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c
new file mode 100644
index 000000000000..7b881d03d0dd
--- /dev/null
+++ b/arch/x86/lib/error-inject.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/error-injection.h>
+#include <linux/kprobes.h>
+
+asmlinkage void just_return_func(void);
+
+asm(
+	".type just_return_func, @function\n"
+	"just_return_func:\n"
+	"	ret\n"
+	".size just_return_func, .-just_return_func\n"
+);
+
+void override_function_with_return(struct pt_regs *regs)
+{
+	regs->ip = (unsigned long)&just_return_func;
+}
+NOKPROBE_SYMBOL(override_function_with_return);
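
Editor's note (not part of the patch): for context on how a function becomes visible in the 'injectable' list, a kernel function is whitelisted with the ALLOW_ERROR_INJECTION() macro referenced in the documentation above; the btrfs example relies on open_ctree being marked this way, per the fs/btrfs entries in the diffstat. A hypothetical example of marking a function whose callers expect a 0-or-negative-errno return (function name is illustrative, header added by this series):

#include <linux/error-injection.h>

static int example_init_hw(void)
{
	/* normal initialization; returns 0 or a negative errno */
	return 0;
}
/* Listed in /sys/kernel/debug/fail_function/injectable with type ERRNO,
 * so retval may be set to anything in -1 .. -MAX_ERRNO.
 */
ALLOW_ERROR_INJECTION(example_init_hw, ERRNO);
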
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 6e5ef984398b..064f00e23a19 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -44,6 +44,7 @@ endif
 
 ifeq ($(CONFIG_BPF_SYSCALL),y)
 nfp-objs += \
+	    bpf/cmsg.o \
 	    bpf/main.o \
 	    bpf/offload.o \
 	    bpf/verifier.o \
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
new file mode 100644
index 000000000000..71e6586acc36
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
@@ -0,0 +1,446 @@
1/*
2 * Copyright (C) 2017 Netronome Systems, Inc.
3 *
4 * This software is dual licensed under the GNU General License Version 2,
5 * June 1991 as shown in the file COPYING in the top-level directory of this
6 * source tree or the BSD 2-Clause License provided below. You have the
7 * option to license this software under the complete terms of either license.
8 *
9 * The BSD 2-Clause License:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * 1. Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * 2. Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/bpf.h>
35#include <linux/bitops.h>
36#include <linux/bug.h>
37#include <linux/jiffies.h>
38#include <linux/skbuff.h>
39#include <linux/wait.h>
40
41#include "../nfp_app.h"
42#include "../nfp_net.h"
43#include "fw.h"
44#include "main.h"
45
46#define cmsg_warn(bpf, msg...) nn_dp_warn(&(bpf)->app->ctrl->dp, msg)
47
48#define NFP_BPF_TAG_ALLOC_SPAN (U16_MAX / 4)
49
50static bool nfp_bpf_all_tags_busy(struct nfp_app_bpf *bpf)
51{
52 u16 used_tags;
53
54 used_tags = bpf->tag_alloc_next - bpf->tag_alloc_last;
55
56 return used_tags > NFP_BPF_TAG_ALLOC_SPAN;
57}
58
59static int nfp_bpf_alloc_tag(struct nfp_app_bpf *bpf)
60{
61 /* All FW communication for BPF is request-reply. To make sure we
62 * don't reuse the message ID too early after timeout - limit the
63 * number of requests in flight.
64 */
65 if (nfp_bpf_all_tags_busy(bpf)) {
66 cmsg_warn(bpf, "all FW request contexts busy!\n");
67 return -EAGAIN;
68 }
69
70 WARN_ON(__test_and_set_bit(bpf->tag_alloc_next, bpf->tag_allocator));
71 return bpf->tag_alloc_next++;
72}
73
74static void nfp_bpf_free_tag(struct nfp_app_bpf *bpf, u16 tag)
75{
76 WARN_ON(!__test_and_clear_bit(tag, bpf->tag_allocator));
77
78 while (!test_bit(bpf->tag_alloc_last, bpf->tag_allocator) &&
79 bpf->tag_alloc_last != bpf->tag_alloc_next)
80 bpf->tag_alloc_last++;
81}
82
83static struct sk_buff *
84nfp_bpf_cmsg_alloc(struct nfp_app_bpf *bpf, unsigned int size)
85{
86 struct sk_buff *skb;
87
88 skb = nfp_app_ctrl_msg_alloc(bpf->app, size, GFP_KERNEL);
89 skb_put(skb, size);
90
91 return skb;
92}
93
94static struct sk_buff *
95nfp_bpf_cmsg_map_req_alloc(struct nfp_app_bpf *bpf, unsigned int n)
96{
97 unsigned int size;
98
99 size = sizeof(struct cmsg_req_map_op);
100 size += sizeof(struct cmsg_key_value_pair) * n;
101
102 return nfp_bpf_cmsg_alloc(bpf, size);
103}
104
105static unsigned int nfp_bpf_cmsg_get_tag(struct sk_buff *skb)
106{
107 struct cmsg_hdr *hdr;
108
109 hdr = (struct cmsg_hdr *)skb->data;
110
111 return be16_to_cpu(hdr->tag);
112}
113
114static struct sk_buff *__nfp_bpf_reply(struct nfp_app_bpf *bpf, u16 tag)
115{
116 unsigned int msg_tag;
117 struct sk_buff *skb;
118
119 skb_queue_walk(&bpf->cmsg_replies, skb) {
120 msg_tag = nfp_bpf_cmsg_get_tag(skb);
121 if (msg_tag == tag) {
122 nfp_bpf_free_tag(bpf, tag);
123 __skb_unlink(skb, &bpf->cmsg_replies);
124 return skb;
125 }
126 }
127
128 return NULL;
129}
130
131static struct sk_buff *nfp_bpf_reply(struct nfp_app_bpf *bpf, u16 tag)
132{
133 struct sk_buff *skb;
134
135 nfp_ctrl_lock(bpf->app->ctrl);
136 skb = __nfp_bpf_reply(bpf, tag);
137 nfp_ctrl_unlock(bpf->app->ctrl);
138
139 return skb;
140}
141
142static struct sk_buff *nfp_bpf_reply_drop_tag(struct nfp_app_bpf *bpf, u16 tag)
143{
144 struct sk_buff *skb;
145
146 nfp_ctrl_lock(bpf->app->ctrl);
147 skb = __nfp_bpf_reply(bpf, tag);
148 if (!skb)
149 nfp_bpf_free_tag(bpf, tag);
150 nfp_ctrl_unlock(bpf->app->ctrl);
151
152 return skb;
153}
154
155static struct sk_buff *
156nfp_bpf_cmsg_wait_reply(struct nfp_app_bpf *bpf, enum nfp_bpf_cmsg_type type,
157 int tag)
158{
159 struct sk_buff *skb;
160 int err;
161
162 err = wait_event_interruptible_timeout(bpf->cmsg_wq,
163 skb = nfp_bpf_reply(bpf, tag),
164 msecs_to_jiffies(5000));
165 /* We didn't get a response - try last time and atomically drop
166 * the tag even if no response is matched.
167 */
168 if (!skb)
169 skb = nfp_bpf_reply_drop_tag(bpf, tag);
170 if (err < 0) {
171 cmsg_warn(bpf, "%s waiting for response to 0x%02x: %d\n",
172 err == ERESTARTSYS ? "interrupted" : "error",
173 type, err);
174 return ERR_PTR(err);
175 }
176 if (!skb) {
177 cmsg_warn(bpf, "timeout waiting for response to 0x%02x\n",
178 type);
179 return ERR_PTR(-ETIMEDOUT);
180 }
181
182 return skb;
183}
184
185static struct sk_buff *
186nfp_bpf_cmsg_communicate(struct nfp_app_bpf *bpf, struct sk_buff *skb,
187 enum nfp_bpf_cmsg_type type, unsigned int reply_size)
188{
189 struct cmsg_hdr *hdr;
190 int tag;
191
192 nfp_ctrl_lock(bpf->app->ctrl);
193 tag = nfp_bpf_alloc_tag(bpf);
194 if (tag < 0) {
195 nfp_ctrl_unlock(bpf->app->ctrl);
196 dev_kfree_skb_any(skb);
197 return ERR_PTR(tag);
198 }
199
200 hdr = (void *)skb->data;
201 hdr->ver = CMSG_MAP_ABI_VERSION;
202 hdr->type = type;
203 hdr->tag = cpu_to_be16(tag);
204
205 __nfp_app_ctrl_tx(bpf->app, skb);
206
207 nfp_ctrl_unlock(bpf->app->ctrl);
208
209 skb = nfp_bpf_cmsg_wait_reply(bpf, type, tag);
210 if (IS_ERR(skb))
211 return skb;
212
213 hdr = (struct cmsg_hdr *)skb->data;
214 /* 0 reply_size means caller will do the validation */
215 if (reply_size && skb->len != reply_size) {
216 cmsg_warn(bpf, "cmsg drop - wrong size %d != %d!\n",
217 skb->len, reply_size);
218 goto err_free;
219 }
220 if (hdr->type != __CMSG_REPLY(type)) {
221 cmsg_warn(bpf, "cmsg drop - wrong type 0x%02x != 0x%02lx!\n",
222 hdr->type, __CMSG_REPLY(type));
223 goto err_free;
224 }
225
226 return skb;
227err_free:
228 dev_kfree_skb_any(skb);
229 return ERR_PTR(-EIO);
230}
231
232static int
233nfp_bpf_ctrl_rc_to_errno(struct nfp_app_bpf *bpf,
234 struct cmsg_reply_map_simple *reply)
235{
236 static const int res_table[] = {
237 [CMSG_RC_SUCCESS] = 0,
238 [CMSG_RC_ERR_MAP_FD] = -EBADFD,
239 [CMSG_RC_ERR_MAP_NOENT] = -ENOENT,
240 [CMSG_RC_ERR_MAP_ERR] = -EINVAL,
241 [CMSG_RC_ERR_MAP_PARSE] = -EIO,
242 [CMSG_RC_ERR_MAP_EXIST] = -EEXIST,
243 [CMSG_RC_ERR_MAP_NOMEM] = -ENOMEM,
244 [CMSG_RC_ERR_MAP_E2BIG] = -E2BIG,
245 };
246 u32 rc;
247
248 rc = be32_to_cpu(reply->rc);
249 if (rc >= ARRAY_SIZE(res_table)) {
250 cmsg_warn(bpf, "FW responded with invalid status: %u\n", rc);
251 return -EIO;
252 }
253
254 return res_table[rc];
255}
256
257long long int
258nfp_bpf_ctrl_alloc_map(struct nfp_app_bpf *bpf, struct bpf_map *map)
259{
260 struct cmsg_reply_map_alloc_tbl *reply;
261 struct cmsg_req_map_alloc_tbl *req;
262 struct sk_buff *skb;
263 u32 tid;
264 int err;
265
266 skb = nfp_bpf_cmsg_alloc(bpf, sizeof(*req));
267 if (!skb)
268 return -ENOMEM;
269
270 req = (void *)skb->data;
271 req->key_size = cpu_to_be32(map->key_size);
272 req->value_size = cpu_to_be32(map->value_size);
273 req->max_entries = cpu_to_be32(map->max_entries);
274 req->map_type = cpu_to_be32(map->map_type);
275 req->map_flags = 0;
276
277 skb = nfp_bpf_cmsg_communicate(bpf, skb, CMSG_TYPE_MAP_ALLOC,
278 sizeof(*reply));
279 if (IS_ERR(skb))
280 return PTR_ERR(skb);
281
282 reply = (void *)skb->data;
283 err = nfp_bpf_ctrl_rc_to_errno(bpf, &reply->reply_hdr);
284 if (err)
285 goto err_free;
286
287 tid = be32_to_cpu(reply->tid);
288 dev_consume_skb_any(skb);
289
290 return tid;
291err_free:
292 dev_kfree_skb_any(skb);
293 return err;
294}
295
296void nfp_bpf_ctrl_free_map(struct nfp_app_bpf *bpf, struct nfp_bpf_map *nfp_map)
297{
298 struct cmsg_reply_map_free_tbl *reply;
299 struct cmsg_req_map_free_tbl *req;
300 struct sk_buff *skb;
301 int err;
302
303 skb = nfp_bpf_cmsg_alloc(bpf, sizeof(*req));
304 if (!skb) {
305 cmsg_warn(bpf, "leaking map - failed to allocate msg\n");
306 return;
307 }
308
309 req = (void *)skb->data;
310 req->tid = cpu_to_be32(nfp_map->tid);
311
312 skb = nfp_bpf_cmsg_communicate(bpf, skb, CMSG_TYPE_MAP_FREE,
313 sizeof(*reply));
314 if (IS_ERR(skb)) {
315 cmsg_warn(bpf, "leaking map - I/O error\n");
316 return;
317 }
318
319 reply = (void *)skb->data;
320 err = nfp_bpf_ctrl_rc_to_errno(bpf, &reply->reply_hdr);
321 if (err)
322 cmsg_warn(bpf, "leaking map - FW responded with: %d\n", err);
323
324 dev_consume_skb_any(skb);
325}
326
327static int
328nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap,
329 enum nfp_bpf_cmsg_type op,
330 u8 *key, u8 *value, u64 flags, u8 *out_key, u8 *out_value)
331{
332 struct nfp_bpf_map *nfp_map = offmap->dev_priv;
333 struct nfp_app_bpf *bpf = nfp_map->bpf;
334 struct bpf_map *map = &offmap->map;
335 struct cmsg_reply_map_op *reply;
336 struct cmsg_req_map_op *req;
337 struct sk_buff *skb;
338 int err;
339
340 /* FW messages have no space for more than 32 bits of flags */
341 if (flags >> 32)
342 return -EOPNOTSUPP;
343
344 skb = nfp_bpf_cmsg_map_req_alloc(bpf, 1);
345 if (!skb)
346 return -ENOMEM;
347
348 req = (void *)skb->data;
349 req->tid = cpu_to_be32(nfp_map->tid);
350 req->count = cpu_to_be32(1);
351 req->flags = cpu_to_be32(flags);
352
353 /* Copy inputs */
354 if (key)
355 memcpy(&req->elem[0].key, key, map->key_size);
356 if (value)
357 memcpy(&req->elem[0].value, value, map->value_size);
358
359 skb = nfp_bpf_cmsg_communicate(bpf, skb, op,
360 sizeof(*reply) + sizeof(*reply->elem));
361 if (IS_ERR(skb))
362 return PTR_ERR(skb);
363
364 reply = (void *)skb->data;
365 err = nfp_bpf_ctrl_rc_to_errno(bpf, &reply->reply_hdr);
366 if (err)
367 goto err_free;
368
369 /* Copy outputs */
370 if (out_key)
371 memcpy(out_key, &reply->elem[0].key, map->key_size);
372 if (out_value)
373 memcpy(out_value, &reply->elem[0].value, map->value_size);
374
375 dev_consume_skb_any(skb);
376
377 return 0;
378err_free:
379 dev_kfree_skb_any(skb);
380 return err;
381}
382
383int nfp_bpf_ctrl_update_entry(struct bpf_offloaded_map *offmap,
384 void *key, void *value, u64 flags)
385{
386 return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_UPDATE,
387 key, value, flags, NULL, NULL);
388}
389
390int nfp_bpf_ctrl_del_entry(struct bpf_offloaded_map *offmap, void *key)
391{
392 return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_DELETE,
393 key, NULL, 0, NULL, NULL);
394}
395
396int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap,
397 void *key, void *value)
398{
399 return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_LOOKUP,
400 key, NULL, 0, NULL, value);
401}
402
403int nfp_bpf_ctrl_getfirst_entry(struct bpf_offloaded_map *offmap,
404 void *next_key)
405{
406 return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_GETFIRST,
407 NULL, NULL, 0, next_key, NULL);
408}
409
410int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap,
411 void *key, void *next_key)
412{
413 return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_GETNEXT,
414 key, NULL, 0, next_key, NULL);
415}
416
417void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
418{
419 struct nfp_app_bpf *bpf = app->priv;
420 unsigned int tag;
421
422 if (unlikely(skb->len < sizeof(struct cmsg_reply_map_simple))) {
423 cmsg_warn(bpf, "cmsg drop - too short %d!\n", skb->len);
424 goto err_free;
425 }
426
427 nfp_ctrl_lock(bpf->app->ctrl);
428
429 tag = nfp_bpf_cmsg_get_tag(skb);
430 if (unlikely(!test_bit(tag, bpf->tag_allocator))) {
431 cmsg_warn(bpf, "cmsg drop - no one is waiting for tag %u!\n",
432 tag);
433 goto err_unlock;
434 }
435
436 __skb_queue_tail(&bpf->cmsg_replies, skb);
437 wake_up_interruptible_all(&bpf->cmsg_wq);
438
439 nfp_ctrl_unlock(bpf->app->ctrl);
440
441 return;
442err_unlock:
443 nfp_ctrl_unlock(bpf->app->ctrl);
444err_free:
445 dev_kfree_skb_any(skb);
446}
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
index 7206aa1522db..cfcc7bcb2c67 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
@@ -38,7 +38,14 @@
38#include <linux/types.h> 38#include <linux/types.h>
39 39
40enum bpf_cap_tlv_type { 40enum bpf_cap_tlv_type {
41 NFP_BPF_CAP_TYPE_FUNC = 1,
41 NFP_BPF_CAP_TYPE_ADJUST_HEAD = 2, 42 NFP_BPF_CAP_TYPE_ADJUST_HEAD = 2,
43 NFP_BPF_CAP_TYPE_MAPS = 3,
44};
45
46struct nfp_bpf_cap_tlv_func {
47 __le32 func_id;
48 __le32 func_addr;
42}; 49};
43 50
44struct nfp_bpf_cap_tlv_adjust_head { 51struct nfp_bpf_cap_tlv_adjust_head {
@@ -51,4 +58,100 @@ struct nfp_bpf_cap_tlv_adjust_head {
51 58
52#define NFP_BPF_ADJUST_HEAD_NO_META BIT(0) 59#define NFP_BPF_ADJUST_HEAD_NO_META BIT(0)
53 60
61struct nfp_bpf_cap_tlv_maps {
62 __le32 types;
63 __le32 max_maps;
64 __le32 max_elems;
65 __le32 max_key_sz;
66 __le32 max_val_sz;
67 __le32 max_elem_sz;
68};
69
70/*
71 * Types defined for map related control messages
72 */
73#define CMSG_MAP_ABI_VERSION 1
74
75enum nfp_bpf_cmsg_type {
76 CMSG_TYPE_MAP_ALLOC = 1,
77 CMSG_TYPE_MAP_FREE = 2,
78 CMSG_TYPE_MAP_LOOKUP = 3,
79 CMSG_TYPE_MAP_UPDATE = 4,
80 CMSG_TYPE_MAP_DELETE = 5,
81 CMSG_TYPE_MAP_GETNEXT = 6,
82 CMSG_TYPE_MAP_GETFIRST = 7,
83 __CMSG_TYPE_MAP_MAX,
84};
85
86#define CMSG_TYPE_MAP_REPLY_BIT 7
87#define __CMSG_REPLY(req) (BIT(CMSG_TYPE_MAP_REPLY_BIT) | (req))
88
89#define CMSG_MAP_KEY_LW 16
90#define CMSG_MAP_VALUE_LW 16
91
92enum nfp_bpf_cmsg_status {
93 CMSG_RC_SUCCESS = 0,
94 CMSG_RC_ERR_MAP_FD = 1,
95 CMSG_RC_ERR_MAP_NOENT = 2,
96 CMSG_RC_ERR_MAP_ERR = 3,
97 CMSG_RC_ERR_MAP_PARSE = 4,
98 CMSG_RC_ERR_MAP_EXIST = 5,
99 CMSG_RC_ERR_MAP_NOMEM = 6,
100 CMSG_RC_ERR_MAP_E2BIG = 7,
101};
102
103struct cmsg_hdr {
104 u8 type;
105 u8 ver;
106 __be16 tag;
107};
108
109struct cmsg_reply_map_simple {
110 struct cmsg_hdr hdr;
111 __be32 rc;
112};
113
114struct cmsg_req_map_alloc_tbl {
115 struct cmsg_hdr hdr;
116 __be32 key_size; /* in bytes */
117 __be32 value_size; /* in bytes */
118 __be32 max_entries;
119 __be32 map_type;
120 __be32 map_flags; /* reserved */
121};
122
123struct cmsg_reply_map_alloc_tbl {
124 struct cmsg_reply_map_simple reply_hdr;
125 __be32 tid;
126};
127
128struct cmsg_req_map_free_tbl {
129 struct cmsg_hdr hdr;
130 __be32 tid;
131};
132
133struct cmsg_reply_map_free_tbl {
134 struct cmsg_reply_map_simple reply_hdr;
135 __be32 count;
136};
137
138struct cmsg_key_value_pair {
139 __be32 key[CMSG_MAP_KEY_LW];
140 __be32 value[CMSG_MAP_VALUE_LW];
141};
142
143struct cmsg_req_map_op {
144 struct cmsg_hdr hdr;
145 __be32 tid;
146 __be32 count;
147 __be32 flags;
148 struct cmsg_key_value_pair elem[0];
149};
150
151struct cmsg_reply_map_op {
152 struct cmsg_reply_map_simple reply_hdr;
153 __be32 count;
154 __be32 resv;
155 struct cmsg_key_value_pair elem[0];
156};
54#endif 157#endif
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 47c5224f8d6f..56451edf01c2 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -483,6 +483,21 @@ static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm)
483 } 483 }
484} 484}
485 485
486static void
487wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm,
488 enum nfp_relo_type relo)
489{
490 if (imm > 0xffff) {
491 pr_err("relocation of a large immediate!\n");
492 nfp_prog->error = -EFAULT;
493 return;
494 }
495 emit_immed(nfp_prog, dst, imm, IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
496
497 nfp_prog->prog[nfp_prog->prog_len - 1] |=
498 FIELD_PREP(OP_RELO_TYPE, relo);
499}
500
486/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted) 501/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
487 * If the @imm is small enough encode it directly in operand and return 502 * If the @imm is small enough encode it directly in operand and return
488 * otherwise load @imm to a spare register and return its encoding. 503 * otherwise load @imm to a spare register and return its encoding.
@@ -538,27 +553,51 @@ wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len,
538 emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true); 553 emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
539} 554}
540 555
556static void
557addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
558 swreg *rega, swreg *regb)
559{
560 if (offset == reg_imm(0)) {
561 *rega = reg_a(src_gpr);
562 *regb = reg_b(src_gpr + 1);
563 return;
564 }
565
566 emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(src_gpr), ALU_OP_ADD, offset);
567 emit_alu(nfp_prog, imm_b(nfp_prog), reg_b(src_gpr + 1), ALU_OP_ADD_C,
568 reg_imm(0));
569 *rega = imm_a(nfp_prog);
570 *regb = imm_b(nfp_prog);
571}
572
541/* NFP has Command Push Pull bus which supports bluk memory operations. */ 573/* NFP has Command Push Pull bus which supports bluk memory operations. */
542static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 574static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
543{ 575{
544 bool descending_seq = meta->ldst_gather_len < 0; 576 bool descending_seq = meta->ldst_gather_len < 0;
545 s16 len = abs(meta->ldst_gather_len); 577 s16 len = abs(meta->ldst_gather_len);
546 swreg src_base, off; 578 swreg src_base, off;
579 bool src_40bit_addr;
547 unsigned int i; 580 unsigned int i;
548 u8 xfer_num; 581 u8 xfer_num;
549 582
550 off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 583 off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
584 src_40bit_addr = meta->ptr.type == PTR_TO_MAP_VALUE;
551 src_base = reg_a(meta->insn.src_reg * 2); 585 src_base = reg_a(meta->insn.src_reg * 2);
552 xfer_num = round_up(len, 4) / 4; 586 xfer_num = round_up(len, 4) / 4;
553 587
588 if (src_40bit_addr)
589 addr40_offset(nfp_prog, meta->insn.src_reg, off, &src_base,
590 &off);
591
554 /* Setup PREV_ALU fields to override memory read length. */ 592 /* Setup PREV_ALU fields to override memory read length. */
555 if (len > 32) 593 if (len > 32)
556 wrp_immed(nfp_prog, reg_none(), 594 wrp_immed(nfp_prog, reg_none(),
557 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); 595 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
558 596
559 /* Memory read from source addr into transfer-in registers. */ 597 /* Memory read from source addr into transfer-in registers. */
560 emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base, 598 emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP,
561 off, xfer_num - 1, true, len > 32); 599 src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0,
600 src_base, off, xfer_num - 1, true, len > 32);
562 601
563 /* Move from transfer-in to transfer-out. */ 602 /* Move from transfer-in to transfer-out. */
564 for (i = 0; i < xfer_num; i++) 603 for (i = 0; i < xfer_num; i++)
@@ -696,20 +735,20 @@ data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
696} 735}
697 736
698static int 737static int
699data_ld_host_order(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset, 738data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
700 u8 dst_gpr, int size) 739 swreg lreg, swreg rreg, int size, enum cmd_mode mode)
701{ 740{
702 unsigned int i; 741 unsigned int i;
703 u8 mask, sz; 742 u8 mask, sz;
704 743
705 /* We load the value from the address indicated in @offset and then 744 /* We load the value from the address indicated in rreg + lreg and then
706 * mask out the data we don't need. Note: this is little endian! 745 * mask out the data we don't need. Note: this is little endian!
707 */ 746 */
708 sz = max(size, 4); 747 sz = max(size, 4);
709 mask = size < 4 ? GENMASK(size - 1, 0) : 0; 748 mask = size < 4 ? GENMASK(size - 1, 0) : 0;
710 749
711 emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, 750 emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0,
712 reg_a(src_gpr), offset, sz / 4 - 1, true); 751 lreg, rreg, sz / 4 - 1, true);
713 752
714 i = 0; 753 i = 0;
715 if (mask) 754 if (mask)
@@ -726,6 +765,26 @@ data_ld_host_order(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
726} 765}
727 766
728static int 767static int
768data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
769 u8 dst_gpr, u8 size)
770{
771 return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset,
772 size, CMD_MODE_32b);
773}
774
775static int
776data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
777 u8 dst_gpr, u8 size)
778{
779 swreg rega, regb;
780
781 addr40_offset(nfp_prog, src_gpr, offset, &rega, &regb);
782
783 return data_ld_host_order(nfp_prog, dst_gpr, rega, regb,
784 size, CMD_MODE_40b_BA);
785}
786
787static int
729construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size) 788construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
730{ 789{
731 swreg tmp_reg; 790 swreg tmp_reg;
@@ -1279,6 +1338,56 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1279 return 0; 1338 return 0;
1280} 1339}
1281 1340
1341static int
1342map_lookup_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1343{
1344 struct bpf_offloaded_map *offmap;
1345 struct nfp_bpf_map *nfp_map;
1346 bool load_lm_ptr;
1347 u32 ret_tgt;
1348 s64 lm_off;
1349 swreg tid;
1350
1351 offmap = (struct bpf_offloaded_map *)meta->arg1.map_ptr;
1352 nfp_map = offmap->dev_priv;
1353
1354 /* We only have to reload LM0 if the key is not at start of stack */
1355 lm_off = nfp_prog->stack_depth;
1356 lm_off += meta->arg2.var_off.value + meta->arg2.off;
1357 load_lm_ptr = meta->arg2_var_off || lm_off;
1358
1359 /* Set LM0 to start of key */
1360 if (load_lm_ptr)
1361 emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0);
1362
1363 /* Load map ID into a register, it should actually fit as an immediate
1364 * but in case it doesn't deal with it here, not in the delay slots.
1365 */
1366 tid = ur_load_imm_any(nfp_prog, nfp_map->tid, imm_a(nfp_prog));
1367
1368 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + BPF_FUNC_map_lookup_elem,
1369 2, RELO_BR_HELPER);
1370 ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
1371
1372 /* Load map ID into A0 */
1373 wrp_mov(nfp_prog, reg_a(0), tid);
1374
1375 /* Load the return address into B0 */
1376 wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
1377
1378 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
1379 return -EINVAL;
1380
1381 /* Reset the LM0 pointer */
1382 if (!load_lm_ptr)
1383 return 0;
1384
1385 emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0);
1386 wrp_nops(nfp_prog, 3);
1387
1388 return 0;
1389}
1390
1282/* --- Callbacks --- */ 1391/* --- Callbacks --- */
1283static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1392static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1284{ 1393{
@@ -1713,8 +1822,20 @@ mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1713 1822
1714 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 1823 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
1715 1824
1716 return data_ld_host_order(nfp_prog, meta->insn.src_reg * 2, tmp_reg, 1825 return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2,
1717 meta->insn.dst_reg * 2, size); 1826 tmp_reg, meta->insn.dst_reg * 2, size);
1827}
1828
1829static int
1830mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1831 unsigned int size)
1832{
1833 swreg tmp_reg;
1834
1835 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
1836
1837 return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2,
1838 tmp_reg, meta->insn.dst_reg * 2, size);
1718} 1839}
1719 1840
1720static int 1841static int
@@ -1738,6 +1859,9 @@ mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1738 return mem_ldx_stack(nfp_prog, meta, size, 1859 return mem_ldx_stack(nfp_prog, meta, size,
1739 meta->ptr.off + meta->ptr.var_off.value); 1860 meta->ptr.off + meta->ptr.var_off.value);
1740 1861
1862 if (meta->ptr.type == PTR_TO_MAP_VALUE)
1863 return mem_ldx_emem(nfp_prog, meta, size);
1864
1741 return -EOPNOTSUPP; 1865 return -EOPNOTSUPP;
1742} 1866}
1743 1867
@@ -2058,6 +2182,8 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2058 switch (meta->insn.imm) { 2182 switch (meta->insn.imm) {
2059 case BPF_FUNC_xdp_adjust_head: 2183 case BPF_FUNC_xdp_adjust_head:
2060 return adjust_head(nfp_prog, meta); 2184 return adjust_head(nfp_prog, meta);
2185 case BPF_FUNC_map_lookup_elem:
2186 return map_lookup_stack(nfp_prog, meta);
2061 default: 2187 default:
2062 WARN_ONCE(1, "verifier allowed unsupported function\n"); 2188 WARN_ONCE(1, "verifier allowed unsupported function\n");
2063 return -EOPNOTSUPP; 2189 return -EOPNOTSUPP;
@@ -2781,6 +2907,11 @@ void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt)
2781 } 2907 }
2782} 2908}
2783 2909
2910bool nfp_bpf_supported_opcode(u8 code)
2911{
2912 return !!instr_cb[code];
2913}
2914
2784void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv) 2915void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
2785{ 2916{
2786 unsigned int i; 2917 unsigned int i;
@@ -2794,6 +2925,7 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
2794 2925
2795 for (i = 0; i < nfp_prog->prog_len; i++) { 2926 for (i = 0; i < nfp_prog->prog_len; i++) {
2796 enum nfp_relo_type special; 2927 enum nfp_relo_type special;
2928 u32 val;
2797 2929
2798 special = FIELD_GET(OP_RELO_TYPE, prog[i]); 2930 special = FIELD_GET(OP_RELO_TYPE, prog[i]);
2799 switch (special) { 2931 switch (special) {
@@ -2813,6 +2945,24 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
2813 case RELO_BR_NEXT_PKT: 2945 case RELO_BR_NEXT_PKT:
2814 br_set_offset(&prog[i], bv->tgt_done); 2946 br_set_offset(&prog[i], bv->tgt_done);
2815 break; 2947 break;
2948 case RELO_BR_HELPER:
2949 val = br_get_offset(prog[i]);
2950 val -= BR_OFF_RELO;
2951 switch (val) {
2952 case BPF_FUNC_map_lookup_elem:
2953 val = nfp_prog->bpf->helpers.map_lookup;
2954 break;
2955 default:
2956 pr_err("relocation of unknown helper %d\n",
2957 val);
2958 err = -EINVAL;
2959 goto err_free_prog;
2960 }
2961 br_set_offset(&prog[i], val);
2962 break;
2963 case RELO_IMMED_REL:
2964 immed_add_value(&prog[i], bv->start_off);
2965 break;
2816 } 2966 }
2817 2967
2818 prog[i] &= ~OP_RELO_TYPE; 2968 prog[i] &= ~OP_RELO_TYPE;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index e8cfe300c8c4..8823c8360047 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -251,6 +251,45 @@ nfp_bpf_parse_cap_adjust_head(struct nfp_app_bpf *bpf, void __iomem *value,
251 return 0; 251 return 0;
252} 252}
253 253
254static int
255nfp_bpf_parse_cap_func(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
256{
257 struct nfp_bpf_cap_tlv_func __iomem *cap = value;
258
259 if (length < sizeof(*cap)) {
260 nfp_err(bpf->app->cpp, "truncated function TLV: %d\n", length);
261 return -EINVAL;
262 }
263
264 switch (readl(&cap->func_id)) {
265 case BPF_FUNC_map_lookup_elem:
266 bpf->helpers.map_lookup = readl(&cap->func_addr);
267 break;
268 }
269
270 return 0;
271}
272
273static int
274nfp_bpf_parse_cap_maps(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
275{
276 struct nfp_bpf_cap_tlv_maps __iomem *cap = value;
277
278 if (length < sizeof(*cap)) {
279 nfp_err(bpf->app->cpp, "truncated maps TLV: %d\n", length);
280 return -EINVAL;
281 }
282
283 bpf->maps.types = readl(&cap->types);
284 bpf->maps.max_maps = readl(&cap->max_maps);
285 bpf->maps.max_elems = readl(&cap->max_elems);
286 bpf->maps.max_key_sz = readl(&cap->max_key_sz);
287 bpf->maps.max_val_sz = readl(&cap->max_val_sz);
288 bpf->maps.max_elem_sz = readl(&cap->max_elem_sz);
289
290 return 0;
291}
292
254static int nfp_bpf_parse_capabilities(struct nfp_app *app) 293static int nfp_bpf_parse_capabilities(struct nfp_app *app)
255{ 294{
256 struct nfp_cpp *cpp = app->pf->cpp; 295 struct nfp_cpp *cpp = app->pf->cpp;
@@ -276,11 +315,19 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
276 goto err_release_free; 315 goto err_release_free;
277 316
278 switch (type) { 317 switch (type) {
318 case NFP_BPF_CAP_TYPE_FUNC:
319 if (nfp_bpf_parse_cap_func(app->priv, value, length))
320 goto err_release_free;
321 break;
279 case NFP_BPF_CAP_TYPE_ADJUST_HEAD: 322 case NFP_BPF_CAP_TYPE_ADJUST_HEAD:
280 if (nfp_bpf_parse_cap_adjust_head(app->priv, value, 323 if (nfp_bpf_parse_cap_adjust_head(app->priv, value,
281 length)) 324 length))
282 goto err_release_free; 325 goto err_release_free;
283 break; 326 break;
327 case NFP_BPF_CAP_TYPE_MAPS:
328 if (nfp_bpf_parse_cap_maps(app->priv, value, length))
329 goto err_release_free;
330 break;
284 default: 331 default:
285 nfp_dbg(cpp, "unknown BPF capability: %d\n", type); 332 nfp_dbg(cpp, "unknown BPF capability: %d\n", type);
286 break; 333 break;
@@ -313,6 +360,10 @@ static int nfp_bpf_init(struct nfp_app *app)
313 bpf->app = app; 360 bpf->app = app;
314 app->priv = bpf; 361 app->priv = bpf;
315 362
363 skb_queue_head_init(&bpf->cmsg_replies);
364 init_waitqueue_head(&bpf->cmsg_wq);
365 INIT_LIST_HEAD(&bpf->map_list);
366
316 err = nfp_bpf_parse_capabilities(app); 367 err = nfp_bpf_parse_capabilities(app);
317 if (err) 368 if (err)
318 goto err_free_bpf; 369 goto err_free_bpf;
@@ -326,7 +377,12 @@ err_free_bpf:
326 377
327static void nfp_bpf_clean(struct nfp_app *app) 378static void nfp_bpf_clean(struct nfp_app *app)
328{ 379{
329 kfree(app->priv); 380 struct nfp_app_bpf *bpf = app->priv;
381
382 WARN_ON(!skb_queue_empty(&bpf->cmsg_replies));
383 WARN_ON(!list_empty(&bpf->map_list));
384 WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use);
385 kfree(bpf);
330} 386}
331 387
332const struct nfp_app_type app_bpf = { 388const struct nfp_app_type app_bpf = {
@@ -343,6 +399,8 @@ const struct nfp_app_type app_bpf = {
343 .vnic_alloc = nfp_bpf_vnic_alloc, 399 .vnic_alloc = nfp_bpf_vnic_alloc,
344 .vnic_free = nfp_bpf_vnic_free, 400 .vnic_free = nfp_bpf_vnic_free,
345 401
402 .ctrl_msg_rx = nfp_bpf_ctrl_msg_rx,
403
346 .setup_tc = nfp_bpf_setup_tc, 404 .setup_tc = nfp_bpf_setup_tc,
347 .tc_busy = nfp_bpf_tc_busy, 405 .tc_busy = nfp_bpf_tc_busy,
348 .bpf = nfp_ndo_bpf, 406 .bpf = nfp_ndo_bpf,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 66381afee2a9..c476bca15ba4 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -37,10 +37,14 @@
37#include <linux/bitfield.h> 37#include <linux/bitfield.h>
38#include <linux/bpf.h> 38#include <linux/bpf.h>
39#include <linux/bpf_verifier.h> 39#include <linux/bpf_verifier.h>
40#include <linux/kernel.h>
40#include <linux/list.h> 41#include <linux/list.h>
42#include <linux/skbuff.h>
41#include <linux/types.h> 43#include <linux/types.h>
44#include <linux/wait.h>
42 45
43#include "../nfp_asm.h" 46#include "../nfp_asm.h"
47#include "fw.h"
44 48
45/* For relocation logic use up-most byte of branch instruction as scratch 49/* For relocation logic use up-most byte of branch instruction as scratch
46 * area. Remember to clear this before sending instructions to HW! 50 * area. Remember to clear this before sending instructions to HW!
@@ -56,6 +60,9 @@ enum nfp_relo_type {
56 RELO_BR_GO_ABORT, 60 RELO_BR_GO_ABORT,
57 /* external jumps to fixed addresses */ 61 /* external jumps to fixed addresses */
58 RELO_BR_NEXT_PKT, 62 RELO_BR_NEXT_PKT,
63 RELO_BR_HELPER,
64 /* immediate relocation against load address */
65 RELO_IMMED_REL,
59}; 66};
60 67
61/* To make absolute relocated branches (branches other than RELO_BR_REL) 68/* To make absolute relocated branches (branches other than RELO_BR_REL)
@@ -93,16 +100,49 @@ enum pkt_vec {
93 * struct nfp_app_bpf - bpf app priv structure 100 * struct nfp_app_bpf - bpf app priv structure
94 * @app: backpointer to the app 101 * @app: backpointer to the app
95 * 102 *
103 * @tag_allocator: bitmap of control message tags in use
104 * @tag_alloc_next: next tag bit to allocate
105 * @tag_alloc_last: next tag bit to be freed
106 *
107 * @cmsg_replies: received cmsg replies waiting to be consumed
108 * @cmsg_wq: work queue for waiting for cmsg replies
109 *
110 * @map_list: list of offloaded maps
111 * @maps_in_use: number of currently offloaded maps
112 * @map_elems_in_use: number of elements allocated to offloaded maps
113 *
96 * @adjust_head: adjust head capability 114 * @adjust_head: adjust head capability
97 * @flags: extra flags for adjust head 115 * @flags: extra flags for adjust head
98 * @off_min: minimal packet offset within buffer required 116 * @off_min: minimal packet offset within buffer required
99 * @off_max: maximum packet offset within buffer required 117 * @off_max: maximum packet offset within buffer required
100 * @guaranteed_sub: amount of negative adjustment guaranteed possible 118 * @guaranteed_sub: amount of negative adjustment guaranteed possible
101 * @guaranteed_add: amount of positive adjustment guaranteed possible 119 * @guaranteed_add: amount of positive adjustment guaranteed possible
120 *
121 * @maps: map capability
122 * @types: supported map types
123 * @max_maps: max number of maps supported
124 * @max_elems: max number of entries in each map
125 * @max_key_sz: max size of map key
126 * @max_val_sz: max size of map value
127 * @max_elem_sz: max size of map entry (key + value)
128 *
129 * @helpers: helper addressess for various calls
130 * @map_lookup: map lookup helper address
102 */ 131 */
103struct nfp_app_bpf { 132struct nfp_app_bpf {
104 struct nfp_app *app; 133 struct nfp_app *app;
105 134
135 DECLARE_BITMAP(tag_allocator, U16_MAX + 1);
136 u16 tag_alloc_next;
137 u16 tag_alloc_last;
138
139 struct sk_buff_head cmsg_replies;
140 struct wait_queue_head cmsg_wq;
141
142 struct list_head map_list;
143 unsigned int maps_in_use;
144 unsigned int map_elems_in_use;
145
106 struct nfp_bpf_cap_adjust_head { 146 struct nfp_bpf_cap_adjust_head {
107 u32 flags; 147 u32 flags;
108 int off_min; 148 int off_min;
@@ -110,6 +150,33 @@ struct nfp_app_bpf {
110 int guaranteed_sub; 150 int guaranteed_sub;
111 int guaranteed_add; 151 int guaranteed_add;
112 } adjust_head; 152 } adjust_head;
153
154 struct {
155 u32 types;
156 u32 max_maps;
157 u32 max_elems;
158 u32 max_key_sz;
159 u32 max_val_sz;
160 u32 max_elem_sz;
161 } maps;
162
163 struct {
164 u32 map_lookup;
165 } helpers;
166};
167
168/**
169 * struct nfp_bpf_map - private per-map data attached to BPF maps for offload
170 * @offmap: pointer to the offloaded BPF map
171 * @bpf: back pointer to bpf app private structure
172 * @tid: table id identifying map on datapath
173 * @l: link on the nfp_app_bpf->map_list list
174 */
175struct nfp_bpf_map {
176 struct bpf_offloaded_map *offmap;
177 struct nfp_app_bpf *bpf;
178 u32 tid;
179 struct list_head l;
113}; 180};
114 181
115struct nfp_prog; 182struct nfp_prog;
@@ -131,9 +198,12 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
131 * @ptr: pointer type for memory operations 198 * @ptr: pointer type for memory operations
132 * @ldst_gather_len: memcpy length gathered from load/store sequence 199 * @ldst_gather_len: memcpy length gathered from load/store sequence
133 * @paired_st: the paired store insn at the head of the sequence 200 * @paired_st: the paired store insn at the head of the sequence
134 * @arg2: arg2 for call instructions
135 * @ptr_not_const: pointer is not always constant 201 * @ptr_not_const: pointer is not always constant
136 * @jmp_dst: destination info for jump instructions 202 * @jmp_dst: destination info for jump instructions
203 * @func_id: function id for call instructions
204 * @arg1: arg1 for call instructions
205 * @arg2: arg2 for call instructions
206 * @arg2_var_off: arg2 changes stack offset on different paths
137 * @off: index of first generated machine instruction (in nfp_prog.prog) 207 * @off: index of first generated machine instruction (in nfp_prog.prog)
138 * @n: eBPF instruction number 208 * @n: eBPF instruction number
139 * @flags: eBPF instruction extra optimization flags 209 * @flags: eBPF instruction extra optimization flags
@@ -151,7 +221,12 @@ struct nfp_insn_meta {
151 bool ptr_not_const; 221 bool ptr_not_const;
152 }; 222 };
153 struct nfp_insn_meta *jmp_dst; 223 struct nfp_insn_meta *jmp_dst;
154 struct bpf_reg_state arg2; 224 struct {
225 u32 func_id;
226 struct bpf_reg_state arg1;
227 struct bpf_reg_state arg2;
228 bool arg2_var_off;
229 };
155 }; 230 };
156 unsigned int off; 231 unsigned int off;
157 unsigned short n; 232 unsigned short n;
@@ -249,6 +324,7 @@ struct nfp_bpf_vnic {
249 324
250void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt); 325void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt);
251int nfp_bpf_jit(struct nfp_prog *prog); 326int nfp_bpf_jit(struct nfp_prog *prog);
327bool nfp_bpf_supported_opcode(u8 code);
252 328
253extern const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops; 329extern const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops;
254 330
@@ -266,4 +342,20 @@ nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
266 unsigned int insn_idx, unsigned int n_insns); 342 unsigned int insn_idx, unsigned int n_insns);
267 343
268void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv); 344void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv);
345
346long long int
347nfp_bpf_ctrl_alloc_map(struct nfp_app_bpf *bpf, struct bpf_map *map);
348void
349nfp_bpf_ctrl_free_map(struct nfp_app_bpf *bpf, struct nfp_bpf_map *nfp_map);
350int nfp_bpf_ctrl_getfirst_entry(struct bpf_offloaded_map *offmap,
351 void *next_key);
352int nfp_bpf_ctrl_update_entry(struct bpf_offloaded_map *offmap,
353 void *key, void *value, u64 flags);
354int nfp_bpf_ctrl_del_entry(struct bpf_offloaded_map *offmap, void *key);
355int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap,
356 void *key, void *value);
357int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap,
358 void *key, void *next_key);
359
360void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb);
269#endif 361#endif
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 320b2250d29a..e2859b2e9c6a 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -36,6 +36,9 @@
  * Netronome network device driver: TC offload functions for PF and VF
  */
 
+#define pr_fmt(fmt)	"NFP net bpf: " fmt
+
+#include <linux/bpf.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <linux/pci.h>
@@ -153,6 +156,103 @@ static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
153 return 0; 156 return 0;
154} 157}
155 158
159static int
160nfp_bpf_map_get_next_key(struct bpf_offloaded_map *offmap,
161 void *key, void *next_key)
162{
163 if (!key)
164 return nfp_bpf_ctrl_getfirst_entry(offmap, next_key);
165 return nfp_bpf_ctrl_getnext_entry(offmap, key, next_key);
166}
167
168static int
169nfp_bpf_map_delete_elem(struct bpf_offloaded_map *offmap, void *key)
170{
171 return nfp_bpf_ctrl_del_entry(offmap, key);
172}
173
174static const struct bpf_map_dev_ops nfp_bpf_map_ops = {
175 .map_get_next_key = nfp_bpf_map_get_next_key,
176 .map_lookup_elem = nfp_bpf_ctrl_lookup_entry,
177 .map_update_elem = nfp_bpf_ctrl_update_entry,
178 .map_delete_elem = nfp_bpf_map_delete_elem,
179};
180
181static int
182nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
183{
184 struct nfp_bpf_map *nfp_map;
185 long long int res;
186
187 if (!bpf->maps.types)
188 return -EOPNOTSUPP;
189
190 if (offmap->map.map_flags ||
191 offmap->map.numa_node != NUMA_NO_NODE) {
192 pr_info("map flags are not supported\n");
193 return -EINVAL;
194 }
195
196 if (!(bpf->maps.types & 1 << offmap->map.map_type)) {
197 pr_info("map type not supported\n");
198 return -EOPNOTSUPP;
199 }
200 if (bpf->maps.max_maps == bpf->maps_in_use) {
201 pr_info("too many maps for a device\n");
202 return -ENOMEM;
203 }
204 if (bpf->maps.max_elems - bpf->map_elems_in_use <
205 offmap->map.max_entries) {
206 pr_info("map with too many elements: %u, left: %u\n",
207 offmap->map.max_entries,
208 bpf->maps.max_elems - bpf->map_elems_in_use);
209 return -ENOMEM;
210 }
211 if (offmap->map.key_size > bpf->maps.max_key_sz ||
212 offmap->map.value_size > bpf->maps.max_val_sz ||
213 round_up(offmap->map.key_size, 8) +
214 round_up(offmap->map.value_size, 8) > bpf->maps.max_elem_sz) {
215 pr_info("elements don't fit in device constraints\n");
216 return -ENOMEM;
217 }
218
219 nfp_map = kzalloc(sizeof(*nfp_map), GFP_USER);
220 if (!nfp_map)
221 return -ENOMEM;
222
223 offmap->dev_priv = nfp_map;
224 nfp_map->offmap = offmap;
225 nfp_map->bpf = bpf;
226
227 res = nfp_bpf_ctrl_alloc_map(bpf, &offmap->map);
228 if (res < 0) {
229 kfree(nfp_map);
230 return res;
231 }
232
233 nfp_map->tid = res;
234 offmap->dev_ops = &nfp_bpf_map_ops;
235 bpf->maps_in_use++;
236 bpf->map_elems_in_use += offmap->map.max_entries;
237 list_add_tail(&nfp_map->l, &bpf->map_list);
238
239 return 0;
240}
241
242static int
243nfp_bpf_map_free(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
244{
245 struct nfp_bpf_map *nfp_map = offmap->dev_priv;
246
247 nfp_bpf_ctrl_free_map(bpf, nfp_map);
248 list_del_init(&nfp_map->l);
249 bpf->map_elems_in_use -= offmap->map.max_entries;
250 bpf->maps_in_use--;
251 kfree(nfp_map);
252
253 return 0;
254}
255
156int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf) 256int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf)
157{ 257{
158 switch (bpf->command) { 258 switch (bpf->command) {
@@ -162,6 +262,10 @@ int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf)
 		return nfp_bpf_translate(nn, bpf->offload.prog);
 	case BPF_OFFLOAD_DESTROY:
 		return nfp_bpf_destroy(nn, bpf->offload.prog);
+	case BPF_OFFLOAD_MAP_ALLOC:
+		return nfp_bpf_map_alloc(app->priv, bpf->offmap);
+	case BPF_OFFLOAD_MAP_FREE:
+		return nfp_bpf_map_free(app->priv, bpf->offmap);
 	default:
 		return -EINVAL;
 	}
@@ -237,7 +341,7 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog,
 	int err;
 
 	if (prog) {
-		struct bpf_dev_offload *offload = prog->aux->offload;
+		struct bpf_prog_offload *offload = prog->aux->offload;
 
 		if (!offload)
 			return -EINVAL;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 7890d95d4018..479f602887e9 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -110,9 +110,11 @@ static int
 nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
 		   struct nfp_insn_meta *meta)
 {
+	const struct bpf_reg_state *reg1 = cur_regs(env) + BPF_REG_1;
 	const struct bpf_reg_state *reg2 = cur_regs(env) + BPF_REG_2;
 	struct nfp_app_bpf *bpf = nfp_prog->bpf;
 	u32 func_id = meta->insn.imm;
+	s64 off, old_off;
 
 	switch (func_id) {
 	case BPF_FUNC_xdp_adjust_head:
@@ -127,11 +129,50 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
127 129
128 nfp_record_adjust_head(bpf, nfp_prog, meta, reg2); 130 nfp_record_adjust_head(bpf, nfp_prog, meta, reg2);
129 break; 131 break;
132
133 case BPF_FUNC_map_lookup_elem:
134 if (!bpf->helpers.map_lookup) {
135 pr_vlog(env, "map_lookup: not supported by FW\n");
136 return -EOPNOTSUPP;
137 }
138 if (reg2->type != PTR_TO_STACK) {
139 pr_vlog(env,
140 "map_lookup: unsupported key ptr type %d\n",
141 reg2->type);
142 return -EOPNOTSUPP;
143 }
144 if (!tnum_is_const(reg2->var_off)) {
145 pr_vlog(env, "map_lookup: variable key pointer\n");
146 return -EOPNOTSUPP;
147 }
148
149 off = reg2->var_off.value + reg2->off;
150 if (-off % 4) {
151 pr_vlog(env,
152 "map_lookup: unaligned stack pointer %lld\n",
153 -off);
154 return -EOPNOTSUPP;
155 }
156
 157 /* The rest of the checks apply only if we re-parse the same insn */
158 if (!meta->func_id)
159 break;
160
161 old_off = meta->arg2.var_off.value + meta->arg2.off;
162 meta->arg2_var_off |= off != old_off;
163
164 if (meta->arg1.map_ptr != reg1->map_ptr) {
165 pr_vlog(env, "map_lookup: called for different map\n");
166 return -EOPNOTSUPP;
167 }
168 break;
130 default: 169 default:
131 pr_vlog(env, "unsupported function id: %d\n", func_id); 170 pr_vlog(env, "unsupported function id: %d\n", func_id);
132 return -EOPNOTSUPP; 171 return -EOPNOTSUPP;
133 } 172 }
134 173
174 meta->func_id = func_id;
175 meta->arg1 = *reg1;
135 meta->arg2 = *reg2; 176 meta->arg2 = *reg2;
136 177
137 return 0; 178 return 0;
@@ -210,6 +251,7 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
210 251
211 if (reg->type != PTR_TO_CTX && 252 if (reg->type != PTR_TO_CTX &&
212 reg->type != PTR_TO_STACK && 253 reg->type != PTR_TO_STACK &&
254 reg->type != PTR_TO_MAP_VALUE &&
213 reg->type != PTR_TO_PACKET) { 255 reg->type != PTR_TO_PACKET) {
214 pr_vlog(env, "unsupported ptr type: %d\n", reg->type); 256 pr_vlog(env, "unsupported ptr type: %d\n", reg->type);
215 return -EINVAL; 257 return -EINVAL;
@@ -221,6 +263,13 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
221 return err; 263 return err;
222 } 264 }
223 265
266 if (reg->type == PTR_TO_MAP_VALUE) {
267 if (is_mbpf_store(meta)) {
268 pr_vlog(env, "map writes not supported\n");
269 return -EOPNOTSUPP;
270 }
271 }
272
224 if (meta->ptr.type != NOT_INIT && meta->ptr.type != reg->type) { 273 if (meta->ptr.type != NOT_INIT && meta->ptr.type != reg->type) {
225 pr_vlog(env, "ptr type changed for instruction %d -> %d\n", 274 pr_vlog(env, "ptr type changed for instruction %d -> %d\n",
226 meta->ptr.type, reg->type); 275 meta->ptr.type, reg->type);
@@ -241,6 +290,12 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
241 meta = nfp_bpf_goto_meta(nfp_prog, meta, insn_idx, env->prog->len); 290 meta = nfp_bpf_goto_meta(nfp_prog, meta, insn_idx, env->prog->len);
242 nfp_prog->verifier_meta = meta; 291 nfp_prog->verifier_meta = meta;
243 292
293 if (!nfp_bpf_supported_opcode(meta->insn.code)) {
294 pr_vlog(env, "instruction %#02x not supported\n",
295 meta->insn.code);
296 return -EINVAL;
297 }
298
244 if (meta->insn.src_reg >= MAX_BPF_REG || 299 if (meta->insn.src_reg >= MAX_BPF_REG ||
245 meta->insn.dst_reg >= MAX_BPF_REG) { 300 meta->insn.dst_reg >= MAX_BPF_REG) {
246 pr_vlog(env, "program uses extended registers - jit hardening?\n"); 301 pr_vlog(env, "program uses extended registers - jit hardening?\n");
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h
index 32ff46a00f70..6a6eb02b516e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -165,6 +165,7 @@ struct nfp_app {
165 void *priv; 165 void *priv;
166}; 166};
167 167
168bool __nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb);
168bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb); 169bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb);
169 170
170static inline int nfp_app_init(struct nfp_app *app) 171static inline int nfp_app_init(struct nfp_app *app)
@@ -326,6 +327,14 @@ static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn,
326 return app->type->xdp_offload(app, nn, prog); 327 return app->type->xdp_offload(app, nn, prog);
327} 328}
328 329
330static inline bool __nfp_app_ctrl_tx(struct nfp_app *app, struct sk_buff *skb)
331{
332 trace_devlink_hwmsg(priv_to_devlink(app->pf), false, 0,
333 skb->data, skb->len);
334
335 return __nfp_ctrl_tx(app->ctrl, skb);
336}
337
329static inline bool nfp_app_ctrl_tx(struct nfp_app *app, struct sk_buff *skb) 338static inline bool nfp_app_ctrl_tx(struct nfp_app *app, struct sk_buff *skb)
330{ 339{
331 trace_devlink_hwmsg(priv_to_devlink(app->pf), false, 0, 340 trace_devlink_hwmsg(priv_to_devlink(app->pf), false, 0,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
index 9ee3a3f60cc7..3f6952b66a49 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
@@ -50,6 +50,11 @@ const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
50 [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 }, 50 [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 },
51}; 51};
52 52
53static bool unreg_is_imm(u16 reg)
54{
55 return (reg & UR_REG_IMM) == UR_REG_IMM;
56}
57
53u16 br_get_offset(u64 instr) 58u16 br_get_offset(u64 instr)
54{ 59{
55 u16 addr_lo, addr_hi; 60 u16 addr_lo, addr_hi;
@@ -80,6 +85,59 @@ void br_add_offset(u64 *instr, u16 offset)
80 br_set_offset(instr, addr + offset); 85 br_set_offset(instr, addr + offset);
81} 86}
82 87
88static bool immed_can_modify(u64 instr)
89{
90 if (FIELD_GET(OP_IMMED_INV, instr) ||
91 FIELD_GET(OP_IMMED_SHIFT, instr) ||
92 FIELD_GET(OP_IMMED_WIDTH, instr) != IMMED_WIDTH_ALL) {
93 pr_err("Can't decode/encode immed!\n");
94 return false;
95 }
96 return true;
97}
98
99u16 immed_get_value(u64 instr)
100{
101 u16 reg;
102
103 if (!immed_can_modify(instr))
104 return 0;
105
106 reg = FIELD_GET(OP_IMMED_A_SRC, instr);
107 if (!unreg_is_imm(reg))
108 reg = FIELD_GET(OP_IMMED_B_SRC, instr);
109
110 return (reg & 0xff) | FIELD_GET(OP_IMMED_IMM, instr);
111}
112
113void immed_set_value(u64 *instr, u16 immed)
114{
115 if (!immed_can_modify(*instr))
116 return;
117
118 if (unreg_is_imm(FIELD_GET(OP_IMMED_A_SRC, *instr))) {
119 *instr &= ~FIELD_PREP(OP_IMMED_A_SRC, 0xff);
120 *instr |= FIELD_PREP(OP_IMMED_A_SRC, immed & 0xff);
121 } else {
122 *instr &= ~FIELD_PREP(OP_IMMED_B_SRC, 0xff);
123 *instr |= FIELD_PREP(OP_IMMED_B_SRC, immed & 0xff);
124 }
125
126 *instr &= ~OP_IMMED_IMM;
127 *instr |= FIELD_PREP(OP_IMMED_IMM, immed >> 8);
128}
129
130void immed_add_value(u64 *instr, u16 offset)
131{
132 u16 val;
133
134 if (!immed_can_modify(*instr))
135 return;
136
137 val = immed_get_value(*instr);
138 immed_set_value(instr, val + offset);
139}
140
83static u16 nfp_swreg_to_unreg(swreg reg, bool is_dst) 141static u16 nfp_swreg_to_unreg(swreg reg, bool is_dst)
84{ 142{
85 bool lm_id, lm_dec = false; 143 bool lm_id, lm_dec = false;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index 20e51cb60e69..5f9291db98e0 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -138,6 +138,10 @@ enum immed_shift {
138 IMMED_SHIFT_2B = 2, 138 IMMED_SHIFT_2B = 2,
139}; 139};
140 140
141u16 immed_get_value(u64 instr);
142void immed_set_value(u64 *instr, u16 immed);
143void immed_add_value(u64 *instr, u16 offset);
144
141#define OP_SHF_BASE 0x08000000000ULL 145#define OP_SHF_BASE 0x08000000000ULL
142#define OP_SHF_A_SRC 0x000000000ffULL 146#define OP_SHF_A_SRC 0x000000000ffULL
143#define OP_SHF_SC 0x00000000300ULL 147#define OP_SHF_SC 0x00000000300ULL
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 0e564cfabe7e..6f6e3d6fd935 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -839,6 +839,18 @@ static inline const char *nfp_net_name(struct nfp_net *nn)
839 return nn->dp.netdev ? nn->dp.netdev->name : "ctrl"; 839 return nn->dp.netdev ? nn->dp.netdev->name : "ctrl";
840} 840}
841 841
842static inline void nfp_ctrl_lock(struct nfp_net *nn)
843 __acquires(&nn->r_vecs[0].lock)
844{
845 spin_lock_bh(&nn->r_vecs[0].lock);
846}
847
848static inline void nfp_ctrl_unlock(struct nfp_net *nn)
849 __releases(&nn->r_vecs[0].lock)
850{
851 spin_unlock_bh(&nn->r_vecs[0].lock);
852}
853
842/* Globals */ 854/* Globals */
843extern const char nfp_driver_version[]; 855extern const char nfp_driver_version[];
844 856
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 07e0587dc14e..2b5cad3069a7 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1920,6 +1920,13 @@ err_free:
1920 return false; 1920 return false;
1921} 1921}
1922 1922
1923bool __nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb)
1924{
1925 struct nfp_net_r_vector *r_vec = &nn->r_vecs[0];
1926
1927 return nfp_ctrl_tx_one(nn, r_vec, skb, false);
1928}
1929
1923bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb) 1930bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb)
1924{ 1931{
1925 struct nfp_net_r_vector *r_vec = &nn->r_vecs[0]; 1932 struct nfp_net_r_vector *r_vec = &nn->r_vecs[0];
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5da18ebc9222..83e2349e1362 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -30,7 +30,7 @@
30#include <linux/ratelimit.h> 30#include <linux/ratelimit.h>
31#include <linux/uuid.h> 31#include <linux/uuid.h>
32#include <linux/semaphore.h> 32#include <linux/semaphore.h>
33#include <linux/bpf.h> 33#include <linux/error-injection.h>
34#include <asm/unaligned.h> 34#include <asm/unaligned.h>
35#include "ctree.h" 35#include "ctree.h"
36#include "disk-io.h" 36#include "disk-io.h"
@@ -3124,7 +3124,7 @@ recovery_tree_root:
3124 goto fail_block_groups; 3124 goto fail_block_groups;
3125 goto retry_root_backup; 3125 goto retry_root_backup;
3126} 3126}
3127BPF_ALLOW_ERROR_INJECTION(open_ctree); 3127ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
3128 3128
3129static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) 3129static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
3130{ 3130{
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index fb1382893bfc..586bb06472bb 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -22,7 +22,7 @@
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/math64.h> 23#include <linux/math64.h>
24#include <linux/ratelimit.h> 24#include <linux/ratelimit.h>
25#include <linux/bpf.h> 25#include <linux/error-injection.h>
26#include "ctree.h" 26#include "ctree.h"
27#include "free-space-cache.h" 27#include "free-space-cache.h"
28#include "transaction.h" 28#include "transaction.h"
@@ -333,7 +333,7 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
333 333
334 return 0; 334 return 0;
335} 335}
336BPF_ALLOW_ERROR_INJECTION(io_ctl_init); 336ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO);
337 337
338static void io_ctl_free(struct btrfs_io_ctl *io_ctl) 338static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
339{ 339{
diff --git a/include/asm-generic/error-injection.h b/include/asm-generic/error-injection.h
new file mode 100644
index 000000000000..296c65442f00
--- /dev/null
+++ b/include/asm-generic/error-injection.h
@@ -0,0 +1,35 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _ASM_GENERIC_ERROR_INJECTION_H
3#define _ASM_GENERIC_ERROR_INJECTION_H
4
5#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
6enum {
7 EI_ETYPE_NONE, /* Dummy value for undefined case */
8 EI_ETYPE_NULL, /* Return NULL if failure */
9 EI_ETYPE_ERRNO, /* Return -ERRNO if failure */
10 EI_ETYPE_ERRNO_NULL, /* Return -ERRNO or NULL if failure */
11};
12
13struct error_injection_entry {
14 unsigned long addr;
15 int etype;
16};
17
18#ifdef CONFIG_FUNCTION_ERROR_INJECTION
19/*
 20 * Whitelist-generating macro. Specify functions which can be
 21 * error-injectable using this macro.
22 */
23#define ALLOW_ERROR_INJECTION(fname, _etype) \
24static struct error_injection_entry __used \
25 __attribute__((__section__("_error_injection_whitelist"))) \
26 _eil_addr_##fname = { \
27 .addr = (unsigned long)fname, \
28 .etype = EI_ETYPE_##_etype, \
29 };
30#else
31#define ALLOW_ERROR_INJECTION(fname, _etype)
32#endif
33#endif
34
35#endif /* _ASM_GENERIC_ERROR_INJECTION_H */
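
For reference, a minimal sketch of how the new macro is intended to be used; my_slow_init() and its body are hypothetical and not part of this patch (the real call sites are the btrfs hunks above):

#include <linux/error-injection.h>

/* Hypothetical helper. ERRNO declares that failure is reported as a
 * negative errno return value, so an injected fault shows up as -ERRNO.
 */
static int my_slow_init(void)
{
	return 0;
}
ALLOW_ERROR_INJECTION(my_slow_init, ERRNO);
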
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index a2e8582d094a..ebe544e048cd 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -136,13 +136,13 @@
136#define KPROBE_BLACKLIST() 136#define KPROBE_BLACKLIST()
137#endif 137#endif
138 138
139#ifdef CONFIG_BPF_KPROBE_OVERRIDE 139#ifdef CONFIG_FUNCTION_ERROR_INJECTION
140#define ERROR_INJECT_LIST() . = ALIGN(8); \ 140#define ERROR_INJECT_WHITELIST() STRUCT_ALIGN(); \
141 VMLINUX_SYMBOL(__start_kprobe_error_inject_list) = .; \ 141 VMLINUX_SYMBOL(__start_error_injection_whitelist) = .;\
142 KEEP(*(_kprobe_error_inject_list)) \ 142 KEEP(*(_error_injection_whitelist)) \
143 VMLINUX_SYMBOL(__stop_kprobe_error_inject_list) = .; 143 VMLINUX_SYMBOL(__stop_error_injection_whitelist) = .;
144#else 144#else
145#define ERROR_INJECT_LIST() 145#define ERROR_INJECT_WHITELIST()
146#endif 146#endif
147 147
148#ifdef CONFIG_EVENT_TRACING 148#ifdef CONFIG_EVENT_TRACING
@@ -573,7 +573,7 @@
573 FTRACE_EVENTS() \ 573 FTRACE_EVENTS() \
574 TRACE_SYSCALLS() \ 574 TRACE_SYSCALLS() \
575 KPROBE_BLACKLIST() \ 575 KPROBE_BLACKLIST() \
576 ERROR_INJECT_LIST() \ 576 ERROR_INJECT_WHITELIST() \
577 MEM_DISCARD(init.rodata) \ 577 MEM_DISCARD(init.rodata) \
578 CLK_OF_TABLES() \ 578 CLK_OF_TABLES() \
579 RESERVEDMEM_OF_TABLES() \ 579 RESERVEDMEM_OF_TABLES() \
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 44f26f6df8fc..5c2c104dc2c5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -25,6 +25,7 @@ struct bpf_map;
 25/* map is generic key/value storage optionally accessible by eBPF programs */ 25/* map is generic key/value storage optionally accessible by eBPF programs */
26struct bpf_map_ops { 26struct bpf_map_ops {
27 /* funcs callable from userspace (via syscall) */ 27 /* funcs callable from userspace (via syscall) */
28 int (*map_alloc_check)(union bpf_attr *attr);
28 struct bpf_map *(*map_alloc)(union bpf_attr *attr); 29 struct bpf_map *(*map_alloc)(union bpf_attr *attr);
29 void (*map_release)(struct bpf_map *map, struct file *map_file); 30 void (*map_release)(struct bpf_map *map, struct file *map_file);
30 void (*map_free)(struct bpf_map *map); 31 void (*map_free)(struct bpf_map *map);
@@ -73,6 +74,33 @@ struct bpf_map {
73 char name[BPF_OBJ_NAME_LEN]; 74 char name[BPF_OBJ_NAME_LEN];
74}; 75};
75 76
77struct bpf_offloaded_map;
78
79struct bpf_map_dev_ops {
80 int (*map_get_next_key)(struct bpf_offloaded_map *map,
81 void *key, void *next_key);
82 int (*map_lookup_elem)(struct bpf_offloaded_map *map,
83 void *key, void *value);
84 int (*map_update_elem)(struct bpf_offloaded_map *map,
85 void *key, void *value, u64 flags);
86 int (*map_delete_elem)(struct bpf_offloaded_map *map, void *key);
87};
88
89struct bpf_offloaded_map {
90 struct bpf_map map;
91 struct net_device *netdev;
92 const struct bpf_map_dev_ops *dev_ops;
93 void *dev_priv;
94 struct list_head offloads;
95};
96
97static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map)
98{
99 return container_of(map, struct bpf_offloaded_map, map);
100}
101
102extern const struct bpf_map_ops bpf_map_offload_ops;
103
76/* function argument constraints */ 104/* function argument constraints */
77enum bpf_arg_type { 105enum bpf_arg_type {
78 ARG_DONTCARE = 0, /* unused argument in helper function */ 106 ARG_DONTCARE = 0, /* unused argument in helper function */
@@ -199,7 +227,7 @@ struct bpf_prog_offload_ops {
199 int insn_idx, int prev_insn_idx); 227 int insn_idx, int prev_insn_idx);
200}; 228};
201 229
202struct bpf_dev_offload { 230struct bpf_prog_offload {
203 struct bpf_prog *prog; 231 struct bpf_prog *prog;
204 struct net_device *netdev; 232 struct net_device *netdev;
205 void *dev_priv; 233 void *dev_priv;
@@ -229,7 +257,7 @@ struct bpf_prog_aux {
229#ifdef CONFIG_SECURITY 257#ifdef CONFIG_SECURITY
230 void *security; 258 void *security;
231#endif 259#endif
232 struct bpf_dev_offload *offload; 260 struct bpf_prog_offload *offload;
233 union { 261 union {
234 struct work_struct work; 262 struct work_struct work;
235 struct rcu_head rcu; 263 struct rcu_head rcu;
@@ -368,6 +396,7 @@ int __bpf_prog_charge(struct user_struct *user, u32 pages);
368void __bpf_prog_uncharge(struct user_struct *user, u32 pages); 396void __bpf_prog_uncharge(struct user_struct *user, u32 pages);
369 397
370void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); 398void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
399void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);
371 400
372struct bpf_map *bpf_map_get_with_uref(u32 ufd); 401struct bpf_map *bpf_map_get_with_uref(u32 ufd);
373struct bpf_map *__bpf_map_get(struct fd f); 402struct bpf_map *__bpf_map_get(struct fd f);
@@ -377,6 +406,7 @@ void bpf_map_put(struct bpf_map *map);
377int bpf_map_precharge_memlock(u32 pages); 406int bpf_map_precharge_memlock(u32 pages);
378void *bpf_map_area_alloc(size_t size, int numa_node); 407void *bpf_map_area_alloc(size_t size, int numa_node);
379void bpf_map_area_free(void *base); 408void bpf_map_area_free(void *base);
409void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
380 410
381extern int sysctl_unprivileged_bpf_disabled; 411extern int sysctl_unprivileged_bpf_disabled;
382 412
@@ -554,6 +584,15 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog);
554int bpf_prog_offload_info_fill(struct bpf_prog_info *info, 584int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
555 struct bpf_prog *prog); 585 struct bpf_prog *prog);
556 586
587int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value);
588int bpf_map_offload_update_elem(struct bpf_map *map,
589 void *key, void *value, u64 flags);
590int bpf_map_offload_delete_elem(struct bpf_map *map, void *key);
591int bpf_map_offload_get_next_key(struct bpf_map *map,
592 void *key, void *next_key);
593
594bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map);
595
557#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) 596#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
558int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); 597int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
559 598
@@ -561,6 +600,14 @@ static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
561{ 600{
562 return aux->offload_requested; 601 return aux->offload_requested;
563} 602}
603
604static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
605{
606 return unlikely(map->ops == &bpf_map_offload_ops);
607}
608
609struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr);
610void bpf_map_offload_map_free(struct bpf_map *map);
564#else 611#else
565static inline int bpf_prog_offload_init(struct bpf_prog *prog, 612static inline int bpf_prog_offload_init(struct bpf_prog *prog,
566 union bpf_attr *attr) 613 union bpf_attr *attr)
@@ -572,6 +619,20 @@ static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
572{ 619{
573 return false; 620 return false;
574} 621}
622
623static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
624{
625 return false;
626}
627
628static inline struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
629{
630 return ERR_PTR(-EOPNOTSUPP);
631}
632
633static inline void bpf_map_offload_map_free(struct bpf_map *map)
634{
635}
575#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ 636#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
576 637
577#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET) 638#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET)
@@ -613,15 +674,4 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto;
613void bpf_user_rnd_init_once(void); 674void bpf_user_rnd_init_once(void);
614u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); 675u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
615 676
616#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
617#ifdef CONFIG_BPF_KPROBE_OVERRIDE
618#define BPF_ALLOW_ERROR_INJECTION(fname) \
619static unsigned long __used \
620 __attribute__((__section__("_kprobe_error_inject_list"))) \
621 _eil_addr_##fname = (unsigned long)fname;
622#else
623#define BPF_ALLOW_ERROR_INJECTION(fname)
624#endif
625#endif
626
627#endif /* _LINUX_BPF_H */ 677#endif /* _LINUX_BPF_H */
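
To make the new device-offload hooks concrete, here is a rough sketch of how a driver might fill in bpf_map_dev_ops; every foo_* name is a placeholder invented for illustration, not part of this patch (the in-tree implementation is the nfp one above). A driver points offmap->dev_ops at such a table from its BPF_OFFLOAD_MAP_ALLOC handler, as nfp_bpf_map_alloc() does.

#include <linux/bpf.h>

static int foo_map_lookup(struct bpf_offloaded_map *offmap,
			  void *key, void *value)
{
	/* A real driver would query its firmware for the value here. */
	return -EOPNOTSUPP;
}

static int foo_map_update(struct bpf_offloaded_map *offmap,
			  void *key, void *value, u64 flags)
{
	return -EOPNOTSUPP;
}

static int foo_map_delete(struct bpf_offloaded_map *offmap, void *key)
{
	return -EOPNOTSUPP;
}

static int foo_map_get_next_key(struct bpf_offloaded_map *offmap,
				void *key, void *next_key)
{
	return -EOPNOTSUPP;
}

static const struct bpf_map_dev_ops foo_bpf_map_ops = {
	.map_get_next_key	= foo_map_get_next_key,
	.map_lookup_elem	= foo_map_lookup,
	.map_update_elem	= foo_map_update,
	.map_delete_elem	= foo_map_delete,
};
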
diff --git a/include/linux/error-injection.h b/include/linux/error-injection.h
new file mode 100644
index 000000000000..280c61ecbf20
--- /dev/null
+++ b/include/linux/error-injection.h
@@ -0,0 +1,27 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _LINUX_ERROR_INJECTION_H
3#define _LINUX_ERROR_INJECTION_H
4
5#ifdef CONFIG_FUNCTION_ERROR_INJECTION
6
7#include <asm/error-injection.h>
8
9extern bool within_error_injection_list(unsigned long addr);
10extern int get_injectable_error_type(unsigned long addr);
11
12#else /* !CONFIG_FUNCTION_ERROR_INJECTION */
13
14#include <asm-generic/error-injection.h>
15static inline bool within_error_injection_list(unsigned long addr)
16{
17 return false;
18}
19
20static inline int get_injectable_error_type(unsigned long addr)
21{
22 return EI_ETYPE_NONE;
23}
24
25#endif
26
27#endif /* _LINUX_ERROR_INJECTION_H */
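
As a rough illustration of how a fault-injection user is expected to consult the whitelist, the sketch below picks an injected return value only for whitelisted addresses and honours the declared error type; it is a simplified example, not the actual kernel/fail_function.c logic:

#include <linux/errno.h>
#include <linux/error-injection.h>

static long choose_injected_retval(unsigned long addr)
{
	if (!within_error_injection_list(addr))
		return 0;		/* not whitelisted: never inject */

	switch (get_injectable_error_type(addr)) {
	case EI_ETYPE_NULL:
		return 0;		/* caller substitutes a NULL pointer */
	case EI_ETYPE_ERRNO:
	case EI_ETYPE_ERRNO_NULL:
		return -EINVAL;		/* any negative errno is acceptable */
	default:
		return 0;
	}
}
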
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 963fd364f3d6..9440a2fc8893 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -271,7 +271,6 @@ extern bool arch_kprobe_on_func_entry(unsigned long offset);
271extern bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); 271extern bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset);
272 272
273extern bool within_kprobe_blacklist(unsigned long addr); 273extern bool within_kprobe_blacklist(unsigned long addr);
274extern bool within_kprobe_error_injection_list(unsigned long addr);
275 274
276struct kprobe_insn_cache { 275struct kprobe_insn_cache {
277 struct mutex mutex; 276 struct mutex mutex;
diff --git a/include/linux/module.h b/include/linux/module.h
index 548fa09fa806..9642d3116718 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -19,6 +19,7 @@
19#include <linux/jump_label.h> 19#include <linux/jump_label.h>
20#include <linux/export.h> 20#include <linux/export.h>
21#include <linux/rbtree_latch.h> 21#include <linux/rbtree_latch.h>
22#include <linux/error-injection.h>
22 23
23#include <linux/percpu.h> 24#include <linux/percpu.h>
24#include <asm/module.h> 25#include <asm/module.h>
@@ -476,9 +477,9 @@ struct module {
476 unsigned int num_ctors; 477 unsigned int num_ctors;
477#endif 478#endif
478 479
479#ifdef CONFIG_BPF_KPROBE_OVERRIDE 480#ifdef CONFIG_FUNCTION_ERROR_INJECTION
480 unsigned int num_kprobe_ei_funcs; 481 struct error_injection_entry *ei_funcs;
481 unsigned long *kprobe_ei_funcs; 482 unsigned int num_ei_funcs;
482#endif 483#endif
483} ____cacheline_aligned __randomize_layout; 484} ____cacheline_aligned __randomize_layout;
484#ifndef MODULE_ARCH_INIT 485#ifndef MODULE_ARCH_INIT
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6d95477b962c..ed0799a12bf2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -805,6 +805,8 @@ enum bpf_netdev_command {
805 BPF_OFFLOAD_VERIFIER_PREP, 805 BPF_OFFLOAD_VERIFIER_PREP,
806 BPF_OFFLOAD_TRANSLATE, 806 BPF_OFFLOAD_TRANSLATE,
807 BPF_OFFLOAD_DESTROY, 807 BPF_OFFLOAD_DESTROY,
808 BPF_OFFLOAD_MAP_ALLOC,
809 BPF_OFFLOAD_MAP_FREE,
808}; 810};
809 811
810struct bpf_prog_offload_ops; 812struct bpf_prog_offload_ops;
@@ -835,6 +837,10 @@ struct netdev_bpf {
835 struct { 837 struct {
836 struct bpf_prog *prog; 838 struct bpf_prog *prog;
837 } offload; 839 } offload;
840 /* BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE */
841 struct {
842 struct bpf_offloaded_map *offmap;
843 };
838 }; 844 };
839}; 845};
840 846
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 405317f9c064..7c2259e8bc54 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -245,6 +245,7 @@ union bpf_attr {
245 * BPF_F_NUMA_NODE is set). 245 * BPF_F_NUMA_NODE is set).
246 */ 246 */
247 char map_name[BPF_OBJ_NAME_LEN]; 247 char map_name[BPF_OBJ_NAME_LEN];
248 __u32 map_ifindex; /* ifindex of netdev to create on */
248 }; 249 };
249 250
250 struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ 251 struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -899,7 +900,7 @@ struct xdp_md {
899 __u32 data; 900 __u32 data;
900 __u32 data_end; 901 __u32 data_end;
901 __u32 data_meta; 902 __u32 data_meta;
902 /* Below access go though struct xdp_rxq_info */ 903 /* Below access go through struct xdp_rxq_info */
903 __u32 ingress_ifindex; /* rxq->dev->ifindex */ 904 __u32 ingress_ifindex; /* rxq->dev->ifindex */
904 __u32 rx_queue_index; /* rxq->queue_index */ 905 __u32 rx_queue_index; /* rxq->queue_index */
905}; 906};
diff --git a/kernel/Makefile b/kernel/Makefile
index 172d151d429c..f85ae5dfa474 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -81,6 +81,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
81obj-$(CONFIG_GCOV_KERNEL) += gcov/ 81obj-$(CONFIG_GCOV_KERNEL) += gcov/
82obj-$(CONFIG_KCOV) += kcov.o 82obj-$(CONFIG_KCOV) += kcov.o
83obj-$(CONFIG_KPROBES) += kprobes.o 83obj-$(CONFIG_KPROBES) += kprobes.o
84obj-$(CONFIG_FAIL_FUNCTION) += fail_function.o
84obj-$(CONFIG_KGDB) += debug/ 85obj-$(CONFIG_KGDB) += debug/
85obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o 86obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
86obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o 87obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index ce5b669003b2..fbfdada6caee 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -94,13 +94,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
94 if (!cmap) 94 if (!cmap)
95 return ERR_PTR(-ENOMEM); 95 return ERR_PTR(-ENOMEM);
96 96
97 /* mandatory map attributes */ 97 bpf_map_init_from_attr(&cmap->map, attr);
98 cmap->map.map_type = attr->map_type;
99 cmap->map.key_size = attr->key_size;
100 cmap->map.value_size = attr->value_size;
101 cmap->map.max_entries = attr->max_entries;
102 cmap->map.map_flags = attr->map_flags;
103 cmap->map.numa_node = bpf_map_attr_numa_node(attr);
104 98
105 /* Pre-limit array size based on NR_CPUS, not final CPU check */ 99 /* Pre-limit array size based on NR_CPUS, not final CPU check */
106 if (cmap->map.max_entries > NR_CPUS) { 100 if (cmap->map.max_entries > NR_CPUS) {
@@ -143,7 +137,7 @@ free_cmap:
143 return ERR_PTR(err); 137 return ERR_PTR(err);
144} 138}
145 139
146void __cpu_map_queue_destructor(void *ptr) 140static void __cpu_map_queue_destructor(void *ptr)
147{ 141{
148 /* The tear-down procedure should have made sure that queue is 142 /* The tear-down procedure should have made sure that queue is
149 * empty. See __cpu_map_entry_replace() and work-queue 143 * empty. See __cpu_map_entry_replace() and work-queue
@@ -222,8 +216,8 @@ static struct xdp_pkt *convert_to_xdp_pkt(struct xdp_buff *xdp)
222 return xdp_pkt; 216 return xdp_pkt;
223} 217}
224 218
225struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, 219static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
226 struct xdp_pkt *xdp_pkt) 220 struct xdp_pkt *xdp_pkt)
227{ 221{
228 unsigned int frame_size; 222 unsigned int frame_size;
229 void *pkt_data_start; 223 void *pkt_data_start;
@@ -337,7 +331,8 @@ static int cpu_map_kthread_run(void *data)
337 return 0; 331 return 0;
338} 332}
339 333
340struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu, int map_id) 334static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
335 int map_id)
341{ 336{
342 gfp_t gfp = GFP_ATOMIC|__GFP_NOWARN; 337 gfp_t gfp = GFP_ATOMIC|__GFP_NOWARN;
343 struct bpf_cpu_map_entry *rcpu; 338 struct bpf_cpu_map_entry *rcpu;
@@ -395,7 +390,7 @@ free_rcu:
395 return NULL; 390 return NULL;
396} 391}
397 392
398void __cpu_map_entry_free(struct rcu_head *rcu) 393static void __cpu_map_entry_free(struct rcu_head *rcu)
399{ 394{
400 struct bpf_cpu_map_entry *rcpu; 395 struct bpf_cpu_map_entry *rcpu;
401 int cpu; 396 int cpu;
@@ -438,8 +433,8 @@ void __cpu_map_entry_free(struct rcu_head *rcu)
 438 * cpu_map_kthread_stop, which waits for an RCU grace period before 433 * cpu_map_kthread_stop, which waits for an RCU grace period before
439 * stopping kthread, emptying the queue. 434 * stopping kthread, emptying the queue.
440 */ 435 */
441void __cpu_map_entry_replace(struct bpf_cpu_map *cmap, 436static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
442 u32 key_cpu, struct bpf_cpu_map_entry *rcpu) 437 u32 key_cpu, struct bpf_cpu_map_entry *rcpu)
443{ 438{
444 struct bpf_cpu_map_entry *old_rcpu; 439 struct bpf_cpu_map_entry *old_rcpu;
445 440
@@ -451,7 +446,7 @@ void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
451 } 446 }
452} 447}
453 448
454int cpu_map_delete_elem(struct bpf_map *map, void *key) 449static int cpu_map_delete_elem(struct bpf_map *map, void *key)
455{ 450{
456 struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); 451 struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
457 u32 key_cpu = *(u32 *)key; 452 u32 key_cpu = *(u32 *)key;
@@ -464,8 +459,8 @@ int cpu_map_delete_elem(struct bpf_map *map, void *key)
464 return 0; 459 return 0;
465} 460}
466 461
467int cpu_map_update_elem(struct bpf_map *map, void *key, void *value, 462static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
468 u64 map_flags) 463 u64 map_flags)
469{ 464{
470 struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); 465 struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
471 struct bpf_cpu_map_entry *rcpu; 466 struct bpf_cpu_map_entry *rcpu;
@@ -502,7 +497,7 @@ int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
502 return 0; 497 return 0;
503} 498}
504 499
505void cpu_map_free(struct bpf_map *map) 500static void cpu_map_free(struct bpf_map *map)
506{ 501{
507 struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); 502 struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
508 int cpu; 503 int cpu;
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index ebdef54bf7df..565f9ece9115 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -93,13 +93,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
93 if (!dtab) 93 if (!dtab)
94 return ERR_PTR(-ENOMEM); 94 return ERR_PTR(-ENOMEM);
95 95
96 /* mandatory map attributes */ 96 bpf_map_init_from_attr(&dtab->map, attr);
97 dtab->map.map_type = attr->map_type;
98 dtab->map.key_size = attr->key_size;
99 dtab->map.value_size = attr->value_size;
100 dtab->map.max_entries = attr->max_entries;
101 dtab->map.map_flags = attr->map_flags;
102 dtab->map.numa_node = bpf_map_attr_numa_node(attr);
103 97
104 /* make sure page count doesn't overflow */ 98 /* make sure page count doesn't overflow */
105 cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *); 99 cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
diff --git a/kernel/bpf/disasm.h b/kernel/bpf/disasm.h
index e0857d016f89..266fe8ee542b 100644
--- a/kernel/bpf/disasm.h
+++ b/kernel/bpf/disasm.h
@@ -29,8 +29,8 @@ extern const char *const bpf_class_string[8];
29 29
30const char *func_id_name(int id); 30const char *func_id_name(int id);
31 31
32typedef void (*bpf_insn_print_t)(struct bpf_verifier_env *env, 32typedef __printf(2, 3) void (*bpf_insn_print_t)(struct bpf_verifier_env *env,
33 const char *, ...); 33 const char *, ...);
34typedef const char *(*bpf_insn_revmap_call_t)(void *private_data, 34typedef const char *(*bpf_insn_revmap_call_t)(void *private_data,
35 const struct bpf_insn *insn); 35 const struct bpf_insn *insn);
36typedef const char *(*bpf_insn_print_imm_t)(void *private_data, 36typedef const char *(*bpf_insn_print_imm_t)(void *private_data,
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 3905d4bc5b80..b76828f23b49 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -227,7 +227,7 @@ static int alloc_extra_elems(struct bpf_htab *htab)
227} 227}
228 228
229/* Called from syscall */ 229/* Called from syscall */
230static struct bpf_map *htab_map_alloc(union bpf_attr *attr) 230static int htab_map_alloc_check(union bpf_attr *attr)
231{ 231{
232 bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH || 232 bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
233 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); 233 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
@@ -241,9 +241,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
241 bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU); 241 bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
242 bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC); 242 bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
243 int numa_node = bpf_map_attr_numa_node(attr); 243 int numa_node = bpf_map_attr_numa_node(attr);
244 struct bpf_htab *htab;
245 int err, i;
246 u64 cost;
247 244
248 BUILD_BUG_ON(offsetof(struct htab_elem, htab) != 245 BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
249 offsetof(struct htab_elem, hash_node.pprev)); 246 offsetof(struct htab_elem, hash_node.pprev));
@@ -254,40 +251,68 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 254 /* LRU implementation is much more complicated than other 251 /* LRU implementation is much more complicated than other
255 * maps. Hence, limit to CAP_SYS_ADMIN for now. 252 * maps. Hence, limit to CAP_SYS_ADMIN for now.
256 */ 253 */
257 return ERR_PTR(-EPERM); 254 return -EPERM;
258 255
259 if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK) 256 if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK)
260 /* reserved bits should not be used */ 257 /* reserved bits should not be used */
261 return ERR_PTR(-EINVAL); 258 return -EINVAL;
262 259
263 if (!lru && percpu_lru) 260 if (!lru && percpu_lru)
264 return ERR_PTR(-EINVAL); 261 return -EINVAL;
265 262
266 if (lru && !prealloc) 263 if (lru && !prealloc)
267 return ERR_PTR(-ENOTSUPP); 264 return -ENOTSUPP;
268 265
269 if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru)) 266 if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru))
270 return ERR_PTR(-EINVAL); 267 return -EINVAL;
268
269 /* check sanity of attributes.
270 * value_size == 0 may be allowed in the future to use map as a set
271 */
272 if (attr->max_entries == 0 || attr->key_size == 0 ||
273 attr->value_size == 0)
274 return -EINVAL;
275
276 if (attr->key_size > MAX_BPF_STACK)
277 /* eBPF programs initialize keys on stack, so they cannot be
278 * larger than max stack size
279 */
280 return -E2BIG;
281
282 if (attr->value_size >= KMALLOC_MAX_SIZE -
283 MAX_BPF_STACK - sizeof(struct htab_elem))
284 /* if value_size is bigger, the user space won't be able to
285 * access the elements via bpf syscall. This check also makes
286 * sure that the elem_size doesn't overflow and it's
287 * kmalloc-able later in htab_map_update_elem()
288 */
289 return -E2BIG;
290
291 return 0;
292}
293
294static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
295{
296 bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
297 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
298 bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
299 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
300 /* percpu_lru means each cpu has its own LRU list.
301 * it is different from BPF_MAP_TYPE_PERCPU_HASH where
302 * the map's value itself is percpu. percpu_lru has
303 * nothing to do with the map's value.
304 */
305 bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
306 bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
307 struct bpf_htab *htab;
308 int err, i;
309 u64 cost;
271 310
272 htab = kzalloc(sizeof(*htab), GFP_USER); 311 htab = kzalloc(sizeof(*htab), GFP_USER);
273 if (!htab) 312 if (!htab)
274 return ERR_PTR(-ENOMEM); 313 return ERR_PTR(-ENOMEM);
275 314
276 /* mandatory map attributes */ 315 bpf_map_init_from_attr(&htab->map, attr);
277 htab->map.map_type = attr->map_type;
278 htab->map.key_size = attr->key_size;
279 htab->map.value_size = attr->value_size;
280 htab->map.max_entries = attr->max_entries;
281 htab->map.map_flags = attr->map_flags;
282 htab->map.numa_node = numa_node;
283
284 /* check sanity of attributes.
285 * value_size == 0 may be allowed in the future to use map as a set
286 */
287 err = -EINVAL;
288 if (htab->map.max_entries == 0 || htab->map.key_size == 0 ||
289 htab->map.value_size == 0)
290 goto free_htab;
291 316
292 if (percpu_lru) { 317 if (percpu_lru) {
293 /* ensure each CPU's lru list has >=1 elements. 318 /* ensure each CPU's lru list has >=1 elements.
@@ -304,22 +329,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
304 /* hash table size must be power of 2 */ 329 /* hash table size must be power of 2 */
305 htab->n_buckets = roundup_pow_of_two(htab->map.max_entries); 330 htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);
306 331
307 err = -E2BIG;
308 if (htab->map.key_size > MAX_BPF_STACK)
309 /* eBPF programs initialize keys on stack, so they cannot be
310 * larger than max stack size
311 */
312 goto free_htab;
313
314 if (htab->map.value_size >= KMALLOC_MAX_SIZE -
315 MAX_BPF_STACK - sizeof(struct htab_elem))
316 /* if value_size is bigger, the user space won't be able to
317 * access the elements via bpf syscall. This check also makes
318 * sure that the elem_size doesn't overflow and it's
319 * kmalloc-able later in htab_map_update_elem()
320 */
321 goto free_htab;
322
323 htab->elem_size = sizeof(struct htab_elem) + 332 htab->elem_size = sizeof(struct htab_elem) +
324 round_up(htab->map.key_size, 8); 333 round_up(htab->map.key_size, 8);
325 if (percpu) 334 if (percpu)
@@ -327,6 +336,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
327 else 336 else
328 htab->elem_size += round_up(htab->map.value_size, 8); 337 htab->elem_size += round_up(htab->map.value_size, 8);
329 338
339 err = -E2BIG;
330 /* prevent zero size kmalloc and check for u32 overflow */ 340 /* prevent zero size kmalloc and check for u32 overflow */
331 if (htab->n_buckets == 0 || 341 if (htab->n_buckets == 0 ||
332 htab->n_buckets > U32_MAX / sizeof(struct bucket)) 342 htab->n_buckets > U32_MAX / sizeof(struct bucket))
@@ -1143,6 +1153,7 @@ static void htab_map_free(struct bpf_map *map)
1143} 1153}
1144 1154
1145const struct bpf_map_ops htab_map_ops = { 1155const struct bpf_map_ops htab_map_ops = {
1156 .map_alloc_check = htab_map_alloc_check,
1146 .map_alloc = htab_map_alloc, 1157 .map_alloc = htab_map_alloc,
1147 .map_free = htab_map_free, 1158 .map_free = htab_map_free,
1148 .map_get_next_key = htab_map_get_next_key, 1159 .map_get_next_key = htab_map_get_next_key,
@@ -1153,6 +1164,7 @@ const struct bpf_map_ops htab_map_ops = {
1153}; 1164};
1154 1165
1155const struct bpf_map_ops htab_lru_map_ops = { 1166const struct bpf_map_ops htab_lru_map_ops = {
1167 .map_alloc_check = htab_map_alloc_check,
1156 .map_alloc = htab_map_alloc, 1168 .map_alloc = htab_map_alloc,
1157 .map_free = htab_map_free, 1169 .map_free = htab_map_free,
1158 .map_get_next_key = htab_map_get_next_key, 1170 .map_get_next_key = htab_map_get_next_key,
@@ -1236,6 +1248,7 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
1236} 1248}
1237 1249
1238const struct bpf_map_ops htab_percpu_map_ops = { 1250const struct bpf_map_ops htab_percpu_map_ops = {
1251 .map_alloc_check = htab_map_alloc_check,
1239 .map_alloc = htab_map_alloc, 1252 .map_alloc = htab_map_alloc,
1240 .map_free = htab_map_free, 1253 .map_free = htab_map_free,
1241 .map_get_next_key = htab_map_get_next_key, 1254 .map_get_next_key = htab_map_get_next_key,
@@ -1245,6 +1258,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
1245}; 1258};
1246 1259
1247const struct bpf_map_ops htab_lru_percpu_map_ops = { 1260const struct bpf_map_ops htab_lru_percpu_map_ops = {
1261 .map_alloc_check = htab_map_alloc_check,
1248 .map_alloc = htab_map_alloc, 1262 .map_alloc = htab_map_alloc,
1249 .map_free = htab_map_free, 1263 .map_free = htab_map_free,
1250 .map_get_next_key = htab_map_get_next_key, 1264 .map_get_next_key = htab_map_get_next_key,
@@ -1253,11 +1267,11 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
1253 .map_delete_elem = htab_lru_map_delete_elem, 1267 .map_delete_elem = htab_lru_map_delete_elem,
1254}; 1268};
1255 1269
1256static struct bpf_map *fd_htab_map_alloc(union bpf_attr *attr) 1270static int fd_htab_map_alloc_check(union bpf_attr *attr)
1257{ 1271{
1258 if (attr->value_size != sizeof(u32)) 1272 if (attr->value_size != sizeof(u32))
1259 return ERR_PTR(-EINVAL); 1273 return -EINVAL;
1260 return htab_map_alloc(attr); 1274 return htab_map_alloc_check(attr);
1261} 1275}
1262 1276
1263static void fd_htab_map_free(struct bpf_map *map) 1277static void fd_htab_map_free(struct bpf_map *map)
@@ -1328,7 +1342,7 @@ static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr)
1328 if (IS_ERR(inner_map_meta)) 1342 if (IS_ERR(inner_map_meta))
1329 return inner_map_meta; 1343 return inner_map_meta;
1330 1344
1331 map = fd_htab_map_alloc(attr); 1345 map = htab_map_alloc(attr);
1332 if (IS_ERR(map)) { 1346 if (IS_ERR(map)) {
1333 bpf_map_meta_free(inner_map_meta); 1347 bpf_map_meta_free(inner_map_meta);
1334 return map; 1348 return map;
@@ -1372,6 +1386,7 @@ static void htab_of_map_free(struct bpf_map *map)
1372} 1386}
1373 1387
1374const struct bpf_map_ops htab_of_maps_map_ops = { 1388const struct bpf_map_ops htab_of_maps_map_ops = {
1389 .map_alloc_check = fd_htab_map_alloc_check,
1375 .map_alloc = htab_of_map_alloc, 1390 .map_alloc = htab_of_map_alloc,
1376 .map_free = htab_of_map_free, 1391 .map_free = htab_of_map_free,
1377 .map_get_next_key = htab_map_get_next_key, 1392 .map_get_next_key = htab_map_get_next_key,
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 885e45479680..584e02227671 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -522,12 +522,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
522 return ERR_PTR(-ENOMEM); 522 return ERR_PTR(-ENOMEM);
523 523
524 /* copy mandatory map attributes */ 524 /* copy mandatory map attributes */
525 trie->map.map_type = attr->map_type; 525 bpf_map_init_from_attr(&trie->map, attr);
526 trie->map.key_size = attr->key_size;
527 trie->map.value_size = attr->value_size;
528 trie->map.max_entries = attr->max_entries;
529 trie->map.map_flags = attr->map_flags;
530 trie->map.numa_node = bpf_map_attr_numa_node(attr);
531 trie->data_size = attr->key_size - 526 trie->data_size = attr->key_size -
532 offsetof(struct bpf_lpm_trie_key, data); 527 offsetof(struct bpf_lpm_trie_key, data);
533 trie->max_prefixlen = trie->data_size * 8; 528 trie->max_prefixlen = trie->data_size * 8;
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 040d4e0edf3f..a88cebf368bf 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -24,15 +24,27 @@
24#include <linux/rtnetlink.h> 24#include <linux/rtnetlink.h>
25#include <linux/rwsem.h> 25#include <linux/rwsem.h>
26 26
27/* Protects bpf_prog_offload_devs and offload members of all progs. 27/* Protects bpf_prog_offload_devs, bpf_map_offload_devs and offload members
28 * of all progs.
28 * RTNL lock cannot be taken when holding this lock. 29 * RTNL lock cannot be taken when holding this lock.
29 */ 30 */
30static DECLARE_RWSEM(bpf_devs_lock); 31static DECLARE_RWSEM(bpf_devs_lock);
31static LIST_HEAD(bpf_prog_offload_devs); 32static LIST_HEAD(bpf_prog_offload_devs);
33static LIST_HEAD(bpf_map_offload_devs);
34
35static int bpf_dev_offload_check(struct net_device *netdev)
36{
37 if (!netdev)
38 return -EINVAL;
39 if (!netdev->netdev_ops->ndo_bpf)
40 return -EOPNOTSUPP;
41 return 0;
42}
32 43
33int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) 44int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
34{ 45{
35 struct bpf_dev_offload *offload; 46 struct bpf_prog_offload *offload;
47 int err;
36 48
37 if (attr->prog_type != BPF_PROG_TYPE_SCHED_CLS && 49 if (attr->prog_type != BPF_PROG_TYPE_SCHED_CLS &&
38 attr->prog_type != BPF_PROG_TYPE_XDP) 50 attr->prog_type != BPF_PROG_TYPE_XDP)
@@ -49,12 +61,15 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
49 61
50 offload->netdev = dev_get_by_index(current->nsproxy->net_ns, 62 offload->netdev = dev_get_by_index(current->nsproxy->net_ns,
51 attr->prog_ifindex); 63 attr->prog_ifindex);
52 if (!offload->netdev) 64 err = bpf_dev_offload_check(offload->netdev);
53 goto err_free; 65 if (err)
66 goto err_maybe_put;
54 67
55 down_write(&bpf_devs_lock); 68 down_write(&bpf_devs_lock);
56 if (offload->netdev->reg_state != NETREG_REGISTERED) 69 if (offload->netdev->reg_state != NETREG_REGISTERED) {
70 err = -EINVAL;
57 goto err_unlock; 71 goto err_unlock;
72 }
58 prog->aux->offload = offload; 73 prog->aux->offload = offload;
59 list_add_tail(&offload->offloads, &bpf_prog_offload_devs); 74 list_add_tail(&offload->offloads, &bpf_prog_offload_devs);
60 dev_put(offload->netdev); 75 dev_put(offload->netdev);
@@ -63,16 +78,17 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
63 return 0; 78 return 0;
64err_unlock: 79err_unlock:
65 up_write(&bpf_devs_lock); 80 up_write(&bpf_devs_lock);
66 dev_put(offload->netdev); 81err_maybe_put:
67err_free: 82 if (offload->netdev)
83 dev_put(offload->netdev);
68 kfree(offload); 84 kfree(offload);
69 return -EINVAL; 85 return err;
70} 86}
71 87
72static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd, 88static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd,
73 struct netdev_bpf *data) 89 struct netdev_bpf *data)
74{ 90{
75 struct bpf_dev_offload *offload = prog->aux->offload; 91 struct bpf_prog_offload *offload = prog->aux->offload;
76 struct net_device *netdev; 92 struct net_device *netdev;
77 93
78 ASSERT_RTNL(); 94 ASSERT_RTNL();
@@ -80,8 +96,6 @@ static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd,
80 if (!offload) 96 if (!offload)
81 return -ENODEV; 97 return -ENODEV;
82 netdev = offload->netdev; 98 netdev = offload->netdev;
83 if (!netdev->netdev_ops->ndo_bpf)
84 return -EOPNOTSUPP;
85 99
86 data->command = cmd; 100 data->command = cmd;
87 101
@@ -110,7 +124,7 @@ exit_unlock:
110int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, 124int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
111 int insn_idx, int prev_insn_idx) 125 int insn_idx, int prev_insn_idx)
112{ 126{
113 struct bpf_dev_offload *offload; 127 struct bpf_prog_offload *offload;
114 int ret = -ENODEV; 128 int ret = -ENODEV;
115 129
116 down_read(&bpf_devs_lock); 130 down_read(&bpf_devs_lock);
@@ -124,7 +138,7 @@ int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
124 138
125static void __bpf_prog_offload_destroy(struct bpf_prog *prog) 139static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
126{ 140{
127 struct bpf_dev_offload *offload = prog->aux->offload; 141 struct bpf_prog_offload *offload = prog->aux->offload;
128 struct netdev_bpf data = {}; 142 struct netdev_bpf data = {};
129 143
130 data.offload.prog = prog; 144 data.offload.prog = prog;
@@ -238,11 +252,184 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
238const struct bpf_prog_ops bpf_offload_prog_ops = { 252const struct bpf_prog_ops bpf_offload_prog_ops = {
239}; 253};
240 254
255static int bpf_map_offload_ndo(struct bpf_offloaded_map *offmap,
256 enum bpf_netdev_command cmd)
257{
258 struct netdev_bpf data = {};
259 struct net_device *netdev;
260
261 ASSERT_RTNL();
262
263 data.command = cmd;
264 data.offmap = offmap;
265 /* Caller must make sure netdev is valid */
266 netdev = offmap->netdev;
267
268 return netdev->netdev_ops->ndo_bpf(netdev, &data);
269}
270
271struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
272{
273 struct net *net = current->nsproxy->net_ns;
274 struct bpf_offloaded_map *offmap;
275 int err;
276
277 if (!capable(CAP_SYS_ADMIN))
278 return ERR_PTR(-EPERM);
279 if (attr->map_type != BPF_MAP_TYPE_HASH)
280 return ERR_PTR(-EINVAL);
281
282 offmap = kzalloc(sizeof(*offmap), GFP_USER);
283 if (!offmap)
284 return ERR_PTR(-ENOMEM);
285
286 bpf_map_init_from_attr(&offmap->map, attr);
287
288 rtnl_lock();
289 down_write(&bpf_devs_lock);
290 offmap->netdev = __dev_get_by_index(net, attr->map_ifindex);
291 err = bpf_dev_offload_check(offmap->netdev);
292 if (err)
293 goto err_unlock;
294
295 err = bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_ALLOC);
296 if (err)
297 goto err_unlock;
298
299 list_add_tail(&offmap->offloads, &bpf_map_offload_devs);
300 up_write(&bpf_devs_lock);
301 rtnl_unlock();
302
303 return &offmap->map;
304
305err_unlock:
306 up_write(&bpf_devs_lock);
307 rtnl_unlock();
308 kfree(offmap);
309 return ERR_PTR(err);
310}
311
312static void __bpf_map_offload_destroy(struct bpf_offloaded_map *offmap)
313{
314 WARN_ON(bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_FREE));
315 /* Make sure BPF_MAP_GET_NEXT_ID can't find this dead map */
316 bpf_map_free_id(&offmap->map, true);
317 list_del_init(&offmap->offloads);
318 offmap->netdev = NULL;
319}
320
321void bpf_map_offload_map_free(struct bpf_map *map)
322{
323 struct bpf_offloaded_map *offmap = map_to_offmap(map);
324
325 rtnl_lock();
326 down_write(&bpf_devs_lock);
327 if (offmap->netdev)
328 __bpf_map_offload_destroy(offmap);
329 up_write(&bpf_devs_lock);
330 rtnl_unlock();
331
332 kfree(offmap);
333}
334
335int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value)
336{
337 struct bpf_offloaded_map *offmap = map_to_offmap(map);
338 int ret = -ENODEV;
339
340 down_read(&bpf_devs_lock);
341 if (offmap->netdev)
342 ret = offmap->dev_ops->map_lookup_elem(offmap, key, value);
343 up_read(&bpf_devs_lock);
344
345 return ret;
346}
347
348int bpf_map_offload_update_elem(struct bpf_map *map,
349 void *key, void *value, u64 flags)
350{
351 struct bpf_offloaded_map *offmap = map_to_offmap(map);
352 int ret = -ENODEV;
353
354 if (unlikely(flags > BPF_EXIST))
355 return -EINVAL;
356
357 down_read(&bpf_devs_lock);
358 if (offmap->netdev)
359 ret = offmap->dev_ops->map_update_elem(offmap, key, value,
360 flags);
361 up_read(&bpf_devs_lock);
362
363 return ret;
364}
365
366int bpf_map_offload_delete_elem(struct bpf_map *map, void *key)
367{
368 struct bpf_offloaded_map *offmap = map_to_offmap(map);
369 int ret = -ENODEV;
370
371 down_read(&bpf_devs_lock);
372 if (offmap->netdev)
373 ret = offmap->dev_ops->map_delete_elem(offmap, key);
374 up_read(&bpf_devs_lock);
375
376 return ret;
377}
378
379int bpf_map_offload_get_next_key(struct bpf_map *map, void *key, void *next_key)
380{
381 struct bpf_offloaded_map *offmap = map_to_offmap(map);
382 int ret = -ENODEV;
383
384 down_read(&bpf_devs_lock);
385 if (offmap->netdev)
386 ret = offmap->dev_ops->map_get_next_key(offmap, key, next_key);
387 up_read(&bpf_devs_lock);
388
389 return ret;
390}
391
392bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
393{
394 struct bpf_offloaded_map *offmap;
395 struct bpf_prog_offload *offload;
396 bool ret;
397
398 if (!bpf_prog_is_dev_bound(prog->aux) || !bpf_map_is_dev_bound(map))
399 return false;
400
401 down_read(&bpf_devs_lock);
402 offload = prog->aux->offload;
403 offmap = map_to_offmap(map);
404
405 ret = offload && offload->netdev == offmap->netdev;
406 up_read(&bpf_devs_lock);
407
408 return ret;
409}
410
411static void bpf_offload_orphan_all_progs(struct net_device *netdev)
412{
413 struct bpf_prog_offload *offload, *tmp;
414
415 list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs, offloads)
416 if (offload->netdev == netdev)
417 __bpf_prog_offload_destroy(offload->prog);
418}
419
420static void bpf_offload_orphan_all_maps(struct net_device *netdev)
421{
422 struct bpf_offloaded_map *offmap, *tmp;
423
424 list_for_each_entry_safe(offmap, tmp, &bpf_map_offload_devs, offloads)
425 if (offmap->netdev == netdev)
426 __bpf_map_offload_destroy(offmap);
427}
428
241static int bpf_offload_notification(struct notifier_block *notifier, 429static int bpf_offload_notification(struct notifier_block *notifier,
242 ulong event, void *ptr) 430 ulong event, void *ptr)
243{ 431{
244 struct net_device *netdev = netdev_notifier_info_to_dev(ptr); 432 struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
245 struct bpf_dev_offload *offload, *tmp;
246 433
247 ASSERT_RTNL(); 434 ASSERT_RTNL();
248 435
@@ -253,11 +440,8 @@ static int bpf_offload_notification(struct notifier_block *notifier,
253 break; 440 break;
254 441
255 down_write(&bpf_devs_lock); 442 down_write(&bpf_devs_lock);
256 list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs, 443 bpf_offload_orphan_all_progs(netdev);
257 offloads) { 444 bpf_offload_orphan_all_maps(netdev);
258 if (offload->netdev == netdev)
259 __bpf_prog_offload_destroy(offload->prog);
260 }
261 up_write(&bpf_devs_lock); 445 up_write(&bpf_devs_lock);
262 break; 446 break;
263 default: 447 default:
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 079968680bc3..0314d1783d77 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -513,13 +513,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
513 if (!stab) 513 if (!stab)
514 return ERR_PTR(-ENOMEM); 514 return ERR_PTR(-ENOMEM);
515 515
516 /* mandatory map attributes */ 516 bpf_map_init_from_attr(&stab->map, attr);
517 stab->map.map_type = attr->map_type;
518 stab->map.key_size = attr->key_size;
519 stab->map.value_size = attr->value_size;
520 stab->map.max_entries = attr->max_entries;
521 stab->map.map_flags = attr->map_flags;
522 stab->map.numa_node = bpf_map_attr_numa_node(attr);
523 517
524 /* make sure page count doesn't overflow */ 518 /* make sure page count doesn't overflow */
525 cost = (u64) stab->map.max_entries * sizeof(struct sock *); 519 cost = (u64) stab->map.max_entries * sizeof(struct sock *);
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 6c63c2222ea8..b0ecf43f5894 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -88,14 +88,10 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
88 if (cost >= U32_MAX - PAGE_SIZE) 88 if (cost >= U32_MAX - PAGE_SIZE)
89 goto free_smap; 89 goto free_smap;
90 90
91 smap->map.map_type = attr->map_type; 91 bpf_map_init_from_attr(&smap->map, attr);
92 smap->map.key_size = attr->key_size;
93 smap->map.value_size = value_size; 92 smap->map.value_size = value_size;
94 smap->map.max_entries = attr->max_entries;
95 smap->map.map_flags = attr->map_flags;
96 smap->n_buckets = n_buckets; 93 smap->n_buckets = n_buckets;
97 smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; 94 smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
98 smap->map.numa_node = bpf_map_attr_numa_node(attr);
99 95
100 err = bpf_map_precharge_memlock(smap->map.pages); 96 err = bpf_map_precharge_memlock(smap->map.pages);
101 if (err) 97 if (err)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2bac0dc8baba..c691b9e972e3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -94,18 +94,34 @@ static int check_uarg_tail_zero(void __user *uaddr,
94 return 0; 94 return 0;
95} 95}
96 96
97const struct bpf_map_ops bpf_map_offload_ops = {
98 .map_alloc = bpf_map_offload_map_alloc,
99 .map_free = bpf_map_offload_map_free,
100};
101
97static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) 102static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
98{ 103{
104 const struct bpf_map_ops *ops;
99 struct bpf_map *map; 105 struct bpf_map *map;
106 int err;
100 107
101 if (attr->map_type >= ARRAY_SIZE(bpf_map_types) || 108 if (attr->map_type >= ARRAY_SIZE(bpf_map_types))
102 !bpf_map_types[attr->map_type]) 109 return ERR_PTR(-EINVAL);
110 ops = bpf_map_types[attr->map_type];
111 if (!ops)
103 return ERR_PTR(-EINVAL); 112 return ERR_PTR(-EINVAL);
104 113
105 map = bpf_map_types[attr->map_type]->map_alloc(attr); 114 if (ops->map_alloc_check) {
115 err = ops->map_alloc_check(attr);
116 if (err)
117 return ERR_PTR(err);
118 }
119 if (attr->map_ifindex)
120 ops = &bpf_map_offload_ops;
121 map = ops->map_alloc(attr);
106 if (IS_ERR(map)) 122 if (IS_ERR(map))
107 return map; 123 return map;
108 map->ops = bpf_map_types[attr->map_type]; 124 map->ops = ops;
109 map->map_type = attr->map_type; 125 map->map_type = attr->map_type;
110 return map; 126 return map;
111} 127}
@@ -134,6 +150,16 @@ void bpf_map_area_free(void *area)
134 kvfree(area); 150 kvfree(area);
135} 151}
136 152
153void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
154{
155 map->map_type = attr->map_type;
156 map->key_size = attr->key_size;
157 map->value_size = attr->value_size;
158 map->max_entries = attr->max_entries;
159 map->map_flags = attr->map_flags;
160 map->numa_node = bpf_map_attr_numa_node(attr);
161}
162
137int bpf_map_precharge_memlock(u32 pages) 163int bpf_map_precharge_memlock(u32 pages)
138{ 164{
139 struct user_struct *user = get_current_user(); 165 struct user_struct *user = get_current_user();
@@ -189,16 +215,25 @@ static int bpf_map_alloc_id(struct bpf_map *map)
189 return id > 0 ? 0 : id; 215 return id > 0 ? 0 : id;
190} 216}
191 217
192static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock) 218void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
193{ 219{
194 unsigned long flags; 220 unsigned long flags;
195 221
222 /* Offloaded maps are removed from the IDR store when their device
223 * disappears - even if someone holds an fd to them they are unusable,
224 * the memory is gone, all ops will fail; they are simply waiting for
225 * refcnt to drop to be freed.
226 */
227 if (!map->id)
228 return;
229
196 if (do_idr_lock) 230 if (do_idr_lock)
197 spin_lock_irqsave(&map_idr_lock, flags); 231 spin_lock_irqsave(&map_idr_lock, flags);
198 else 232 else
199 __acquire(&map_idr_lock); 233 __acquire(&map_idr_lock);
200 234
201 idr_remove(&map_idr, map->id); 235 idr_remove(&map_idr, map->id);
236 map->id = 0;
202 237
203 if (do_idr_lock) 238 if (do_idr_lock)
204 spin_unlock_irqrestore(&map_idr_lock, flags); 239 spin_unlock_irqrestore(&map_idr_lock, flags);
@@ -378,7 +413,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
378 return 0; 413 return 0;
379} 414}
380 415
381#define BPF_MAP_CREATE_LAST_FIELD map_name 416#define BPF_MAP_CREATE_LAST_FIELD map_ifindex
382/* called via syscall */ 417/* called via syscall */
383static int map_create(union bpf_attr *attr) 418static int map_create(union bpf_attr *attr)
384{ 419{
@@ -566,8 +601,10 @@ static int map_lookup_elem(union bpf_attr *attr)
566 if (!value) 601 if (!value)
567 goto free_key; 602 goto free_key;
568 603
569 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 604 if (bpf_map_is_dev_bound(map)) {
570 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 605 err = bpf_map_offload_lookup_elem(map, key, value);
606 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
607 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
571 err = bpf_percpu_hash_copy(map, key, value); 608 err = bpf_percpu_hash_copy(map, key, value);
572 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 609 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
573 err = bpf_percpu_array_copy(map, key, value); 610 err = bpf_percpu_array_copy(map, key, value);
@@ -654,7 +691,10 @@ static int map_update_elem(union bpf_attr *attr)
654 goto free_value; 691 goto free_value;
655 692
656 /* Need to create a kthread, thus must support schedule */ 693 /* Need to create a kthread, thus must support schedule */
657 if (map->map_type == BPF_MAP_TYPE_CPUMAP) { 694 if (bpf_map_is_dev_bound(map)) {
695 err = bpf_map_offload_update_elem(map, key, value, attr->flags);
696 goto out;
697 } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
658 err = map->ops->map_update_elem(map, key, value, attr->flags); 698 err = map->ops->map_update_elem(map, key, value, attr->flags);
659 goto out; 699 goto out;
660 } 700 }
@@ -731,6 +771,11 @@ static int map_delete_elem(union bpf_attr *attr)
731 goto err_put; 771 goto err_put;
732 } 772 }
733 773
774 if (bpf_map_is_dev_bound(map)) {
775 err = bpf_map_offload_delete_elem(map, key);
776 goto out;
777 }
778
734 preempt_disable(); 779 preempt_disable();
735 __this_cpu_inc(bpf_prog_active); 780 __this_cpu_inc(bpf_prog_active);
736 rcu_read_lock(); 781 rcu_read_lock();
@@ -738,7 +783,7 @@ static int map_delete_elem(union bpf_attr *attr)
738 rcu_read_unlock(); 783 rcu_read_unlock();
739 __this_cpu_dec(bpf_prog_active); 784 __this_cpu_dec(bpf_prog_active);
740 preempt_enable(); 785 preempt_enable();
741 786out:
742 if (!err) 787 if (!err)
743 trace_bpf_map_delete_elem(map, ufd, key); 788 trace_bpf_map_delete_elem(map, ufd, key);
744 kfree(key); 789 kfree(key);
@@ -788,9 +833,15 @@ static int map_get_next_key(union bpf_attr *attr)
788 if (!next_key) 833 if (!next_key)
789 goto free_key; 834 goto free_key;
790 835
836 if (bpf_map_is_dev_bound(map)) {
837 err = bpf_map_offload_get_next_key(map, key, next_key);
838 goto out;
839 }
840
791 rcu_read_lock(); 841 rcu_read_lock();
792 err = map->ops->map_get_next_key(map, key, next_key); 842 err = map->ops->map_get_next_key(map, key, next_key);
793 rcu_read_unlock(); 843 rcu_read_unlock();
844out:
794 if (err) 845 if (err)
795 goto free_next_key; 846 goto free_next_key;
796 847
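A map becomes device-bound purely because userspace sets the new map_ifindex attribute at creation time: find_and_alloc_map() then swaps in bpf_map_offload_ops, and the element syscalls above route through the bpf_map_offload_*() helpers instead of touching host memory. A minimal userspace sketch, assuming a uapi bpf.h that already carries map_ifindex and an offload-capable netdev (ifindex 4 is purely hypothetical):

#include <linux/bpf.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	union bpf_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_HASH;
	attr.key_size    = 4;
	attr.value_size  = 4;
	attr.max_entries = 16;
	attr.map_ifindex = 4;	/* hypothetical ifindex of the offload-capable netdev */

	fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
	if (fd < 0)
		perror("BPF_MAP_CREATE (device-bound)");
	else
		printf("device-bound map fd = %d\n", fd);
	return 0;
}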
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 48b61caa94cb..ceabb394d2dc 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4816,6 +4816,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
4816 return -EINVAL; 4816 return -EINVAL;
4817 } 4817 }
4818 } 4818 }
4819
4820 if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
4821 !bpf_offload_dev_match(prog, map)) {
4822 verbose(env, "offload device mismatch between prog and map\n");
4823 return -EINVAL;
4824 }
4825
4819 return 0; 4826 return 0;
4820} 4827}
4821 4828
diff --git a/kernel/fail_function.c b/kernel/fail_function.c
new file mode 100644
index 000000000000..21b0122cb39c
--- /dev/null
+++ b/kernel/fail_function.c
@@ -0,0 +1,349 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * fail_function.c: Function-based error injection
4 */
5#include <linux/error-injection.h>
6#include <linux/debugfs.h>
7#include <linux/fault-inject.h>
8#include <linux/kallsyms.h>
9#include <linux/kprobes.h>
10#include <linux/module.h>
11#include <linux/mutex.h>
12#include <linux/slab.h>
13#include <linux/uaccess.h>
14
15static int fei_kprobe_handler(struct kprobe *kp, struct pt_regs *regs);
16
17struct fei_attr {
18 struct list_head list;
19 struct kprobe kp;
20 unsigned long retval;
21};
22static DEFINE_MUTEX(fei_lock);
23static LIST_HEAD(fei_attr_list);
24static DECLARE_FAULT_ATTR(fei_fault_attr);
25static struct dentry *fei_debugfs_dir;
26
27static unsigned long adjust_error_retval(unsigned long addr, unsigned long retv)
28{
29 switch (get_injectable_error_type(addr)) {
30 case EI_ETYPE_NULL:
31 if (retv != 0)
32 return 0;
33 break;
34 case EI_ETYPE_ERRNO:
35 if (retv < (unsigned long)-MAX_ERRNO)
36 return (unsigned long)-EINVAL;
37 break;
38 case EI_ETYPE_ERRNO_NULL:
39 if (retv != 0 && retv < (unsigned long)-MAX_ERRNO)
40 return (unsigned long)-EINVAL;
41 break;
42 }
43
44 return retv;
45}
46
47static struct fei_attr *fei_attr_new(const char *sym, unsigned long addr)
48{
49 struct fei_attr *attr;
50
51 attr = kzalloc(sizeof(*attr), GFP_KERNEL);
52 if (attr) {
53 attr->kp.symbol_name = kstrdup(sym, GFP_KERNEL);
54 if (!attr->kp.symbol_name) {
55 kfree(attr);
56 return NULL;
57 }
58 attr->kp.pre_handler = fei_kprobe_handler;
59 attr->retval = adjust_error_retval(addr, 0);
60 INIT_LIST_HEAD(&attr->list);
61 }
62 return attr;
63}
64
65static void fei_attr_free(struct fei_attr *attr)
66{
67 if (attr) {
68 kfree(attr->kp.symbol_name);
69 kfree(attr);
70 }
71}
72
73static struct fei_attr *fei_attr_lookup(const char *sym)
74{
75 struct fei_attr *attr;
76
77 list_for_each_entry(attr, &fei_attr_list, list) {
78 if (!strcmp(attr->kp.symbol_name, sym))
79 return attr;
80 }
81
82 return NULL;
83}
84
85static bool fei_attr_is_valid(struct fei_attr *_attr)
86{
87 struct fei_attr *attr;
88
89 list_for_each_entry(attr, &fei_attr_list, list) {
90 if (attr == _attr)
91 return true;
92 }
93
94 return false;
95}
96
97static int fei_retval_set(void *data, u64 val)
98{
99 struct fei_attr *attr = data;
100 unsigned long retv = (unsigned long)val;
101 int err = 0;
102
103 mutex_lock(&fei_lock);
104 /*
 105	 * Since this operation can be done after the retval file is removed,
 106	 * it is safer to check that the attr is still valid before accessing
 107	 * its members.
108 */
109 if (!fei_attr_is_valid(attr)) {
110 err = -ENOENT;
111 goto out;
112 }
113
114 if (attr->kp.addr) {
115 if (adjust_error_retval((unsigned long)attr->kp.addr,
116 val) != retv)
117 err = -EINVAL;
118 }
119 if (!err)
120 attr->retval = val;
121out:
122 mutex_unlock(&fei_lock);
123
124 return err;
125}
126
127static int fei_retval_get(void *data, u64 *val)
128{
129 struct fei_attr *attr = data;
130 int err = 0;
131
132 mutex_lock(&fei_lock);
133 /* Here we also validate @attr to ensure it still exists. */
134 if (!fei_attr_is_valid(attr))
135 err = -ENOENT;
136 else
137 *val = attr->retval;
138 mutex_unlock(&fei_lock);
139
140 return err;
141}
142DEFINE_DEBUGFS_ATTRIBUTE(fei_retval_ops, fei_retval_get, fei_retval_set,
143 "%llx\n");
144
145static int fei_debugfs_add_attr(struct fei_attr *attr)
146{
147 struct dentry *dir;
148
149 dir = debugfs_create_dir(attr->kp.symbol_name, fei_debugfs_dir);
150 if (!dir)
151 return -ENOMEM;
152
153 if (!debugfs_create_file("retval", 0600, dir, attr, &fei_retval_ops)) {
154 debugfs_remove_recursive(dir);
155 return -ENOMEM;
156 }
157
158 return 0;
159}
160
161static void fei_debugfs_remove_attr(struct fei_attr *attr)
162{
163 struct dentry *dir;
164
165 dir = debugfs_lookup(attr->kp.symbol_name, fei_debugfs_dir);
166 if (dir)
167 debugfs_remove_recursive(dir);
168}
169
170static int fei_kprobe_handler(struct kprobe *kp, struct pt_regs *regs)
171{
172 struct fei_attr *attr = container_of(kp, struct fei_attr, kp);
173
174 if (should_fail(&fei_fault_attr, 1)) {
175 regs_set_return_value(regs, attr->retval);
176 override_function_with_return(regs);
177 /* Kprobe specific fixup */
178 reset_current_kprobe();
179 preempt_enable_no_resched();
180 return 1;
181 }
182
183 return 0;
184}
185NOKPROBE_SYMBOL(fei_kprobe_handler)
186
187static void *fei_seq_start(struct seq_file *m, loff_t *pos)
188{
189 mutex_lock(&fei_lock);
190 return seq_list_start(&fei_attr_list, *pos);
191}
192
193static void fei_seq_stop(struct seq_file *m, void *v)
194{
195 mutex_unlock(&fei_lock);
196}
197
198static void *fei_seq_next(struct seq_file *m, void *v, loff_t *pos)
199{
200 return seq_list_next(v, &fei_attr_list, pos);
201}
202
203static int fei_seq_show(struct seq_file *m, void *v)
204{
205 struct fei_attr *attr = list_entry(v, struct fei_attr, list);
206
207 seq_printf(m, "%pf\n", attr->kp.addr);
208 return 0;
209}
210
211static const struct seq_operations fei_seq_ops = {
212 .start = fei_seq_start,
213 .next = fei_seq_next,
214 .stop = fei_seq_stop,
215 .show = fei_seq_show,
216};
217
218static int fei_open(struct inode *inode, struct file *file)
219{
220 return seq_open(file, &fei_seq_ops);
221}
222
223static void fei_attr_remove(struct fei_attr *attr)
224{
225 fei_debugfs_remove_attr(attr);
226 unregister_kprobe(&attr->kp);
227 list_del(&attr->list);
228 fei_attr_free(attr);
229}
230
231static void fei_attr_remove_all(void)
232{
233 struct fei_attr *attr, *n;
234
235 list_for_each_entry_safe(attr, n, &fei_attr_list, list) {
236 fei_attr_remove(attr);
237 }
238}
239
240static ssize_t fei_write(struct file *file, const char __user *buffer,
241 size_t count, loff_t *ppos)
242{
243 struct fei_attr *attr;
244 unsigned long addr;
245 char *buf, *sym;
246 int ret;
247
248 /* cut off if it is too long */
249 if (count > KSYM_NAME_LEN)
250 count = KSYM_NAME_LEN;
251 buf = kmalloc(sizeof(char) * (count + 1), GFP_KERNEL);
252 if (!buf)
253 return -ENOMEM;
254
255 if (copy_from_user(buf, buffer, count)) {
256 ret = -EFAULT;
257 goto out;
258 }
259 buf[count] = '\0';
260 sym = strstrip(buf);
261
262 mutex_lock(&fei_lock);
263
264 /* Writing just spaces will remove all injection points */
265 if (sym[0] == '\0') {
266 fei_attr_remove_all();
267 ret = count;
268 goto out;
269 }
270 /* Writing !function will remove one injection point */
271 if (sym[0] == '!') {
272 attr = fei_attr_lookup(sym + 1);
273 if (!attr) {
274 ret = -ENOENT;
275 goto out;
276 }
277 fei_attr_remove(attr);
278 ret = count;
279 goto out;
280 }
281
282 addr = kallsyms_lookup_name(sym);
283 if (!addr) {
284 ret = -EINVAL;
285 goto out;
286 }
287 if (!within_error_injection_list(addr)) {
288 ret = -ERANGE;
289 goto out;
290 }
291 if (fei_attr_lookup(sym)) {
292 ret = -EBUSY;
293 goto out;
294 }
295 attr = fei_attr_new(sym, addr);
296 if (!attr) {
297 ret = -ENOMEM;
298 goto out;
299 }
300
301 ret = register_kprobe(&attr->kp);
302 if (!ret)
303 ret = fei_debugfs_add_attr(attr);
304 if (ret < 0)
305 fei_attr_remove(attr);
306 else {
307 list_add_tail(&attr->list, &fei_attr_list);
308 ret = count;
309 }
310out:
311 kfree(buf);
312 mutex_unlock(&fei_lock);
313 return ret;
314}
315
316static const struct file_operations fei_ops = {
317 .open = fei_open,
318 .read = seq_read,
319 .write = fei_write,
320 .llseek = seq_lseek,
321 .release = seq_release,
322};
323
324static int __init fei_debugfs_init(void)
325{
326 struct dentry *dir;
327
328 dir = fault_create_debugfs_attr("fail_function", NULL,
329 &fei_fault_attr);
330 if (IS_ERR(dir))
331 return PTR_ERR(dir);
332
 333	/* injectable attribute is just a symlink of error_injection/list */
334 if (!debugfs_create_symlink("injectable", dir,
335 "../error_injection/list"))
336 goto error;
337
338 if (!debugfs_create_file("inject", 0600, dir, NULL, &fei_ops))
339 goto error;
340
341 fei_debugfs_dir = dir;
342
343 return 0;
344error:
345 debugfs_remove_recursive(dir);
346 return -ENOMEM;
347}
348
349late_initcall(fei_debugfs_init);
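A hedged userspace sketch of driving the new interface: it assumes debugfs is mounted at /sys/kernel/debug, that the chosen symbol ("open_ctree" is only an example) shows up in the injectable list, and that the standard fault attributes created by fault_create_debugfs_attr() are tuned so should_fail() actually fires:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0 || write(fd, val, strlen(val)) < 0) {
		perror(path);
		if (fd >= 0)
			close(fd);
		return -1;
	}
	close(fd);
	return 0;
}

int main(void)
{
	/* Arm an injection point on an (assumed) whitelisted symbol. */
	if (write_str("/sys/kernel/debug/fail_function/inject", "open_ctree"))
		return 1;
	/* Have the probed function return -ENOMEM (-12 written as a u64). */
	write_str("/sys/kernel/debug/fail_function/open_ctree/retval",
		  "0xfffffffffffffff4");
	/* Fail on every hit: 100% probability, no limit on times. */
	write_str("/sys/kernel/debug/fail_function/probability", "100");
	write_str("/sys/kernel/debug/fail_function/times", "-1");
	return 0;
}

Writing "!open_ctree" to the same inject file disarms that one point, and writing only whitespace clears everything, matching the branches in fei_write() above.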
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index b4aab48ad258..da2ccf142358 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -83,16 +83,6 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
83 return &(kretprobe_table_locks[hash].lock); 83 return &(kretprobe_table_locks[hash].lock);
84} 84}
85 85
86/* List of symbols that can be overriden for error injection. */
87static LIST_HEAD(kprobe_error_injection_list);
88static DEFINE_MUTEX(kprobe_ei_mutex);
89struct kprobe_ei_entry {
90 struct list_head list;
91 unsigned long start_addr;
92 unsigned long end_addr;
93 void *priv;
94};
95
96/* Blacklist -- list of struct kprobe_blacklist_entry */ 86/* Blacklist -- list of struct kprobe_blacklist_entry */
97static LIST_HEAD(kprobe_blacklist); 87static LIST_HEAD(kprobe_blacklist);
98 88
@@ -1404,17 +1394,6 @@ bool within_kprobe_blacklist(unsigned long addr)
1404 return false; 1394 return false;
1405} 1395}
1406 1396
1407bool within_kprobe_error_injection_list(unsigned long addr)
1408{
1409 struct kprobe_ei_entry *ent;
1410
1411 list_for_each_entry(ent, &kprobe_error_injection_list, list) {
1412 if (addr >= ent->start_addr && addr < ent->end_addr)
1413 return true;
1414 }
1415 return false;
1416}
1417
1418/* 1397/*
1419 * If we have a symbol_name argument, look it up and add the offset field 1398 * If we have a symbol_name argument, look it up and add the offset field
1420 * to it. This way, we can specify a relative address to a symbol. 1399 * to it. This way, we can specify a relative address to a symbol.
@@ -2189,86 +2168,6 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
2189 return 0; 2168 return 0;
2190} 2169}
2191 2170
2192#ifdef CONFIG_BPF_KPROBE_OVERRIDE
2193/* Markers of the _kprobe_error_inject_list section */
2194extern unsigned long __start_kprobe_error_inject_list[];
2195extern unsigned long __stop_kprobe_error_inject_list[];
2196
2197/*
2198 * Lookup and populate the kprobe_error_injection_list.
2199 *
2200 * For safety reasons we only allow certain functions to be overriden with
2201 * bpf_error_injection, so we need to populate the list of the symbols that have
2202 * been marked as safe for overriding.
2203 */
2204static void populate_kprobe_error_injection_list(unsigned long *start,
2205 unsigned long *end,
2206 void *priv)
2207{
2208 unsigned long *iter;
2209 struct kprobe_ei_entry *ent;
2210 unsigned long entry, offset = 0, size = 0;
2211
2212 mutex_lock(&kprobe_ei_mutex);
2213 for (iter = start; iter < end; iter++) {
2214 entry = arch_deref_entry_point((void *)*iter);
2215
2216 if (!kernel_text_address(entry) ||
2217 !kallsyms_lookup_size_offset(entry, &size, &offset)) {
2218 pr_err("Failed to find error inject entry at %p\n",
2219 (void *)entry);
2220 continue;
2221 }
2222
2223 ent = kmalloc(sizeof(*ent), GFP_KERNEL);
2224 if (!ent)
2225 break;
2226 ent->start_addr = entry;
2227 ent->end_addr = entry + size;
2228 ent->priv = priv;
2229 INIT_LIST_HEAD(&ent->list);
2230 list_add_tail(&ent->list, &kprobe_error_injection_list);
2231 }
2232 mutex_unlock(&kprobe_ei_mutex);
2233}
2234
2235static void __init populate_kernel_kprobe_ei_list(void)
2236{
2237 populate_kprobe_error_injection_list(__start_kprobe_error_inject_list,
2238 __stop_kprobe_error_inject_list,
2239 NULL);
2240}
2241
2242static void module_load_kprobe_ei_list(struct module *mod)
2243{
2244 if (!mod->num_kprobe_ei_funcs)
2245 return;
2246 populate_kprobe_error_injection_list(mod->kprobe_ei_funcs,
2247 mod->kprobe_ei_funcs +
2248 mod->num_kprobe_ei_funcs, mod);
2249}
2250
2251static void module_unload_kprobe_ei_list(struct module *mod)
2252{
2253 struct kprobe_ei_entry *ent, *n;
2254 if (!mod->num_kprobe_ei_funcs)
2255 return;
2256
2257 mutex_lock(&kprobe_ei_mutex);
2258 list_for_each_entry_safe(ent, n, &kprobe_error_injection_list, list) {
2259 if (ent->priv == mod) {
2260 list_del_init(&ent->list);
2261 kfree(ent);
2262 }
2263 }
2264 mutex_unlock(&kprobe_ei_mutex);
2265}
2266#else
2267static inline void __init populate_kernel_kprobe_ei_list(void) {}
2268static inline void module_load_kprobe_ei_list(struct module *m) {}
2269static inline void module_unload_kprobe_ei_list(struct module *m) {}
2270#endif
2271
2272/* Module notifier call back, checking kprobes on the module */ 2171/* Module notifier call back, checking kprobes on the module */
2273static int kprobes_module_callback(struct notifier_block *nb, 2172static int kprobes_module_callback(struct notifier_block *nb,
2274 unsigned long val, void *data) 2173 unsigned long val, void *data)
@@ -2279,11 +2178,6 @@ static int kprobes_module_callback(struct notifier_block *nb,
2279 unsigned int i; 2178 unsigned int i;
2280 int checkcore = (val == MODULE_STATE_GOING); 2179 int checkcore = (val == MODULE_STATE_GOING);
2281 2180
2282 if (val == MODULE_STATE_COMING)
2283 module_load_kprobe_ei_list(mod);
2284 else if (val == MODULE_STATE_GOING)
2285 module_unload_kprobe_ei_list(mod);
2286
2287 if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE) 2181 if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
2288 return NOTIFY_DONE; 2182 return NOTIFY_DONE;
2289 2183
@@ -2346,8 +2240,6 @@ static int __init init_kprobes(void)
2346 pr_err("Please take care of using kprobes.\n"); 2240 pr_err("Please take care of using kprobes.\n");
2347 } 2241 }
2348 2242
2349 populate_kernel_kprobe_ei_list();
2350
2351 if (kretprobe_blacklist_size) { 2243 if (kretprobe_blacklist_size) {
2352 /* lookup the function address from its name */ 2244 /* lookup the function address from its name */
2353 for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { 2245 for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
@@ -2515,56 +2407,6 @@ static const struct file_operations debugfs_kprobe_blacklist_ops = {
2515 .release = seq_release, 2407 .release = seq_release,
2516}; 2408};
2517 2409
2518/*
2519 * kprobes/error_injection_list -- shows which functions can be overriden for
2520 * error injection.
2521 * */
2522static void *kprobe_ei_seq_start(struct seq_file *m, loff_t *pos)
2523{
2524 mutex_lock(&kprobe_ei_mutex);
2525 return seq_list_start(&kprobe_error_injection_list, *pos);
2526}
2527
2528static void kprobe_ei_seq_stop(struct seq_file *m, void *v)
2529{
2530 mutex_unlock(&kprobe_ei_mutex);
2531}
2532
2533static void *kprobe_ei_seq_next(struct seq_file *m, void *v, loff_t *pos)
2534{
2535 return seq_list_next(v, &kprobe_error_injection_list, pos);
2536}
2537
2538static int kprobe_ei_seq_show(struct seq_file *m, void *v)
2539{
2540 char buffer[KSYM_SYMBOL_LEN];
2541 struct kprobe_ei_entry *ent =
2542 list_entry(v, struct kprobe_ei_entry, list);
2543
2544 sprint_symbol(buffer, ent->start_addr);
2545 seq_printf(m, "%s\n", buffer);
2546 return 0;
2547}
2548
2549static const struct seq_operations kprobe_ei_seq_ops = {
2550 .start = kprobe_ei_seq_start,
2551 .next = kprobe_ei_seq_next,
2552 .stop = kprobe_ei_seq_stop,
2553 .show = kprobe_ei_seq_show,
2554};
2555
2556static int kprobe_ei_open(struct inode *inode, struct file *filp)
2557{
2558 return seq_open(filp, &kprobe_ei_seq_ops);
2559}
2560
2561static const struct file_operations debugfs_kprobe_ei_ops = {
2562 .open = kprobe_ei_open,
2563 .read = seq_read,
2564 .llseek = seq_lseek,
2565 .release = seq_release,
2566};
2567
2568static void arm_all_kprobes(void) 2410static void arm_all_kprobes(void)
2569{ 2411{
2570 struct hlist_head *head; 2412 struct hlist_head *head;
@@ -2706,11 +2548,6 @@ static int __init debugfs_kprobe_init(void)
2706 if (!file) 2548 if (!file)
2707 goto error; 2549 goto error;
2708 2550
2709 file = debugfs_create_file("error_injection_list", 0444, dir, NULL,
2710 &debugfs_kprobe_ei_ops);
2711 if (!file)
2712 goto error;
2713
2714 return 0; 2551 return 0;
2715 2552
2716error: 2553error:
diff --git a/kernel/module.c b/kernel/module.c
index bd695bfdc5c4..601494d4b7ea 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3118,10 +3118,10 @@ static int find_module_sections(struct module *mod, struct load_info *info)
3118 sizeof(*mod->ftrace_callsites), 3118 sizeof(*mod->ftrace_callsites),
3119 &mod->num_ftrace_callsites); 3119 &mod->num_ftrace_callsites);
3120#endif 3120#endif
3121#ifdef CONFIG_BPF_KPROBE_OVERRIDE 3121#ifdef CONFIG_FUNCTION_ERROR_INJECTION
3122 mod->kprobe_ei_funcs = section_objs(info, "_kprobe_error_inject_list", 3122 mod->ei_funcs = section_objs(info, "_error_injection_whitelist",
3123 sizeof(*mod->kprobe_ei_funcs), 3123 sizeof(*mod->ei_funcs),
3124 &mod->num_kprobe_ei_funcs); 3124 &mod->num_ei_funcs);
3125#endif 3125#endif
3126 mod->extable = section_objs(info, "__ex_table", 3126 mod->extable = section_objs(info, "__ex_table",
3127 sizeof(*mod->extable), &mod->num_exentries); 3127 sizeof(*mod->extable), &mod->num_exentries);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index ae3a2d519e50..7114c885a78a 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -533,9 +533,7 @@ config FUNCTION_PROFILER
533config BPF_KPROBE_OVERRIDE 533config BPF_KPROBE_OVERRIDE
534 bool "Enable BPF programs to override a kprobed function" 534 bool "Enable BPF programs to override a kprobed function"
535 depends on BPF_EVENTS 535 depends on BPF_EVENTS
536 depends on KPROBES_ON_FTRACE 536 depends on FUNCTION_ERROR_INJECTION
537 depends on HAVE_KPROBE_OVERRIDE
538 depends on DYNAMIC_FTRACE_WITH_REGS
539 default n 537 default n
540 help 538 help
541 Allows BPF to override the execution of a probed function and 539 Allows BPF to override the execution of a probed function and
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f6d2327ecb59..f274468cbc45 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -14,7 +14,7 @@
14#include <linux/uaccess.h> 14#include <linux/uaccess.h>
15#include <linux/ctype.h> 15#include <linux/ctype.h>
16#include <linux/kprobes.h> 16#include <linux/kprobes.h>
17#include <asm/kprobes.h> 17#include <linux/error-injection.h>
18 18
19#include "trace_probe.h" 19#include "trace_probe.h"
20#include "trace.h" 20#include "trace.h"
@@ -83,9 +83,8 @@ EXPORT_SYMBOL_GPL(trace_call_bpf);
83#ifdef CONFIG_BPF_KPROBE_OVERRIDE 83#ifdef CONFIG_BPF_KPROBE_OVERRIDE
84BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) 84BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
85{ 85{
86 __this_cpu_write(bpf_kprobe_override, 1);
87 regs_set_return_value(regs, rc); 86 regs_set_return_value(regs, rc);
88 arch_ftrace_kprobe_override_function(regs); 87 override_function_with_return(regs);
89 return 0; 88 return 0;
90} 89}
91 90
@@ -800,11 +799,11 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
800 int ret = -EEXIST; 799 int ret = -EEXIST;
801 800
802 /* 801 /*
803 * Kprobe override only works for ftrace based kprobes, and only if they 802 * Kprobe override only works if they are on the function entry,
804 * are on the opt-in list. 803 * and only if they are on the opt-in list.
805 */ 804 */
806 if (prog->kprobe_override && 805 if (prog->kprobe_override &&
807 (!trace_kprobe_ftrace(event->tp_event) || 806 (!trace_kprobe_on_func_entry(event->tp_event) ||
808 !trace_kprobe_error_injectable(event->tp_event))) 807 !trace_kprobe_error_injectable(event->tp_event)))
809 return -EINVAL; 808 return -EINVAL;
810 809
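On the BPF side, bpf_override_return() now only has to set the return register and rewrite the ip; the per-cpu flag is gone. A minimal sketch of a program using it, assuming the samples/selftests bpf_helpers.h declares bpf_override_return(), the probed function ("open_ctree" again, as an example) is on the error-injection list, and the kernel has CONFIG_BPF_KPROBE_OVERRIDE enabled:

#include <uapi/linux/bpf.h>
#include <linux/ptrace.h>
#include "bpf_helpers.h"

SEC("kprobe/open_ctree")
int override_open_ctree(struct pt_regs *ctx)
{
	/* Make the probed function return -ENOMEM to its caller. */
	bpf_override_return(ctx, -12);
	return 0;
}

char _license[] SEC("license") = "GPL";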
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 91f4b57dab82..1fad24acd444 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -21,6 +21,7 @@
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/uaccess.h> 22#include <linux/uaccess.h>
23#include <linux/rculist.h> 23#include <linux/rculist.h>
24#include <linux/error-injection.h>
24 25
25#include "trace_probe.h" 26#include "trace_probe.h"
26 27
@@ -42,8 +43,6 @@ struct trace_kprobe {
42 (offsetof(struct trace_kprobe, tp.args) + \ 43 (offsetof(struct trace_kprobe, tp.args) + \
43 (sizeof(struct probe_arg) * (n))) 44 (sizeof(struct probe_arg) * (n)))
44 45
45DEFINE_PER_CPU(int, bpf_kprobe_override);
46
47static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk) 46static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
48{ 47{
49 return tk->rp.handler != NULL; 48 return tk->rp.handler != NULL;
@@ -88,13 +87,16 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
88 return nhit; 87 return nhit;
89} 88}
90 89
91int trace_kprobe_ftrace(struct trace_event_call *call) 90bool trace_kprobe_on_func_entry(struct trace_event_call *call)
92{ 91{
93 struct trace_kprobe *tk = (struct trace_kprobe *)call->data; 92 struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
94 return kprobe_ftrace(&tk->rp.kp); 93
94 return kprobe_on_func_entry(tk->rp.kp.addr,
95 tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name,
96 tk->rp.kp.addr ? 0 : tk->rp.kp.offset);
95} 97}
96 98
97int trace_kprobe_error_injectable(struct trace_event_call *call) 99bool trace_kprobe_error_injectable(struct trace_event_call *call)
98{ 100{
99 struct trace_kprobe *tk = (struct trace_kprobe *)call->data; 101 struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
100 unsigned long addr; 102 unsigned long addr;
@@ -106,7 +108,7 @@ int trace_kprobe_error_injectable(struct trace_event_call *call)
106 } else { 108 } else {
107 addr = (unsigned long)tk->rp.kp.addr; 109 addr = (unsigned long)tk->rp.kp.addr;
108 } 110 }
109 return within_kprobe_error_injection_list(addr); 111 return within_error_injection_list(addr);
110} 112}
111 113
112static int register_kprobe_event(struct trace_kprobe *tk); 114static int register_kprobe_event(struct trace_kprobe *tk);
@@ -1202,6 +1204,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
1202 int rctx; 1204 int rctx;
1203 1205
1204 if (bpf_prog_array_valid(call)) { 1206 if (bpf_prog_array_valid(call)) {
1207 unsigned long orig_ip = instruction_pointer(regs);
1205 int ret; 1208 int ret;
1206 1209
1207 ret = trace_call_bpf(call, regs); 1210 ret = trace_call_bpf(call, regs);
@@ -1209,12 +1212,13 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
1209 /* 1212 /*
1210 * We need to check and see if we modified the pc of the 1213 * We need to check and see if we modified the pc of the
1211 * pt_regs, and if so clear the kprobe and return 1 so that we 1214 * pt_regs, and if so clear the kprobe and return 1 so that we
1212 * don't do the instruction skipping. Also reset our state so 1215 * don't do the single stepping.
1213 * we are clean the next pass through. 1216 * The ftrace kprobe handler leaves it up to us to re-enable
1217 * preemption here before returning if we've modified the ip.
1214 */ 1218 */
1215 if (__this_cpu_read(bpf_kprobe_override)) { 1219 if (orig_ip != instruction_pointer(regs)) {
1216 __this_cpu_write(bpf_kprobe_override, 0);
1217 reset_current_kprobe(); 1220 reset_current_kprobe();
1221 preempt_enable_no_resched();
1218 return 1; 1222 return 1;
1219 } 1223 }
1220 if (!ret) 1224 if (!ret)
@@ -1322,15 +1326,8 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1322 if (tk->tp.flags & TP_FLAG_TRACE) 1326 if (tk->tp.flags & TP_FLAG_TRACE)
1323 kprobe_trace_func(tk, regs); 1327 kprobe_trace_func(tk, regs);
1324#ifdef CONFIG_PERF_EVENTS 1328#ifdef CONFIG_PERF_EVENTS
1325 if (tk->tp.flags & TP_FLAG_PROFILE) { 1329 if (tk->tp.flags & TP_FLAG_PROFILE)
1326 ret = kprobe_perf_func(tk, regs); 1330 ret = kprobe_perf_func(tk, regs);
1327 /*
1328 * The ftrace kprobe handler leaves it up to us to re-enable
1329 * preemption here before returning if we've modified the ip.
1330 */
1331 if (ret)
1332 preempt_enable_no_resched();
1333 }
1334#endif 1331#endif
1335 return ret; 1332 return ret;
1336} 1333}
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 5e54d748c84c..e101c5bb9eda 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -252,8 +252,8 @@ struct symbol_cache;
252unsigned long update_symbol_cache(struct symbol_cache *sc); 252unsigned long update_symbol_cache(struct symbol_cache *sc);
253void free_symbol_cache(struct symbol_cache *sc); 253void free_symbol_cache(struct symbol_cache *sc);
254struct symbol_cache *alloc_symbol_cache(const char *sym, long offset); 254struct symbol_cache *alloc_symbol_cache(const char *sym, long offset);
255int trace_kprobe_ftrace(struct trace_event_call *call); 255bool trace_kprobe_on_func_entry(struct trace_event_call *call);
256int trace_kprobe_error_injectable(struct trace_event_call *call); 256bool trace_kprobe_error_injectable(struct trace_event_call *call);
257#else 257#else
258/* uprobes do not support symbol fetch methods */ 258/* uprobes do not support symbol fetch methods */
259#define fetch_symbol_u8 NULL 259#define fetch_symbol_u8 NULL
@@ -280,14 +280,14 @@ alloc_symbol_cache(const char *sym, long offset)
280 return NULL; 280 return NULL;
281} 281}
282 282
283static inline int trace_kprobe_ftrace(struct trace_event_call *call) 283static inline bool trace_kprobe_on_func_entry(struct trace_event_call *call)
284{ 284{
285 return 0; 285 return false;
286} 286}
287 287
288static inline int trace_kprobe_error_injectable(struct trace_event_call *call) 288static inline bool trace_kprobe_error_injectable(struct trace_event_call *call)
289{ 289{
290 return 0; 290 return false;
291} 291}
292#endif /* CONFIG_KPROBE_EVENTS */ 292#endif /* CONFIG_KPROBE_EVENTS */
293 293
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 9d5b78aad4c5..890d4766cef3 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1500,6 +1500,10 @@ config FAULT_INJECTION
1500 Provide fault-injection framework. 1500 Provide fault-injection framework.
1501 For more details, see Documentation/fault-injection/. 1501 For more details, see Documentation/fault-injection/.
1502 1502
1503config FUNCTION_ERROR_INJECTION
1504 def_bool y
1505 depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES
1506
1503config FAILSLAB 1507config FAILSLAB
1504 bool "Fault-injection capability for kmalloc" 1508 bool "Fault-injection capability for kmalloc"
1505 depends on FAULT_INJECTION 1509 depends on FAULT_INJECTION
@@ -1547,6 +1551,16 @@ config FAIL_FUTEX
1547 help 1551 help
1548 Provide fault-injection capability for futexes. 1552 Provide fault-injection capability for futexes.
1549 1553
1554config FAIL_FUNCTION
1555 bool "Fault-injection capability for functions"
1556 depends on FAULT_INJECTION_DEBUG_FS && FUNCTION_ERROR_INJECTION
1557 help
1558 Provide function-based fault-injection capability.
1559	  This allows you to override a specific function so that it
1560	  immediately returns a given value. As a result, the caller of
1561	  the function will see that error value and has to handle it. This
1562	  is useful for testing the error handling in various subsystems.
1563
1550config FAULT_INJECTION_DEBUG_FS 1564config FAULT_INJECTION_DEBUG_FS
1551 bool "Debugfs entries for fault-injection capabilities" 1565 bool "Debugfs entries for fault-injection capabilities"
1552 depends on FAULT_INJECTION && SYSFS && DEBUG_FS 1566 depends on FAULT_INJECTION && SYSFS && DEBUG_FS
diff --git a/lib/Makefile b/lib/Makefile
index a6c8529dd9b2..75ec13778cd8 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -149,6 +149,7 @@ obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o
149obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o 149obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o
150obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \ 150obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \
151 of-reconfig-notifier-error-inject.o 151 of-reconfig-notifier-error-inject.o
152obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
152 153
153lib-$(CONFIG_GENERIC_BUG) += bug.o 154lib-$(CONFIG_GENERIC_BUG) += bug.o
154 155
diff --git a/lib/error-inject.c b/lib/error-inject.c
new file mode 100644
index 000000000000..c0d4600f4896
--- /dev/null
+++ b/lib/error-inject.c
@@ -0,0 +1,242 @@
1// SPDX-License-Identifier: GPL-2.0
2// error-inject.c: Function-level error injection table
3#include <linux/error-injection.h>
4#include <linux/debugfs.h>
5#include <linux/kallsyms.h>
6#include <linux/kprobes.h>
7#include <linux/module.h>
8#include <linux/mutex.h>
9#include <linux/list.h>
10#include <linux/slab.h>
11
12/* Whitelist of symbols that can be overridden for error injection. */
13static LIST_HEAD(error_injection_list);
14static DEFINE_MUTEX(ei_mutex);
15struct ei_entry {
16 struct list_head list;
17 unsigned long start_addr;
18 unsigned long end_addr;
19 int etype;
20 void *priv;
21};
22
23bool within_error_injection_list(unsigned long addr)
24{
25 struct ei_entry *ent;
26 bool ret = false;
27
28 mutex_lock(&ei_mutex);
29 list_for_each_entry(ent, &error_injection_list, list) {
30 if (addr >= ent->start_addr && addr < ent->end_addr) {
31 ret = true;
32 break;
33 }
34 }
35 mutex_unlock(&ei_mutex);
36 return ret;
37}
38
39int get_injectable_error_type(unsigned long addr)
40{
41 struct ei_entry *ent;
42
43 list_for_each_entry(ent, &error_injection_list, list) {
44 if (addr >= ent->start_addr && addr < ent->end_addr)
45 return ent->etype;
46 }
47 return EI_ETYPE_NONE;
48}
49
50/*
51 * Lookup and populate the error_injection_list.
52 *
53 * For safety reasons we only allow certain functions to be overridden with
54 * bpf_error_injection, so we need to populate the list of the symbols that have
55 * been marked as safe for overriding.
56 */
57static void populate_error_injection_list(struct error_injection_entry *start,
58 struct error_injection_entry *end,
59 void *priv)
60{
61 struct error_injection_entry *iter;
62 struct ei_entry *ent;
63 unsigned long entry, offset = 0, size = 0;
64
65 mutex_lock(&ei_mutex);
66 for (iter = start; iter < end; iter++) {
67 entry = arch_deref_entry_point((void *)iter->addr);
68
69 if (!kernel_text_address(entry) ||
70 !kallsyms_lookup_size_offset(entry, &size, &offset)) {
71 pr_err("Failed to find error inject entry at %p\n",
72 (void *)entry);
73 continue;
74 }
75
76 ent = kmalloc(sizeof(*ent), GFP_KERNEL);
77 if (!ent)
78 break;
79 ent->start_addr = entry;
80 ent->end_addr = entry + size;
81 ent->etype = iter->etype;
82 ent->priv = priv;
83 INIT_LIST_HEAD(&ent->list);
84 list_add_tail(&ent->list, &error_injection_list);
85 }
86 mutex_unlock(&ei_mutex);
87}
88
89/* Markers of the _error_inject_whitelist section */
90extern struct error_injection_entry __start_error_injection_whitelist[];
91extern struct error_injection_entry __stop_error_injection_whitelist[];
92
93static void __init populate_kernel_ei_list(void)
94{
95 populate_error_injection_list(__start_error_injection_whitelist,
96 __stop_error_injection_whitelist,
97 NULL);
98}
99
100#ifdef CONFIG_MODULES
101static void module_load_ei_list(struct module *mod)
102{
103 if (!mod->num_ei_funcs)
104 return;
105
106 populate_error_injection_list(mod->ei_funcs,
107 mod->ei_funcs + mod->num_ei_funcs, mod);
108}
109
110static void module_unload_ei_list(struct module *mod)
111{
112 struct ei_entry *ent, *n;
113
114 if (!mod->num_ei_funcs)
115 return;
116
117 mutex_lock(&ei_mutex);
118 list_for_each_entry_safe(ent, n, &error_injection_list, list) {
119 if (ent->priv == mod) {
120 list_del_init(&ent->list);
121 kfree(ent);
122 }
123 }
124 mutex_unlock(&ei_mutex);
125}
126
127/* Module notifier call back, checking error injection table on the module */
128static int ei_module_callback(struct notifier_block *nb,
129 unsigned long val, void *data)
130{
131 struct module *mod = data;
132
133 if (val == MODULE_STATE_COMING)
134 module_load_ei_list(mod);
135 else if (val == MODULE_STATE_GOING)
136 module_unload_ei_list(mod);
137
138 return NOTIFY_DONE;
139}
140
141static struct notifier_block ei_module_nb = {
142 .notifier_call = ei_module_callback,
143 .priority = 0
144};
145
146static __init int module_ei_init(void)
147{
148 return register_module_notifier(&ei_module_nb);
149}
150#else /* !CONFIG_MODULES */
151#define module_ei_init() (0)
152#endif
153
154/*
155 * error_injection/whitelist -- shows which functions can be overridden for
156 * error injection.
157 */
158static void *ei_seq_start(struct seq_file *m, loff_t *pos)
159{
160 mutex_lock(&ei_mutex);
161 return seq_list_start(&error_injection_list, *pos);
162}
163
164static void ei_seq_stop(struct seq_file *m, void *v)
165{
166 mutex_unlock(&ei_mutex);
167}
168
169static void *ei_seq_next(struct seq_file *m, void *v, loff_t *pos)
170{
171 return seq_list_next(v, &error_injection_list, pos);
172}
173
174static const char *error_type_string(int etype)
175{
176 switch (etype) {
177 case EI_ETYPE_NULL:
178 return "NULL";
179 case EI_ETYPE_ERRNO:
180 return "ERRNO";
181 case EI_ETYPE_ERRNO_NULL:
182 return "ERRNO_NULL";
183 default:
184 return "(unknown)";
185 }
186}
187
188static int ei_seq_show(struct seq_file *m, void *v)
189{
190 struct ei_entry *ent = list_entry(v, struct ei_entry, list);
191
192 seq_printf(m, "%pf\t%s\n", (void *)ent->start_addr,
193 error_type_string(ent->etype));
194 return 0;
195}
196
197static const struct seq_operations ei_seq_ops = {
198 .start = ei_seq_start,
199 .next = ei_seq_next,
200 .stop = ei_seq_stop,
201 .show = ei_seq_show,
202};
203
204static int ei_open(struct inode *inode, struct file *filp)
205{
206 return seq_open(filp, &ei_seq_ops);
207}
208
209static const struct file_operations debugfs_ei_ops = {
210 .open = ei_open,
211 .read = seq_read,
212 .llseek = seq_lseek,
213 .release = seq_release,
214};
215
216static int __init ei_debugfs_init(void)
217{
218 struct dentry *dir, *file;
219
220 dir = debugfs_create_dir("error_injection", NULL);
221 if (!dir)
222 return -ENOMEM;
223
224 file = debugfs_create_file("list", 0444, dir, NULL, &debugfs_ei_ops);
225 if (!file) {
226 debugfs_remove(dir);
227 return -ENOMEM;
228 }
229
230 return 0;
231}
232
233static int __init init_error_injection(void)
234{
235 populate_kernel_ei_list();
236
237 if (!module_ei_init())
238 ei_debugfs_init();
239
240 return 0;
241}
242late_initcall(init_error_injection);
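Functions land on this whitelist at build time: per the diffstat, the ALLOW_ERROR_INJECTION() macro from the new include/asm-generic/error-injection.h places an error_injection_entry into the _error_injection_whitelist section that populate_error_injection_list() walks above. A kernel-side sketch with a hypothetical function; the second macro argument is one of the NULL/ERRNO/ERRNO_NULL classes reported by error_type_string():

#include <linux/error-injection.h>

/* Hypothetical driver function, used only for illustration. */
static int my_driver_reset(void)
{
	/* ... real work that can legitimately fail with -errno ... */
	return 0;
}
/* Whitelist it: ERRNO means injected values must look like -errno codes,
 * which is what get_injectable_error_type()/adjust_error_retval() enforce.
 */
ALLOW_ERROR_INJECTION(my_driver_reset, ERRNO);

After boot (or module load via ei_module_callback()), the entry appears in /sys/kernel/debug/error_injection/list together with its class.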
diff --git a/net/core/filter.c b/net/core/filter.c
index d4b190e63b79..db2ee8c7e1bd 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4310,16 +4310,15 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
4310 si->dst_reg, si->dst_reg, 4310 si->dst_reg, si->dst_reg,
4311 offsetof(struct xdp_rxq_info, dev)); 4311 offsetof(struct xdp_rxq_info, dev));
4312 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 4312 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4313 bpf_target_off(struct net_device, 4313 offsetof(struct net_device, ifindex));
4314 ifindex, 4, target_size));
4315 break; 4314 break;
4316 case offsetof(struct xdp_md, rx_queue_index): 4315 case offsetof(struct xdp_md, rx_queue_index):
4317 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq), 4316 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
4318 si->dst_reg, si->src_reg, 4317 si->dst_reg, si->src_reg,
4319 offsetof(struct xdp_buff, rxq)); 4318 offsetof(struct xdp_buff, rxq));
4320 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 4319 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4321 bpf_target_off(struct xdp_rxq_info, 4320 offsetof(struct xdp_rxq_info,
4322 queue_index, 4, target_size)); 4321 queue_index));
4323 break; 4322 break;
4324 } 4323 }
4325 4324
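The loads these rewritten conversions serve come from XDP programs reading the two struct xdp_md fields; a small sketch, assuming a uapi bpf.h that already exposes ingress_ifindex and rx_queue_index and the usual bpf_helpers.h for SEC() (the ifindex value is hypothetical):

#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

SEC("xdp")
int xdp_filter_rxq(struct xdp_md *ctx)
{
	/* Each field access below is rewritten by xdp_convert_ctx_access()
	 * into the pair of BPF_LDX_MEM instructions shown above.
	 */
	if (ctx->ingress_ifindex == 4 && ctx->rx_queue_index == 0)
		return XDP_DROP;
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";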
diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
index 2fe2f761a0d0..c969141bfa8b 100644
--- a/samples/bpf/xdp_monitor_kern.c
+++ b/samples/bpf/xdp_monitor_kern.c
@@ -104,7 +104,7 @@ struct xdp_exception_ctx {
104SEC("tracepoint/xdp/xdp_exception") 104SEC("tracepoint/xdp/xdp_exception")
105int trace_xdp_exception(struct xdp_exception_ctx *ctx) 105int trace_xdp_exception(struct xdp_exception_ctx *ctx)
106{ 106{
107 u64 *cnt;; 107 u64 *cnt;
108 u32 key; 108 u32 key;
109 109
110 key = ctx->act; 110 key = ctx->act;
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 2237bc43f71c..26901ec87361 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -39,7 +39,7 @@ CC = gcc
39 39
40CFLAGS += -O2 40CFLAGS += -O2
41CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow 41CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow
42CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/ 42CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/
43CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' 43CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
44LIBS = -lelf -lbfd -lopcodes $(LIBBPF) 44LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
45 45
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index c6a28be4665c..099e21cf1b5c 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -66,6 +66,7 @@ static const char * const prog_type_name[] = {
66 [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", 66 [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
67 [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", 67 [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
68 [BPF_PROG_TYPE_SK_SKB] = "sk_skb", 68 [BPF_PROG_TYPE_SK_SKB] = "sk_skb",
69 [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
69}; 70};
70 71
71static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) 72static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 17f2c73fff8b..bc715f6ac320 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -190,7 +190,7 @@ $(OUTPUT)test-libbfd.bin:
190 $(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl 190 $(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl
191 191
192$(OUTPUT)test-disassembler-four-args.bin: 192$(OUTPUT)test-disassembler-four-args.bin:
193 $(BUILD) -lbfd -lopcodes 193 $(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes
194 194
195$(OUTPUT)test-liberty.bin: 195$(OUTPUT)test-liberty.bin:
196 $(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty 196 $(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4e8c60acfa32..69f96af4a569 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -245,6 +245,7 @@ union bpf_attr {
245 * BPF_F_NUMA_NODE is set). 245 * BPF_F_NUMA_NODE is set).
246 */ 246 */
247 char map_name[BPF_OBJ_NAME_LEN]; 247 char map_name[BPF_OBJ_NAME_LEN];
248 __u32 map_ifindex; /* ifindex of netdev to create on */
248 }; 249 };
249 250
250 struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ 251 struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 8ed43ae9db9b..83714ca1f22b 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -93,7 +93,6 @@ export prefix libdir src obj
93# Shell quotes 93# Shell quotes
94libdir_SQ = $(subst ','\'',$(libdir)) 94libdir_SQ = $(subst ','\'',$(libdir))
95libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) 95libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
96plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
97 96
98LIB_FILE = libbpf.a libbpf.so 97LIB_FILE = libbpf.a libbpf.so
99 98
@@ -150,7 +149,7 @@ CMD_TARGETS = $(LIB_FILE)
150 149
151TARGETS = $(CMD_TARGETS) 150TARGETS = $(CMD_TARGETS)
152 151
153all: fixdep $(VERSION_FILES) all_cmd 152all: fixdep all_cmd
154 153
155all_cmd: $(CMD_TARGETS) 154all_cmd: $(CMD_TARGETS)
156 155
@@ -169,21 +168,11 @@ $(OUTPUT)libbpf.so: $(BPF_IN)
169$(OUTPUT)libbpf.a: $(BPF_IN) 168$(OUTPUT)libbpf.a: $(BPF_IN)
170 $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ 169 $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
171 170
172define update_dir
173 (echo $1 > $@.tmp; \
174 if [ -r $@ ] && cmp -s $@ $@.tmp; then \
175 rm -f $@.tmp; \
176 else \
177 echo ' UPDATE $@'; \
178 mv -f $@.tmp $@; \
179 fi);
180endef
181
182define do_install 171define do_install
183 if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ 172 if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
184 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ 173 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
185 fi; \ 174 fi; \
186 $(INSTALL) $1 '$(DESTDIR_SQ)$2' 175 $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
187endef 176endef
188 177
189install_lib: all_cmd 178install_lib: all_cmd
@@ -192,7 +181,8 @@ install_lib: all_cmd
192 181
193install_headers: 182install_headers:
194 $(call QUIET_INSTALL, headers) \ 183 $(call QUIET_INSTALL, headers) \
195 $(call do_install,bpf.h,$(prefix)/include/bpf,644) 184 $(call do_install,bpf.h,$(prefix)/include/bpf,644); \
185 $(call do_install,libbpf.h,$(prefix)/include/bpf,644);
196 186
197install: install_lib 187install: install_lib
198 188
@@ -203,7 +193,7 @@ config-clean:
203 $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null 193 $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
204 194
205clean: 195clean:
206 $(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd \ 196 $(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so .*.d .*.cmd \
207 $(RM) LIBBPF-CFLAGS 197 $(RM) LIBBPF-CFLAGS
208 $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf 198 $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
209 199
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e9c4b7cabcf2..30c776375118 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1803,7 +1803,7 @@ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
1803BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); 1803BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
1804BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); 1804BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
1805 1805
1806#define BPF_PROG_SEC(string, type) { string, sizeof(string), type } 1806#define BPF_PROG_SEC(string, type) { string, sizeof(string) - 1, type }
1807static const struct { 1807static const struct {
1808 const char *sec; 1808 const char *sec;
1809 size_t len; 1809 size_t len;