author		Daniel Borkmann <daniel@iogearbox.net>	2015-03-20 10:11:12 -0400
committer	David S. Miller <davem@davemloft.net>	2015-03-20 19:10:44 -0400
commit		a8cb5f556b567974d75ea29c15181c445c541b1f (patch)
tree		0fb121bb44e532307c10a3b7e4154f60381ebbbc /net/sched
parent		94caee8c312d96522bcdae88791aaa9ebcd5f22c (diff)
act_bpf: add initial eBPF support for actions
This work extends the "classic" BPF programmable tc action by extending its scope also to native eBPF code. Together with commit e2e9b6541dd4 ("cls_bpf: add initial eBPF support for programmable classifiers"), this adds the facility to implement fully flexible classifiers and actions for tc that can be written in a C subset in user space, "safely" loaded into the kernel, and run at native speed when JITed.

Also, since eBPF maps can be shared between eBPF programs, this offers the possibility for cls_bpf and act_bpf to share data 1) between themselves and 2) with user space applications. That means, for example, that customized runtime statistics can be collected in user space, but, more importantly, that classifier and action behaviour can be altered based on map input from a user space application.

For the remaining details on the workflow and integration, see the cls_bpf commit e2e9b6541dd4. A preliminary iproute2 part can be found under [1].

[1] http://git.breakpoint.cc/cgit/dborkman/iproute2.git/log/?h=ebpf-act

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
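To make the workflow concrete, here is a minimal sketch (not part of this commit) of what such an action could look like in restricted C, compiled to eBPF with LLVM and loaded through the iproute2 support under [1]. The SEC()/struct bpf_map_def conventions are borrowed from the kernel's samples/bpf/bpf_helpers.h, and the struct __sk_buff context type follows the eBPF API as it later stabilized, so treat the details as assumptions rather than the committed interface:

#include <uapi/linux/bpf.h>
#include <uapi/linux/pkt_cls.h>
#include "bpf_helpers.h"

/* Shared counter slot: cls_bpf/act_bpf programs and user space
 * applications can all access this map through the same map fd.
 */
struct bpf_map_def SEC("maps") act_stats = {
	.type		= BPF_MAP_TYPE_ARRAY,
	.key_size	= sizeof(__u32),
	.value_size	= sizeof(__u64),
	.max_entries	= 1,
};

SEC("action")
int act_main(struct __sk_buff *skb)
{
	__u32 key = 0;
	__u64 *cnt;

	cnt = bpf_map_lookup_elem(&act_stats, &key);
	if (cnt)
		__sync_fetch_and_add(cnt, 1);

	/* The return value is the tc action opcode; TC_ACT_UNSPEC (-1)
	 * defers to the default action configured from user space.
	 */
	return TC_ACT_UNSPEC;
}

char _license[] SEC("license") = "GPL";

The returned opcode feeds directly into the tcf_bpf() switch in the patch below: TC_ACT_SHOT, for instance, would drop the packet and bump the drop counter, while TC_ACT_UNSPEC falls back to the action's configured default.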
Diffstat (limited to 'net/sched')
-rw-r--r--	net/sched/act_bpf.c | 295
1 file changed, 213 insertions(+), 82 deletions(-)
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 5f6288fa3f12..4d2cede17468 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -13,26 +13,40 @@
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 #include <linux/filter.h>
+#include <linux/bpf.h>
+
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 
 #include <linux/tc_act/tc_bpf.h>
 #include <net/tc_act/tc_bpf.h>
 
 #define BPF_TAB_MASK		15
+#define ACT_BPF_NAME_LEN	256
+
+struct tcf_bpf_cfg {
+	struct bpf_prog *filter;
+	struct sock_filter *bpf_ops;
+	char *bpf_name;
+	u32 bpf_fd;
+	u16 bpf_num_ops;
+};
 
-static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a,
+static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
 		   struct tcf_result *res)
 {
-	struct tcf_bpf *b = a->priv;
+	struct tcf_bpf *prog = act->priv;
 	int action, filter_res;
 
-	spin_lock(&b->tcf_lock);
+	spin_lock(&prog->tcf_lock);
 
-	b->tcf_tm.lastuse = jiffies;
-	bstats_update(&b->tcf_bstats, skb);
+	prog->tcf_tm.lastuse = jiffies;
+	bstats_update(&prog->tcf_bstats, skb);
 
-	filter_res = BPF_PROG_RUN(b->filter, skb);
+	/* Needed here for accessing maps. */
+	rcu_read_lock();
+	filter_res = BPF_PROG_RUN(prog->filter, skb);
+	rcu_read_unlock();
 
 	/* A BPF program may overwrite the default action opcode.
 	 * Similarly as in cls_bpf, if filter_res == -1 we use the
@@ -52,52 +66,87 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a,
 		break;
 	case TC_ACT_SHOT:
 		action = filter_res;
-		b->tcf_qstats.drops++;
+		prog->tcf_qstats.drops++;
 		break;
 	case TC_ACT_UNSPEC:
-		action = b->tcf_action;
+		action = prog->tcf_action;
 		break;
 	default:
 		action = TC_ACT_UNSPEC;
 		break;
 	}
 
-	spin_unlock(&b->tcf_lock);
+	spin_unlock(&prog->tcf_lock);
 	return action;
 }
 
-static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *a,
+static bool tcf_bpf_is_ebpf(const struct tcf_bpf *prog)
+{
+	return !prog->bpf_ops;
+}
+
+static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog,
+				 struct sk_buff *skb)
+{
+	struct nlattr *nla;
+
+	if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, prog->bpf_num_ops))
+		return -EMSGSIZE;
+
+	nla = nla_reserve(skb, TCA_ACT_BPF_OPS, prog->bpf_num_ops *
+			  sizeof(struct sock_filter));
+	if (nla == NULL)
+		return -EMSGSIZE;
+
+	memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
+
+	return 0;
+}
+
+static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog,
+				  struct sk_buff *skb)
+{
+	if (nla_put_u32(skb, TCA_ACT_BPF_FD, prog->bpf_fd))
+		return -EMSGSIZE;
+
+	if (prog->bpf_name &&
+	    nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
 			int bind, int ref)
 {
 	unsigned char *tp = skb_tail_pointer(skb);
-	struct tcf_bpf *b = a->priv;
+	struct tcf_bpf *prog = act->priv;
 	struct tc_act_bpf opt = {
-		.index = b->tcf_index,
-		.refcnt = b->tcf_refcnt - ref,
-		.bindcnt = b->tcf_bindcnt - bind,
-		.action = b->tcf_action,
+		.index   = prog->tcf_index,
+		.refcnt  = prog->tcf_refcnt - ref,
+		.bindcnt = prog->tcf_bindcnt - bind,
+		.action  = prog->tcf_action,
 	};
-	struct tcf_t t;
-	struct nlattr *nla;
+	struct tcf_t tm;
+	int ret;
 
 	if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
 
-	if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, b->bpf_num_ops))
-		goto nla_put_failure;
-
-	nla = nla_reserve(skb, TCA_ACT_BPF_OPS, b->bpf_num_ops *
-			  sizeof(struct sock_filter));
-	if (!nla)
+	if (tcf_bpf_is_ebpf(prog))
+		ret = tcf_bpf_dump_ebpf_info(prog, skb);
+	else
+		ret = tcf_bpf_dump_bpf_info(prog, skb);
+	if (ret)
 		goto nla_put_failure;
 
-	memcpy(nla_data(nla), b->bpf_ops, nla_len(nla));
+	tm.install = jiffies_to_clock_t(jiffies - prog->tcf_tm.install);
+	tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse);
+	tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires);
 
-	t.install = jiffies_to_clock_t(jiffies - b->tcf_tm.install);
-	t.lastuse = jiffies_to_clock_t(jiffies - b->tcf_tm.lastuse);
-	t.expires = jiffies_to_clock_t(b->tcf_tm.expires);
-	if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(t), &t))
+	if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm))
 		goto nla_put_failure;
+
 	return skb->len;
 
 nla_put_failure:
@@ -107,36 +156,21 @@ nla_put_failure:
 
 static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = {
 	[TCA_ACT_BPF_PARMS]	= { .len = sizeof(struct tc_act_bpf) },
+	[TCA_ACT_BPF_FD]	= { .type = NLA_U32 },
+	[TCA_ACT_BPF_NAME]	= { .type = NLA_NUL_STRING, .len = ACT_BPF_NAME_LEN },
 	[TCA_ACT_BPF_OPS_LEN]	= { .type = NLA_U16 },
 	[TCA_ACT_BPF_OPS]	= { .type = NLA_BINARY,
 				    .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
 };
 
-static int tcf_bpf_init(struct net *net, struct nlattr *nla,
-			struct nlattr *est, struct tc_action *a,
-			int ovr, int bind)
+static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
 {
-	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
-	struct tc_act_bpf *parm;
-	struct tcf_bpf *b;
-	u16 bpf_size, bpf_num_ops;
 	struct sock_filter *bpf_ops;
-	struct sock_fprog_kern tmp;
+	struct sock_fprog_kern fprog_tmp;
 	struct bpf_prog *fp;
+	u16 bpf_size, bpf_num_ops;
 	int ret;
 
-	if (!nla)
-		return -EINVAL;
-
-	ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy);
-	if (ret < 0)
-		return ret;
-
-	if (!tb[TCA_ACT_BPF_PARMS] ||
-	    !tb[TCA_ACT_BPF_OPS_LEN] || !tb[TCA_ACT_BPF_OPS])
-		return -EINVAL;
-	parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
-
 	bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]);
 	if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0)
 		return -EINVAL;
@@ -146,68 +180,165 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 
 	bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
-	if (!bpf_ops)
+	if (bpf_ops == NULL)
 		return -ENOMEM;
 
 	memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size);
 
-	tmp.len = bpf_num_ops;
-	tmp.filter = bpf_ops;
+	fprog_tmp.len = bpf_num_ops;
+	fprog_tmp.filter = bpf_ops;
 
-	ret = bpf_prog_create(&fp, &tmp);
-	if (ret)
-		goto free_bpf_ops;
+	ret = bpf_prog_create(&fp, &fprog_tmp);
+	if (ret < 0) {
+		kfree(bpf_ops);
+		return ret;
+	}
 
-	if (!tcf_hash_check(parm->index, a, bind)) {
-		ret = tcf_hash_create(parm->index, est, a, sizeof(*b), bind);
-		if (ret)
+	cfg->bpf_ops = bpf_ops;
+	cfg->bpf_num_ops = bpf_num_ops;
+	cfg->filter = fp;
+
+	return 0;
+}
+
+static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
+{
+	struct bpf_prog *fp;
+	char *name = NULL;
+	u32 bpf_fd;
+
+	bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]);
+
+	fp = bpf_prog_get(bpf_fd);
+	if (IS_ERR(fp))
+		return PTR_ERR(fp);
+
+	if (fp->type != BPF_PROG_TYPE_SCHED_ACT) {
+		bpf_prog_put(fp);
+		return -EINVAL;
+	}
+
+	if (tb[TCA_ACT_BPF_NAME]) {
+		name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]),
+			       nla_len(tb[TCA_ACT_BPF_NAME]),
+			       GFP_KERNEL);
+		if (!name) {
+			bpf_prog_put(fp);
+			return -ENOMEM;
+		}
+	}
+
+	cfg->bpf_fd = bpf_fd;
+	cfg->bpf_name = name;
+	cfg->filter = fp;
+
+	return 0;
+}
+
+static int tcf_bpf_init(struct net *net, struct nlattr *nla,
+			struct nlattr *est, struct tc_action *act,
+			int replace, int bind)
+{
+	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
+	struct tc_act_bpf *parm;
+	struct tcf_bpf *prog;
+	struct tcf_bpf_cfg cfg;
+	bool is_bpf, is_ebpf;
+	int ret;
+
+	if (!nla)
+		return -EINVAL;
+
+	ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy);
+	if (ret < 0)
+		return ret;
+
+	is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
+	is_ebpf = tb[TCA_ACT_BPF_FD];
+
+	if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) ||
+	    !tb[TCA_ACT_BPF_PARMS])
+		return -EINVAL;
+
+	parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
+
+	memset(&cfg, 0, sizeof(cfg));
+
+	ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) :
+		       tcf_bpf_init_from_efd(tb, &cfg);
+	if (ret < 0)
+		return ret;
+
+	if (!tcf_hash_check(parm->index, act, bind)) {
+		ret = tcf_hash_create(parm->index, est, act,
+				      sizeof(*prog), bind);
+		if (ret < 0)
 			goto destroy_fp;
 
 		ret = ACT_P_CREATED;
 	} else {
+		/* Don't override defaults. */
 		if (bind)
 			goto destroy_fp;
-		tcf_hash_release(a, bind);
-		if (!ovr) {
+
+		tcf_hash_release(act, bind);
+		if (!replace) {
 			ret = -EEXIST;
 			goto destroy_fp;
 		}
 	}
 
-	b = to_bpf(a);
-	spin_lock_bh(&b->tcf_lock);
-	b->tcf_action = parm->action;
-	b->bpf_num_ops = bpf_num_ops;
-	b->bpf_ops = bpf_ops;
-	b->filter = fp;
-	spin_unlock_bh(&b->tcf_lock);
+	prog = to_bpf(act);
+	spin_lock_bh(&prog->tcf_lock);
+
+	prog->bpf_ops = cfg.bpf_ops;
+	prog->bpf_name = cfg.bpf_name;
+
+	if (cfg.bpf_num_ops)
+		prog->bpf_num_ops = cfg.bpf_num_ops;
+	if (cfg.bpf_fd)
+		prog->bpf_fd = cfg.bpf_fd;
+
+	prog->tcf_action = parm->action;
+	prog->filter = cfg.filter;
+
+	spin_unlock_bh(&prog->tcf_lock);
 
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(a);
+		tcf_hash_insert(act);
+
 	return ret;
 
 destroy_fp:
-	bpf_prog_destroy(fp);
-free_bpf_ops:
-	kfree(bpf_ops);
+	if (is_ebpf)
+		bpf_prog_put(cfg.filter);
+	else
+		bpf_prog_destroy(cfg.filter);
+
+	kfree(cfg.bpf_ops);
+	kfree(cfg.bpf_name);
+
 	return ret;
 }
 
-static void tcf_bpf_cleanup(struct tc_action *a, int bind)
+static void tcf_bpf_cleanup(struct tc_action *act, int bind)
 {
-	struct tcf_bpf *b = a->priv;
+	const struct tcf_bpf *prog = act->priv;
 
-	bpf_prog_destroy(b->filter);
+	if (tcf_bpf_is_ebpf(prog))
+		bpf_prog_put(prog->filter);
+	else
+		bpf_prog_destroy(prog->filter);
 }
 
-static struct tc_action_ops act_bpf_ops = {
+static struct tc_action_ops act_bpf_ops __read_mostly = {
 	.kind		= "bpf",
 	.type		= TCA_ACT_BPF,
 	.owner		= THIS_MODULE,
 	.act		= tcf_bpf,
 	.dump		= tcf_bpf_dump,
 	.cleanup	= tcf_bpf_cleanup,
 	.init		= tcf_bpf_init,
 };
 
 static int __init bpf_init_module(void)
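For the eBPF case, the fd handed to the kernel in TCA_ACT_BPF_FD comes straight from bpf(2). As a rough illustration (an assumption sketch, not iproute2's actual code; the rtnetlink plumbing that wraps the attribute into RTM_NEWACTION is omitted), user space could obtain such an fd like this:

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Load an eBPF action program and return its fd, which is what the
 * TCA_ACT_BPF_FD attribute parsed by tcf_bpf_init_from_efd() above
 * expects.
 */
static int bpf_act_load(const struct bpf_insn *insns, int insn_cnt,
			const char *license, char *log, unsigned int log_len)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.prog_type = BPF_PROG_TYPE_SCHED_ACT;
	attr.insns     = (unsigned long) insns;
	attr.insn_cnt  = insn_cnt;
	attr.license   = (unsigned long) license;
	attr.log_buf   = (unsigned long) log;
	attr.log_size  = log_len;
	attr.log_level = 1;

	/* The kernel resolves this fd via bpf_prog_get() and rejects
	 * programs whose type is not BPF_PROG_TYPE_SCHED_ACT.
	 */
	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}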