aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/bpf/syscall.c
diff options
context:
space:
mode:
authorJohn Fastabend <john.fastabend@gmail.com>2017-08-16 01:32:47 -0400
committerDavid S. Miller <davem@davemloft.net>2017-08-16 14:27:53 -0400
commit174a79ff9515f400b9a6115643dafd62a635b7e6 (patch)
treef48f1fc407adb9bce6fb0e5cddaabd7141acd071 /kernel/bpf/syscall.c
parenta6f6df69c48b86cd84f36c70593eb4968fceb34a (diff)
bpf: sockmap with sk redirect support
Recently we added a new map type called dev map, used to forward XDP packets between ports (6093ec2dc313). This patch introduces a similar notion for sockets. A sockmap allows users to add participating sockets to a map. When sockets are added to the map, enough context is stored with the map entry to use the entry with a new helper, bpf_sk_redirect_map(map, key, flags). This helper (analogous to bpf_redirect_map in XDP) is given the map and an entry in the map. When called from a sockmap program, discussed below, the skb will be sent on the socket using skb_send_sock(). With the above, we need a bpf program to call the helper from, which will then implement the send logic. The initial site implemented in this series is the recv_sock hook. For this to work we implemented a map attach command to add attributes to a map. In sockmap we add two programs: a parse program and a verdict program. The parse program uses strparser to build messages and pass them to the verdict program. The parse program uses the normal strparser semantics. The verdict program is of type SK_SKB. The verdict program returns a verdict, either SK_DROP or SK_REDIRECT for now. Additional actions may be added later. When SK_REDIRECT is returned, which is expected when the bpf program uses bpf_sk_redirect_map(), the sockmap logic will consult per-cpu variables set by the helper routine and pull the sock entry out of the sock map. This pattern follows the existing redirect logic in cls and xdp programs. This gives the flow: recv_sock -> str_parser (parse_prog) -> verdict_prog -> skb_send_sock, with an alternate branch from verdict_prog -> kfree_skb. As an example use case, a message-based load balancer may use specific logic in the verdict program to select the sock to send on. Sample programs are provided in future patches that hopefully illustrate the user interfaces. Also, selftests are in follow-on patches. Signed-off-by: John Fastabend <john.fastabend@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r--kernel/bpf/syscall.c51
1 files changed, 50 insertions, 1 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 17e29f596de1..d2f2bdf71ffa 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1087,7 +1087,50 @@ static int bpf_obj_get(const union bpf_attr *attr)
1087 1087
1088#ifdef CONFIG_CGROUP_BPF 1088#ifdef CONFIG_CGROUP_BPF
1089 1089
1090#define BPF_PROG_ATTACH_LAST_FIELD attach_flags 1090#define BPF_PROG_ATTACH_LAST_FIELD attach_bpf_fd2
1091
1092static int sockmap_get_from_fd(const union bpf_attr *attr, int ptype)
1093{
1094 struct bpf_prog *prog1, *prog2;
1095 int ufd = attr->target_fd;
1096 struct bpf_map *map;
1097 struct fd f;
1098 int err;
1099
1100 f = fdget(ufd);
1101 map = __bpf_map_get(f);
1102 if (IS_ERR(map))
1103 return PTR_ERR(map);
1104
1105 if (!map->ops->map_attach) {
1106 fdput(f);
1107 return -EOPNOTSUPP;
1108 }
1109
1110 prog1 = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
1111 if (IS_ERR(prog1)) {
1112 fdput(f);
1113 return PTR_ERR(prog1);
1114 }
1115
1116 prog2 = bpf_prog_get_type(attr->attach_bpf_fd2, ptype);
1117 if (IS_ERR(prog2)) {
1118 fdput(f);
1119 bpf_prog_put(prog1);
1120 return PTR_ERR(prog2);
1121 }
1122
1123 err = map->ops->map_attach(map, prog1, prog2);
1124 if (err) {
1125 fdput(f);
1126 bpf_prog_put(prog1);
1127 bpf_prog_put(prog2);
1128 return PTR_ERR(map);
1129 }
1130
1131 fdput(f);
1132 return err;
1133}
1091 1134
1092static int bpf_prog_attach(const union bpf_attr *attr) 1135static int bpf_prog_attach(const union bpf_attr *attr)
1093{ 1136{
@@ -1116,10 +1159,16 @@ static int bpf_prog_attach(const union bpf_attr *attr)
1116 case BPF_CGROUP_SOCK_OPS: 1159 case BPF_CGROUP_SOCK_OPS:
1117 ptype = BPF_PROG_TYPE_SOCK_OPS; 1160 ptype = BPF_PROG_TYPE_SOCK_OPS;
1118 break; 1161 break;
1162 case BPF_CGROUP_SMAP_INGRESS:
1163 ptype = BPF_PROG_TYPE_SK_SKB;
1164 break;
1119 default: 1165 default:
1120 return -EINVAL; 1166 return -EINVAL;
1121 } 1167 }
1122 1168
1169 if (attr->attach_type == BPF_CGROUP_SMAP_INGRESS)
1170 return sockmap_get_from_fd(attr, ptype);
1171
1123 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 1172 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
1124 if (IS_ERR(prog)) 1173 if (IS_ERR(prog))
1125 return PTR_ERR(prog); 1174 return PTR_ERR(prog);