summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYotam Gigi <yotamg@mellanox.com>2017-01-23 05:07:08 -0500
committerDavid S. Miller <davem@davemloft.net>2017-01-24 13:44:28 -0500
commit6ae0a6286171154661b74f7f550f9441c6008424 (patch)
treeafce8750cb4e2ec1e1162d30b986cb88724c8ba7
parentd36db83bac41e46cb89bd1ef57e049ce80a7433a (diff)
net: Introduce psample, a new genetlink channel for packet sampling
Add a general way for kernel modules to sample packets, without being tied to any specific subsystem. This netlink channel can be used by tc, iptables, etc. and allows standardizing packet sampling in the kernel. For every sampled packet, the psample module adds the following metadata fields: PSAMPLE_ATTR_IIFINDEX - the packet's input ifindex, if applicable; PSAMPLE_ATTR_OIFINDEX - the packet's output ifindex, if applicable; PSAMPLE_ATTR_ORIGSIZE - the packet's original size, in case it has been truncated during sampling; PSAMPLE_ATTR_SAMPLE_GROUP - the packet's sample group, which is set by the user who initiated the sampling. This field allows the user to differentiate between several samplers working simultaneously and to filter the packets relevant to them; PSAMPLE_ATTR_GROUP_SEQ - sequence counter of the last sent packet. The sequence is kept for each group; PSAMPLE_ATTR_SAMPLE_RATE - the sampling rate used for sampling the packets; PSAMPLE_ATTR_DATA - the actual packet bits. The sampled packets are sent to the PSAMPLE_NL_MCGRP_SAMPLE multicast group. In addition, add the GET_GROUP netlink command, which allows the user to see the current sample groups, their refcount and sequence number. This command currently supports only netlink dump mode. Signed-off-by: Yotam Gigi <yotamg@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com> Reviewed-by: Simon Horman <simon.horman@netronome.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--MAINTAINERS7
-rw-r--r--include/net/psample.h36
-rw-r--r--include/uapi/linux/Kbuild1
-rw-r--r--include/uapi/linux/psample.h35
-rw-r--r--net/Kconfig1
-rw-r--r--net/Makefile1
-rw-r--r--net/psample/Kconfig15
-rw-r--r--net/psample/Makefile5
-rw-r--r--net/psample/psample.c301
9 files changed, 402 insertions, 0 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 3c84a8fecc09..d76fccd09266 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9957,6 +9957,13 @@ L: linuxppc-dev@lists.ozlabs.org
9957S: Maintained 9957S: Maintained
9958F: drivers/block/ps3vram.c 9958F: drivers/block/ps3vram.c
9959 9959
9960PSAMPLE PACKET SAMPLING SUPPORT
9961M: Yotam Gigi <yotamg@mellanox.com>
9962S: Maintained
9963F: net/psample
9964F: include/net/psample.h
9965F: include/uapi/linux/psample.h
9966
9960PSTORE FILESYSTEM 9967PSTORE FILESYSTEM
9961M: Anton Vorontsov <anton@enomsg.org> 9968M: Anton Vorontsov <anton@enomsg.org>
9962M: Colin Cross <ccross@android.com> 9969M: Colin Cross <ccross@android.com>
diff --git a/include/net/psample.h b/include/net/psample.h
new file mode 100644
index 000000000000..8888b0e1a82e
--- /dev/null
+++ b/include/net/psample.h
@@ -0,0 +1,36 @@
1#ifndef __NET_PSAMPLE_H
2#define __NET_PSAMPLE_H
3
4#include <uapi/linux/psample.h>
5#include <linux/module.h>
6#include <linux/list.h>
7
8struct psample_group {
9 struct list_head list;
10 struct net *net;
11 u32 group_num;
12 u32 refcount;
13 u32 seq;
14};
15
16struct psample_group *psample_group_get(struct net *net, u32 group_num);
17void psample_group_put(struct psample_group *group);
18
19#if IS_ENABLED(CONFIG_PSAMPLE)
20
21void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
22 u32 trunc_size, int in_ifindex, int out_ifindex,
23 u32 sample_rate);
24
25#else
26
27static inline void psample_sample_packet(struct psample_group *group,
28 struct sk_buff *skb, u32 trunc_size,
29 int in_ifindex, int out_ifindex,
30 u32 sample_rate)
31{
32}
33
34#endif
35
36#endif /* __NET_PSAMPLE_H */
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index e600b50be77e..80ad741a42fa 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -305,6 +305,7 @@ header-y += netrom.h
305header-y += net_namespace.h 305header-y += net_namespace.h
306header-y += net_tstamp.h 306header-y += net_tstamp.h
307header-y += nfc.h 307header-y += nfc.h
308header-y += psample.h
308header-y += nfs2.h 309header-y += nfs2.h
309header-y += nfs3.h 310header-y += nfs3.h
310header-y += nfs4.h 311header-y += nfs4.h
diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h
new file mode 100644
index 000000000000..ed48996ec0e8
--- /dev/null
+++ b/include/uapi/linux/psample.h
@@ -0,0 +1,35 @@
1#ifndef __UAPI_PSAMPLE_H
2#define __UAPI_PSAMPLE_H
3
/* Netlink attribute types carried by psample messages. */
enum {
	/* sampled packet metadata */
	PSAMPLE_ATTR_IIFINDEX,		/* input ifindex, if applicable */
	PSAMPLE_ATTR_OIFINDEX,		/* output ifindex, if applicable */
	PSAMPLE_ATTR_ORIGSIZE,		/* packet size before truncation */
	PSAMPLE_ATTR_SAMPLE_GROUP,	/* sample group number */
	PSAMPLE_ATTR_GROUP_SEQ,		/* per-group sequence counter */
	PSAMPLE_ATTR_SAMPLE_RATE,	/* sampling rate used for the packet */
	PSAMPLE_ATTR_DATA,		/* the (possibly truncated) packet */

	/* commands attributes */
	PSAMPLE_ATTR_GROUP_REFCOUNT,	/* reference count of a group */

	/* terminator: one past the last valid attribute, keep last */
	__PSAMPLE_ATTR_MAX
};
19
/* Generic netlink commands of the psample family. */
enum psample_command {
	PSAMPLE_CMD_SAMPLE,	/* message carrying one sampled packet */
	PSAMPLE_CMD_GET_GROUP,	/* request: dump the existing groups */
	PSAMPLE_CMD_NEW_GROUP,	/* group description / creation notice */
	PSAMPLE_CMD_DEL_GROUP,	/* group removal notice */
};
26
/* Highest valid attribute number; derived from the attribute enum terminator */
#define PSAMPLE_ATTR_MAX		(__PSAMPLE_ATTR_MAX - 1)

/* Names of the multicast groups and of the generic netlink family */
#define PSAMPLE_NL_MCGRP_CONFIG_NAME	"config"
#define PSAMPLE_NL_MCGRP_SAMPLE_NAME	"packets"
#define PSAMPLE_GENL_NAME		"psample"
#define PSAMPLE_GENL_VERSION		1
34
35#endif
diff --git a/net/Kconfig b/net/Kconfig
index 92ae1500d9e1..ce4aee69fc0d 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -390,6 +390,7 @@ source "net/9p/Kconfig"
390source "net/caif/Kconfig" 390source "net/caif/Kconfig"
391source "net/ceph/Kconfig" 391source "net/ceph/Kconfig"
392source "net/nfc/Kconfig" 392source "net/nfc/Kconfig"
393source "net/psample/Kconfig"
393 394
394config LWTUNNEL 395config LWTUNNEL
395 bool "Network light weight tunnels" 396 bool "Network light weight tunnels"
diff --git a/net/Makefile b/net/Makefile
index 5d6e0e5ff7f8..7d41de48310e 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -70,6 +70,7 @@ obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/
70obj-$(CONFIG_CEPH_LIB) += ceph/ 70obj-$(CONFIG_CEPH_LIB) += ceph/
71obj-$(CONFIG_BATMAN_ADV) += batman-adv/ 71obj-$(CONFIG_BATMAN_ADV) += batman-adv/
72obj-$(CONFIG_NFC) += nfc/ 72obj-$(CONFIG_NFC) += nfc/
73obj-$(CONFIG_PSAMPLE) += psample/
73obj-$(CONFIG_OPENVSWITCH) += openvswitch/ 74obj-$(CONFIG_OPENVSWITCH) += openvswitch/
74obj-$(CONFIG_VSOCKETS) += vmw_vsock/ 75obj-$(CONFIG_VSOCKETS) += vmw_vsock/
75obj-$(CONFIG_MPLS) += mpls/ 76obj-$(CONFIG_MPLS) += mpls/
diff --git a/net/psample/Kconfig b/net/psample/Kconfig
new file mode 100644
index 000000000000..d850246a6059
--- /dev/null
+++ b/net/psample/Kconfig
@@ -0,0 +1,15 @@
1#
2# psample packet sampling configuration
3#
4
5menuconfig PSAMPLE
6 depends on NET
7 tristate "Packet-sampling netlink channel"
8 default n
9 help
10 Say Y here to add support for the packet-sampling netlink channel.
11 This netlink channel allows transferring packets alongside some
12 metadata to userspace.
13
14 To compile this support as a module, choose M here: the module will
15 be called psample.
diff --git a/net/psample/Makefile b/net/psample/Makefile
new file mode 100644
index 000000000000..609b0a79c9f3
--- /dev/null
+++ b/net/psample/Makefile
@@ -0,0 +1,5 @@
1#
2# Makefile for the psample netlink channel
3#
4
5obj-$(CONFIG_PSAMPLE) += psample.o
diff --git a/net/psample/psample.c b/net/psample/psample.c
new file mode 100644
index 000000000000..8aa58a918783
--- /dev/null
+++ b/net/psample/psample.c
@@ -0,0 +1,301 @@
1/*
2 * net/psample/psample.c - Netlink channel for packet sampling
3 * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/types.h>
11#include <linux/kernel.h>
12#include <linux/skbuff.h>
13#include <linux/module.h>
14#include <net/net_namespace.h>
15#include <net/sock.h>
16#include <net/netlink.h>
17#include <net/genetlink.h>
18#include <net/psample.h>
19#include <linux/spinlock.h>
20
21#define PSAMPLE_MAX_PACKET_SIZE 0xffff
22
23static LIST_HEAD(psample_groups_list);
24static DEFINE_SPINLOCK(psample_groups_lock);
25
26/* multicast groups */
27enum psample_nl_multicast_groups {
28 PSAMPLE_NL_MCGRP_CONFIG,
29 PSAMPLE_NL_MCGRP_SAMPLE,
30};
31
32static const struct genl_multicast_group psample_nl_mcgrps[] = {
33 [PSAMPLE_NL_MCGRP_CONFIG] = { .name = PSAMPLE_NL_MCGRP_CONFIG_NAME },
34 [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME },
35};
36
37static struct genl_family psample_nl_family __ro_after_init;
38
39static int psample_group_nl_fill(struct sk_buff *msg,
40 struct psample_group *group,
41 enum psample_command cmd, u32 portid, u32 seq,
42 int flags)
43{
44 void *hdr;
45 int ret;
46
47 hdr = genlmsg_put(msg, portid, seq, &psample_nl_family, flags, cmd);
48 if (!hdr)
49 return -EMSGSIZE;
50
51 ret = nla_put_u32(msg, PSAMPLE_ATTR_SAMPLE_GROUP, group->group_num);
52 if (ret < 0)
53 goto error;
54
55 ret = nla_put_u32(msg, PSAMPLE_ATTR_GROUP_REFCOUNT, group->refcount);
56 if (ret < 0)
57 goto error;
58
59 ret = nla_put_u32(msg, PSAMPLE_ATTR_GROUP_SEQ, group->seq);
60 if (ret < 0)
61 goto error;
62
63 genlmsg_end(msg, hdr);
64 return 0;
65
66error:
67 genlmsg_cancel(msg, hdr);
68 return -EMSGSIZE;
69}
70
71static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg,
72 struct netlink_callback *cb)
73{
74 struct psample_group *group;
75 int start = cb->args[0];
76 int idx = 0;
77 int err;
78
79 spin_lock(&psample_groups_lock);
80 list_for_each_entry(group, &psample_groups_list, list) {
81 if (!net_eq(group->net, sock_net(msg->sk)))
82 continue;
83 if (idx < start) {
84 idx++;
85 continue;
86 }
87 err = psample_group_nl_fill(msg, group, PSAMPLE_CMD_NEW_GROUP,
88 NETLINK_CB(cb->skb).portid,
89 cb->nlh->nlmsg_seq, NLM_F_MULTI);
90 if (err)
91 break;
92 idx++;
93 }
94
95 spin_unlock(&psample_groups_lock);
96 cb->args[0] = idx;
97 return msg->len;
98}
99
100static const struct genl_ops psample_nl_ops[] = {
101 {
102 .cmd = PSAMPLE_CMD_GET_GROUP,
103 .dumpit = psample_nl_cmd_get_group_dumpit,
104 /* can be retrieved by unprivileged users */
105 }
106};
107
108static struct genl_family psample_nl_family __ro_after_init = {
109 .name = PSAMPLE_GENL_NAME,
110 .version = PSAMPLE_GENL_VERSION,
111 .maxattr = PSAMPLE_ATTR_MAX,
112 .netnsok = true,
113 .module = THIS_MODULE,
114 .mcgrps = psample_nl_mcgrps,
115 .ops = psample_nl_ops,
116 .n_ops = ARRAY_SIZE(psample_nl_ops),
117 .n_mcgrps = ARRAY_SIZE(psample_nl_mcgrps),
118};
119
120static void psample_group_notify(struct psample_group *group,
121 enum psample_command cmd)
122{
123 struct sk_buff *msg;
124 int err;
125
126 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
127 if (!msg)
128 return;
129
130 err = psample_group_nl_fill(msg, group, cmd, 0, 0, NLM_F_MULTI);
131 if (!err)
132 genlmsg_multicast_netns(&psample_nl_family, group->net, msg, 0,
133 PSAMPLE_NL_MCGRP_CONFIG, GFP_ATOMIC);
134 else
135 nlmsg_free(msg);
136}
137
138static struct psample_group *psample_group_create(struct net *net,
139 u32 group_num)
140{
141 struct psample_group *group;
142
143 group = kzalloc(sizeof(*group), GFP_ATOMIC);
144 if (!group)
145 return NULL;
146
147 group->net = net;
148 group->group_num = group_num;
149 list_add_tail(&group->list, &psample_groups_list);
150
151 psample_group_notify(group, PSAMPLE_CMD_NEW_GROUP);
152 return group;
153}
154
155static void psample_group_destroy(struct psample_group *group)
156{
157 psample_group_notify(group, PSAMPLE_CMD_DEL_GROUP);
158 list_del(&group->list);
159 kfree(group);
160}
161
162static struct psample_group *
163psample_group_lookup(struct net *net, u32 group_num)
164{
165 struct psample_group *group;
166
167 list_for_each_entry(group, &psample_groups_list, list)
168 if ((group->group_num == group_num) && (group->net == net))
169 return group;
170 return NULL;
171}
172
173struct psample_group *psample_group_get(struct net *net, u32 group_num)
174{
175 struct psample_group *group;
176
177 spin_lock(&psample_groups_lock);
178
179 group = psample_group_lookup(net, group_num);
180 if (!group) {
181 group = psample_group_create(net, group_num);
182 if (!group)
183 goto out;
184 }
185 group->refcount++;
186
187out:
188 spin_unlock(&psample_groups_lock);
189 return group;
190}
191EXPORT_SYMBOL_GPL(psample_group_get);
192
193void psample_group_put(struct psample_group *group)
194{
195 spin_lock(&psample_groups_lock);
196
197 if (--group->refcount == 0)
198 psample_group_destroy(group);
199
200 spin_unlock(&psample_groups_lock);
201}
202EXPORT_SYMBOL_GPL(psample_group_put);
203
204void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
205 u32 trunc_size, int in_ifindex, int out_ifindex,
206 u32 sample_rate)
207{
208 struct sk_buff *nl_skb;
209 int data_len;
210 int meta_len;
211 void *data;
212 int ret;
213
214 meta_len = (in_ifindex ? nla_total_size(sizeof(u16)) : 0) +
215 (out_ifindex ? nla_total_size(sizeof(u16)) : 0) +
216 nla_total_size(sizeof(u32)) + /* sample_rate */
217 nla_total_size(sizeof(u32)) + /* orig_size */
218 nla_total_size(sizeof(u32)) + /* group_num */
219 nla_total_size(sizeof(u32)); /* seq */
220
221 data_len = min(skb->len, trunc_size);
222 if (meta_len + nla_total_size(data_len) > PSAMPLE_MAX_PACKET_SIZE)
223 data_len = PSAMPLE_MAX_PACKET_SIZE - meta_len - NLA_HDRLEN
224 - NLA_ALIGNTO;
225
226 nl_skb = genlmsg_new(meta_len + data_len, GFP_ATOMIC);
227 if (unlikely(!nl_skb))
228 return;
229
230 data = genlmsg_put(nl_skb, 0, 0, &psample_nl_family, 0,
231 PSAMPLE_CMD_SAMPLE);
232 if (unlikely(!data))
233 goto error;
234
235 if (in_ifindex) {
236 ret = nla_put_u16(nl_skb, PSAMPLE_ATTR_IIFINDEX, in_ifindex);
237 if (unlikely(ret < 0))
238 goto error;
239 }
240
241 if (out_ifindex) {
242 ret = nla_put_u16(nl_skb, PSAMPLE_ATTR_OIFINDEX, out_ifindex);
243 if (unlikely(ret < 0))
244 goto error;
245 }
246
247 ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_SAMPLE_RATE, sample_rate);
248 if (unlikely(ret < 0))
249 goto error;
250
251 ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_ORIGSIZE, skb->len);
252 if (unlikely(ret < 0))
253 goto error;
254
255 ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_SAMPLE_GROUP, group->group_num);
256 if (unlikely(ret < 0))
257 goto error;
258
259 ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_GROUP_SEQ, group->seq++);
260 if (unlikely(ret < 0))
261 goto error;
262
263 if (data_len) {
264 int nla_len = nla_total_size(data_len);
265 struct nlattr *nla;
266
267 nla = (struct nlattr *)skb_put(nl_skb, nla_len);
268 nla->nla_type = PSAMPLE_ATTR_DATA;
269 nla->nla_len = nla_attr_size(data_len);
270
271 if (skb_copy_bits(skb, 0, nla_data(nla), data_len))
272 goto error;
273 }
274
275 genlmsg_end(nl_skb, data);
276 genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0,
277 PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC);
278
279 return;
280error:
281 pr_err_ratelimited("Could not create psample log message\n");
282 nlmsg_free(nl_skb);
283}
284EXPORT_SYMBOL_GPL(psample_sample_packet);
285
286static int __init psample_module_init(void)
287{
288 return genl_register_family(&psample_nl_family);
289}
290
291static void __exit psample_module_exit(void)
292{
293 genl_unregister_family(&psample_nl_family);
294}
295
296module_init(psample_module_init);
297module_exit(psample_module_exit);
298
299MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>");
300MODULE_DESCRIPTION("netlink channel for packet sampling");
301MODULE_LICENSE("GPL v2");