aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Daniel Borkmann <dborkman@redhat.com> 2013-06-21 13:38:08 -0400
committer: David S. Miller <davem@davemloft.net> 2013-06-24 19:39:05 -0400
commit: e4fc408e0e99fd2e009c8b3702d9637f5554fd5c (patch)
tree: f808ee97e82aaeea7c5fce0dea7ae37610ad27cb
parent: bcbde0d449eda7afa8f63280b165c8300dbd00e2 (diff)
packet: nlmon: virtual netlink monitoring device for packet sockets
Currently, there is no good possibility to debug netlink traffic that is being exchanged between kernel and user space. Therefore, this patch implements a netlink virtual device, so that netlink messages will be made visible to PF_PACKET sockets. Once there was an approach with a similar idea [1], but it got forgotten somehow.

I think it makes most sense to accept the "overhead" of an extra netlink net device over implementing the same functionality from PF_PACKET sockets once again into netlink sockets. We have BPF filters that can already be easily applied which even have netlink extensions, we have RX_RING zero-copy between kernel- and user space that can be reused, and much more features. So instead of re-implementing all of this, we simply pass the skb to a given PF_PACKET socket for further analysis.

Another nice benefit that comes from that is that no code needs to be changed in user space packet analyzers (maybe adding a dissector, but not more), thus out of the box, we can already capture pcap files of netlink traffic to debug/troubleshoot netlink problems.

Also thanks goes to Thomas Graf, Flavio Leitner, Jesper Dangaard Brouer.

[1] http://marc.info/?l=linux-netdev&m=113813401516110

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- drivers/net/Kconfig 10
-rw-r--r-- drivers/net/Makefile 1
-rw-r--r-- drivers/net/nlmon.c 170
3 files changed, 181 insertions, 0 deletions
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 00aba08f01a9..b45b240889f5 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -240,6 +240,16 @@ config VIRTIO_NET
240 This is the virtual network driver for virtio. It can be used with 240 This is the virtual network driver for virtio. It can be used with
241 lguest or QEMU based VMMs (like KVM or Xen). Say Y or M. 241 lguest or QEMU based VMMs (like KVM or Xen). Say Y or M.
242 242
243config NLMON
244 tristate "Virtual netlink monitoring device"
245 ---help---
246 This option enables a monitoring net device for netlink skbs. The
247 purpose of this is to analyze netlink messages with packet sockets.
248 Thus applications like tcpdump will be able to see local netlink
249 messages if they tap into the netlink device, record pcaps for further
250 diagnostics, etc. This is mostly intended for developers or support
251 to debug netlink issues. If unsure, say N.
252
243endif # NET_CORE 253endif # NET_CORE
244 254
245config SUNGEM_PHY 255config SUNGEM_PHY
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index ef3d090efedf..3fef8a81c0f6 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_TUN) += tun.o
22obj-$(CONFIG_VETH) += veth.o 22obj-$(CONFIG_VETH) += veth.o
23obj-$(CONFIG_VIRTIO_NET) += virtio_net.o 23obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
24obj-$(CONFIG_VXLAN) += vxlan.o 24obj-$(CONFIG_VXLAN) += vxlan.o
25obj-$(CONFIG_NLMON) += nlmon.o
25 26
26# 27#
27# Networking Drivers 28# Networking Drivers
diff --git a/drivers/net/nlmon.c b/drivers/net/nlmon.c
new file mode 100644
index 000000000000..dc364be6e61e
--- /dev/null
+++ b/drivers/net/nlmon.c
@@ -0,0 +1,170 @@
1#include <linux/module.h>
2#include <linux/kernel.h>
3#include <linux/netdevice.h>
4#include <linux/netlink.h>
5#include <net/net_namespace.h>
6#include <linux/if_arp.h>
7
8struct pcpu_lstats {
9 u64 packets;
10 u64 bytes;
11 struct u64_stats_sync syncp;
12};
13
14static netdev_tx_t nlmon_xmit(struct sk_buff *skb, struct net_device *dev)
15{
16 int len = skb->len;
17 struct pcpu_lstats *stats = this_cpu_ptr(dev->lstats);
18
19 u64_stats_update_begin(&stats->syncp);
20 stats->bytes += len;
21 stats->packets++;
22 u64_stats_update_end(&stats->syncp);
23
24 dev_kfree_skb(skb);
25
26 return NETDEV_TX_OK;
27}
28
29static int nlmon_is_valid_mtu(int new_mtu)
30{
31 return new_mtu >= sizeof(struct nlmsghdr) && new_mtu <= INT_MAX;
32}
33
34static int nlmon_change_mtu(struct net_device *dev, int new_mtu)
35{
36 if (!nlmon_is_valid_mtu(new_mtu))
37 return -EINVAL;
38
39 dev->mtu = new_mtu;
40 return 0;
41}
42
43static int nlmon_dev_init(struct net_device *dev)
44{
45 dev->lstats = alloc_percpu(struct pcpu_lstats);
46
47 return dev->lstats == NULL ? -ENOMEM : 0;
48}
49
50static void nlmon_dev_uninit(struct net_device *dev)
51{
52 free_percpu(dev->lstats);
53}
54
55static struct netlink_tap nlmon_tap;
56
57static int nlmon_open(struct net_device *dev)
58{
59 return netlink_add_tap(&nlmon_tap);
60}
61
62static int nlmon_close(struct net_device *dev)
63{
64 return netlink_remove_tap(&nlmon_tap);
65}
66
67static struct rtnl_link_stats64 *
68nlmon_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
69{
70 int i;
71 u64 bytes = 0, packets = 0;
72
73 for_each_possible_cpu(i) {
74 const struct pcpu_lstats *nl_stats;
75 u64 tbytes, tpackets;
76 unsigned int start;
77
78 nl_stats = per_cpu_ptr(dev->lstats, i);
79
80 do {
81 start = u64_stats_fetch_begin_bh(&nl_stats->syncp);
82 tbytes = nl_stats->bytes;
83 tpackets = nl_stats->packets;
84 } while (u64_stats_fetch_retry_bh(&nl_stats->syncp, start));
85
86 packets += tpackets;
87 bytes += tbytes;
88 }
89
90 stats->rx_packets = packets;
91 stats->tx_packets = 0;
92
93 stats->rx_bytes = bytes;
94 stats->tx_bytes = 0;
95
96 return stats;
97}
98
99static u32 always_on(struct net_device *dev)
100{
101 return 1;
102}
103
104static const struct ethtool_ops nlmon_ethtool_ops = {
105 .get_link = always_on,
106};
107
108static const struct net_device_ops nlmon_ops = {
109 .ndo_init = nlmon_dev_init,
110 .ndo_uninit = nlmon_dev_uninit,
111 .ndo_open = nlmon_open,
112 .ndo_stop = nlmon_close,
113 .ndo_start_xmit = nlmon_xmit,
114 .ndo_get_stats64 = nlmon_get_stats64,
115 .ndo_change_mtu = nlmon_change_mtu,
116};
117
118static struct netlink_tap nlmon_tap __read_mostly = {
119 .module = THIS_MODULE,
120};
121
122static void nlmon_setup(struct net_device *dev)
123{
124 dev->type = ARPHRD_NETLINK;
125 dev->tx_queue_len = 0;
126
127 dev->netdev_ops = &nlmon_ops;
128 dev->ethtool_ops = &nlmon_ethtool_ops;
129 dev->destructor = free_netdev;
130
131 dev->features = NETIF_F_FRAGLIST | NETIF_F_HIGHDMA;
132 dev->flags = IFF_NOARP;
133
134 /* That's rather a softlimit here, which, of course,
135 * can be altered. Not a real MTU, but what is to be
136 * expected in most cases.
137 */
138 dev->mtu = NLMSG_GOODSIZE;
139}
140
141static __init int nlmon_register(void)
142{
143 int err;
144 struct net_device *nldev;
145
146 nldev = nlmon_tap.dev = alloc_netdev(0, "netlink", nlmon_setup);
147 if (unlikely(nldev == NULL))
148 return -ENOMEM;
149
150 err = register_netdev(nldev);
151 if (unlikely(err))
152 free_netdev(nldev);
153
154 return err;
155}
156
157static __exit void nlmon_unregister(void)
158{
159 struct net_device *nldev = nlmon_tap.dev;
160
161 unregister_netdev(nldev);
162}
163
164module_init(nlmon_register);
165module_exit(nlmon_unregister);
166
167MODULE_LICENSE("GPL v2");
168MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
169MODULE_AUTHOR("Mathieu Geli <geli@enseirb.fr>");
170MODULE_DESCRIPTION("Netlink monitoring device");