author     Anton Ivanov <anton.ivanov@cambridgegreys.com>   2017-11-20 16:17:59 -0500
committer  Richard Weinberger <richard@nod.at>              2018-02-19 13:38:51 -0500
commit     49da7e64f33e80edffb1a9eeb230fa4c3f42dffb (patch)
tree       3d91defecef8c2235c9ef31e919e1ff33aa01e81 /arch/um
parent     ff6a17989c08b0bb0fd490cc500b084581b3a9b9 (diff)
High Performance UML Vector Network Driver
1. Provides infrastructure for vector IO using recvmmsg/sendmmsg.
   1.1. Multi-message read.
   1.2. Multi-message write.
   1.3. Optimized queue support for multi-packet enqueue/dequeue.
   1.4. BQL/DQL support.
2. Implements several transports as well as support for direct wiring of
   PWEs to the NIC. Allows direct connection of VMs to the host, other VMs
   and network devices with no switch in use.
   2.1. Raw socket: more than 4 times higher PPS and 10 times higher TCP RX
        throughput than the existing pcap based transport (> 4 Gbit).
   2.2. New tap transport using socket RX and tap xmit, with similar
        performance improvements (> 4 Gbit).
   2.3. GRE transport - direct wiring to a GRE PWE.
   2.4. L2TPv3 transport - direct wiring to an L2TPv3 PWE.
3. Tuning, performance and offload related settings are supported via ethtool.
4. Initial BPF support - used in the tap/raw transports to avoid software looping.
5. Scatter-gather support.
6. VNET (virtio_net header) and checksum offload support for the raw socket transport.
7. TSO/GSO support where applicable or available.
8. Migrates all error messages to netdev_*() and rate limits them where needed.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
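As background for item 1, the sketch below is illustrative only and is not part of the patch: it shows the basic recvmmsg()/sendmmsg() batching pattern the driver builds on, as seen from userspace. One array of mmsghdr entries is prepared and then filled or drained with a single system call per batch. The vector depth of 64 mirrors DEFAULT_VECTOR_SIZE in vector_kern.c; the buffer size and all other names are assumptions made for the example.

    /* Minimal userspace sketch, assuming an already-connected datagram socket. */
    #define _GNU_SOURCE
    #include <sys/socket.h>
    #include <string.h>

    #define VEC_DEPTH 64
    #define PKT_SIZE  1536

    static char bufs[VEC_DEPTH][PKT_SIZE];
    static struct iovec iov[VEC_DEPTH];
    static struct mmsghdr msgs[VEC_DEPTH];

    /* Receive up to VEC_DEPTH datagrams with one system call. */
    static int rx_batch(int fd)
    {
            int i;

            for (i = 0; i < VEC_DEPTH; i++) {
                    iov[i].iov_base = bufs[i];
                    iov[i].iov_len = PKT_SIZE;
                    memset(&msgs[i].msg_hdr, 0, sizeof(msgs[i].msg_hdr));
                    msgs[i].msg_hdr.msg_iov = &iov[i];
                    msgs[i].msg_hdr.msg_iovlen = 1;
            }
            /* Return value is the number of messages received;
             * msgs[i].msg_len holds the size of each one.
             */
            return recvmmsg(fd, msgs, VEC_DEPTH, MSG_DONTWAIT, NULL);
    }

    /* Send the first 'count' prepared messages with one system call. */
    static int tx_batch(int fd, int count)
    {
            return sendmmsg(fd, msgs, count, 0);
    }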
Diffstat (limited to 'arch/um')
-rw-r--r--  arch/um/Kconfig.net                    11
-rw-r--r--  arch/um/drivers/Makefile                4
-rw-r--r--  arch/um/drivers/net_kern.c              4
-rw-r--r--  arch/um/drivers/vector_kern.c        1630
-rw-r--r--  arch/um/drivers/vector_kern.h         129
-rw-r--r--  arch/um/drivers/vector_transports.c   458
-rw-r--r--  arch/um/drivers/vector_user.c         586
-rw-r--r--  arch/um/drivers/vector_user.h          99
-rw-r--r--  arch/um/include/asm/irq.h              12
-rw-r--r--  arch/um/include/shared/net_kern.h       2
10 files changed, 2932 insertions(+), 3 deletions(-)
diff --git a/arch/um/Kconfig.net b/arch/um/Kconfig.net
index e871af24d9cd..c390f3deb0dc 100644
--- a/arch/um/Kconfig.net
+++ b/arch/um/Kconfig.net
@@ -109,6 +109,17 @@ config UML_NET_DAEMON
109 109	  more than one without conflict. If you don't need UML networking,
110 110	  say N.
111 111
112config UML_NET_VECTOR
113 bool "Vector I/O high performance network devices"
114 depends on UML_NET
115 help
116 This User-Mode Linux network driver uses multi-message send
117 and receive functions. The host running the UML guest must have
118 a linux kernel version above 3.0 and a libc version > 2.13.
119 This driver provides tap, raw, gre and l2tpv3 network transports
120 with up to 4 times higher network throughput than the UML network
121 drivers.
122
112 123 config UML_NET_VDE
113 124 	bool "VDE transport"
114 125 	depends on UML_NET
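As a usage illustration (not part of the patch): the new option is enabled in the UML kernel configuration, and a device is then attached on the UML command line using the vec<unit>:<option>=<value> syntax that this commit registers via __setup("vec", ...) and the "vec" mconsole device. Only options parsed in vector_kern.c are shown below; transport-specific options are handled in vector_user.c, which is outside this excerpt, so a real raw/tap setup needs additional arguments.

    CONFIG_UML_NET_VECTOR=y

    ./linux ... vec0:transport=raw,mac=52:54:00:12:34:56,mtu=1500,depth=128,gro=1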
diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
index e7582e1d248c..16b3cebddafb 100644
--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -9,6 +9,7 @@
 9  9 slip-objs := slip_kern.o slip_user.o
10 10 slirp-objs := slirp_kern.o slirp_user.o
11 11 daemon-objs := daemon_kern.o daemon_user.o
12vector-objs := vector_kern.o vector_user.o vector_transports.o
12 13 umcast-objs := umcast_kern.o umcast_user.o
13 14 net-objs := net_kern.o net_user.o
14 15 mconsole-objs := mconsole_kern.o mconsole_user.o
@@ -43,6 +44,7 @@ obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o
43 44 obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o
44 45 obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o
45 46 obj-$(CONFIG_UML_NET_DAEMON) += daemon.o
47obj-$(CONFIG_UML_NET_VECTOR) += vector.o
46 48 obj-$(CONFIG_UML_NET_VDE) += vde.o
47 49 obj-$(CONFIG_UML_NET_MCAST) += umcast.o
48 50 obj-$(CONFIG_UML_NET_PCAP) += pcap.o
@@ -61,7 +63,7 @@ obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
61 63 obj-$(CONFIG_UML_RANDOM) += random.o
62 64
63 65 # pcap_user.o must be added explicitly.
64    - USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o
   66 + USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
65 67 CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH)
66 68
67 69 include arch/um/scripts/Makefile.rules
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index b305f8247909..3ef1b48e064a 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -288,7 +288,7 @@ static void uml_net_user_timer_expire(struct timer_list *t)
288 288 #endif
289 289 }
290 290
291     - static void setup_etheraddr(struct net_device *dev, char *str)
    291 + void uml_net_setup_etheraddr(struct net_device *dev, char *str)
292 292 {
293 293 	unsigned char *addr = dev->dev_addr;
294 294 	char *end;
@@ -412,7 +412,7 @@ static void eth_configure(int n, void *init, char *mac,
412 412 	 */
413 413 	snprintf(dev->name, sizeof(dev->name), "eth%d", n);
414 414
415     - 	setup_etheraddr(dev, mac);
    415 + 	uml_net_setup_etheraddr(dev, mac);
416 416
417 417 	printk(KERN_INFO "Netdevice %d (%pM) : ", n, dev->dev_addr);
418 418
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
new file mode 100644
index 000000000000..d1d53015d572
--- /dev/null
+++ b/arch/um/drivers/vector_kern.c
@@ -0,0 +1,1630 @@
1/*
2 * Copyright (C) 2017 - Cambridge Greys Limited
3 * Copyright (C) 2011 - 2014 Cisco Systems Inc
4 * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
5 * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
6 * James Leu (jleu@mindspring.net).
7 * Copyright (C) 2001 by various other people who didn't put their name here.
8 * Licensed under the GPL.
9 */
10
11#include <linux/version.h>
12#include <linux/bootmem.h>
13#include <linux/etherdevice.h>
14#include <linux/ethtool.h>
15#include <linux/inetdevice.h>
16#include <linux/init.h>
17#include <linux/list.h>
18#include <linux/netdevice.h>
19#include <linux/platform_device.h>
20#include <linux/rtnetlink.h>
21#include <linux/skbuff.h>
22#include <linux/slab.h>
23#include <linux/interrupt.h>
24#include <init.h>
25#include <irq_kern.h>
26#include <irq_user.h>
27#include <net_kern.h>
28#include <os.h>
29#include "mconsole_kern.h"
30#include "vector_user.h"
31#include "vector_kern.h"
32
33/*
34 * Adapted from network devices with the following major changes:
35 * All transports are static - simplifies the code significantly
36 * Multiple FDs/IRQs per device
37 * Vector IO optionally used for read/write, falling back to legacy
38 * based on configuration and/or availability
39 * Configuration is no longer positional - L2TPv3 and GRE require up to
40 * 10 parameters, passing this as positional is not fit for purpose.
41 * Only socket transports are supported
42 */
43
44
45#define DRIVER_NAME "uml-vector"
46#define DRIVER_VERSION "01"
47struct vector_cmd_line_arg {
48 struct list_head list;
49 int unit;
50 char *arguments;
51};
52
53struct vector_device {
54 struct list_head list;
55 struct net_device *dev;
56 struct platform_device pdev;
57 int unit;
58 int opened;
59};
60
61static LIST_HEAD(vec_cmd_line);
62
63static DEFINE_SPINLOCK(vector_devices_lock);
64static LIST_HEAD(vector_devices);
65
66static int driver_registered;
67
68static void vector_eth_configure(int n, struct arglist *def);
69
70/* Argument accessors to set variables (and/or set default values)
71 * mtu, buffer sizing, default headroom, etc
72 */
73
74#define DEFAULT_HEADROOM 2
75#define SAFETY_MARGIN 32
76#define DEFAULT_VECTOR_SIZE 64
77#define TX_SMALL_PACKET 128
78#define MAX_IOV_SIZE (MAX_SKB_FRAGS + 1)
79
80static const struct {
81 const char string[ETH_GSTRING_LEN];
82} ethtool_stats_keys[] = {
83 { "rx_queue_max" },
84 { "rx_queue_running_average" },
85 { "tx_queue_max" },
86 { "tx_queue_running_average" },
87 { "rx_encaps_errors" },
88 { "tx_timeout_count" },
89 { "tx_restart_queue" },
90 { "tx_kicks" },
91 { "tx_flow_control_xon" },
92 { "tx_flow_control_xoff" },
93 { "rx_csum_offload_good" },
94 { "rx_csum_offload_errors"},
95 { "sg_ok"},
96 { "sg_linearized"},
97};
98
99#define VECTOR_NUM_STATS ARRAY_SIZE(ethtool_stats_keys)
100
101static void vector_reset_stats(struct vector_private *vp)
102{
103 vp->estats.rx_queue_max = 0;
104 vp->estats.rx_queue_running_average = 0;
105 vp->estats.tx_queue_max = 0;
106 vp->estats.tx_queue_running_average = 0;
107 vp->estats.rx_encaps_errors = 0;
108 vp->estats.tx_timeout_count = 0;
109 vp->estats.tx_restart_queue = 0;
110 vp->estats.tx_kicks = 0;
111 vp->estats.tx_flow_control_xon = 0;
112 vp->estats.tx_flow_control_xoff = 0;
113 vp->estats.sg_ok = 0;
114 vp->estats.sg_linearized = 0;
115}
116
117static int get_mtu(struct arglist *def)
118{
119 char *mtu = uml_vector_fetch_arg(def, "mtu");
120 long result;
121
122 if (mtu != NULL) {
123 if (kstrtoul(mtu, 10, &result) == 0)
124 return result;
125 }
126 return ETH_MAX_PACKET;
127}
128
129static int get_depth(struct arglist *def)
130{
131 char *mtu = uml_vector_fetch_arg(def, "depth");
132 long result;
133
134 if (mtu != NULL) {
135 if (kstrtoul(mtu, 10, &result) == 0)
136 return result;
137 }
138 return DEFAULT_VECTOR_SIZE;
139}
140
141static int get_headroom(struct arglist *def)
142{
143 char *mtu = uml_vector_fetch_arg(def, "headroom");
144 long result;
145
146 if (mtu != NULL) {
147 if (kstrtoul(mtu, 10, &result) == 0)
148 return result;
149 }
150 return DEFAULT_HEADROOM;
151}
152
153static int get_req_size(struct arglist *def)
154{
155 char *gro = uml_vector_fetch_arg(def, "gro");
156 long result;
157
158 if (gro != NULL) {
159 if (kstrtoul(gro, 10, &result) == 0) {
160 if (result > 0)
161 return 65536;
162 }
163 }
164 return get_mtu(def) + ETH_HEADER_OTHER +
165 get_headroom(def) + SAFETY_MARGIN;
166}
167
168
169static int get_transport_options(struct arglist *def)
170{
171 char *transport = uml_vector_fetch_arg(def, "transport");
172 char *vector = uml_vector_fetch_arg(def, "vec");
173
174 int vec_rx = VECTOR_RX;
175 int vec_tx = VECTOR_TX;
176 long parsed;
177
178 if (vector != NULL) {
179 if (kstrtoul(vector, 10, &parsed) == 0) {
180 if (parsed == 0) {
181 vec_rx = 0;
182 vec_tx = 0;
183 }
184 }
185 }
186
187
188 if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
189 return (vec_rx | VECTOR_BPF);
190 if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
191 return (vec_rx | vec_tx | VECTOR_BPF);
192 return (vec_rx | vec_tx);
193}
194
195
196/* A mini-buffer for packet drop read
197 * All of our supported transports are datagram oriented and we always
198 * read using recvmsg or recvmmsg. If we pass a buffer which is smaller
199 * than the packet size it still counts as full packet read and will
200 * clean the incoming stream to keep sigio/epoll happy
201 */
202
203#define DROP_BUFFER_SIZE 32
204
205static char *drop_buffer;
206
207/* Array backed queues optimized for bulk enqueue/dequeue and
208 * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios.
209 * For more details and full design rationale see
210 * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt
211 */
212
213
214/*
215 * Advance the mmsg queue head by n = advance. Resets the queue to
216 * maximum enqueue/dequeue-at-once capacity if possible. Called by
217 * dequeuers. Caller must hold the head_lock!
218 */
219
220static int vector_advancehead(struct vector_queue *qi, int advance)
221{
222 int queue_depth;
223
224 qi->head =
225 (qi->head + advance)
226 % qi->max_depth;
227
228
229 spin_lock(&qi->tail_lock);
230 qi->queue_depth -= advance;
231
232 /* we are at 0, use this to
233 * reset head and tail so we can use max size vectors
234 */
235
236 if (qi->queue_depth == 0) {
237 qi->head = 0;
238 qi->tail = 0;
239 }
240 queue_depth = qi->queue_depth;
241 spin_unlock(&qi->tail_lock);
242 return queue_depth;
243}
244
245/* Advance the queue tail by n = advance.
246 * This is called by enqueuers which should hold the
247 * head lock already
248 */
249
250static int vector_advancetail(struct vector_queue *qi, int advance)
251{
252 int queue_depth;
253
254 qi->tail =
255 (qi->tail + advance)
256 % qi->max_depth;
257 spin_lock(&qi->head_lock);
258 qi->queue_depth += advance;
259 queue_depth = qi->queue_depth;
260 spin_unlock(&qi->head_lock);
261 return queue_depth;
262}
263
264static int prep_msg(struct vector_private *vp,
265 struct sk_buff *skb,
266 struct iovec *iov)
267{
268 int iov_index = 0;
269 int nr_frags, frag;
270 skb_frag_t *skb_frag;
271
272 nr_frags = skb_shinfo(skb)->nr_frags;
273 if (nr_frags > MAX_IOV_SIZE) {
274 if (skb_linearize(skb) != 0)
275 goto drop;
276 }
277 if (vp->header_size > 0) {
278 iov[iov_index].iov_len = vp->header_size;
279 vp->form_header(iov[iov_index].iov_base, skb, vp);
280 iov_index++;
281 }
282 iov[iov_index].iov_base = skb->data;
283 if (nr_frags > 0) {
284 iov[iov_index].iov_len = skb->len - skb->data_len;
285 vp->estats.sg_ok++;
286 } else
287 iov[iov_index].iov_len = skb->len;
288 iov_index++;
289 for (frag = 0; frag < nr_frags; frag++) {
290 skb_frag = &skb_shinfo(skb)->frags[frag];
291 iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
292 iov[iov_index].iov_len = skb_frag_size(skb_frag);
293 iov_index++;
294 }
295 return iov_index;
296drop:
297 return -1;
298}
299/*
300 * Generic vector enqueue with support for forming headers using transport
301 * specific callback. Allows GRE, L2TPv3, RAW and other transports
302 * to use a common enqueue procedure in vector mode
303 */
304
305static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
306{
307 struct vector_private *vp = netdev_priv(qi->dev);
308 int queue_depth;
309 int packet_len;
310 struct mmsghdr *mmsg_vector = qi->mmsg_vector;
311 int iov_count;
312
313 spin_lock(&qi->tail_lock);
314 spin_lock(&qi->head_lock);
315 queue_depth = qi->queue_depth;
316 spin_unlock(&qi->head_lock);
317
318 if (skb)
319 packet_len = skb->len;
320
321 if (queue_depth < qi->max_depth) {
322
323 *(qi->skbuff_vector + qi->tail) = skb;
324 mmsg_vector += qi->tail;
325 iov_count = prep_msg(
326 vp,
327 skb,
328 mmsg_vector->msg_hdr.msg_iov
329 );
330 if (iov_count < 1)
331 goto drop;
332 mmsg_vector->msg_hdr.msg_iovlen = iov_count;
333 mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr;
334 mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size;
335 queue_depth = vector_advancetail(qi, 1);
336 } else
337 goto drop;
338 spin_unlock(&qi->tail_lock);
339 return queue_depth;
340drop:
341 qi->dev->stats.tx_dropped++;
342 if (skb != NULL) {
343 packet_len = skb->len;
344 dev_consume_skb_any(skb);
345 netdev_completed_queue(qi->dev, 1, packet_len);
346 }
347 spin_unlock(&qi->tail_lock);
348 return queue_depth;
349}
350
351static int consume_vector_skbs(struct vector_queue *qi, int count)
352{
353 struct sk_buff *skb;
354 int skb_index;
355 int bytes_compl = 0;
356
357 for (skb_index = qi->head; skb_index < qi->head + count; skb_index++) {
358 skb = *(qi->skbuff_vector + skb_index);
359 /* mark as empty to ensure correct destruction if
360 * needed
361 */
362 bytes_compl += skb->len;
363 *(qi->skbuff_vector + skb_index) = NULL;
364 dev_consume_skb_any(skb);
365 }
366 qi->dev->stats.tx_bytes += bytes_compl;
367 qi->dev->stats.tx_packets += count;
368 netdev_completed_queue(qi->dev, count, bytes_compl);
369 return vector_advancehead(qi, count);
370}
371
372/*
373 * Generic vector deque via sendmmsg with support for forming headers
374 * using transport specific callback. Allows GRE, L2TPv3, RAW and
375 * other transports to use a common dequeue procedure in vector mode
376 */
377
378
379static int vector_send(struct vector_queue *qi)
380{
381 struct vector_private *vp = netdev_priv(qi->dev);
382 struct mmsghdr *send_from;
383 int result = 0, send_len, queue_depth = qi->max_depth;
384
385 if (spin_trylock(&qi->head_lock)) {
386 if (spin_trylock(&qi->tail_lock)) {
387 /* update queue_depth to current value */
388 queue_depth = qi->queue_depth;
389 spin_unlock(&qi->tail_lock);
390 while (queue_depth > 0) {
391 /* Calculate the start of the vector */
392 send_len = queue_depth;
393 send_from = qi->mmsg_vector;
394 send_from += qi->head;
395 /* Adjust vector size if wraparound */
396 if (send_len + qi->head > qi->max_depth)
397 send_len = qi->max_depth - qi->head;
398 /* Try to TX as many packets as possible */
399 if (send_len > 0) {
400 result = uml_vector_sendmmsg(
401 vp->fds->tx_fd,
402 send_from,
403 send_len,
404 0
405 );
406 vp->in_write_poll =
407 (result != send_len);
408 }
409 /* For some of the sendmmsg error scenarios
410 * we may end being unsure in the TX success
411 * for all packets. It is safer to declare
412 * them all TX-ed and blame the network.
413 */
414 if (result < 0) {
415 if (net_ratelimit())
416 netdev_err(vp->dev, "sendmmsg err=%i\n",
417 result);
418 result = send_len;
419 }
420 if (result > 0) {
421 queue_depth =
422 consume_vector_skbs(qi, result);
423 /* This is equivalent to a TX IRQ.
424 * Restart the upper layers to feed us
425 * more packets.
426 */
427 if (result > vp->estats.tx_queue_max)
428 vp->estats.tx_queue_max = result;
429 vp->estats.tx_queue_running_average =
430 (vp->estats.tx_queue_running_average + result) >> 1;
431 }
432 netif_trans_update(qi->dev);
433 netif_wake_queue(qi->dev);
434 /* if TX is busy, break out of the send loop,
435 * poll write IRQ will reschedule xmit for us
436 */
437 if (result != send_len) {
438 vp->estats.tx_restart_queue++;
439 break;
440 }
441 }
442 }
443 spin_unlock(&qi->head_lock);
444 } else {
445 tasklet_schedule(&vp->tx_poll);
446 }
447 return queue_depth;
448}
449
450/* Queue destructor. Deliberately stateless so we can use
451 * it in queue cleanup if initialization fails.
452 */
453
454static void destroy_queue(struct vector_queue *qi)
455{
456 int i;
457 struct iovec *iov;
458 struct vector_private *vp = netdev_priv(qi->dev);
459 struct mmsghdr *mmsg_vector;
460
461 if (qi == NULL)
462 return;
463 /* deallocate any skbuffs - we rely on any unused to be
464 * set to NULL.
465 */
466 if (qi->skbuff_vector != NULL) {
467 for (i = 0; i < qi->max_depth; i++) {
468 if (*(qi->skbuff_vector + i) != NULL)
469 dev_kfree_skb_any(*(qi->skbuff_vector + i));
470 }
471 kfree(qi->skbuff_vector);
472 }
473 /* deallocate matching IOV structures including header buffs */
474 if (qi->mmsg_vector != NULL) {
475 mmsg_vector = qi->mmsg_vector;
476 for (i = 0; i < qi->max_depth; i++) {
477 iov = mmsg_vector->msg_hdr.msg_iov;
478 if (iov != NULL) {
479 if ((vp->header_size > 0) &&
480 (iov->iov_base != NULL))
481 kfree(iov->iov_base);
482 kfree(iov);
483 }
484 mmsg_vector++;
485 }
486 kfree(qi->mmsg_vector);
487 }
488 kfree(qi);
489}
490
491/*
492 * Queue constructor. Create a queue with a given side.
493 */
494static struct vector_queue *create_queue(
495 struct vector_private *vp,
496 int max_size,
497 int header_size,
498 int num_extra_frags)
499{
500 struct vector_queue *result;
501 int i;
502 struct iovec *iov;
503 struct mmsghdr *mmsg_vector;
504
505 result = kmalloc(sizeof(struct vector_queue), GFP_KERNEL);
506 if (result == NULL)
507 goto out_fail;
508 result->max_depth = max_size;
509 result->dev = vp->dev;
510 result->mmsg_vector = kmalloc(
511 (sizeof(struct mmsghdr) * max_size), GFP_KERNEL);
512 result->skbuff_vector = kmalloc(
513 (sizeof(void *) * max_size), GFP_KERNEL);
514 if (result->mmsg_vector == NULL || result->skbuff_vector == NULL)
515 goto out_fail;
516
517 mmsg_vector = result->mmsg_vector;
518 for (i = 0; i < max_size; i++) {
519 /* Clear all pointers - we use non-NULL as marking on
520 * what to free on destruction
521 */
522 *(result->skbuff_vector + i) = NULL;
523 mmsg_vector->msg_hdr.msg_iov = NULL;
524 mmsg_vector++;
525 }
526 mmsg_vector = result->mmsg_vector;
527 result->max_iov_frags = num_extra_frags;
528 for (i = 0; i < max_size; i++) {
529 if (vp->header_size > 0)
530 iov = kmalloc(
531 sizeof(struct iovec) * (3 + num_extra_frags),
532 GFP_KERNEL
533 );
534 else
535 iov = kmalloc(
536 sizeof(struct iovec) * (2 + num_extra_frags),
537 GFP_KERNEL
538 );
539 if (iov == NULL)
540 goto out_fail;
541 mmsg_vector->msg_hdr.msg_iov = iov;
542 mmsg_vector->msg_hdr.msg_iovlen = 1;
543 mmsg_vector->msg_hdr.msg_control = NULL;
544 mmsg_vector->msg_hdr.msg_controllen = 0;
545 mmsg_vector->msg_hdr.msg_flags = MSG_DONTWAIT;
546 mmsg_vector->msg_hdr.msg_name = NULL;
547 mmsg_vector->msg_hdr.msg_namelen = 0;
548 if (vp->header_size > 0) {
549 iov->iov_base = kmalloc(header_size, GFP_KERNEL);
550 if (iov->iov_base == NULL)
551 goto out_fail;
552 iov->iov_len = header_size;
553 mmsg_vector->msg_hdr.msg_iovlen = 2;
554 iov++;
555 }
556 iov->iov_base = NULL;
557 iov->iov_len = 0;
558 mmsg_vector++;
559 }
560 spin_lock_init(&result->head_lock);
561 spin_lock_init(&result->tail_lock);
562 result->queue_depth = 0;
563 result->head = 0;
564 result->tail = 0;
565 return result;
566out_fail:
567 destroy_queue(result);
568 return NULL;
569}
570
571/*
572 * We do not use the RX queue as a proper wraparound queue for now
573 * This is not necessary because the consumption via netif_rx()
574 * happens in-line. While we can try using the return code of
575 * netif_rx() for flow control there are no drivers doing this today.
576 * For this RX specific use we ignore the tail/head locks and
577 * just read into a prepared queue filled with skbuffs.
578 */
579
580static struct sk_buff *prep_skb(
581 struct vector_private *vp,
582 struct user_msghdr *msg)
583{
584 int linear = vp->max_packet + vp->headroom + SAFETY_MARGIN;
585 struct sk_buff *result;
586 int iov_index = 0, len;
587 struct iovec *iov = msg->msg_iov;
588 int err, nr_frags, frag;
589 skb_frag_t *skb_frag;
590
591 if (vp->req_size <= linear)
592 len = linear;
593 else
594 len = vp->req_size;
595 result = alloc_skb_with_frags(
596 linear,
597 len - vp->max_packet,
598 3,
599 &err,
600 GFP_ATOMIC
601 );
602 if (vp->header_size > 0)
603 iov_index++;
604 if (result == NULL) {
605 iov[iov_index].iov_base = NULL;
606 iov[iov_index].iov_len = 0;
607 goto done;
608 }
609 skb_reserve(result, vp->headroom);
610 result->dev = vp->dev;
611 skb_put(result, vp->max_packet);
612 result->data_len = len - vp->max_packet;
613 result->len += len - vp->max_packet;
614 skb_reset_mac_header(result);
615 result->ip_summed = CHECKSUM_NONE;
616 iov[iov_index].iov_base = result->data;
617 iov[iov_index].iov_len = vp->max_packet;
618 iov_index++;
619
620 nr_frags = skb_shinfo(result)->nr_frags;
621 for (frag = 0; frag < nr_frags; frag++) {
622 skb_frag = &skb_shinfo(result)->frags[frag];
623 iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
624 if (iov[iov_index].iov_base != NULL)
625 iov[iov_index].iov_len = skb_frag_size(skb_frag);
626 else
627 iov[iov_index].iov_len = 0;
628 iov_index++;
629 }
630done:
631 msg->msg_iovlen = iov_index;
632 return result;
633}
634
635
636/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/
637
638static void prep_queue_for_rx(struct vector_queue *qi)
639{
640 struct vector_private *vp = netdev_priv(qi->dev);
641 struct mmsghdr *mmsg_vector = qi->mmsg_vector;
642 void **skbuff_vector = qi->skbuff_vector;
643 int i;
644
645 if (qi->queue_depth == 0)
646 return;
647 for (i = 0; i < qi->queue_depth; i++) {
648 /* it is OK if allocation fails - recvmmsg with NULL data in
649 * iov argument still performs an RX, just drops the packet
650 * This allows us stop faffing around with a "drop buffer"
651 */
652
653 *skbuff_vector = prep_skb(vp, &mmsg_vector->msg_hdr);
654 skbuff_vector++;
655 mmsg_vector++;
656 }
657 qi->queue_depth = 0;
658}
659
660static struct vector_device *find_device(int n)
661{
662 struct vector_device *device;
663 struct list_head *ele;
664
665 spin_lock(&vector_devices_lock);
666 list_for_each(ele, &vector_devices) {
667 device = list_entry(ele, struct vector_device, list);
668 if (device->unit == n)
669 goto out;
670 }
671 device = NULL;
672 out:
673 spin_unlock(&vector_devices_lock);
674 return device;
675}
676
677static int vector_parse(char *str, int *index_out, char **str_out,
678 char **error_out)
679{
680 int n, len, err = -EINVAL;
681 char *start = str;
682
683 len = strlen(str);
684
685 while ((*str != ':') && (strlen(str) > 1))
686 str++;
687 if (*str != ':') {
688 *error_out = "Expected ':' after device number";
689 return err;
690 }
691 *str = '\0';
692
693 err = kstrtouint(start, 0, &n);
694 if (err < 0) {
695 *error_out = "Bad device number";
696 return err;
697 }
698
699 str++;
700 if (find_device(n)) {
701 *error_out = "Device already configured";
702 return err;
703 }
704
705 *index_out = n;
706 *str_out = str;
707 return 0;
708}
709
710static int vector_config(char *str, char **error_out)
711{
712 int err, n;
713 char *params;
714 struct arglist *parsed;
715
716 err = vector_parse(str, &n, &params, error_out);
717 if (err != 0)
718 return err;
719
720 /* This string is broken up and the pieces used by the underlying
721 * driver. We should copy it to make sure things do not go wrong
722 * later.
723 */
724
725 params = kstrdup(params, GFP_KERNEL);
726 if (str == NULL) {
727 *error_out = "vector_config failed to strdup string";
728 return -ENOMEM;
729 }
730
731 parsed = uml_parse_vector_ifspec(params);
732
733 if (parsed == NULL) {
734 *error_out = "vector_config failed to parse parameters";
735 return -EINVAL;
736 }
737
738 vector_eth_configure(n, parsed);
739 return 0;
740}
741
742static int vector_id(char **str, int *start_out, int *end_out)
743{
744 char *end;
745 int n;
746
747 n = simple_strtoul(*str, &end, 0);
748 if ((*end != '\0') || (end == *str))
749 return -1;
750
751 *start_out = n;
752 *end_out = n;
753 *str = end;
754 return n;
755}
756
757static int vector_remove(int n, char **error_out)
758{
759 struct vector_device *vec_d;
760 struct net_device *dev;
761 struct vector_private *vp;
762
763 vec_d = find_device(n);
764 if (vec_d == NULL)
765 return -ENODEV;
766 dev = vec_d->dev;
767 vp = netdev_priv(dev);
768 if (vp->fds != NULL)
769 return -EBUSY;
770 unregister_netdev(dev);
771 platform_device_unregister(&vec_d->pdev);
772 return 0;
773}
774
775/*
776 * There is no shared per-transport initialization code, so
777 * we will just initialize each interface one by one and
778 * add them to a list
779 */
780
781static struct platform_driver uml_net_driver = {
782 .driver = {
783 .name = DRIVER_NAME,
784 },
785};
786
787
788static void vector_device_release(struct device *dev)
789{
790 struct vector_device *device = dev_get_drvdata(dev);
791 struct net_device *netdev = device->dev;
792
793 list_del(&device->list);
794 kfree(device);
795 free_netdev(netdev);
796}
797
798/* Bog standard recv using recvmsg - not used normally unless the user
799 * explicitly specifies not to use recvmmsg vector RX.
800 */
801
802static int vector_legacy_rx(struct vector_private *vp)
803{
804 int pkt_len;
805 struct user_msghdr hdr;
806 struct iovec iov[2 + MAX_IOV_SIZE]; /* header + data use case only */
807 int iovpos = 0;
808 struct sk_buff *skb;
809 int header_check;
810
811 hdr.msg_name = NULL;
812 hdr.msg_namelen = 0;
813 hdr.msg_iov = (struct iovec *) &iov;
814 hdr.msg_control = NULL;
815 hdr.msg_controllen = 0;
816 hdr.msg_flags = 0;
817
818 if (vp->header_size > 0) {
819 iov[0].iov_base = vp->header_rxbuffer;
820 iov[0].iov_len = vp->header_size;
821 }
822
823 skb = prep_skb(vp, &hdr);
824
825 if (skb == NULL) {
826 /* Read a packet into drop_buffer and don't do
827 * anything with it.
828 */
829 iov[iovpos].iov_base = drop_buffer;
830 iov[iovpos].iov_len = DROP_BUFFER_SIZE;
831 hdr.msg_iovlen = 1;
832 vp->dev->stats.rx_dropped++;
833 }
834
835 pkt_len = uml_vector_recvmsg(vp->fds->rx_fd, &hdr, 0);
836
837 if (skb != NULL) {
838 if (pkt_len > vp->header_size) {
839 if (vp->header_size > 0) {
840 header_check = vp->verify_header(
841 vp->header_rxbuffer, skb, vp);
842 if (header_check < 0) {
843 dev_kfree_skb_irq(skb);
844 vp->dev->stats.rx_dropped++;
845 vp->estats.rx_encaps_errors++;
846 return 0;
847 }
848 if (header_check > 0) {
849 vp->estats.rx_csum_offload_good++;
850 skb->ip_summed = CHECKSUM_UNNECESSARY;
851 }
852 }
853 pskb_trim(skb, pkt_len - vp->rx_header_size);
854 skb->protocol = eth_type_trans(skb, skb->dev);
855 vp->dev->stats.rx_bytes += skb->len;
856 vp->dev->stats.rx_packets++;
857 netif_rx(skb);
858 } else {
859 dev_kfree_skb_irq(skb);
860 }
861 }
862 return pkt_len;
863}
864
865/*
866 * Packet at a time TX which falls back to vector TX if the
867 * underlying transport is busy.
868 */
869
870
871
872static int writev_tx(struct vector_private *vp, struct sk_buff *skb)
873{
874 struct iovec iov[3 + MAX_IOV_SIZE];
875 int iov_count, pkt_len = 0;
876
877 iov[0].iov_base = vp->header_txbuffer;
878 iov_count = prep_msg(vp, skb, (struct iovec *) &iov);
879
880 if (iov_count < 1)
881 goto drop;
882 pkt_len = uml_vector_writev(
883 vp->fds->tx_fd,
884 (struct iovec *) &iov,
885 iov_count
886 );
887
888 netif_trans_update(vp->dev);
889 netif_wake_queue(vp->dev);
890
891 if (pkt_len > 0) {
892 vp->dev->stats.tx_bytes += skb->len;
893 vp->dev->stats.tx_packets++;
894 } else {
895 vp->dev->stats.tx_dropped++;
896 }
897 consume_skb(skb);
898 return pkt_len;
899drop:
900 vp->dev->stats.tx_dropped++;
901 consume_skb(skb);
902 return pkt_len;
903}
904
905/*
906 * Receive as many messages as we can in one call using the special
907 * mmsg vector matched to an skb vector which we prepared earlier.
908 */
909
910static int vector_mmsg_rx(struct vector_private *vp)
911{
912 int packet_count, i;
913 struct vector_queue *qi = vp->rx_queue;
914 struct sk_buff *skb;
915 struct mmsghdr *mmsg_vector = qi->mmsg_vector;
916 void **skbuff_vector = qi->skbuff_vector;
917 int header_check;
918
919 /* Refresh the vector and make sure it is with new skbs and the
920 * iovs are updated to point to them.
921 */
922
923 prep_queue_for_rx(qi);
924
925 /* Fire the Lazy Gun - get as many packets as we can in one go. */
926
927 packet_count = uml_vector_recvmmsg(
928 vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0);
929
930 if (packet_count <= 0)
931 return packet_count;
932
933 /* We treat packet processing as enqueue, buffer refresh as dequeue
934 * The queue_depth tells us how many buffers have been used and how
935 * many do we need to prep the next time prep_queue_for_rx() is called.
936 */
937
938 qi->queue_depth = packet_count;
939
940 for (i = 0; i < packet_count; i++) {
941 skb = (*skbuff_vector);
942 if (mmsg_vector->msg_len > vp->header_size) {
943 if (vp->header_size > 0) {
944 header_check = vp->verify_header(
945 mmsg_vector->msg_hdr.msg_iov->iov_base,
946 skb,
947 vp
948 );
949 if (header_check < 0) {
950 /* Overlay header failed to verify - discard.
951 * We can actually keep this skb and reuse it,
952 * but that will make the prep logic too
953 * complex.
954 */
955 dev_kfree_skb_irq(skb);
956 vp->estats.rx_encaps_errors++;
957 continue;
958 }
959 if (header_check > 0) {
960 vp->estats.rx_csum_offload_good++;
961 skb->ip_summed = CHECKSUM_UNNECESSARY;
962 }
963 }
964 pskb_trim(skb,
965 mmsg_vector->msg_len - vp->rx_header_size);
966 skb->protocol = eth_type_trans(skb, skb->dev);
967 /*
968 * We do not need to lock on updating stats here
969 * The interrupt loop is non-reentrant.
970 */
971 vp->dev->stats.rx_bytes += skb->len;
972 vp->dev->stats.rx_packets++;
973 netif_rx(skb);
974 } else {
975 /* Overlay header too short to do anything - discard.
976 * We can actually keep this skb and reuse it,
977 * but that will make the prep logic too complex.
978 */
979 if (skb != NULL)
980 dev_kfree_skb_irq(skb);
981 }
982 (*skbuff_vector) = NULL;
983 /* Move to the next buffer element */
984 mmsg_vector++;
985 skbuff_vector++;
986 }
987 if (packet_count > 0) {
988 if (vp->estats.rx_queue_max < packet_count)
989 vp->estats.rx_queue_max = packet_count;
990 vp->estats.rx_queue_running_average =
991 (vp->estats.rx_queue_running_average + packet_count) >> 1;
992 }
993 return packet_count;
994}
995
996static void vector_rx(struct vector_private *vp)
997{
998 int err;
999
1000 if ((vp->options & VECTOR_RX) > 0)
1001 while ((err = vector_mmsg_rx(vp)) > 0)
1002 ;
1003 else
1004 while ((err = vector_legacy_rx(vp)) > 0)
1005 ;
1006 if ((err != 0) && net_ratelimit())
1007 netdev_err(vp->dev, "vector_rx: error(%d)\n", err);
1008}
1009
1010static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
1011{
1012 struct vector_private *vp = netdev_priv(dev);
1013 int queue_depth = 0;
1014
1015 if ((vp->options & VECTOR_TX) == 0) {
1016 writev_tx(vp, skb);
1017 return NETDEV_TX_OK;
1018 }
1019
1020 /* We do BQL only in the vector path, no point doing it in
1021 * packet at a time mode as there is no device queue
1022 */
1023
1024 netdev_sent_queue(vp->dev, skb->len);
1025 queue_depth = vector_enqueue(vp->tx_queue, skb);
1026
1027 /* if the device queue is full, stop the upper layers and
1028 * flush it.
1029 */
1030
1031 if (queue_depth >= vp->tx_queue->max_depth - 1) {
1032 vp->estats.tx_kicks++;
1033 netif_stop_queue(dev);
1034 vector_send(vp->tx_queue);
1035 return NETDEV_TX_OK;
1036 }
1037 if (skb->xmit_more) {
1038 mod_timer(&vp->tl, vp->coalesce);
1039 return NETDEV_TX_OK;
1040 }
1041 if (skb->len < TX_SMALL_PACKET) {
1042 vp->estats.tx_kicks++;
1043 vector_send(vp->tx_queue);
1044 } else
1045 tasklet_schedule(&vp->tx_poll);
1046 return NETDEV_TX_OK;
1047}
1048
1049static irqreturn_t vector_rx_interrupt(int irq, void *dev_id)
1050{
1051 struct net_device *dev = dev_id;
1052 struct vector_private *vp = netdev_priv(dev);
1053
1054 if (!netif_running(dev))
1055 return IRQ_NONE;
1056 vector_rx(vp);
1057 return IRQ_HANDLED;
1058
1059}
1060
1061static irqreturn_t vector_tx_interrupt(int irq, void *dev_id)
1062{
1063 struct net_device *dev = dev_id;
1064 struct vector_private *vp = netdev_priv(dev);
1065
1066 if (!netif_running(dev))
1067 return IRQ_NONE;
1068 /* We need to pay attention to it only if we got
1069 * -EAGAIN or -ENOBUFFS from sendmmsg. Otherwise
1070 * we ignore it. In the future, it may be worth
1071 * it to improve the IRQ controller a bit to make
1072 * tweaking the IRQ mask less costly
1073 */
1074
1075 if (vp->in_write_poll)
1076 tasklet_schedule(&vp->tx_poll);
1077 return IRQ_HANDLED;
1078
1079}
1080
1081static int irq_rr;
1082
1083static int vector_net_close(struct net_device *dev)
1084{
1085 struct vector_private *vp = netdev_priv(dev);
1086 unsigned long flags;
1087
1088 netif_stop_queue(dev);
1089 del_timer(&vp->tl);
1090
1091 if (vp->fds == NULL)
1092 return 0;
1093
1094 /* Disable and free all IRQS */
1095 if (vp->rx_irq > 0) {
1096 um_free_irq(vp->rx_irq, dev);
1097 vp->rx_irq = 0;
1098 }
1099 if (vp->tx_irq > 0) {
1100 um_free_irq(vp->tx_irq, dev);
1101 vp->tx_irq = 0;
1102 }
1103 tasklet_kill(&vp->tx_poll);
1104 if (vp->fds->rx_fd > 0) {
1105 os_close_file(vp->fds->rx_fd);
1106 vp->fds->rx_fd = -1;
1107 }
1108 if (vp->fds->tx_fd > 0) {
1109 os_close_file(vp->fds->tx_fd);
1110 vp->fds->tx_fd = -1;
1111 }
1112 if (vp->bpf != NULL)
1113 kfree(vp->bpf);
1114 if (vp->fds->remote_addr != NULL)
1115 kfree(vp->fds->remote_addr);
1116 if (vp->transport_data != NULL)
1117 kfree(vp->transport_data);
1118 if (vp->header_rxbuffer != NULL)
1119 kfree(vp->header_rxbuffer);
1120 if (vp->header_txbuffer != NULL)
1121 kfree(vp->header_txbuffer);
1122 if (vp->rx_queue != NULL)
1123 destroy_queue(vp->rx_queue);
1124 if (vp->tx_queue != NULL)
1125 destroy_queue(vp->tx_queue);
1126 kfree(vp->fds);
1127 vp->fds = NULL;
1128 spin_lock_irqsave(&vp->lock, flags);
1129 vp->opened = false;
1130 spin_unlock_irqrestore(&vp->lock, flags);
1131 return 0;
1132}
1133
1134/* TX tasklet */
1135
1136static void vector_tx_poll(unsigned long data)
1137{
1138 struct vector_private *vp = (struct vector_private *)data;
1139
1140 vp->estats.tx_kicks++;
1141 vector_send(vp->tx_queue);
1142}
1143static void vector_reset_tx(struct work_struct *work)
1144{
1145 struct vector_private *vp =
1146 container_of(work, struct vector_private, reset_tx);
1147 netdev_reset_queue(vp->dev);
1148 netif_start_queue(vp->dev);
1149 netif_wake_queue(vp->dev);
1150}
1151static int vector_net_open(struct net_device *dev)
1152{
1153 struct vector_private *vp = netdev_priv(dev);
1154 unsigned long flags;
1155 int err = -EINVAL;
1156 struct vector_device *vdevice;
1157
1158 spin_lock_irqsave(&vp->lock, flags);
1159 if (vp->opened)
1160 return -ENXIO;
1161 vp->opened = true;
1162 spin_unlock_irqrestore(&vp->lock, flags);
1163
1164 vp->fds = uml_vector_user_open(vp->unit, vp->parsed);
1165
1166 if (vp->fds == NULL)
1167 goto out_close;
1168
1169 if (build_transport_data(vp) < 0)
1170 goto out_close;
1171
1172 if ((vp->options & VECTOR_RX) > 0) {
1173 vp->rx_queue = create_queue(
1174 vp,
1175 get_depth(vp->parsed),
1176 vp->rx_header_size,
1177 MAX_IOV_SIZE
1178 );
1179 vp->rx_queue->queue_depth = get_depth(vp->parsed);
1180 } else {
1181 vp->header_rxbuffer = kmalloc(
1182 vp->rx_header_size,
1183 GFP_KERNEL
1184 );
1185 if (vp->header_rxbuffer == NULL)
1186 goto out_close;
1187 }
1188 if ((vp->options & VECTOR_TX) > 0) {
1189 vp->tx_queue = create_queue(
1190 vp,
1191 get_depth(vp->parsed),
1192 vp->header_size,
1193 MAX_IOV_SIZE
1194 );
1195 } else {
1196 vp->header_txbuffer = kmalloc(vp->header_size, GFP_KERNEL);
1197 if (vp->header_txbuffer == NULL)
1198 goto out_close;
1199 }
1200
1201 /* READ IRQ */
1202 err = um_request_irq(
1203 irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd,
1204 IRQ_READ, vector_rx_interrupt,
1205 IRQF_SHARED, dev->name, dev);
1206 if (err != 0) {
1207 netdev_err(dev, "vector_open: failed to get rx irq(%d)\n", err);
1208 err = -ENETUNREACH;
1209 goto out_close;
1210 }
1211 vp->rx_irq = irq_rr + VECTOR_BASE_IRQ;
1212 dev->irq = irq_rr + VECTOR_BASE_IRQ;
1213 irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
1214
1215 /* WRITE IRQ - we need it only if we have vector TX */
1216 if ((vp->options & VECTOR_TX) > 0) {
1217 err = um_request_irq(
1218 irq_rr + VECTOR_BASE_IRQ, vp->fds->tx_fd,
1219 IRQ_WRITE, vector_tx_interrupt,
1220 IRQF_SHARED, dev->name, dev);
1221 if (err != 0) {
1222 netdev_err(dev,
1223 "vector_open: failed to get tx irq(%d)\n", err);
1224 err = -ENETUNREACH;
1225 goto out_close;
1226 }
1227 vp->tx_irq = irq_rr + VECTOR_BASE_IRQ;
1228 irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
1229 }
1230
1231 if ((vp->options & VECTOR_BPF) != 0)
1232 vp->bpf = uml_vector_default_bpf(vp->fds->rx_fd, dev->dev_addr);
1233
1234 /* Write Timeout Timer */
1235
1236 vp->tl.data = (unsigned long) vp;
1237 netif_start_queue(dev);
1238
1239 /* clear buffer - it can happen that the host side of the interface
1240 * is full when we get here. In this case, new data is never queued,
1241 * SIGIOs never arrive, and the net never works.
1242 */
1243
1244 vector_rx(vp);
1245
1246 vector_reset_stats(vp);
1247 vdevice = find_device(vp->unit);
1248 vdevice->opened = 1;
1249
1250 if ((vp->options & VECTOR_TX) != 0)
1251 add_timer(&vp->tl);
1252 return 0;
1253out_close:
1254 vector_net_close(dev);
1255 return err;
1256}
1257
1258
1259static void vector_net_set_multicast_list(struct net_device *dev)
1260{
1261 /* TODO: - we can do some BPF games here */
1262 return;
1263}
1264
1265static void vector_net_tx_timeout(struct net_device *dev)
1266{
1267 struct vector_private *vp = netdev_priv(dev);
1268
1269 vp->estats.tx_timeout_count++;
1270 netif_trans_update(dev);
1271 schedule_work(&vp->reset_tx);
1272}
1273
1274static netdev_features_t vector_fix_features(struct net_device *dev,
1275 netdev_features_t features)
1276{
1277 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
1278 return features;
1279}
1280
1281static int vector_set_features(struct net_device *dev,
1282 netdev_features_t features)
1283{
1284 struct vector_private *vp = netdev_priv(dev);
1285 /* Adjust buffer sizes for GSO/GRO. Unfortunately, there is
1286 * no way to negotiate it on raw sockets, so we can change
1287 * only our side.
1288 */
1289 if (features & NETIF_F_GRO)
1290 /* All new frame buffers will be GRO-sized */
1291 vp->req_size = 65536;
1292 else
1293 /* All new frame buffers will be normal sized */
1294 vp->req_size = vp->max_packet + vp->headroom + SAFETY_MARGIN;
1295 return 0;
1296}
1297
1298#ifdef CONFIG_NET_POLL_CONTROLLER
1299static void vector_net_poll_controller(struct net_device *dev)
1300{
1301 disable_irq(dev->irq);
1302 vector_rx_interrupt(dev->irq, dev);
1303 enable_irq(dev->irq);
1304}
1305#endif
1306
1307static void vector_net_get_drvinfo(struct net_device *dev,
1308 struct ethtool_drvinfo *info)
1309{
1310 strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver));
1311 strlcpy(info->version, DRIVER_VERSION, sizeof(info->version));
1312}
1313
1314static void vector_get_ringparam(struct net_device *netdev,
1315 struct ethtool_ringparam *ring)
1316{
1317 struct vector_private *vp = netdev_priv(netdev);
1318
1319 ring->rx_max_pending = vp->rx_queue->max_depth;
1320 ring->tx_max_pending = vp->tx_queue->max_depth;
1321 ring->rx_pending = vp->rx_queue->max_depth;
1322 ring->tx_pending = vp->tx_queue->max_depth;
1323}
1324
1325static void vector_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
1326{
1327 switch (stringset) {
1328 case ETH_SS_TEST:
1329 *buf = '\0';
1330 break;
1331 case ETH_SS_STATS:
1332 memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
1333 break;
1334 default:
1335 WARN_ON(1);
1336 break;
1337 }
1338}
1339
1340static int vector_get_sset_count(struct net_device *dev, int sset)
1341{
1342 switch (sset) {
1343 case ETH_SS_TEST:
1344 return 0;
1345 case ETH_SS_STATS:
1346 return VECTOR_NUM_STATS;
1347 default:
1348 return -EOPNOTSUPP;
1349 }
1350}
1351
1352static void vector_get_ethtool_stats(struct net_device *dev,
1353 struct ethtool_stats *estats,
1354 u64 *tmp_stats)
1355{
1356 struct vector_private *vp = netdev_priv(dev);
1357
1358 memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats));
1359}
1360
1361static int vector_get_coalesce(struct net_device *netdev,
1362 struct ethtool_coalesce *ec)
1363{
1364 struct vector_private *vp = netdev_priv(netdev);
1365
1366 ec->tx_coalesce_usecs = (vp->coalesce * 1000000) / HZ;
1367 return 0;
1368}
1369
1370static int vector_set_coalesce(struct net_device *netdev,
1371 struct ethtool_coalesce *ec)
1372{
1373 struct vector_private *vp = netdev_priv(netdev);
1374
1375 vp->coalesce = (ec->tx_coalesce_usecs * HZ) / 1000000;
1376 if (vp->coalesce == 0)
1377 vp->coalesce = 1;
1378 return 0;
1379}
1380
1381static const struct ethtool_ops vector_net_ethtool_ops = {
1382 .get_drvinfo = vector_net_get_drvinfo,
1383 .get_link = ethtool_op_get_link,
1384 .get_ts_info = ethtool_op_get_ts_info,
1385 .get_ringparam = vector_get_ringparam,
1386 .get_strings = vector_get_strings,
1387 .get_sset_count = vector_get_sset_count,
1388 .get_ethtool_stats = vector_get_ethtool_stats,
1389 .get_coalesce = vector_get_coalesce,
1390 .set_coalesce = vector_set_coalesce,
1391};
1392
1393
1394static const struct net_device_ops vector_netdev_ops = {
1395 .ndo_open = vector_net_open,
1396 .ndo_stop = vector_net_close,
1397 .ndo_start_xmit = vector_net_start_xmit,
1398 .ndo_set_rx_mode = vector_net_set_multicast_list,
1399 .ndo_tx_timeout = vector_net_tx_timeout,
1400 .ndo_set_mac_address = eth_mac_addr,
1401 .ndo_validate_addr = eth_validate_addr,
1402 .ndo_fix_features = vector_fix_features,
1403 .ndo_set_features = vector_set_features,
1404#ifdef CONFIG_NET_POLL_CONTROLLER
1405 .ndo_poll_controller = vector_net_poll_controller,
1406#endif
1407};
1408
1409
1410static void vector_timer_expire(unsigned long _conn)
1411{
1412 struct vector_private *vp = (struct vector_private *)_conn;
1413
1414 vp->estats.tx_kicks++;
1415 vector_send(vp->tx_queue);
1416}
1417
1418static void vector_eth_configure(
1419 int n,
1420 struct arglist *def
1421 )
1422{
1423 struct vector_device *device;
1424 struct net_device *dev;
1425 struct vector_private *vp;
1426 int err;
1427
1428 device = kzalloc(sizeof(*device), GFP_KERNEL);
1429 if (device == NULL) {
1430 printk(KERN_ERR "eth_configure failed to allocate struct "
1431 "vector_device\n");
1432 return;
1433 }
1434 dev = alloc_etherdev(sizeof(struct vector_private));
1435 if (dev == NULL) {
1436 printk(KERN_ERR "eth_configure: failed to allocate struct "
1437 "net_device for vec%d\n", n);
1438 goto out_free_device;
1439 }
1440
1441 dev->mtu = get_mtu(def);
1442
1443 INIT_LIST_HEAD(&device->list);
1444 device->unit = n;
1445
1446 /* If this name ends up conflicting with an existing registered
1447 * netdevice, that is OK, register_netdev{,ice}() will notice this
1448 * and fail.
1449 */
1450 snprintf(dev->name, sizeof(dev->name), "vec%d", n);
1451 uml_net_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
1452 vp = netdev_priv(dev);
1453
1454 /* sysfs register */
1455 if (!driver_registered) {
1456 platform_driver_register(&uml_net_driver);
1457 driver_registered = 1;
1458 }
1459 device->pdev.id = n;
1460 device->pdev.name = DRIVER_NAME;
1461 device->pdev.dev.release = vector_device_release;
1462 dev_set_drvdata(&device->pdev.dev, device);
1463 if (platform_device_register(&device->pdev))
1464 goto out_free_netdev;
1465 SET_NETDEV_DEV(dev, &device->pdev.dev);
1466
1467 device->dev = dev;
1468
1469 *vp = ((struct vector_private)
1470 {
1471 .list = LIST_HEAD_INIT(vp->list),
1472 .dev = dev,
1473 .unit = n,
1474 .options = get_transport_options(def),
1475 .rx_irq = 0,
1476 .tx_irq = 0,
1477 .parsed = def,
1478 .max_packet = get_mtu(def) + ETH_HEADER_OTHER,
1479 /* TODO - we need to calculate headroom so that ip header
1480 * is 16 byte aligned all the time
1481 */
1482 .headroom = get_headroom(def),
1483 .form_header = NULL,
1484 .verify_header = NULL,
1485 .header_rxbuffer = NULL,
1486 .header_txbuffer = NULL,
1487 .header_size = 0,
1488 .rx_header_size = 0,
1489 .rexmit_scheduled = false,
1490 .opened = false,
1491 .transport_data = NULL,
1492 .in_write_poll = false,
1493 .coalesce = 2,
1494 .req_size = get_req_size(def)
1495 });
1496
1497 dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
1498 tasklet_init(&vp->tx_poll, vector_tx_poll, (unsigned long)vp);
1499 INIT_WORK(&vp->reset_tx, vector_reset_tx);
1500
1501 init_timer(&vp->tl);
1502 spin_lock_init(&vp->lock);
1503 vp->tl.function = vector_timer_expire;
1504
1505 /* FIXME */
1506 dev->netdev_ops = &vector_netdev_ops;
1507 dev->ethtool_ops = &vector_net_ethtool_ops;
1508 dev->watchdog_timeo = (HZ >> 1);
1509 /* primary IRQ - fixme */
1510 dev->irq = 0; /* we will adjust this once opened */
1511
1512 rtnl_lock();
1513 err = register_netdevice(dev);
1514 rtnl_unlock();
1515 if (err)
1516 goto out_undo_user_init;
1517
1518 spin_lock(&vector_devices_lock);
1519 list_add(&device->list, &vector_devices);
1520 spin_unlock(&vector_devices_lock);
1521
1522 return;
1523
1524out_undo_user_init:
1525 return;
1526out_free_netdev:
1527 free_netdev(dev);
1528out_free_device:
1529 kfree(device);
1530}
1531
1532
1533
1534
1535/*
1536 * Invoked late in the init
1537 */
1538
1539static int __init vector_init(void)
1540{
1541 struct list_head *ele;
1542 struct vector_cmd_line_arg *def;
1543 struct arglist *parsed;
1544
1545 list_for_each(ele, &vec_cmd_line) {
1546 def = list_entry(ele, struct vector_cmd_line_arg, list);
1547 parsed = uml_parse_vector_ifspec(def->arguments);
1548 if (parsed != NULL)
1549 vector_eth_configure(def->unit, parsed);
1550 }
1551 return 0;
1552}
1553
1554
1555/* Invoked at initial argument parsing, only stores
1556 * arguments until a proper vector_init is called
1557 * later
1558 */
1559
1560static int __init vector_setup(char *str)
1561{
1562 char *error;
1563 int n, err;
1564 struct vector_cmd_line_arg *new;
1565
1566 err = vector_parse(str, &n, &str, &error);
1567 if (err) {
1568 printk(KERN_ERR "vector_setup - Couldn't parse '%s' : %s\n",
1569 str, error);
1570 return 1;
1571 }
1572 new = alloc_bootmem(sizeof(*new));
1573 INIT_LIST_HEAD(&new->list);
1574 new->unit = n;
1575 new->arguments = str;
1576 list_add_tail(&new->list, &vec_cmd_line);
1577 return 1;
1578}
1579
1580__setup("vec", vector_setup);
1581__uml_help(vector_setup,
1582"vec[0-9]+:<option>=<value>,<option>=<value>\n"
1583" Configure a vector io network device.\n\n"
1584);
1585
1586late_initcall(vector_init);
1587
1588static struct mc_device vector_mc = {
1589 .list = LIST_HEAD_INIT(vector_mc.list),
1590 .name = "vec",
1591 .config = vector_config,
1592 .get_config = NULL,
1593 .id = vector_id,
1594 .remove = vector_remove,
1595};
1596
1597#ifdef CONFIG_INET
1598static int vector_inetaddr_event(
1599 struct notifier_block *this,
1600 unsigned long event,
1601 void *ptr)
1602{
1603 return NOTIFY_DONE;
1604}
1605
1606static struct notifier_block vector_inetaddr_notifier = {
1607 .notifier_call = vector_inetaddr_event,
1608};
1609
1610static void inet_register(void)
1611{
1612 register_inetaddr_notifier(&vector_inetaddr_notifier);
1613}
1614#else
1615static inline void inet_register(void)
1616{
1617}
1618#endif
1619
1620static int vector_net_init(void)
1621{
1622 mconsole_register_dev(&vector_mc);
1623 inet_register();
1624 return 0;
1625}
1626
1627__initcall(vector_net_init);
1628
1629
1630
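The ethtool_ops wired up above (vector_net_ethtool_ops) expose the tuning knobs referred to in item 3 of the commit message. Purely as an illustration, and not part of the patch, once a device such as vec0 is up these handlers can be exercised with the standard ethtool commands that map onto them:

    ethtool -i vec0               # get_drvinfo: driver "uml-vector", version "01"
    ethtool -g vec0               # get_ringparam: RX/TX queue depths
    ethtool -S vec0               # get_ethtool_stats: the keys from ethtool_stats_keys[]
    ethtool -c vec0               # get_coalesce: current TX coalescing in microseconds
    ethtool -C vec0 tx-usecs 50   # set_coalesce: adjust the TX timer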
diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h
new file mode 100644
index 000000000000..699696deb396
--- /dev/null
+++ b/arch/um/drivers/vector_kern.h
@@ -0,0 +1,129 @@
1/*
2 * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL
4 */
5
6#ifndef __UM_VECTOR_KERN_H
7#define __UM_VECTOR_KERN_H
8
9#include <linux/netdevice.h>
10#include <linux/platform_device.h>
11#include <linux/skbuff.h>
12#include <linux/socket.h>
13#include <linux/list.h>
14#include <linux/ctype.h>
15#include <linux/workqueue.h>
16#include <linux/interrupt.h>
17#include "vector_user.h"
18
19/* Queue structure specially adapted for multiple enqueue/dequeue
20 * in a mmsgrecv/mmsgsend context
21 */
22
23/* Dequeue method */
24
25#define QUEUE_SENDMSG 0
26#define QUEUE_SENDMMSG 1
27
28#define VECTOR_RX 1
29#define VECTOR_TX (1 << 1)
30#define VECTOR_BPF (1 << 2)
31
32#define ETH_MAX_PACKET 1500
33#define ETH_HEADER_OTHER 32 /* just in case someone decides to go mad on QnQ */
34
35struct vector_queue {
36 struct mmsghdr *mmsg_vector;
37 void **skbuff_vector;
38 /* backlink to device which owns us */
39 struct net_device *dev;
40 spinlock_t head_lock;
41 spinlock_t tail_lock;
42 int queue_depth, head, tail, max_depth, max_iov_frags;
43 short options;
44};
45
46struct vector_estats {
47 uint64_t rx_queue_max;
48 uint64_t rx_queue_running_average;
49 uint64_t tx_queue_max;
50 uint64_t tx_queue_running_average;
51 uint64_t rx_encaps_errors;
52 uint64_t tx_timeout_count;
53 uint64_t tx_restart_queue;
54 uint64_t tx_kicks;
55 uint64_t tx_flow_control_xon;
56 uint64_t tx_flow_control_xoff;
57 uint64_t rx_csum_offload_good;
58 uint64_t rx_csum_offload_errors;
59 uint64_t sg_ok;
60 uint64_t sg_linearized;
61};
62
63#define VERIFY_HEADER_NOK -1
64#define VERIFY_HEADER_OK 0
65#define VERIFY_CSUM_OK 1
66
67struct vector_private {
68 struct list_head list;
69 spinlock_t lock;
70 struct net_device *dev;
71
72 int unit;
73
74 /* Timeout timer in TX */
75
76 struct timer_list tl;
77
78 /* Scheduled "remove device" work */
79 struct work_struct reset_tx;
80 struct vector_fds *fds;
81
82 struct vector_queue *rx_queue;
83 struct vector_queue *tx_queue;
84
85 int rx_irq;
86 int tx_irq;
87
88 struct arglist *parsed;
89
90 void *transport_data; /* transport specific params if needed */
91
92 int max_packet;
93 int req_size; /* different from max packet - used for TSO */
94 int headroom;
95
96 int options;
97
98 /* remote address if any - some transports will leave this as null */
99
100 int header_size;
101 int rx_header_size;
102 int coalesce;
103
104 void *header_rxbuffer;
105 void *header_txbuffer;
106
107 int (*form_header)(uint8_t *header,
108 struct sk_buff *skb, struct vector_private *vp);
109 int (*verify_header)(uint8_t *header,
110 struct sk_buff *skb, struct vector_private *vp);
111
112 spinlock_t stats_lock;
113
114 struct tasklet_struct tx_poll;
115 bool rexmit_scheduled;
116 bool opened;
117 bool in_write_poll;
118
119 /* ethtool stats */
120
121 struct vector_estats estats;
122 void *bpf;
123
124 char user[0];
125};
126
127extern int build_transport_data(struct vector_private *vp);
128
129#endif
diff --git a/arch/um/drivers/vector_transports.c b/arch/um/drivers/vector_transports.c
new file mode 100644
index 000000000000..9065047f844b
--- /dev/null
+++ b/arch/um/drivers/vector_transports.c
@@ -0,0 +1,458 @@
1/*
2 * Copyright (C) 2017 - Cambridge Greys Limited
3 * Copyright (C) 2011 - 2014 Cisco Systems Inc
4 * Licensed under the GPL.
5 */
6
7#include <linux/etherdevice.h>
8#include <linux/netdevice.h>
9#include <linux/skbuff.h>
10#include <linux/slab.h>
11#include <asm/byteorder.h>
12#include <uapi/linux/ip.h>
13#include <uapi/linux/virtio_net.h>
14#include <linux/virtio_net.h>
15#include <linux/virtio_byteorder.h>
16#include <linux/netdev_features.h>
17#include "vector_user.h"
18#include "vector_kern.h"
19
20#define GOOD_LINEAR 512
21#define GSO_ERROR "Incoming GSO frames and GRO disabled on the interface"
22
23struct gre_minimal_header {
24 uint16_t header;
25 uint16_t arptype;
26};
27
28
29struct uml_gre_data {
30 uint32_t rx_key;
31 uint32_t tx_key;
32 uint32_t sequence;
33
34 bool ipv6;
35 bool has_sequence;
36 bool pin_sequence;
37 bool checksum;
38 bool key;
39 struct gre_minimal_header expected_header;
40
41 uint32_t checksum_offset;
42 uint32_t key_offset;
43 uint32_t sequence_offset;
44
45};
46
47struct uml_l2tpv3_data {
48 uint64_t rx_cookie;
49 uint64_t tx_cookie;
50 uint64_t rx_session;
51 uint64_t tx_session;
52 uint32_t counter;
53
54 bool udp;
55 bool ipv6;
56 bool has_counter;
57 bool pin_counter;
58 bool cookie;
59 bool cookie_is_64;
60
61 uint32_t cookie_offset;
62 uint32_t session_offset;
63 uint32_t counter_offset;
64};
65
66static int l2tpv3_form_header(uint8_t *header,
67 struct sk_buff *skb, struct vector_private *vp)
68{
69 struct uml_l2tpv3_data *td = vp->transport_data;
70 uint32_t *counter;
71
72 if (td->udp)
73 *(uint32_t *) header = cpu_to_be32(L2TPV3_DATA_PACKET);
74 (*(uint32_t *) (header + td->session_offset)) = td->tx_session;
75
76 if (td->cookie) {
77 if (td->cookie_is_64)
78 (*(uint64_t *)(header + td->cookie_offset)) =
79 td->tx_cookie;
80 else
81 (*(uint32_t *)(header + td->cookie_offset)) =
82 td->tx_cookie;
83 }
84 if (td->has_counter) {
85 counter = (uint32_t *)(header + td->counter_offset);
86 if (td->pin_counter) {
87 *counter = 0;
88 } else {
89 td->counter++;
90 *counter = cpu_to_be32(td->counter);
91 }
92 }
93 return 0;
94}
95
96static int gre_form_header(uint8_t *header,
97 struct sk_buff *skb, struct vector_private *vp)
98{
99 struct uml_gre_data *td = vp->transport_data;
100 uint32_t *sequence;
101 *((uint32_t *) header) = *((uint32_t *) &td->expected_header);
102 if (td->key)
103 (*(uint32_t *) (header + td->key_offset)) = td->tx_key;
104 if (td->has_sequence) {
105 sequence = (uint32_t *)(header + td->sequence_offset);
106 if (td->pin_sequence)
107 *sequence = 0;
108 else
109 *sequence = cpu_to_be32(++td->sequence);
110 }
111 return 0;
112}
113
114static int raw_form_header(uint8_t *header,
115 struct sk_buff *skb, struct vector_private *vp)
116{
117 struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header;
118
119 virtio_net_hdr_from_skb(
120 skb,
121 vheader,
122 virtio_legacy_is_little_endian(),
123 false
124 );
125
126 return 0;
127}
128
129static int l2tpv3_verify_header(
130 uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
131{
132 struct uml_l2tpv3_data *td = vp->transport_data;
133 uint32_t *session;
134 uint64_t cookie;
135
136 if ((!td->udp) && (!td->ipv6))
137 header += sizeof(struct iphdr) /* fix for ipv4 raw */;
138
139 /* we do not do a strict check for "data" packets as per
140 * the RFC spec because the pure IP spec does not have
141 * that anyway.
142 */
143
144 if (td->cookie) {
145 if (td->cookie_is_64)
146 cookie = *(uint64_t *)(header + td->cookie_offset);
147 else
148 cookie = *(uint32_t *)(header + td->cookie_offset);
149 if (cookie != td->rx_cookie) {
150 if (net_ratelimit())
151 netdev_err(vp->dev, "uml_l2tpv3: unknown cookie id");
152 return -1;
153 }
154 }
155 session = (uint32_t *) (header + td->session_offset);
156 if (*session != td->rx_session) {
157 if (net_ratelimit())
158 netdev_err(vp->dev, "uml_l2tpv3: session mismatch");
159 return -1;
160 }
161 return 0;
162}
163
164static int gre_verify_header(
165 uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
166{
167
168 uint32_t key;
169 struct uml_gre_data *td = vp->transport_data;
170
171 if (!td->ipv6)
172 header += sizeof(struct iphdr) /* fix for ipv4 raw */;
173
174 if (*((uint32_t *) header) != *((uint32_t *) &td->expected_header)) {
175 if (net_ratelimit())
176 netdev_err(vp->dev, "header type disagreement, expecting %0x, got %0x",
177 *((uint32_t *) &td->expected_header),
178 *((uint32_t *) header)
179 );
180 return -1;
181 }
182
183 if (td->key) {
184 key = (*(uint32_t *)(header + td->key_offset));
185 if (key != td->rx_key) {
186 if (net_ratelimit())
187 netdev_err(vp->dev, "unknown key id %0x, expecting %0x",
188 key, td->rx_key);
189 return -1;
190 }
191 }
192 return 0;
193}
194
195static int raw_verify_header(
196 uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
197{
198 struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header;
199
200 if ((vheader->gso_type != VIRTIO_NET_HDR_GSO_NONE) &&
201 (vp->req_size != 65536)) {
202 if (net_ratelimit())
203 netdev_err(
204 vp->dev,
205 GSO_ERROR
206 );
207 }
208 if ((vheader->flags & VIRTIO_NET_HDR_F_DATA_VALID) > 0)
209 return 1;
210
211 virtio_net_hdr_to_skb(skb, vheader, virtio_legacy_is_little_endian());
212 return 0;
213}
214
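
Both raw_form_header() and raw_verify_header() above operate on the standard
virtio_net_hdr that the raw/tap descriptors are configured to prepend:
virtio_net_hdr_from_skb() fills it in on transmit, virtio_net_hdr_to_skb()
applies it to the skb on receive. For reference, a sketch of that uapi
structure and the fields relied on here (as defined in <uapi/linux/virtio_net.h>):

	struct virtio_net_hdr {
		__u8 flags;		/* VIRTIO_NET_HDR_F_NEEDS_CSUM / F_DATA_VALID */
		__u8 gso_type;		/* VIRTIO_NET_HDR_GSO_NONE / TCPV4 / UDP / TCPV6 */
		__virtio16 hdr_len;	/* length of L2 + L3 + L4 headers */
		__virtio16 gso_size;	/* GSO segment size, if any */
		__virtio16 csum_start;	/* where checksumming starts, if NEEDS_CSUM */
		__virtio16 csum_offset;	/* offset from csum_start to store the checksum */
	};
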
215static bool get_uint_param(
216 struct arglist *def, char *param, unsigned int *result)
217{
218 char *arg = uml_vector_fetch_arg(def, param);
219
220 if (arg != NULL) {
221		if (kstrtouint(arg, 0, result) == 0)
222 return true;
223 }
224 return false;
225}
226
227static bool get_ulong_param(
228 struct arglist *def, char *param, unsigned long *result)
229{
230 char *arg = uml_vector_fetch_arg(def, param);
231
232 if (arg != NULL) {
233 if (kstrtoul(arg, 0, result) == 0)
234			return true;
236 }
237 return false;
238}
239
240static int build_gre_transport_data(struct vector_private *vp)
241{
242 struct uml_gre_data *td;
243	unsigned int temp_int;
244	unsigned int temp_rx;
245	unsigned int temp_tx;
246
247 vp->transport_data = kmalloc(sizeof(struct uml_gre_data), GFP_KERNEL);
248 if (vp->transport_data == NULL)
249 return -ENOMEM;
250 td = vp->transport_data;
251 td->sequence = 0;
252
253 td->expected_header.arptype = GRE_IRB;
254 td->expected_header.header = 0;
255
256 vp->form_header = &gre_form_header;
257 vp->verify_header = &gre_verify_header;
258 vp->header_size = 4;
259 td->key_offset = 4;
260 td->sequence_offset = 4;
261 td->checksum_offset = 4;
262
263 td->ipv6 = false;
264 if (get_uint_param(vp->parsed, "v6", &temp_int)) {
265 if (temp_int > 0)
266 td->ipv6 = true;
267 }
268 td->key = false;
269 if (get_uint_param(vp->parsed, "rx_key", &temp_rx)) {
270 if (get_uint_param(vp->parsed, "tx_key", &temp_tx)) {
271 td->key = true;
272 td->expected_header.header |= GRE_MODE_KEY;
273 td->rx_key = cpu_to_be32(temp_rx);
274 td->tx_key = cpu_to_be32(temp_tx);
275 vp->header_size += 4;
276 td->sequence_offset += 4;
277 } else {
278 return -EINVAL;
279 }
280 }
281
282	td->has_sequence = false;
283 if (get_uint_param(vp->parsed, "sequence", &temp_int)) {
284 if (temp_int > 0) {
285 vp->header_size += 4;
286 td->has_sequence = true;
287 td->expected_header.header |= GRE_MODE_SEQUENCE;
288 if (get_uint_param(
289 vp->parsed, "pin_sequence", &temp_int)) {
290 if (temp_int > 0)
291 td->pin_sequence = true;
292 }
293 }
294 }
295 vp->rx_header_size = vp->header_size;
296 if (!td->ipv6)
297 vp->rx_header_size += sizeof(struct iphdr);
298 return 0;
299}
300
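For illustration, a sketch of the GRE encapsulation header assembled above,
assuming both a key and a sequence number are configured (with IPv4 raw
sockets the struct iphdr counted into rx_header_size precedes it on receive):

	/*
	 * offset 0: 16-bit flags/version word (GRE_MODE_KEY | GRE_MODE_SEQUENCE)
	 * offset 2: 16-bit protocol type, GRE_IRB (0x6558, transparent Ethernet bridging)
	 * offset 4: 32-bit key (tx_key on send, rx_key expected on receive)
	 * offset 8: 32-bit sequence number (held at 0 when pin_sequence is set)
	 */
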
301static int build_l2tpv3_transport_data(struct vector_private *vp)
302{
303
304 struct uml_l2tpv3_data *td;
305	unsigned int temp_int, temp_rxs, temp_txs;
306 unsigned long temp_rx;
307 unsigned long temp_tx;
308
309 vp->transport_data = kmalloc(
310 sizeof(struct uml_l2tpv3_data), GFP_KERNEL);
311
312 if (vp->transport_data == NULL)
313 return -ENOMEM;
314
315 td = vp->transport_data;
316
317 vp->form_header = &l2tpv3_form_header;
318 vp->verify_header = &l2tpv3_verify_header;
319 td->counter = 0;
320
321 vp->header_size = 4;
322 td->session_offset = 0;
323 td->cookie_offset = 4;
324 td->counter_offset = 4;
325
326
327 td->ipv6 = false;
328 if (get_uint_param(vp->parsed, "v6", &temp_int)) {
329 if (temp_int > 0)
330 td->ipv6 = true;
331 }
332
333 if (get_uint_param(vp->parsed, "rx_session", &temp_rxs)) {
334 if (get_uint_param(vp->parsed, "tx_session", &temp_txs)) {
335 td->tx_session = cpu_to_be32(temp_txs);
336 td->rx_session = cpu_to_be32(temp_rxs);
337 } else {
338 return -EINVAL;
339 }
340 } else {
341 return -EINVAL;
342 }
343
344 td->cookie_is_64 = false;
345 if (get_uint_param(vp->parsed, "cookie64", &temp_int)) {
346 if (temp_int > 0)
347 td->cookie_is_64 = true;
348 }
349 td->cookie = false;
350 if (get_ulong_param(vp->parsed, "rx_cookie", &temp_rx)) {
351 if (get_ulong_param(vp->parsed, "tx_cookie", &temp_tx)) {
352 td->cookie = true;
353 if (td->cookie_is_64) {
354 td->rx_cookie = cpu_to_be64(temp_rx);
355 td->tx_cookie = cpu_to_be64(temp_tx);
356 vp->header_size += 8;
357 td->counter_offset += 8;
358 } else {
359 td->rx_cookie = cpu_to_be32(temp_rx);
360 td->tx_cookie = cpu_to_be32(temp_tx);
361 vp->header_size += 4;
362 td->counter_offset += 4;
363 }
364 } else {
365 return -EINVAL;
366 }
367 }
368
369 td->has_counter = false;
370 if (get_uint_param(vp->parsed, "counter", &temp_int)) {
371 if (temp_int > 0) {
372 td->has_counter = true;
373 vp->header_size += 4;
374 if (get_uint_param(
375 vp->parsed, "pin_counter", &temp_int)) {
376 if (temp_int > 0)
377 td->pin_counter = true;
378 }
379 }
380 }
381
382 if (get_uint_param(vp->parsed, "udp", &temp_int)) {
383 if (temp_int > 0) {
384 td->udp = true;
385 vp->header_size += 4;
386 td->counter_offset += 4;
387 td->session_offset += 4;
388 td->cookie_offset += 4;
389 }
390 }
391
392 vp->rx_header_size = vp->header_size;
393 if ((!td->ipv6) && (!td->udp))
394 vp->rx_header_size += sizeof(struct iphdr);
395
396 return 0;
397}
398
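Correspondingly, a sketch of the L2TPv3 data header built above, assuming UDP
encapsulation, 64-bit cookies and a counter are all enabled (with fewer
options the later fields simply move up):

	/*
	 * offset  0: 32-bit L2TPV3_DATA_PACKET marker (UDP encapsulation only)
	 * offset  4: 32-bit session ID (tx_session / rx_session)
	 * offset  8: 64-bit cookie (32-bit when cookie64 is not set)
	 * offset 16: 32-bit counter (held at 0 when pin_counter is set)
	 */
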
399static int build_raw_transport_data(struct vector_private *vp)
400{
401 if (uml_raw_enable_vnet_headers(vp->fds->rx_fd)) {
402 if (!uml_raw_enable_vnet_headers(vp->fds->tx_fd))
403 return -1;
404 vp->form_header = &raw_form_header;
405 vp->verify_header = &raw_verify_header;
406 vp->header_size = sizeof(struct virtio_net_hdr);
407 vp->rx_header_size = sizeof(struct virtio_net_hdr);
408 vp->dev->hw_features |= (NETIF_F_TSO | NETIF_F_GRO);
409 vp->dev->features |=
410 (NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
411 NETIF_F_TSO | NETIF_F_GRO);
412 netdev_info(
413 vp->dev,
414 "raw: using vnet headers for tso and tx/rx checksum"
415 );
416 }
417 return 0;
418}
419
420static int build_tap_transport_data(struct vector_private *vp)
421{
422 if (uml_raw_enable_vnet_headers(vp->fds->rx_fd)) {
423 vp->form_header = &raw_form_header;
424 vp->verify_header = &raw_verify_header;
425 vp->header_size = sizeof(struct virtio_net_hdr);
426 vp->rx_header_size = sizeof(struct virtio_net_hdr);
427 vp->dev->hw_features |=
428 (NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
429 vp->dev->features |=
430 (NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
431 NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
432 netdev_info(
433 vp->dev,
434 "tap/raw: using vnet headers for tso and tx/rx checksum"
435 );
436 } else {
437 return 0; /* do not try to enable tap too if raw failed */
438 }
439 if (uml_tap_enable_vnet_headers(vp->fds->tx_fd))
440 return 0;
441 return -1;
442}
443
444int build_transport_data(struct vector_private *vp)
445{
446 char *transport = uml_vector_fetch_arg(vp->parsed, "transport");
447
448 if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0)
449 return build_gre_transport_data(vp);
450 if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0)
451 return build_l2tpv3_transport_data(vp);
452 if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
453 return build_raw_transport_data(vp);
454 if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
455 return build_tap_transport_data(vp);
456 return 0;
457}
458
diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c
new file mode 100644
index 000000000000..4291f1a5d342
--- /dev/null
+++ b/arch/um/drivers/vector_user.c
@@ -0,0 +1,586 @@
1/*
2 * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL
4 */
5
6#include <stdio.h>
7#include <unistd.h>
8#include <stdarg.h>
9#include <errno.h>
10#include <stddef.h>
11#include <string.h>
12#include <sys/ioctl.h>
13#include <net/if.h>
14#include <linux/if_tun.h>
15#include <arpa/inet.h>
16#include <sys/types.h>
17#include <sys/stat.h>
18#include <fcntl.h>
19#include <sys/types.h>
20#include <sys/socket.h>
21#include <net/ethernet.h>
22#include <netinet/ip.h>
23#include <netinet/ether.h>
24#include <linux/if_ether.h>
25#include <linux/if_packet.h>
26#include <sys/socket.h>
27#include <sys/wait.h>
28#include <linux/virtio_net.h>
29#include <netdb.h>
30#include <stdlib.h>
31#include <os.h>
32#include <um_malloc.h>
33#include "vector_user.h"
34
35#define ID_GRE 0
36#define ID_L2TPV3 1
37#define ID_MAX 1
38
39#define TOKEN_IFNAME "ifname"
40
41#define TRANS_RAW "raw"
42#define TRANS_RAW_LEN strlen(TRANS_RAW)
43
44#define QDISC_FAIL "user_init_raw: could not disable qdisc on interface"
45#define VNET_HDR_FAIL "could not enable vnet headers on fd %d"
46#define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s"
47#define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i"
48#define BPF_ATTACH_FAIL "Failed to attach filter size %d to %d, err %d\n"
49
50/* This is a very ugly, brute-force lookup, but it is done
51 * only once at initialization, so it is not worth using hashes
52 * or anything more intelligent
53 */
54
55char *uml_vector_fetch_arg(struct arglist *ifspec, char *token)
56{
57 int i;
58
59 for (i = 0; i < ifspec->numargs; i++) {
60 if (strcmp(ifspec->tokens[i], token) == 0)
61 return ifspec->values[i];
62 }
63 return NULL;
64
65}
66
67struct arglist *uml_parse_vector_ifspec(char *arg)
68{
69 struct arglist *result;
70 int pos, len;
71 bool parsing_token = true, next_starts = true;
72
73 if (arg == NULL)
74 return NULL;
75 result = uml_kmalloc(sizeof(struct arglist), UM_GFP_KERNEL);
76 if (result == NULL)
77 return NULL;
78 result->numargs = 0;
79 len = strlen(arg);
80 for (pos = 0; pos < len; pos++) {
81 if (next_starts) {
82 if (parsing_token) {
83 result->tokens[result->numargs] = arg + pos;
84 } else {
85 result->values[result->numargs] = arg + pos;
86 result->numargs++;
87 }
88 next_starts = false;
89 }
90 if (*(arg + pos) == '=') {
91 if (parsing_token)
92 parsing_token = false;
93 else
94 goto cleanup;
95 next_starts = true;
96 (*(arg + pos)) = '\0';
97 }
98 if (*(arg + pos) == ',') {
99 parsing_token = true;
100 next_starts = true;
101 (*(arg + pos)) = '\0';
102 }
103 }
104 return result;
105cleanup:
106 printk(UM_KERN_ERR "vector_setup - Couldn't parse '%s'\n", arg);
107 kfree(result);
108 return NULL;
109}
110
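A minimal usage sketch of the parser and lookup above (hypothetical spec
string; note that parsing inserts '\0' terminators in place, so the string
must be writable):

	static char spec[] = "transport=tap,ifname=tap0,v6=0";
	struct arglist *args = uml_parse_vector_ifspec(spec);

	if (args != NULL)
		printk(UM_KERN_INFO "transport=%s on %s\n",
			uml_vector_fetch_arg(args, "transport"),
			uml_vector_fetch_arg(args, TOKEN_IFNAME));
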
111/*
112 * Socket/FD configuration functions. These return a structure
113 * of rx and tx descriptors to cover cases where these are not
114 * the same (e.g. read via raw socket and write via tap).
115 */
116
117#define PATH_NET_TUN "/dev/net/tun"
118
119static struct vector_fds *user_init_tap_fds(struct arglist *ifspec)
120{
121 struct ifreq ifr;
122 int fd = -1;
123 struct sockaddr_ll sock;
124 int err = -ENOMEM, offload;
125 char *iface;
126 struct vector_fds *result = NULL;
127
128 iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
129 if (iface == NULL) {
130 printk(UM_KERN_ERR "uml_tap: failed to parse interface spec\n");
131 goto tap_cleanup;
132 }
133
134 result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
135 if (result == NULL) {
136 printk(UM_KERN_ERR "uml_tap: failed to allocate file descriptors\n");
137 goto tap_cleanup;
138 }
139 result->rx_fd = -1;
140 result->tx_fd = -1;
141 result->remote_addr = NULL;
142 result->remote_addr_size = 0;
143
144 /* TAP */
145
146 fd = open(PATH_NET_TUN, O_RDWR);
147 if (fd < 0) {
148 printk(UM_KERN_ERR "uml_tap: failed to open tun device\n");
149 goto tap_cleanup;
150 }
151 result->tx_fd = fd;
152 memset(&ifr, 0, sizeof(ifr));
153 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
154 strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
155
156 err = ioctl(fd, TUNSETIFF, (void *) &ifr);
157 if (err != 0) {
158 printk(UM_KERN_ERR "uml_tap: failed to select tap interface\n");
159 goto tap_cleanup;
160 }
161
162 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
163 ioctl(fd, TUNSETOFFLOAD, offload);
164
165 /* RAW */
166
167 fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
168 if (fd == -1) {
169 printk(UM_KERN_ERR
170 "uml_tap: failed to create socket: %i\n", -errno);
171 goto tap_cleanup;
172 }
173 result->rx_fd = fd;
174 memset(&ifr, 0, sizeof(ifr));
175 strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
176 if (ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) {
177 printk(UM_KERN_ERR
178 "uml_tap: failed to set interface: %i\n", -errno);
179 goto tap_cleanup;
180 }
181
182 sock.sll_family = AF_PACKET;
183 sock.sll_protocol = htons(ETH_P_ALL);
184 sock.sll_ifindex = ifr.ifr_ifindex;
185
186 if (bind(fd,
187 (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
188 printk(UM_KERN_ERR
189 "user_init_tap: failed to bind raw pair, err %d\n",
190 -errno);
191 goto tap_cleanup;
192 }
193 return result;
194tap_cleanup:
195 printk(UM_KERN_ERR "user_init_tap: init failed, error %d", err);
196 if (result != NULL) {
197 if (result->rx_fd >= 0)
198 os_close_file(result->rx_fd);
199 if (result->tx_fd >= 0)
200 os_close_file(result->tx_fd);
201 kfree(result);
202 }
203 return NULL;
204}
205
206
207static struct vector_fds *user_init_raw_fds(struct arglist *ifspec)
208{
209 struct ifreq ifr;
210 int rxfd = -1, txfd = -1;
211 struct sockaddr_ll sock;
212 int err = -ENOMEM;
213 char *iface;
214 struct vector_fds *result = NULL;
215 int optval = 1;
216
217
218 iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
219 if (iface == NULL)
220 goto cleanup;
221
222	rxfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
223 if (rxfd == -1) {
224 err = -errno;
225 goto cleanup;
226 }
227 txfd = socket(AF_PACKET, SOCK_RAW, 0); /* Turn off RX on this fd */
228 if (txfd == -1) {
229 err = -errno;
230 goto cleanup;
231 }
232 memset(&ifr, 0, sizeof(ifr));
233 strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
234 if (ioctl(rxfd, SIOCGIFINDEX, (void *) &ifr) < 0) {
235 err = -errno;
236 goto cleanup;
237 }
238
239 sock.sll_family = AF_PACKET;
240 sock.sll_protocol = htons(ETH_P_ALL);
241 sock.sll_ifindex = ifr.ifr_ifindex;
242
243 if (bind(rxfd,
244 (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
245 err = -errno;
246 goto cleanup;
247 }
248
249 sock.sll_family = AF_PACKET;
250 sock.sll_protocol = htons(ETH_P_IP);
251 sock.sll_ifindex = ifr.ifr_ifindex;
252
253 if (bind(txfd,
254 (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
255 err = -errno;
256 goto cleanup;
257 }
258
259 if (setsockopt(txfd,
260 SOL_PACKET, PACKET_QDISC_BYPASS,
261 &optval, sizeof(optval)) != 0) {
262 printk(UM_KERN_INFO QDISC_FAIL);
263 }
264
265 result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
266 if (result != NULL) {
267 result->rx_fd = rxfd;
268 result->tx_fd = txfd;
269 result->remote_addr = NULL;
270 result->remote_addr_size = 0;
271 }
272 return result;
273cleanup:
274 printk(UM_KERN_ERR "user_init_raw: init failed, error %d", err);
275 if (rxfd >= 0)
276 os_close_file(rxfd);
277 if (txfd >= 0)
278 os_close_file(txfd);
279 if (result != NULL)
280 kfree(result);
281 return NULL;
282}
283
284bool uml_raw_enable_vnet_headers(int fd)
285{
286 int optval = 1;
287
288 if (setsockopt(fd,
289 SOL_PACKET, PACKET_VNET_HDR,
290 &optval, sizeof(optval)) != 0) {
291 printk(UM_KERN_INFO VNET_HDR_FAIL, fd);
292 return false;
293 }
294 return true;
295}
296bool uml_tap_enable_vnet_headers(int fd)
297{
298 unsigned int features;
299 int len = sizeof(struct virtio_net_hdr);
300
301 if (ioctl(fd, TUNGETFEATURES, &features) == -1) {
302 printk(UM_KERN_INFO TUN_GET_F_FAIL, strerror(errno));
303 return false;
304 }
305 if ((features & IFF_VNET_HDR) == 0) {
306 printk(UM_KERN_INFO "tapraw: No VNET HEADER support");
307 return false;
308 }
309 ioctl(fd, TUNSETVNETHDRSZ, &len);
310 return true;
311}
312
313static struct vector_fds *user_init_socket_fds(struct arglist *ifspec, int id)
314{
315 int err = -ENOMEM;
316 int fd = -1, gairet;
317 struct addrinfo srchints;
318 struct addrinfo dsthints;
319 bool v6, udp;
320 char *value;
321 char *src, *dst, *srcport, *dstport;
322 struct addrinfo *gairesult = NULL;
323 struct vector_fds *result = NULL;
324
325
326 value = uml_vector_fetch_arg(ifspec, "v6");
327 v6 = false;
328 udp = false;
329 if (value != NULL) {
330 if (strtol((const char *) value, NULL, 10) > 0)
331 v6 = true;
332 }
333
334 value = uml_vector_fetch_arg(ifspec, "udp");
335 if (value != NULL) {
336 if (strtol((const char *) value, NULL, 10) > 0)
337 udp = true;
338 }
339 src = uml_vector_fetch_arg(ifspec, "src");
340 dst = uml_vector_fetch_arg(ifspec, "dst");
341 srcport = uml_vector_fetch_arg(ifspec, "srcport");
342 dstport = uml_vector_fetch_arg(ifspec, "dstport");
343
344 memset(&dsthints, 0, sizeof(dsthints));
345
346 if (v6)
347 dsthints.ai_family = AF_INET6;
348 else
349 dsthints.ai_family = AF_INET;
350
351 switch (id) {
352 case ID_GRE:
353 dsthints.ai_socktype = SOCK_RAW;
354 dsthints.ai_protocol = IPPROTO_GRE;
355 break;
356 case ID_L2TPV3:
357 if (udp) {
358 dsthints.ai_socktype = SOCK_DGRAM;
359 dsthints.ai_protocol = 0;
360 } else {
361 dsthints.ai_socktype = SOCK_RAW;
362 dsthints.ai_protocol = IPPROTO_L2TP;
363 }
364 break;
365 default:
366 printk(KERN_ERR "Unsupported socket type\n");
367 return NULL;
368 }
369 memcpy(&srchints, &dsthints, sizeof(struct addrinfo));
370
371	gairet = getaddrinfo(src, srcport, &srchints, &gairesult);
372 if ((gairet != 0) || (gairesult == NULL)) {
373 printk(UM_KERN_ERR
374 "socket_open : could not resolve src, error = %s",
375 gai_strerror(gairet)
376 );
377 return NULL;
378 }
379 fd = socket(gairesult->ai_family,
380 gairesult->ai_socktype, gairesult->ai_protocol);
381 if (fd == -1) {
382 printk(UM_KERN_ERR
383 "socket_open : could not open socket, error = %d",
384 -errno
385 );
386 goto cleanup;
387 }
388 if (bind(fd,
389 (struct sockaddr *) gairesult->ai_addr,
390 gairesult->ai_addrlen)) {
391 printk(UM_KERN_ERR L2TPV3_BIND_FAIL, errno);
392 goto cleanup;
393 }
394
395 if (gairesult != NULL)
396 freeaddrinfo(gairesult);
397
398 gairesult = NULL;
399
400 gairet = getaddrinfo(dst, dstport, &dsthints, &gairesult);
401 if ((gairet != 0) || (gairesult == NULL)) {
402 printk(UM_KERN_ERR
403 "socket_open : could not resolve dst, error = %s",
404 gai_strerror(gairet)
405 );
406		goto cleanup;
407 }
408
409 result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
410 if (result != NULL) {
411 result->rx_fd = fd;
412 result->tx_fd = fd;
413 result->remote_addr = uml_kmalloc(
414 gairesult->ai_addrlen, UM_GFP_KERNEL);
415 if (result->remote_addr == NULL)
416 goto cleanup;
417 result->remote_addr_size = gairesult->ai_addrlen;
418 memcpy(
419 result->remote_addr,
420 gairesult->ai_addr,
421 gairesult->ai_addrlen
422 );
423 }
424 freeaddrinfo(gairesult);
425 return result;
426cleanup:
427 if (gairesult != NULL)
428 freeaddrinfo(gairesult);
429 printk(UM_KERN_ERR "user_init_socket: init failed, error %d", err);
430 if (fd >= 0)
431 os_close_file(fd);
432 if (result != NULL) {
433 if (result->remote_addr != NULL)
434 kfree(result->remote_addr);
435 kfree(result);
436 }
437 return NULL;
438}
439
440struct vector_fds *uml_vector_user_open(
441 int unit,
442 struct arglist *parsed
443)
444{
445 char *transport;
446
447 if (parsed == NULL) {
448 printk(UM_KERN_ERR "no parsed config for unit %d\n", unit);
449 return NULL;
450 }
451 transport = uml_vector_fetch_arg(parsed, "transport");
452 if (transport == NULL) {
453 printk(UM_KERN_ERR "missing transport for unit %d\n", unit);
454 return NULL;
455 }
456 if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
457 return user_init_raw_fds(parsed);
458 if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
459 return user_init_tap_fds(parsed);
460 if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0)
461 return user_init_socket_fds(parsed, ID_GRE);
462 if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0)
463 return user_init_socket_fds(parsed, ID_L2TPV3);
464 return NULL;
465}
466
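For illustration, interface specs that reach this dispatcher look roughly as
follows (hypothetical addresses and IDs; the spec string is what
uml_parse_vector_ifspec() receives from the vector device option handled on
the vector_kern.c side):

	transport=tap,ifname=tap0
	transport=raw,ifname=p1p1
	transport=gre,src=192.168.128.1,dst=192.168.129.1
	transport=l2tpv3,udp=1,src=192.168.128.1,srcport=1706,dst=192.168.129.1,dstport=1706,rx_session=10,tx_session=10,rx_cookie=5,tx_cookie=5
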
467
468int uml_vector_sendmsg(int fd, void *hdr, int flags)
469{
470 int n;
471
472 CATCH_EINTR(n = sendmsg(fd, (struct msghdr *) hdr, flags));
473 if ((n < 0) && (errno == EAGAIN))
474 return 0;
475 if (n >= 0)
476 return n;
477 else
478 return -errno;
479}
480
481int uml_vector_recvmsg(int fd, void *hdr, int flags)
482{
483 int n;
484
485 CATCH_EINTR(n = recvmsg(fd, (struct msghdr *) hdr, flags));
486 if ((n < 0) && (errno == EAGAIN))
487 return 0;
488 if (n >= 0)
489 return n;
490 else
491 return -errno;
492}
493
494int uml_vector_writev(int fd, void *hdr, int iovcount)
495{
496 int n;
497
498 CATCH_EINTR(n = writev(fd, (struct iovec *) hdr, iovcount));
499 if ((n < 0) && (errno == EAGAIN))
500 return 0;
501 if (n >= 0)
502 return n;
503 else
504 return -errno;
505}
506
507int uml_vector_sendmmsg(
508 int fd,
509 void *msgvec,
510 unsigned int vlen,
511 unsigned int flags)
512{
513 int n;
514
515 CATCH_EINTR(n = sendmmsg(fd, (struct mmsghdr *) msgvec, vlen, flags));
516 if ((n < 0) && (errno == EAGAIN))
517 return 0;
518 if (n >= 0)
519 return n;
520 else
521 return -errno;
522}
523
524int uml_vector_recvmmsg(
525 int fd,
526 void *msgvec,
527 unsigned int vlen,
528 unsigned int flags)
529{
530 int n;
531
532 CATCH_EINTR(
533 n = recvmmsg(fd, (struct mmsghdr *) msgvec, vlen, flags, 0));
534 if ((n < 0) && (errno == EAGAIN))
535 return 0;
536 if (n >= 0)
537 return n;
538 else
539 return -errno;
540}
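
A usage sketch for the multi-message receive wrapper (hypothetical fd and
preallocated frame buffers): the caller prepares one mmsghdr per frame slot
and gets back the number of frames actually read, 0 on EAGAIN, or -errno:

	struct mmsghdr msgvec[32];
	struct iovec iov[32];
	int i, n;

	for (i = 0; i < 32; i++) {
		iov[i].iov_base = buffers[i];	/* hypothetical preallocated buffers */
		iov[i].iov_len = 1536;
		memset(&msgvec[i].msg_hdr, 0, sizeof(struct msghdr));
		msgvec[i].msg_hdr.msg_iov = &iov[i];
		msgvec[i].msg_hdr.msg_iovlen = 1;
	}
	n = uml_vector_recvmmsg(fd, msgvec, 32, 0);
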
541int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len)
542{
543 int err = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, bpf, bpf_len);
544
545 if (err < 0)
546 printk(KERN_ERR BPF_ATTACH_FAIL, bpf_len, fd, -errno);
547 return err;
548}
549
550#define DEFAULT_BPF_LEN 6
551
552void *uml_vector_default_bpf(int fd, void *mac)
553{
554 struct sock_filter *bpf;
555 uint32_t *mac1 = (uint32_t *)(mac + 2);
556 uint16_t *mac2 = (uint16_t *) mac;
557 struct sock_fprog bpf_prog = {
558		.len = DEFAULT_BPF_LEN,
559 .filter = NULL,
560 };
561
562 bpf = uml_kmalloc(
563 sizeof(struct sock_filter) * DEFAULT_BPF_LEN, UM_GFP_KERNEL);
564 if (bpf != NULL) {
565 bpf_prog.filter = bpf;
566 /* ld [8] */
567 bpf[0] = (struct sock_filter){ 0x20, 0, 0, 0x00000008 };
568 /* jeq #0xMAC[2-6] jt 2 jf 5*/
569 bpf[1] = (struct sock_filter){ 0x15, 0, 3, ntohl(*mac1)};
570 /* ldh [6] */
571 bpf[2] = (struct sock_filter){ 0x28, 0, 0, 0x00000006 };
572 /* jeq #0xMAC[0-1] jt 4 jf 5 */
573 bpf[3] = (struct sock_filter){ 0x15, 0, 1, ntohs(*mac2)};
574 /* ret #0 */
575 bpf[4] = (struct sock_filter){ 0x6, 0, 0, 0x00000000 };
576 /* ret #0x40000 */
577 bpf[5] = (struct sock_filter){ 0x6, 0, 0, 0x00040000 };
578 if (uml_vector_attach_bpf(
579 fd, &bpf_prog, sizeof(struct sock_fprog)) < 0) {
580 kfree(bpf);
581 bpf = NULL;
582 }
583 }
584 return bpf;
585}
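
A usage sketch for the default filter (hypothetical MAC and an already-bound
raw socket fd): the classic BPF program built above drops frames whose
Ethernet source address equals the interface MAC, so the guest does not read
back its own transmissions, and accepts everything else; the returned buffer
remains owned by the caller:

	unsigned char mac[6] = { 0x52, 0x54, 0x00, 0x12, 0x34, 0x56 };
	void *filter = uml_vector_default_bpf(fd, mac);

	if (filter == NULL)
		printk(UM_KERN_INFO "no socket filter attached, own frames may loop back\n");
	/* on shutdown the caller releases it: kfree(filter); */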
586
diff --git a/arch/um/drivers/vector_user.h b/arch/um/drivers/vector_user.h
new file mode 100644
index 000000000000..421092c57bb7
--- /dev/null
+++ b/arch/um/drivers/vector_user.h
@@ -0,0 +1,99 @@
1/*
2 * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL
4 */
5
6#ifndef __UM_VECTOR_USER_H
7#define __UM_VECTOR_USER_H
8
9#define MAXVARGS 20
10
11#define TOKEN_IFNAME "ifname"
12
13#define TRANS_RAW "raw"
14#define TRANS_RAW_LEN strlen(TRANS_RAW)
15
16#define TRANS_TAP "tap"
17#define TRANS_TAP_LEN strlen(TRANS_TAP)
18
19
20#define TRANS_GRE "gre"
21#define TRANS_GRE_LEN strlen(TRANS_GRE)
22
23#define TRANS_L2TPV3 "l2tpv3"
24#define TRANS_L2TPV3_LEN strlen(TRANS_L2TPV3)
25
26#ifndef IPPROTO_GRE
27#define IPPROTO_GRE 0x2F
28#endif
29
30#define GRE_MODE_CHECKSUM cpu_to_be16(8 << 12) /* checksum */
31#define GRE_MODE_RESERVED cpu_to_be16(4 << 12) /* unused */
32#define GRE_MODE_KEY cpu_to_be16(2 << 12) /* KEY present */
33#define GRE_MODE_SEQUENCE cpu_to_be16(1 << 12) /* sequence */
34
35#define GRE_IRB cpu_to_be16(0x6558)
36
37#define L2TPV3_DATA_PACKET 0x30000
38
39/* IANA-assigned IP protocol ID for L2TPv3 */
40
41#ifndef IPPROTO_L2TP
42#define IPPROTO_L2TP 0x73
43#endif
44
45struct arglist {
46 int numargs;
47 char *tokens[MAXVARGS];
48 char *values[MAXVARGS];
49};
50
51/* Separating read and write FDs allows us to have different
52 * rx and tx method. Example - read tap via raw socket using
53 * recvmmsg, write using legacy tap write calls
54 */
55
56struct vector_fds {
57 int rx_fd;
58 int tx_fd;
59 void *remote_addr;
60 int remote_addr_size;
61};
62
63#define VECTOR_READ 1
64#define VECTOR_WRITE (1 << 1)
65#define VECTOR_HEADERS (1 << 2)
66
67extern struct arglist *uml_parse_vector_ifspec(char *arg);
68
69extern struct vector_fds *uml_vector_user_open(
70 int unit,
71 struct arglist *parsed
72);
73
74extern char *uml_vector_fetch_arg(
75 struct arglist *ifspec,
76 char *token
77);
78
79extern int uml_vector_recvmsg(int fd, void *hdr, int flags);
80extern int uml_vector_sendmsg(int fd, void *hdr, int flags);
81extern int uml_vector_writev(int fd, void *hdr, int iovcount);
82extern int uml_vector_sendmmsg(
83 int fd, void *msgvec,
84 unsigned int vlen,
85 unsigned int flags
86);
87extern int uml_vector_recvmmsg(
88 int fd,
89 void *msgvec,
90 unsigned int vlen,
91 unsigned int flags
92);
93extern void *uml_vector_default_bpf(int fd, void *mac);
94extern int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len);
95extern bool uml_raw_enable_vnet_headers(int fd);
96extern bool uml_tap_enable_vnet_headers(int fd);
97
98
99#endif
diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h
index b5cdd3f91157..49ed3e35b35a 100644
--- a/arch/um/include/asm/irq.h
+++ b/arch/um/include/asm/irq.h
@@ -18,7 +18,19 @@
18#define XTERM_IRQ 13 18#define XTERM_IRQ 13
19#define RANDOM_IRQ 14 19#define RANDOM_IRQ 14
20 20
21#ifdef CONFIG_UML_NET_VECTOR
22
23#define VECTOR_BASE_IRQ 15
24#define VECTOR_IRQ_SPACE 8
25
26#define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ)
27
28#else
29
21#define LAST_IRQ RANDOM_IRQ 30#define LAST_IRQ RANDOM_IRQ
31
32#endif
33
22#define NR_IRQS (LAST_IRQ + 1) 34#define NR_IRQS (LAST_IRQ + 1)
23 35
24#endif 36#endif
diff --git a/arch/um/include/shared/net_kern.h b/arch/um/include/shared/net_kern.h
index 012ac87d4900..40442b98b173 100644
--- a/arch/um/include/shared/net_kern.h
+++ b/arch/um/include/shared/net_kern.h
@@ -65,5 +65,7 @@ extern int tap_setup_common(char *str, char *type, char **dev_name,
65 char **mac_out, char **gate_addr); 65 char **mac_out, char **gate_addr);
66extern void register_transport(struct transport *new); 66extern void register_transport(struct transport *new);
67extern unsigned short eth_protocol(struct sk_buff *skb); 67extern unsigned short eth_protocol(struct sk_buff *skb);
68extern void uml_net_setup_etheraddr(struct net_device *dev, char *str);
69
68 70
69#endif 71#endif