author		Linus Torvalds <torvalds@linux-foundation.org>	2018-04-11 19:36:47 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-04-11 19:36:47 -0400
commit		375479c38689fbc403cf57b2999278615a4163f5 (patch)
tree		da9565f449ab769802bce0f69cb41e74c4dfb605 /arch/um
parent		45df60cd2cbe2a8c32fd34e474b62b2b41bacf69 (diff)
parent		e40238dedb484c8a19f8257e4ef5d77d038f9ad8 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml
Pull UML updates from Richard Weinberger:

 - a new and faster epoll based IRQ controller and NIC driver

 - misc fixes and janitorial updates

* git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml:
  Fix vector raw inintialization logic
  Migrate vector timers to new timer API
  um: Compile with modern headers
  um: vector: Fix an error handling path in 'vector_parse()'
  um: vector: Fix a memory allocation check
  um: vector: fix missing unlock on error in vector_net_open()
  um: Add missing EXPORT for free_irq_by_fd()
  High Performance UML Vector Network Driver
  Epoll based IRQ controller
  um: Use POSIX ucontext_t instead of struct ucontext
  um: time: Use timespec64 for persistent clock
  um: Restore symbol versions for __memcpy and memcpy
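The new IRQ controller uses a single host epoll instance instead of re-polling the full descriptor set on every wakeup; its implementation is in arch/um/kernel/irq.c and arch/um/os-Linux/irq.c, listed in the diffstat below but not reproduced in this excerpt. A minimal sketch of that host-side pattern, with placeholder names rather than the functions this series actually adds, assuming nothing beyond the standard epoll API:

#include <errno.h>
#include <stdint.h>
#include <sys/epoll.h>

static int epfd = -1;

/* one epoll instance for the whole host side */
static int irq_loop_init(void)
{
	epfd = epoll_create1(EPOLL_CLOEXEC);
	return (epfd < 0) ? -errno : 0;
}

/* register an fd once; 'data' identifies the guest IRQ to raise */
static int irq_loop_watch(int fd, uint32_t events, void *data)
{
	struct epoll_event ev = { .events = events };

	ev.data.ptr = data;
	return (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) < 0) ? -errno : 0;
}

/* a single syscall returns only the descriptors that are ready,
 * instead of scanning every registered one on each wakeup
 */
static int irq_loop_wait(struct epoll_event *out, int max_events)
{
	return epoll_wait(epfd, out, max_events, 0);
}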
Diffstat (limited to 'arch/um')
-rw-r--r--  arch/um/Kconfig.net                   |   11
-rw-r--r--  arch/um/drivers/Makefile              |    4
-rw-r--r--  arch/um/drivers/chan_kern.c           |   53
-rw-r--r--  arch/um/drivers/line.c                |    2
-rw-r--r--  arch/um/drivers/net_kern.c            |    4
-rw-r--r--  arch/um/drivers/random.c              |   11
-rw-r--r--  arch/um/drivers/ubd_kern.c            |    4
-rw-r--r--  arch/um/drivers/vector_kern.c         | 1633
-rw-r--r--  arch/um/drivers/vector_kern.h         |  130
-rw-r--r--  arch/um/drivers/vector_transports.c   |  458
-rw-r--r--  arch/um/drivers/vector_user.c         |  590
-rw-r--r--  arch/um/drivers/vector_user.h         |  100
-rw-r--r--  arch/um/include/asm/asm-prototypes.h  |    1
-rw-r--r--  arch/um/include/asm/irq.h             |   12
-rw-r--r--  arch/um/include/shared/irq_user.h     |   12
-rw-r--r--  arch/um/include/shared/net_kern.h     |    2
-rw-r--r--  arch/um/include/shared/os.h           |   17
-rw-r--r--  arch/um/kernel/irq.c                  |  461
-rw-r--r--  arch/um/kernel/time.c                 |    6
-rw-r--r--  arch/um/os-Linux/file.c               |    1
-rw-r--r--  arch/um/os-Linux/irq.c                |  202
-rw-r--r--  arch/um/os-Linux/signal.c             |    3
22 files changed, 3393 insertions, 324 deletions
diff --git a/arch/um/Kconfig.net b/arch/um/Kconfig.net
index e871af24d9cd..c390f3deb0dc 100644
--- a/arch/um/Kconfig.net
+++ b/arch/um/Kconfig.net
@@ -109,6 +109,17 @@ config UML_NET_DAEMON
 	  more than one without conflict. If you don't need UML networking,
 	  say N.
 
+config UML_NET_VECTOR
+	bool "Vector I/O high performance network devices"
+	depends on UML_NET
+	help
+	  This User-Mode Linux network driver uses multi-message send
+	  and receive functions. The host running the UML guest must have
+	  a linux kernel version above 3.0 and a libc version > 2.13.
+	  This driver provides tap, raw, gre and l2tpv3 network transports
+	  with up to 4 times higher network throughput than the UML network
+	  drivers.
+
 config UML_NET_VDE
 	bool "VDE transport"
 	depends on UML_NET
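The host requirements in the help text above come from the multi-message socket calls the driver is built around, recvmmsg() and sendmmsg(), which is why a host kernel above 3.0 and a libc newer than 2.13 are needed. A minimal user-space sketch of the two primitives, assuming an already-open datagram socket; this is illustration only, not code from the patch:

#define _GNU_SOURCE
#include <sys/socket.h>

/* transmit a whole burst of prepared datagrams with one syscall */
static int send_burst(int fd, struct mmsghdr *vec, unsigned int depth)
{
	return sendmmsg(fd, vec, depth, 0);
}

/* receive up to 'depth' datagrams without blocking; the kernel fills
 * in msg_len for each message it delivered
 */
static int recv_burst(int fd, struct mmsghdr *vec, unsigned int depth)
{
	return recvmmsg(fd, vec, depth, MSG_DONTWAIT, NULL);
}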
diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
index e7582e1d248c..16b3cebddafb 100644
--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -9,6 +9,7 @@
 slip-objs := slip_kern.o slip_user.o
 slirp-objs := slirp_kern.o slirp_user.o
 daemon-objs := daemon_kern.o daemon_user.o
+vector-objs := vector_kern.o vector_user.o vector_transports.o
 umcast-objs := umcast_kern.o umcast_user.o
 net-objs := net_kern.o net_user.o
 mconsole-objs := mconsole_kern.o mconsole_user.o
@@ -43,6 +44,7 @@ obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o
 obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o
 obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o
 obj-$(CONFIG_UML_NET_DAEMON) += daemon.o
+obj-$(CONFIG_UML_NET_VECTOR) += vector.o
 obj-$(CONFIG_UML_NET_VDE) += vde.o
 obj-$(CONFIG_UML_NET_MCAST) += umcast.o
 obj-$(CONFIG_UML_NET_PCAP) += pcap.o
@@ -61,7 +63,7 @@ obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
 obj-$(CONFIG_UML_RANDOM) += random.o
 
 # pcap_user.o must be added explicitly.
-USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o
+USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
 CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH)
 
 include arch/um/scripts/Makefile.rules
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index acbe6c67afba..05588f9466c7 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -171,56 +171,19 @@ int enable_chan(struct line *line)
 	return err;
 }
 
-/* Items are added in IRQ context, when free_irq can't be called, and
- * removed in process context, when it can.
- * This handles interrupt sources which disappear, and which need to
- * be permanently disabled. This is discovered in IRQ context, but
- * the freeing of the IRQ must be done later.
- */
-static DEFINE_SPINLOCK(irqs_to_free_lock);
-static LIST_HEAD(irqs_to_free);
-
-void free_irqs(void)
-{
-	struct chan *chan;
-	LIST_HEAD(list);
-	struct list_head *ele;
-	unsigned long flags;
-
-	spin_lock_irqsave(&irqs_to_free_lock, flags);
-	list_splice_init(&irqs_to_free, &list);
-	spin_unlock_irqrestore(&irqs_to_free_lock, flags);
-
-	list_for_each(ele, &list) {
-		chan = list_entry(ele, struct chan, free_list);
-
-		if (chan->input && chan->enabled)
-			um_free_irq(chan->line->driver->read_irq, chan);
-		if (chan->output && chan->enabled)
-			um_free_irq(chan->line->driver->write_irq, chan);
-		chan->enabled = 0;
-	}
-}
-
 static void close_one_chan(struct chan *chan, int delay_free_irq)
 {
-	unsigned long flags;
-
 	if (!chan->opened)
 		return;
 
-	if (delay_free_irq) {
-		spin_lock_irqsave(&irqs_to_free_lock, flags);
-		list_add(&chan->free_list, &irqs_to_free);
-		spin_unlock_irqrestore(&irqs_to_free_lock, flags);
-	}
-	else {
-		if (chan->input && chan->enabled)
-			um_free_irq(chan->line->driver->read_irq, chan);
-		if (chan->output && chan->enabled)
-			um_free_irq(chan->line->driver->write_irq, chan);
-		chan->enabled = 0;
-	}
+	/* we can safely call free now - it will be marked
+	 * as free and freed once the IRQ stopped processing
+	 */
+	if (chan->input && chan->enabled)
+		um_free_irq(chan->line->driver->read_irq, chan);
+	if (chan->output && chan->enabled)
+		um_free_irq(chan->line->driver->write_irq, chan);
+	chan->enabled = 0;
 	if (chan->ops->close != NULL)
 		(*chan->ops->close)(chan->fd, chan->data);
 
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 366e57f5e8d6..8d80b27502e6 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -284,7 +284,7 @@ int line_setup_irq(int fd, int input, int output, struct line *line, void *data)
 	if (err)
 		return err;
 	if (output)
-		err = um_request_irq(driver->write_irq, fd, IRQ_WRITE,
+		err = um_request_irq(driver->write_irq, fd, IRQ_NONE,
 				     line_write_interrupt, IRQF_SHARED,
 				     driver->write_irq_name, data);
 	return err;
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index b305f8247909..3ef1b48e064a 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -288,7 +288,7 @@ static void uml_net_user_timer_expire(struct timer_list *t)
 #endif
 }
 
-static void setup_etheraddr(struct net_device *dev, char *str)
+void uml_net_setup_etheraddr(struct net_device *dev, char *str)
 {
 	unsigned char *addr = dev->dev_addr;
 	char *end;
@@ -412,7 +412,7 @@ static void eth_configure(int n, void *init, char *mac,
 	 */
 	snprintf(dev->name, sizeof(dev->name), "eth%d", n);
 
-	setup_etheraddr(dev, mac);
+	uml_net_setup_etheraddr(dev, mac);
 
 	printk(KERN_INFO "Netdevice %d (%pM) : ", n, dev->dev_addr);
 
diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index 37c51a6be690..778a0e52d5a5 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -13,6 +13,7 @@
 #include <linux/miscdevice.h>
 #include <linux/delay.h>
 #include <linux/uaccess.h>
+#include <init.h>
 #include <irq_kern.h>
 #include <os.h>
 
@@ -154,7 +155,14 @@ err_out_cleanup_hw:
 /*
  * rng_cleanup - shutdown RNG module
  */
-static void __exit rng_cleanup (void)
+
+static void cleanup(void)
+{
+	free_irq_by_fd(random_fd);
+	os_close_file(random_fd);
+}
+
+static void __exit rng_cleanup(void)
 {
 	os_close_file(random_fd);
 	misc_deregister (&rng_miscdev);
@@ -162,6 +170,7 @@ static void __exit rng_cleanup (void)
 
 module_init (rng_init);
 module_exit (rng_cleanup);
+__uml_exitcall(cleanup);
 
 MODULE_DESCRIPTION("UML Host Random Number Generator (RNG) driver");
 MODULE_LICENSE("GPL");
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index b55fe9bf5d3e..d4e8c497ae86 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1587,11 +1587,11 @@ int io_thread(void *arg)
 
 	do {
 		res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n);
-		if (res > 0) {
+		if (res >= 0) {
 			written += res;
 		} else {
 			if (res != -EAGAIN) {
-				printk("io_thread - read failed, fd = %d, "
+				printk("io_thread - write failed, fd = %d, "
 				       "err = %d\n", kernel_fd, -n);
 			}
 		}
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
new file mode 100644
index 000000000000..02168fe25105
--- /dev/null
+++ b/arch/um/drivers/vector_kern.c
@@ -0,0 +1,1633 @@
1/*
2 * Copyright (C) 2017 - Cambridge Greys Limited
3 * Copyright (C) 2011 - 2014 Cisco Systems Inc
4 * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
5 * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
6 * James Leu (jleu@mindspring.net).
7 * Copyright (C) 2001 by various other people who didn't put their name here.
8 * Licensed under the GPL.
9 */
10
11#include <linux/version.h>
12#include <linux/bootmem.h>
13#include <linux/etherdevice.h>
14#include <linux/ethtool.h>
15#include <linux/inetdevice.h>
16#include <linux/init.h>
17#include <linux/list.h>
18#include <linux/netdevice.h>
19#include <linux/platform_device.h>
20#include <linux/rtnetlink.h>
21#include <linux/skbuff.h>
22#include <linux/slab.h>
23#include <linux/interrupt.h>
24#include <init.h>
25#include <irq_kern.h>
26#include <irq_user.h>
27#include <net_kern.h>
28#include <os.h>
29#include "mconsole_kern.h"
30#include "vector_user.h"
31#include "vector_kern.h"
32
33/*
34 * Adapted from network devices with the following major changes:
35 * All transports are static - simplifies the code significantly
36 * Multiple FDs/IRQs per device
37 * Vector IO optionally used for read/write, falling back to legacy
38 * based on configuration and/or availability
39 * Configuration is no longer positional - L2TPv3 and GRE require up to
40 * 10 parameters, passing this as positional is not fit for purpose.
41 * Only socket transports are supported
42 */
43
44
45#define DRIVER_NAME "uml-vector"
46#define DRIVER_VERSION "01"
47struct vector_cmd_line_arg {
48 struct list_head list;
49 int unit;
50 char *arguments;
51};
52
53struct vector_device {
54 struct list_head list;
55 struct net_device *dev;
56 struct platform_device pdev;
57 int unit;
58 int opened;
59};
60
61static LIST_HEAD(vec_cmd_line);
62
63static DEFINE_SPINLOCK(vector_devices_lock);
64static LIST_HEAD(vector_devices);
65
66static int driver_registered;
67
68static void vector_eth_configure(int n, struct arglist *def);
69
70/* Argument accessors to set variables (and/or set default values)
71 * mtu, buffer sizing, default headroom, etc
72 */
73
74#define DEFAULT_HEADROOM 2
75#define SAFETY_MARGIN 32
76#define DEFAULT_VECTOR_SIZE 64
77#define TX_SMALL_PACKET 128
78#define MAX_IOV_SIZE (MAX_SKB_FRAGS + 1)
79
80static const struct {
81 const char string[ETH_GSTRING_LEN];
82} ethtool_stats_keys[] = {
83 { "rx_queue_max" },
84 { "rx_queue_running_average" },
85 { "tx_queue_max" },
86 { "tx_queue_running_average" },
87 { "rx_encaps_errors" },
88 { "tx_timeout_count" },
89 { "tx_restart_queue" },
90 { "tx_kicks" },
91 { "tx_flow_control_xon" },
92 { "tx_flow_control_xoff" },
93 { "rx_csum_offload_good" },
94 { "rx_csum_offload_errors"},
95 { "sg_ok"},
96 { "sg_linearized"},
97};
98
99#define VECTOR_NUM_STATS ARRAY_SIZE(ethtool_stats_keys)
100
101static void vector_reset_stats(struct vector_private *vp)
102{
103 vp->estats.rx_queue_max = 0;
104 vp->estats.rx_queue_running_average = 0;
105 vp->estats.tx_queue_max = 0;
106 vp->estats.tx_queue_running_average = 0;
107 vp->estats.rx_encaps_errors = 0;
108 vp->estats.tx_timeout_count = 0;
109 vp->estats.tx_restart_queue = 0;
110 vp->estats.tx_kicks = 0;
111 vp->estats.tx_flow_control_xon = 0;
112 vp->estats.tx_flow_control_xoff = 0;
113 vp->estats.sg_ok = 0;
114 vp->estats.sg_linearized = 0;
115}
116
117static int get_mtu(struct arglist *def)
118{
119 char *mtu = uml_vector_fetch_arg(def, "mtu");
120 long result;
121
122 if (mtu != NULL) {
123 if (kstrtoul(mtu, 10, &result) == 0)
124 return result;
125 }
126 return ETH_MAX_PACKET;
127}
128
129static int get_depth(struct arglist *def)
130{
131 char *mtu = uml_vector_fetch_arg(def, "depth");
132 long result;
133
134 if (mtu != NULL) {
135 if (kstrtoul(mtu, 10, &result) == 0)
136 return result;
137 }
138 return DEFAULT_VECTOR_SIZE;
139}
140
141static int get_headroom(struct arglist *def)
142{
143 char *mtu = uml_vector_fetch_arg(def, "headroom");
144 long result;
145
146 if (mtu != NULL) {
147 if (kstrtoul(mtu, 10, &result) == 0)
148 return result;
149 }
150 return DEFAULT_HEADROOM;
151}
152
153static int get_req_size(struct arglist *def)
154{
155 char *gro = uml_vector_fetch_arg(def, "gro");
156 long result;
157
158 if (gro != NULL) {
159 if (kstrtoul(gro, 10, &result) == 0) {
160 if (result > 0)
161 return 65536;
162 }
163 }
164 return get_mtu(def) + ETH_HEADER_OTHER +
165 get_headroom(def) + SAFETY_MARGIN;
166}
167
168
169static int get_transport_options(struct arglist *def)
170{
171 char *transport = uml_vector_fetch_arg(def, "transport");
172 char *vector = uml_vector_fetch_arg(def, "vec");
173
174 int vec_rx = VECTOR_RX;
175 int vec_tx = VECTOR_TX;
176 long parsed;
177
178 if (vector != NULL) {
179 if (kstrtoul(vector, 10, &parsed) == 0) {
180 if (parsed == 0) {
181 vec_rx = 0;
182 vec_tx = 0;
183 }
184 }
185 }
186
187
188 if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
189 return (vec_rx | VECTOR_BPF);
190 if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
191 return (vec_rx | vec_tx);
192 return (vec_rx | vec_tx);
193}
194
195
196/* A mini-buffer for packet drop read
197 * All of our supported transports are datagram oriented and we always
198 * read using recvmsg or recvmmsg. If we pass a buffer which is smaller
199 * than the packet size it still counts as full packet read and will
200 * clean the incoming stream to keep sigio/epoll happy
201 */
202
203#define DROP_BUFFER_SIZE 32
204
205static char *drop_buffer;
206
207/* Array backed queues optimized for bulk enqueue/dequeue and
208 * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios.
209 * For more details and full design rationale see
210 * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt
211 */
212
213
214/*
215 * Advance the mmsg queue head by n = advance. Resets the queue to
216 * maximum enqueue/dequeue-at-once capacity if possible. Called by
217 * dequeuers. Caller must hold the head_lock!
218 */
219
220static int vector_advancehead(struct vector_queue *qi, int advance)
221{
222 int queue_depth;
223
224 qi->head =
225 (qi->head + advance)
226 % qi->max_depth;
227
228
229 spin_lock(&qi->tail_lock);
230 qi->queue_depth -= advance;
231
232 /* we are at 0, use this to
233 * reset head and tail so we can use max size vectors
234 */
235
236 if (qi->queue_depth == 0) {
237 qi->head = 0;
238 qi->tail = 0;
239 }
240 queue_depth = qi->queue_depth;
241 spin_unlock(&qi->tail_lock);
242 return queue_depth;
243}
244
245/* Advance the queue tail by n = advance.
246 * This is called by enqueuers which should hold the
247 * head lock already
248 */
249
250static int vector_advancetail(struct vector_queue *qi, int advance)
251{
252 int queue_depth;
253
254 qi->tail =
255 (qi->tail + advance)
256 % qi->max_depth;
257 spin_lock(&qi->head_lock);
258 qi->queue_depth += advance;
259 queue_depth = qi->queue_depth;
260 spin_unlock(&qi->head_lock);
261 return queue_depth;
262}
263
264static int prep_msg(struct vector_private *vp,
265 struct sk_buff *skb,
266 struct iovec *iov)
267{
268 int iov_index = 0;
269 int nr_frags, frag;
270 skb_frag_t *skb_frag;
271
272 nr_frags = skb_shinfo(skb)->nr_frags;
273 if (nr_frags > MAX_IOV_SIZE) {
274 if (skb_linearize(skb) != 0)
275 goto drop;
276 }
277 if (vp->header_size > 0) {
278 iov[iov_index].iov_len = vp->header_size;
279 vp->form_header(iov[iov_index].iov_base, skb, vp);
280 iov_index++;
281 }
282 iov[iov_index].iov_base = skb->data;
283 if (nr_frags > 0) {
284 iov[iov_index].iov_len = skb->len - skb->data_len;
285 vp->estats.sg_ok++;
286 } else
287 iov[iov_index].iov_len = skb->len;
288 iov_index++;
289 for (frag = 0; frag < nr_frags; frag++) {
290 skb_frag = &skb_shinfo(skb)->frags[frag];
291 iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
292 iov[iov_index].iov_len = skb_frag_size(skb_frag);
293 iov_index++;
294 }
295 return iov_index;
296drop:
297 return -1;
298}
299/*
300 * Generic vector enqueue with support for forming headers using transport
301 * specific callback. Allows GRE, L2TPv3, RAW and other transports
302 * to use a common enqueue procedure in vector mode
303 */
304
305static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
306{
307 struct vector_private *vp = netdev_priv(qi->dev);
308 int queue_depth;
309 int packet_len;
310 struct mmsghdr *mmsg_vector = qi->mmsg_vector;
311 int iov_count;
312
313 spin_lock(&qi->tail_lock);
314 spin_lock(&qi->head_lock);
315 queue_depth = qi->queue_depth;
316 spin_unlock(&qi->head_lock);
317
318 if (skb)
319 packet_len = skb->len;
320
321 if (queue_depth < qi->max_depth) {
322
323 *(qi->skbuff_vector + qi->tail) = skb;
324 mmsg_vector += qi->tail;
325 iov_count = prep_msg(
326 vp,
327 skb,
328 mmsg_vector->msg_hdr.msg_iov
329 );
330 if (iov_count < 1)
331 goto drop;
332 mmsg_vector->msg_hdr.msg_iovlen = iov_count;
333 mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr;
334 mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size;
335 queue_depth = vector_advancetail(qi, 1);
336 } else
337 goto drop;
338 spin_unlock(&qi->tail_lock);
339 return queue_depth;
340drop:
341 qi->dev->stats.tx_dropped++;
342 if (skb != NULL) {
343 packet_len = skb->len;
344 dev_consume_skb_any(skb);
345 netdev_completed_queue(qi->dev, 1, packet_len);
346 }
347 spin_unlock(&qi->tail_lock);
348 return queue_depth;
349}
350
351static int consume_vector_skbs(struct vector_queue *qi, int count)
352{
353 struct sk_buff *skb;
354 int skb_index;
355 int bytes_compl = 0;
356
357 for (skb_index = qi->head; skb_index < qi->head + count; skb_index++) {
358 skb = *(qi->skbuff_vector + skb_index);
359 /* mark as empty to ensure correct destruction if
360 * needed
361 */
362 bytes_compl += skb->len;
363 *(qi->skbuff_vector + skb_index) = NULL;
364 dev_consume_skb_any(skb);
365 }
366 qi->dev->stats.tx_bytes += bytes_compl;
367 qi->dev->stats.tx_packets += count;
368 netdev_completed_queue(qi->dev, count, bytes_compl);
369 return vector_advancehead(qi, count);
370}
371
372/*
373 * Generic vector deque via sendmmsg with support for forming headers
374 * using transport specific callback. Allows GRE, L2TPv3, RAW and
375 * other transports to use a common dequeue procedure in vector mode
376 */
377
378
379static int vector_send(struct vector_queue *qi)
380{
381 struct vector_private *vp = netdev_priv(qi->dev);
382 struct mmsghdr *send_from;
383 int result = 0, send_len, queue_depth = qi->max_depth;
384
385 if (spin_trylock(&qi->head_lock)) {
386 if (spin_trylock(&qi->tail_lock)) {
387 /* update queue_depth to current value */
388 queue_depth = qi->queue_depth;
389 spin_unlock(&qi->tail_lock);
390 while (queue_depth > 0) {
391 /* Calculate the start of the vector */
392 send_len = queue_depth;
393 send_from = qi->mmsg_vector;
394 send_from += qi->head;
395 /* Adjust vector size if wraparound */
396 if (send_len + qi->head > qi->max_depth)
397 send_len = qi->max_depth - qi->head;
398 /* Try to TX as many packets as possible */
399 if (send_len > 0) {
400 result = uml_vector_sendmmsg(
401 vp->fds->tx_fd,
402 send_from,
403 send_len,
404 0
405 );
406 vp->in_write_poll =
407 (result != send_len);
408 }
409 /* For some of the sendmmsg error scenarios
410 * we may end being unsure in the TX success
411 * for all packets. It is safer to declare
412 * them all TX-ed and blame the network.
413 */
414 if (result < 0) {
415 if (net_ratelimit())
416 netdev_err(vp->dev, "sendmmsg err=%i\n",
417 result);
418 result = send_len;
419 }
420 if (result > 0) {
421 queue_depth =
422 consume_vector_skbs(qi, result);
423 /* This is equivalent to an TX IRQ.
424 * Restart the upper layers to feed us
425 * more packets.
426 */
427 if (result > vp->estats.tx_queue_max)
428 vp->estats.tx_queue_max = result;
429 vp->estats.tx_queue_running_average =
430 (vp->estats.tx_queue_running_average + result) >> 1;
431 }
432 netif_trans_update(qi->dev);
433 netif_wake_queue(qi->dev);
434 /* if TX is busy, break out of the send loop,
435 * poll write IRQ will reschedule xmit for us
436 */
437 if (result != send_len) {
438 vp->estats.tx_restart_queue++;
439 break;
440 }
441 }
442 }
443 spin_unlock(&qi->head_lock);
444 } else {
445 tasklet_schedule(&vp->tx_poll);
446 }
447 return queue_depth;
448}
449
450/* Queue destructor. Deliberately stateless so we can use
451 * it in queue cleanup if initialization fails.
452 */
453
454static void destroy_queue(struct vector_queue *qi)
455{
456 int i;
457 struct iovec *iov;
458 struct vector_private *vp = netdev_priv(qi->dev);
459 struct mmsghdr *mmsg_vector;
460
461 if (qi == NULL)
462 return;
463 /* deallocate any skbuffs - we rely on any unused to be
464 * set to NULL.
465 */
466 if (qi->skbuff_vector != NULL) {
467 for (i = 0; i < qi->max_depth; i++) {
468 if (*(qi->skbuff_vector + i) != NULL)
469 dev_kfree_skb_any(*(qi->skbuff_vector + i));
470 }
471 kfree(qi->skbuff_vector);
472 }
473 /* deallocate matching IOV structures including header buffs */
474 if (qi->mmsg_vector != NULL) {
475 mmsg_vector = qi->mmsg_vector;
476 for (i = 0; i < qi->max_depth; i++) {
477 iov = mmsg_vector->msg_hdr.msg_iov;
478 if (iov != NULL) {
479 if ((vp->header_size > 0) &&
480 (iov->iov_base != NULL))
481 kfree(iov->iov_base);
482 kfree(iov);
483 }
484 mmsg_vector++;
485 }
486 kfree(qi->mmsg_vector);
487 }
488 kfree(qi);
489}
490
491/*
492 * Queue constructor. Create a queue with a given side.
493 */
494static struct vector_queue *create_queue(
495 struct vector_private *vp,
496 int max_size,
497 int header_size,
498 int num_extra_frags)
499{
500 struct vector_queue *result;
501 int i;
502 struct iovec *iov;
503 struct mmsghdr *mmsg_vector;
504
505 result = kmalloc(sizeof(struct vector_queue), GFP_KERNEL);
506 if (result == NULL)
507 goto out_fail;
508 result->max_depth = max_size;
509 result->dev = vp->dev;
510 result->mmsg_vector = kmalloc(
511 (sizeof(struct mmsghdr) * max_size), GFP_KERNEL);
512 result->skbuff_vector = kmalloc(
513 (sizeof(void *) * max_size), GFP_KERNEL);
514 if (result->mmsg_vector == NULL || result->skbuff_vector == NULL)
515 goto out_fail;
516
517 mmsg_vector = result->mmsg_vector;
518 for (i = 0; i < max_size; i++) {
519 /* Clear all pointers - we use non-NULL as marking on
520 * what to free on destruction
521 */
522 *(result->skbuff_vector + i) = NULL;
523 mmsg_vector->msg_hdr.msg_iov = NULL;
524 mmsg_vector++;
525 }
526 mmsg_vector = result->mmsg_vector;
527 result->max_iov_frags = num_extra_frags;
528 for (i = 0; i < max_size; i++) {
529 if (vp->header_size > 0)
530 iov = kmalloc(
531 sizeof(struct iovec) * (3 + num_extra_frags),
532 GFP_KERNEL
533 );
534 else
535 iov = kmalloc(
536 sizeof(struct iovec) * (2 + num_extra_frags),
537 GFP_KERNEL
538 );
539 if (iov == NULL)
540 goto out_fail;
541 mmsg_vector->msg_hdr.msg_iov = iov;
542 mmsg_vector->msg_hdr.msg_iovlen = 1;
543 mmsg_vector->msg_hdr.msg_control = NULL;
544 mmsg_vector->msg_hdr.msg_controllen = 0;
545 mmsg_vector->msg_hdr.msg_flags = MSG_DONTWAIT;
546 mmsg_vector->msg_hdr.msg_name = NULL;
547 mmsg_vector->msg_hdr.msg_namelen = 0;
548 if (vp->header_size > 0) {
549 iov->iov_base = kmalloc(header_size, GFP_KERNEL);
550 if (iov->iov_base == NULL)
551 goto out_fail;
552 iov->iov_len = header_size;
553 mmsg_vector->msg_hdr.msg_iovlen = 2;
554 iov++;
555 }
556 iov->iov_base = NULL;
557 iov->iov_len = 0;
558 mmsg_vector++;
559 }
560 spin_lock_init(&result->head_lock);
561 spin_lock_init(&result->tail_lock);
562 result->queue_depth = 0;
563 result->head = 0;
564 result->tail = 0;
565 return result;
566out_fail:
567 destroy_queue(result);
568 return NULL;
569}
570
571/*
572 * We do not use the RX queue as a proper wraparound queue for now
573 * This is not necessary because the consumption via netif_rx()
574 * happens in-line. While we can try using the return code of
575 * netif_rx() for flow control there are no drivers doing this today.
576 * For this RX specific use we ignore the tail/head locks and
577 * just read into a prepared queue filled with skbuffs.
578 */
579
580static struct sk_buff *prep_skb(
581 struct vector_private *vp,
582 struct user_msghdr *msg)
583{
584 int linear = vp->max_packet + vp->headroom + SAFETY_MARGIN;
585 struct sk_buff *result;
586 int iov_index = 0, len;
587 struct iovec *iov = msg->msg_iov;
588 int err, nr_frags, frag;
589 skb_frag_t *skb_frag;
590
591 if (vp->req_size <= linear)
592 len = linear;
593 else
594 len = vp->req_size;
595 result = alloc_skb_with_frags(
596 linear,
597 len - vp->max_packet,
598 3,
599 &err,
600 GFP_ATOMIC
601 );
602 if (vp->header_size > 0)
603 iov_index++;
604 if (result == NULL) {
605 iov[iov_index].iov_base = NULL;
606 iov[iov_index].iov_len = 0;
607 goto done;
608 }
609 skb_reserve(result, vp->headroom);
610 result->dev = vp->dev;
611 skb_put(result, vp->max_packet);
612 result->data_len = len - vp->max_packet;
613 result->len += len - vp->max_packet;
614 skb_reset_mac_header(result);
615 result->ip_summed = CHECKSUM_NONE;
616 iov[iov_index].iov_base = result->data;
617 iov[iov_index].iov_len = vp->max_packet;
618 iov_index++;
619
620 nr_frags = skb_shinfo(result)->nr_frags;
621 for (frag = 0; frag < nr_frags; frag++) {
622 skb_frag = &skb_shinfo(result)->frags[frag];
623 iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
624 if (iov[iov_index].iov_base != NULL)
625 iov[iov_index].iov_len = skb_frag_size(skb_frag);
626 else
627 iov[iov_index].iov_len = 0;
628 iov_index++;
629 }
630done:
631 msg->msg_iovlen = iov_index;
632 return result;
633}
634
635
636/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/
637
638static void prep_queue_for_rx(struct vector_queue *qi)
639{
640 struct vector_private *vp = netdev_priv(qi->dev);
641 struct mmsghdr *mmsg_vector = qi->mmsg_vector;
642 void **skbuff_vector = qi->skbuff_vector;
643 int i;
644
645 if (qi->queue_depth == 0)
646 return;
647 for (i = 0; i < qi->queue_depth; i++) {
648 /* it is OK if allocation fails - recvmmsg with NULL data in
649 * iov argument still performs an RX, just drops the packet
650 * This allows us stop faffing around with a "drop buffer"
651 */
652
653 *skbuff_vector = prep_skb(vp, &mmsg_vector->msg_hdr);
654 skbuff_vector++;
655 mmsg_vector++;
656 }
657 qi->queue_depth = 0;
658}
659
660static struct vector_device *find_device(int n)
661{
662 struct vector_device *device;
663 struct list_head *ele;
664
665 spin_lock(&vector_devices_lock);
666 list_for_each(ele, &vector_devices) {
667 device = list_entry(ele, struct vector_device, list);
668 if (device->unit == n)
669 goto out;
670 }
671 device = NULL;
672 out:
673 spin_unlock(&vector_devices_lock);
674 return device;
675}
676
677static int vector_parse(char *str, int *index_out, char **str_out,
678 char **error_out)
679{
680 int n, len, err;
681 char *start = str;
682
683 len = strlen(str);
684
685 while ((*str != ':') && (strlen(str) > 1))
686 str++;
687 if (*str != ':') {
688 *error_out = "Expected ':' after device number";
689 return -EINVAL;
690 }
691 *str = '\0';
692
693 err = kstrtouint(start, 0, &n);
694 if (err < 0) {
695 *error_out = "Bad device number";
696 return err;
697 }
698
699 str++;
700 if (find_device(n)) {
701 *error_out = "Device already configured";
702 return -EINVAL;
703 }
704
705 *index_out = n;
706 *str_out = str;
707 return 0;
708}
709
710static int vector_config(char *str, char **error_out)
711{
712 int err, n;
713 char *params;
714 struct arglist *parsed;
715
716 err = vector_parse(str, &n, &params, error_out);
717 if (err != 0)
718 return err;
719
720 /* This string is broken up and the pieces used by the underlying
721 * driver. We should copy it to make sure things do not go wrong
722 * later.
723 */
724
725 params = kstrdup(params, GFP_KERNEL);
726 if (params == NULL) {
727 *error_out = "vector_config failed to strdup string";
728 return -ENOMEM;
729 }
730
731 parsed = uml_parse_vector_ifspec(params);
732
733 if (parsed == NULL) {
734 *error_out = "vector_config failed to parse parameters";
735 return -EINVAL;
736 }
737
738 vector_eth_configure(n, parsed);
739 return 0;
740}
741
742static int vector_id(char **str, int *start_out, int *end_out)
743{
744 char *end;
745 int n;
746
747 n = simple_strtoul(*str, &end, 0);
748 if ((*end != '\0') || (end == *str))
749 return -1;
750
751 *start_out = n;
752 *end_out = n;
753 *str = end;
754 return n;
755}
756
757static int vector_remove(int n, char **error_out)
758{
759 struct vector_device *vec_d;
760 struct net_device *dev;
761 struct vector_private *vp;
762
763 vec_d = find_device(n);
764 if (vec_d == NULL)
765 return -ENODEV;
766 dev = vec_d->dev;
767 vp = netdev_priv(dev);
768 if (vp->fds != NULL)
769 return -EBUSY;
770 unregister_netdev(dev);
771 platform_device_unregister(&vec_d->pdev);
772 return 0;
773}
774
775/*
776 * There is no shared per-transport initialization code, so
777 * we will just initialize each interface one by one and
778 * add them to a list
779 */
780
781static struct platform_driver uml_net_driver = {
782 .driver = {
783 .name = DRIVER_NAME,
784 },
785};
786
787
788static void vector_device_release(struct device *dev)
789{
790 struct vector_device *device = dev_get_drvdata(dev);
791 struct net_device *netdev = device->dev;
792
793 list_del(&device->list);
794 kfree(device);
795 free_netdev(netdev);
796}
797
798/* Bog standard recv using recvmsg - not used normally unless the user
799 * explicitly specifies not to use recvmmsg vector RX.
800 */
801
802static int vector_legacy_rx(struct vector_private *vp)
803{
804 int pkt_len;
805 struct user_msghdr hdr;
806 struct iovec iov[2 + MAX_IOV_SIZE]; /* header + data use case only */
807 int iovpos = 0;
808 struct sk_buff *skb;
809 int header_check;
810
811 hdr.msg_name = NULL;
812 hdr.msg_namelen = 0;
813 hdr.msg_iov = (struct iovec *) &iov;
814 hdr.msg_control = NULL;
815 hdr.msg_controllen = 0;
816 hdr.msg_flags = 0;
817
818 if (vp->header_size > 0) {
819 iov[0].iov_base = vp->header_rxbuffer;
820 iov[0].iov_len = vp->header_size;
821 }
822
823 skb = prep_skb(vp, &hdr);
824
825 if (skb == NULL) {
826 /* Read a packet into drop_buffer and don't do
827 * anything with it.
828 */
829 iov[iovpos].iov_base = drop_buffer;
830 iov[iovpos].iov_len = DROP_BUFFER_SIZE;
831 hdr.msg_iovlen = 1;
832 vp->dev->stats.rx_dropped++;
833 }
834
835 pkt_len = uml_vector_recvmsg(vp->fds->rx_fd, &hdr, 0);
836
837 if (skb != NULL) {
838 if (pkt_len > vp->header_size) {
839 if (vp->header_size > 0) {
840 header_check = vp->verify_header(
841 vp->header_rxbuffer, skb, vp);
842 if (header_check < 0) {
843 dev_kfree_skb_irq(skb);
844 vp->dev->stats.rx_dropped++;
845 vp->estats.rx_encaps_errors++;
846 return 0;
847 }
848 if (header_check > 0) {
849 vp->estats.rx_csum_offload_good++;
850 skb->ip_summed = CHECKSUM_UNNECESSARY;
851 }
852 }
853 pskb_trim(skb, pkt_len - vp->rx_header_size);
854 skb->protocol = eth_type_trans(skb, skb->dev);
855 vp->dev->stats.rx_bytes += skb->len;
856 vp->dev->stats.rx_packets++;
857 netif_rx(skb);
858 } else {
859 dev_kfree_skb_irq(skb);
860 }
861 }
862 return pkt_len;
863}
864
865/*
866 * Packet at a time TX which falls back to vector TX if the
867 * underlying transport is busy.
868 */
869
870
871
872static int writev_tx(struct vector_private *vp, struct sk_buff *skb)
873{
874 struct iovec iov[3 + MAX_IOV_SIZE];
875 int iov_count, pkt_len = 0;
876
877 iov[0].iov_base = vp->header_txbuffer;
878 iov_count = prep_msg(vp, skb, (struct iovec *) &iov);
879
880 if (iov_count < 1)
881 goto drop;
882 pkt_len = uml_vector_writev(
883 vp->fds->tx_fd,
884 (struct iovec *) &iov,
885 iov_count
886 );
887
888 netif_trans_update(vp->dev);
889 netif_wake_queue(vp->dev);
890
891 if (pkt_len > 0) {
892 vp->dev->stats.tx_bytes += skb->len;
893 vp->dev->stats.tx_packets++;
894 } else {
895 vp->dev->stats.tx_dropped++;
896 }
897 consume_skb(skb);
898 return pkt_len;
899drop:
900 vp->dev->stats.tx_dropped++;
901 consume_skb(skb);
902 return pkt_len;
903}
904
905/*
906 * Receive as many messages as we can in one call using the special
907 * mmsg vector matched to an skb vector which we prepared earlier.
908 */
909
910static int vector_mmsg_rx(struct vector_private *vp)
911{
912 int packet_count, i;
913 struct vector_queue *qi = vp->rx_queue;
914 struct sk_buff *skb;
915 struct mmsghdr *mmsg_vector = qi->mmsg_vector;
916 void **skbuff_vector = qi->skbuff_vector;
917 int header_check;
918
919 /* Refresh the vector and make sure it is with new skbs and the
920 * iovs are updated to point to them.
921 */
922
923 prep_queue_for_rx(qi);
924
925 /* Fire the Lazy Gun - get as many packets as we can in one go. */
926
927 packet_count = uml_vector_recvmmsg(
928 vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0);
929
930 if (packet_count <= 0)
931 return packet_count;
932
933 /* We treat packet processing as enqueue, buffer refresh as dequeue
934 * The queue_depth tells us how many buffers have been used and how
935 * many do we need to prep the next time prep_queue_for_rx() is called.
936 */
937
938 qi->queue_depth = packet_count;
939
940 for (i = 0; i < packet_count; i++) {
941 skb = (*skbuff_vector);
942 if (mmsg_vector->msg_len > vp->header_size) {
943 if (vp->header_size > 0) {
944 header_check = vp->verify_header(
945 mmsg_vector->msg_hdr.msg_iov->iov_base,
946 skb,
947 vp
948 );
949 if (header_check < 0) {
950 /* Overlay header failed to verify - discard.
951 * We can actually keep this skb and reuse it,
952 * but that will make the prep logic too
953 * complex.
954 */
955 dev_kfree_skb_irq(skb);
956 vp->estats.rx_encaps_errors++;
957 continue;
958 }
959 if (header_check > 0) {
960 vp->estats.rx_csum_offload_good++;
961 skb->ip_summed = CHECKSUM_UNNECESSARY;
962 }
963 }
964 pskb_trim(skb,
965 mmsg_vector->msg_len - vp->rx_header_size);
966 skb->protocol = eth_type_trans(skb, skb->dev);
967 /*
968 * We do not need to lock on updating stats here
969 * The interrupt loop is non-reentrant.
970 */
971 vp->dev->stats.rx_bytes += skb->len;
972 vp->dev->stats.rx_packets++;
973 netif_rx(skb);
974 } else {
975 /* Overlay header too short to do anything - discard.
976 * We can actually keep this skb and reuse it,
977 * but that will make the prep logic too complex.
978 */
979 if (skb != NULL)
980 dev_kfree_skb_irq(skb);
981 }
982 (*skbuff_vector) = NULL;
983 /* Move to the next buffer element */
984 mmsg_vector++;
985 skbuff_vector++;
986 }
987 if (packet_count > 0) {
988 if (vp->estats.rx_queue_max < packet_count)
989 vp->estats.rx_queue_max = packet_count;
990 vp->estats.rx_queue_running_average =
991 (vp->estats.rx_queue_running_average + packet_count) >> 1;
992 }
993 return packet_count;
994}
995
996static void vector_rx(struct vector_private *vp)
997{
998 int err;
999
1000 if ((vp->options & VECTOR_RX) > 0)
1001 while ((err = vector_mmsg_rx(vp)) > 0)
1002 ;
1003 else
1004 while ((err = vector_legacy_rx(vp)) > 0)
1005 ;
1006 if ((err != 0) && net_ratelimit())
1007 netdev_err(vp->dev, "vector_rx: error(%d)\n", err);
1008}
1009
1010static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
1011{
1012 struct vector_private *vp = netdev_priv(dev);
1013 int queue_depth = 0;
1014
1015 if ((vp->options & VECTOR_TX) == 0) {
1016 writev_tx(vp, skb);
1017 return NETDEV_TX_OK;
1018 }
1019
1020 /* We do BQL only in the vector path, no point doing it in
1021 * packet at a time mode as there is no device queue
1022 */
1023
1024 netdev_sent_queue(vp->dev, skb->len);
1025 queue_depth = vector_enqueue(vp->tx_queue, skb);
1026
1027 /* if the device queue is full, stop the upper layers and
1028 * flush it.
1029 */
1030
1031 if (queue_depth >= vp->tx_queue->max_depth - 1) {
1032 vp->estats.tx_kicks++;
1033 netif_stop_queue(dev);
1034 vector_send(vp->tx_queue);
1035 return NETDEV_TX_OK;
1036 }
1037 if (skb->xmit_more) {
1038 mod_timer(&vp->tl, vp->coalesce);
1039 return NETDEV_TX_OK;
1040 }
1041 if (skb->len < TX_SMALL_PACKET) {
1042 vp->estats.tx_kicks++;
1043 vector_send(vp->tx_queue);
1044 } else
1045 tasklet_schedule(&vp->tx_poll);
1046 return NETDEV_TX_OK;
1047}
1048
1049static irqreturn_t vector_rx_interrupt(int irq, void *dev_id)
1050{
1051 struct net_device *dev = dev_id;
1052 struct vector_private *vp = netdev_priv(dev);
1053
1054 if (!netif_running(dev))
1055 return IRQ_NONE;
1056 vector_rx(vp);
1057 return IRQ_HANDLED;
1058
1059}
1060
1061static irqreturn_t vector_tx_interrupt(int irq, void *dev_id)
1062{
1063 struct net_device *dev = dev_id;
1064 struct vector_private *vp = netdev_priv(dev);
1065
1066 if (!netif_running(dev))
1067 return IRQ_NONE;
1068 /* We need to pay attention to it only if we got
1069 * -EAGAIN or -ENOBUFFS from sendmmsg. Otherwise
1070 * we ignore it. In the future, it may be worth
1071 * it to improve the IRQ controller a bit to make
1072 * tweaking the IRQ mask less costly
1073 */
1074
1075 if (vp->in_write_poll)
1076 tasklet_schedule(&vp->tx_poll);
1077 return IRQ_HANDLED;
1078
1079}
1080
1081static int irq_rr;
1082
1083static int vector_net_close(struct net_device *dev)
1084{
1085 struct vector_private *vp = netdev_priv(dev);
1086 unsigned long flags;
1087
1088 netif_stop_queue(dev);
1089 del_timer(&vp->tl);
1090
1091 if (vp->fds == NULL)
1092 return 0;
1093
1094 /* Disable and free all IRQS */
1095 if (vp->rx_irq > 0) {
1096 um_free_irq(vp->rx_irq, dev);
1097 vp->rx_irq = 0;
1098 }
1099 if (vp->tx_irq > 0) {
1100 um_free_irq(vp->tx_irq, dev);
1101 vp->tx_irq = 0;
1102 }
1103 tasklet_kill(&vp->tx_poll);
1104 if (vp->fds->rx_fd > 0) {
1105 os_close_file(vp->fds->rx_fd);
1106 vp->fds->rx_fd = -1;
1107 }
1108 if (vp->fds->tx_fd > 0) {
1109 os_close_file(vp->fds->tx_fd);
1110 vp->fds->tx_fd = -1;
1111 }
1112 if (vp->bpf != NULL)
1113 kfree(vp->bpf);
1114 if (vp->fds->remote_addr != NULL)
1115 kfree(vp->fds->remote_addr);
1116 if (vp->transport_data != NULL)
1117 kfree(vp->transport_data);
1118 if (vp->header_rxbuffer != NULL)
1119 kfree(vp->header_rxbuffer);
1120 if (vp->header_txbuffer != NULL)
1121 kfree(vp->header_txbuffer);
1122 if (vp->rx_queue != NULL)
1123 destroy_queue(vp->rx_queue);
1124 if (vp->tx_queue != NULL)
1125 destroy_queue(vp->tx_queue);
1126 kfree(vp->fds);
1127 vp->fds = NULL;
1128 spin_lock_irqsave(&vp->lock, flags);
1129 vp->opened = false;
1130 spin_unlock_irqrestore(&vp->lock, flags);
1131 return 0;
1132}
1133
1134/* TX tasklet */
1135
1136static void vector_tx_poll(unsigned long data)
1137{
1138 struct vector_private *vp = (struct vector_private *)data;
1139
1140 vp->estats.tx_kicks++;
1141 vector_send(vp->tx_queue);
1142}
1143static void vector_reset_tx(struct work_struct *work)
1144{
1145 struct vector_private *vp =
1146 container_of(work, struct vector_private, reset_tx);
1147 netdev_reset_queue(vp->dev);
1148 netif_start_queue(vp->dev);
1149 netif_wake_queue(vp->dev);
1150}
1151static int vector_net_open(struct net_device *dev)
1152{
1153 struct vector_private *vp = netdev_priv(dev);
1154 unsigned long flags;
1155 int err = -EINVAL;
1156 struct vector_device *vdevice;
1157
1158 spin_lock_irqsave(&vp->lock, flags);
1159 if (vp->opened) {
1160 spin_unlock_irqrestore(&vp->lock, flags);
1161 return -ENXIO;
1162 }
1163 vp->opened = true;
1164 spin_unlock_irqrestore(&vp->lock, flags);
1165
1166 vp->fds = uml_vector_user_open(vp->unit, vp->parsed);
1167
1168 if (vp->fds == NULL)
1169 goto out_close;
1170
1171 if (build_transport_data(vp) < 0)
1172 goto out_close;
1173
1174 if ((vp->options & VECTOR_RX) > 0) {
1175 vp->rx_queue = create_queue(
1176 vp,
1177 get_depth(vp->parsed),
1178 vp->rx_header_size,
1179 MAX_IOV_SIZE
1180 );
1181 vp->rx_queue->queue_depth = get_depth(vp->parsed);
1182 } else {
1183 vp->header_rxbuffer = kmalloc(
1184 vp->rx_header_size,
1185 GFP_KERNEL
1186 );
1187 if (vp->header_rxbuffer == NULL)
1188 goto out_close;
1189 }
1190 if ((vp->options & VECTOR_TX) > 0) {
1191 vp->tx_queue = create_queue(
1192 vp,
1193 get_depth(vp->parsed),
1194 vp->header_size,
1195 MAX_IOV_SIZE
1196 );
1197 } else {
1198 vp->header_txbuffer = kmalloc(vp->header_size, GFP_KERNEL);
1199 if (vp->header_txbuffer == NULL)
1200 goto out_close;
1201 }
1202
1203 /* READ IRQ */
1204 err = um_request_irq(
1205 irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd,
1206 IRQ_READ, vector_rx_interrupt,
1207 IRQF_SHARED, dev->name, dev);
1208 if (err != 0) {
1209 netdev_err(dev, "vector_open: failed to get rx irq(%d)\n", err);
1210 err = -ENETUNREACH;
1211 goto out_close;
1212 }
1213 vp->rx_irq = irq_rr + VECTOR_BASE_IRQ;
1214 dev->irq = irq_rr + VECTOR_BASE_IRQ;
1215 irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
1216
1217 /* WRITE IRQ - we need it only if we have vector TX */
1218 if ((vp->options & VECTOR_TX) > 0) {
1219 err = um_request_irq(
1220 irq_rr + VECTOR_BASE_IRQ, vp->fds->tx_fd,
1221 IRQ_WRITE, vector_tx_interrupt,
1222 IRQF_SHARED, dev->name, dev);
1223 if (err != 0) {
1224 netdev_err(dev,
1225 "vector_open: failed to get tx irq(%d)\n", err);
1226 err = -ENETUNREACH;
1227 goto out_close;
1228 }
1229 vp->tx_irq = irq_rr + VECTOR_BASE_IRQ;
1230 irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
1231 }
1232
1233 if ((vp->options & VECTOR_QDISC_BYPASS) != 0) {
1234 if (!uml_raw_enable_qdisc_bypass(vp->fds->rx_fd))
1235 vp->options = vp->options | VECTOR_BPF;
1236 }
1237
1238 if ((vp->options & VECTOR_BPF) != 0)
1239 vp->bpf = uml_vector_default_bpf(vp->fds->rx_fd, dev->dev_addr);
1240
1241 netif_start_queue(dev);
1242
1243 /* clear buffer - it can happen that the host side of the interface
1244 * is full when we get here. In this case, new data is never queued,
1245 * SIGIOs never arrive, and the net never works.
1246 */
1247
1248 vector_rx(vp);
1249
1250 vector_reset_stats(vp);
1251 vdevice = find_device(vp->unit);
1252 vdevice->opened = 1;
1253
1254 if ((vp->options & VECTOR_TX) != 0)
1255 add_timer(&vp->tl);
1256 return 0;
1257out_close:
1258 vector_net_close(dev);
1259 return err;
1260}
1261
1262
1263static void vector_net_set_multicast_list(struct net_device *dev)
1264{
1265 /* TODO: - we can do some BPF games here */
1266 return;
1267}
1268
1269static void vector_net_tx_timeout(struct net_device *dev)
1270{
1271 struct vector_private *vp = netdev_priv(dev);
1272
1273 vp->estats.tx_timeout_count++;
1274 netif_trans_update(dev);
1275 schedule_work(&vp->reset_tx);
1276}
1277
1278static netdev_features_t vector_fix_features(struct net_device *dev,
1279 netdev_features_t features)
1280{
1281 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
1282 return features;
1283}
1284
1285static int vector_set_features(struct net_device *dev,
1286 netdev_features_t features)
1287{
1288 struct vector_private *vp = netdev_priv(dev);
1289 /* Adjust buffer sizes for GSO/GRO. Unfortunately, there is
1290 * no way to negotiate it on raw sockets, so we can change
1291 * only our side.
1292 */
1293 if (features & NETIF_F_GRO)
1294 /* All new frame buffers will be GRO-sized */
1295 vp->req_size = 65536;
1296 else
1297 /* All new frame buffers will be normal sized */
1298 vp->req_size = vp->max_packet + vp->headroom + SAFETY_MARGIN;
1299 return 0;
1300}
1301
1302#ifdef CONFIG_NET_POLL_CONTROLLER
1303static void vector_net_poll_controller(struct net_device *dev)
1304{
1305 disable_irq(dev->irq);
1306 vector_rx_interrupt(dev->irq, dev);
1307 enable_irq(dev->irq);
1308}
1309#endif
1310
1311static void vector_net_get_drvinfo(struct net_device *dev,
1312 struct ethtool_drvinfo *info)
1313{
1314 strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver));
1315 strlcpy(info->version, DRIVER_VERSION, sizeof(info->version));
1316}
1317
1318static void vector_get_ringparam(struct net_device *netdev,
1319 struct ethtool_ringparam *ring)
1320{
1321 struct vector_private *vp = netdev_priv(netdev);
1322
1323 ring->rx_max_pending = vp->rx_queue->max_depth;
1324 ring->tx_max_pending = vp->tx_queue->max_depth;
1325 ring->rx_pending = vp->rx_queue->max_depth;
1326 ring->tx_pending = vp->tx_queue->max_depth;
1327}
1328
1329static void vector_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
1330{
1331 switch (stringset) {
1332 case ETH_SS_TEST:
1333 *buf = '\0';
1334 break;
1335 case ETH_SS_STATS:
1336 memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
1337 break;
1338 default:
1339 WARN_ON(1);
1340 break;
1341 }
1342}
1343
1344static int vector_get_sset_count(struct net_device *dev, int sset)
1345{
1346 switch (sset) {
1347 case ETH_SS_TEST:
1348 return 0;
1349 case ETH_SS_STATS:
1350 return VECTOR_NUM_STATS;
1351 default:
1352 return -EOPNOTSUPP;
1353 }
1354}
1355
1356static void vector_get_ethtool_stats(struct net_device *dev,
1357 struct ethtool_stats *estats,
1358 u64 *tmp_stats)
1359{
1360 struct vector_private *vp = netdev_priv(dev);
1361
1362 memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats));
1363}
1364
1365static int vector_get_coalesce(struct net_device *netdev,
1366 struct ethtool_coalesce *ec)
1367{
1368 struct vector_private *vp = netdev_priv(netdev);
1369
1370 ec->tx_coalesce_usecs = (vp->coalesce * 1000000) / HZ;
1371 return 0;
1372}
1373
1374static int vector_set_coalesce(struct net_device *netdev,
1375 struct ethtool_coalesce *ec)
1376{
1377 struct vector_private *vp = netdev_priv(netdev);
1378
1379 vp->coalesce = (ec->tx_coalesce_usecs * HZ) / 1000000;
1380 if (vp->coalesce == 0)
1381 vp->coalesce = 1;
1382 return 0;
1383}
1384
1385static const struct ethtool_ops vector_net_ethtool_ops = {
1386 .get_drvinfo = vector_net_get_drvinfo,
1387 .get_link = ethtool_op_get_link,
1388 .get_ts_info = ethtool_op_get_ts_info,
1389 .get_ringparam = vector_get_ringparam,
1390 .get_strings = vector_get_strings,
1391 .get_sset_count = vector_get_sset_count,
1392 .get_ethtool_stats = vector_get_ethtool_stats,
1393 .get_coalesce = vector_get_coalesce,
1394 .set_coalesce = vector_set_coalesce,
1395};
1396
1397
1398static const struct net_device_ops vector_netdev_ops = {
1399 .ndo_open = vector_net_open,
1400 .ndo_stop = vector_net_close,
1401 .ndo_start_xmit = vector_net_start_xmit,
1402 .ndo_set_rx_mode = vector_net_set_multicast_list,
1403 .ndo_tx_timeout = vector_net_tx_timeout,
1404 .ndo_set_mac_address = eth_mac_addr,
1405 .ndo_validate_addr = eth_validate_addr,
1406 .ndo_fix_features = vector_fix_features,
1407 .ndo_set_features = vector_set_features,
1408#ifdef CONFIG_NET_POLL_CONTROLLER
1409 .ndo_poll_controller = vector_net_poll_controller,
1410#endif
1411};
1412
1413
1414static void vector_timer_expire(struct timer_list *t)
1415{
1416 struct vector_private *vp = from_timer(vp, t, tl);
1417
1418 vp->estats.tx_kicks++;
1419 vector_send(vp->tx_queue);
1420}
1421
1422static void vector_eth_configure(
1423 int n,
1424 struct arglist *def
1425 )
1426{
1427 struct vector_device *device;
1428 struct net_device *dev;
1429 struct vector_private *vp;
1430 int err;
1431
1432 device = kzalloc(sizeof(*device), GFP_KERNEL);
1433 if (device == NULL) {
1434 printk(KERN_ERR "eth_configure failed to allocate struct "
1435 "vector_device\n");
1436 return;
1437 }
1438 dev = alloc_etherdev(sizeof(struct vector_private));
1439 if (dev == NULL) {
1440 printk(KERN_ERR "eth_configure: failed to allocate struct "
1441 "net_device for vec%d\n", n);
1442 goto out_free_device;
1443 }
1444
1445 dev->mtu = get_mtu(def);
1446
1447 INIT_LIST_HEAD(&device->list);
1448 device->unit = n;
1449
1450 /* If this name ends up conflicting with an existing registered
1451 * netdevice, that is OK, register_netdev{,ice}() will notice this
1452 * and fail.
1453 */
1454 snprintf(dev->name, sizeof(dev->name), "vec%d", n);
1455 uml_net_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
1456 vp = netdev_priv(dev);
1457
1458 /* sysfs register */
1459 if (!driver_registered) {
1460 platform_driver_register(&uml_net_driver);
1461 driver_registered = 1;
1462 }
1463 device->pdev.id = n;
1464 device->pdev.name = DRIVER_NAME;
1465 device->pdev.dev.release = vector_device_release;
1466 dev_set_drvdata(&device->pdev.dev, device);
1467 if (platform_device_register(&device->pdev))
1468 goto out_free_netdev;
1469 SET_NETDEV_DEV(dev, &device->pdev.dev);
1470
1471 device->dev = dev;
1472
1473 *vp = ((struct vector_private)
1474 {
1475 .list = LIST_HEAD_INIT(vp->list),
1476 .dev = dev,
1477 .unit = n,
1478 .options = get_transport_options(def),
1479 .rx_irq = 0,
1480 .tx_irq = 0,
1481 .parsed = def,
1482 .max_packet = get_mtu(def) + ETH_HEADER_OTHER,
1483 /* TODO - we need to calculate headroom so that ip header
1484 * is 16 byte aligned all the time
1485 */
1486 .headroom = get_headroom(def),
1487 .form_header = NULL,
1488 .verify_header = NULL,
1489 .header_rxbuffer = NULL,
1490 .header_txbuffer = NULL,
1491 .header_size = 0,
1492 .rx_header_size = 0,
1493 .rexmit_scheduled = false,
1494 .opened = false,
1495 .transport_data = NULL,
1496 .in_write_poll = false,
1497 .coalesce = 2,
1498 .req_size = get_req_size(def)
1499 });
1500
1501 dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
1502 tasklet_init(&vp->tx_poll, vector_tx_poll, (unsigned long)vp);
1503 INIT_WORK(&vp->reset_tx, vector_reset_tx);
1504
1505 timer_setup(&vp->tl, vector_timer_expire, 0);
1506 spin_lock_init(&vp->lock);
1507
1508 /* FIXME */
1509 dev->netdev_ops = &vector_netdev_ops;
1510 dev->ethtool_ops = &vector_net_ethtool_ops;
1511 dev->watchdog_timeo = (HZ >> 1);
1512 /* primary IRQ - fixme */
1513 dev->irq = 0; /* we will adjust this once opened */
1514
1515 rtnl_lock();
1516 err = register_netdevice(dev);
1517 rtnl_unlock();
1518 if (err)
1519 goto out_undo_user_init;
1520
1521 spin_lock(&vector_devices_lock);
1522 list_add(&device->list, &vector_devices);
1523 spin_unlock(&vector_devices_lock);
1524
1525 return;
1526
1527out_undo_user_init:
1528 return;
1529out_free_netdev:
1530 free_netdev(dev);
1531out_free_device:
1532 kfree(device);
1533}
1534
1535
1536
1537
1538/*
1539 * Invoked late in the init
1540 */
1541
1542static int __init vector_init(void)
1543{
1544 struct list_head *ele;
1545 struct vector_cmd_line_arg *def;
1546 struct arglist *parsed;
1547
1548 list_for_each(ele, &vec_cmd_line) {
1549 def = list_entry(ele, struct vector_cmd_line_arg, list);
1550 parsed = uml_parse_vector_ifspec(def->arguments);
1551 if (parsed != NULL)
1552 vector_eth_configure(def->unit, parsed);
1553 }
1554 return 0;
1555}
1556
1557
1558/* Invoked at initial argument parsing, only stores
1559 * arguments until a proper vector_init is called
1560 * later
1561 */
1562
1563static int __init vector_setup(char *str)
1564{
1565 char *error;
1566 int n, err;
1567 struct vector_cmd_line_arg *new;
1568
1569 err = vector_parse(str, &n, &str, &error);
1570 if (err) {
1571 printk(KERN_ERR "vector_setup - Couldn't parse '%s' : %s\n",
1572 str, error);
1573 return 1;
1574 }
1575 new = alloc_bootmem(sizeof(*new));
1576 INIT_LIST_HEAD(&new->list);
1577 new->unit = n;
1578 new->arguments = str;
1579 list_add_tail(&new->list, &vec_cmd_line);
1580 return 1;
1581}
1582
1583__setup("vec", vector_setup);
1584__uml_help(vector_setup,
1585"vec[0-9]+:<option>=<value>,<option>=<value>\n"
1586" Configure a vector io network device.\n\n"
1587);
1588
1589late_initcall(vector_init);
1590
1591static struct mc_device vector_mc = {
1592 .list = LIST_HEAD_INIT(vector_mc.list),
1593 .name = "vec",
1594 .config = vector_config,
1595 .get_config = NULL,
1596 .id = vector_id,
1597 .remove = vector_remove,
1598};
1599
1600#ifdef CONFIG_INET
1601static int vector_inetaddr_event(
1602 struct notifier_block *this,
1603 unsigned long event,
1604 void *ptr)
1605{
1606 return NOTIFY_DONE;
1607}
1608
1609static struct notifier_block vector_inetaddr_notifier = {
1610 .notifier_call = vector_inetaddr_event,
1611};
1612
1613static void inet_register(void)
1614{
1615 register_inetaddr_notifier(&vector_inetaddr_notifier);
1616}
1617#else
1618static inline void inet_register(void)
1619{
1620}
1621#endif
1622
1623static int vector_net_init(void)
1624{
1625 mconsole_register_dev(&vector_mc);
1626 inet_register();
1627 return 0;
1628}
1629
1630__initcall(vector_net_init);
1631
1632
1633
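Putting the "vec" __setup handler and the argument accessors above together (transport, vec, mtu, depth, headroom, gro, mac), a UML command line for one of these devices would look roughly like the line below. The raw transport's host interface option is parsed in vector_user.c, which is not part of this excerpt, so the ifname part is an assumption:

  vec0:transport=raw,ifname=eth1,depth=128,gro=1,mac=52:54:00:12:34:56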
diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h
new file mode 100644
index 000000000000..0b0a767b9076
--- /dev/null
+++ b/arch/um/drivers/vector_kern.h
@@ -0,0 +1,130 @@
1/*
2 * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL
4 */
5
6#ifndef __UM_VECTOR_KERN_H
7#define __UM_VECTOR_KERN_H
8
9#include <linux/netdevice.h>
10#include <linux/platform_device.h>
11#include <linux/skbuff.h>
12#include <linux/socket.h>
13#include <linux/list.h>
14#include <linux/ctype.h>
15#include <linux/workqueue.h>
16#include <linux/interrupt.h>
17#include "vector_user.h"
18
19/* Queue structure specially adapted for multiple enqueue/dequeue
20 * in a mmsgrecv/mmsgsend context
21 */
22
23/* Dequeue method */
24
25#define QUEUE_SENDMSG 0
26#define QUEUE_SENDMMSG 1
27
28#define VECTOR_RX 1
29#define VECTOR_TX (1 << 1)
30#define VECTOR_BPF (1 << 2)
31#define VECTOR_QDISC_BYPASS (1 << 3)
32
33#define ETH_MAX_PACKET 1500
34#define ETH_HEADER_OTHER 32 /* just in case someone decides to go mad on QnQ */
35
36struct vector_queue {
37 struct mmsghdr *mmsg_vector;
38 void **skbuff_vector;
39 /* backlink to device which owns us */
40 struct net_device *dev;
41 spinlock_t head_lock;
42 spinlock_t tail_lock;
43 int queue_depth, head, tail, max_depth, max_iov_frags;
44 short options;
45};
46
47struct vector_estats {
48 uint64_t rx_queue_max;
49 uint64_t rx_queue_running_average;
50 uint64_t tx_queue_max;
51 uint64_t tx_queue_running_average;
52 uint64_t rx_encaps_errors;
53 uint64_t tx_timeout_count;
54 uint64_t tx_restart_queue;
55 uint64_t tx_kicks;
56 uint64_t tx_flow_control_xon;
57 uint64_t tx_flow_control_xoff;
58 uint64_t rx_csum_offload_good;
59 uint64_t rx_csum_offload_errors;
60 uint64_t sg_ok;
61 uint64_t sg_linearized;
62};
63
64#define VERIFY_HEADER_NOK -1
65#define VERIFY_HEADER_OK 0
66#define VERIFY_CSUM_OK 1
67
68struct vector_private {
69 struct list_head list;
70 spinlock_t lock;
71 struct net_device *dev;
72
73 int unit;
74
75 /* Timeout timer in TX */
76
77 struct timer_list tl;
78
79 /* Scheduled "remove device" work */
80 struct work_struct reset_tx;
81 struct vector_fds *fds;
82
83 struct vector_queue *rx_queue;
84 struct vector_queue *tx_queue;
85
86 int rx_irq;
87 int tx_irq;
88
89 struct arglist *parsed;
90
91 void *transport_data; /* transport specific params if needed */
92
93 int max_packet;
94 int req_size; /* different from max packet - used for TSO */
95 int headroom;
96
97 int options;
98
99 /* remote address if any - some transports will leave this as null */
100
101 int header_size;
102 int rx_header_size;
103 int coalesce;
104
105 void *header_rxbuffer;
106 void *header_txbuffer;
107
108 int (*form_header)(uint8_t *header,
109 struct sk_buff *skb, struct vector_private *vp);
110 int (*verify_header)(uint8_t *header,
111 struct sk_buff *skb, struct vector_private *vp);
112
113 spinlock_t stats_lock;
114
115 struct tasklet_struct tx_poll;
116 bool rexmit_scheduled;
117 bool opened;
118 bool in_write_poll;
119
120 /* ethtool stats */
121
122 struct vector_estats estats;
123 void *bpf;
124
125 char user[0];
126};
127
128extern int build_transport_data(struct vector_private *vp);
129
130#endif
diff --git a/arch/um/drivers/vector_transports.c b/arch/um/drivers/vector_transports.c
new file mode 100644
index 000000000000..9065047f844b
--- /dev/null
+++ b/arch/um/drivers/vector_transports.c
@@ -0,0 +1,458 @@
1/*
2 * Copyright (C) 2017 - Cambridge Greys Limited
3 * Copyright (C) 2011 - 2014 Cisco Systems Inc
4 * Licensed under the GPL.
5 */
6
7#include <linux/etherdevice.h>
8#include <linux/netdevice.h>
9#include <linux/skbuff.h>
10#include <linux/slab.h>
11#include <asm/byteorder.h>
12#include <uapi/linux/ip.h>
13#include <uapi/linux/virtio_net.h>
14#include <linux/virtio_net.h>
15#include <linux/virtio_byteorder.h>
16#include <linux/netdev_features.h>
17#include "vector_user.h"
18#include "vector_kern.h"
19
20#define GOOD_LINEAR 512
21#define GSO_ERROR "Incoming GSO frames and GRO disabled on the interface"
22
23struct gre_minimal_header {
24 uint16_t header;
25 uint16_t arptype;
26};
27
28
29struct uml_gre_data {
30 uint32_t rx_key;
31 uint32_t tx_key;
32 uint32_t sequence;
33
34 bool ipv6;
35 bool has_sequence;
36 bool pin_sequence;
37 bool checksum;
38 bool key;
39 struct gre_minimal_header expected_header;
40
41 uint32_t checksum_offset;
42 uint32_t key_offset;
43 uint32_t sequence_offset;
44
45};
46
47struct uml_l2tpv3_data {
48 uint64_t rx_cookie;
49 uint64_t tx_cookie;
50 uint64_t rx_session;
51 uint64_t tx_session;
52 uint32_t counter;
53
54 bool udp;
55 bool ipv6;
56 bool has_counter;
57 bool pin_counter;
58 bool cookie;
59 bool cookie_is_64;
60
61 uint32_t cookie_offset;
62 uint32_t session_offset;
63 uint32_t counter_offset;
64};
65
66static int l2tpv3_form_header(uint8_t *header,
67 struct sk_buff *skb, struct vector_private *vp)
68{
69 struct uml_l2tpv3_data *td = vp->transport_data;
70 uint32_t *counter;
71
72 if (td->udp)
73 *(uint32_t *) header = cpu_to_be32(L2TPV3_DATA_PACKET);
74 (*(uint32_t *) (header + td->session_offset)) = td->tx_session;
75
76 if (td->cookie) {
77 if (td->cookie_is_64)
78 (*(uint64_t *)(header + td->cookie_offset)) =
79 td->tx_cookie;
80 else
81 (*(uint32_t *)(header + td->cookie_offset)) =
82 td->tx_cookie;
83 }
84 if (td->has_counter) {
85 counter = (uint32_t *)(header + td->counter_offset);
86 if (td->pin_counter) {
87 *counter = 0;
88 } else {
89 td->counter++;
90 *counter = cpu_to_be32(td->counter);
91 }
92 }
93 return 0;
94}
95
96static int gre_form_header(uint8_t *header,
97 struct sk_buff *skb, struct vector_private *vp)
98{
99 struct uml_gre_data *td = vp->transport_data;
100 uint32_t *sequence;
101 *((uint32_t *) header) = *((uint32_t *) &td->expected_header);
102 if (td->key)
103 (*(uint32_t *) (header + td->key_offset)) = td->tx_key;
104 if (td->has_sequence) {
105 sequence = (uint32_t *)(header + td->sequence_offset);
106 if (td->pin_sequence)
107 *sequence = 0;
108 else
109 *sequence = cpu_to_be32(++td->sequence);
110 }
111 return 0;
112}
113
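/* The raw and tap transports prepend a virtio_net_hdr to each frame
 * so that checksum and GSO state can be exchanged with the host
 * (enabled via PACKET_VNET_HDR / TUNSETVNETHDRSZ in vector_user.c).
 */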
114static int raw_form_header(uint8_t *header,
115 struct sk_buff *skb, struct vector_private *vp)
116{
117 struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header;
118
119 virtio_net_hdr_from_skb(
120 skb,
121 vheader,
122 virtio_legacy_is_little_endian(),
123 false
124 );
125
126 return 0;
127}
128
129static int l2tpv3_verify_header(
130 uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
131{
132 struct uml_l2tpv3_data *td = vp->transport_data;
133 uint32_t *session;
134 uint64_t cookie;
135
136 if ((!td->udp) && (!td->ipv6))
137 header += sizeof(struct iphdr) /* fix for ipv4 raw */;
138
139	/* We do not do a strict check for "data" packets as per
140	 * the RFC, because the pure IP encapsulation does not carry
141	 * that marking anyway.
142	 */
143
144 if (td->cookie) {
145 if (td->cookie_is_64)
146 cookie = *(uint64_t *)(header + td->cookie_offset);
147 else
148 cookie = *(uint32_t *)(header + td->cookie_offset);
149 if (cookie != td->rx_cookie) {
150 if (net_ratelimit())
151 netdev_err(vp->dev, "uml_l2tpv3: unknown cookie id");
152 return -1;
153 }
154 }
155 session = (uint32_t *) (header + td->session_offset);
156 if (*session != td->rx_session) {
157 if (net_ratelimit())
158 netdev_err(vp->dev, "uml_l2tpv3: session mismatch");
159 return -1;
160 }
161 return 0;
162}
163
164static int gre_verify_header(
165 uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
166{
167
168 uint32_t key;
169 struct uml_gre_data *td = vp->transport_data;
170
171 if (!td->ipv6)
172 header += sizeof(struct iphdr) /* fix for ipv4 raw */;
173
174 if (*((uint32_t *) header) != *((uint32_t *) &td->expected_header)) {
175 if (net_ratelimit())
176 netdev_err(vp->dev, "header type disagreement, expecting %0x, got %0x",
177 *((uint32_t *) &td->expected_header),
178 *((uint32_t *) header)
179 );
180 return -1;
181 }
182
183 if (td->key) {
184 key = (*(uint32_t *)(header + td->key_offset));
185 if (key != td->rx_key) {
186 if (net_ratelimit())
187 netdev_err(vp->dev, "unknown key id %0x, expecting %0x",
188 key, td->rx_key);
189 return -1;
190 }
191 }
192 return 0;
193}
194
195static int raw_verify_header(
196 uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
197{
198 struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header;
199
200 if ((vheader->gso_type != VIRTIO_NET_HDR_GSO_NONE) &&
201 (vp->req_size != 65536)) {
202 if (net_ratelimit())
203 netdev_err(
204 vp->dev,
205 GSO_ERROR
206 );
207 }
208 if ((vheader->flags & VIRTIO_NET_HDR_F_DATA_VALID) > 0)
209 return 1;
210
211 virtio_net_hdr_to_skb(skb, vheader, virtio_legacy_is_little_endian());
212 return 0;
213}
214
215static bool get_uint_param(
216 struct arglist *def, char *param, unsigned int *result)
217{
218 char *arg = uml_vector_fetch_arg(def, param);
219
220 if (arg != NULL) {
221 if (kstrtoint(arg, 0, result) == 0)
222 return true;
223 }
224 return false;
225}
226
227static bool get_ulong_param(
228 struct arglist *def, char *param, unsigned long *result)
229{
230 char *arg = uml_vector_fetch_arg(def, param);
231
232 if (arg != NULL) {
233 if (kstrtoul(arg, 0, result) == 0)
234 return true;
236 }
237 return false;
238}
239
240static int build_gre_transport_data(struct vector_private *vp)
241{
242 struct uml_gre_data *td;
243 int temp_int;
244 int temp_rx;
245 int temp_tx;
246
247 vp->transport_data = kmalloc(sizeof(struct uml_gre_data), GFP_KERNEL);
248 if (vp->transport_data == NULL)
249 return -ENOMEM;
250 td = vp->transport_data;
251 td->sequence = 0;
252
253 td->expected_header.arptype = GRE_IRB;
254 td->expected_header.header = 0;
255
256 vp->form_header = &gre_form_header;
257 vp->verify_header = &gre_verify_header;
258 vp->header_size = 4;
259 td->key_offset = 4;
260 td->sequence_offset = 4;
261 td->checksum_offset = 4;
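	/* The base GRE header is 4 bytes: flags plus the 0x6558
	 * (Transparent Ethernet Bridging) protocol type. Optional key
	 * and sequence fields follow it, so enabling them below grows
	 * header_size and shifts the later offsets accordingly.
	 */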
262
263 td->ipv6 = false;
264 if (get_uint_param(vp->parsed, "v6", &temp_int)) {
265 if (temp_int > 0)
266 td->ipv6 = true;
267 }
268 td->key = false;
269 if (get_uint_param(vp->parsed, "rx_key", &temp_rx)) {
270 if (get_uint_param(vp->parsed, "tx_key", &temp_tx)) {
271 td->key = true;
272 td->expected_header.header |= GRE_MODE_KEY;
273 td->rx_key = cpu_to_be32(temp_rx);
274 td->tx_key = cpu_to_be32(temp_tx);
275 vp->header_size += 4;
276 td->sequence_offset += 4;
277 } else {
278 return -EINVAL;
279 }
280 }
281
282	td->has_sequence = false;
283 if (get_uint_param(vp->parsed, "sequence", &temp_int)) {
284 if (temp_int > 0) {
285 vp->header_size += 4;
286 td->has_sequence = true;
287 td->expected_header.header |= GRE_MODE_SEQUENCE;
288 if (get_uint_param(
289 vp->parsed, "pin_sequence", &temp_int)) {
290 if (temp_int > 0)
291 td->pin_sequence = true;
292 }
293 }
294 }
295 vp->rx_header_size = vp->header_size;
296 if (!td->ipv6)
297 vp->rx_header_size += sizeof(struct iphdr);
298 return 0;
299}
300
301static int build_l2tpv3_transport_data(struct vector_private *vp)
302{
303
304 struct uml_l2tpv3_data *td;
305 int temp_int, temp_rxs, temp_txs;
306 unsigned long temp_rx;
307 unsigned long temp_tx;
308
309 vp->transport_data = kmalloc(
310 sizeof(struct uml_l2tpv3_data), GFP_KERNEL);
311
312 if (vp->transport_data == NULL)
313 return -ENOMEM;
314
315 td = vp->transport_data;
316
317 vp->form_header = &l2tpv3_form_header;
318 vp->verify_header = &l2tpv3_verify_header;
319 td->counter = 0;
320
321 vp->header_size = 4;
322 td->session_offset = 0;
323 td->cookie_offset = 4;
324 td->counter_offset = 4;
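	/* Base layout of the header we build: session id at offset 0,
	 * then the optional cookie and counter; each optional field
	 * enabled below grows header_size and shifts the later offsets.
	 * UDP encapsulation prepends a further 4 bytes carrying the
	 * 0x30000 data-packet marker (see l2tpv3_form_header).
	 */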
325
326
327 td->ipv6 = false;
328 if (get_uint_param(vp->parsed, "v6", &temp_int)) {
329 if (temp_int > 0)
330 td->ipv6 = true;
331 }
332
333 if (get_uint_param(vp->parsed, "rx_session", &temp_rxs)) {
334 if (get_uint_param(vp->parsed, "tx_session", &temp_txs)) {
335 td->tx_session = cpu_to_be32(temp_txs);
336 td->rx_session = cpu_to_be32(temp_rxs);
337 } else {
338 return -EINVAL;
339 }
340 } else {
341 return -EINVAL;
342 }
343
344 td->cookie_is_64 = false;
345 if (get_uint_param(vp->parsed, "cookie64", &temp_int)) {
346 if (temp_int > 0)
347 td->cookie_is_64 = true;
348 }
349 td->cookie = false;
350 if (get_ulong_param(vp->parsed, "rx_cookie", &temp_rx)) {
351 if (get_ulong_param(vp->parsed, "tx_cookie", &temp_tx)) {
352 td->cookie = true;
353 if (td->cookie_is_64) {
354 td->rx_cookie = cpu_to_be64(temp_rx);
355 td->tx_cookie = cpu_to_be64(temp_tx);
356 vp->header_size += 8;
357 td->counter_offset += 8;
358 } else {
359 td->rx_cookie = cpu_to_be32(temp_rx);
360 td->tx_cookie = cpu_to_be32(temp_tx);
361 vp->header_size += 4;
362 td->counter_offset += 4;
363 }
364 } else {
365 return -EINVAL;
366 }
367 }
368
369 td->has_counter = false;
370 if (get_uint_param(vp->parsed, "counter", &temp_int)) {
371 if (temp_int > 0) {
372 td->has_counter = true;
373 vp->header_size += 4;
374 if (get_uint_param(
375 vp->parsed, "pin_counter", &temp_int)) {
376 if (temp_int > 0)
377 td->pin_counter = true;
378 }
379 }
380 }
381
382 if (get_uint_param(vp->parsed, "udp", &temp_int)) {
383 if (temp_int > 0) {
384 td->udp = true;
385 vp->header_size += 4;
386 td->counter_offset += 4;
387 td->session_offset += 4;
388 td->cookie_offset += 4;
389 }
390 }
391
392 vp->rx_header_size = vp->header_size;
393 if ((!td->ipv6) && (!td->udp))
394 vp->rx_header_size += sizeof(struct iphdr);
395
396 return 0;
397}
398
399static int build_raw_transport_data(struct vector_private *vp)
400{
401 if (uml_raw_enable_vnet_headers(vp->fds->rx_fd)) {
402 if (!uml_raw_enable_vnet_headers(vp->fds->tx_fd))
403 return -1;
404 vp->form_header = &raw_form_header;
405 vp->verify_header = &raw_verify_header;
406 vp->header_size = sizeof(struct virtio_net_hdr);
407 vp->rx_header_size = sizeof(struct virtio_net_hdr);
408 vp->dev->hw_features |= (NETIF_F_TSO | NETIF_F_GRO);
409 vp->dev->features |=
410 (NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
411 NETIF_F_TSO | NETIF_F_GRO);
412 netdev_info(
413 vp->dev,
414 "raw: using vnet headers for tso and tx/rx checksum"
415 );
416 }
417 return 0;
418}
419
420static int build_tap_transport_data(struct vector_private *vp)
421{
422 if (uml_raw_enable_vnet_headers(vp->fds->rx_fd)) {
423 vp->form_header = &raw_form_header;
424 vp->verify_header = &raw_verify_header;
425 vp->header_size = sizeof(struct virtio_net_hdr);
426 vp->rx_header_size = sizeof(struct virtio_net_hdr);
427 vp->dev->hw_features |=
428 (NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
429 vp->dev->features |=
430 (NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
431 NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
432 netdev_info(
433 vp->dev,
434 "tap/raw: using vnet headers for tso and tx/rx checksum"
435 );
436 } else {
437 return 0; /* do not try to enable tap too if raw failed */
438 }
439 if (uml_tap_enable_vnet_headers(vp->fds->tx_fd))
440 return 0;
441 return -1;
442}
443
444int build_transport_data(struct vector_private *vp)
445{
446 char *transport = uml_vector_fetch_arg(vp->parsed, "transport");
447
448 if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0)
449 return build_gre_transport_data(vp);
450 if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0)
451 return build_l2tpv3_transport_data(vp);
452 if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
453 return build_raw_transport_data(vp);
454 if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
455 return build_tap_transport_data(vp);
456 return 0;
457}
458
diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c
new file mode 100644
index 000000000000..4d6a78e31089
--- /dev/null
+++ b/arch/um/drivers/vector_user.c
@@ -0,0 +1,590 @@
1/*
2 * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL
4 */
5
6#include <stdio.h>
7#include <unistd.h>
8#include <stdarg.h>
9#include <errno.h>
10#include <stddef.h>
11#include <string.h>
12#include <sys/ioctl.h>
13#include <net/if.h>
14#include <linux/if_tun.h>
15#include <arpa/inet.h>
16#include <sys/types.h>
17#include <sys/stat.h>
18#include <fcntl.h>
19#include <sys/types.h>
20#include <sys/socket.h>
21#include <net/ethernet.h>
22#include <netinet/ip.h>
23#include <netinet/ether.h>
24#include <linux/if_ether.h>
25#include <linux/if_packet.h>
26#include <sys/socket.h>
27#include <sys/wait.h>
28#include <linux/virtio_net.h>
29#include <netdb.h>
30#include <stdlib.h>
31#include <os.h>
32#include <um_malloc.h>
33#include "vector_user.h"
34
35#define ID_GRE 0
36#define ID_L2TPV3 1
37#define ID_MAX 1
38
39#define TOKEN_IFNAME "ifname"
40
41#define TRANS_RAW "raw"
42#define TRANS_RAW_LEN strlen(TRANS_RAW)
43
44#define VNET_HDR_FAIL "could not enable vnet headers on fd %d"
45#define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s"
46#define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i"
47#define BPF_ATTACH_FAIL "Failed to attach filter size %d to %d, err %d\n"
48
49/* This is very ugly and brute force lookup, but it is done
50 * only once at initialization so not worth doing hashes or
51 * anything more intelligent
52 */
53
54char *uml_vector_fetch_arg(struct arglist *ifspec, char *token)
55{
56 int i;
57
58 for (i = 0; i < ifspec->numargs; i++) {
59 if (strcmp(ifspec->tokens[i], token) == 0)
60 return ifspec->values[i];
61 }
62 return NULL;
63
64}
65
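/* Parse a "token=value,token=value,..." specification in place:
 * '=' and ',' are overwritten with NULs and the token/value pointers
 * point back into the original string. A (hypothetical) spec such as
 * "transport=raw,ifname=eth0" yields tokens {"transport", "ifname"}
 * and values {"raw", "eth0"}.
 */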
66struct arglist *uml_parse_vector_ifspec(char *arg)
67{
68 struct arglist *result;
69 int pos, len;
70 bool parsing_token = true, next_starts = true;
71
72 if (arg == NULL)
73 return NULL;
74 result = uml_kmalloc(sizeof(struct arglist), UM_GFP_KERNEL);
75 if (result == NULL)
76 return NULL;
77 result->numargs = 0;
78 len = strlen(arg);
79 for (pos = 0; pos < len; pos++) {
80 if (next_starts) {
81 if (parsing_token) {
82 result->tokens[result->numargs] = arg + pos;
83 } else {
84 result->values[result->numargs] = arg + pos;
85 result->numargs++;
86 }
87 next_starts = false;
88 }
89 if (*(arg + pos) == '=') {
90 if (parsing_token)
91 parsing_token = false;
92 else
93 goto cleanup;
94 next_starts = true;
95 (*(arg + pos)) = '\0';
96 }
97 if (*(arg + pos) == ',') {
98 parsing_token = true;
99 next_starts = true;
100 (*(arg + pos)) = '\0';
101 }
102 }
103 return result;
104cleanup:
105 printk(UM_KERN_ERR "vector_setup - Couldn't parse '%s'\n", arg);
106 kfree(result);
107 return NULL;
108}
109
110/*
111 * Socket/FD configuration functions. These return a structure
112 * of rx and tx descriptors to cover cases where these are not
113 * the same (e.g. read via a raw socket and write via tap).
114 */
115
116#define PATH_NET_TUN "/dev/net/tun"
117
118static struct vector_fds *user_init_tap_fds(struct arglist *ifspec)
119{
120 struct ifreq ifr;
121 int fd = -1;
122 struct sockaddr_ll sock;
123 int err = -ENOMEM, offload;
124 char *iface;
125 struct vector_fds *result = NULL;
126
127 iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
128 if (iface == NULL) {
129 printk(UM_KERN_ERR "uml_tap: failed to parse interface spec\n");
130 goto tap_cleanup;
131 }
132
133 result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
134 if (result == NULL) {
135 printk(UM_KERN_ERR "uml_tap: failed to allocate file descriptors\n");
136 goto tap_cleanup;
137 }
138 result->rx_fd = -1;
139 result->tx_fd = -1;
140 result->remote_addr = NULL;
141 result->remote_addr_size = 0;
142
143 /* TAP */
144
145 fd = open(PATH_NET_TUN, O_RDWR);
146 if (fd < 0) {
147 printk(UM_KERN_ERR "uml_tap: failed to open tun device\n");
148 goto tap_cleanup;
149 }
150 result->tx_fd = fd;
151 memset(&ifr, 0, sizeof(ifr));
152 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
153 strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
154
155 err = ioctl(fd, TUNSETIFF, (void *) &ifr);
156 if (err != 0) {
157 printk(UM_KERN_ERR "uml_tap: failed to select tap interface\n");
158 goto tap_cleanup;
159 }
160
161 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
162 ioctl(fd, TUNSETOFFLOAD, offload);
163
164 /* RAW */
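	/* RX side: a raw packet socket bound to the same interface so
	 * that reception can use recvmmsg(); transmission still goes
	 * through the tap fd opened above.
	 */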
165
166 fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
167 if (fd == -1) {
168 printk(UM_KERN_ERR
169 "uml_tap: failed to create socket: %i\n", -errno);
170 goto tap_cleanup;
171 }
172 result->rx_fd = fd;
173 memset(&ifr, 0, sizeof(ifr));
174 strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
175 if (ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) {
176 printk(UM_KERN_ERR
177 "uml_tap: failed to set interface: %i\n", -errno);
178 goto tap_cleanup;
179 }
180
181 sock.sll_family = AF_PACKET;
182 sock.sll_protocol = htons(ETH_P_ALL);
183 sock.sll_ifindex = ifr.ifr_ifindex;
184
185 if (bind(fd,
186 (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
187 printk(UM_KERN_ERR
188 "user_init_tap: failed to bind raw pair, err %d\n",
189 -errno);
190 goto tap_cleanup;
191 }
192 return result;
193tap_cleanup:
194 printk(UM_KERN_ERR "user_init_tap: init failed, error %d", err);
195 if (result != NULL) {
196 if (result->rx_fd >= 0)
197 os_close_file(result->rx_fd);
198 if (result->tx_fd >= 0)
199 os_close_file(result->tx_fd);
200 kfree(result);
201 }
202 return NULL;
203}
204
205
206static struct vector_fds *user_init_raw_fds(struct arglist *ifspec)
207{
208 struct ifreq ifr;
209 int rxfd = -1, txfd = -1;
210 struct sockaddr_ll sock;
211 int err = -ENOMEM;
212 char *iface;
213 struct vector_fds *result = NULL;
214
215 iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
216 if (iface == NULL)
217 goto cleanup;
218
219	rxfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
220 if (rxfd == -1) {
221 err = -errno;
222 goto cleanup;
223 }
224 txfd = socket(AF_PACKET, SOCK_RAW, 0); /* Turn off RX on this fd */
225 if (txfd == -1) {
226 err = -errno;
227 goto cleanup;
228 }
229 memset(&ifr, 0, sizeof(ifr));
230 strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
231 if (ioctl(rxfd, SIOCGIFINDEX, (void *) &ifr) < 0) {
232 err = -errno;
233 goto cleanup;
234 }
235
236 sock.sll_family = AF_PACKET;
237 sock.sll_protocol = htons(ETH_P_ALL);
238 sock.sll_ifindex = ifr.ifr_ifindex;
239
240 if (bind(rxfd,
241 (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
242 err = -errno;
243 goto cleanup;
244 }
245
246 sock.sll_family = AF_PACKET;
247 sock.sll_protocol = htons(ETH_P_IP);
248 sock.sll_ifindex = ifr.ifr_ifindex;
249
250 if (bind(txfd,
251 (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
252 err = -errno;
253 goto cleanup;
254 }
255
256 result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
257 if (result != NULL) {
258 result->rx_fd = rxfd;
259 result->tx_fd = txfd;
260 result->remote_addr = NULL;
261 result->remote_addr_size = 0;
262 }
263 return result;
264cleanup:
265 printk(UM_KERN_ERR "user_init_raw: init failed, error %d", err);
266 if (rxfd >= 0)
267 os_close_file(rxfd);
268 if (txfd >= 0)
269 os_close_file(txfd);
270 if (result != NULL)
271 kfree(result);
272 return NULL;
273}
274
275
276bool uml_raw_enable_qdisc_bypass(int fd)
277{
278 int optval = 1;
279
280 if (setsockopt(fd,
281 SOL_PACKET, PACKET_QDISC_BYPASS,
282 &optval, sizeof(optval)) != 0) {
283 return false;
284 }
285 return true;
286}
287
288bool uml_raw_enable_vnet_headers(int fd)
289{
290 int optval = 1;
291
292 if (setsockopt(fd,
293 SOL_PACKET, PACKET_VNET_HDR,
294 &optval, sizeof(optval)) != 0) {
295 printk(UM_KERN_INFO VNET_HDR_FAIL, fd);
296 return false;
297 }
298 return true;
299}

300bool uml_tap_enable_vnet_headers(int fd)
301{
302 unsigned int features;
303 int len = sizeof(struct virtio_net_hdr);
304
305 if (ioctl(fd, TUNGETFEATURES, &features) == -1) {
306 printk(UM_KERN_INFO TUN_GET_F_FAIL, strerror(errno));
307 return false;
308 }
309 if ((features & IFF_VNET_HDR) == 0) {
310 printk(UM_KERN_INFO "tapraw: No VNET HEADER support");
311 return false;
312 }
313 ioctl(fd, TUNSETVNETHDRSZ, &len);
314 return true;
315}
316
317static struct vector_fds *user_init_socket_fds(struct arglist *ifspec, int id)
318{
319 int err = -ENOMEM;
320 int fd = -1, gairet;
321 struct addrinfo srchints;
322 struct addrinfo dsthints;
323 bool v6, udp;
324 char *value;
325 char *src, *dst, *srcport, *dstport;
326 struct addrinfo *gairesult = NULL;
327 struct vector_fds *result = NULL;
328
329
330 value = uml_vector_fetch_arg(ifspec, "v6");
331 v6 = false;
332 udp = false;
333 if (value != NULL) {
334 if (strtol((const char *) value, NULL, 10) > 0)
335 v6 = true;
336 }
337
338 value = uml_vector_fetch_arg(ifspec, "udp");
339 if (value != NULL) {
340 if (strtol((const char *) value, NULL, 10) > 0)
341 udp = true;
342 }
343 src = uml_vector_fetch_arg(ifspec, "src");
344 dst = uml_vector_fetch_arg(ifspec, "dst");
345 srcport = uml_vector_fetch_arg(ifspec, "srcport");
346 dstport = uml_vector_fetch_arg(ifspec, "dstport");
347
348 memset(&dsthints, 0, sizeof(dsthints));
349
350 if (v6)
351 dsthints.ai_family = AF_INET6;
352 else
353 dsthints.ai_family = AF_INET;
354
355 switch (id) {
356 case ID_GRE:
357 dsthints.ai_socktype = SOCK_RAW;
358 dsthints.ai_protocol = IPPROTO_GRE;
359 break;
360 case ID_L2TPV3:
361 if (udp) {
362 dsthints.ai_socktype = SOCK_DGRAM;
363 dsthints.ai_protocol = 0;
364 } else {
365 dsthints.ai_socktype = SOCK_RAW;
366 dsthints.ai_protocol = IPPROTO_L2TP;
367 }
368 break;
369 default:
370 printk(KERN_ERR "Unsupported socket type\n");
371 return NULL;
372 }
373 memcpy(&srchints, &dsthints, sizeof(struct addrinfo));
374
375 gairet = getaddrinfo(src, srcport, &dsthints, &gairesult);
376 if ((gairet != 0) || (gairesult == NULL)) {
377 printk(UM_KERN_ERR
378 "socket_open : could not resolve src, error = %s",
379 gai_strerror(gairet)
380 );
381 return NULL;
382 }
383 fd = socket(gairesult->ai_family,
384 gairesult->ai_socktype, gairesult->ai_protocol);
385 if (fd == -1) {
386 printk(UM_KERN_ERR
387 "socket_open : could not open socket, error = %d",
388 -errno
389 );
390 goto cleanup;
391 }
392 if (bind(fd,
393 (struct sockaddr *) gairesult->ai_addr,
394 gairesult->ai_addrlen)) {
395 printk(UM_KERN_ERR L2TPV3_BIND_FAIL, errno);
396 goto cleanup;
397 }
398
399 if (gairesult != NULL)
400 freeaddrinfo(gairesult);
401
402 gairesult = NULL;
403
404 gairet = getaddrinfo(dst, dstport, &dsthints, &gairesult);
405 if ((gairet != 0) || (gairesult == NULL)) {
406 printk(UM_KERN_ERR
407 "socket_open : could not resolve dst, error = %s",
408 gai_strerror(gairet)
409 );
410		goto cleanup;
411 }
412
413 result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
414 if (result != NULL) {
415 result->rx_fd = fd;
416 result->tx_fd = fd;
417 result->remote_addr = uml_kmalloc(
418 gairesult->ai_addrlen, UM_GFP_KERNEL);
419 if (result->remote_addr == NULL)
420 goto cleanup;
421 result->remote_addr_size = gairesult->ai_addrlen;
422 memcpy(
423 result->remote_addr,
424 gairesult->ai_addr,
425 gairesult->ai_addrlen
426 );
427 }
428 freeaddrinfo(gairesult);
429 return result;
430cleanup:
431 if (gairesult != NULL)
432 freeaddrinfo(gairesult);
433 printk(UM_KERN_ERR "user_init_socket: init failed, error %d", err);
434 if (fd >= 0)
435 os_close_file(fd);
436 if (result != NULL) {
437 if (result->remote_addr != NULL)
438 kfree(result->remote_addr);
439 kfree(result);
440 }
441 return NULL;
442}
443
444struct vector_fds *uml_vector_user_open(
445 int unit,
446 struct arglist *parsed
447)
448{
449 char *transport;
450
451 if (parsed == NULL) {
452 printk(UM_KERN_ERR "no parsed config for unit %d\n", unit);
453 return NULL;
454 }
455 transport = uml_vector_fetch_arg(parsed, "transport");
456 if (transport == NULL) {
457 printk(UM_KERN_ERR "missing transport for unit %d\n", unit);
458 return NULL;
459 }
460 if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
461 return user_init_raw_fds(parsed);
462 if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
463 return user_init_tap_fds(parsed);
464 if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0)
465 return user_init_socket_fds(parsed, ID_GRE);
466 if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0)
467 return user_init_socket_fds(parsed, ID_L2TPV3);
468 return NULL;
469}
470
471
472int uml_vector_sendmsg(int fd, void *hdr, int flags)
473{
474 int n;
475
476 CATCH_EINTR(n = sendmsg(fd, (struct msghdr *) hdr, flags));
477 if ((n < 0) && (errno == EAGAIN))
478 return 0;
479 if (n >= 0)
480 return n;
481 else
482 return -errno;
483}
484
485int uml_vector_recvmsg(int fd, void *hdr, int flags)
486{
487 int n;
488
489 CATCH_EINTR(n = recvmsg(fd, (struct msghdr *) hdr, flags));
490 if ((n < 0) && (errno == EAGAIN))
491 return 0;
492 if (n >= 0)
493 return n;
494 else
495 return -errno;
496}
497
498int uml_vector_writev(int fd, void *hdr, int iovcount)
499{
500 int n;
501
502 CATCH_EINTR(n = writev(fd, (struct iovec *) hdr, iovcount));
503 if ((n < 0) && (errno == EAGAIN))
504 return 0;
505 if (n >= 0)
506 return n;
507 else
508 return -errno;
509}
510
511int uml_vector_sendmmsg(
512 int fd,
513 void *msgvec,
514 unsigned int vlen,
515 unsigned int flags)
516{
517 int n;
518
519 CATCH_EINTR(n = sendmmsg(fd, (struct mmsghdr *) msgvec, vlen, flags));
520 if ((n < 0) && (errno == EAGAIN))
521 return 0;
522 if (n >= 0)
523 return n;
524 else
525 return -errno;
526}
527
528int uml_vector_recvmmsg(
529 int fd,
530 void *msgvec,
531 unsigned int vlen,
532 unsigned int flags)
533{
534 int n;
535
536 CATCH_EINTR(
537 n = recvmmsg(fd, (struct mmsghdr *) msgvec, vlen, flags, 0));
538 if ((n < 0) && (errno == EAGAIN))
539 return 0;
540 if (n >= 0)
541 return n;
542 else
543 return -errno;
544}

545int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len)
546{
547 int err = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, bpf, bpf_len);
548
549 if (err < 0)
550 printk(KERN_ERR BPF_ATTACH_FAIL, bpf_len, fd, -errno);
551 return err;
552}
553
554#define DEFAULT_BPF_LEN 6
555
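/* The default BPF program built below drops frames whose Ethernet
 * source address (bytes 6-11 of the frame) matches the interface's
 * own MAC - i.e. our own transmitted frames echoed back by the raw
 * socket - and accepts everything else.
 */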
556void *uml_vector_default_bpf(int fd, void *mac)
557{
558 struct sock_filter *bpf;
559 uint32_t *mac1 = (uint32_t *)(mac + 2);
560 uint16_t *mac2 = (uint16_t *) mac;
561 struct sock_fprog bpf_prog = {
562 .len = 6,
563 .filter = NULL,
564 };
565
566 bpf = uml_kmalloc(
567 sizeof(struct sock_filter) * DEFAULT_BPF_LEN, UM_GFP_KERNEL);
568 if (bpf != NULL) {
569 bpf_prog.filter = bpf;
570 /* ld [8] */
571 bpf[0] = (struct sock_filter){ 0x20, 0, 0, 0x00000008 };
572 /* jeq #0xMAC[2-6] jt 2 jf 5*/
573 bpf[1] = (struct sock_filter){ 0x15, 0, 3, ntohl(*mac1)};
574 /* ldh [6] */
575 bpf[2] = (struct sock_filter){ 0x28, 0, 0, 0x00000006 };
576 /* jeq #0xMAC[0-1] jt 4 jf 5 */
577 bpf[3] = (struct sock_filter){ 0x15, 0, 1, ntohs(*mac2)};
578 /* ret #0 */
579 bpf[4] = (struct sock_filter){ 0x6, 0, 0, 0x00000000 };
580 /* ret #0x40000 */
581 bpf[5] = (struct sock_filter){ 0x6, 0, 0, 0x00040000 };
582 if (uml_vector_attach_bpf(
583 fd, &bpf_prog, sizeof(struct sock_fprog)) < 0) {
584 kfree(bpf);
585 bpf = NULL;
586 }
587 }
588 return bpf;
589}
590
diff --git a/arch/um/drivers/vector_user.h b/arch/um/drivers/vector_user.h
new file mode 100644
index 000000000000..d7cbff73b7ff
--- /dev/null
+++ b/arch/um/drivers/vector_user.h
@@ -0,0 +1,100 @@
1/*
2 * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL
4 */
5
6#ifndef __UM_VECTOR_USER_H
7#define __UM_VECTOR_USER_H
8
9#define MAXVARGS 20
10
11#define TOKEN_IFNAME "ifname"
12
13#define TRANS_RAW "raw"
14#define TRANS_RAW_LEN strlen(TRANS_RAW)
15
16#define TRANS_TAP "tap"
17#define TRANS_TAP_LEN strlen(TRANS_TAP)
18
19
20#define TRANS_GRE "gre"
21#define TRANS_GRE_LEN strlen(TRANS_GRE)
22
23#define TRANS_L2TPV3 "l2tpv3"
24#define TRANS_L2TPV3_LEN strlen(TRANS_L2TPV3)
25
26#ifndef IPPROTO_GRE
27#define IPPROTO_GRE 0x2F
28#endif
29
30#define GRE_MODE_CHECKSUM cpu_to_be16(8 << 12) /* checksum */
31#define GRE_MODE_RESERVED cpu_to_be16(4 << 12) /* unused */
32#define GRE_MODE_KEY cpu_to_be16(2 << 12) /* KEY present */
33#define GRE_MODE_SEQUENCE cpu_to_be16(1 << 12) /* sequence */
34
35#define GRE_IRB cpu_to_be16(0x6558)
36
37#define L2TPV3_DATA_PACKET 0x30000
38
39/* IANA-assigned IP protocol ID for L2TPv3 */
40
41#ifndef IPPROTO_L2TP
42#define IPPROTO_L2TP 0x73
43#endif
44
45struct arglist {
46 int numargs;
47 char *tokens[MAXVARGS];
48 char *values[MAXVARGS];
49};
50
51/* Separating read and write FDs allows us to have different
52 * rx and tx methods. Example - read a tap via a raw socket using
53 * recvmmsg, write using legacy tap write calls.
54 */
55
56struct vector_fds {
57 int rx_fd;
58 int tx_fd;
59 void *remote_addr;
60 int remote_addr_size;
61};
62
63#define VECTOR_READ 1
64#define VECTOR_WRITE (1 << 1)
65#define VECTOR_HEADERS (1 << 2)
66
67extern struct arglist *uml_parse_vector_ifspec(char *arg);
68
69extern struct vector_fds *uml_vector_user_open(
70 int unit,
71 struct arglist *parsed
72);
73
74extern char *uml_vector_fetch_arg(
75 struct arglist *ifspec,
76 char *token
77);
78
79extern int uml_vector_recvmsg(int fd, void *hdr, int flags);
80extern int uml_vector_sendmsg(int fd, void *hdr, int flags);
81extern int uml_vector_writev(int fd, void *hdr, int iovcount);
82extern int uml_vector_sendmmsg(
83 int fd, void *msgvec,
84 unsigned int vlen,
85 unsigned int flags
86);
87extern int uml_vector_recvmmsg(
88 int fd,
89 void *msgvec,
90 unsigned int vlen,
91 unsigned int flags
92);
93extern void *uml_vector_default_bpf(int fd, void *mac);
94extern int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len);
95extern bool uml_raw_enable_qdisc_bypass(int fd);
96extern bool uml_raw_enable_vnet_headers(int fd);
97extern bool uml_tap_enable_vnet_headers(int fd);
98
99
100#endif
diff --git a/arch/um/include/asm/asm-prototypes.h b/arch/um/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..5898a26daa0d
--- /dev/null
+++ b/arch/um/include/asm/asm-prototypes.h
@@ -0,0 +1 @@
#include <asm-generic/asm-prototypes.h>
diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h
index b5cdd3f91157..49ed3e35b35a 100644
--- a/arch/um/include/asm/irq.h
+++ b/arch/um/include/asm/irq.h
@@ -18,7 +18,19 @@
18#define XTERM_IRQ 13 18#define XTERM_IRQ 13
19#define RANDOM_IRQ 14 19#define RANDOM_IRQ 14
20 20
21#ifdef CONFIG_UML_NET_VECTOR
22
23#define VECTOR_BASE_IRQ 15
24#define VECTOR_IRQ_SPACE 8
25
26#define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ)
27
28#else
29
21#define LAST_IRQ RANDOM_IRQ 30#define LAST_IRQ RANDOM_IRQ
31
32#endif
33
22#define NR_IRQS (LAST_IRQ + 1) 34#define NR_IRQS (LAST_IRQ + 1)
23 35
24#endif 36#endif
diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h
index df5633053957..a7a6120f19d5 100644
--- a/arch/um/include/shared/irq_user.h
+++ b/arch/um/include/shared/irq_user.h
@@ -7,6 +7,7 @@
7#define __IRQ_USER_H__ 7#define __IRQ_USER_H__
8 8
9#include <sysdep/ptrace.h> 9#include <sysdep/ptrace.h>
10#include <stdbool.h>
10 11
11struct irq_fd { 12struct irq_fd {
12 struct irq_fd *next; 13 struct irq_fd *next;
@@ -15,10 +16,17 @@ struct irq_fd {
15 int type; 16 int type;
16 int irq; 17 int irq;
17 int events; 18 int events;
18 int current_events; 19 bool active;
20 bool pending;
21 bool purge;
19}; 22};
20 23
21enum { IRQ_READ, IRQ_WRITE }; 24#define IRQ_READ 0
25#define IRQ_WRITE 1
26#define IRQ_NONE 2
27#define MAX_IRQ_TYPE (IRQ_NONE + 1)
28
29
22 30
23struct siginfo; 31struct siginfo;
24extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); 32extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
diff --git a/arch/um/include/shared/net_kern.h b/arch/um/include/shared/net_kern.h
index 012ac87d4900..40442b98b173 100644
--- a/arch/um/include/shared/net_kern.h
+++ b/arch/um/include/shared/net_kern.h
@@ -65,5 +65,7 @@ extern int tap_setup_common(char *str, char *type, char **dev_name,
65 char **mac_out, char **gate_addr); 65 char **mac_out, char **gate_addr);
66extern void register_transport(struct transport *new); 66extern void register_transport(struct transport *new);
67extern unsigned short eth_protocol(struct sk_buff *skb); 67extern unsigned short eth_protocol(struct sk_buff *skb);
68extern void uml_net_setup_etheraddr(struct net_device *dev, char *str);
69
68 70
69#endif 71#endif
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index d8ddaf9790d2..048ae37eb5aa 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -290,15 +290,16 @@ extern void halt_skas(void);
290extern void reboot_skas(void); 290extern void reboot_skas(void);
291 291
292/* irq.c */ 292/* irq.c */
293extern int os_waiting_for_events(struct irq_fd *active_fds); 293extern int os_waiting_for_events_epoll(void);
294extern int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds); 294extern void *os_epoll_get_data_pointer(int index);
295extern void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg, 295extern int os_epoll_triggered(int index, int events);
296 struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2); 296extern int os_event_mask(int irq_type);
297extern void os_free_irq_later(struct irq_fd *active_fds, 297extern int os_setup_epoll(void);
298 int irq, void *dev_id); 298extern int os_add_epoll_fd(int events, int fd, void *data);
299extern int os_get_pollfd(int i); 299extern int os_mod_epoll_fd(int events, int fd, void *data);
300extern void os_set_pollfd(int i, int fd); 300extern int os_del_epoll_fd(int fd);
301extern void os_set_ioignore(void); 301extern void os_set_ioignore(void);
302extern void os_close_epoll_fd(void);
302 303
303/* sigio.c */ 304/* sigio.c */
304extern int add_sigio_fd(int fd); 305extern int add_sigio_fd(int fd);
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 23cb9350d47e..6b7f3827d6e4 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -1,4 +1,6 @@
1/* 1/*
2 * Copyright (C) 2017 - Cambridge Greys Ltd
3 * Copyright (C) 2011 - 2014 Cisco Systems Inc
2 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 4 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL 5 * Licensed under the GPL
4 * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c: 6 * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
@@ -16,243 +18,362 @@
16#include <as-layout.h> 18#include <as-layout.h>
17#include <kern_util.h> 19#include <kern_util.h>
18#include <os.h> 20#include <os.h>
21#include <irq_user.h>
19 22
20/* 23
21 * This list is accessed under irq_lock, except in sigio_handler, 24/* When epoll triggers we do not know why it did so
22 * where it is safe from being modified. IRQ handlers won't change it - 25 * we can also have different IRQs for read and write.
23 * if an IRQ source has vanished, it will be freed by free_irqs just 26 * This is why we keep a small irq_fd array for each fd -
24 * before returning from sigio_handler. That will process a separate 27 * one entry per IRQ type
25 * list of irqs to free, with its own locking, coming back here to
26 * remove list elements, taking the irq_lock to do so.
27 */ 28 */
28static struct irq_fd *active_fds = NULL;
29static struct irq_fd **last_irq_ptr = &active_fds;
30 29
31extern void free_irqs(void); 30struct irq_entry {
31 struct irq_entry *next;
32 int fd;
33 struct irq_fd *irq_array[MAX_IRQ_TYPE + 1];
34};
35
36static struct irq_entry *active_fds;
37
38static DEFINE_SPINLOCK(irq_lock);
39
40static void irq_io_loop(struct irq_fd *irq, struct uml_pt_regs *regs)
41{
42/*
43 * irq->active guards against reentry
44 * irq->pending accumulates pending requests
45 * if pending is raised the irq_handler is re-run
46 * until pending is cleared
47 */
48 if (irq->active) {
49 irq->active = false;
50 do {
51 irq->pending = false;
52 do_IRQ(irq->irq, regs);
53 } while (irq->pending && (!irq->purge));
54 if (!irq->purge)
55 irq->active = true;
56 } else {
57 irq->pending = true;
58 }
59}
32 60
33void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) 61void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
34{ 62{
35 struct irq_fd *irq_fd; 63 struct irq_entry *irq_entry;
36 int n; 64 struct irq_fd *irq;
65
66 int n, i, j;
37 67
38 while (1) { 68 while (1) {
39 		n = os_waiting_for_events(active_fds); 69		/* This is now lockless - epoll keeps back-references to the irqs
70		 * which have triggered it so there is no need to walk the irq
71 * list and lock it every time. We avoid locking by turning off
72 * IO for a specific fd by executing os_del_epoll_fd(fd) before
73 * we do any changes to the actual data structures
74 */
75 n = os_waiting_for_events_epoll();
76
40 if (n <= 0) { 77 if (n <= 0) {
41 if (n == -EINTR) 78 if (n == -EINTR)
42 continue; 79 continue;
43 else break; 80 else
81 break;
44 } 82 }
45 83
46 for (irq_fd = active_fds; irq_fd != NULL; 84 for (i = 0; i < n ; i++) {
47 irq_fd = irq_fd->next) { 85 /* Epoll back reference is the entry with 3 irq_fd
48 if (irq_fd->current_events != 0) { 86 * leaves - one for each irq type.
49 irq_fd->current_events = 0; 87 */
50 do_IRQ(irq_fd->irq, regs); 88 irq_entry = (struct irq_entry *)
89 os_epoll_get_data_pointer(i);
90 for (j = 0; j < MAX_IRQ_TYPE ; j++) {
91 irq = irq_entry->irq_array[j];
92 if (irq == NULL)
93 continue;
94 if (os_epoll_triggered(i, irq->events) > 0)
95 irq_io_loop(irq, regs);
96 if (irq->purge) {
97 irq_entry->irq_array[j] = NULL;
98 kfree(irq);
99 }
51 } 100 }
52 } 101 }
53 } 102 }
103}
104
105static int assign_epoll_events_to_irq(struct irq_entry *irq_entry)
106{
107 int i;
108 int events = 0;
109 struct irq_fd *irq;
54 110
55 free_irqs(); 111 for (i = 0; i < MAX_IRQ_TYPE ; i++) {
112 irq = irq_entry->irq_array[i];
113 if (irq != NULL)
114 events = irq->events | events;
115 }
116 if (events > 0) {
117 /* os_add_epoll will call os_mod_epoll if this already exists */
118 return os_add_epoll_fd(events, irq_entry->fd, irq_entry);
119 }
120 /* No events - delete */
121 return os_del_epoll_fd(irq_entry->fd);
56} 122}
57 123
58static DEFINE_SPINLOCK(irq_lock); 124
59 125
60static int activate_fd(int irq, int fd, int type, void *dev_id) 126static int activate_fd(int irq, int fd, int type, void *dev_id)
61{ 127{
62 struct pollfd *tmp_pfd; 128 struct irq_fd *new_fd;
63 struct irq_fd *new_fd, *irq_fd; 129 struct irq_entry *irq_entry;
130 int i, err, events;
64 unsigned long flags; 131 unsigned long flags;
65 int events, err, n;
66 132
67 err = os_set_fd_async(fd); 133 err = os_set_fd_async(fd);
68 if (err < 0) 134 if (err < 0)
69 goto out; 135 goto out;
70 136
71 err = -ENOMEM; 137 spin_lock_irqsave(&irq_lock, flags);
72 new_fd = kmalloc(sizeof(struct irq_fd), GFP_KERNEL);
73 if (new_fd == NULL)
74 goto out;
75 138
76 if (type == IRQ_READ) 139 /* Check if we have an entry for this fd */
77 events = UM_POLLIN | UM_POLLPRI;
78 else events = UM_POLLOUT;
79 *new_fd = ((struct irq_fd) { .next = NULL,
80 .id = dev_id,
81 .fd = fd,
82 .type = type,
83 .irq = irq,
84 .events = events,
85 .current_events = 0 } );
86 140
87 err = -EBUSY; 141 err = -EBUSY;
88 spin_lock_irqsave(&irq_lock, flags); 142 for (irq_entry = active_fds;
89 for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) { 143 irq_entry != NULL; irq_entry = irq_entry->next) {
90 if ((irq_fd->fd == fd) && (irq_fd->type == type)) { 144 if (irq_entry->fd == fd)
91 printk(KERN_ERR "Registering fd %d twice\n", fd); 145 break;
92 printk(KERN_ERR "Irqs : %d, %d\n", irq_fd->irq, irq); 146 }
93 printk(KERN_ERR "Ids : 0x%p, 0x%p\n", irq_fd->id, 147
94 dev_id); 148 if (irq_entry == NULL) {
149 /* This needs to be atomic as it may be called from an
150 * IRQ context.
151 */
152 irq_entry = kmalloc(sizeof(struct irq_entry), GFP_ATOMIC);
153 if (irq_entry == NULL) {
154 printk(KERN_ERR
155 "Failed to allocate new IRQ entry\n");
95 goto out_unlock; 156 goto out_unlock;
96 } 157 }
158 irq_entry->fd = fd;
159 for (i = 0; i < MAX_IRQ_TYPE; i++)
160 irq_entry->irq_array[i] = NULL;
161 irq_entry->next = active_fds;
162 active_fds = irq_entry;
97 } 163 }
98 164
99 if (type == IRQ_WRITE) 165 /* Check if we are trying to re-register an interrupt for a
100 fd = -1; 166 * particular fd
101 167 */
102 tmp_pfd = NULL;
103 n = 0;
104 168
105 while (1) { 169 if (irq_entry->irq_array[type] != NULL) {
106 n = os_create_pollfd(fd, events, tmp_pfd, n); 170 printk(KERN_ERR
107 if (n == 0) 171 "Trying to reregister IRQ %d FD %d TYPE %d ID %p\n",
108 break; 172 irq, fd, type, dev_id
173 );
174 goto out_unlock;
175 } else {
176 /* New entry for this fd */
177
178 err = -ENOMEM;
179 new_fd = kmalloc(sizeof(struct irq_fd), GFP_ATOMIC);
180 if (new_fd == NULL)
181 goto out_unlock;
109 182
110 /* 183 events = os_event_mask(type);
111 * n > 0 184
112 * It means we couldn't put new pollfd to current pollfds 185 *new_fd = ((struct irq_fd) {
113 * and tmp_fds is NULL or too small for new pollfds array. 186 .id = dev_id,
114 * Needed size is equal to n as minimum. 187 .irq = irq,
115 * 188 .type = type,
116 * Here we have to drop the lock in order to call 189 .events = events,
117 * kmalloc, which might sleep. 190 .active = true,
118 * If something else came in and changed the pollfds array 191 .pending = false,
119 * so we will not be able to put new pollfd struct to pollfds 192 .purge = false
120 * then we free the buffer tmp_fds and try again. 193 });
194 /* Turn off any IO on this fd - allows us to
195 * avoid locking the IRQ loop
121 */ 196 */
122 spin_unlock_irqrestore(&irq_lock, flags); 197 os_del_epoll_fd(irq_entry->fd);
123 kfree(tmp_pfd); 198 irq_entry->irq_array[type] = new_fd;
124
125 tmp_pfd = kmalloc(n, GFP_KERNEL);
126 if (tmp_pfd == NULL)
127 goto out_kfree;
128
129 spin_lock_irqsave(&irq_lock, flags);
130 } 199 }
131 200
132 *last_irq_ptr = new_fd; 201 /* Turn back IO on with the correct (new) IO event mask */
133 last_irq_ptr = &new_fd->next; 202 assign_epoll_events_to_irq(irq_entry);
134
135 spin_unlock_irqrestore(&irq_lock, flags); 203 spin_unlock_irqrestore(&irq_lock, flags);
136 204 maybe_sigio_broken(fd, (type != IRQ_NONE));
137 /*
138 * This calls activate_fd, so it has to be outside the critical
139 * section.
140 */
141 maybe_sigio_broken(fd, (type == IRQ_READ));
142 205
143 return 0; 206 return 0;
144 207out_unlock:
145 out_unlock:
146 spin_unlock_irqrestore(&irq_lock, flags); 208 spin_unlock_irqrestore(&irq_lock, flags);
147 out_kfree: 209out:
148 kfree(new_fd);
149 out:
150 return err; 210 return err;
151} 211}
152 212
153static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg) 213/*
214 * Walk the IRQ list and dispose of any unused entries.
215 * Should be done under irq_lock.
216 */
217
218static void garbage_collect_irq_entries(void)
154{ 219{
155 unsigned long flags; 220 int i;
221 bool reap;
222 struct irq_entry *walk;
223 struct irq_entry *previous = NULL;
224 struct irq_entry *to_free;
156 225
157 spin_lock_irqsave(&irq_lock, flags); 226 if (active_fds == NULL)
158 os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr); 227 return;
159 spin_unlock_irqrestore(&irq_lock, flags); 228 walk = active_fds;
229 while (walk != NULL) {
230 reap = true;
231 for (i = 0; i < MAX_IRQ_TYPE ; i++) {
232 if (walk->irq_array[i] != NULL) {
233 reap = false;
234 break;
235 }
236 }
237 if (reap) {
238 if (previous == NULL)
239 active_fds = walk->next;
240 else
241 previous->next = walk->next;
242 to_free = walk;
243 } else {
244 to_free = NULL;
245 }
246 walk = walk->next;
247 if (to_free != NULL)
248 kfree(to_free);
249 }
160} 250}
161 251
162struct irq_and_dev { 252/*
163 int irq; 253 * Walk the IRQ list and get the descriptor for our FD
164 void *dev; 254 */
165};
166 255
167static int same_irq_and_dev(struct irq_fd *irq, void *d) 256static struct irq_entry *get_irq_entry_by_fd(int fd)
168{ 257{
169 struct irq_and_dev *data = d; 258 struct irq_entry *walk = active_fds;
170 259
171 return ((irq->irq == data->irq) && (irq->id == data->dev)); 260 while (walk != NULL) {
261 if (walk->fd == fd)
262 return walk;
263 walk = walk->next;
264 }
265 return NULL;
172} 266}
173 267
174static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
175{
176 struct irq_and_dev data = ((struct irq_and_dev) { .irq = irq,
177 .dev = dev });
178 268
179 free_irq_by_cb(same_irq_and_dev, &data); 269/*
180} 270 * Walk the IRQ list and dispose of an entry for a specific
271 * device, fd and number. Note - if sharing an IRQ for read
272 * and writefor the same FD it will be disposed in either case.
273 * If this behaviour is undesirable use different IRQ ids.
274 */
181 275
182static int same_fd(struct irq_fd *irq, void *fd) 276#define IGNORE_IRQ 1
183{ 277#define IGNORE_DEV (1<<1)
184 return (irq->fd == *((int *)fd));
185}
186 278
187void free_irq_by_fd(int fd) 279static void do_free_by_irq_and_dev(
280 struct irq_entry *irq_entry,
281 unsigned int irq,
282 void *dev,
283 int flags
284)
188{ 285{
189 free_irq_by_cb(same_fd, &fd); 286 int i;
287 struct irq_fd *to_free;
288
289 for (i = 0; i < MAX_IRQ_TYPE ; i++) {
290 if (irq_entry->irq_array[i] != NULL) {
291 if (
292 ((flags & IGNORE_IRQ) ||
293 (irq_entry->irq_array[i]->irq == irq)) &&
294 ((flags & IGNORE_DEV) ||
295 (irq_entry->irq_array[i]->id == dev))
296 ) {
297 /* Turn off any IO on this fd - allows us to
298 * avoid locking the IRQ loop
299 */
300 os_del_epoll_fd(irq_entry->fd);
301 to_free = irq_entry->irq_array[i];
302 irq_entry->irq_array[i] = NULL;
303 assign_epoll_events_to_irq(irq_entry);
304 if (to_free->active)
305 to_free->purge = true;
306 else
307 kfree(to_free);
308 }
309 }
310 }
190} 311}
191 312
192/* Must be called with irq_lock held */ 313void free_irq_by_fd(int fd)
193static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out)
194{ 314{
195 struct irq_fd *irq; 315 struct irq_entry *to_free;
196 int i = 0; 316 unsigned long flags;
197 int fdi;
198 317
199 for (irq = active_fds; irq != NULL; irq = irq->next) { 318 spin_lock_irqsave(&irq_lock, flags);
200 if ((irq->fd == fd) && (irq->irq == irqnum)) 319 to_free = get_irq_entry_by_fd(fd);
201 break; 320 if (to_free != NULL) {
202 i++; 321 do_free_by_irq_and_dev(
203 } 322 to_free,
204 if (irq == NULL) { 323 -1,
205 printk(KERN_ERR "find_irq_by_fd doesn't have descriptor %d\n", 324 NULL,
206 fd); 325 IGNORE_IRQ | IGNORE_DEV
207 goto out; 326 );
208 }
209 fdi = os_get_pollfd(i);
210 if ((fdi != -1) && (fdi != fd)) {
211 printk(KERN_ERR "find_irq_by_fd - mismatch between active_fds "
212 "and pollfds, fd %d vs %d, need %d\n", irq->fd,
213 fdi, fd);
214 irq = NULL;
215 goto out;
216 } 327 }
217 *index_out = i; 328 garbage_collect_irq_entries();
218 out: 329 spin_unlock_irqrestore(&irq_lock, flags);
219 return irq;
220} 330}
331EXPORT_SYMBOL(free_irq_by_fd);
221 332
222void reactivate_fd(int fd, int irqnum) 333static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
223{ 334{
224 struct irq_fd *irq; 335 struct irq_entry *to_free;
225 unsigned long flags; 336 unsigned long flags;
226 int i;
227 337
228 spin_lock_irqsave(&irq_lock, flags); 338 spin_lock_irqsave(&irq_lock, flags);
229 irq = find_irq_by_fd(fd, irqnum, &i); 339 to_free = active_fds;
230 if (irq == NULL) { 340 while (to_free != NULL) {
231 spin_unlock_irqrestore(&irq_lock, flags); 341 do_free_by_irq_and_dev(
232 return; 342 to_free,
343 irq,
344 dev,
345 0
346 );
347 to_free = to_free->next;
233 } 348 }
234 os_set_pollfd(i, irq->fd); 349 garbage_collect_irq_entries();
235 spin_unlock_irqrestore(&irq_lock, flags); 350 spin_unlock_irqrestore(&irq_lock, flags);
351}
236 352
237 add_sigio_fd(fd); 353
354void reactivate_fd(int fd, int irqnum)
355{
356 /** NOP - we do auto-EOI now **/
238} 357}
239 358
240void deactivate_fd(int fd, int irqnum) 359void deactivate_fd(int fd, int irqnum)
241{ 360{
242 struct irq_fd *irq; 361 struct irq_entry *to_free;
243 unsigned long flags; 362 unsigned long flags;
244 int i;
245 363
364 os_del_epoll_fd(fd);
246 spin_lock_irqsave(&irq_lock, flags); 365 spin_lock_irqsave(&irq_lock, flags);
247 irq = find_irq_by_fd(fd, irqnum, &i); 366 to_free = get_irq_entry_by_fd(fd);
248 if (irq == NULL) { 367 if (to_free != NULL) {
249 spin_unlock_irqrestore(&irq_lock, flags); 368 do_free_by_irq_and_dev(
250 return; 369 to_free,
370 irqnum,
371 NULL,
372 IGNORE_DEV
373 );
251 } 374 }
252 375 garbage_collect_irq_entries();
253 os_set_pollfd(i, -1);
254 spin_unlock_irqrestore(&irq_lock, flags); 376 spin_unlock_irqrestore(&irq_lock, flags);
255
256 ignore_sigio_fd(fd); 377 ignore_sigio_fd(fd);
257} 378}
258EXPORT_SYMBOL(deactivate_fd); 379EXPORT_SYMBOL(deactivate_fd);
@@ -265,17 +386,28 @@ EXPORT_SYMBOL(deactivate_fd);
265 */ 386 */
266int deactivate_all_fds(void) 387int deactivate_all_fds(void)
267{ 388{
268 struct irq_fd *irq; 389 unsigned long flags;
269 int err; 390 struct irq_entry *to_free;
270 391
271 for (irq = active_fds; irq != NULL; irq = irq->next) { 392 spin_lock_irqsave(&irq_lock, flags);
272 err = os_clear_fd_async(irq->fd); 393 /* Stop IO. The IRQ loop has no lock so this is our
273 if (err) 394 * only way of making sure we are safe to dispose
274 return err; 395 * of all IRQ handlers
275 } 396 */
276 /* If there is a signal already queued, after unblocking ignore it */
277 os_set_ioignore(); 397 os_set_ioignore();
278 398 to_free = active_fds;
399 while (to_free != NULL) {
400 do_free_by_irq_and_dev(
401 to_free,
402 -1,
403 NULL,
404 IGNORE_IRQ | IGNORE_DEV
405 );
406 to_free = to_free->next;
407 }
408 garbage_collect_irq_entries();
409 spin_unlock_irqrestore(&irq_lock, flags);
410 os_close_epoll_fd();
279 return 0; 411 return 0;
280} 412}
281 413
@@ -353,8 +485,11 @@ void __init init_IRQ(void)
353 485
354 irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq); 486 irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq);
355 487
488
356 for (i = 1; i < NR_IRQS; i++) 489 for (i = 1; i < NR_IRQS; i++)
357 irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq); 490 irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
491 /* Initialize EPOLL Loop */
492 os_setup_epoll();
358} 493}
359 494
360/* 495/*
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 7f69d17de354..052de4c8acb2 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -121,12 +121,12 @@ static void __init um_timer_setup(void)
121 clockevents_register_device(&timer_clockevent); 121 clockevents_register_device(&timer_clockevent);
122} 122}
123 123
124void read_persistent_clock(struct timespec *ts) 124void read_persistent_clock64(struct timespec64 *ts)
125{ 125{
126 long long nsecs = os_persistent_clock_emulation(); 126 long long nsecs = os_persistent_clock_emulation();
127 127
128 set_normalized_timespec(ts, nsecs / NSEC_PER_SEC, 128 set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC,
129 nsecs % NSEC_PER_SEC); 129 nsecs % NSEC_PER_SEC);
130} 130}
131 131
132void __init time_init(void) 132void __init time_init(void)
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 2db18cbbb0ea..c0197097c86e 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -12,6 +12,7 @@
12#include <sys/mount.h> 12#include <sys/mount.h>
13#include <sys/socket.h> 13#include <sys/socket.h>
14#include <sys/stat.h> 14#include <sys/stat.h>
15#include <sys/sysmacros.h>
15#include <sys/un.h> 16#include <sys/un.h>
16#include <sys/types.h> 17#include <sys/types.h>
17#include <os.h> 18#include <os.h>
diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c
index b9afb74b79ad..365823010346 100644
--- a/arch/um/os-Linux/irq.c
+++ b/arch/um/os-Linux/irq.c
@@ -1,135 +1,147 @@
1/* 1/*
2 * Copyright (C) 2017 - Cambridge Greys Ltd
3 * Copyright (C) 2011 - 2014 Cisco Systems Inc
2 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 4 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL 5 * Licensed under the GPL
4 */ 6 */
5 7
6#include <stdlib.h> 8#include <stdlib.h>
7#include <errno.h> 9#include <errno.h>
8#include <poll.h> 10#include <sys/epoll.h>
9#include <signal.h> 11#include <signal.h>
10#include <string.h> 12#include <string.h>
11#include <irq_user.h> 13#include <irq_user.h>
12#include <os.h> 14#include <os.h>
13#include <um_malloc.h> 15#include <um_malloc.h>
14 16
17/* Epoll support */
18
19static int epollfd = -1;
20
21#define MAX_EPOLL_EVENTS 64
22
23static struct epoll_event epoll_events[MAX_EPOLL_EVENTS];
24
25/* Helper to return an Epoll data pointer from an epoll event structure.
26 * We need to keep this one on the userspace side to keep includes separate
27 */
28
29void *os_epoll_get_data_pointer(int index)
30{
31 return epoll_events[index].data.ptr;
32}
33
34/* Helper to compare events versus the events in the epoll structure.
35 * Same as above - needs to be on the userspace side
36 */
37
38
39int os_epoll_triggered(int index, int events)
40{
41 return epoll_events[index].events & events;
42}
43/* Helper to set the event mask.
44 * The event mask is opaque to the kernel side, because it does not have
45 * access to the right includes/defines for EPOLL constants.
46 */
47
48int os_event_mask(int irq_type)
49{
50 if (irq_type == IRQ_READ)
51 return EPOLLIN | EPOLLPRI;
52 if (irq_type == IRQ_WRITE)
53 return EPOLLOUT;
54 return 0;
55}
56
15/* 57/*
16 * Locked by irq_lock in arch/um/kernel/irq.c. Changed by os_create_pollfd 58 * Initial Epoll Setup
17 * and os_free_irq_by_cb, which are called under irq_lock.
18 */ 59 */
19static struct pollfd *pollfds = NULL; 60int os_setup_epoll(void)
20static int pollfds_num = 0; 61{
21static int pollfds_size = 0; 62 epollfd = epoll_create(MAX_EPOLL_EVENTS);
63 return epollfd;
64}
22 65
23int os_waiting_for_events(struct irq_fd *active_fds) 66/*
67 * Helper to run the actual epoll_wait
68 */
69int os_waiting_for_events_epoll(void)
24{ 70{
25 struct irq_fd *irq_fd; 71 int n, err;
26 int i, n, err;
27 72
28 n = poll(pollfds, pollfds_num, 0); 73 n = epoll_wait(epollfd,
74 (struct epoll_event *) &epoll_events, MAX_EPOLL_EVENTS, 0);
29 if (n < 0) { 75 if (n < 0) {
30 err = -errno; 76 err = -errno;
31 if (errno != EINTR) 77 if (errno != EINTR)
32 printk(UM_KERN_ERR "os_waiting_for_events:" 78 printk(
33 " poll returned %d, errno = %d\n", n, errno); 79 UM_KERN_ERR "os_waiting_for_events:"
80 " epoll returned %d, error = %s\n", n,
81 strerror(errno)
82 );
34 return err; 83 return err;
35 } 84 }
36
37 if (n == 0)
38 return 0;
39
40 irq_fd = active_fds;
41
42 for (i = 0; i < pollfds_num; i++) {
43 if (pollfds[i].revents != 0) {
44 irq_fd->current_events = pollfds[i].revents;
45 pollfds[i].fd = -1;
46 }
47 irq_fd = irq_fd->next;
48 }
49 return n; 85 return n;
50} 86}
51 87
52int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds)
53{
54 if (pollfds_num == pollfds_size) {
55 if (size_tmpfds <= pollfds_size * sizeof(pollfds[0])) {
56 /* return min size needed for new pollfds area */
57 return (pollfds_size + 1) * sizeof(pollfds[0]);
58 }
59
60 if (pollfds != NULL) {
61 memcpy(tmp_pfd, pollfds,
62 sizeof(pollfds[0]) * pollfds_size);
63 /* remove old pollfds */
64 kfree(pollfds);
65 }
66 pollfds = tmp_pfd;
67 pollfds_size++;
68 } else
69 kfree(tmp_pfd); /* remove not used tmp_pfd */
70
71 pollfds[pollfds_num] = ((struct pollfd) { .fd = fd,
72 .events = events,
73 .revents = 0 });
74 pollfds_num++;
75
76 return 0;
77}
78 88
79void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg, 89/*
80 struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2) 90 * Helper to add a fd to epoll
91 */
92int os_add_epoll_fd(int events, int fd, void *data)
81{ 93{
82 struct irq_fd **prev; 94 struct epoll_event event;
83 int i = 0; 95 int result;
84 96
85 prev = &active_fds; 97 event.data.ptr = data;
86 while (*prev != NULL) { 98 event.events = events | EPOLLET;
87 if ((*test)(*prev, arg)) { 99 result = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
88 struct irq_fd *old_fd = *prev; 100 if ((result) && (errno == EEXIST))
89 if ((pollfds[i].fd != -1) && 101 result = os_mod_epoll_fd(events, fd, data);
90 (pollfds[i].fd != (*prev)->fd)) { 102 if (result)
91 printk(UM_KERN_ERR "os_free_irq_by_cb - " 103 printk("epollctl add err fd %d, %s\n", fd, strerror(errno));
92 "mismatch between active_fds and " 104 return result;
93 "pollfds, fd %d vs %d\n",
94 (*prev)->fd, pollfds[i].fd);
95 goto out;
96 }
97
98 pollfds_num--;
99
100 /*
101 * This moves the *whole* array after pollfds[i]
102 * (though it doesn't spot as such)!
103 */
104 memmove(&pollfds[i], &pollfds[i + 1],
105 (pollfds_num - i) * sizeof(pollfds[0]));
106 if (*last_irq_ptr2 == &old_fd->next)
107 *last_irq_ptr2 = prev;
108
109 *prev = (*prev)->next;
110 if (old_fd->type == IRQ_WRITE)
111 ignore_sigio_fd(old_fd->fd);
112 kfree(old_fd);
113 continue;
114 }
115 prev = &(*prev)->next;
116 i++;
117 }
118 out:
119 return;
120} 105}
121 106
122int os_get_pollfd(int i) 107/*
108 * Helper to mod the fd event mask and/or data backreference
109 */
110int os_mod_epoll_fd(int events, int fd, void *data)
123{ 111{
124 return pollfds[i].fd; 112 struct epoll_event event;
113 int result;
114
115 event.data.ptr = data;
116 event.events = events;
117 result = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &event);
118 if (result)
119 printk(UM_KERN_ERR
120 "epollctl mod err fd %d, %s\n", fd, strerror(errno));
121 return result;
125} 122}
126 123
127void os_set_pollfd(int i, int fd) 124/*
125 * Helper to delete the epoll fd
126 */
127int os_del_epoll_fd(int fd)
128{ 128{
129 pollfds[i].fd = fd; 129 struct epoll_event event;
130 int result;
131 /* This is quiet as we use this as IO ON/OFF - so it is often
132 * invoked on a non-existent fd
133 */
134 result = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event);
135 return result;
130} 136}
131 137
132void os_set_ioignore(void) 138void os_set_ioignore(void)
133{ 139{
134 signal(SIGIO, SIG_IGN); 140 signal(SIGIO, SIG_IGN);
135} 141}
142
143void os_close_epoll_fd(void)
144{
145 /* Needed so we do not leak an fd when rebooting */
146 os_close_file(epollfd);
147}
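
The helpers above wrap one process-wide, edge-triggered epoll instance: os_setup_epoll() creates it, os_add_epoll_fd() and os_mod_epoll_fd() register a file descriptor together with an opaque data back-reference, os_waiting_for_events_epoll() harvests whatever is pending with a zero timeout, and os_epoll_get_data_pointer() / os_epoll_triggered() let the kernel side inspect the result without ever including <sys/epoll.h>. The standalone sketch below illustrates the same pattern against the plain epoll(7) API rather than the UML helpers; the irq_entry structure and the add_fd() wrapper are hypothetical stand-ins, not code from this commit.

/*
 * Illustrative sketch only: the edge-triggered epoll pattern the UML
 * os_*_epoll_* helpers wrap, written directly against epoll(7).
 */
#include <errno.h>
#include <stdio.h>
#include <sys/epoll.h>
#include <unistd.h>

#define MAX_EVENTS 64

struct irq_entry {			/* hypothetical stand-in for the kernel-side IRQ record */
	int fd;
	const char *name;
};

static int add_fd(int epfd, int fd, struct irq_entry *data, int events)
{
	struct epoll_event ev;

	ev.data.ptr = data;		/* back-reference recovered on wakeup */
	ev.events = events | EPOLLET;	/* edge-triggered, as in os_add_epoll_fd() */
	if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev)) {
		if (errno == EEXIST)	/* already registered: update mask and data */
			return epoll_ctl(epfd, EPOLL_CTL_MOD, fd, &ev);
		return -1;
	}
	return 0;
}

int main(void)
{
	struct epoll_event events[MAX_EVENTS];
	struct irq_entry con = { .fd = 0, .name = "console" };	/* stdin; epoll rejects regular files */
	int epfd, n, i;

	epfd = epoll_create1(0);	/* os_setup_epoll() uses epoll_create() */
	if (epfd < 0)
		return 1;
	if (add_fd(epfd, con.fd, &con, EPOLLIN | EPOLLPRI))
		return 1;

	/* Zero timeout: only collect already-pending events, as in
	 * os_waiting_for_events_epoll(). */
	n = epoll_wait(epfd, events, MAX_EVENTS, 0);
	for (i = 0; i < n; i++) {
		struct irq_entry *e = events[i].data.ptr;	/* os_epoll_get_data_pointer() */

		if (events[i].events & (EPOLLIN | EPOLLPRI))	/* os_epoll_triggered() */
			printf("fd %d (%s) is readable\n", e->fd, e->name);
	}

	close(epfd);			/* os_close_epoll_fd() */
	return 0;
}

Edge-triggered registration means a descriptor is reported once per readiness transition rather than on every poll, so the consumer is expected to drain the fd before the next wakeup.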
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index a86d7cc2c2d8..bf0acb8aad8b 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -16,6 +16,7 @@
16#include <os.h> 16#include <os.h>
17#include <sysdep/mcontext.h> 17#include <sysdep/mcontext.h>
18#include <um_malloc.h> 18#include <um_malloc.h>
19#include <sys/ucontext.h>
19 20
20void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { 21void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
21 [SIGTRAP] = relay_signal, 22 [SIGTRAP] = relay_signal,
@@ -159,7 +160,7 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {
159 160
160static void hard_handler(int sig, siginfo_t *si, void *p) 161static void hard_handler(int sig, siginfo_t *si, void *p)
161{ 162{
162 struct ucontext *uc = p; 163 ucontext_t *uc = p;
163 mcontext_t *mc = &uc->uc_mcontext; 164 mcontext_t *mc = &uc->uc_mcontext;
164 unsigned long pending = 1UL << sig; 165 unsigned long pending = 1UL << sig;
165 166
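
The signal.c hunks track a libc header change: glibc 2.26 and later no longer expose the non-standard struct ucontext tag, only the POSIX ucontext_t typedef, so the third argument of an SA_SIGINFO handler has to be cast to ucontext_t * and <sys/ucontext.h> included explicitly. Below is a minimal userspace sketch of that pattern; it is illustrative only and not the UML handler, which goes on to dispatch through the handlers[] table shown in the hunk header.

/*
 * Minimal sketch of the POSIX ucontext_t usage above; not UML code.
 * Only the portable cast from the void * third argument is shown.
 */
#include <signal.h>
#include <string.h>
#include <sys/ucontext.h>
#include <unistd.h>

static void hard_handler(int sig, siginfo_t *si, void *p)
{
	ucontext_t *uc = p;		/* was "struct ucontext *" before glibc 2.26 */
	mcontext_t *mc = &uc->uc_mcontext;

	(void)sig;
	(void)si;
	(void)mc;			/* arch-specific register access would go here */
	write(STDOUT_FILENO, "signal\n", 7);	/* async-signal-safe, unlike printf() */
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = hard_handler;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGUSR1, &sa, NULL);

	raise(SIGUSR1);
	return 0;
}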