aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation/mic
diff options
context:
space:
mode:
authorCaz Yokoyama <Caz.Yokoyama@intel.com>2013-09-05 19:42:39 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2013-09-26 16:52:24 -0400
commit8d49751580db804a02caf6a5b7cebe2ff26c0d7e (patch)
tree69c8a70bbbed2d4b56efe7be87a44b62286d4c14 /Documentation/mic
parent2141c7c5ee677014023cb50c793f91e85f44d2ea (diff)
Sample Implementation of Intel MIC User Space Daemon.
This patch introduces a sample user space daemon which implements the virtio device backends on the host. The daemon creates/removes/configures virtio device backends by communicating with the Intel MIC Host Driver. The virtio devices currently supported are virtio net, virtio console and virtio block. Virtio net supports TSO/GSO. The daemon also monitors card shutdown status and takes appropriate actions like killing the virtio backends and resetting the card upon card shutdown and crashes. Co-author: Ashutosh Dixit <ashutosh.dixit@intel.com> Co-author: Sudeep Dutt <sudeep.dutt@intel.com> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com> Signed-off-by: Caz Yokoyama <Caz.Yokoyama@intel.com> Signed-off-by: Dasaratharaman Chandramouli <dasaratharaman.chandramouli@intel.com> Signed-off-by: Nikhil Rao <nikhil.rao@intel.com> Signed-off-by: Harshavardhan R Kharche <harshavardhan.r.kharche@intel.com> Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com> Acked-by: Yaozu (Eddie) Dong <eddie.dong@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'Documentation/mic')
-rw-r--r--Documentation/mic/mic_overview.txt49
-rw-r--r--Documentation/mic/mpssd/.gitignore1
-rw-r--r--Documentation/mic/mpssd/Makefile19
-rwxr-xr-xDocumentation/mic/mpssd/micctrl173
-rwxr-xr-xDocumentation/mic/mpssd/mpss202
-rw-r--r--Documentation/mic/mpssd/mpssd.c1701
-rw-r--r--Documentation/mic/mpssd/mpssd.h100
-rw-r--r--Documentation/mic/mpssd/sysfs.c102
8 files changed, 2347 insertions, 0 deletions
diff --git a/Documentation/mic/mic_overview.txt b/Documentation/mic/mic_overview.txt
new file mode 100644
index 000000000000..c4424ed1b746
--- /dev/null
+++ b/Documentation/mic/mic_overview.txt
@@ -0,0 +1,49 @@
1An Intel MIC X100 device is a PCIe form factor add-in coprocessor
2card based on the Intel Many Integrated Core (MIC) architecture
3that runs a Linux OS. It is a PCIe endpoint in a platform and therefore
4implements the three required standard address spaces i.e. configuration,
5memory and I/O. The host OS loads a device driver as is typical for
6PCIe devices. The card itself runs a bootstrap after reset that
7transfers control to the card OS downloaded from the host driver.
8The card OS as shipped by Intel is a Linux kernel with modifications
9for the X100 devices.
10
11Since it is a PCIe card, it does not have the ability to host hardware
12devices for networking, storage and console. We provide these devices
13on X100 coprocessors thus enabling a self-bootable equivalent environment
14for applications. A key benefit of our solution is that it leverages
15the standard virtio framework for network, disk and console devices,
16though in our case the virtio framework is used across a PCIe bus.
17
18Here is a block diagram of the various components described above. The
19virtio backends are situated on the host rather than the card given better
20single threaded performance for the host compared to MIC, the ability of
21the host to initiate DMAs to/from the card using the MIC DMA engine and
22the fact that the virtio block storage backend can only be on the host.
23
24 |
25 +----------+ | +----------+
26 | Card OS | | | Host OS |
27 +----------+ | +----------+
28 |
29+-------+ +--------+ +------+ | +---------+ +--------+ +--------+
30| Virtio| |Virtio | |Virtio| | |Virtio | |Virtio | |Virtio |
31| Net | |Console | |Block | | |Net | |Console | |Block |
32| Driver| |Driver | |Driver| | |backend | |backend | |backend |
33+-------+ +--------+ +------+ | +---------+ +--------+ +--------+
34 | | | | | | |
35 | | | |User | | |
36 | | | |------|------------|---------|-------
37 +-------------------+ |Kernel +--------------------------+
38 | | | Virtio over PCIe IOCTLs |
39 | | +--------------------------+
40 +--------------+ | |
41 |Intel MIC | | +---------------+
42 |Card Driver | | |Intel MIC |
43 +--------------+ | |Host Driver |
44 | | +---------------+
45 | | |
46 +-------------------------------------------------------------+
47 | |
48 | PCIe Bus |
49 +-------------------------------------------------------------+
diff --git a/Documentation/mic/mpssd/.gitignore b/Documentation/mic/mpssd/.gitignore
new file mode 100644
index 000000000000..8b7c72f07c92
--- /dev/null
+++ b/Documentation/mic/mpssd/.gitignore
@@ -0,0 +1 @@
mpssd
diff --git a/Documentation/mic/mpssd/Makefile b/Documentation/mic/mpssd/Makefile
new file mode 100644
index 000000000000..eb860a7d152e
--- /dev/null
+++ b/Documentation/mic/mpssd/Makefile
@@ -0,0 +1,19 @@
1#
2# Makefile - Intel MIC User Space Tools.
3# Copyright(c) 2013, Intel Corporation.
4#
5ifdef DEBUG
6CFLAGS += $(USERWARNFLAGS) -I. -g -Wall -DDEBUG=$(DEBUG)
7else
8CFLAGS += $(USERWARNFLAGS) -I. -g -Wall
9endif
10
11mpssd: mpssd.o sysfs.o
12 $(CC) $(CFLAGS) -o $@ $^ -lpthread
13
14install:
15 install mpssd /usr/sbin/mpssd
16 install micctrl /usr/sbin/micctrl
17
18clean:
19 rm -f mpssd *.o
diff --git a/Documentation/mic/mpssd/micctrl b/Documentation/mic/mpssd/micctrl
new file mode 100755
index 000000000000..8f2629b41c5f
--- /dev/null
+++ b/Documentation/mic/mpssd/micctrl
@@ -0,0 +1,173 @@
1#!/bin/bash
2# Intel MIC Platform Software Stack (MPSS)
3#
4# Copyright(c) 2013 Intel Corporation.
5#
6# This program is free software; you can redistribute it and/or modify
7# it under the terms of the GNU General Public License, version 2, as
8# published by the Free Software Foundation.
9#
10# This program is distributed in the hope that it will be useful, but
11# WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13# General Public License for more details.
14#
15# The full GNU General Public License is included in this distribution in
16# the file called "COPYING".
17#
18# Intel MIC User Space Tools.
19#
20# micctrl - Controls MIC boot/start/stop.
21#
22# chkconfig: 2345 95 05
23# description: start MPSS stack processing.
24#
25### BEGIN INIT INFO
26# Provides: micctrl
27### END INIT INFO
28
29# Source function library.
30. /etc/init.d/functions
31
32sysfs="/sys/class/mic"
33
34_status()
35{
36 f=$sysfs/$1
37 echo -e $1 state: "`cat $f/state`" shutdown_status: "`cat $f/shutdown_status`"
38}
39
40status()
41{
42 if [ "`echo $1 | head -c3`" == "mic" ]; then
43 _status $1
44 return $?
45 fi
46 for f in $sysfs/*
47 do
48 _status `basename $f`
49 RETVAL=$?
50 [ $RETVAL -ne 0 ] && return $RETVAL
51 done
52 return 0
53}
54
55_reset()
56{
57 f=$sysfs/$1
58 echo reset > $f/state
59}
60
61reset()
62{
63 if [ "`echo $1 | head -c3`" == "mic" ]; then
64 _reset $1
65 return $?
66 fi
67 for f in $sysfs/*
68 do
69 _reset `basename $f`
70 RETVAL=$?
71 [ $RETVAL -ne 0 ] && return $RETVAL
72 done
73 return 0
74}
75
76_boot()
77{
78 f=$sysfs/$1
79 echo "linux" > $f/bootmode
80 echo "mic/uos.img" > $f/firmware
81 echo "mic/$1.image" > $f/ramdisk
82 echo "boot" > $f/state
83}
84
85boot()
86{
87 if [ "`echo $1 | head -c3`" == "mic" ]; then
88 _boot $1
89 return $?
90 fi
91 for f in $sysfs/*
92 do
93 _boot `basename $f`
94 RETVAL=$?
95 [ $RETVAL -ne 0 ] && return $RETVAL
96 done
97 return 0
98}
99
100_shutdown()
101{
102 f=$sysfs/$1
103 echo shutdown > $f/state
104}
105
106shutdown()
107{
108 if [ "`echo $1 | head -c3`" == "mic" ]; then
109 _shutdown $1
110 return $?
111 fi
112 for f in $sysfs/*
113 do
114 _shutdown `basename $f`
115 RETVAL=$?
116 [ $RETVAL -ne 0 ] && return $RETVAL
117 done
118 return 0
119}
120
# Block until card $1 reports a settled state ("offline" or "online") in
# $sysfs/$1/state, polling once per second and printing a progress line
# for every poll that finds the card still in transition.
_wait()
{
	local state
	f=$sysfs/$1
	while true
	do
		# Read the state once per iteration so both comparisons see
		# the same value.
		state="`cat $f/state`"
		[ "$state" = "offline" -o "$state" = "online" ] && break
		sleep 1
		echo -e "Waiting for $1 to go offline or online"
	done
}
130
131wait()
132{
133 if [ "`echo $1 | head -c3`" == "mic" ]; then
134 _wait $1
135 return $?
136 fi
137 # Wait for the cards to go offline
138 for f in $sysfs/*
139 do
140 _wait `basename $f`
141 RETVAL=$?
142 [ $RETVAL -ne 0 ] && return $RETVAL
143 done
144 return 0
145}
146
147if [ ! -d "$sysfs" ]; then
148 echo -e $"Module unloaded "
149 exit 3
150fi
151
152case $1 in
153 -s)
154 status $2
155 ;;
156 -r)
157 reset $2
158 ;;
159 -b)
160 boot $2
161 ;;
162 -S)
163 shutdown $2
164 ;;
165 -w)
166 wait $2
167 ;;
168 *)
169 echo $"Usage: $0 {-s (status) |-r (reset) |-b (boot) |-S (shutdown) |-w (wait)}"
170 exit 2
171esac
172
173exit $?
diff --git a/Documentation/mic/mpssd/mpss b/Documentation/mic/mpssd/mpss
new file mode 100755
index 000000000000..3136c68dad0b
--- /dev/null
+++ b/Documentation/mic/mpssd/mpss
@@ -0,0 +1,202 @@
1#!/bin/bash
2# Intel MIC Platform Software Stack (MPSS)
3#
4# Copyright(c) 2013 Intel Corporation.
5#
6# This program is free software; you can redistribute it and/or modify
7# it under the terms of the GNU General Public License, version 2, as
8# published by the Free Software Foundation.
9#
10# This program is distributed in the hope that it will be useful, but
11# WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13# General Public License for more details.
14#
15# The full GNU General Public License is included in this distribution in
16# the file called "COPYING".
17#
18# Intel MIC User Space Tools.
19#
20# mpss Start mpssd.
21#
22# chkconfig: 2345 95 05
23# description: start MPSS stack processing.
24#
25### BEGIN INIT INFO
26# Provides: mpss
27# Required-Start:
28# Required-Stop:
29# Short-Description: MPSS stack control
30# Description: MPSS stack control
31### END INIT INFO
32
33# Source function library.
34. /etc/init.d/functions
35
36exec=/usr/sbin/mpssd
37sysfs="/sys/class/mic"
38
# Start the MPSS stack: make sure the mic_host module is loaded, launch
# the mpssd daemon, boot the cards through micctrl and then ping each card
# until it answers (at most 101 attempts, one second apart).
#
# Returns 0 if mpssd was already running, the failing command's status if
# modprobe or the daemon fails, otherwise the ping status of the last card.
start()
{
	[ -x $exec ] || exit 5

	if [ "`ps -e | awk '{print $4}' | grep mpssd | head -1`" = "mpssd" ]; then
		echo -e $"MPSSD already running! "
		success
		echo
		return 0
	fi

	echo -e $"Starting MPSS Stack"
	echo -e $"Loading MIC_HOST Module"

	# Ensure the driver is loaded
	if [ ! -d "$sysfs" ]; then
		modprobe mic_host
		RETVAL=$?
		if [ $RETVAL -ne 0 ]; then
			failure
			echo
			return $RETVAL
		fi
	fi

	# Start the daemon
	echo -n $"Starting MPSSD "
	$exec
	RETVAL=$?
	if [ $RETVAL -ne 0 ]; then
		failure
		echo
		return $RETVAL
	fi
	success
	echo

	sleep 5

	# Boot the cards
	micctrl -b

	# Wait till ping works
	for f in $sysfs/*
	do
		count=100
		# The card address is taken from the text following
		# "address," in the card's kernel command line.
		ipaddr=`cat $f/cmdline`
		ipaddr=${ipaddr#*address,}
		ipaddr=`echo $ipaddr | cut -d, -f1 | cut -d\; -f1`
		while [ $count -ge 0 ]
		do
			echo -e "Pinging "`basename $f`" "
			ping -c 1 $ipaddr &> /dev/null
			RETVAL=$?
			# Stop retrying on the first reply; the outcome is
			# reported exactly once, after the loop.
			[ $RETVAL -eq 0 ] && break
			sleep 1
			count=`expr $count - 1`
		done
		[ $RETVAL -ne 0 ] && failure || success
		echo
	done
	return $RETVAL
}
105
106stop()
107{
108 echo -e $"Shutting down MPSS Stack: "
109
110 # Bail out if module is unloaded
111 if [ ! -d "$sysfs" ]; then
112 echo -n $"Module unloaded "
113 success
114 echo
115 return 0
116 fi
117
118 # Shut down the cards.
119 micctrl -S
120
121 # Wait for the cards to go offline
122 for f in $sysfs/*
123 do
124 while [ "`cat $f/state`" != "offline" ]
125 do
126 sleep 1
127 echo -e "Waiting for "`basename $f`" to go offline"
128 done
129 done
130
131 # Display the status of the cards
132 micctrl -s
133
134 # Kill MPSSD now
135 echo -n $"Killing MPSSD"
136 killall -9 mpssd 2>/dev/null
137 RETVAL=$?
138 [ $RETVAL -ne 0 ] && failure || success
139 echo
140 return $RETVAL
141}
142
143restart()
144{
145 stop
146 sleep 5
147 start
148}
149
150status()
151{
152 micctrl -s
153 if [ "`ps -e | awk '{print $4}' | grep mpssd | head -n 1`" = "mpssd" ]; then
154 echo "mpssd is running"
155 else
156 echo "mpssd is stopped"
157 fi
158 return 0
159}
160
161unload()
162{
163 if [ ! -d "$sysfs" ]; then
164 echo -n $"No MIC_HOST Module: "
165 success
166 echo
167 return
168 fi
169
170 stop
171
172 sleep 5
173 echo -n $"Removing MIC_HOST Module: "
174 modprobe -r mic_host
175 RETVAL=$?
176 [ $RETVAL -ne 0 ] && failure || success
177 echo
178 return $RETVAL
179}
180
181case $1 in
182 start)
183 start
184 ;;
185 stop)
186 stop
187 ;;
188 restart)
189 restart
190 ;;
191 status)
192 status
193 ;;
194 unload)
195 unload
196 ;;
197 *)
198 echo $"Usage: $0 {start|stop|restart|status|unload}"
199 exit 2
200esac
201
202exit $?
diff --git a/Documentation/mic/mpssd/mpssd.c b/Documentation/mic/mpssd/mpssd.c
new file mode 100644
index 000000000000..8064804cdac3
--- /dev/null
+++ b/Documentation/mic/mpssd/mpssd.c
@@ -0,0 +1,1701 @@
1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2013 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * The full GNU General Public License is included in this distribution in
16 * the file called "COPYING".
17 *
18 * Intel MIC User Space Tools.
19 */
20
21#define _GNU_SOURCE
22
23#include <stdlib.h>
24#include <fcntl.h>
25#include <getopt.h>
26#include <assert.h>
27#include <unistd.h>
28#include <stdbool.h>
29#include <signal.h>
30#include <poll.h>
31#include <features.h>
32#include <sys/types.h>
33#include <sys/stat.h>
34#include <sys/mman.h>
35#include <sys/socket.h>
36#include <linux/virtio_ring.h>
37#include <linux/virtio_net.h>
38#include <linux/virtio_console.h>
39#include <linux/virtio_blk.h>
40#include <linux/version.h>
41#include "mpssd.h"
42#include <linux/mic_ioctl.h>
43#include <linux/mic_common.h>
44
45static void init_mic(struct mic_info *mic);
46
47static FILE *logfp;
48static struct mic_info mic_list;
49
50#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
51
52#define min_t(type, x, y) ({ \
53 type __min1 = (x); \
54 type __min2 = (y); \
55 __min1 < __min2 ? __min1 : __min2; })
56
/*
 * Align addr on a size boundary - adjust address down/up if needed.
 * size must be a power of two. Every macro argument is parenthesized so
 * that expressions such as "1 << n" can be passed safely.
 */
#define _ALIGN_DOWN(addr, size)	((addr) & ~((size) - 1))
#define _ALIGN_UP(addr, size)	_ALIGN_DOWN((addr) + (size) - 1, (size))

/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN(addr, size)	_ALIGN_UP((addr), (size))
63
64/* to align the pointer to the (next) page boundary */
65#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE)
66
67#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
68
69#define GSO_ENABLED 1
70#define MAX_GSO_SIZE (64 * 1024)
71#define ETH_H_LEN 14
72#define MAX_NET_PKT_SIZE (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
73#define MIC_DEVICE_PAGE_END 0x1000
74
75#ifndef VIRTIO_NET_HDR_F_DATA_VALID
76#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
77#endif
78
79static struct {
80 struct mic_device_desc dd;
81 struct mic_vqconfig vqconfig[2];
82 __u32 host_features, guest_acknowledgements;
83 struct virtio_console_config cons_config;
84} virtcons_dev_page = {
85 .dd = {
86 .type = VIRTIO_ID_CONSOLE,
87 .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
88 .feature_len = sizeof(virtcons_dev_page.host_features),
89 .config_len = sizeof(virtcons_dev_page.cons_config),
90 },
91 .vqconfig[0] = {
92 .num = htole16(MIC_VRING_ENTRIES),
93 },
94 .vqconfig[1] = {
95 .num = htole16(MIC_VRING_ENTRIES),
96 },
97};
98
99static struct {
100 struct mic_device_desc dd;
101 struct mic_vqconfig vqconfig[2];
102 __u32 host_features, guest_acknowledgements;
103 struct virtio_net_config net_config;
104} virtnet_dev_page = {
105 .dd = {
106 .type = VIRTIO_ID_NET,
107 .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
108 .feature_len = sizeof(virtnet_dev_page.host_features),
109 .config_len = sizeof(virtnet_dev_page.net_config),
110 },
111 .vqconfig[0] = {
112 .num = htole16(MIC_VRING_ENTRIES),
113 },
114 .vqconfig[1] = {
115 .num = htole16(MIC_VRING_ENTRIES),
116 },
117#if GSO_ENABLED
118 .host_features = htole32(
119 1 << VIRTIO_NET_F_CSUM |
120 1 << VIRTIO_NET_F_GSO |
121 1 << VIRTIO_NET_F_GUEST_TSO4 |
122 1 << VIRTIO_NET_F_GUEST_TSO6 |
123 1 << VIRTIO_NET_F_GUEST_ECN |
124 1 << VIRTIO_NET_F_GUEST_UFO),
125#else
126 .host_features = 0,
127#endif
128};
129
130static const char *mic_config_dir = "/etc/sysconfig/mic";
131static const char *virtblk_backend = "VIRTBLK_BACKEND";
132static struct {
133 struct mic_device_desc dd;
134 struct mic_vqconfig vqconfig[1];
135 __u32 host_features, guest_acknowledgements;
136 struct virtio_blk_config blk_config;
137} virtblk_dev_page = {
138 .dd = {
139 .type = VIRTIO_ID_BLOCK,
140 .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
141 .feature_len = sizeof(virtblk_dev_page.host_features),
142 .config_len = sizeof(virtblk_dev_page.blk_config),
143 },
144 .vqconfig[0] = {
145 .num = htole16(MIC_VRING_ENTRIES),
146 },
147 .host_features =
148 htole32(1<<VIRTIO_BLK_F_SEG_MAX),
149 .blk_config = {
150 .seg_max = htole32(MIC_VRING_ENTRIES - 2),
151 .capacity = htole64(0),
152 }
153};
154
155static char *myname;
156
157static int
158tap_configure(struct mic_info *mic, char *dev)
159{
160 pid_t pid;
161 char *ifargv[7];
162 char ipaddr[IFNAMSIZ];
163 int ret = 0;
164
165 pid = fork();
166 if (pid == 0) {
167 ifargv[0] = "ip";
168 ifargv[1] = "link";
169 ifargv[2] = "set";
170 ifargv[3] = dev;
171 ifargv[4] = "up";
172 ifargv[5] = NULL;
173 mpsslog("Configuring %s\n", dev);
174 ret = execvp("ip", ifargv);
175 if (ret < 0) {
176 mpsslog("%s execvp failed errno %s\n",
177 mic->name, strerror(errno));
178 return ret;
179 }
180 }
181 if (pid < 0) {
182 mpsslog("%s fork failed errno %s\n",
183 mic->name, strerror(errno));
184 return ret;
185 }
186
187 ret = waitpid(pid, NULL, 0);
188 if (ret < 0) {
189 mpsslog("%s waitpid failed errno %s\n",
190 mic->name, strerror(errno));
191 return ret;
192 }
193
194 snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id);
195
196 pid = fork();
197 if (pid == 0) {
198 ifargv[0] = "ip";
199 ifargv[1] = "addr";
200 ifargv[2] = "add";
201 ifargv[3] = ipaddr;
202 ifargv[4] = "dev";
203 ifargv[5] = dev;
204 ifargv[6] = NULL;
205 mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
206 ret = execvp("ip", ifargv);
207 if (ret < 0) {
208 mpsslog("%s execvp failed errno %s\n",
209 mic->name, strerror(errno));
210 return ret;
211 }
212 }
213 if (pid < 0) {
214 mpsslog("%s fork failed errno %s\n",
215 mic->name, strerror(errno));
216 return ret;
217 }
218
219 ret = waitpid(pid, NULL, 0);
220 if (ret < 0) {
221 mpsslog("%s waitpid failed errno %s\n",
222 mic->name, strerror(errno));
223 return ret;
224 }
225 mpsslog("MIC name %s %s %d DONE!\n",
226 mic->name, __func__, __LINE__);
227 return 0;
228}
229
230static int tun_alloc(struct mic_info *mic, char *dev)
231{
232 struct ifreq ifr;
233 int fd, err;
234#if GSO_ENABLED
235 unsigned offload;
236#endif
237 fd = open("/dev/net/tun", O_RDWR);
238 if (fd < 0) {
239 mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
240 goto done;
241 }
242
243 memset(&ifr, 0, sizeof(ifr));
244
245 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
246 if (*dev)
247 strncpy(ifr.ifr_name, dev, IFNAMSIZ);
248
249 err = ioctl(fd, TUNSETIFF, (void *) &ifr);
250 if (err < 0) {
251 mpsslog("%s %s %d TUNSETIFF failed %s\n",
252 mic->name, __func__, __LINE__, strerror(errno));
253 close(fd);
254 return err;
255 }
256#if GSO_ENABLED
257 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
258 TUN_F_TSO_ECN | TUN_F_UFO;
259
260 err = ioctl(fd, TUNSETOFFLOAD, offload);
261 if (err < 0) {
262 mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
263 mic->name, __func__, __LINE__, strerror(errno));
264 close(fd);
265 return err;
266 }
267#endif
268 strcpy(dev, ifr.ifr_name);
269 mpsslog("Created TAP %s\n", dev);
270done:
271 return fd;
272}
273
274#define NET_FD_VIRTIO_NET 0
275#define NET_FD_TUN 1
276#define MAX_NET_FD 2
277
278static void set_dp(struct mic_info *mic, int type, void *dp)
279{
280 switch (type) {
281 case VIRTIO_ID_CONSOLE:
282 mic->mic_console.console_dp = dp;
283 return;
284 case VIRTIO_ID_NET:
285 mic->mic_net.net_dp = dp;
286 return;
287 case VIRTIO_ID_BLOCK:
288 mic->mic_virtblk.block_dp = dp;
289 return;
290 }
291 mpsslog("%s %s %d not found\n", mic->name, __func__, type);
292 assert(0);
293}
294
295static void *get_dp(struct mic_info *mic, int type)
296{
297 switch (type) {
298 case VIRTIO_ID_CONSOLE:
299 return mic->mic_console.console_dp;
300 case VIRTIO_ID_NET:
301 return mic->mic_net.net_dp;
302 case VIRTIO_ID_BLOCK:
303 return mic->mic_virtblk.block_dp;
304 }
305 mpsslog("%s %s %d not found\n", mic->name, __func__, type);
306 assert(0);
307 return NULL;
308}
309
310static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
311{
312 struct mic_device_desc *d;
313 int i;
314 void *dp = get_dp(mic, type);
315
316 for (i = mic_aligned_size(struct mic_bootparam); i < PAGE_SIZE;
317 i += mic_total_desc_size(d)) {
318 d = dp + i;
319
320 /* End of list */
321 if (d->type == 0)
322 break;
323
324 if (d->type == -1)
325 continue;
326
327 mpsslog("%s %s d-> type %d d %p\n",
328 mic->name, __func__, d->type, d);
329
330 if (d->type == (__u8)type)
331 return d;
332 }
333 mpsslog("%s %s %d not found\n", mic->name, __func__, type);
334 assert(0);
335 return NULL;
336}
337
338/* See comments in vhost.c for explanation of next_desc() */
339static unsigned next_desc(struct vring_desc *desc)
340{
341 unsigned int next;
342
343 if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
344 return -1U;
345 next = le16toh(desc->next);
346 return next;
347}
348
349/* Sum up all the IOVEC length */
350static ssize_t
351sum_iovec_len(struct mic_copy_desc *copy)
352{
353 ssize_t sum = 0;
354 int i;
355
356 for (i = 0; i < copy->iovcnt; i++)
357 sum += copy->iov[i].iov_len;
358 return sum;
359}
360
361static inline void verify_out_len(struct mic_info *mic,
362 struct mic_copy_desc *copy)
363{
364 if (copy->out_len != sum_iovec_len(copy)) {
365 mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%x\n",
366 mic->name, __func__, __LINE__,
367 copy->out_len, sum_iovec_len(copy));
368 assert(copy->out_len == sum_iovec_len(copy));
369 }
370}
371
372/* Display an iovec */
373static void
374disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
375 const char *s, int line)
376{
377 int i;
378
379 for (i = 0; i < copy->iovcnt; i++)
380 mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%lx\n",
381 mic->name, s, line, i,
382 copy->iov[i].iov_base, copy->iov[i].iov_len);
383}
384
385static inline __u16 read_avail_idx(struct mic_vring *vr)
386{
387 return ACCESS_ONCE(vr->info->avail_idx);
388}
389
390static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
391 struct mic_copy_desc *copy, ssize_t len)
392{
393 copy->vr_idx = tx ? 0 : 1;
394 copy->update_used = true;
395 if (type == VIRTIO_ID_NET)
396 copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
397 else
398 copy->iov[0].iov_len = len;
399}
400
401/* Central API which triggers the copies */
402static int
403mic_virtio_copy(struct mic_info *mic, int fd,
404 struct mic_vring *vr, struct mic_copy_desc *copy)
405{
406 int ret;
407
408 ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
409 if (ret) {
410 mpsslog("%s %s %d errno %s ret %d\n",
411 mic->name, __func__, __LINE__,
412 strerror(errno), ret);
413 }
414 return ret;
415}
416
417/*
418 * This initialization routine requires at least one
419 * vring i.e. vr0. vr1 is optional.
420 */
421static void *
422init_vr(struct mic_info *mic, int fd, int type,
423 struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
424{
425 int vr_size;
426 char *va;
427
428 vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
429 MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
430 va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
431 PROT_READ, MAP_SHARED, fd, 0);
432 if (MAP_FAILED == va) {
433 mpsslog("%s %s %d mmap failed errno %s\n",
434 mic->name, __func__, __LINE__,
435 strerror(errno));
436 goto done;
437 }
438 set_dp(mic, type, va);
439 vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
440 vr0->info = vr0->va +
441 vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
442 vring_init(&vr0->vr,
443 MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
444 mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
445 __func__, mic->name, vr0->va, vr0->info, vr_size,
446 vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
447 mpsslog("magic 0x%x expected 0x%x\n",
448 vr0->info->magic, MIC_MAGIC + type);
449 assert(vr0->info->magic == MIC_MAGIC + type);
450 if (vr1) {
451 vr1->va = (struct mic_vring *)
452 &va[MIC_DEVICE_PAGE_END + vr_size];
453 vr1->info = vr1->va + vring_size(MIC_VRING_ENTRIES,
454 MIC_VIRTIO_RING_ALIGN);
455 vring_init(&vr1->vr,
456 MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
457 mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
458 __func__, mic->name, vr1->va, vr1->info, vr_size,
459 vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
460 mpsslog("magic 0x%x expected 0x%x\n",
461 vr1->info->magic, MIC_MAGIC + type + 1);
462 assert(vr1->info->magic == MIC_MAGIC + type + 1);
463 }
464done:
465 return va;
466}
467
468static void
469wait_for_card_driver(struct mic_info *mic, int fd, int type)
470{
471 struct pollfd pollfd;
472 int err;
473 struct mic_device_desc *desc = get_device_desc(mic, type);
474
475 pollfd.fd = fd;
476 mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
477 mic->name, __func__, type, desc->status);
478 while (1) {
479 pollfd.events = POLLIN;
480 pollfd.revents = 0;
481 err = poll(&pollfd, 1, -1);
482 if (err < 0) {
483 mpsslog("%s %s poll failed %s\n",
484 mic->name, __func__, strerror(errno));
485 continue;
486 }
487
488 if (pollfd.revents) {
489 mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n",
490 mic->name, __func__, type, desc->status);
491 if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
492 mpsslog("%s %s poll.revents %d\n",
493 mic->name, __func__, pollfd.revents);
494 mpsslog("%s %s desc-> type %d status 0x%x\n",
495 mic->name, __func__, type,
496 desc->status);
497 break;
498 }
499 }
500 }
501}
502
503/* Spin till we have some descriptors */
504static void
505spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
506{
507 __u16 avail_idx = read_avail_idx(vr);
508
509 while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
510#ifdef DEBUG
511 mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
512 mic->name, __func__,
513 le16toh(vr->vr.avail->idx), vr->info->avail_idx);
514#endif
515 sched_yield();
516 }
517}
518
519static void *
520virtio_net(void *arg)
521{
522 static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
523 static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __aligned(64);
524 struct iovec vnet_iov[2][2] = {
525 { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
526 { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
527 { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
528 { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
529 };
530 struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
531 struct mic_info *mic = (struct mic_info *)arg;
532 char if_name[IFNAMSIZ];
533 struct pollfd net_poll[MAX_NET_FD];
534 struct mic_vring tx_vr, rx_vr;
535 struct mic_copy_desc copy;
536 struct mic_device_desc *desc;
537 int err;
538
539 snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
540 mic->mic_net.tap_fd = tun_alloc(mic, if_name);
541 if (mic->mic_net.tap_fd < 0)
542 goto done;
543
544 if (tap_configure(mic, if_name))
545 goto done;
546 mpsslog("MIC name %s id %d\n", mic->name, mic->id);
547
548 net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
549 net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
550 net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
551 net_poll[NET_FD_TUN].events = POLLIN;
552
553 if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
554 VIRTIO_ID_NET, &tx_vr, &rx_vr,
555 virtnet_dev_page.dd.num_vq)) {
556 mpsslog("%s init_vr failed %s\n",
557 mic->name, strerror(errno));
558 goto done;
559 }
560
561 copy.iovcnt = 2;
562 desc = get_device_desc(mic, VIRTIO_ID_NET);
563
564 while (1) {
565 ssize_t len;
566
567 net_poll[NET_FD_VIRTIO_NET].revents = 0;
568 net_poll[NET_FD_TUN].revents = 0;
569
570 /* Start polling for data from tap and virtio net */
571 err = poll(net_poll, 2, -1);
572 if (err < 0) {
573 mpsslog("%s poll failed %s\n",
574 __func__, strerror(errno));
575 continue;
576 }
577 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
578 wait_for_card_driver(mic, mic->mic_net.virtio_net_fd,
579 VIRTIO_ID_NET);
580 /*
581 * Check if there is data to be read from TUN and write to
582 * virtio net fd if there is.
583 */
584 if (net_poll[NET_FD_TUN].revents & POLLIN) {
585 copy.iov = iov0;
586 len = readv(net_poll[NET_FD_TUN].fd,
587 copy.iov, copy.iovcnt);
588 if (len > 0) {
589 struct virtio_net_hdr *hdr
590 = (struct virtio_net_hdr *) vnet_hdr[0];
591
592 /* Disable checksums on the card since we are on
593 a reliable PCIe link */
594 hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
595#ifdef DEBUG
596 mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
597 __func__, __LINE__, hdr->flags);
598 mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
599 copy.out_len, hdr->gso_type);
600#endif
601#ifdef DEBUG
602 disp_iovec(mic, copy, __func__, __LINE__);
603 mpsslog("%s %s %d read from tap 0x%lx\n",
604 mic->name, __func__, __LINE__,
605 len);
606#endif
607 spin_for_descriptors(mic, &tx_vr);
608 txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
609 len);
610
611 err = mic_virtio_copy(mic,
612 mic->mic_net.virtio_net_fd, &tx_vr,
613 &copy);
614 if (err < 0) {
615 mpsslog("%s %s %d mic_virtio_copy %s\n",
616 mic->name, __func__, __LINE__,
617 strerror(errno));
618 }
619 if (!err)
620 verify_out_len(mic, &copy);
621#ifdef DEBUG
622 disp_iovec(mic, copy, __func__, __LINE__);
623 mpsslog("%s %s %d wrote to net 0x%lx\n",
624 mic->name, __func__, __LINE__,
625 sum_iovec_len(&copy));
626#endif
627 /* Reinitialize IOV for next run */
628 iov0[1].iov_len = MAX_NET_PKT_SIZE;
629 } else if (len < 0) {
630 disp_iovec(mic, &copy, __func__, __LINE__);
631 mpsslog("%s %s %d read failed %s ", mic->name,
632 __func__, __LINE__, strerror(errno));
633 mpsslog("cnt %d sum %d\n",
634 copy.iovcnt, sum_iovec_len(&copy));
635 }
636 }
637
638 /*
639 * Check if there is data to be read from virtio net and
640 * write to TUN if there is.
641 */
642 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
643 while (rx_vr.info->avail_idx !=
644 le16toh(rx_vr.vr.avail->idx)) {
645 copy.iov = iov1;
646 txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
647 MAX_NET_PKT_SIZE
648 + sizeof(struct virtio_net_hdr));
649
650 err = mic_virtio_copy(mic,
651 mic->mic_net.virtio_net_fd, &rx_vr,
652 &copy);
653 if (!err) {
654#ifdef DEBUG
655 struct virtio_net_hdr *hdr
656 = (struct virtio_net_hdr *)
657 vnet_hdr[1];
658
659 mpsslog("%s %s %d hdr->flags 0x%x, ",
660 mic->name, __func__, __LINE__,
661 hdr->flags);
662 mpsslog("out_len %d gso_type 0x%x\n",
663 copy.out_len,
664 hdr->gso_type);
665#endif
666 /* Set the correct output iov_len */
667 iov1[1].iov_len = copy.out_len -
668 sizeof(struct virtio_net_hdr);
669 verify_out_len(mic, &copy);
670#ifdef DEBUG
671 disp_iovec(mic, copy, __func__,
672 __LINE__);
673 mpsslog("%s %s %d ",
674 mic->name, __func__, __LINE__);
675 mpsslog("read from net 0x%lx\n",
676 sum_iovec_len(copy));
677#endif
678 len = writev(net_poll[NET_FD_TUN].fd,
679 copy.iov, copy.iovcnt);
680 if (len != sum_iovec_len(&copy)) {
681 mpsslog("Tun write failed %s ",
682 strerror(errno));
683 mpsslog("len 0x%x ", len);
684 mpsslog("read_len 0x%x\n",
685 sum_iovec_len(&copy));
686 } else {
687#ifdef DEBUG
688 disp_iovec(mic, &copy, __func__,
689 __LINE__);
690 mpsslog("%s %s %d ",
691 mic->name, __func__,
692 __LINE__);
693 mpsslog("wrote to tap 0x%lx\n",
694 len);
695#endif
696 }
697 } else {
698 mpsslog("%s %s %d mic_virtio_copy %s\n",
699 mic->name, __func__, __LINE__,
700 strerror(errno));
701 break;
702 }
703 }
704 }
705 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
706 mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
707 }
708done:
709 pthread_exit(NULL);
710}
711
712/* virtio_console */
713#define VIRTIO_CONSOLE_FD 0
714#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
715#define MAX_CONSOLE_FD (MONITOR_FD + 1) /* must be the last one + 1 */
716#define MAX_BUFFER_SIZE PAGE_SIZE
717
718static void *
719virtio_console(void *arg)
720{
721 static __u8 vcons_buf[2][PAGE_SIZE];
722 struct iovec vcons_iov[2] = {
723 { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
724 { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
725 };
726 struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
727 struct mic_info *mic = (struct mic_info *)arg;
728 int err;
729 struct pollfd console_poll[MAX_CONSOLE_FD];
730 int pty_fd;
731 char *pts_name;
732 ssize_t len;
733 struct mic_vring tx_vr, rx_vr;
734 struct mic_copy_desc copy;
735 struct mic_device_desc *desc;
736
737 pty_fd = posix_openpt(O_RDWR);
738 if (pty_fd < 0) {
739 mpsslog("can't open a pseudoterminal master device: %s\n",
740 strerror(errno));
741 goto _return;
742 }
743 pts_name = ptsname(pty_fd);
744 if (pts_name == NULL) {
745 mpsslog("can't get pts name\n");
746 goto _close_pty;
747 }
748 printf("%s console message goes to %s\n", mic->name, pts_name);
749 mpsslog("%s console message goes to %s\n", mic->name, pts_name);
750 err = grantpt(pty_fd);
751 if (err < 0) {
752 mpsslog("can't grant access: %s %s\n",
753 pts_name, strerror(errno));
754 goto _close_pty;
755 }
756 err = unlockpt(pty_fd);
757 if (err < 0) {
758 mpsslog("can't unlock a pseudoterminal: %s %s\n",
759 pts_name, strerror(errno));
760 goto _close_pty;
761 }
762 console_poll[MONITOR_FD].fd = pty_fd;
763 console_poll[MONITOR_FD].events = POLLIN;
764
765 console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
766 console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
767
768 if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
769 VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
770 virtcons_dev_page.dd.num_vq)) {
771 mpsslog("%s init_vr failed %s\n",
772 mic->name, strerror(errno));
773 goto _close_pty;
774 }
775
776 copy.iovcnt = 1;
777 desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
778
779 for (;;) {
780 console_poll[MONITOR_FD].revents = 0;
781 console_poll[VIRTIO_CONSOLE_FD].revents = 0;
782 err = poll(console_poll, MAX_CONSOLE_FD, -1);
783 if (err < 0) {
784 mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
785 strerror(errno));
786 continue;
787 }
788 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
789 wait_for_card_driver(mic,
790 mic->mic_console.virtio_console_fd,
791 VIRTIO_ID_CONSOLE);
792
793 if (console_poll[MONITOR_FD].revents & POLLIN) {
794 copy.iov = iov0;
795 len = readv(pty_fd, copy.iov, copy.iovcnt);
796 if (len > 0) {
797#ifdef DEBUG
798 disp_iovec(mic, copy, __func__, __LINE__);
799 mpsslog("%s %s %d read from tap 0x%lx\n",
800 mic->name, __func__, __LINE__,
801 len);
802#endif
803 spin_for_descriptors(mic, &tx_vr);
804 txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
805 &copy, len);
806
807 err = mic_virtio_copy(mic,
808 mic->mic_console.virtio_console_fd,
809 &tx_vr, &copy);
810 if (err < 0) {
811 mpsslog("%s %s %d mic_virtio_copy %s\n",
812 mic->name, __func__, __LINE__,
813 strerror(errno));
814 }
815 if (!err)
816 verify_out_len(mic, &copy);
817#ifdef DEBUG
818 disp_iovec(mic, copy, __func__, __LINE__);
819 mpsslog("%s %s %d wrote to net 0x%lx\n",
820 mic->name, __func__, __LINE__,
821 sum_iovec_len(copy));
822#endif
823 /* Reinitialize IOV for next run */
824 iov0->iov_len = PAGE_SIZE;
825 } else if (len < 0) {
826 disp_iovec(mic, &copy, __func__, __LINE__);
827 mpsslog("%s %s %d read failed %s ",
828 mic->name, __func__, __LINE__,
829 strerror(errno));
830 mpsslog("cnt %d sum %d\n",
831 copy.iovcnt, sum_iovec_len(&copy));
832 }
833 }
834
835 if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
836 while (rx_vr.info->avail_idx !=
837 le16toh(rx_vr.vr.avail->idx)) {
838 copy.iov = iov1;
839 txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
840 &copy, PAGE_SIZE);
841
842 err = mic_virtio_copy(mic,
843 mic->mic_console.virtio_console_fd,
844 &rx_vr, &copy);
845 if (!err) {
846 /* Set the correct output iov_len */
847 iov1->iov_len = copy.out_len;
848 verify_out_len(mic, &copy);
849#ifdef DEBUG
850 disp_iovec(mic, copy, __func__,
851 __LINE__);
852 mpsslog("%s %s %d ",
853 mic->name, __func__, __LINE__);
854 mpsslog("read from net 0x%lx\n",
855 sum_iovec_len(copy));
856#endif
857 len = writev(pty_fd,
858 copy.iov, copy.iovcnt);
859 if (len != sum_iovec_len(&copy)) {
860 mpsslog("Tun write failed %s ",
861 strerror(errno));
862 mpsslog("len 0x%x ", len);
863 mpsslog("read_len 0x%x\n",
864 sum_iovec_len(&copy));
865 } else {
866#ifdef DEBUG
867 disp_iovec(mic, copy, __func__,
868 __LINE__);
869 mpsslog("%s %s %d ",
870 mic->name, __func__,
871 __LINE__);
872 mpsslog("wrote to tap 0x%lx\n",
873 len);
874#endif
875 }
876 } else {
877 mpsslog("%s %s %d mic_virtio_copy %s\n",
878 mic->name, __func__, __LINE__,
879 strerror(errno));
880 break;
881 }
882 }
883 }
884 if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
885 mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
886 }
887_close_pty:
888 close(pty_fd);
889_return:
890 pthread_exit(NULL);
891}
892
893static void
894add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
895{
896 char path[PATH_MAX];
897 int fd, err;
898
899 snprintf(path, PATH_MAX, "/dev/mic%d", mic->id);
900 fd = open(path, O_RDWR);
901 if (fd < 0) {
902 mpsslog("Could not open %s %s\n", path, strerror(errno));
903 return;
904 }
905
906 err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
907 if (err < 0) {
908 mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
909 close(fd);
910 return;
911 }
912 switch (dd->type) {
913 case VIRTIO_ID_NET:
914 mic->mic_net.virtio_net_fd = fd;
915 mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
916 break;
917 case VIRTIO_ID_CONSOLE:
918 mic->mic_console.virtio_console_fd = fd;
919 mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
920 break;
921 case VIRTIO_ID_BLOCK:
922 mic->mic_virtblk.virtio_block_fd = fd;
923 mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
924 break;
925 }
926}
927
928static bool
929set_backend_file(struct mic_info *mic)
930{
931 FILE *config;
932 char buff[PATH_MAX], *line, *evv, *p;
933
934 snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
935 config = fopen(buff, "r");
936 if (config == NULL)
937 return false;
938 do { /* look for "virtblk_backend=XXXX" */
939 line = fgets(buff, PATH_MAX, config);
940 if (line == NULL)
941 break;
942 if (*line == '#')
943 continue;
944 p = strchr(line, '\n');
945 if (p)
946 *p = '\0';
947 } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
948 fclose(config);
949 if (line == NULL)
950 return false;
951 evv = strchr(line, '=');
952 if (evv == NULL)
953 return false;
954 mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
955 if (mic->mic_virtblk.backend_file == NULL) {
956 mpsslog("can't allocate memory\n", mic->name, mic->id);
957 return false;
958 }
959 strcpy(mic->mic_virtblk.backend_file, evv + 1);
960 return true;
961}
962
963#define SECTOR_SIZE 512
964static bool
965set_backend_size(struct mic_info *mic)
966{
967 mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
968 SEEK_END);
969 if (mic->mic_virtblk.backend_size < 0) {
970 mpsslog("%s: can't seek: %s\n",
971 mic->name, mic->mic_virtblk.backend_file);
972 return false;
973 }
974 virtblk_dev_page.blk_config.capacity =
975 mic->mic_virtblk.backend_size / SECTOR_SIZE;
976 if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
977 virtblk_dev_page.blk_config.capacity++;
978
979 virtblk_dev_page.blk_config.capacity =
980 htole64(virtblk_dev_page.blk_config.capacity);
981
982 return true;
983}
984
985static bool
986open_backend(struct mic_info *mic)
987{
988 if (!set_backend_file(mic))
989 goto _error_exit;
990 mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
991 if (mic->mic_virtblk.backend < 0) {
992 mpsslog("%s: can't open: %s\n", mic->name,
993 mic->mic_virtblk.backend_file);
994 goto _error_free;
995 }
996 if (!set_backend_size(mic))
997 goto _error_close;
998 mic->mic_virtblk.backend_addr = mmap(NULL,
999 mic->mic_virtblk.backend_size,
1000 PROT_READ|PROT_WRITE, MAP_SHARED,
1001 mic->mic_virtblk.backend, 0L);
1002 if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1003 mpsslog("%s: can't map: %s %s\n",
1004 mic->name, mic->mic_virtblk.backend_file,
1005 strerror(errno));
1006 goto _error_close;
1007 }
1008 return true;
1009
1010 _error_close:
1011 close(mic->mic_virtblk.backend);
1012 _error_free:
1013 free(mic->mic_virtblk.backend_file);
1014 _error_exit:
1015 return false;
1016}
1017
1018static void
1019close_backend(struct mic_info *mic)
1020{
1021 munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
1022 close(mic->mic_virtblk.backend);
1023 free(mic->mic_virtblk.backend_file);
1024}
1025
1026static bool
1027start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1028{
1029 if (((__u64)&virtblk_dev_page.blk_config % 8) != 0) {
1030 mpsslog("%s: blk_config is not 8 byte aligned.\n",
1031 mic->name);
1032 return false;
1033 }
1034 add_virtio_device(mic, &virtblk_dev_page.dd);
1035 if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1036 VIRTIO_ID_BLOCK, vring, NULL, virtblk_dev_page.dd.num_vq)) {
1037 mpsslog("%s init_vr failed %s\n",
1038 mic->name, strerror(errno));
1039 return false;
1040 }
1041 return true;
1042}
1043
1044static void
1045stop_virtblk(struct mic_info *mic)
1046{
1047 int vr_size, ret;
1048
1049 vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
1050 MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
1051 ret = munmap(mic->mic_virtblk.block_dp,
1052 MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1053 if (ret < 0)
1054 mpsslog("%s munmap errno %d\n", mic->name, errno);
1055 close(mic->mic_virtblk.virtio_block_fd);
1056}
1057
1058static __u8
1059header_error_check(struct vring_desc *desc)
1060{
1061 if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1062 mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
1063 __func__, __LINE__);
1064 return -EIO;
1065 }
1066 if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1067 mpsslog("%s() %d: alone\n",
1068 __func__, __LINE__);
1069 return -EIO;
1070 }
1071 if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1072 mpsslog("%s() %d: not read\n",
1073 __func__, __LINE__);
1074 return -EIO;
1075 }
1076 return 0;
1077}
1078
1079static int
1080read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1081{
1082 struct iovec iovec;
1083 struct mic_copy_desc copy;
1084
1085 iovec.iov_len = sizeof(*hdr);
1086 iovec.iov_base = hdr;
1087 copy.iov = &iovec;
1088 copy.iovcnt = 1;
1089 copy.vr_idx = 0; /* only one vring on virtio_block */
1090 copy.update_used = false; /* do not update used index */
1091 return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1092}
1093
1094static int
1095transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1096{
1097 struct mic_copy_desc copy;
1098
1099 copy.iov = iovec;
1100 copy.iovcnt = iovcnt;
1101 copy.vr_idx = 0; /* only one vring on virtio_block */
1102 copy.update_used = false; /* do not update used index */
1103 return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1104}
1105
1106static __u8
1107status_error_check(struct vring_desc *desc)
1108{
1109 if (le32toh(desc->len) != sizeof(__u8)) {
1110 mpsslog("%s() %d: length is not sizeof(status)\n",
1111 __func__, __LINE__);
1112 return -EIO;
1113 }
1114 return 0;
1115}
1116
1117static int
1118write_status(int fd, __u8 *status)
1119{
1120 struct iovec iovec;
1121 struct mic_copy_desc copy;
1122
1123 iovec.iov_base = status;
1124 iovec.iov_len = sizeof(*status);
1125 copy.iov = &iovec;
1126 copy.iovcnt = 1;
1127 copy.vr_idx = 0; /* only one vring on virtio_block */
1128 copy.update_used = true; /* Update used index */
1129 return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1130}
1131
1132static void *
1133virtio_block(void *arg)
1134{
1135 struct mic_info *mic = (struct mic_info *) arg;
1136 int ret;
1137 struct pollfd block_poll;
1138 struct mic_vring vring;
1139 __u16 avail_idx;
1140 __u32 desc_idx;
1141 struct vring_desc *desc;
1142 struct iovec *iovec, *piov;
1143 __u8 status;
1144 __u32 buffer_desc_idx;
1145 struct virtio_blk_outhdr hdr;
1146 void *fos;
1147
1148 for (;;) { /* forever */
1149 if (!open_backend(mic)) { /* No virtblk */
1150 for (mic->mic_virtblk.signaled = 0;
1151 !mic->mic_virtblk.signaled;)
1152 sleep(1);
1153 continue;
1154 }
1155
1156 /* backend file is specified. */
1157 if (!start_virtblk(mic, &vring))
1158 goto _close_backend;
1159 iovec = malloc(sizeof(*iovec) *
1160 le32toh(virtblk_dev_page.blk_config.seg_max));
1161 if (!iovec) {
1162 mpsslog("%s: can't alloc iovec: %s\n",
1163 mic->name, strerror(ENOMEM));
1164 goto _stop_virtblk;
1165 }
1166
1167 block_poll.fd = mic->mic_virtblk.virtio_block_fd;
1168 block_poll.events = POLLIN;
1169 for (mic->mic_virtblk.signaled = 0;
1170 !mic->mic_virtblk.signaled;) {
1171 block_poll.revents = 0;
1172 /* timeout in 1 sec to see signaled */
1173 ret = poll(&block_poll, 1, 1000);
1174 if (ret < 0) {
1175 mpsslog("%s %d: poll failed: %s\n",
1176 __func__, __LINE__,
1177 strerror(errno));
1178 continue;
1179 }
1180
1181 if (!(block_poll.revents & POLLIN)) {
1182#ifdef DEBUG
1183 mpsslog("%s %d: block_poll.revents=0x%x\n",
1184 __func__, __LINE__, block_poll.revents);
1185#endif
1186 continue;
1187 }
1188
1189 /* POLLIN */
1190 while (vring.info->avail_idx !=
1191 le16toh(vring.vr.avail->idx)) {
1192 /* read header element */
1193 avail_idx =
1194 vring.info->avail_idx &
1195 (vring.vr.num - 1);
1196 desc_idx = le16toh(
1197 vring.vr.avail->ring[avail_idx]);
1198 desc = &vring.vr.desc[desc_idx];
1199#ifdef DEBUG
1200 mpsslog("%s() %d: avail_idx=%d ",
1201 __func__, __LINE__,
1202 vring.info->avail_idx);
1203 mpsslog("vring.vr.num=%d desc=%p\n",
1204 vring.vr.num, desc);
1205#endif
1206 status = header_error_check(desc);
1207 ret = read_header(
1208 mic->mic_virtblk.virtio_block_fd,
1209 &hdr, desc_idx);
1210 if (ret < 0) {
1211 mpsslog("%s() %d %s: ret=%d %s\n",
1212 __func__, __LINE__,
1213 mic->name, ret,
1214 strerror(errno));
1215 break;
1216 }
1217 /* buffer element */
1218 piov = iovec;
1219 status = 0;
1220 fos = mic->mic_virtblk.backend_addr +
1221 (hdr.sector * SECTOR_SIZE);
1222 buffer_desc_idx = desc_idx =
1223 next_desc(desc);
1224 for (desc = &vring.vr.desc[buffer_desc_idx];
1225 desc->flags & VRING_DESC_F_NEXT;
1226 desc_idx = next_desc(desc),
1227 desc = &vring.vr.desc[desc_idx]) {
1228 piov->iov_len = desc->len;
1229 piov->iov_base = fos;
1230 piov++;
1231 fos += desc->len;
1232 }
1233 /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
1234 if (hdr.type & ~(VIRTIO_BLK_T_OUT |
1235 VIRTIO_BLK_T_GET_ID)) {
1236 /*
1237 VIRTIO_BLK_T_IN - does not do
1238 anything. Probably for documenting.
1239 VIRTIO_BLK_T_SCSI_CMD - for
1240 virtio_scsi.
1241 VIRTIO_BLK_T_FLUSH - turned off in
1242 config space.
1243 VIRTIO_BLK_T_BARRIER - defined but not
1244 used in anywhere.
1245 */
1246 mpsslog("%s() %d: type %x ",
1247 __func__, __LINE__,
1248 hdr.type);
1249 mpsslog("is not supported\n");
1250 status = -ENOTSUP;
1251
1252 } else {
1253 ret = transfer_blocks(
1254 mic->mic_virtblk.virtio_block_fd,
1255 iovec,
1256 piov - iovec);
1257 if (ret < 0 &&
1258 status != 0)
1259 status = ret;
1260 }
1261 /* write status and update used pointer */
1262 if (status != 0)
1263 status = status_error_check(desc);
1264 ret = write_status(
1265 mic->mic_virtblk.virtio_block_fd,
1266 &status);
1267#ifdef DEBUG
1268 mpsslog("%s() %d: write status=%d on desc=%p\n",
1269 __func__, __LINE__,
1270 status, desc);
1271#endif
1272 }
1273 }
1274 free(iovec);
1275_stop_virtblk:
1276 stop_virtblk(mic);
1277_close_backend:
1278 close_backend(mic);
1279 } /* forever */
1280
1281 pthread_exit(NULL);
1282}
1283
1284static void
1285reset(struct mic_info *mic)
1286{
1287#define RESET_TIMEOUT 120
1288 int i = RESET_TIMEOUT;
1289 setsysfs(mic->name, "state", "reset");
1290 while (i) {
1291 char *state;
1292 state = readsysfs(mic->name, "state");
1293 if (!state)
1294 goto retry;
1295 mpsslog("%s: %s %d state %s\n",
1296 mic->name, __func__, __LINE__, state);
1297 if ((!strcmp(state, "offline"))) {
1298 free(state);
1299 break;
1300 }
1301 free(state);
1302retry:
1303 sleep(1);
1304 i--;
1305 }
1306}
1307
1308static int
1309get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1310{
1311 if (!strcmp(shutdown_status, "nop"))
1312 return MIC_NOP;
1313 if (!strcmp(shutdown_status, "crashed"))
1314 return MIC_CRASHED;
1315 if (!strcmp(shutdown_status, "halted"))
1316 return MIC_HALTED;
1317 if (!strcmp(shutdown_status, "poweroff"))
1318 return MIC_POWER_OFF;
1319 if (!strcmp(shutdown_status, "restart"))
1320 return MIC_RESTART;
1321 mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
1322 /* Invalid state */
1323 assert(0);
1324};
1325
1326static int get_mic_state(struct mic_info *mic, char *state)
1327{
1328 if (!strcmp(state, "offline"))
1329 return MIC_OFFLINE;
1330 if (!strcmp(state, "online"))
1331 return MIC_ONLINE;
1332 if (!strcmp(state, "shutting_down"))
1333 return MIC_SHUTTING_DOWN;
1334 if (!strcmp(state, "reset_failed"))
1335 return MIC_RESET_FAILED;
1336 mpsslog("%s: BUG invalid state %s\n", mic->name, state);
1337 /* Invalid state */
1338 assert(0);
1339};
1340
1341static void mic_handle_shutdown(struct mic_info *mic)
1342{
1343#define SHUTDOWN_TIMEOUT 60
1344 int i = SHUTDOWN_TIMEOUT, ret, stat = 0;
1345 char *shutdown_status;
1346 while (i) {
1347 shutdown_status = readsysfs(mic->name, "shutdown_status");
1348 if (!shutdown_status)
1349 continue;
1350 mpsslog("%s: %s %d shutdown_status %s\n",
1351 mic->name, __func__, __LINE__, shutdown_status);
1352 switch (get_mic_shutdown_status(mic, shutdown_status)) {
1353 case MIC_RESTART:
1354 mic->restart = 1;
1355 case MIC_HALTED:
1356 case MIC_POWER_OFF:
1357 case MIC_CRASHED:
1358 free(shutdown_status);
1359 goto reset;
1360 default:
1361 break;
1362 }
1363 free(shutdown_status);
1364 sleep(1);
1365 i--;
1366 }
1367reset:
1368 ret = kill(mic->pid, SIGTERM);
1369 mpsslog("%s: %s %d kill pid %d ret %d\n",
1370 mic->name, __func__, __LINE__,
1371 mic->pid, ret);
1372 if (!ret) {
1373 ret = waitpid(mic->pid, &stat,
1374 WIFSIGNALED(stat));
1375 mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1376 mic->name, __func__, __LINE__,
1377 ret, mic->pid);
1378 }
1379 if (ret == mic->pid)
1380 reset(mic);
1381}
1382
1383static void *
1384mic_config(void *arg)
1385{
1386 struct mic_info *mic = (struct mic_info *)arg;
1387 char *state = NULL;
1388 char pathname[PATH_MAX];
1389 int fd, ret;
1390 struct pollfd ufds[1];
1391 char value[4096];
1392
1393 snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1394 MICSYSFSDIR, mic->name, "state");
1395
1396 fd = open(pathname, O_RDONLY);
1397 if (fd < 0) {
1398 mpsslog("%s: opening file %s failed %s\n",
1399 mic->name, pathname, strerror(errno));
1400 goto error;
1401 }
1402
1403 do {
1404 ret = read(fd, value, sizeof(value));
1405 if (ret < 0) {
1406 mpsslog("%s: Failed to read sysfs entry '%s': %s\n",
1407 mic->name, pathname, strerror(errno));
1408 goto close_error1;
1409 }
1410retry:
1411 state = readsysfs(mic->name, "state");
1412 if (!state)
1413 goto retry;
1414 mpsslog("%s: %s %d state %s\n",
1415 mic->name, __func__, __LINE__, state);
1416 switch (get_mic_state(mic, state)) {
1417 case MIC_SHUTTING_DOWN:
1418 mic_handle_shutdown(mic);
1419 goto close_error;
1420 default:
1421 break;
1422 }
1423 free(state);
1424
1425 ufds[0].fd = fd;
1426 ufds[0].events = POLLERR | POLLPRI;
1427 ret = poll(ufds, 1, -1);
1428 if (ret < 0) {
1429 mpsslog("%s: poll failed %s\n",
1430 mic->name, strerror(errno));
1431 goto close_error1;
1432 }
1433 } while (1);
1434close_error:
1435 free(state);
1436close_error1:
1437 close(fd);
1438error:
1439 init_mic(mic);
1440 pthread_exit(NULL);
1441}
1442
1443static void
1444set_cmdline(struct mic_info *mic)
1445{
1446 char buffer[PATH_MAX];
1447 int len;
1448
1449 len = snprintf(buffer, PATH_MAX,
1450 "clocksource=tsc highres=off nohz=off ");
1451 len += snprintf(buffer + len, PATH_MAX,
1452 "cpufreq_on;corec6_off;pc3_off;pc6_off ");
1453 len += snprintf(buffer + len, PATH_MAX,
1454 "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1455 mic->id);
1456
1457 setsysfs(mic->name, "cmdline", buffer);
1458 mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1459 snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id);
1460 mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
1461}
1462
1463static void
1464set_log_buf_info(struct mic_info *mic)
1465{
1466 int fd;
1467 off_t len;
1468 char system_map[] = "/lib/firmware/mic/System.map";
1469 char *map, *temp, log_buf[17] = {'\0'};
1470
1471 fd = open(system_map, O_RDONLY);
1472 if (fd < 0) {
1473 mpsslog("%s: Opening System.map failed: %d\n",
1474 mic->name, errno);
1475 return;
1476 }
1477 len = lseek(fd, 0, SEEK_END);
1478 if (len < 0) {
1479 mpsslog("%s: Reading System.map size failed: %d\n",
1480 mic->name, errno);
1481 close(fd);
1482 return;
1483 }
1484 map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1485 if (map == MAP_FAILED) {
1486 mpsslog("%s: mmap of System.map failed: %d\n",
1487 mic->name, errno);
1488 close(fd);
1489 return;
1490 }
1491 temp = strstr(map, "__log_buf");
1492 if (!temp) {
1493 mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1494 munmap(map, len);
1495 close(fd);
1496 return;
1497 }
1498 strncpy(log_buf, temp - 19, 16);
1499 setsysfs(mic->name, "log_buf_addr", log_buf);
1500 mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1501 temp = strstr(map, "log_buf_len");
1502 if (!temp) {
1503 mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1504 munmap(map, len);
1505 close(fd);
1506 return;
1507 }
1508 strncpy(log_buf, temp - 19, 16);
1509 setsysfs(mic->name, "log_buf_len", log_buf);
1510 mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
1511 munmap(map, len);
1512 close(fd);
1513}
1514
1515static void init_mic(struct mic_info *mic);
1516
1517static void
1518change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1519{
1520 struct mic_info *mic;
1521
1522 for (mic = mic_list.next; mic != NULL; mic = mic->next)
1523 mic->mic_virtblk.signaled = 1/* true */;
1524}
1525
1526static void
1527init_mic(struct mic_info *mic)
1528{
1529 struct sigaction ignore = {
1530 .sa_flags = 0,
1531 .sa_handler = SIG_IGN
1532 };
1533 struct sigaction act = {
1534 .sa_flags = SA_SIGINFO,
1535 .sa_sigaction = change_virtblk_backend,
1536 };
1537 char buffer[PATH_MAX];
1538 int err;
1539
1540 /*
1541 * Currently, one virtio block device is supported for each MIC card
1542 * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1543 * The signal informs the virtio block backend about a change in the
1544 * configuration file which specifies the virtio backend file name on
1545 * the host. Virtio block backend then re-reads the configuration file
1546 * and switches to the new block device. This signalling mechanism may
1547 * not be required once multiple virtio block devices are supported by
1548 * the MIC daemon.
1549 */
1550 sigaction(SIGUSR1, &ignore, NULL);
1551
1552 mic->pid = fork();
1553 switch (mic->pid) {
1554 case 0:
1555 set_log_buf_info(mic);
1556 set_cmdline(mic);
1557 add_virtio_device(mic, &virtcons_dev_page.dd);
1558 add_virtio_device(mic, &virtnet_dev_page.dd);
1559 err = pthread_create(&mic->mic_console.console_thread, NULL,
1560 virtio_console, mic);
1561 if (err)
1562 mpsslog("%s virtcons pthread_create failed %s\n",
1563 mic->name, strerror(err));
1564 err = pthread_create(&mic->mic_net.net_thread, NULL,
1565 virtio_net, mic);
1566 if (err)
1567 mpsslog("%s virtnet pthread_create failed %s\n",
1568 mic->name, strerror(err));
1569 err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1570 virtio_block, mic);
1571 if (err)
1572 mpsslog("%s virtblk pthread_create failed %s\n",
1573 mic->name, strerror(err));
1574 sigemptyset(&act.sa_mask);
1575 err = sigaction(SIGUSR1, &act, NULL);
1576 if (err)
1577 mpsslog("%s sigaction SIGUSR1 failed %s\n",
1578 mic->name, strerror(errno));
1579 while (1)
1580 sleep(60);
1581 case -1:
1582 mpsslog("fork failed MIC name %s id %d errno %d\n",
1583 mic->name, mic->id, errno);
1584 break;
1585 default:
1586 if (mic->restart) {
1587 snprintf(buffer, PATH_MAX, "boot");
1588 setsysfs(mic->name, "state", buffer);
1589 mpsslog("%s restarting mic %d\n",
1590 mic->name, mic->restart);
1591 mic->restart = 0;
1592 }
1593 pthread_create(&mic->config_thread, NULL, mic_config, mic);
1594 }
1595}
1596
1597static void
1598start_daemon(void)
1599{
1600 struct mic_info *mic;
1601
1602 for (mic = mic_list.next; mic != NULL; mic = mic->next)
1603 init_mic(mic);
1604
1605 while (1)
1606 sleep(60);
1607}
1608
1609static int
1610init_mic_list(void)
1611{
1612 struct mic_info *mic = &mic_list;
1613 struct dirent *file;
1614 DIR *dp;
1615 int cnt = 0;
1616
1617 dp = opendir(MICSYSFSDIR);
1618 if (!dp)
1619 return 0;
1620
1621 while ((file = readdir(dp)) != NULL) {
1622 if (!strncmp(file->d_name, "mic", 3)) {
1623 mic->next = malloc(sizeof(struct mic_info));
1624 if (mic->next) {
1625 mic = mic->next;
1626 mic->next = NULL;
1627 memset(mic, 0, sizeof(struct mic_info));
1628 mic->id = atoi(&file->d_name[3]);
1629 mic->name = malloc(strlen(file->d_name) + 16);
1630 if (mic->name)
1631 strcpy(mic->name, file->d_name);
1632 mpsslog("MIC name %s id %d\n", mic->name,
1633 mic->id);
1634 cnt++;
1635 }
1636 }
1637 }
1638
1639 closedir(dp);
1640 return cnt;
1641}
1642
1643void
1644mpsslog(char *format, ...)
1645{
1646 va_list args;
1647 char buffer[4096];
1648 char ts[52], *ts1;
1649 time_t t;
1650
1651 if (logfp == NULL)
1652 return;
1653
1654 va_start(args, format);
1655 vsprintf(buffer, format, args);
1656 va_end(args);
1657
1658 time(&t);
1659 ts1 = ctime_r(&t, ts);
1660 ts1[strlen(ts1) - 1] = '\0';
1661 fprintf(logfp, "%s: %s", ts1, buffer);
1662
1663 fflush(logfp);
1664}
1665
1666int
1667main(int argc, char *argv[])
1668{
1669 int cnt;
1670 pid_t pid;
1671
1672 myname = argv[0];
1673
1674 logfp = fopen(LOGFILE_NAME, "a+");
1675 if (!logfp) {
1676 fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1677 exit(1);
1678 }
1679 pid = fork();
1680 switch (pid) {
1681 case 0:
1682 break;
1683 case -1:
1684 exit(2);
1685 default:
1686 exit(0);
1687 }
1688
1689 mpsslog("MIC Daemon start\n");
1690
1691 cnt = init_mic_list();
1692 if (cnt == 0) {
1693 mpsslog("MIC module not loaded\n");
1694 exit(3);
1695 }
1696 mpsslog("MIC found %d devices\n", cnt);
1697
1698 start_daemon();
1699
1700 exit(0);
1701}
diff --git a/Documentation/mic/mpssd/mpssd.h b/Documentation/mic/mpssd/mpssd.h
new file mode 100644
index 000000000000..b6dee38ca5b1
--- /dev/null
+++ b/Documentation/mic/mpssd/mpssd.h
@@ -0,0 +1,100 @@
1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2013 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * The full GNU General Public License is included in this distribution in
16 * the file called "COPYING".
17 *
18 * Intel MIC User Space Tools.
19 */
20#ifndef _MPSSD_H_
21#define _MPSSD_H_
22
23#include <stdio.h>
24#include <stdlib.h>
25#include <string.h>
26#include <fcntl.h>
27#include <unistd.h>
28#include <dirent.h>
29#include <libgen.h>
30#include <pthread.h>
31#include <stdarg.h>
32#include <time.h>
33#include <errno.h>
34#include <sys/dir.h>
35#include <sys/ioctl.h>
36#include <sys/poll.h>
37#include <sys/types.h>
38#include <sys/socket.h>
39#include <sys/stat.h>
40#include <sys/types.h>
41#include <sys/mman.h>
42#include <sys/utsname.h>
43#include <sys/wait.h>
44#include <netinet/in.h>
45#include <arpa/inet.h>
46#include <netdb.h>
47#include <pthread.h>
48#include <signal.h>
49#include <limits.h>
50#include <syslog.h>
51#include <getopt.h>
52#include <net/if.h>
53#include <linux/if_tun.h>
54#include <linux/if_tun.h>
55#include <linux/virtio_ids.h>
56
57#define MICSYSFSDIR "/sys/class/mic"
58#define LOGFILE_NAME "/var/log/mpssd"
59#define PAGE_SIZE 4096
60
/* Per-card state of the virtio console backend. */
struct mic_console_info {
	pthread_t	console_thread;		/* thread running the console backend */
	int		virtio_console_fd;	/* /dev/mic<id> descriptor the console device was added on */
	void		*console_dp;		/* presumably the mapped console device page - TODO confirm */
};
66
/* Per-card state of the virtio net backend. */
struct mic_net_info {
	pthread_t	net_thread;	/* thread running the net backend */
	int		virtio_net_fd;	/* /dev/mic<id> descriptor the net device was added on */
	int		tap_fd;		/* presumably the host TUN/TAP descriptor - TODO confirm */
	void		*net_dp;	/* presumably the mapped net device page - TODO confirm */
};
73
/* Per-card state of the virtio block backend. */
struct mic_virtblk_info {
	pthread_t	block_thread;		/* thread running the block backend */
	int		virtio_block_fd;	/* /dev/mic<id> descriptor the block device was added on */
	void		*block_dp;		/* start of the mapping released when the device is stopped */
	volatile sig_atomic_t signaled;		/* set from the SIGUSR1 handler to request a backend reload */
	char		*backend_file;		/* heap copy of the backend file name from the config file */
	int		backend;		/* descriptor of the opened backend file */
	void		*backend_addr;		/* address the backend file is mapped at */
	long		backend_size;		/* size of the backend file in bytes */
};
84
85struct mic_info {
86 int id;
87 char *name;
88 pthread_t config_thread;
89 pid_t pid;
90 struct mic_console_info mic_console;
91 struct mic_net_info mic_net;
92 struct mic_virtblk_info mic_virtblk;
93 int restart;
94 struct mic_info *next;
95};
96
97void mpsslog(char *format, ...);
98char *readsysfs(char *dir, char *entry);
99int setsysfs(char *dir, char *entry, char *value);
100#endif
diff --git a/Documentation/mic/mpssd/sysfs.c b/Documentation/mic/mpssd/sysfs.c
new file mode 100644
index 000000000000..11de72b63386
--- /dev/null
+++ b/Documentation/mic/mpssd/sysfs.c
@@ -0,0 +1,102 @@
1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2013 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * The full GNU General Public License is included in this distribution in
16 * the file called "COPYING".
17 *
18 * Intel MIC User Space Tools.
19 */
20
21#include "mpssd.h"
22
/*
 * Repeats the identical PAGE_SIZE definition from mpssd.h (included above);
 * it sizes the buffer readsysfs() reads a sysfs value into.
 */
#define PAGE_SIZE 4096
24
25char *
26readsysfs(char *dir, char *entry)
27{
28 char filename[PATH_MAX];
29 char value[PAGE_SIZE];
30 char *string = NULL;
31 int fd;
32 int len;
33
34 if (dir == NULL)
35 snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry);
36 else
37 snprintf(filename, PATH_MAX,
38 "%s/%s/%s", MICSYSFSDIR, dir, entry);
39
40 fd = open(filename, O_RDONLY);
41 if (fd < 0) {
42 mpsslog("Failed to open sysfs entry '%s': %s\n",
43 filename, strerror(errno));
44 return NULL;
45 }
46
47 len = read(fd, value, sizeof(value));
48 if (len < 0) {
49 mpsslog("Failed to read sysfs entry '%s': %s\n",
50 filename, strerror(errno));
51 goto readsys_ret;
52 }
53 if (len == 0)
54 goto readsys_ret;
55
56 value[len - 1] = '\0';
57
58 string = malloc(strlen(value) + 1);
59 if (string)
60 strcpy(string, value);
61
62readsys_ret:
63 close(fd);
64 return string;
65}
66
67int
68setsysfs(char *dir, char *entry, char *value)
69{
70 char filename[PATH_MAX];
71 char *oldvalue;
72 int fd, ret = 0;
73
74 if (dir == NULL)
75 snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry);
76 else
77 snprintf(filename, PATH_MAX, "%s/%s/%s",
78 MICSYSFSDIR, dir, entry);
79
80 oldvalue = readsysfs(dir, entry);
81
82 fd = open(filename, O_RDWR);
83 if (fd < 0) {
84 ret = errno;
85 mpsslog("Failed to open sysfs entry '%s': %s\n",
86 filename, strerror(errno));
87 goto done;
88 }
89
90 if (!oldvalue || strcmp(value, oldvalue)) {
91 if (write(fd, value, strlen(value)) < 0) {
92 ret = errno;
93 mpsslog("Failed to write new sysfs entry '%s': %s\n",
94 filename, strerror(errno));
95 }
96 }
97 close(fd);
98done:
99 if (oldvalue)
100 free(oldvalue);
101 return ret;
102}