/*
* VMware vSockets Driver
*
* Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation version 2 and no later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/cred.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/net.h>
#include <linux/poll.h>
#include <linux/skbuff.h>
#include <linux/smp.h>
#include <linux/socket.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <net/sock.h>
#include "af_vsock.h"
#include "vmci_transport_notify.h"
/* Forward declarations for file-local callbacks, receive-path state handlers
 * and protocol-negotiation helpers.  Their definitions are not part of this
 * section of the file.
 */
static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg);
static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg);
static void vmci_transport_peer_attach_cb(u32 sub_id,
const struct vmci_event_data *ed,
void *client_data);
static void vmci_transport_peer_detach_cb(u32 sub_id,
const struct vmci_event_data *ed,
void *client_data);
static void vmci_transport_recv_pkt_work(struct work_struct *work);
static int vmci_transport_recv_listen(struct sock *sk,
struct vmci_transport_packet *pkt);
static int vmci_transport_recv_connecting_server(
struct sock *sk,
struct sock *pending,
struct vmci_transport_packet *pkt);
static int vmci_transport_recv_connecting_client(
struct sock *sk,
struct vmci_transport_packet *pkt);
static int vmci_transport_recv_connecting_client_negotiate(
struct sock *sk,
struct vmci_transport_packet *pkt);
static int vmci_transport_recv_connecting_client_invalid(
struct sock *sk,
struct vmci_transport_packet *pkt);
static int vmci_transport_recv_connected(struct sock *sk,
struct vmci_transport_packet *pkt);
static bool vmci_transport_old_proto_override(bool *old_pkt_proto);
static u16 vmci_transport_new_proto_supported_versions(void);
static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto,
bool old_pkt_proto);
/* Bundles a work item with a socket pointer and an embedded copy of a
 * transport packet.
 * NOTE(review): presumably the unit of work processed by
 * vmci_transport_recv_pkt_work(); the users are outside this section - confirm.
 */
struct vmci_transport_recv_pkt_info {
struct work_struct work;	/* work item header */
struct sock *sk;		/* socket associated with the packet */
struct vmci_transport_packet pkt;	/* packet stored by value */
};
/* File-scope state.  All three start out as "unset" sentinels: an invalid
 * handle, an invalid subscription ID and -1 respectively.
 * NOTE(review): the code that assigns and consumes them is outside this
 * section of the file.
 */
static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID,
VMCI_INVALID_ID };
static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
static int PROTOCOL_OVERRIDE = -1;
/* Default, minimum and maximum queue pair sizes.
 * NOTE(review): units are presumably bytes (262144 == 256 KiB) - confirm
 * against the code that consumes these values.
 */
#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN 128
#define VMCI_TRANSPORT_DEFAULT_QP_SIZE 262144
#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX 262144
/* The default peer timeout indicates how long we will wait for a peer response
 * to a control message.
 */
#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)
/* NOTE(review): looks like a private socket-state value for listening
 * sockets; its users are outside this section - confirm.
 */
#define SS_LISTEN 255
/* Translate a VMCI error code into the negative errno value reported by the
 * vsock layer.  Any code without a dedicated mapping (VMCI_ERROR_INVALID_ARGS
 * included) is reported as -EINVAL.
 */
static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
{
	switch (vmci_error) {
	case VMCI_ERROR_NO_MEM:
		return -ENOMEM;
	case VMCI_ERROR_DUPLICATE_ENTRY:
	case VMCI_ERROR_ALREADY_EXISTS:
		return -EADDRINUSE;
	case VMCI_ERROR_NO_ACCESS:
		return -EPERM;
	case VMCI_ERROR_NO_RESOURCES:
		return -ENOBUFS;
	case VMCI_ERROR_INVALID_RESOURCE:
		return -EHOSTUNREACH;
	case VMCI_ERROR_INVALID_ARGS:
	default:
		return -EINVAL;
	}
}
/* Select the resource ID that control packets for @peer_cid are addressed
 * to: the hypervisor context has its own packet RID, every other context
 * uses the regular one.
 */
static u32 vmci_transport_peer_rid(u32 peer_cid)
{
	return peer_cid == VMADDR_CID_HYPERVISOR ?
		VMCI_TRANSPORT_HYPERVISOR_PACKET_RID :
		VMCI_TRANSPORT_PACKET_RID;
}
/* Fill in a control packet of the given type so that it is ready to be
 * handed to vmci_datagram_send().
 *
 * @pkt:    packet to initialise; every byte of it is overwritten.
 * @src:    local address; only svm_port is used.
 * @dst:    peer address; svm_cid selects the destination handle and
 *          svm_port the destination port.
 * @type:   VMCI_TRANSPORT_PACKET_TYPE_* value; decides which of the
 *          following arguments is stored in the payload union.
 * @size:   stored for REQUEST, NEGOTIATE, REQUEST2 and NEGOTIATE2.
 * @mode:   stored for SHUTDOWN.
 * @wait:   copied for WAITING_READ and WAITING_WRITE; must be non-NULL for
 *          those two types and is ignored otherwise.
 * @proto:  stored for REQUEST2 and NEGOTIATE2; zero for every other type.
 * @handle: stored for OFFER and ATTACH.
 */
static inline void
vmci_transport_packet_init(struct vmci_transport_packet *pkt,
			   struct sockaddr_vm *src,
			   struct sockaddr_vm *dst,
			   u8 type,
			   u64 size,
			   u64 mode,
			   struct vmci_transport_waiting_info *wait,
			   u16 proto,
			   struct vmci_handle handle)
{
	/* Callers pass in an uninitialised on-stack packet, a kmalloc()ed
	 * buffer or a static packet reused across sends, and the switch
	 * below writes only one member of the payload union.  Clear the whole
	 * packet first so that no stale kernel memory is sent to the peer.
	 */
	memset(pkt, 0, sizeof(*pkt));

	/* We register the stream control handler as an any cid handle so we
	 * must always send from a source address of VMADDR_CID_ANY
	 */
	pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY,
				       VMCI_TRANSPORT_PACKET_RID);
	pkt->dg.dst = vmci_make_handle(dst->svm_cid,
				       vmci_transport_peer_rid(dst->svm_cid));
	/* The payload is everything that follows the datagram header. */
	pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg);
	pkt->version = VMCI_TRANSPORT_PACKET_VERSION;
	pkt->type = type;
	pkt->src_port = src->svm_port;
	pkt->dst_port = dst->svm_port;

	switch (pkt->type) {
	case VMCI_TRANSPORT_PACKET_TYPE_REQUEST:
	case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE:
		pkt->u.size = size;
		break;

	case VMCI_TRANSPORT_PACKET_TYPE_OFFER:
	case VMCI_TRANSPORT_PACKET_TYPE_ATTACH:
		pkt->u.handle = handle;
		break;

	case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN:
		pkt->u.mode = mode;
		break;

	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
		memcpy(&pkt->u.wait, wait, sizeof(pkt->u.wait));
		break;

	case VMCI_TRANSPORT_PACKET_TYPE_REQUEST2:
	case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2:
		pkt->u.size = size;
		pkt->proto = proto;
		break;

	case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
	case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
	case VMCI_TRANSPORT_PACKET_TYPE_READ:
	case VMCI_TRANSPORT_PACKET_TYPE_RST:
	default:
		/* No payload: the union stays zeroed. */
		break;
	}
}
/* Derive socket addresses from a received packet.
 *
 * @pkt:    packet to read.
 * @local:  initialised from the packet's destination context and port.
 * @remote: initialised from the packet's source context and port.
 */
static inline void
vmci_transport_packet_get_addresses(struct vmci_transport_packet *pkt,
				    struct sockaddr_vm *local,
				    struct sockaddr_vm *remote)
{
	/* The sender of the packet is our peer ... */
	vsock_addr_init(remote, pkt->dg.src.context, pkt->src_port);
	/* ... and the address it was sent to is ours. */
	vsock_addr_init(local, pkt->dg.dst.context, pkt->dst_port);
}
/* Initialise @pkt as a control packet and send it as a VMCI datagram.
 *
 * @pkt is caller-provided storage; the remaining packet arguments are passed
 * straight to vmci_transport_packet_init().
 *
 * Returns the result of vmci_datagram_send().  When @convert_error is true a
 * negative result is first mapped through
 * vmci_transport_error_to_vsock_error(); otherwise it is returned unchanged.
 */
static int
__vmci_transport_send_control_pkt(struct vmci_transport_packet *pkt,
				  struct sockaddr_vm *src,
				  struct sockaddr_vm *dst,
				  enum vmci_transport_packet_type type,
				  u64 size,
				  u64 mode,
				  struct vmci_transport_waiting_info *wait,
				  u16 proto,
				  struct vmci_handle handle,
				  bool convert_error)
{
	int rc;

	vmci_transport_packet_init(pkt, src, dst, type, size, mode, wait,
				   proto, handle);

	rc = vmci_datagram_send(&pkt->dg);
	if (rc >= 0 || !convert_error)
		return rc;

	return vmci_transport_error_to_vsock_error(rc);
}
/* Answer a received packet with a control packet, taking both addresses
 * from the incoming packet instead of from a socket.
 *
 * A received RST is never answered; 0 is returned in that case.  Otherwise
 * the reply is built in a packet on the stack, sent with
 * VSOCK_PROTO_INVALID as the protocol, and the (error-converted) send result
 * is returned.
 */
static int
vmci_transport_reply_control_pkt_fast(struct vmci_transport_packet *pkt,
				      enum vmci_transport_packet_type type,
				      u64 size,
				      u64 mode,
				      struct vmci_transport_waiting_info *wait,
				      struct vmci_handle handle)
{
	struct vmci_transport_packet reply;
	struct sockaddr_vm local, remote;

	if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
		return 0;

	/* Our address is the packet's destination, the peer is its source. */
	vmci_transport_packet_get_addresses(pkt, &local, &remote);

	return __vmci_transport_send_control_pkt(&reply, &local, &remote,
						 type, size, mode, wait,
						 VSOCK_PROTO_INVALID,
						 handle, true);
}
/* Send a control packet using explicit addresses and a statically allocated
 * packet, so no memory is allocated.  The protocol field is always
 * VSOCK_PROTO_INVALID.
 *
 * Returns the raw vmci_datagram_send() result; errors are NOT converted to
 * vsock error codes.
 */
static int
vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src,
				   struct sockaddr_vm *dst,
				   enum vmci_transport_packet_type type,
				   u64 size,
				   u64 mode,
				   struct vmci_transport_waiting_info *wait,
				   struct vmci_handle handle)
{
	/* Note that it is safe to use a single packet across all CPUs since
	 * two tasklets of the same type are guaranteed to not ever run
	 * simultaneously. If that ever changes, or VMCI stops using tasklets,
	 * we can use per-cpu packets.
	 */
	static struct vmci_transport_packet bh_pkt;
	const bool convert_error = false;

	return __vmci_transport_send_control_pkt(&bh_pkt, src, dst, type,
						 size, mode, wait,
						 VSOCK_PROTO_INVALID, handle,
						 convert_error);
}
/* Send a control packet from @sk's local address to its remote address.
 *
 * The packet is built in a temporary GFP_KERNEL allocation that is freed
 * again before returning.
 *
 * Returns -EINVAL if either the local or the remote address of the socket
 * is not bound, -ENOMEM if the packet cannot be allocated, and otherwise the
 * (error-converted) result of sending the datagram.
 */
static int
vmci_transport_send_control_pkt(struct sock *sk,
				enum vmci_transport_packet_type type,
				u64 size,
				u64 mode,
				struct vmci_transport_waiting_info *wait,
				u16 proto,
				struct vmci_handle handle)
{
	struct vsock_sock *vsk = vsock_sk(sk);
	struct vmci_transport_packet *buf;
	int rc;

	if (!vsock_addr_bound(&vsk->local_addr) ||
	    !vsock_addr_bound(&vsk->remote_addr))
		return -EINVAL;

	buf = kmalloc(sizeof(*buf), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	rc = __vmci_transport_send_control_pkt(buf, &vsk->local_addr,
					       &vsk->remote_addr, type, size,
					       mode, wait, proto, handle,
					       true);
	kfree(buf);

	return rc;
}
/* Send an RST via the static-packet path in response to @pkt, unless @pkt is
 * itself an RST (then nothing is sent and 0 is returned).
 *
 * The reset is sent FROM @dst TO @src: the two arguments are forwarded as
 * the source and destination of the outgoing packet respectively.
 * NOTE(review): presumably they are named after the packet being answered.
 */
static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst,
					struct sockaddr_vm *src,
					struct vmci_transport_packet *pkt)
{
	if (pkt->type != VMCI_TRANSPORT_PACKET_TYPE_RST)
		return vmci_transport_send_control_pkt_bh(
					dst, src,
					VMCI_TRANSPORT_PACKET_TYPE_RST,
					0, 0, NULL, VMCI_INVALID_HANDLE);

	return 0;
}
/* Send an RST to @sk's peer in response to @pkt, unless @pkt is itself an
 * RST (then nothing is sent and 0 is returned).  Otherwise returns the
 * result of vmci_transport_send_control_pkt().
 */
static int vmci_transport_send_reset(struct sock *sk,
				     struct vmci_transport_packet *pkt)
{
	if (pkt->type != VMCI_TRANSPORT_PACKET_TYPE_RST)
		return vmci_transport_send_control_pkt(
					sk, VMCI_TRANSPORT_PACKET_TYPE_RST,
					0, 0, NULL, VSOCK_PROTO_INVALID,
					VMCI_INVALID_HANDLE);

	return 0;
}
/* Send a NEGOTIATE control packet carrying @size to @sk's peer.
 * Returns the result of vmci_transport_send_control_pkt().
 */
static int vmci_transport_send_negotiate(struct sock *sk, size_t size)
{
	const enum vmci_transport_packet_type type =
		VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE;

	return vmci_transport_send_control_pkt(sk, type, size, 0, NULL,
					       VSOCK_PROTO_INVALID,
					       VMCI_INVALID_HANDLE);
}
/* Send a NEGOTIATE2 control packet carrying @size and the protocol value
 * @version to @sk's peer.
 * Returns the result of vmci_transport_send_control_pkt().
 */
static int vmci_transport_send_negotiate2(struct sock *sk, size_t size,
					  u16 version)
{
	const enum vmci_transport_packet_type type =
		VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2;

	return vmci_transport_send_control_pkt(sk, type, size, 0, NULL,
					       version, VMCI_INVALID_HANDLE);
}
static int vmci_transport_send_qp_offer(struct sock *sk,
struct vmci_handle handle)
{
return vmci_transport_send_control_pkt(
sk, VMCI_TRANSPORT_PACKET_TYPE_OFFER, 0,
0, NULL,
VSOCK_PROTO_INVALID, handle);
}
static int vmci_transport_send_attach(struct sock *sk,
struct vmci_handle handle)
{
return vmci_transport_send_control_pkt(
sk, VMCI_TRANSPORT_PACKET_TYPE_ATTACH,
0, 0, NULL, VSOCK_PROTO_INVALID,
handle);
}
static int vmci_transport_reply_reset(struct vmci_transport_packet *pkt)
{
return vmci_transport_reply_control_pkt_fast(
pkt,
VMCI_TRANSPORT_PACKET_TYPE_RST,
0, 0, NULL,
VMCI_INVALID_HANDLE);
}
static int vmci_transport_send_invalid_bh(struct sockaddr_vm *dst,
struct sockaddr_vm *src)
{
return vmci_transport_send_control_pkt_bh(
dst, src,
VMCI_TRANSPORT_PACKET_TYPE_INVALID,
0, 0, NULL, VMCI_INVALID_HANDLE);
}
int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst,
struct sockaddr_vm *src)
{
return vmci_transport_send_control_pkt_bh(
dst, src,
VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0,
0, NULL, VMCI_INVALID_HANDLE);
}
int vmci_transport_send_read_bh(struct sockaddr_vm *dst,
struct sockaddr_vm *src)
{
return vmci_transport_send_control_pkt_bh(
dst, src,
VMCI_TRANSPORT_PACKET_TYPE_READ, 0,
0, NULL, VMCI_INVALID_HANDLE);
}
int vmci_transport_send_wrote(struct sock *sk)
{
return vmci_transport_send_control_pkt(
sk, VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0,
0, NULL, VSOCK_PROTO_INVALID,
VMCI_INVALID_HANDLE);
}
int vmci_transport_send_read(struct sock *sk)
{
return vmci_transport_send_control_pkt(
sk, VMCI_TRANSPORT_PACKET_TYPE_READ, 0,
0, NULL, VSOCK_PROTO_INVALID,
VMCI_INVALID_HANDLE);
}
int vmci_transport_send_waiting_write(struct sock *sk,
struct vmci_transport_waiting_info *wait)
{
return vmci_transport_send_control_pkt(
sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE,
0, 0, wait, VSOCK_PROTO_INVALID,
VMCI_INVALID_HANDLE);
}
int vmci_transport_send_waiting_read(struct sock *sk,
struct vmci_transport_waiting_info *wait)
{
return vmci_transport_send_control_pkt(
sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ,
0, 0, wait, VSOCK_PROTO_INVALID,
VMCI_INVALID_HANDLE);
}
static int vmci_transport_shutdown(struct vsock_sock *vsk, int mode)
{
return vmci_transport_send_control_pkt(
&vsk->sk,
VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN,
0, mode, NULL,
VSOCK_PROTO_INVALID,
VMCI_INVALID_HANDLE);
}
/* Send a REQUEST control packet carrying @size to @sk's peer.
 * Returns the result of vmci_transport_send_control_pkt().
 */
static int vmci_transport_send_conn_request(struct sock *sk, size_t size)
{
	const enum vmci_transport_packet_type type =
		VMCI_TRANSPORT_PACKET_TYPE_REQUEST;

	return vmci_transport_send_control_pkt(sk, type, size, 0, NULL,
					       VSOCK_PROTO_INVALID,
					       VMCI_INVALID_HANDLE);
}
/* Send a REQUEST2 control packet carrying @size and the protocol value
 * @version to @sk's peer.
 * Returns the result of vmci_transport_send_control_pkt().
 */
static int vmci_transport_send_conn_request2(struct sock *sk, size_t size,
					     u16 version)
{
	const enum vmci_transport_packet_type type =
		VMCI_TRANSPORT_PACKET_TYPE_REQUEST2;

	return vmci_transport_send_control_pkt(sk, type, size, 0, NULL,
					       version, VMCI_INVALID_HANDLE);
}
static struct sock *vmci_transport_get_pending(
struct sock *listener,
struct vmci_transport_packet *pkt)
{
struct vsock_sock *vlistener;
struct vsock_sock *vpending;
struct sock *pending;
struct sockaddr_vm src;
vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
vlistener = vsock_sk(listener);
list_for_each_entry(vpending, &vlistener->pending_links,
pending_links) {
if (vsock_addr_equals_addr(&src, &vpending->remote_addr) &&
pkt->dst_port == vpending->local_addr.svm_port) {
pending = sk_vsock(vpending);
sock_hold(pending);
goto found;
}
}
pending = NULL;
found:
return pending;
}
/* Drop a reference on @pending with sock_put(); the counterpart of the
 * sock_hold() performed by vmci_transport_get_pending().
 */
static void vmci_transport_release_pending(struct sock *pending)
{
sock_put(pending);
}
/* We allow two kinds of sockets to communicate with a restricted VM: 1)
 * trusted sockets 2) sockets from applications running as the same user as the
 * VM (this is only true for the host side and only when using hosted products)
 *
 * Returns true if @vsock is marked trusted or if the uid of its owner is the
 * owner of the context @peer_cid.
 */
static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid)
{
	if (vsock->trusted)
		return true;

	return vmci_is_context_owner(peer_cid, vsock->owner->uid);
}
/* We allow sending datagrams to and receiving datagrams from a restricted VM
 * only if it is trusted as described in vmci_transport_is_trusted.
 *
 * The hypervisor is always allowed.  For any other peer the decision is
 * cached in @vsock and only recomputed when @peer_cid differs from the
 * cached peer.
 */
static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid)
{
	if (peer_cid == VMADDR_CID_HYPERVISOR)
		return true;

	/* Same peer as last time: reuse the cached verdict. */
	if (vsock->cached_peer == peer_cid)
		return vsock->cached_peer_allow_dgram;

	vsock->cached_peer = peer_cid;
	/* Allowed unless the peer is restricted and we are not trusted. */
	vsock->cached_peer_allow_dgram =
		vmci_transport_is_trusted(vsock, peer_cid) ||
		!(vmci_context_get_priv_flags(peer_cid) &
		  VMCI_PRIVILEGE_FLAG_RESTRICTED);

	return vsock->cached_peer_allow_dgram;
}
static int
vmci_transport_queue_pair_alloc(struct vmci_qp **qpair,
struct vmci_handle *handle,
u64 produce_size,
u64 consume_size,
u32 peer, u32 flags, bool trusted)
{
int err = 0;
if (trusted) {
/* Try to allocate our queue pair as trusted. This will only
* work if vsock is running in the host.
*/
err = vmci_qpair_alloc(qpair, handle, produce_size,
consume_size,
peer, flags,
VMCI_PRIVILEGE_FLAG_TRUSTED);
if (err != VMCI_ERROR_NO_ACCESS)
goto out;
}
err = vmci_qpair_alloc(qpair, handle, produce_size, consume_size,
peer, flags, VMCI_NO_PRIVILEGE_FLAGS);
out: