/*
* net/key/af_key.c An implementation of PF_KEYv2 sockets.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Maxim Giryaev <gem@asplinux.ru>
* David S. Miller <davem@redhat.com>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
* Kunihiro Ishiguro <kunihiro@ipinfusion.com>
* Kazunori MIYAZAWA / USAGI Project <miyazawa@linux-ipv6.org>
* Derek Atkins <derek@ihtfp.com>
*/
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/socket.h>
#include <linux/pfkeyv2.h>
#include <linux/ipsec.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/xfrm.h>
#include <net/sock.h>
#define _X2KEY(x) ((x) == XFRM_INF ? 0 : (x))
#define _KEY2X(x) ((x) == 0 ? XFRM_INF : (x))
static int pfkey_net_id __read_mostly;
struct netns_pfkey {
/* List of all pfkey sockets. */
struct hlist_head table;
atomic_t socks_nr;
};
static DEFINE_MUTEX(pfkey_mutex);
#define DUMMY_MARK 0
static const struct xfrm_mark dummy_mark = {0, 0};
struct pfkey_sock {
/* struct sock must be the first member of struct pfkey_sock */
struct sock sk;
int registered;
int promisc;
struct {
uint8_t msg_version;
uint32_t msg_portid;
int (*dump)(struct pfkey_sock *sk);
void (*done)(struct pfkey_sock *sk);
union {
struct xfrm_policy_walk policy;
struct xfrm_state_walk state;
} u;
struct sk_buff *skb;
} dump;
};
static inline struct pfkey_sock *pfkey_sk(struct sock *sk)
{
return (struct pfkey_sock *)sk;
}
static int pfkey_can_dump(const struct sock *sk)
{
if (3 * atomic_read(&sk->sk_rmem_alloc) <= 2 * sk->sk_rcvbuf)
return 1;
return 0;
}
static void pfkey_terminate_dump(struct pfkey_sock *pfk)
{
if (pfk->dump.dump) {
if (pfk->dump.skb) {
kfree_skb(pfk->dump.skb);
pfk->dump.skb = NULL;
}
pfk->dump.done(pfk);
pfk->dump.dump = NULL;
pfk->dump.done = NULL;
}
}
static void pfkey_sock_destruct(struct sock *sk)
{
struct net *net = sock_net(sk);
struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
pfkey_terminate_dump(pfkey_sk(sk));
skb_queue_purge(&sk->sk_receive_queue);
if (!sock_flag(sk, SOCK_DEAD)) {
pr_err("Attempt to release alive pfkey socket: %p\n", sk);
return;
}
WARN_ON(atomic_read(&sk->sk_rmem_alloc));
WARN_ON(atomic_read(&sk->sk_wmem_alloc));
atomic_dec(&net_pfkey->socks_nr);
}
static const struct proto_ops pfkey_ops;
static void pfkey_insert(struct sock *sk)
{
struct net *net = sock_net(sk);
struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
mutex_lock(&pfkey_mutex);
sk_add_node_rcu(sk, &net_pfkey->table);
mutex_unlock(&pfkey_mutex);
}
static void pfkey_remove(struct sock *sk)
{
mutex_lock(&pfkey_mutex);
sk_del_node_init_rcu(sk);
mutex_unlock(&pfkey_mutex);
}
static struct proto key_proto = {
.name = "KEY",
.owner = THIS_MODULE,
.obj_size = sizeof(struct pfkey_sock),
};
static int pfkey_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
struct sock *sk;
int err;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (sock->type != SOCK_RAW)
return -ESOCKTNOSUPPORT;
if (protocol != PF_KEY_V2)
return -EPROTONOSUPPORT;
err = -ENOMEM;
sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto);
if (sk == NULL)
goto out;
sock->ops = &pfkey_ops;
sock_init_data(sock, sk);
sk->sk_family = PF_KEY;
sk->sk_destruct = pfkey_sock_destruct;
atomic_inc(&net_pfkey->socks_nr);
pfkey_insert(sk);
return 0;
out:
return err;
}
static int pfkey_release(struct socket *sock)
{
struct sock *sk = sock->sk;
if (!sk)
return 0;
pfkey_remove(sk);
sock_orphan(sk);
sock->sk = NULL;
skb_queue_purge(&sk->sk_write_queue);
synchronize_rcu();
sock_put(sk);
return 0;
}
static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,
gfp_t allocation, struct sock *sk)
{
int err = -ENOBUFS;
sock_hold(sk);
if (*skb2 == NULL) {
if (atomic_read(&skb->users) != 1) {
*skb2 = skb_clone(skb, allocation);
} else {
*skb2 = skb;
atomic_inc(&skb->users);
}
}
if (*skb2 != NULL) {
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
skb_set_owner_r(*skb2, sk);
skb_queue_tail(&sk->sk_receive_queue, *skb2);
sk->sk_data_ready(sk, (*skb2)->len);
*skb2 = NULL;
err = 0;
}
}
sock_put(sk);
return err;
}
/* Send SKB to all pfkey sockets matching selected criteria. */
#define BROADCAST_ALL 0
#define BROADCAST_ONE 1
#define BROADCAST_REGISTERED 2
#define BROADCAST_PROMISC_ONLY 4
static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
int broadcast_flags, struct sock *one_sk,
struct net *net)
{
struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
struct sock *sk;
struct sk_buff *skb2 = NULL;
int err = -ESRCH;
/* XXX Do we need something like netlink_overrun? I think
* XXX PF_KEY socket apps will not mind current behavior.
*/
if (!skb)
return -ENOMEM;
rcu_read_lock();
sk_for_each_rcu(sk, &net_pfkey->table) {
struct pfkey_sock *pfk = pfkey_sk(sk);
int err2;
/* Yes, it means that if you are meant to receive this
* pfkey message you receive it twice as promiscuous
* socket.
*/
if (pfk->promisc)
pfkey_broadcast_one(skb, &skb2, allocation, sk);
/* the exact target will be processed later */
if (sk == one_sk)
continue;
if (broadcast_flags != BROADCAST_ALL) {
if (broadcast_flags & BROADCAST_PROMISC_ONLY)
continue;
if ((broadcast_flags & BROADCAST_REGISTERED) &&
!pfk->registered)
continue;
if (broadcast_flags & BROADCAST_ONE)
continue;
}
err2 = pfkey_broadcast_one(skb, &skb2, allocation, sk);
/* Error is cleare after succecful sending to at least one
* registered KM */
if ((broadcast_flags & BROADCAST_REGISTERED) && err)
err = err2;
}
rcu_read_unlock();
if (one_sk != NULL)
err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk);
kfree_skb(skb2);
kfree_skb(skb);
return err;
}
static int pfkey_do_dump(struct pfkey_sock *pfk)
{
struct sadb_msg *hdr;
int rc;
rc = pfk->dump.dump(pfk);
if (rc == -ENOBUFS)
return 0;
if (pfk->dump.skb) {
if (!pfkey_can_dump(&pfk->sk))
return 0;
hdr = (struct sadb_msg *) pfk->dump.skb->data;
hdr->sadb_msg_seq = 0;
hdr->sadb_msg_errno = rc;
pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
&pfk->sk, sock_net(&pfk->sk));
pfk->dump.skb = NULL;
}
pfkey_terminate_dump(pfk);
return rc;
}
static inline void pfkey_hdr_dup(struct sadb_msg *new,
const struct sadb_msg *orig)
{
*new = *orig;
}
static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk)
{
struct sk_buff *skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL);
struct sadb_msg *hdr;
if (!skb)
return -ENOBUFS;
/* Woe be to the platform trying to support PFKEY yet
* having normal errnos outside the 1-255 range, inclusive.
*/
err = -err;
if (err == ERESTARTSYS ||
err == ERESTARTNOHAND ||
err == ERESTARTNOINTR)
err = EINTR;
if (err >= 512)
err = EINVAL;
BUG_ON(err <= 0 || err >= 256);
hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
pfkey_hdr_dup(hdr, orig);
hdr->sadb_msg_errno = (uint8_t) err;
hdr->sadb_msg_len = (sizeof(struct sadb_msg) /
sizeof(uint64_t));
pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk, sock_net(sk));
return 0;
}
static const u8 sadb_ext_min_len[] = {
[SADB_EXT_RESERVED] = (u8) 0,
[SADB_EXT_SA] = (u8) sizeof(struct sadb_sa),
[SADB_EXT_LIFETIME_CURRENT] = (u8) sizeof(struct sadb_lifetime),
[SADB_EXT_LIFETIME_HARD] = (u8) sizeof(struct sadb_lifetime),
[SADB_EXT_LIFETIME_SOFT] = (u8) sizeof(struct sadb_lifetime),
[SADB_EXT_ADDRESS_SRC] = (u8) sizeof(struct sadb_address),
[SADB_EXT_ADDRESS_DST] = (u8) sizeof(struct sadb_address),
[SADB_EXT_ADDRESS_PROXY] = (u8) sizeof(struct sadb_address),
[SADB_EXT_KEY_AUTH] = (u8) sizeof(struct sadb_key),
[SADB_EXT_KEY_ENCRYPT] = (u8) sizeof(struct sadb_key),
[SADB_EXT_IDENTITY_SRC] = (u8) sizeof(struct sadb_ident),
[SADB_EXT_IDENTITY_DST] = (u8) sizeof(struct sadb_ident),
[SADB_EXT_SENSITIVITY] = (u8) sizeof(struct sadb_sens),
[SADB_EXT_PROPOSAL] = (u8) sizeof(struct sadb_prop),
[SADB_EXT_SUPPORTED_AUTH] = (u8) sizeof(struct sadb_supported),
[SADB_EXT_SUPPORTED_ENCRYPT] = (u8) sizeof(struct sadb_supported),
[SADB_EXT_SPIRANGE] = (u8) sizeof(struct sadb_spirange),
[SADB_X_EXT_KMPRIVATE] = (u8) sizeof(struct sadb_x_kmprivate),
[SADB_X_EXT_POLICY] = (u8) sizeof(struct sadb_x_policy),
[SADB_X_EXT_SA2] = (u8) sizeof(struct sadb_x_sa2),
[SADB_X_EXT_NAT_T_TYPE] = (u8) sizeof(struct sadb_x_nat_t_type),
[SADB_X_EXT_NAT_T_SPORT] = (u8) sizeof(struct sadb_x_nat_t_port),
[SADB_X_EXT_NAT_T_DPORT] = (u8) sizeof(struct sadb_x_nat_t_port),
[SADB_X_EXT_NAT_T_OA] = (u8) sizeof(struct sadb_address),
[SADB_X_EXT_SEC_CTX] = (u8) sizeof(struct sadb_x_sec_ctx),
[SADB_X_EXT_KMADDRESS] = (u8) sizeof(struct sadb_x_kmaddress),
};
/* Verify sadb_address_{len,prefixlen} against sa_family. */
static int verify_address_len(const void *p)
{
const struct sadb_address *sp = p;
const struct sockaddr *addr = (const struct sockaddr *)(sp + 1);
const struct sockaddr_in *sin;
#if IS_ENABLED(CONFIG_IPV6)
const struct sockaddr_in6 *sin6;
#endif
int len;
switch (addr->sa_family) {
case AF_INET:
len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t));
if (sp->sadb_address_len != len ||
sp->sadb_address_prefixlen > 32)
return -EINVAL;
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin6), sizeof(uint64_t));
if (sp->sadb_address_len != len ||
sp->sadb_address_prefixlen > 128)
return -EINVAL;
break;
#endif
default:
/* It is user using kernel to keep track of security
* associations for another protocol, such as
* OSPF/RSVP/RIPV2/MIP. It is user's job to verify
* lengths.
*
* XXX Actually, association/policy database is not yet
* XXX able to cope with arbitrary sockaddr families.
* XXX When it can, remove this -EINVAL. -DaveM
*/
return -EINVAL;
break;
}
return 0;
}
static inline int pfkey_sec_ctx_len(const struct sadb_x_sec_ctx *sec_ctx)
{
return DIV_ROUND_UP(sizeof(struct sadb_x_sec_ctx) +
sec_ctx->sadb_x_ctx_len,
sizeof(uint64_t));
}
static inline int verify_sec_ctx_len(const void *p)
{
const struct sadb_x_sec_ctx *sec_ctx = p;
int len = sec_ctx->sadb_x_ctx_len;
if (len > PAGE_SIZE)
return -EINVAL;
len = pfkey_sec_ctx_len(sec_ctx);
if (sec_ctx->sadb_x_sec_len != len)
return -EINVAL;
return 0;
}
static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(const struct sadb_x_sec_ctx *sec_ctx)
{
struct xfrm_user_sec_ctx *uctx = NULL;
int ctx_size = sec_ctx->sadb_x_ctx_len;
uctx = kmalloc((sizeof(*uctx)+ctx_size), GFP_KERNEL);
if (!uctx)
return NULL;
uctx->len = pfkey_sec_ctx_len(sec_ctx);
uctx->exttype = sec_ctx->sadb_x_sec_exttype;
uctx->ctx_doi = sec_ctx->sadb_x_ctx_doi;
uctx->ctx_alg = sec_ctx->sadb_x_ctx_alg;
uctx->ctx_len = sec_ctx->sadb_x_ctx_len;
memcpy(uctx + 1, sec_ctx + 1,
uctx->ctx_len);
return uctx;
}
static int present_and_same_family(const struct sadb_address *src,
const struct sadb_address *dst)
{
const struct sockaddr *s_addr, *d_addr;
if (!src || !dst)
return 0;
s_addr = (const struct sockaddr *)(src + 1);
d_addr = (const struct sockaddr *)(dst + 1);
if (s_addr->sa_family != d_addr->sa_family)
return 0;
if (s_addr->sa_family != AF_INET
#if IS_ENABLED(CONFIG_IPV6)
&& s_addr->sa_family != AF_INET6
#endif
)
return 0;
return 1;
}
static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void **ext_hdrs)
{
const char *p = (char *) hdr;
int len = skb->len;
len -= sizeof(*hdr);
p += sizeof(*hdr);
while (len > 0) {
const struct sadb_ext *ehdr = (const struct sadb_ext *) p;
uint16_t ext_type;
int ext_len;
ext_len = ehdr->sadb_ext_len;
ext_len *= sizeof(uint64_t);
ext_type = ehdr->sadb_ext_type;
if (ext_len < sizeof(uint64_t) ||
ext_len > len ||
ext_type == SADB_EXT_RESERVED)
return -EINVAL;
if (ext_type <= SADB_EXT_MAX) {
int min = (int) sadb_ext_min_len[ext_type];
if (ext_len < min)
return -EINVAL;
if (ext_hdrs[ext_type-1] != NULL)
return -EINVAL;
if (ext_type == SADB_EXT_ADDRESS_SRC ||
ext_type == SADB_EXT_ADDRESS_DST ||
ext_type == SADB_EXT_ADDRESS_PROXY ||
ext_type == SADB_X_EXT_NAT_T_OA) {
if (verify_address_len(p))
return -EINVAL;
}
if (ext_type == SADB_X_EXT_SEC_CTX) {
if (verify_sec_ctx_len(p))
return -EINVAL;
}
ext_hdrs[ext_type-1] = (void *) p;
}
p += ext_len;
len -= ext_len;
}
return 0;
}
static uint16_t
pfkey_satype2proto(uint8_t satype)
{
switch (satype) {
case SADB_SATYPE_UNSPEC:
return IPSEC_PROTO_ANY;
case SADB_SATYPE_AH:
return IPPROTO_AH;
case SADB_SATYPE_ESP:
return IPPROTO_ESP;
case SADB_X_SATYPE_IPCOMP:
return IPPROTO_COMP;
break;
default:
return 0;
}
/* NOTREACHED */
}
static uint8_t
pfkey_proto2satype(uint16_t proto)
{
switch (proto) {
case IPPROTO_AH:
return SADB_SATYPE_AH;
case IPPROTO_ESP:
return SADB_SATYPE_ESP;
case IPPROTO_COMP:
return SADB_X_SATYPE_IPCOMP;
break;
default:
return 0;
}
/* NOTREACHED */
}
/* BTW, this scheme means that there is no way with PFKEY2 sockets to
* say specifically 'just raw sockets' as we encode them as 255.
*/
static uint8_t pfkey_proto_to_xfrm(uint8_t proto)
{
return proto == IPSEC_PROTO_ANY ? 0 : proto;
}
static uint8_t pfkey_proto_from_xfrm(uint8_t proto)
{
return proto ? proto : IPSEC_PROTO_ANY;
}
static inline int pfkey_sockaddr_len(sa_family_t family)
{
switch (family) {
case AF_INET:
return sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
return sizeof(struct sockaddr_in6);
#endif
}
return 0;
}
static
int pfkey_sockaddr_extract(const struct sockaddr *sa, xfrm_address_t *xaddr)
{
switch (sa->sa_family) {
case AF_INET:
xaddr->a4 =
((struct sockaddr_in *)sa)->sin_addr.s_addr;
return AF_INET;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
memcpy(xaddr->a6,
&((struct sockaddr_in6 *)sa)->sin6_addr,
sizeof(struct in6_addr));
return AF_INET6;
#endif
}
return 0;
}
static
int pfkey_sadb_addr2xfrm_addr(const struct sadb_address *addr, xfrm_address_t *xaddr)
{
return pfkey_sockaddr_extract((struct sockaddr *)(addr + 1),
xaddr);
}
static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, const struct sadb_msg *hdr, void * const *ext_hdrs)
{
const struct sadb_sa *sa;
const struct sadb_address *addr;
uint16_t proto;
unsigned short family;
xfrm_address_t *xaddr;
sa = ext_hdrs[SADB_EXT_SA - 1];
if (sa == NULL)
return NULL;
proto = pfkey_satype2proto(hdr->sadb_msg_satype);
if (proto == 0)
return NULL;
/* sadb_address_len should be checked by caller */
addr = ext_hdrs[SADB_EXT_ADDRESS_DST - 1];
if (addr == NULL)
return NULL;
family = ((const struct sockaddr *)(addr + 1))->sa_family;
switch (family) {
case AF_INET:
xaddr = (xfrm_address_t *)&((const struct sockaddr_in *)(addr + 1))->sin_addr;
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
xaddr = (xfrm_address_t *)&((const struct sockaddr_in6 *)(addr + 1))->sin6_addr;
break;
#endif
default:
xaddr = NULL;
}
if (!xaddr)
return NULL;
return xfrm_state_lookup(net, DUMMY_MARK, xaddr, sa->sadb_sa_spi, proto, family);
}
#define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1)))
static int
pfkey_sockaddr_size(sa_family_t family)
{
return PFKEY_ALIGN8(pfkey_sockaddr_len(family));
}
static inline int pfkey_mode_from_xfrm(int mode)
{
switch(mode) {
case XFRM_MODE_TRANSPORT:
return IPSEC_MODE_TRANSPORT;
case XFRM_MODE_TUNNEL:
return IPSEC_MODE_TUNNEL;
case XFRM_MODE_BEET:
return IPSEC_MODE_BEET;
default:
return -1;
}
}
static inline int pfkey_mode_to_xfrm(int mode)
{
switch(mode) {
case IPSEC_MODE_ANY: /*XXX*/
case IPSEC_MODE_TRANSPORT:
return XFRM_MODE_TRANSPORT;
case IPSEC_MODE_TUNNEL:
return XFRM_MODE_TUNNEL;
|