/*
* Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2004 Infinicon Corporation. All rights reserved.
* Copyright (c) 2004 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if !defined(IB_VERBS_H)
#define IB_VERBS_H
#include <linux/types.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/rwsem.h>
#include <linux/scatterlist.h>
#include <linux/workqueue.h>
#include <linux/socket.h>
#include <linux/irq_poll.h>
#include <uapi/linux/if_ether.h>
#include <net/ipv6.h>
#include <net/ip.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/if_link.h>
#include <linux/atomic.h>
#include <linux/mmu_notifier.h>
#include <linux/uaccess.h>
#include <linux/cgroup_rdma.h>
#include <uapi/rdma/ib_user_verbs.h>
#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN
extern struct workqueue_struct *ib_wq;
extern struct workqueue_struct *ib_comp_wq;
union ib_gid {
u8 raw[16];
struct {
__be64 subnet_prefix;
__be64 interface_id;
} global;
};
extern union ib_gid zgid;
enum ib_gid_type {
/* If link layer is Ethernet, this is RoCE V1 */
IB_GID_TYPE_IB = 0,
IB_GID_TYPE_ROCE = 0,
IB_GID_TYPE_ROCE_UDP_ENCAP = 1,
IB_GID_TYPE_SIZE
};
#define ROCE_V2_UDP_DPORT 4791
struct ib_gid_attr {
enum ib_gid_type gid_type;
struct net_device *ndev;
};
enum rdma_node_type {
/* IB values map to NodeInfo:NodeType. */
RDMA_NODE_IB_CA = 1,
RDMA_NODE_IB_SWITCH,
RDMA_NODE_IB_ROUTER,
RDMA_NODE_RNIC,
RDMA_NODE_USNIC,
RDMA_NODE_USNIC_UDP,
};
enum {
/* set the local administered indication */
IB_SA_WELL_KNOWN_GUID = BIT_ULL(57) | 2,
};
enum rdma_transport_type {
RDMA_TRANSPORT_IB,
RDMA_TRANSPORT_IWARP,
RDMA_TRANSPORT_USNIC,
RDMA_TRANSPORT_USNIC_UDP
};
enum rdma_protocol_type {
RDMA_PROTOCOL_IB,
RDMA_PROTOCOL_IBOE,
RDMA_PROTOCOL_IWARP,
RDMA_PROTOCOL_USNIC_UDP
};
__attribute_const__ enum rdma_transport_type
rdma_node_get_transport(enum rdma_node_type node_type);
enum rdma_network_type {
RDMA_NETWORK_IB,
RDMA_NETWORK_ROCE_V1 = RDMA_NETWORK_IB,
RDMA_NETWORK_IPV4,
RDMA_NETWORK_IPV6
};
static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type network_type)
{
if (network_type == RDMA_NETWORK_IPV4 ||
network_type == RDMA_NETWORK_IPV6)
return IB_GID_TYPE_ROCE_UDP_ENCAP;
/* IB_GID_TYPE_IB same as RDMA_NETWORK_ROCE_V1 */
return IB_GID_TYPE_IB;
}
static inline enum rdma_network_type ib_gid_to_network_type(enum ib_gid_type gid_type,
union ib_gid *gid)
{
if (gid_type == IB_GID_TYPE_IB)
return RDMA_NETWORK_IB;
if (ipv6_addr_v4mapped((struct in6_addr *)gid))
return RDMA_NETWORK_IPV4;
else
return RDMA_NETWORK_IPV6;
}
enum rdma_link_layer {
IB_LINK_LAYER_UNSPECIFIED,
IB_LINK_LAYER_INFINIBAND,
IB_LINK_LAYER_ETHERNET,
};
enum ib_device_cap_flags {
IB_DEVICE_RESIZE_MAX_WR = (1 << 0),
IB_DEVICE_BAD_PKEY_CNTR = (1 << 1),
IB_DEVICE_BAD_QKEY_CNTR = (1 << 2),
IB_DEVICE_RAW_MULTI = (1 << 3),
IB_DEVICE_AUTO_PATH_MIG = (1 << 4),
IB_DEVICE_CHANGE_PHY_PORT = (1 << 5),
IB_DEVICE_UD_AV_PORT_ENFORCE = (1 << 6),
IB_DEVICE_CURR_QP_STATE_MOD = (1 << 7),
IB_DEVICE_SHUTDOWN_PORT = (1 << 8),
/* Not in use, former INIT_TYPE = (1 << 9),*/
IB_DEVICE_PORT_ACTIVE_EVENT = (1 << 10),
IB_DEVICE_SYS_IMAGE_GUID = (1 << 11),
IB_DEVICE_RC_RNR_NAK_GEN = (1 << 12),
IB_DEVICE_SRQ_RESIZE = (1 << 13),
IB_DEVICE_N_NOTIFY_CQ = (1 << 14),
/*
* This device supports a per-device lkey or stag that can be
* used without performing a memory registration for the local
* memory. Note that ULPs should never check this flag, but
* instead of use the local_dma_lkey flag in the ib_pd structure,
* which will always contain a usable lkey.
*/
IB_DEVICE_LOCAL_DMA_LKEY = (1 << 15),
/* Reserved, old SEND_W_INV = (1 << 16),*/
IB_DEVICE_MEM_WINDOW = (1 << 17),
/*
* Devices should set IB_DEVICE_UD_IP_SUM if they support
* insertion of UDP and TCP checksum on outgoing UD IPoIB
* messages and can verify the validity of checksum for
* incoming messages. Setting this flag implies that the
* IPoIB driver may set NETIF_F_IP_CSUM for datagram mode.
*/
IB_DEVICE_UD_IP_CSUM = (1 << 18),
IB_DEVICE_UD_TSO = (1 << 19),
IB_DEVICE_XRC = (1 << 20),
/*
* This device supports the IB "base memory management extension",
* which includes support for fast registrations (IB_WR_REG_MR,
* IB_WR_LOCAL_INV and IB_WR_SEND_WITH_INV verbs). This flag should
* also be set by any iWarp device which must support FRs to comply
* to the iWarp verbs spec. iWarp devices also support the
* IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the
* stag.
*/
IB_DEVICE_MEM_MGT_EXTENSIONS = (1 << 21),
IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1 << 22),
IB_DEVICE_MEM_WINDOW_TYPE_2A = (1 << 23),
IB_DEVICE_MEM_WINDOW_TYPE_2B = (1 << 24),
IB_DEVICE_RC_IP_CSUM = (1 << 25),
/* Deprecated. Please use IB_RAW_PACKET_CAP_IP_CSUM. */
IB_DEVICE_RAW_IP_CSUM = (1 << 26),
/*
* Devices should set IB_DEVICE_CROSS_CHANNEL if they
* support execution of WQEs that involve synchronization
* of I/O operations with single completion queue managed
* by hardware.
*/
IB_DEVICE_CROSS_CHANNEL = (1 << 27),
IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29),
IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30),
IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31),
IB_DEVICE_SG_GAPS_REG = (1ULL << 32),
IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33),
/* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */
IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34),
IB_DEVICE_RDMA_NETDEV_OPA_VNIC = (1ULL << 35),
};
enum ib_signature_prot_cap {
IB_PROT_T10DIF_TYPE_1 = 1,
IB_PROT_T10DIF_TYPE_2 = 1 << 1,
IB_PROT_T10DIF_TYPE_3 = 1 << 2,
};
enum ib_signature_guard_cap {
IB_GUARD_T10DIF_CRC = 1,
IB_GUARD_T10DIF_CSUM = 1 << 1,
};
enum ib_atomic_cap {
IB_ATOMIC_NONE,
IB_ATOMIC_HCA,
IB_ATOMIC_GLOB
};
enum ib_odp_general_cap_bits {
IB_ODP_SUPPORT = 1 << 0,
IB_ODP_SUPPORT_IMPLICIT = 1 << 1,
};
enum ib_odp_transport_cap_bits {
IB_ODP_SUPPORT_SEND = 1 << 0,
IB_ODP_SUPPORT_RECV = 1 << 1,
IB_ODP_SUPPORT_WRITE = 1 << 2,
IB_ODP_SUPPORT_READ = 1 << 3,
IB_ODP_SUPPORT_ATOMIC = 1 << 4,
};
struct ib_odp_caps {
uint64_t general_caps;
struct {
uint32_t rc_odp_caps;
uint32_t uc_odp_caps;
uint32_t ud_odp_caps;
} per_transport_caps;
};
struct ib_rss_caps {
/* Corresponding bit will be set if qp type from
* 'enum ib_qp_type' is supported, e.g.
* supported_qpts |= 1 << IB_QPT_UD
*/
u32 supported_qpts;
u32 max_rwq_indirection_tables;
u32 max_rwq_indirection_table_size;
};
enum ib_tm_cap_flags {
/* Support tag matching on RC transport */
IB_TM_CAP_RC = 1 << 0,
};
struct ib_xrq_caps {
/* Max size of RNDV header */
u32 max_rndv_hdr_size;
/* Max number of entries in tag matching list */
u32 max_num_tags;
/* From enum ib_tm_cap_flags */
u32 flags;
/* Max number of outstanding list operations */
u32 max_ops;
/* Max number of SGE in tag matching entry */
u32 max_sge;
};
enum ib_cq_creation_flags {
IB_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0,
IB_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1,
};
struct ib_cq_init_attr {
unsigned int cqe;
int comp_vector;
u32 flags;
};
struct ib_device_attr {
u64 fw_ver;
__be64 sys_image_guid;
u64 max_mr_size;
u64 page_size_cap;
u32 vendor_id;
u32 vendor_part_id;
u32 hw_ver;
int max_qp;
int max_qp_wr;
u64 device_cap_flags;
int max_sge;
int max_sge_rd;
int max_cq;
int max_cqe;
int max_mr;
int max_pd;
int max_qp_rd_atom;
int max_ee_rd_atom;
int max_res_rd_atom;
int max_qp_init_rd_atom;
int max_ee_init_rd_atom;
enum ib_atomic_cap atomic_cap;
enum ib_atomic_cap masked_atomic_cap;
int max_ee;
int max_rdd;
int max_mw;
int max_raw_ipv6_qp;
int max_raw_ethy_qp;
int max_mcast_grp;
int max_mcast_qp_attach;
int max_total_mcast_qp_attach;
int max_ah;
int max_fmr;
int max_map_per_fmr;
int max_srq;
int max_srq_wr;
int max_srq_sge;
unsigned int max_fast_reg_page_list_len;
u16 max_pkeys;
u8 local_ca_ack_delay;
int sig_prot_cap;
int sig_guard_cap;
struct ib_odp_caps odp_caps;
uint64_t timestamp_mask;
uint64_t hca_core_clock; /* in KHZ */
struct ib_rss_caps rss_caps;
u32 max_wq_type_rq;
u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */
struct ib_xrq_caps xrq_caps;
};
enum ib_mtu {
IB_MTU_256 = 1,
IB_MTU_512 = 2,
IB_MTU_1024 = 3,
IB_MTU_2048 = 4,
IB_MTU_4096 = 5
};
static inline int ib_mtu_enum_to_int(enum ib_mtu mtu)
{
switch (mtu) {
case IB_MTU_256: return 256;
case IB_MTU_512: return 512;
case IB_MTU_1024: return 1024;
case IB_MTU_2048: return 2048;
case IB_MTU_4096: return 4096;
default: return -1;
}
}
static inline enum ib_mtu ib_mtu_int_to_enum(int mtu)
{
if (mtu >= 4096)
return IB_MTU_4096;
else if (mtu >= 2048)
return IB_MTU_2048;
else if (mtu >= 1024)
return IB_MTU_1024;
else if (mtu >= 512)
return IB_MTU_512;
else
return IB_MTU_256;
}
enum ib_port_state {
IB_PORT_NOP = 0,
IB_PORT_DOWN = 1,
IB_PORT_INIT = 2,
IB_PORT_ARMED = 3,
IB_PORT_ACTIVE = 4,
IB_PORT_ACTIVE_DEFER = 5
};
enum ib_port_cap_flags {
IB_PORT_SM = 1 << 1,
IB_PORT_NOTICE_SUP = 1 << 2,
IB_PORT_TRAP_SUP = 1 << 3,
IB_PORT_OPT_IPD_SUP = 1 << 4,
IB_PORT_AUTO_MIGR_SUP = 1 << 5,
IB_PORT_SL_MAP_SUP = 1 << 6,
IB_PORT_MKEY_NVRAM = 1 << 7,
IB_PORT_PKEY_NVRAM = 1 << 8,
IB_PORT_LED_INFO_SUP = 1 << 9,
IB_PORT_SM_DISABLED = 1 << 10,
IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
IB_PORT_CM_SUP = 1 << 16,
IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
IB_PORT_REINIT_SUP = 1 << 18,
IB_PORT_DEVICE_MGMT_SUP = 1 << 19,
IB_PORT_VENDOR_CLASS_SUP = 1 << 20,
IB_PORT_DR_NOTICE_SUP = 1 << 21,
IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22,
IB_PORT_BOOT_MGMT_SUP = 1 << 23,
IB_PORT_LINK_LATENCY_SUP = 1 << 24,
IB_PORT_CLIENT_REG_SUP = 1 << 25,
IB_PORT_IP_BASED_GIDS = 1 << 26,
};
enum ib_port_width {
IB_WIDTH_1X = 1,
IB_WIDTH_4X = 2,
IB_WIDTH_8X = 4,
IB_WIDTH_12X = 8
};
static inline int ib_width_enum_to_int(enum ib_port_width width)
{
switch (width) {
case IB_WIDTH_1X: return 1;
case IB_WIDTH_4X: return 4;
case IB_WIDTH_8X: return 8;
case IB_WIDTH_12X: return 12;
default: return -1;
}
}
enum ib_port_speed {
IB_SPEED_SDR = 1,
IB_SPEED_DDR = 2,
IB_SPEED_QDR = 4,
IB_SPEED_FDR10 = 8,
IB_SPEED_FDR = 16,
IB_SPEED_EDR = 32,
IB_SPEED_HDR = 64
};
/**
* struct rdma_hw_stats
* @timestamp - Used by the core code to track when the last update was
* @lifespan - Used by the core code to determine how old the counters
* should be before being updated again. Stored in jiffies, defaults
* to 10 milliseconds, drivers can override the default be specifying
* their own value during their allocation routine.
* @name - Array of pointers to static names used for the counters in
* directory.
* @num_counters - How many hardware counters there are. If name is
* shorter than this number, a kernel oops will result. Driver authors
* are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@name) < num_counters)
* in their code to prevent this.
* @value - Array of u64 counters that are accessed by the sysfs code and
* filled in by the drivers get_stats routine
*/
struct rdma_hw_stats {
unsigned long timestamp;
unsigned long lifespan;
const char * const *names;
int num_counters;
u64 value[];
};
#define RDMA_HW_STATS_DEFAULT_LIFESPAN 10
/**
* rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct
* for drivers.
* @names - Array of static const char *
* @num_counters - How many elements in array
* @lifespan - How many milliseconds between updates
*/
static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
const char * const *names, int num_counters,
unsigned long lifespan)
{
struct rdma_hw_stats *stats;
stats = kzalloc(sizeof(*stats) + num_counters * sizeof(u64),
GFP_KERNEL);
if (!stats)
return NULL;
stats->names = names;
stats->num_counters = num_counters;
stats->lifespan = msecs_to_jiffies(lifespan);
return stats;
}
/* Define bits for the various functionality this port needs to be supported by
* the core.
*/
/* Management 0x00000FFF */
#define RDMA_CORE_CAP_IB_MAD 0x00000001
#define RDMA_CORE_CAP_IB_SMI 0x00000002
#define RDMA_CORE_CAP_IB_CM 0x00000004
#define RDMA_CORE_CAP_IW_CM 0x00000008
#define RDMA_CORE_CAP_IB_SA 0x00000010
#define RDMA_CORE_CAP_OPA_MAD 0x00000020
/* Address format 0x000FF000 */
#define RDMA_CORE_CAP_AF_IB 0x00001000
#define RDMA_CORE_CAP_ETH_AH 0x00002000
#define RDMA_CORE_CAP_OPA_AH 0x00004000
/* Protocol 0xFFF00000 */
#define RDMA_CORE_CAP_PROT_IB 0x00100000
#define RDMA_CORE_CAP_PROT_ROCE 0x00200000
#define RDMA_CORE_CAP_PROT_IWARP 0x00400000
#define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000
#define RDMA_CORE_CAP_PROT_RAW_PACKET 0x01000000
#define RDMA_CORE_CAP_PROT_USNIC 0x02000000
#define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \
| RDMA_CORE_CAP_IB_MAD \
| RDMA_CORE_CAP_IB_SMI \
| RDMA_CORE_CAP_IB_CM \
| RDMA_CORE_CAP_IB_SA \
| RDMA_CORE_CAP_AF_IB)
#define RDMA_CORE_PORT_IBA_ROCE (RDMA_CORE_CAP_PROT_ROCE \
| RDMA_CORE_CAP_IB_MAD \
| RDMA_CORE_CAP_IB_CM \
| RDMA_CORE_CAP_AF_IB \
| RDMA_CORE_CAP_ETH_AH)
#define RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP \
(RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP \
| RDMA_CORE_CAP_IB_MAD \
| RDMA_CORE_CAP_IB_CM \
| RDMA_CORE_CAP_AF_IB \
| RDMA_CORE_CAP_ETH_AH)
#define RDMA_CORE_PORT_IWARP (RDMA_CORE_CAP_PROT_IWARP \
| RDMA_CORE_CAP_IW_CM)
#define RDMA_CORE_PORT_INTEL_OPA (RDMA_CORE_PORT_IBA_IB \
| RDMA_CORE_CAP_OPA_MAD)
#define RDMA_CORE_PORT_RAW_PACKET (RDMA_CORE_CAP_PROT_RAW_PACKET)
#define RDMA_CORE_PORT_USNIC (RDMA_CORE_CAP_PROT_USNIC)
struct ib_port_attr {
u64 subnet_prefix;
enum ib_port_state state;
enum ib_mtu max_mtu;
enum ib_mtu active_mtu;
int gid_tbl_len;
u32 port_cap_flags;
u32 max_msg_sz;
u32 bad_pkey_cntr;
u32 qkey_viol_cntr;
u16 pkey_tbl_len;
u32 sm_lid;
u32 lid;
u8 lmc;
u8 max_vl_num;
u8 sm_sl;
u8 subnet_timeout;
u8 init_type_reply;
u8 active_width;
u8 active_speed;
u8 phys_state;
bool grh_required;
};
enum ib_device_modify_flags {
IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1 << 0,
IB_DEVICE_MODIFY_NODE_DESC = 1 << 1
};
#define IB_DEVICE_NODE_DESC_MAX 64
struct ib_device_modify {
u64 sys_image_guid;
char node_desc[IB_DEVICE_NODE_DESC_MAX];
};
enum ib_port_modify_flags {
IB_PORT_SHUTDOWN = 1,
IB_PORT_INIT_TYPE = (1<<2),
IB_PORT_RESET_QKEY_CNTR = (1<<3),
IB_PORT_OPA_MASK_CHG = (1<<4)
};
struct ib_port_modify {
u32 set_port_cap_mask;
u32 clr_port_cap_mask;
u8 init_type;
};
enum ib_event_type {
IB_EVENT_CQ_ERR,
IB_EVENT_QP_FATAL,
IB_EVENT_QP_REQ_ERR,
IB_EVENT_QP_ACCESS_ERR,
IB_EVENT_COMM_EST,
IB_EVENT_SQ_DRAINED,
IB_EVENT_PATH_MIG,
IB_EVENT_PATH_MIG_ERR,
IB_EVENT_DEVICE_FATAL,
IB_EVENT_PORT_ACTIVE,
IB_EVENT_PORT_ERR,
IB_EVENT_LID_CHANGE,
IB_EVENT_PKEY_CHANGE,
IB_EVENT_SM_CHANGE,
IB_EVENT_SRQ_ERR,
IB_EVENT_SRQ_LIMIT_REACHED,
IB_EVENT_QP_LAST_WQE_REACHED,
IB_EVENT_CLIENT_REREGISTER,
IB_EVENT_GID_CHANGE,
IB_EVENT_WQ_FATAL,
};
const char *__attribute_const__ ib_event_msg(enum ib_event_type event);
struct ib_event {
struct ib_device *device;
union {
struct ib_cq *cq;
struct ib_qp *qp;
struct ib_srq *srq;
|