/*
* Copyright (c) 2004-2007 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/random.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include "cm_msgs.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");
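
/*
 * Callbacks invoked by the IB core as devices are added and removed
 * once cm_client is registered with ib_register_client().
 */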
static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device);
static struct ib_client cm_client = {
.name = "cm",
.add = cm_add_one,
.remove = cm_remove_one
};
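
/*
 * Global CM state: the device list, red-black trees for listen and
 * remote connection lookups, the idr mapping local communication IDs
 * to their cm_id_private structures, and the workqueue that runs
 * deferred event processing.
 */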
static struct ib_cm {
spinlock_t lock;
struct list_head device_list;
rwlock_t device_lock;
struct rb_root listen_service_table;
u64 listen_service_id;
/* struct rb_root peer_service_table; todo: fix peer to peer */
struct rb_root remote_qp_table;
struct rb_root remote_id_table;
struct rb_root remote_sidr_table;
struct idr local_id_table;
__be32 random_id_operand;
struct list_head timewait_list;
struct workqueue_struct *wq;
} cm;
/* Counter indexes ordered by attribute ID */
enum {
CM_REQ_COUNTER,
CM_MRA_COUNTER,
CM_REJ_COUNTER,
CM_REP_COUNTER,
CM_RTU_COUNTER,
CM_DREQ_COUNTER,
CM_DREP_COUNTER,
CM_SIDR_REQ_COUNTER,
CM_SIDR_REP_COUNTER,
CM_LAP_COUNTER,
CM_APR_COUNTER,
CM_ATTR_COUNT,
CM_ATTR_ID_OFFSET = 0x0010,
};
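
/* Counter groups: sent, retried, received, and duplicate messages. */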
enum {
CM_XMIT,
CM_XMIT_RETRIES,
CM_RECV,
CM_RECV_DUPLICATES,
CM_COUNTER_GROUPS
};
static char const counter_group_names[CM_COUNTER_GROUPS]
[sizeof("cm_rx_duplicates")] = {
"cm_tx_msgs", "cm_tx_retries",
"cm_rx_msgs", "cm_rx_duplicates"
};
struct cm_counter_group {
struct kobject obj;
atomic_long_t counter[CM_ATTR_COUNT];
};
struct cm_counter_attribute {
struct attribute attr;
int index;
};
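
/*
 * Defines a read-only sysfs attribute for one counter; for example,
 * CM_COUNTER_ATTR(req, CM_REQ_COUNTER) produces cm_req_counter_attr,
 * exposed as a file named "req".
 */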
#define CM_COUNTER_ATTR(_name, _index) \
struct cm_counter_attribute cm_##_name##_counter_attr = { \
.attr = { .name = __stringify(_name), .mode = 0444, .owner = THIS_MODULE }, \
.index = _index \
}
static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);
static struct attribute *cm_counter_default_attrs[] = {
&cm_req_counter_attr.attr,
&cm_mra_counter_attr.attr,
&cm_rej_counter_attr.attr,
&cm_rep_counter_attr.attr,
&cm_rtu_counter_attr.attr,
&cm_dreq_counter_attr.attr,
&cm_drep_counter_attr.attr,
&cm_sidr_req_counter_attr.attr,
&cm_sidr_rep_counter_attr.attr,
&cm_lap_counter_attr.attr,
&cm_apr_counter_attr.attr,
NULL
};
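
/*
 * Per-port CM state: the MAD agent used to exchange CM MADs on this
 * port and the sysfs counter groups reported for it.
 */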
struct cm_port {
struct cm_device *cm_dev;
struct ib_mad_agent *mad_agent;
struct kobject port_obj;
u8 port_num;
struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
};
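
/*
 * Per-device CM state; port[] is a trailing array sized for the
 * device's port count when the structure is allocated.
 */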
struct cm_device {
struct list_head list;
struct ib_device *ib_device;
struct device *device;
u8 ack_delay;
struct cm_port *port[0];
};
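
/* Address vector describing how to reach the remote CM. */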
struct cm_av {
struct cm_port *port;
union ib_gid dgid;
struct ib_ah_attr ah_attr;
u16 pkey_index;
u8 timeout;
};
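
/*
 * Deferred work item, processed by cm_work_handler() on the cm.wq
 * workqueue; wraps either a received MAD or a generated event.
 */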
struct cm_work {
struct delayed_work work;
struct list_head list;
struct cm_port *port;
struct ib_mad_recv_wc *mad_recv_wc; /* Received MADs */
__be32 local_id; /* Established / timewait */
__be32 remote_id;
struct ib_cm_event cm_event;
struct ib_sa_path_rec path[0];
};
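
/*
 * Tracks a connection through the timewait state; the embedded work
 * item is used to remove the entry once the timewait period expires.
 */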
struct cm_timewait_info {
struct cm_work work; /* Must be first. */
struct list_head list;
struct rb_node remote_qp_node;
struct rb_node remote_id_node;
__be64 remote_ca_guid;
__be32 remote_qpn;
u8 inserted_remote_qp;
u8 inserted_remote_id;
};
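
/*
 * Private state behind each ib_cm_id: negotiated connection
 * parameters, the primary and alternate address vectors, and the
 * list of work items pending against this ID.
 */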
struct cm_id_private {
struct ib_cm_id id;
struct rb_node service_node;
struct rb_node sidr_id_node;
spinlock_t lock; /* Do not acquire inside cm.lock */
struct completion comp;
atomic_t refcount;
struct ib_mad_send_buf *msg;
struct cm_timewait_info *timewait_info;
/* todo: use alternate port on send failure */
struct cm_av av;
struct cm_av alt_av;
struct ib_cm_compare_data *compare_data;
void *private_data;
__be64 tid;
__be32 local_qpn;
__be32 remote_qpn;
enum ib_qp_type qp_type;
__be32 sq_psn;
__be32 rq_psn;
int timeout_ms;
enum ib_mtu path_mtu;
__be16 pkey;
u8 private_data_len;
u8 max_cm_retries;
u8 peer_to_peer;
u8 responder_resources;
u8 initiator_depth;
u8 retry_count;
u8 rnr_retry_count;
u8 service_timeout;
u8 target_ack_delay;
struct list_head work_list;
atomic_t work_count;
};
static void cm_work_handler(struct work_struct *work);
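
/*
 * Drop a reference on the ID; the final put completes comp so that a
 * thread waiting to destroy the ID can proceed.
 */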
static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
{
if (atomic_dec_and_test(&cm_id_priv->refcount))
complete(&cm_id_priv->comp);
}
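
/*
 * Allocate an address handle and MAD send buffer for an outbound CM
 * message on the ID's current path. Takes a reference on cm_id_priv,
 * stored in context[0] and dropped later by cm_free_msg().
 */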
static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
struct ib_mad_send_buf **msg)
{
struct ib_mad_agent *mad_agent;
struct ib_mad_send_buf *m;
struct ib_ah *ah;
mad_agent = cm_id_priv->av.port->mad_agent;
ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr);
if (IS_ERR(ah))
return PTR_ERR(ah);
m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
cm_id_priv->av.pkey_index,
0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_ATOMIC);
if (IS_ERR(m)) {
ib_destroy_ah(ah);
return PTR_ERR(m);
}
/* Timeout set by caller if response is expected. */
m->ah = ah;
m->retries = cm_id_priv->max_cm_retries;
atomic_inc(&cm_id_priv->refcount);
m->context[0] = cm_id_priv;
*msg = m;
return 0;
}
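
/*
 * Allocate a MAD send buffer for replying to a received MAD, with the
 * address handle built from the incoming work completion and GRH.
 */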
static int cm_alloc_response_msg(struct cm_port *port,
struct ib_mad_recv_wc *mad_recv_wc,
struct ib_mad_send_buf **msg)
{
struct ib_mad_send_buf *m;
struct ib_ah *ah;
ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
mad_recv_wc->recv_buf.grh, port->port_num);
if (IS_ERR(ah))
return PTR_ERR(ah);
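	/* CM replies are addressed to the remote GSI (QP1) */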
m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_ATOMIC);
if (IS_ERR(m)) {
ib_destroy_ah(ah);
return PTR_ERR(m);
}
m->ah = ah;
*msg = m;
return 0;
}
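
/*
 * Free a send buffer along with its address handle and, if one was
 * taken, the reference on the owning ID.
 */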
static void cm_free_msg(struct ib_mad_send_buf *msg)
{
ib_destroy_ah(msg->ah);
if (msg->context[0])
cm_deref_id(msg->context[0]);
ib_free_send_mad(msg);
}
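
/*
 * Duplicate caller-supplied private data. Returns NULL when there is
 * nothing to copy and ERR_PTR(-ENOMEM) on allocation failure.
 */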
static void *cm_copy_private_data(const void *private_data,
				  u8 private_data_len)
{
void *data;
if (!private_data || !private_data_len)
return NULL;
data = kmemdup(private_data, private_data_len, GFP_KERNEL);
if (!data)
return ERR_PTR(-ENOMEM);
return data;
}
static void cm_set_private_data(struct cm_id_private *cm_id_priv,
void *private_data, u8 private_data_len)
{
if (cm_id_priv->private_data && cm_id_priv->private_data_len)
kfree(cm_id_priv->private_data);
cm_id_priv->private_data = private_data;
cm_id_priv->private_data_len = private_data_len;
}
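
/* Initialize an address vector for responding to a received MAD. */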
static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
struct ib_grh *grh, struct cm_av *av)
{
av->port = port;
av->pkey_index = wc->pkey_index;
ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc,
grh, &av->ah_attr);
}
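
/*
 * Resolve a path record to a local port: find the device and port
 * owning the source GID, look up the matching pkey index, and fill in
 * the address handle attributes from the path.
 */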
static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
{
struct cm_device *cm_dev;
struct cm_port *port = NULL;
unsigned long flags;
int ret;
u8 p;
read_lock_irqsave(&cm.device_lock, flags);
list_for_each_entry(cm_dev, &cm.device_list, list) {
if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
&p, NULL)) {
port = cm_dev->port[p-1];
break;
}
}
read_unlock_irqrestore(&cm.device_lock, flags);
if (!port)
return -EINVAL;
ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
be16_to_cpu(path->pkey), &av->pkey_index);
if (ret)
return ret;
av->port = port;
ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
&av->ah_attr);
av->timeout = path->packet_life_time + 1;
return 0;
}
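
/*
 * Allocate a local communication ID from the idr; next_id advances,
 * wrapping at MAX_ID_MASK, so recently freed IDs are not immediately
 * reused.
 */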
static int cm_alloc_id(struct cm_id_private *cm_id_priv)
{
unsigned long flags;
int ret, id;
static int next_id;
do {
spin_lock_irqsave(&cm.lock, flags);
ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
next_id, &id);
if (!ret)
next_id = ((unsigned) id + 1) & MAX_ID_MASK;
spin_unlock_irqrestore(&cm.lock, flags);