aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/ulp/ipoib/ipoib.h
diff options
context:
space:
mode:
authorMichael S. Tsirkin <mst@mellanox.co.il>2007-02-05 15:12:23 -0500
committerRoland Dreier <rolandd@cisco.com>2007-02-10 11:00:48 -0500
commit839fcaba355abaffb7b44f0f4504093acb0b11cf (patch)
tree9e23f61ab0569ff144e6d9d4cb6a0887783f923c /drivers/infiniband/ulp/ipoib/ipoib.h
parent9a6b090c0d1cd5c90f21db772dbe2fbcf14366de (diff)
IPoIB: Connected mode experimental support
The following patch adds experimental support for IPoIB connected mode, as defined by the draft from the IETF ipoib working group. The idea is to increase performance by increasing the MTU from the maximum of 2K (theoretically 4K) supported by IPoIB on top of UD. With this code, I'm able to get 800MByte/sec or more with netperf without options on a Mellanox 4x back-to-back DDR system. Some notes on code: 1. SRQ is used for scalability to large cluster sizes 2. Only RC connections are used (UC does not support SRQ now) 3. Retry count is set to 0 since spec draft warns against retries 4. Each connection is used for data transfers in only 1 direction, so each connection is either active(TX) or passive (RX). 2 sides that want to communicate create 2 connections. 5. Each active (TX) connection has a separate CQ for send completions - this keeps the code simple without CQ resize and other tricks 6. To detect stale passive side connections (where the remote side is down), we keep an LRU list of passive connections (updated once per second per connection) and destroy a connection after it has been unused for several seconds. The LRU rule makes it possible to avoid scanning connections that have recently been active. Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il> Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/ulp/ipoib/ipoib.h')
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h215
1 files changed, 215 insertions, 0 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 07deee8f81ce..2594db2030b3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -62,6 +62,10 @@ enum {
62 62
63 IPOIB_ENCAP_LEN = 4, 63 IPOIB_ENCAP_LEN = 4,
64 64
65 IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */
66 IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN,
67 IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE,
68 IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE,
65 IPOIB_RX_RING_SIZE = 128, 69 IPOIB_RX_RING_SIZE = 128,
66 IPOIB_TX_RING_SIZE = 64, 70 IPOIB_TX_RING_SIZE = 64,
67 IPOIB_MAX_QUEUE_SIZE = 8192, 71 IPOIB_MAX_QUEUE_SIZE = 8192,
@@ -81,6 +85,8 @@ enum {
81 IPOIB_MCAST_RUN = 6, 85 IPOIB_MCAST_RUN = 6,
82 IPOIB_STOP_REAPER = 7, 86 IPOIB_STOP_REAPER = 7,
83 IPOIB_MCAST_STARTED = 8, 87 IPOIB_MCAST_STARTED = 8,
88 IPOIB_FLAG_NETIF_STOPPED = 9,
89 IPOIB_FLAG_ADMIN_CM = 10,
84 90
85 IPOIB_MAX_BACKOFF_SECONDS = 16, 91 IPOIB_MAX_BACKOFF_SECONDS = 16,
86 92
@@ -90,6 +96,13 @@ enum {
90 IPOIB_MCAST_FLAG_ATTACHED = 3, 96 IPOIB_MCAST_FLAG_ATTACHED = 3,
91}; 97};
92 98
99#define IPOIB_OP_RECV (1ul << 31)
100#ifdef CONFIG_INFINIBAND_IPOIB_CM
101#define IPOIB_CM_OP_SRQ (1ul << 30)
102#else
103#define IPOIB_CM_OP_SRQ (0)
104#endif
105
93/* structs */ 106/* structs */
94 107
95struct ipoib_header { 108struct ipoib_header {
@@ -113,6 +126,59 @@ struct ipoib_tx_buf {
113 u64 mapping; 126 u64 mapping;
114}; 127};
115 128
129struct ib_cm_id;
130
131struct ipoib_cm_data {
132 __be32 qpn; /* High byte MUST be ignored on receive */
133 __be32 mtu;
134};
135
136struct ipoib_cm_rx {
137 struct ib_cm_id *id;
138 struct ib_qp *qp;
139 struct list_head list;
140 struct net_device *dev;
141 unsigned long jiffies;
142};
143
144struct ipoib_cm_tx {
145 struct ib_cm_id *id;
146 struct ib_cq *cq;
147 struct ib_qp *qp;
148 struct list_head list;
149 struct net_device *dev;
150 struct ipoib_neigh *neigh;
151 struct ipoib_path *path;
152 struct ipoib_tx_buf *tx_ring;
153 unsigned tx_head;
154 unsigned tx_tail;
155 unsigned long flags;
156 u32 mtu;
157 struct ib_wc ibwc[IPOIB_NUM_WC];
158};
159
160struct ipoib_cm_rx_buf {
161 struct sk_buff *skb;
162 u64 mapping[IPOIB_CM_RX_SG];
163};
164
165struct ipoib_cm_dev_priv {
166 struct ib_srq *srq;
167 struct ipoib_cm_rx_buf *srq_ring;
168 struct ib_cm_id *id;
169 struct list_head passive_ids;
170 struct work_struct start_task;
171 struct work_struct reap_task;
172 struct work_struct skb_task;
173 struct delayed_work stale_task;
174 struct sk_buff_head skb_queue;
175 struct list_head start_list;
176 struct list_head reap_list;
177 struct ib_wc ibwc[IPOIB_NUM_WC];
178 struct ib_sge rx_sge[IPOIB_CM_RX_SG];
179 struct ib_recv_wr rx_wr;
180};
181
116/* 182/*
117 * Device private locking: tx_lock protects members used in TX fast 183 * Device private locking: tx_lock protects members used in TX fast
118 * path (and we use LLTX so upper layers don't do extra locking). 184 * path (and we use LLTX so upper layers don't do extra locking).
@@ -179,6 +245,10 @@ struct ipoib_dev_priv {
179 struct list_head child_intfs; 245 struct list_head child_intfs;
180 struct list_head list; 246 struct list_head list;
181 247
248#ifdef CONFIG_INFINIBAND_IPOIB_CM
249 struct ipoib_cm_dev_priv cm;
250#endif
251
182#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 252#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
183 struct list_head fs_list; 253 struct list_head fs_list;
184 struct dentry *mcg_dentry; 254 struct dentry *mcg_dentry;
@@ -212,6 +282,9 @@ struct ipoib_path {
212 282
213struct ipoib_neigh { 283struct ipoib_neigh {
214 struct ipoib_ah *ah; 284 struct ipoib_ah *ah;
285#ifdef CONFIG_INFINIBAND_IPOIB_CM
286 struct ipoib_cm_tx *cm;
287#endif
215 union ib_gid dgid; 288 union ib_gid dgid;
216 struct sk_buff_head queue; 289 struct sk_buff_head queue;
217 290
@@ -315,6 +388,146 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey);
315void ipoib_pkey_poll(struct work_struct *work); 388void ipoib_pkey_poll(struct work_struct *work);
316int ipoib_pkey_dev_delay_open(struct net_device *dev); 389int ipoib_pkey_dev_delay_open(struct net_device *dev);
317 390
391#ifdef CONFIG_INFINIBAND_IPOIB_CM
392
393#define IPOIB_FLAGS_RC 0x80
394#define IPOIB_FLAGS_UC 0x40
395
396/* We don't support UC connections at the moment */
397#define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC))
398
399static inline int ipoib_cm_admin_enabled(struct net_device *dev)
400{
401 struct ipoib_dev_priv *priv = netdev_priv(dev);
402 return IPOIB_CM_SUPPORTED(dev->dev_addr) &&
403 test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
404}
405
406static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
407{
408 struct ipoib_dev_priv *priv = netdev_priv(dev);
409 return IPOIB_CM_SUPPORTED(n->ha) &&
410 test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
411}
412
413static inline int ipoib_cm_up(struct ipoib_neigh *neigh)
414
415{
416 return test_bit(IPOIB_FLAG_OPER_UP, &neigh->cm->flags);
417}
418
419static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh)
420{
421 return neigh->cm;
422}
423
424static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx)
425{
426 neigh->cm = tx;
427}
428
429void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx);
430int ipoib_cm_dev_open(struct net_device *dev);
431void ipoib_cm_dev_stop(struct net_device *dev);
432int ipoib_cm_dev_init(struct net_device *dev);
433int ipoib_cm_add_mode_attr(struct net_device *dev);
434void ipoib_cm_dev_cleanup(struct net_device *dev);
435struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
436 struct ipoib_neigh *neigh);
437void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx);
438void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
439 unsigned int mtu);
440void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc);
441#else
442
443struct ipoib_cm_tx;
444
445static inline int ipoib_cm_admin_enabled(struct net_device *dev)
446{
447 return 0;
448}
449static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
450
451{
452 return 0;
453}
454
455static inline int ipoib_cm_up(struct ipoib_neigh *neigh)
456
457{
458 return 0;
459}
460
461static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh)
462{
463 return NULL;
464}
465
466static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx)
467{
468}
469
470static inline
471void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
472{
473 return;
474}
475
476static inline
477int ipoib_cm_dev_open(struct net_device *dev)
478{
479 return 0;
480}
481
482static inline
483void ipoib_cm_dev_stop(struct net_device *dev)
484{
485 return;
486}
487
488static inline
489int ipoib_cm_dev_init(struct net_device *dev)
490{
491 return -ENOSYS;
492}
493
494static inline
495void ipoib_cm_dev_cleanup(struct net_device *dev)
496{
497 return;
498}
499
500static inline
501struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
502 struct ipoib_neigh *neigh)
503{
504 return NULL;
505}
506
507static inline
508void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
509{
510 return;
511}
512
513static inline
514int ipoib_cm_add_mode_attr(struct net_device *dev)
515{
516 return 0;
517}
518
519static inline void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
520 unsigned int mtu)
521{
522 dev_kfree_skb_any(skb);
523}
524
525static inline void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
526{
527}
528
529#endif
530
318#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 531#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
319void ipoib_create_debug_files(struct net_device *dev); 532void ipoib_create_debug_files(struct net_device *dev);
320void ipoib_delete_debug_files(struct net_device *dev); 533void ipoib_delete_debug_files(struct net_device *dev);
@@ -392,4 +605,6 @@ extern int ipoib_debug_level;
392 605
393#define IPOIB_GID_ARG(gid) IPOIB_GID_RAW_ARG((gid).raw) 606#define IPOIB_GID_ARG(gid) IPOIB_GID_RAW_ARG((gid).raw)
394 607
608#define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff)
609
395#endif /* _IPOIB_H */ 610#endif /* _IPOIB_H */