author		Jonathan Herman <hermanjl@cs.unc.edu>	2013-01-17 16:15:55 -0500
committer	Jonathan Herman <hermanjl@cs.unc.edu>	2013-01-17 16:15:55 -0500
commit		8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
tree		a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /drivers/infiniband/ulp
parent		406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
Patched in Tegra support.
Diffstat (limited to 'drivers/infiniband/ulp')
-rw-r--r--	drivers/infiniband/ulp/ipoib/Makefile           |    3
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib.h            |   85
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_cm.c         |   66
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_fs.c         |    8
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_ib.c         |   55
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_main.c       |  756
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_multicast.c  |   77
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_netlink.c    |  172
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_vlan.c       |  124
-rw-r--r--	drivers/infiniband/ulp/iser/iscsi_iser.c        |  115
-rw-r--r--	drivers/infiniband/ulp/iser/iscsi_iser.h        |   22
-rw-r--r--	drivers/infiniband/ulp/iser/iser_initiator.c    |   57
-rw-r--r--	drivers/infiniband/ulp/iser/iser_memory.c       |    8
-rw-r--r--	drivers/infiniband/ulp/iser/iser_verbs.c        |  178
-rw-r--r--	drivers/infiniband/ulp/srp/ib_srp.c             |  499
-rw-r--r--	drivers/infiniband/ulp/srp/ib_srp.h             |   11
-rw-r--r--	drivers/infiniband/ulp/srpt/Kconfig             |   12
-rw-r--r--	drivers/infiniband/ulp/srpt/Makefile            |    2
-rw-r--r--	drivers/infiniband/ulp/srpt/ib_dm_mad.h         |  139
-rw-r--r--	drivers/infiniband/ulp/srpt/ib_srpt.c           | 4018
-rw-r--r--	drivers/infiniband/ulp/srpt/ib_srpt.h           |  442
21 files changed, 737 insertions(+), 6112 deletions(-)
diff --git a/drivers/infiniband/ulp/ipoib/Makefile b/drivers/infiniband/ulp/ipoib/Makefile
index e5430dd5076..3090100f0de 100644
--- a/drivers/infiniband/ulp/ipoib/Makefile
+++ b/drivers/infiniband/ulp/ipoib/Makefile
@@ -5,8 +5,7 @@ ib_ipoib-y := ipoib_main.o \
 			   ipoib_multicast.o \
 			   ipoib_verbs.o \
 			   ipoib_vlan.o \
-			   ipoib_ethtool.o \
-			   ipoib_netlink.o
+			   ipoib_ethtool.o
 ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_CM)		+= ipoib_cm.o
 ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG)	+= ipoib_fs.o
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 07ca6fd5546..b3cc1e062b1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -44,7 +44,6 @@
 #include <linux/mutex.h>
 
 #include <net/neighbour.h>
-#include <net/sch_generic.h>
 
 #include <linux/atomic.h>
 
@@ -92,8 +91,6 @@ enum {
 	IPOIB_STOP_REAPER	  = 7,
 	IPOIB_FLAG_ADMIN_CM	  = 9,
 	IPOIB_FLAG_UMCAST	  = 10,
-	IPOIB_STOP_NEIGH_GC	  = 11,
-	IPOIB_NEIGH_TBL_FLUSH	  = 12,
 
 	IPOIB_MAX_BACKOFF_SECONDS = 16,
 
@@ -104,10 +101,6 @@ enum {
 
 	MAX_SEND_CQE		  = 16,
 	IPOIB_CM_COPYBREAK	  = 256,
-
-	IPOIB_NON_CHILD		  = 0,
-	IPOIB_LEGACY_CHILD	  = 1,
-	IPOIB_RTNL_CHILD	  = 2,
 };
 
 #define	IPOIB_OP_RECV   (1ul << 31)
@@ -124,9 +117,8 @@ struct ipoib_header {
 	u16	reserved;
 };
 
-struct ipoib_cb {
-	struct qdisc_skb_cb	qdisc_cb;
-	u8			hwaddr[INFINIBAND_ALEN];
+struct ipoib_pseudoheader {
+	u8  hwaddr[INFINIBAND_ALEN];
 };
 
 /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
@@ -266,23 +258,6 @@ struct ipoib_ethtool_st {
 	u16     max_coalesced_frames;
 };
 
-struct ipoib_neigh_table;
-
-struct ipoib_neigh_hash {
-	struct ipoib_neigh_table       *ntbl;
-	struct ipoib_neigh __rcu      **buckets;
-	struct rcu_head			rcu;
-	u32				mask;
-	u32				size;
-};
-
-struct ipoib_neigh_table {
-	struct ipoib_neigh_hash __rcu  *htbl;
-	atomic_t			entries;
-	struct completion		flushed;
-	struct completion		deleted;
-};
-
 /*
  * Device private locking: network stack tx_lock protects members used
  * in TX fast path, lock protects everything else.  lock nests inside
@@ -302,8 +277,6 @@ struct ipoib_dev_priv {
 	struct rb_root  path_tree;
 	struct list_head path_list;
 
-	struct ipoib_neigh_table ntbl;
-
 	struct ipoib_mcast *broadcast;
 	struct list_head multicast_list;
 	struct rb_root multicast_tree;
@@ -316,7 +289,7 @@ struct ipoib_dev_priv {
 	struct work_struct flush_heavy;
 	struct work_struct restart_task;
 	struct delayed_work ah_reap_task;
-	struct delayed_work neigh_reap_task;
+
 	struct ib_device *ca;
 	u8		   port;
 	u16		   pkey;
@@ -357,7 +330,6 @@ struct ipoib_dev_priv {
 	struct net_device *parent;
 	struct list_head child_intfs;
 	struct list_head list;
-	int    child_type;
 
 #ifdef CONFIG_INFINIBAND_IPOIB_CM
 	struct ipoib_cm_dev_priv cm;
@@ -403,16 +375,13 @@ struct ipoib_neigh {
 #ifdef CONFIG_INFINIBAND_IPOIB_CM
 	struct ipoib_cm_tx *cm;
 #endif
-	u8     daddr[INFINIBAND_ALEN];
+	union ib_gid	    dgid;
 	struct sk_buff_head queue;
 
+	struct neighbour   *neighbour;
 	struct net_device  *dev;
 
 	struct list_head    list;
-	struct ipoib_neigh __rcu *hnext;
-	struct rcu_head     rcu;
-	atomic_t	    refcnt;
-	unsigned long       alive;
 };
 
 #define IPOIB_UD_MTU(ib_mtu)		(ib_mtu - IPOIB_ENCAP_LEN)
@@ -423,17 +392,21 @@ static inline int ipoib_ud_need_sg(unsigned int ib_mtu)
 	return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
 }
 
-void ipoib_neigh_dtor(struct ipoib_neigh *neigh);
-static inline void ipoib_neigh_put(struct ipoib_neigh *neigh)
+/*
+ * We stash a pointer to our private neighbour information after our
+ * hardware address in neigh->ha.  The ALIGN() expression here makes
+ * sure that this pointer is stored aligned so that an unaligned
+ * load is not needed to dereference it.
+ */
+static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh)
 {
-	if (atomic_dec_and_test(&neigh->refcnt))
-		ipoib_neigh_dtor(neigh);
+	return (void*) neigh + ALIGN(offsetof(struct neighbour, ha) +
+				     INFINIBAND_ALEN, sizeof(void *));
 }
-struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr);
-struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
+
+struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh,
 				      struct net_device *dev);
-void ipoib_neigh_free(struct ipoib_neigh *neigh);
-void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid);
+void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh);
 
 extern struct workqueue_struct *ipoib_workqueue;
 
@@ -450,6 +423,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah)
 {
 	kref_put(&ah->ref, ipoib_free_ah);
 }
+
 int ipoib_open(struct net_device *dev);
 int ipoib_add_pkey_attr(struct net_device *dev);
 int ipoib_add_umcast_attr(struct net_device *dev);
@@ -479,7 +453,7 @@ void ipoib_dev_cleanup(struct net_device *dev);
 
 void ipoib_mcast_join_task(struct work_struct *work);
 void ipoib_mcast_carrier_on_task(struct work_struct *work);
-void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
+void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb);
 
 void ipoib_mcast_restart_task(struct work_struct *work);
 int ipoib_mcast_start_thread(struct net_device *dev);
@@ -517,17 +491,6 @@ void ipoib_event(struct ib_event_handler *handler,
 int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey);
 int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey);
 
-int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
-		     u16 pkey, int child_type);
-
-int  __init ipoib_netlink_init(void);
-void __exit ipoib_netlink_fini(void);
-
-void ipoib_set_umcast(struct net_device *ndev, int umcast_val);
-int  ipoib_set_mode(struct net_device *dev, const char *buf);
-
-void ipoib_setup(struct net_device *dev);
-
 void ipoib_pkey_poll(struct work_struct *work);
 int ipoib_pkey_dev_delay_open(struct net_device *dev);
 void ipoib_drain_cq(struct net_device *dev);
@@ -535,14 +498,14 @@ void ipoib_drain_cq(struct net_device *dev);
 void ipoib_set_ethtool_ops(struct net_device *dev);
 int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca);
 
+#ifdef CONFIG_INFINIBAND_IPOIB_CM
+
 #define IPOIB_FLAGS_RC		0x80
 #define IPOIB_FLAGS_UC		0x40
 
 /* We don't support UC connections at the moment */
 #define IPOIB_CM_SUPPORTED(ha)   (ha[0] & (IPOIB_FLAGS_RC))
 
-#ifdef CONFIG_INFINIBAND_IPOIB_CM
-
 extern int ipoib_max_conn_qp;
 
 static inline int ipoib_cm_admin_enabled(struct net_device *dev)
@@ -552,10 +515,10 @@ static inline int ipoib_cm_admin_enabled(struct net_device *dev)
 		test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
 }
 
-static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr)
+static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	return IPOIB_CM_SUPPORTED(hwaddr) &&
+	return IPOIB_CM_SUPPORTED(n->ha) &&
 		test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
 }
 
@@ -610,7 +573,7 @@ static inline int ipoib_cm_admin_enabled(struct net_device *dev)
 {
 	return 0;
 }
-static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr)
+static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
 
 {
 	return 0;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 03103d2bd64..39913a065f9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -37,7 +37,6 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
-#include <linux/moduleparam.h>
 
 #include "ipoib.h"
 
@@ -85,7 +84,7 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
 	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
 
 	for (i = 0; i < frags; ++i)
-		ib_dma_unmap_page(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
+		ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
 }
 
 static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
@@ -170,7 +169,7 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
 			goto partial_error;
 		skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);
 
-		mapping[i + 1] = ib_dma_map_page(priv->ca, page,
+		mapping[i + 1] = ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[i].page,
 						 0, PAGE_SIZE, DMA_FROM_DEVICE);
 		if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1])))
 			goto partial_error;
@@ -184,7 +183,7 @@ partial_error:
 	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
 
 	for (; i > 0; --i)
-		ib_dma_unmap_page(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE);
+		ib_dma_unmap_single(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE);
 
 	dev_kfree_skb_any(skb);
 	return NULL;
@@ -538,13 +537,12 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
 
 		if (length == 0) {
 			/* don't need this page */
-			skb_fill_page_desc(toskb, i, skb_frag_page(frag),
-					   0, PAGE_SIZE);
+			skb_fill_page_desc(toskb, i, frag->page, 0, PAGE_SIZE);
 			--skb_shinfo(skb)->nr_frags;
 		} else {
 			size = min(length, (unsigned) PAGE_SIZE);
 
-			skb_frag_size_set(frag, size);
+			frag->size = size;
 			skb->data_len += size;
 			skb->truesize += size;
 			skb->len += size;
@@ -752,9 +750,6 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
 	dev->trans_start = jiffies;
 	++tx->tx_head;
 
-	skb_orphan(skb);
-	skb_dst_drop(skb);
-
 	if (++priv->tx_outstanding == ipoib_sendq_size) {
 		ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
 			  tx->qp->qp_num);
@@ -814,7 +809,9 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 	if (neigh) {
 		neigh->cm = NULL;
 		list_del(&neigh->list);
-		ipoib_neigh_free(neigh);
+		if (neigh->ah)
+			ipoib_put_ah(neigh->ah);
+		ipoib_neigh_free(dev, neigh);
 
 		tx->neigh = NULL;
 	}
@@ -1231,7 +1228,9 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
 		if (neigh) {
 			neigh->cm = NULL;
 			list_del(&neigh->list);
-			ipoib_neigh_free(neigh);
+			if (neigh->ah)
+				ipoib_put_ah(neigh->ah);
+			ipoib_neigh_free(dev, neigh);
 
 			tx->neigh = NULL;
 		}
@@ -1274,15 +1273,12 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
 void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
-	unsigned long flags;
 	if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
-		spin_lock_irqsave(&priv->lock, flags);
 		list_move(&tx->list, &priv->cm.reap_list);
 		queue_work(ipoib_workqueue, &priv->cm.reap_task);
 		ipoib_dbg(priv, "Reap connection for gid %pI6\n",
-			  tx->neigh->daddr + 4);
+			  tx->neigh->dgid.raw);
 		tx->neigh = NULL;
-		spin_unlock_irqrestore(&priv->lock, flags);
 	}
 }
 
@@ -1306,7 +1302,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
 		p = list_entry(priv->cm.start_list.next, typeof(*p), list);
 		list_del_init(&p->list);
 		neigh = p->neigh;
-		qpn = IPOIB_QPN(neigh->daddr);
+		qpn = IPOIB_QPN(neigh->neighbour->ha);
 		memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
 
 		spin_unlock_irqrestore(&priv->lock, flags);
@@ -1322,7 +1318,9 @@ static void ipoib_cm_tx_start(struct work_struct *work)
 			if (neigh) {
 				neigh->cm = NULL;
 				list_del(&neigh->list);
-				ipoib_neigh_free(neigh);
+				if (neigh->ah)
+					ipoib_put_ah(neigh->ah);
+				ipoib_neigh_free(dev, neigh);
 			}
 			list_del(&p->list);
 			kfree(p);
@@ -1376,7 +1374,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
 
 		if (skb->protocol == htons(ETH_P_IP))
 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
-#if IS_ENABLED(CONFIG_IPV6)
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 		else if (skb->protocol == htons(ETH_P_IPV6))
 			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 #endif
@@ -1397,7 +1395,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
 	int e = skb_queue_empty(&priv->cm.skb_queue);
 
 	if (skb_dst(skb))
-		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 
 	skb_queue_tail(&priv->cm.skb_queue, skb);
 	if (e)
@@ -1455,19 +1453,36 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
 			const char *buf, size_t count)
 {
 	struct net_device *dev = to_net_dev(d);
-	int ret;
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
 	if (!rtnl_trylock())
 		return restart_syscall();
 
-	ret = ipoib_set_mode(dev, buf);
+	/* flush paths if we switch modes so that connections are restarted */
+	if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) {
+		set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
+		ipoib_warn(priv, "enabling connected mode "
+			   "will cause multicast packet drops\n");
+		netdev_update_features(dev);
+		rtnl_unlock();
+		priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
 
-	rtnl_unlock();
+		ipoib_flush_paths(dev);
+		return count;
+	}
+
+	if (!strcmp(buf, "datagram\n")) {
+		clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
+		netdev_update_features(dev);
+		dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
+		rtnl_unlock();
+		ipoib_flush_paths(dev);
 
-	if (!ret)
 		return count;
+	}
+	rtnl_unlock();
 
-	return ret;
+	return -EINVAL;
 }
 
 static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode);
@@ -1481,7 +1496,6 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_srq_init_attr srq_init_attr = {
-		.srq_type = IB_SRQT_BASIC,
 		.attr = {
 			.max_wr  = ipoib_recvq_size,
 			.max_sge = max_sge
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 50061854616..86eae229dc4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -37,7 +37,6 @@
 struct file_operations;
 
 #include <linux/debugfs.h>
-#include <linux/export.h>
 
 #include "ipoib.h"
 
@@ -213,15 +212,16 @@ static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr)
 		   gid_buf, path.pathrec.dlid ? "yes" : "no");
 
 	if (path.pathrec.dlid) {
-		rate = ib_rate_to_mbps(path.pathrec.rate);
+		rate = ib_rate_to_mult(path.pathrec.rate) * 25;
 
 		seq_printf(file,
 			   "  DLID:     0x%04x\n"
 			   "  SL: %12d\n"
-			   "  rate: %8d.%d Gb/sec\n",
+			   "  rate: %*d%s Gb/sec\n",
 			   be16_to_cpu(path.pathrec.dlid),
 			   path.pathrec.sl,
-			   rate / 1000, rate % 1000);
+			   10 - ((rate % 10) ? 2 : 0),
+			   rate / 10, rate % 10 ? ".5" : "");
 	}
 
 	seq_putc(file, '\n');
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index a1bca70e20a..81ae61d68a2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -34,7 +34,6 @@
  */
 
 #include <linux/delay.h>
-#include <linux/moduleparam.h>
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 
@@ -57,24 +56,21 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
 				 struct ib_pd *pd, struct ib_ah_attr *attr)
 {
 	struct ipoib_ah *ah;
-	struct ib_ah *vah;
 
 	ah = kmalloc(sizeof *ah, GFP_KERNEL);
 	if (!ah)
-		return ERR_PTR(-ENOMEM);
+		return NULL;
 
 	ah->dev       = dev;
 	ah->last_send = 0;
 	kref_init(&ah->ref);
 
-	vah = ib_create_ah(pd, attr);
-	if (IS_ERR(vah)) {
+	ah->ah = ib_create_ah(pd, attr);
+	if (IS_ERR(ah->ah)) {
 		kfree(ah);
-		ah = (struct ipoib_ah *)vah;
-	} else {
-		ah->ah = vah;
+		ah = NULL;
+	} else
 		ipoib_dbg(netdev_priv(dev), "Created ah %p\n", ah->ah);
-	}
 
 	return ah;
 }
@@ -121,9 +117,9 @@ static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv,
 
 		size = length - IPOIB_UD_HEAD_SIZE;
 
-		skb_frag_size_set(frag, size);
+		frag->size = size;
 		skb->data_len += size;
-		skb->truesize += PAGE_SIZE;
+		skb->truesize += size;
 	} else
 		skb_put(skb, length);
 
@@ -156,18 +152,14 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct sk_buff *skb;
 	int buf_size;
-	int tailroom;
 	u64 *mapping;
 
-	if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
+	if (ipoib_ud_need_sg(priv->max_ib_mtu))
 		buf_size = IPOIB_UD_HEAD_SIZE;
-		tailroom = 128; /* reserve some tailroom for IP/TCP headers */
-	} else {
+	else
 		buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
-		tailroom = 0;
-	}
 
-	skb = dev_alloc_skb(buf_size + tailroom + 4);
+	skb = dev_alloc_skb(buf_size + 4);
 	if (unlikely(!skb))
 		return NULL;
 
@@ -190,7 +182,7 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
 			goto partial_error;
 		skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE);
 		mapping[1] =
-			ib_dma_map_page(priv->ca, page,
+			ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[0].page,
 					0, PAGE_SIZE, DMA_FROM_DEVICE);
 		if (unlikely(ib_dma_mapping_error(priv->ca, mapping[1])))
 			goto partial_error;
@@ -300,8 +292,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 	dev->stats.rx_bytes += skb->len;
 
 	skb->dev = dev;
-	if ((dev->features & NETIF_F_RXCSUM) &&
-			likely(wc->wc_flags & IB_WC_IP_CSUM_OK))
+	if ((dev->features & NETIF_F_RXCSUM) && likely(wc->csum_ok))
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
 	napi_gro_receive(&priv->napi, skb);
@@ -331,10 +322,9 @@ static int ipoib_dma_map_tx(struct ib_device *ca,
 		off = 0;
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
-		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-		mapping[i + off] = ib_dma_map_page(ca,
-						 skb_frag_page(frag),
-						 frag->page_offset, skb_frag_size(frag),
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+		mapping[i + off] = ib_dma_map_page(ca, frag->page,
+						   frag->page_offset, frag->size,
 						 DMA_TO_DEVICE);
 		if (unlikely(ib_dma_mapping_error(ca, mapping[i + off])))
 			goto partial_error;
@@ -343,9 +333,8 @@ static int ipoib_dma_map_tx(struct ib_device *ca,
 
 partial_error:
 	for (; i > 0; --i) {
-		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
-
-		ib_dma_unmap_page(ca, mapping[i - !off], skb_frag_size(frag), DMA_TO_DEVICE);
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
+		ib_dma_unmap_page(ca, mapping[i - !off], frag->size, DMA_TO_DEVICE);
 	}
 
 	if (off)
@@ -369,9 +358,8 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca,
 		off = 0;
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
-		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
-		ib_dma_unmap_page(ca, mapping[i + off], skb_frag_size(frag),
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+		ib_dma_unmap_page(ca, mapping[i + off], frag->size,
 				  DMA_TO_DEVICE);
 	}
 }
@@ -521,7 +509,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
 
 	for (i = 0; i < nr_frags; ++i) {
 		priv->tx_sge[i + off].addr = mapping[i + off];
-		priv->tx_sge[i + off].length = skb_frag_size(&frags[i]);
+		priv->tx_sge[i + off].length = frags[i].size;
 	}
 	priv->tx_wr.num_sge	 = nr_frags + off;
 	priv->tx_wr.wr_id	 = wr_id;
@@ -615,9 +603,8 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 
 		address->last_send = priv->tx_head;
 		++priv->tx_head;
-
 		skb_orphan(skb);
-		skb_dst_drop(skb);
+
 	}
 
 	if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 6fdc9e78da0..a98c414978e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -46,8 +46,7 @@
 #include <linux/ip.h>
 #include <linux/in.h>
 
-#include <linux/jhash.h>
-#include <net/arp.h>
+#include <net/dst.h>
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
@@ -85,7 +84,6 @@ struct ib_sa_client ipoib_sa_client;
 
 static void ipoib_add_one(struct ib_device *device);
 static void ipoib_remove_one(struct ib_device *device);
-static void ipoib_neigh_reclaim(struct rcu_head *rp);
 
 static struct ib_client ipoib_client = {
 	.name   = "ipoib",
@@ -150,7 +148,7 @@ static int ipoib_stop(struct net_device *dev)
 
 	netif_stop_queue(dev);
 
-	ipoib_ib_dev_down(dev, 1);
+	ipoib_ib_dev_down(dev, 0);
 	ipoib_ib_dev_stop(dev, 0);
 
 	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
@@ -173,12 +171,7 @@ static int ipoib_stop(struct net_device *dev)
 	return 0;
 }
 
-static void ipoib_uninit(struct net_device *dev)
-{
-	ipoib_dev_cleanup(dev);
-}
-
-static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features)
+static u32 ipoib_fix_features(struct net_device *dev, u32 features)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
@@ -215,37 +208,6 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-int ipoib_set_mode(struct net_device *dev, const char *buf)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-
-	/* flush paths if we switch modes so that connections are restarted */
-	if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) {
-		set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
-		ipoib_warn(priv, "enabling connected mode "
-			   "will cause multicast packet drops\n");
-		netdev_update_features(dev);
-		rtnl_unlock();
-		priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
-
-		ipoib_flush_paths(dev);
-		rtnl_lock();
-		return 0;
-	}
-
-	if (!strcmp(buf, "datagram\n")) {
-		clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
-		netdev_update_features(dev);
-		dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
-		rtnl_unlock();
-		ipoib_flush_paths(dev);
-		rtnl_lock();
-		return 0;
-	}
-
-	return -EINVAL;
-}
-
 static struct ipoib_path *__path_find(struct net_device *dev, void *gid)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -302,15 +264,30 @@ static int __path_add(struct net_device *dev, struct ipoib_path *path)
 
 static void path_free(struct net_device *dev, struct ipoib_path *path)
 {
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_neigh *neigh, *tn;
 	struct sk_buff *skb;
+	unsigned long flags;
 
 	while ((skb = __skb_dequeue(&path->queue)))
 		dev_kfree_skb_irq(skb);
 
-	ipoib_dbg(netdev_priv(dev), "path_free\n");
+	spin_lock_irqsave(&priv->lock, flags);
+
+	list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
+		/*
+		 * It's safe to call ipoib_put_ah() inside priv->lock
+		 * here, because we know that path->ah will always
+		 * hold one more reference, so ipoib_put_ah() will
+		 * never do more than decrement the ref count.
+		 */
+		if (neigh->ah)
+			ipoib_put_ah(neigh->ah);
+
+		ipoib_neigh_free(dev, neigh);
+	}
 
-	/* remove all neigh connected to this path */
-	ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw);
+	spin_unlock_irqrestore(&priv->lock, flags);
 
 	if (path->ah)
 		ipoib_put_ah(path->ah);
@@ -455,7 +432,7 @@ static void path_rec_completion(int status,
 
 	spin_lock_irqsave(&priv->lock, flags);
 
-	if (!IS_ERR_OR_NULL(ah)) {
+	if (ah) {
 		path->pathrec = *pathrec;
 
 		old_ah   = path->ah;
@@ -481,15 +458,19 @@ static void path_rec_completion(int status,
 			}
 			kref_get(&path->ah->ref);
 			neigh->ah = path->ah;
+			memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
+			       sizeof(union ib_gid));
 
-			if (ipoib_cm_enabled(dev, neigh->daddr)) {
+			if (ipoib_cm_enabled(dev, neigh->neighbour)) {
 				if (!ipoib_cm_get(neigh))
 					ipoib_cm_set(neigh, ipoib_cm_create_tx(dev,
 									       path,
 									       neigh));
 				if (!ipoib_cm_get(neigh)) {
 					list_del(&neigh->list);
-					ipoib_neigh_free(neigh);
+					if (neigh->ah)
+						ipoib_put_ah(neigh->ah);
+					ipoib_neigh_free(dev, neigh);
 					continue;
 				}
 			}
@@ -574,26 +555,28 @@ static int path_rec_start(struct net_device *dev,
 	return 0;
 }
 
-static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
-			   struct net_device *dev)
+/* called with rcu_read_lock */
+static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_path *path;
 	struct ipoib_neigh *neigh;
+	struct neighbour *n;
 	unsigned long flags;
 
-	spin_lock_irqsave(&priv->lock, flags);
-	neigh = ipoib_neigh_alloc(daddr, dev);
+	n = dst_get_neighbour(skb_dst(skb));
+	neigh = ipoib_neigh_alloc(n, skb->dev);
 	if (!neigh) {
-		spin_unlock_irqrestore(&priv->lock, flags);
 		++dev->stats.tx_dropped;
 		dev_kfree_skb_any(skb);
 		return;
 	}
 
-	path = __path_find(dev, daddr + 4);
+	spin_lock_irqsave(&priv->lock, flags);
+
+	path = __path_find(dev, n->ha + 4);
 	if (!path) {
-		path = path_rec_create(dev, daddr + 4);
+		path = path_rec_create(dev, n->ha + 4);
 		if (!path)
 			goto err_path;
 
@@ -605,13 +588,17 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
 		if (path->ah) {
 			kref_get(&path->ah->ref);
 			neigh->ah = path->ah;
+			memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
+			       sizeof(union ib_gid));
 
-			if (ipoib_cm_enabled(dev, neigh->daddr)) {
+			if (ipoib_cm_enabled(dev, neigh->neighbour)) {
 				if (!ipoib_cm_get(neigh))
 					ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh));
 				if (!ipoib_cm_get(neigh)) {
 					list_del(&neigh->list);
-					ipoib_neigh_free(neigh);
+					if (neigh->ah)
+						ipoib_put_ah(neigh->ah);
+					ipoib_neigh_free(dev, neigh);
 					goto err_drop;
 				}
 				if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
@@ -623,8 +610,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
 			}
 		} else {
 			spin_unlock_irqrestore(&priv->lock, flags);
-			ipoib_send(dev, skb, path->ah, IPOIB_QPN(daddr));
-			ipoib_neigh_put(neigh);
+			ipoib_send(dev, skb, path->ah, IPOIB_QPN(n->ha));
 			return;
 		}
 	} else {
@@ -637,24 +623,42 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
 	}
 
 	spin_unlock_irqrestore(&priv->lock, flags);
-	ipoib_neigh_put(neigh);
 	return;
 
 err_list:
 	list_del(&neigh->list);
 
 err_path:
-	ipoib_neigh_free(neigh);
+	ipoib_neigh_free(dev, neigh);
 err_drop:
 	++dev->stats.tx_dropped;
 	dev_kfree_skb_any(skb);
 
 	spin_unlock_irqrestore(&priv->lock, flags);
-	ipoib_neigh_put(neigh);
+}
+
+/* called with rcu_read_lock */
+static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(skb->dev);
+	struct dst_entry *dst = skb_dst(skb);
+	struct neighbour *n;
+
+	/* Look up path record for unicasts */
+	n = dst_get_neighbour(dst);
+	if (n->ha[4] != 0xff) {
+		neigh_add_path(skb, dev);
+		return;
+	}
+
+	/* Add in the P_Key for multicasts */
+	n->ha[8] = (priv->pkey >> 8) & 0xff;
+	n->ha[9] = priv->pkey & 0xff;
+	ipoib_mcast_send(dev, n->ha + 4, skb);
 }
 
 static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
-			     struct ipoib_cb *cb)
+			     struct ipoib_pseudoheader *phdr)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_path *path;
@@ -662,15 +666,17 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
 
 	spin_lock_irqsave(&priv->lock, flags);
 
-	path = __path_find(dev, cb->hwaddr + 4);
+	path = __path_find(dev, phdr->hwaddr + 4);
 	if (!path || !path->valid) {
 		int new_path = 0;
 
 		if (!path) {
-			path = path_rec_create(dev, cb->hwaddr + 4);
+			path = path_rec_create(dev, phdr->hwaddr + 4);
 			new_path = 1;
 		}
 		if (path) {
+			/* put pseudoheader back on for next time */
+			skb_push(skb, sizeof *phdr);
 			__skb_queue_tail(&path->queue, skb);
 
 			if (!path->query && path_rec_start(dev, path)) {
@@ -694,10 +700,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
 			  be16_to_cpu(path->pathrec.dlid));
 
 		spin_unlock_irqrestore(&priv->lock, flags);
-		ipoib_send(dev, skb, path->ah, IPOIB_QPN(cb->hwaddr));
+		ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
 		return;
 	} else if ((path->query || !path_rec_start(dev, path)) &&
 		   skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+		/* put pseudoheader back on for next time */
+		skb_push(skb, sizeof *phdr);
 		__skb_queue_tail(&path->queue, skb);
 	} else {
 		++dev->stats.tx_dropped;
@@ -711,80 +719,91 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_neigh *neigh;
-	struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb;
-	struct ipoib_header *header;
+	struct neighbour *n = NULL;
 	unsigned long flags;
 
-	header = (struct ipoib_header *) skb->data;
+	rcu_read_lock();
+	if (likely(skb_dst(skb)))
+		n = dst_get_neighbour(skb_dst(skb));
 
-	if (unlikely(cb->hwaddr[4] == 0xff)) {
-		/* multicast, arrange "if" according to probability */
-		if ((header->proto != htons(ETH_P_IP)) &&
-		    (header->proto != htons(ETH_P_IPV6)) &&
-		    (header->proto != htons(ETH_P_ARP)) &&
-		    (header->proto != htons(ETH_P_RARP))) {
-			/* ethertype not supported by IPoIB */
-			++dev->stats.tx_dropped;
-			dev_kfree_skb_any(skb);
-			return NETDEV_TX_OK;
+	if (likely(n)) {
+		if (unlikely(!*to_ipoib_neigh(n))) {
+			ipoib_path_lookup(skb, dev);
+			goto unlock;
 		}
-		/* Add in the P_Key for multicast*/
-		cb->hwaddr[8] = (priv->pkey >> 8) & 0xff;
-		cb->hwaddr[9] = priv->pkey & 0xff;
-
-		neigh = ipoib_neigh_get(dev, cb->hwaddr);
-		if (likely(neigh))
-			goto send_using_neigh;
-		ipoib_mcast_send(dev, cb->hwaddr, skb);
-		return NETDEV_TX_OK;
-	}
 
-	/* unicast, arrange "switch" according to probability */
-	switch (header->proto) {
-	case htons(ETH_P_IP):
-	case htons(ETH_P_IPV6):
-		neigh = ipoib_neigh_get(dev, cb->hwaddr);
-		if (unlikely(!neigh)) {
-			neigh_add_path(skb, cb->hwaddr, dev);
-			return NETDEV_TX_OK;
+		neigh = *to_ipoib_neigh(n);
+
+		if (unlikely((memcmp(&neigh->dgid.raw,
+				     n->ha + 4,
+				     sizeof(union ib_gid))) ||
+			     (neigh->dev != dev))) {
+			spin_lock_irqsave(&priv->lock, flags);
+			/*
+			 * It's safe to call ipoib_put_ah() inside
+			 * priv->lock here, because we know that
+			 * path->ah will always hold one more reference,
+			 * so ipoib_put_ah() will never do more than
+			 * decrement the ref count.
+			 */
+			if (neigh->ah)
+				ipoib_put_ah(neigh->ah);
+			list_del(&neigh->list);
+			ipoib_neigh_free(dev, neigh);
+			spin_unlock_irqrestore(&priv->lock, flags);
+			ipoib_path_lookup(skb, dev);
+			goto unlock;
 		}
-		break;
-	case htons(ETH_P_ARP):
-	case htons(ETH_P_RARP):
-		/* for unicast ARP and RARP should always perform path find */
-		unicast_arp_send(skb, dev, cb);
-		return NETDEV_TX_OK;
-	default:
-		/* ethertype not supported by IPoIB */
-		++dev->stats.tx_dropped;
-		dev_kfree_skb_any(skb);
-		return NETDEV_TX_OK;
-	}
 
-send_using_neigh:
-	/* note we now hold a ref to neigh */
-	if (ipoib_cm_get(neigh)) {
-		if (ipoib_cm_up(neigh)) {
-			ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
-			goto unref;
+		if (ipoib_cm_get(neigh)) {
+			if (ipoib_cm_up(neigh)) {
+				ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
+				goto unlock;
+			}
+		} else if (neigh->ah) {
+			ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(n->ha));
+			goto unlock;
 		}
-	} else if (neigh->ah) {
-		ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(cb->hwaddr));
-		goto unref;
-	}
 
 		if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
 			spin_lock_irqsave(&priv->lock, flags);
 			__skb_queue_tail(&neigh->queue, skb);
 			spin_unlock_irqrestore(&priv->lock, flags);
+		} else {
+			++dev->stats.tx_dropped;
+			dev_kfree_skb_any(skb);
+		}
 	} else {
-		++dev->stats.tx_dropped;
-		dev_kfree_skb_any(skb);
-	}
+		struct ipoib_pseudoheader *phdr =
+			(struct ipoib_pseudoheader *) skb->data;
+		skb_pull(skb, sizeof *phdr);
 
-unref:
-	ipoib_neigh_put(neigh);
+		if (phdr->hwaddr[4] == 0xff) {
+			/* Add in the P_Key for multicast*/
+			phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff;
+			phdr->hwaddr[9] = priv->pkey & 0xff;
+
+			ipoib_mcast_send(dev, phdr->hwaddr + 4, skb);
+		} else {
+			/* unicast GID -- should be ARP or RARP reply */
+
+			if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) &&
+			    (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) {
+				ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x %pI6\n",
+					   skb_dst(skb) ? "neigh" : "dst",
+					   be16_to_cpup((__be16 *) skb->data),
+					   IPOIB_QPN(phdr->hwaddr),
+					   phdr->hwaddr + 4);
+				dev_kfree_skb_any(skb);
+				++dev->stats.tx_dropped;
+				goto unlock;
+			}
 
+			unicast_arp_send(skb, dev, phdr);
+		}
+	}
+unlock:
+	rcu_read_unlock();
 	return NETDEV_TX_OK;
 }
 
@@ -806,7 +825,8 @@ static int ipoib_hard_header(struct sk_buff *skb,
 			     const void *daddr, const void *saddr, unsigned len)
 {
 	struct ipoib_header *header;
-	struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb;
+	struct dst_entry *dst;
+	struct neighbour *n;
 
 	header = (struct ipoib_header *) skb_push(skb, sizeof *header);
 
@@ -814,11 +834,19 @@ static int ipoib_hard_header(struct sk_buff *skb,
 	header->reserved = 0;
 
 	/*
-	 * we don't rely on dst_entry structure, always stuff the
-	 * destination address into skb->cb so we can figure out where
-	 * to send the packet later.
+	 * If we don't have a neighbour structure, stuff the
+	 * destination address onto the front of the skb so we can
+	 * figure out where to send the packet later.
 	 */
-	memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN);
+	dst = skb_dst(skb);
+	n = NULL;
+	if (dst)
+		n = dst_get_neighbour_raw(dst);
+	if ((!dst || !n) && daddr) {
+		struct ipoib_pseudoheader *phdr =
+			(struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr);
+		memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
+	}
 
 	return 0;
 }
@@ -835,433 +863,86 @@ static void ipoib_set_mcast_list(struct net_device *dev)
835 queue_work(ipoib_workqueue, &priv->restart_task); 863 queue_work(ipoib_workqueue, &priv->restart_task);
836} 864}
837 865
838static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr) 866static void ipoib_neigh_cleanup(struct neighbour *n)
839{
840 /*
841 * Use only the address parts that contributes to spreading
842 * The subnet prefix is not used as one can not connect to
843 * same remote port (GUID) using the same remote QPN via two
844 * different subnets.
845 */
846 /* qpn octets[1:4) & port GUID octets[12:20) */
847 u32 *daddr_32 = (u32 *) daddr;
848 u32 hv;
849
850 hv = jhash_3words(daddr_32[3], daddr_32[4], 0xFFFFFF & daddr_32[0], 0);
851 return hv & htbl->mask;
852}
853
854struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr)
855{
856 struct ipoib_dev_priv *priv = netdev_priv(dev);
857 struct ipoib_neigh_table *ntbl = &priv->ntbl;
858 struct ipoib_neigh_hash *htbl;
859 struct ipoib_neigh *neigh = NULL;
860 u32 hash_val;
861
862 rcu_read_lock_bh();
863
864 htbl = rcu_dereference_bh(ntbl->htbl);
865
866 if (!htbl)
867 goto out_unlock;
868
869 hash_val = ipoib_addr_hash(htbl, daddr);
870 for (neigh = rcu_dereference_bh(htbl->buckets[hash_val]);
871 neigh != NULL;
872 neigh = rcu_dereference_bh(neigh->hnext)) {
873 if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) {
874 /* found, take one ref on behalf of the caller */
875 if (!atomic_inc_not_zero(&neigh->refcnt)) {
876 /* deleted */
877 neigh = NULL;
878 goto out_unlock;
879 }
880 neigh->alive = jiffies;
881 goto out_unlock;
882 }
883 }
884
885out_unlock:
886 rcu_read_unlock_bh();
887 return neigh;
888}
889
890static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
891{ 867{
892 struct ipoib_neigh_table *ntbl = &priv->ntbl; 868 struct ipoib_neigh *neigh;
893 struct ipoib_neigh_hash *htbl; 869 struct ipoib_dev_priv *priv = netdev_priv(n->dev);
894 unsigned long neigh_obsolete;
895 unsigned long dt;
896 unsigned long flags; 870 unsigned long flags;
897 int i; 871 struct ipoib_ah *ah = NULL;
898 872
899 if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) 873 neigh = *to_ipoib_neigh(n);
874 if (neigh)
875 priv = netdev_priv(neigh->dev);
876 else
900 return; 877 return;
878 ipoib_dbg(priv,
879 "neigh_cleanup for %06x %pI6\n",
880 IPOIB_QPN(n->ha),
881 n->ha + 4);
901 882
902 spin_lock_irqsave(&priv->lock, flags); 883 spin_lock_irqsave(&priv->lock, flags);
903 884
904 htbl = rcu_dereference_protected(ntbl->htbl, 885 if (neigh->ah)
905 lockdep_is_held(&priv->lock)); 886 ah = neigh->ah;
906 887 list_del(&neigh->list);
907 if (!htbl) 888 ipoib_neigh_free(n->dev, neigh);
908 goto out_unlock;
909
910 /* neigh is obsolete if it was idle for two GC periods */
911 dt = 2 * arp_tbl.gc_interval;
912 neigh_obsolete = jiffies - dt;
913 /* handle possible race condition */
914 if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
915 goto out_unlock;
916
917 for (i = 0; i < htbl->size; i++) {
918 struct ipoib_neigh *neigh;
919 struct ipoib_neigh __rcu **np = &htbl->buckets[i];
920
921 while ((neigh = rcu_dereference_protected(*np,
922 lockdep_is_held(&priv->lock))) != NULL) {
923 /* was the neigh idle for two GC periods */
924 if (time_after(neigh_obsolete, neigh->alive)) {
925 rcu_assign_pointer(*np,
926 rcu_dereference_protected(neigh->hnext,
927 lockdep_is_held(&priv->lock)));
928 /* remove from path/mc list */
929 list_del(&neigh->list);
930 call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
931 } else {
932 np = &neigh->hnext;
933 }
934
935 }
936 }
937 889
938out_unlock:
939 spin_unlock_irqrestore(&priv->lock, flags); 890 spin_unlock_irqrestore(&priv->lock, flags);
940}
941
942static void ipoib_reap_neigh(struct work_struct *work)
943{
944 struct ipoib_dev_priv *priv =
945 container_of(work, struct ipoib_dev_priv, neigh_reap_task.work);
946 891
947 __ipoib_reap_neigh(priv); 892 if (ah)
948 893 ipoib_put_ah(ah);
949 if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
950 queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
951 arp_tbl.gc_interval);
952} 894}
953 895
954 896struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour,
955static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr,
956 struct net_device *dev) 897 struct net_device *dev)
957{ 898{
958 struct ipoib_neigh *neigh; 899 struct ipoib_neigh *neigh;
959 900
960 neigh = kzalloc(sizeof *neigh, GFP_ATOMIC); 901 neigh = kmalloc(sizeof *neigh, GFP_ATOMIC);
961 if (!neigh) 902 if (!neigh)
962 return NULL; 903 return NULL;
963 904
905 neigh->neighbour = neighbour;
964 neigh->dev = dev; 906 neigh->dev = dev;
965 memcpy(&neigh->daddr, daddr, sizeof(neigh->daddr)); 907 memset(&neigh->dgid.raw, 0, sizeof (union ib_gid));
908 *to_ipoib_neigh(neighbour) = neigh;
966 skb_queue_head_init(&neigh->queue); 909 skb_queue_head_init(&neigh->queue);
967 INIT_LIST_HEAD(&neigh->list);
968 ipoib_cm_set(neigh, NULL); 910 ipoib_cm_set(neigh, NULL);
969 /* one ref on behalf of the caller */
970 atomic_set(&neigh->refcnt, 1);
971 911
972 return neigh; 912 return neigh;
973} 913}
974 914
975struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, 915void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh)
976 struct net_device *dev)
977{ 916{
978 struct ipoib_dev_priv *priv = netdev_priv(dev);
979 struct ipoib_neigh_table *ntbl = &priv->ntbl;
980 struct ipoib_neigh_hash *htbl;
981 struct ipoib_neigh *neigh;
982 u32 hash_val;
983
984 htbl = rcu_dereference_protected(ntbl->htbl,
985 lockdep_is_held(&priv->lock));
986 if (!htbl) {
987 neigh = NULL;
988 goto out_unlock;
989 }
990
991 /* need to add a new neigh, but maybe some other thread succeeded?
992 * recalc hash, maybe hash resize took place so we do a search
993 */
994 hash_val = ipoib_addr_hash(htbl, daddr);
995 for (neigh = rcu_dereference_protected(htbl->buckets[hash_val],
996 lockdep_is_held(&priv->lock));
997 neigh != NULL;
998 neigh = rcu_dereference_protected(neigh->hnext,
999 lockdep_is_held(&priv->lock))) {
1000 if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) {
1001 /* found, take one ref on behalf of the caller */
1002 if (!atomic_inc_not_zero(&neigh->refcnt)) {
1003 /* deleted */
1004 neigh = NULL;
1005 break;
1006 }
1007 neigh->alive = jiffies;
1008 goto out_unlock;
1009 }
1010 }
1011
1012 neigh = ipoib_neigh_ctor(daddr, dev);
1013 if (!neigh)
1014 goto out_unlock;
1015
1016 /* one ref on behalf of the hash table */
1017 atomic_inc(&neigh->refcnt);
1018 neigh->alive = jiffies;
1019 /* put in hash */
1020 rcu_assign_pointer(neigh->hnext,
1021 rcu_dereference_protected(htbl->buckets[hash_val],
1022 lockdep_is_held(&priv->lock)));
1023 rcu_assign_pointer(htbl->buckets[hash_val], neigh);
1024 atomic_inc(&ntbl->entries);
1025
1026out_unlock:
1027
1028 return neigh;
1029}
1030
1031void ipoib_neigh_dtor(struct ipoib_neigh *neigh)
1032{
1033 /* neigh reference count was dropprd to zero */
1034 struct net_device *dev = neigh->dev;
1035 struct ipoib_dev_priv *priv = netdev_priv(dev);
1036 struct sk_buff *skb; 917 struct sk_buff *skb;
1037 if (neigh->ah) 918 *to_ipoib_neigh(neigh->neighbour) = NULL;
1038 ipoib_put_ah(neigh->ah);
1039 while ((skb = __skb_dequeue(&neigh->queue))) { 919 while ((skb = __skb_dequeue(&neigh->queue))) {
1040 ++dev->stats.tx_dropped; 920 ++dev->stats.tx_dropped;
1041 dev_kfree_skb_any(skb); 921 dev_kfree_skb_any(skb);
1042 } 922 }
1043 if (ipoib_cm_get(neigh)) 923 if (ipoib_cm_get(neigh))
1044 ipoib_cm_destroy_tx(ipoib_cm_get(neigh)); 924 ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
1045 ipoib_dbg(netdev_priv(dev),
1046 "neigh free for %06x %pI6\n",
1047 IPOIB_QPN(neigh->daddr),
1048 neigh->daddr + 4);
1049 kfree(neigh); 925 kfree(neigh);
1050 if (atomic_dec_and_test(&priv->ntbl.entries)) {
1051 if (test_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags))
1052 complete(&priv->ntbl.flushed);
1053 }
1054}
1055
1056static void ipoib_neigh_reclaim(struct rcu_head *rp)
1057{
1058 /* Called as a result of removal from hash table */
1059 struct ipoib_neigh *neigh = container_of(rp, struct ipoib_neigh, rcu);
1060 /* note TX context may hold another ref */
1061 ipoib_neigh_put(neigh);
1062}
1063
1064void ipoib_neigh_free(struct ipoib_neigh *neigh)
1065{
1066 struct net_device *dev = neigh->dev;
1067 struct ipoib_dev_priv *priv = netdev_priv(dev);
1068 struct ipoib_neigh_table *ntbl = &priv->ntbl;
1069 struct ipoib_neigh_hash *htbl;
1070 struct ipoib_neigh __rcu **np;
1071 struct ipoib_neigh *n;
1072 u32 hash_val;
1073
1074 htbl = rcu_dereference_protected(ntbl->htbl,
1075 lockdep_is_held(&priv->lock));
1076 if (!htbl)
1077 return;
1078
1079 hash_val = ipoib_addr_hash(htbl, neigh->daddr);
1080 np = &htbl->buckets[hash_val];
1081 for (n = rcu_dereference_protected(*np,
1082 lockdep_is_held(&priv->lock));
1083 n != NULL;
1084 n = rcu_dereference_protected(*np,
1085 lockdep_is_held(&priv->lock))) {
1086 if (n == neigh) {
1087 /* found */
1088 rcu_assign_pointer(*np,
1089 rcu_dereference_protected(neigh->hnext,
1090 lockdep_is_held(&priv->lock)));
1091 call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
1092 return;
1093 } else {
1094 np = &n->hnext;
1095 }
1096 }
1097} 926}
1098 927
1099static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) 928static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms)
1100{ 929{
1101 struct ipoib_neigh_table *ntbl = &priv->ntbl; 930 parms->neigh_cleanup = ipoib_neigh_cleanup;
1102 struct ipoib_neigh_hash *htbl;
1103 struct ipoib_neigh **buckets;
1104 u32 size;
1105
1106 clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);
1107 ntbl->htbl = NULL;
1108 htbl = kzalloc(sizeof(*htbl), GFP_KERNEL);
1109 if (!htbl)
1110 return -ENOMEM;
1111 set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
1112 size = roundup_pow_of_two(arp_tbl.gc_thresh3);
1113 buckets = kzalloc(size * sizeof(*buckets), GFP_KERNEL);
1114 if (!buckets) {
1115 kfree(htbl);
1116 return -ENOMEM;
1117 }
1118 htbl->size = size;
1119 htbl->mask = (size - 1);
1120 htbl->buckets = buckets;
1121 ntbl->htbl = htbl;
1122 htbl->ntbl = ntbl;
1123 atomic_set(&ntbl->entries, 0);
1124
1125 /* start garbage collection */
1126 clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
1127 queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
1128 arp_tbl.gc_interval);
1129 931
1130 return 0; 932 return 0;
1131} 933}
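
Sizing the table with roundup_pow_of_two() is what makes htbl->mask valid: with a power-of-two size, the bucket index is a single AND rather than a modulo. A plausible hash helper under that assumption (the driver's real ipoib_addr_hash() may differ in detail):

    #include <linux/jhash.h>
    #include <linux/if_infiniband.h>        /* INFINIBAND_ALEN */

    static u32 addr_hash(const u8 *daddr, u32 mask)
    {
            /* hash the 20-byte hardware address; mask == size - 1 */
            return jhash(daddr, INFINIBAND_ALEN, 0) & mask;
    }
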
1132 934
1133static void neigh_hash_free_rcu(struct rcu_head *head)
1134{
1135 struct ipoib_neigh_hash *htbl = container_of(head,
1136 struct ipoib_neigh_hash,
1137 rcu);
1138 struct ipoib_neigh __rcu **buckets = htbl->buckets;
1139 struct ipoib_neigh_table *ntbl = htbl->ntbl;
1140
1141 kfree(buckets);
1142 kfree(htbl);
1143 complete(&ntbl->deleted);
1144}
1145
1146void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
1147{
1148 struct ipoib_dev_priv *priv = netdev_priv(dev);
1149 struct ipoib_neigh_table *ntbl = &priv->ntbl;
1150 struct ipoib_neigh_hash *htbl;
1151 unsigned long flags;
1152 int i;
1153
1154	/* remove all neighs connected to a given path or mcast */
1155 spin_lock_irqsave(&priv->lock, flags);
1156
1157 htbl = rcu_dereference_protected(ntbl->htbl,
1158 lockdep_is_held(&priv->lock));
1159
1160 if (!htbl)
1161 goto out_unlock;
1162
1163 for (i = 0; i < htbl->size; i++) {
1164 struct ipoib_neigh *neigh;
1165 struct ipoib_neigh __rcu **np = &htbl->buckets[i];
1166
1167 while ((neigh = rcu_dereference_protected(*np,
1168 lockdep_is_held(&priv->lock))) != NULL) {
1169	/* delete neighs belonging to this parent */
1170 if (!memcmp(gid, neigh->daddr + 4, sizeof (union ib_gid))) {
1171 rcu_assign_pointer(*np,
1172 rcu_dereference_protected(neigh->hnext,
1173 lockdep_is_held(&priv->lock)));
1174 /* remove from parent list */
1175 list_del(&neigh->list);
1176 call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
1177 } else {
1178 np = &neigh->hnext;
1179 }
1180
1181 }
1182 }
1183out_unlock:
1184 spin_unlock_irqrestore(&priv->lock, flags);
1185}
1186
1187static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
1188{
1189 struct ipoib_neigh_table *ntbl = &priv->ntbl;
1190 struct ipoib_neigh_hash *htbl;
1191 unsigned long flags;
1192 int i, wait_flushed = 0;
1193
1194 init_completion(&priv->ntbl.flushed);
1195
1196 spin_lock_irqsave(&priv->lock, flags);
1197
1198 htbl = rcu_dereference_protected(ntbl->htbl,
1199 lockdep_is_held(&priv->lock));
1200 if (!htbl)
1201 goto out_unlock;
1202
1203 wait_flushed = atomic_read(&priv->ntbl.entries);
1204 if (!wait_flushed)
1205 goto free_htbl;
1206
1207 for (i = 0; i < htbl->size; i++) {
1208 struct ipoib_neigh *neigh;
1209 struct ipoib_neigh __rcu **np = &htbl->buckets[i];
1210
1211 while ((neigh = rcu_dereference_protected(*np,
1212 lockdep_is_held(&priv->lock))) != NULL) {
1213 rcu_assign_pointer(*np,
1214 rcu_dereference_protected(neigh->hnext,
1215 lockdep_is_held(&priv->lock)));
1216 /* remove from path/mc list */
1217 list_del(&neigh->list);
1218 call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
1219 }
1220 }
1221
1222free_htbl:
1223 rcu_assign_pointer(ntbl->htbl, NULL);
1224 call_rcu(&htbl->rcu, neigh_hash_free_rcu);
1225
1226out_unlock:
1227 spin_unlock_irqrestore(&priv->lock, flags);
1228 if (wait_flushed)
1229 wait_for_completion(&priv->ntbl.flushed);
1230}
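
The flush relies on the entries counter and the flushed completion working as a pair: the flusher snapshots the number of live entries, unlinks them all, and blocks until the destructor of the last one fires complete(). A compressed sketch of both halves (struct table and the names here are illustrative):

    #include <linux/completion.h>
    #include <linux/spinlock.h>

    struct table {
            spinlock_t lock;
            atomic_t entries;
            struct completion flushed;
    };

    static void entry_dtor(struct table *t)
    {
            /* called when an entry's refcount drops to zero */
            if (atomic_dec_and_test(&t->entries))
                    complete(&t->flushed);
    }

    static void table_flush_and_wait(struct table *t)
    {
            int pending;

            init_completion(&t->flushed);

            spin_lock_irq(&t->lock);
            pending = atomic_read(&t->entries);
            /* ... unlink every entry and call_rcu() each one ... */
            spin_unlock_irq(&t->lock);

            if (pending)
                    wait_for_completion(&t->flushed);
    }
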
1231
1232static void ipoib_neigh_hash_uninit(struct net_device *dev)
1233{
1234 struct ipoib_dev_priv *priv = netdev_priv(dev);
1235 int stopped;
1236
1237 ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
1238 init_completion(&priv->ntbl.deleted);
1239 set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);
1240
1241	/* Stop GC; if called after an init failure we need to cancel the queued work */
1242 stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
1243 if (!stopped)
1244 cancel_delayed_work(&priv->neigh_reap_task);
1245
1246 ipoib_flush_neighs(priv);
1247
1248 wait_for_completion(&priv->ntbl.deleted);
1249}
1250
1251
1252int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) 935int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
1253{ 936{
1254 struct ipoib_dev_priv *priv = netdev_priv(dev); 937 struct ipoib_dev_priv *priv = netdev_priv(dev);
1255 938
1256 if (ipoib_neigh_hash_init(priv) < 0)
1257 goto out;
1258 /* Allocate RX/TX "rings" to hold queued skbs */ 939 /* Allocate RX/TX "rings" to hold queued skbs */
1259 priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, 940 priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
1260 GFP_KERNEL); 941 GFP_KERNEL);
1261 if (!priv->rx_ring) { 942 if (!priv->rx_ring) {
1262 printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", 943 printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
1263 ca->name, ipoib_recvq_size); 944 ca->name, ipoib_recvq_size);
1264 goto out_neigh_hash_cleanup; 945 goto out;
1265 } 946 }
1266 947
1267 priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring); 948 priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
@@ -1284,8 +965,6 @@ out_tx_ring_cleanup:
1284out_rx_ring_cleanup: 965out_rx_ring_cleanup:
1285 kfree(priv->rx_ring); 966 kfree(priv->rx_ring);
1286 967
1287out_neigh_hash_cleanup:
1288 ipoib_neigh_hash_uninit(dev);
1289out: 968out:
1290 return -ENOMEM; 969 return -ENOMEM;
1291} 970}
@@ -1293,20 +972,15 @@ out:
1293void ipoib_dev_cleanup(struct net_device *dev) 972void ipoib_dev_cleanup(struct net_device *dev)
1294{ 973{
1295 struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv; 974 struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv;
1296 LIST_HEAD(head);
1297
1298 ASSERT_RTNL();
1299 975
1300 ipoib_delete_debug_files(dev); 976 ipoib_delete_debug_files(dev);
1301 977
1302 /* Delete any child interfaces first */ 978 /* Delete any child interfaces first */
1303 list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { 979 list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
1304 /* Stop GC on child */ 980 unregister_netdev(cpriv->dev);
1305 set_bit(IPOIB_STOP_NEIGH_GC, &cpriv->flags); 981 ipoib_dev_cleanup(cpriv->dev);
1306 cancel_delayed_work(&cpriv->neigh_reap_task); 982 free_netdev(cpriv->dev);
1307 unregister_netdevice_queue(cpriv->dev, &head);
1308 } 983 }
1309 unregister_netdevice_many(&head);
1310 984
1311 ipoib_ib_dev_cleanup(dev); 985 ipoib_ib_dev_cleanup(dev);
1312 986
@@ -1315,8 +989,6 @@ void ipoib_dev_cleanup(struct net_device *dev)
1315 989
1316 priv->rx_ring = NULL; 990 priv->rx_ring = NULL;
1317 priv->tx_ring = NULL; 991 priv->tx_ring = NULL;
1318
1319 ipoib_neigh_hash_uninit(dev);
1320} 992}
1321 993
1322static const struct header_ops ipoib_header_ops = { 994static const struct header_ops ipoib_header_ops = {
@@ -1324,17 +996,17 @@ static const struct header_ops ipoib_header_ops = {
1324}; 996};
1325 997
1326static const struct net_device_ops ipoib_netdev_ops = { 998static const struct net_device_ops ipoib_netdev_ops = {
1327 .ndo_uninit = ipoib_uninit,
1328 .ndo_open = ipoib_open, 999 .ndo_open = ipoib_open,
1329 .ndo_stop = ipoib_stop, 1000 .ndo_stop = ipoib_stop,
1330 .ndo_change_mtu = ipoib_change_mtu, 1001 .ndo_change_mtu = ipoib_change_mtu,
1331 .ndo_fix_features = ipoib_fix_features, 1002 .ndo_fix_features = ipoib_fix_features,
1332 .ndo_start_xmit = ipoib_start_xmit, 1003 .ndo_start_xmit = ipoib_start_xmit,
1333 .ndo_tx_timeout = ipoib_timeout, 1004 .ndo_tx_timeout = ipoib_timeout,
1334 .ndo_set_rx_mode = ipoib_set_mcast_list, 1005 .ndo_set_multicast_list = ipoib_set_mcast_list,
1006 .ndo_neigh_setup = ipoib_neigh_setup_dev,
1335}; 1007};
1336 1008
1337void ipoib_setup(struct net_device *dev) 1009static void ipoib_setup(struct net_device *dev)
1338{ 1010{
1339 struct ipoib_dev_priv *priv = netdev_priv(dev); 1011 struct ipoib_dev_priv *priv = netdev_priv(dev);
1340 1012
@@ -1349,7 +1021,11 @@ void ipoib_setup(struct net_device *dev)
1349 1021
1350 dev->flags |= IFF_BROADCAST | IFF_MULTICAST; 1022 dev->flags |= IFF_BROADCAST | IFF_MULTICAST;
1351 1023
1352 dev->hard_header_len = IPOIB_ENCAP_LEN; 1024 /*
1025 * We add in INFINIBAND_ALEN to allow for the destination
1026 * address "pseudoheader" for skbs without neighbour struct.
1027 */
1028 dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN;
1353 dev->addr_len = INFINIBAND_ALEN; 1029 dev->addr_len = INFINIBAND_ALEN;
1354 dev->type = ARPHRD_INFINIBAND; 1030 dev->type = ARPHRD_INFINIBAND;
1355 dev->tx_queue_len = ipoib_sendq_size * 2; 1031 dev->tx_queue_len = ipoib_sendq_size * 2;
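
The restored INFINIBAND_ALEN slack in hard_header_len exists so the hard-header path can prepend the destination address itself when no neighbour entry is attached; the xmit path pops it back off. A sketch of the push side, with hypothetical names (the struct mirrors the ipoib_pseudoheader being restored by this patch):

    #include <linux/skbuff.h>
    #include <linux/if_infiniband.h>
    #include <linux/string.h>

    struct example_pseudoheader {
            u8 hwaddr[INFINIBAND_ALEN];     /* 20-byte IPoIB destination */
    };

    static int example_hard_header(struct sk_buff *skb, const void *daddr)
    {
            struct example_pseudoheader *phdr;

            /* stash the destination in the headroom reserved by
             * hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN */
            phdr = (struct example_pseudoheader *)skb_push(skb, sizeof(*phdr));
            memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
            return 0;
    }
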
@@ -1380,7 +1056,6 @@ void ipoib_setup(struct net_device *dev)
1380 INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); 1056 INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy);
1381 INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); 1057 INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
1382 INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); 1058 INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
1383 INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh);
1384} 1059}
1385 1060
1386struct ipoib_dev_priv *ipoib_intf_alloc(const char *name) 1061struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
@@ -1412,9 +1087,12 @@ static ssize_t show_umcast(struct device *dev,
1412 return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags)); 1087 return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags));
1413} 1088}
1414 1089
1415void ipoib_set_umcast(struct net_device *ndev, int umcast_val) 1090static ssize_t set_umcast(struct device *dev,
1091 struct device_attribute *attr,
1092 const char *buf, size_t count)
1416{ 1093{
1417 struct ipoib_dev_priv *priv = netdev_priv(ndev); 1094 struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
1095 unsigned long umcast_val = simple_strtoul(buf, NULL, 0);
1418 1096
1419 if (umcast_val > 0) { 1097 if (umcast_val > 0) {
1420 set_bit(IPOIB_FLAG_UMCAST, &priv->flags); 1098 set_bit(IPOIB_FLAG_UMCAST, &priv->flags);
@@ -1422,15 +1100,6 @@ void ipoib_set_umcast(struct net_device *ndev, int umcast_val)
1422 "by userspace\n"); 1100 "by userspace\n");
1423 } else 1101 } else
1424 clear_bit(IPOIB_FLAG_UMCAST, &priv->flags); 1102 clear_bit(IPOIB_FLAG_UMCAST, &priv->flags);
1425}
1426
1427static ssize_t set_umcast(struct device *dev,
1428 struct device_attribute *attr,
1429 const char *buf, size_t count)
1430{
1431 unsigned long umcast_val = simple_strtoul(buf, NULL, 0);
1432
1433 ipoib_set_umcast(to_net_dev(dev), umcast_val);
1434 1103
1435 return count; 1104 return count;
1436} 1105}
@@ -1553,8 +1222,6 @@ static struct net_device *ipoib_add_port(const char *format,
1553 priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); 1222 priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
1554 priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; 1223 priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu;
1555 1224
1556 priv->dev->neigh_priv_len = sizeof(struct ipoib_neigh);
1557
1558 result = ib_query_pkey(hca, port, 0, &priv->pkey); 1225 result = ib_query_pkey(hca, port, 0, &priv->pkey);
1559 if (result) { 1226 if (result) {
1560 printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", 1227 printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
@@ -1627,9 +1294,6 @@ sysfs_failed:
1627 1294
1628register_failed: 1295register_failed:
1629 ib_unregister_event_handler(&priv->event_handler); 1296 ib_unregister_event_handler(&priv->event_handler);
1630 /* Stop GC if started before flush */
1631 set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
1632 cancel_delayed_work(&priv->neigh_reap_task);
1633 flush_workqueue(ipoib_workqueue); 1297 flush_workqueue(ipoib_workqueue);
1634 1298
1635event_failed: 1299event_failed:
@@ -1696,12 +1360,10 @@ static void ipoib_remove_one(struct ib_device *device)
1696 dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); 1360 dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
1697 rtnl_unlock(); 1361 rtnl_unlock();
1698 1362
1699 /* Stop GC */
1700 set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
1701 cancel_delayed_work(&priv->neigh_reap_task);
1702 flush_workqueue(ipoib_workqueue); 1363 flush_workqueue(ipoib_workqueue);
1703 1364
1704 unregister_netdev(priv->dev); 1365 unregister_netdev(priv->dev);
1366 ipoib_dev_cleanup(priv->dev);
1705 free_netdev(priv->dev); 1367 free_netdev(priv->dev);
1706 } 1368 }
1707 1369
@@ -1753,15 +1415,8 @@ static int __init ipoib_init_module(void)
1753 if (ret) 1415 if (ret)
1754 goto err_sa; 1416 goto err_sa;
1755 1417
1756 ret = ipoib_netlink_init();
1757 if (ret)
1758 goto err_client;
1759
1760 return 0; 1418 return 0;
1761 1419
1762err_client:
1763 ib_unregister_client(&ipoib_client);
1764
1765err_sa: 1420err_sa:
1766 ib_sa_unregister_client(&ipoib_sa_client); 1421 ib_sa_unregister_client(&ipoib_sa_client);
1767 destroy_workqueue(ipoib_workqueue); 1422 destroy_workqueue(ipoib_workqueue);
@@ -1774,7 +1429,6 @@ err_fs:
1774 1429
1775static void __exit ipoib_cleanup_module(void) 1430static void __exit ipoib_cleanup_module(void)
1776{ 1431{
1777 ipoib_netlink_fini();
1778 ib_unregister_client(&ipoib_client); 1432 ib_unregister_client(&ipoib_client);
1779 ib_sa_unregister_client(&ipoib_sa_client); 1433 ib_sa_unregister_client(&ipoib_sa_client);
1780 ipoib_unregister_debugfs(); 1434 ipoib_unregister_debugfs();
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index cecb98a4c66..a8d2a891b84 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -34,7 +34,6 @@
34 34
35#include <linux/skbuff.h> 35#include <linux/skbuff.h>
36#include <linux/rtnetlink.h> 36#include <linux/rtnetlink.h>
37#include <linux/moduleparam.h>
38#include <linux/ip.h> 37#include <linux/ip.h>
39#include <linux/in.h> 38#include <linux/in.h>
40#include <linux/igmp.h> 39#include <linux/igmp.h>
@@ -69,13 +68,28 @@ struct ipoib_mcast_iter {
69static void ipoib_mcast_free(struct ipoib_mcast *mcast) 68static void ipoib_mcast_free(struct ipoib_mcast *mcast)
70{ 69{
71 struct net_device *dev = mcast->dev; 70 struct net_device *dev = mcast->dev;
71 struct ipoib_dev_priv *priv = netdev_priv(dev);
72 struct ipoib_neigh *neigh, *tmp;
72 int tx_dropped = 0; 73 int tx_dropped = 0;
73 74
74 ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n", 75 ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n",
75 mcast->mcmember.mgid.raw); 76 mcast->mcmember.mgid.raw);
76 77
77	/* remove all neighs connected to this mcast */ 78	spin_lock_irq(&priv->lock);
78 ipoib_del_neighs_by_gid(dev, mcast->mcmember.mgid.raw); 79
80 list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) {
81 /*
82 * It's safe to call ipoib_put_ah() inside priv->lock
83 * here, because we know that mcast->ah will always
84 * hold one more reference, so ipoib_put_ah() will
85 * never do more than decrement the ref count.
86 */
87 if (neigh->ah)
88 ipoib_put_ah(neigh->ah);
89 ipoib_neigh_free(dev, neigh);
90 }
91
92 spin_unlock_irq(&priv->lock);
79 93
80 if (mcast->ah) 94 if (mcast->ah)
81 ipoib_put_ah(mcast->ah); 95 ipoib_put_ah(mcast->ah);
@@ -175,9 +189,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
175 189
176 mcast->mcmember = *mcmember; 190 mcast->mcmember = *mcmember;
177 191
178 /* Set the multicast MTU and cached Q_Key before we attach if it's 192 /* Set the cached Q_Key before we attach if it's the broadcast group */
179 * the broadcast group.
180 */
181 if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4, 193 if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
182 sizeof (union ib_gid))) { 194 sizeof (union ib_gid))) {
183 spin_lock_irq(&priv->lock); 195 spin_lock_irq(&priv->lock);
@@ -185,17 +197,10 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
185 spin_unlock_irq(&priv->lock); 197 spin_unlock_irq(&priv->lock);
186 return -EAGAIN; 198 return -EAGAIN;
187 } 199 }
188 priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
189 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); 200 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
190 spin_unlock_irq(&priv->lock); 201 spin_unlock_irq(&priv->lock);
191 priv->tx_wr.wr.ud.remote_qkey = priv->qkey; 202 priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
192 set_qkey = 1; 203 set_qkey = 1;
193
194 if (!ipoib_cm_admin_enabled(dev)) {
195 rtnl_lock();
196 dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
197 rtnl_unlock();
198 }
199 } 204 }
200 205
201 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 206 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -234,11 +239,8 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
234 av.grh.dgid = mcast->mcmember.mgid; 239 av.grh.dgid = mcast->mcmember.mgid;
235 240
236 ah = ipoib_create_ah(dev, priv->pd, &av); 241 ah = ipoib_create_ah(dev, priv->pd, &av);
237 if (IS_ERR(ah)) { 242 if (!ah) {
238 ipoib_warn(priv, "ib_address_create failed %ld\n", 243 ipoib_warn(priv, "ib_address_create failed\n");
239 -PTR_ERR(ah));
240 /* use original error */
241 return PTR_ERR(ah);
242 } else { 244 } else {
243 spin_lock_irq(&priv->lock); 245 spin_lock_irq(&priv->lock);
244 mcast->ah = ah; 246 mcast->ah = ah;
@@ -256,13 +258,21 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
256 netif_tx_lock_bh(dev); 258 netif_tx_lock_bh(dev);
257 while (!skb_queue_empty(&mcast->pkt_queue)) { 259 while (!skb_queue_empty(&mcast->pkt_queue)) {
258 struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); 260 struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
261 struct dst_entry *dst = skb_dst(skb);
262 struct neighbour *n = NULL;
259 263
260 netif_tx_unlock_bh(dev); 264 netif_tx_unlock_bh(dev);
261 265
262 skb->dev = dev; 266 skb->dev = dev;
267 if (dst)
268 n = dst_get_neighbour_raw(dst);
269 if (!dst || !n) {
270 /* put pseudoheader back on for next time */
271 skb_push(skb, sizeof (struct ipoib_pseudoheader));
272 }
273
263 if (dev_queue_xmit(skb)) 274 if (dev_queue_xmit(skb))
264 ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n"); 275 ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");
265
266 netif_tx_lock_bh(dev); 276 netif_tx_lock_bh(dev);
267 } 277 }
268 netif_tx_unlock_bh(dev); 278 netif_tx_unlock_bh(dev);
@@ -583,6 +593,14 @@ void ipoib_mcast_join_task(struct work_struct *work)
583 return; 593 return;
584 } 594 }
585 595
596 priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
597
598 if (!ipoib_cm_admin_enabled(dev)) {
599 rtnl_lock();
600 dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
601 rtnl_unlock();
602 }
603
586 ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); 604 ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
587 605
588 clear_bit(IPOIB_MCAST_RUN, &priv->flags); 606 clear_bit(IPOIB_MCAST_RUN, &priv->flags);
@@ -641,12 +659,11 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
641 return 0; 659 return 0;
642} 660}
643 661
644void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) 662void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
645{ 663{
646 struct ipoib_dev_priv *priv = netdev_priv(dev); 664 struct ipoib_dev_priv *priv = netdev_priv(dev);
647 struct ipoib_mcast *mcast; 665 struct ipoib_mcast *mcast;
648 unsigned long flags; 666 unsigned long flags;
649 void *mgid = daddr + 4;
650 667
651 spin_lock_irqsave(&priv->lock, flags); 668 spin_lock_irqsave(&priv->lock, flags);
652 669
@@ -702,23 +719,25 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
702 719
703out: 720out:
704 if (mcast && mcast->ah) { 721 if (mcast && mcast->ah) {
705 struct ipoib_neigh *neigh; 722 struct dst_entry *dst = skb_dst(skb);
723 struct neighbour *n = NULL;
724
725 rcu_read_lock();
726 if (dst)
727 n = dst_get_neighbour(dst);
728 if (n && !*to_ipoib_neigh(n)) {
729 struct ipoib_neigh *neigh = ipoib_neigh_alloc(n,
730 skb->dev);
706 731
707 spin_unlock_irqrestore(&priv->lock, flags);
708 neigh = ipoib_neigh_get(dev, daddr);
709 spin_lock_irqsave(&priv->lock, flags);
710 if (!neigh) {
711 neigh = ipoib_neigh_alloc(daddr, dev);
712 if (neigh) { 732 if (neigh) {
713 kref_get(&mcast->ah->ref); 733 kref_get(&mcast->ah->ref);
714 neigh->ah = mcast->ah; 734 neigh->ah = mcast->ah;
715 list_add_tail(&neigh->list, &mcast->neigh_list); 735 list_add_tail(&neigh->list, &mcast->neigh_list);
716 } 736 }
717 } 737 }
738 rcu_read_unlock();
718 spin_unlock_irqrestore(&priv->lock, flags); 739 spin_unlock_irqrestore(&priv->lock, flags);
719 ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); 740 ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
720 if (neigh)
721 ipoib_neigh_put(neigh);
722 return; 741 return;
723 } 742 }
724 743
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
deleted file mode 100644
index 74685936c94..00000000000
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ /dev/null
@@ -1,172 +0,0 @@
1/*
2 * Copyright (c) 2012 Mellanox Technologies. - All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/netdevice.h>
34#include <linux/module.h>
35#include <net/rtnetlink.h>
36#include "ipoib.h"
37
38static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = {
39 [IFLA_IPOIB_PKEY] = { .type = NLA_U16 },
40 [IFLA_IPOIB_MODE] = { .type = NLA_U16 },
41 [IFLA_IPOIB_UMCAST] = { .type = NLA_U16 },
42};
43
44static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev)
45{
46 struct ipoib_dev_priv *priv = netdev_priv(dev);
47 u16 val;
48
49 if (nla_put_u16(skb, IFLA_IPOIB_PKEY, priv->pkey))
50 goto nla_put_failure;
51
52 val = test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
53 if (nla_put_u16(skb, IFLA_IPOIB_MODE, val))
54 goto nla_put_failure;
55
56 val = test_bit(IPOIB_FLAG_UMCAST, &priv->flags);
57 if (nla_put_u16(skb, IFLA_IPOIB_UMCAST, val))
58 goto nla_put_failure;
59
60 return 0;
61
62nla_put_failure:
63 return -EMSGSIZE;
64}
65
66static int ipoib_changelink(struct net_device *dev,
67 struct nlattr *tb[], struct nlattr *data[])
68{
69 u16 mode, umcast;
70 int ret = 0;
71
72 if (data[IFLA_IPOIB_MODE]) {
73 mode = nla_get_u16(data[IFLA_IPOIB_MODE]);
74 if (mode == IPOIB_MODE_DATAGRAM)
75 ret = ipoib_set_mode(dev, "datagram\n");
76 else if (mode == IPOIB_MODE_CONNECTED)
77 ret = ipoib_set_mode(dev, "connected\n");
78 else
79 ret = -EINVAL;
80
81 if (ret < 0)
82 goto out_err;
83 }
84
85 if (data[IFLA_IPOIB_UMCAST]) {
86 umcast = nla_get_u16(data[IFLA_IPOIB_UMCAST]);
87 ipoib_set_umcast(dev, umcast);
88 }
89
90out_err:
91 return ret;
92}
93
94static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
95 struct nlattr *tb[], struct nlattr *data[])
96{
97 struct net_device *pdev;
98 struct ipoib_dev_priv *ppriv;
99 u16 child_pkey;
100 int err;
101
102 if (!tb[IFLA_LINK])
103 return -EINVAL;
104
105 pdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
106 if (!pdev)
107 return -ENODEV;
108
109 ppriv = netdev_priv(pdev);
110
111 if (test_bit(IPOIB_FLAG_SUBINTERFACE, &ppriv->flags)) {
112 ipoib_warn(ppriv, "child creation disallowed for child devices\n");
113 return -EINVAL;
114 }
115
116 if (!data || !data[IFLA_IPOIB_PKEY]) {
117 ipoib_dbg(ppriv, "no pkey specified, using parent pkey\n");
118 child_pkey = ppriv->pkey;
119 } else
120 child_pkey = nla_get_u16(data[IFLA_IPOIB_PKEY]);
121
122 err = __ipoib_vlan_add(ppriv, netdev_priv(dev), child_pkey, IPOIB_RTNL_CHILD);
123
124 if (!err && data)
125 err = ipoib_changelink(dev, tb, data);
126 return err;
127}
128
129static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head *head)
130{
131 struct ipoib_dev_priv *priv, *ppriv;
132
133 priv = netdev_priv(dev);
134 ppriv = netdev_priv(priv->parent);
135
136 mutex_lock(&ppriv->vlan_mutex);
137 unregister_netdevice_queue(dev, head);
138 list_del(&priv->list);
139 mutex_unlock(&ppriv->vlan_mutex);
140}
141
142static size_t ipoib_get_size(const struct net_device *dev)
143{
144 return nla_total_size(2) + /* IFLA_IPOIB_PKEY */
145 nla_total_size(2) + /* IFLA_IPOIB_MODE */
146 nla_total_size(2); /* IFLA_IPOIB_UMCAST */
147}
148
149static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
150 .kind = "ipoib",
151 .maxtype = IFLA_IPOIB_MAX,
152 .policy = ipoib_policy,
153 .priv_size = sizeof(struct ipoib_dev_priv),
154 .setup = ipoib_setup,
155 .newlink = ipoib_new_child_link,
156 .changelink = ipoib_changelink,
157 .dellink = ipoib_unregister_child_dev,
158 .get_size = ipoib_get_size,
159 .fill_info = ipoib_fill_info,
160};
161
162int __init ipoib_netlink_init(void)
163{
164 return rtnl_link_register(&ipoib_link_ops);
165}
166
167void __exit ipoib_netlink_fini(void)
168{
169 rtnl_link_unregister(&ipoib_link_ops);
170}
171
172MODULE_ALIAS_RTNL_LINK("ipoib");
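
For reference, the deleted file reduces to the usual rtnl_link_ops lifecycle: an ops struct keyed by .kind (plus its nla_policy), and a register/unregister pair called from module init/exit. A stripped-down skeleton of that lifecycle — the example_* names are illustrative, not IPoIB's:

    #include <linux/netdevice.h>
    #include <net/rtnetlink.h>

    struct example_priv { int dummy; };

    static void example_setup(struct net_device *dev)
    {
            /* initialize dev->type, dev->flags, netdev ops, ... */
    }

    static struct rtnl_link_ops example_link_ops __read_mostly = {
            .kind      = "example",
            .priv_size = sizeof(struct example_priv),
            .setup     = example_setup,
    };

    static int __init example_init(void)
    {
            return rtnl_link_register(&example_link_ops);
    }

    static void __exit example_exit(void)
    {
            rtnl_link_unregister(&example_link_ops);
    }
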
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 8292554bccb..d7e9740c724 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -49,11 +49,47 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr,
49} 49}
50static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); 50static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL);
51 51
52int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, 52int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
53 u16 pkey, int type)
54{ 53{
54 struct ipoib_dev_priv *ppriv, *priv;
55 char intf_name[IFNAMSIZ];
55 int result; 56 int result;
56 57
58 if (!capable(CAP_NET_ADMIN))
59 return -EPERM;
60
61 ppriv = netdev_priv(pdev);
62
63 if (!rtnl_trylock())
64 return restart_syscall();
65 mutex_lock(&ppriv->vlan_mutex);
66
67 /*
68 * First ensure this isn't a duplicate. We check the parent device and
69 * then all of the child interfaces to make sure the Pkey doesn't match.
70 */
71 if (ppriv->pkey == pkey) {
72 result = -ENOTUNIQ;
73 priv = NULL;
74 goto err;
75 }
76
77 list_for_each_entry(priv, &ppriv->child_intfs, list) {
78 if (priv->pkey == pkey) {
79 result = -ENOTUNIQ;
80 priv = NULL;
81 goto err;
82 }
83 }
84
85 snprintf(intf_name, sizeof intf_name, "%s.%04x",
86 ppriv->dev->name, pkey);
87 priv = ipoib_intf_alloc(intf_name);
88 if (!priv) {
89 result = -ENOMEM;
90 goto err;
91 }
92
57 priv->max_ib_mtu = ppriv->max_ib_mtu; 93 priv->max_ib_mtu = ppriv->max_ib_mtu;
58 /* MTU will be reset when mcast join happens */ 94 /* MTU will be reset when mcast join happens */
59 priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); 95 priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
@@ -88,27 +124,24 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
88 124
89 ipoib_create_debug_files(priv->dev); 125 ipoib_create_debug_files(priv->dev);
90 126
91	/* RTNL children don't need their own sysfs entries */ 127	if (ipoib_cm_add_mode_attr(priv->dev))
92 if (type == IPOIB_LEGACY_CHILD) { 128 goto sysfs_failed;
93 if (ipoib_cm_add_mode_attr(priv->dev)) 129 if (ipoib_add_pkey_attr(priv->dev))
94 goto sysfs_failed; 130 goto sysfs_failed;
95 if (ipoib_add_pkey_attr(priv->dev)) 131 if (ipoib_add_umcast_attr(priv->dev))
96 goto sysfs_failed; 132 goto sysfs_failed;
97 if (ipoib_add_umcast_attr(priv->dev)) 133
98 goto sysfs_failed; 134 if (device_create_file(&priv->dev->dev, &dev_attr_parent))
99 135 goto sysfs_failed;
100 if (device_create_file(&priv->dev->dev, &dev_attr_parent))
101 goto sysfs_failed;
102 }
103 136
104 priv->child_type = type;
105 priv->dev->iflink = ppriv->dev->ifindex;
106 list_add_tail(&priv->list, &ppriv->child_intfs); 137 list_add_tail(&priv->list, &ppriv->child_intfs);
107 138
139 mutex_unlock(&ppriv->vlan_mutex);
140 rtnl_unlock();
141
108 return 0; 142 return 0;
109 143
110sysfs_failed: 144sysfs_failed:
111 result = -ENOMEM;
112 ipoib_delete_debug_files(priv->dev); 145 ipoib_delete_debug_files(priv->dev);
113 unregister_netdevice(priv->dev); 146 unregister_netdevice(priv->dev);
114 147
@@ -116,59 +149,10 @@ register_failed:
116 ipoib_dev_cleanup(priv->dev); 149 ipoib_dev_cleanup(priv->dev);
117 150
118err: 151err:
119 return result;
120}
121
122int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
123{
124 struct ipoib_dev_priv *ppriv, *priv;
125 char intf_name[IFNAMSIZ];
126 struct ipoib_dev_priv *tpriv;
127 int result;
128
129 if (!capable(CAP_NET_ADMIN))
130 return -EPERM;
131
132 ppriv = netdev_priv(pdev);
133
134 snprintf(intf_name, sizeof intf_name, "%s.%04x",
135 ppriv->dev->name, pkey);
136 priv = ipoib_intf_alloc(intf_name);
137 if (!priv)
138 return -ENOMEM;
139
140 if (!rtnl_trylock())
141 return restart_syscall();
142
143 mutex_lock(&ppriv->vlan_mutex);
144
145 /*
146 * First ensure this isn't a duplicate. We check the parent device and
147 * then all of the legacy child interfaces to make sure the Pkey
148 * doesn't match.
149 */
150 if (ppriv->pkey == pkey) {
151 result = -ENOTUNIQ;
152 goto out;
153 }
154
155 list_for_each_entry(tpriv, &ppriv->child_intfs, list) {
156 if (tpriv->pkey == pkey &&
157 tpriv->child_type == IPOIB_LEGACY_CHILD) {
158 result = -ENOTUNIQ;
159 goto out;
160 }
161 }
162
163 result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);
164
165out:
166 mutex_unlock(&ppriv->vlan_mutex); 152 mutex_unlock(&ppriv->vlan_mutex);
167
168 if (result)
169 free_netdev(priv->dev);
170
171 rtnl_unlock(); 153 rtnl_unlock();
154 if (priv)
155 free_netdev(priv->dev);
172 156
173 return result; 157 return result;
174} 158}
@@ -187,9 +171,9 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
187 return restart_syscall(); 171 return restart_syscall();
188 mutex_lock(&ppriv->vlan_mutex); 172 mutex_lock(&ppriv->vlan_mutex);
189 list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { 173 list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
190 if (priv->pkey == pkey && 174 if (priv->pkey == pkey) {
191 priv->child_type == IPOIB_LEGACY_CHILD) {
192 unregister_netdevice(priv->dev); 175 unregister_netdevice(priv->dev);
176 ipoib_dev_cleanup(priv->dev);
193 list_del(&priv->list); 177 list_del(&priv->list);
194 dev = priv->dev; 178 dev = priv->dev;
195 break; 179 break;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 0ab8c9cc3a7..9c61b9c2c59 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -57,7 +57,6 @@
57#include <linux/scatterlist.h> 57#include <linux/scatterlist.h>
58#include <linux/delay.h> 58#include <linux/delay.h>
59#include <linux/slab.h> 59#include <linux/slab.h>
60#include <linux/module.h>
61 60
62#include <net/sock.h> 61#include <net/sock.h>
63 62
@@ -152,6 +151,7 @@ int iser_initialize_task_headers(struct iscsi_task *task,
152 tx_desc->tx_sg[0].length = ISER_HEADERS_LEN; 151 tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
153 tx_desc->tx_sg[0].lkey = device->mr->lkey; 152 tx_desc->tx_sg[0].lkey = device->mr->lkey;
154 153
154 iser_task->headers_initialized = 1;
155 iser_task->iser_conn = iser_conn; 155 iser_task->iser_conn = iser_conn;
156 return 0; 156 return 0;
157} 157}
@@ -166,7 +166,8 @@ iscsi_iser_task_init(struct iscsi_task *task)
166{ 166{
167 struct iscsi_iser_task *iser_task = task->dd_data; 167 struct iscsi_iser_task *iser_task = task->dd_data;
168 168
169 if (iser_initialize_task_headers(task, &iser_task->desc)) 169 if (!iser_task->headers_initialized)
170 if (iser_initialize_task_headers(task, &iser_task->desc))
170 return -ENOMEM; 171 return -ENOMEM;
171 172
172 /* mgmt task */ 173 /* mgmt task */
@@ -277,13 +278,6 @@ iscsi_iser_task_xmit(struct iscsi_task *task)
277static void iscsi_iser_cleanup_task(struct iscsi_task *task) 278static void iscsi_iser_cleanup_task(struct iscsi_task *task)
278{ 279{
279 struct iscsi_iser_task *iser_task = task->dd_data; 280 struct iscsi_iser_task *iser_task = task->dd_data;
280 struct iser_tx_desc *tx_desc = &iser_task->desc;
281
282 struct iscsi_iser_conn *iser_conn = task->conn->dd_data;
283 struct iser_device *device = iser_conn->ib_conn->device;
284
285 ib_dma_unmap_single(device->ib_device,
286 tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
287 281
288 /* mgmt tasks do not need special cleanup */ 282 /* mgmt tasks do not need special cleanup */
289 if (!task->sc) 283 if (!task->sc)
@@ -364,9 +358,6 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
364 } 358 }
365 ib_conn = ep->dd_data; 359 ib_conn = ep->dd_data;
366 360
367 if (iser_alloc_rx_descriptors(ib_conn))
368 return -ENOMEM;
369
370 /* binds the iSER connection retrieved from the previously 361 /* binds the iSER connection retrieved from the previously
371 * connected ep_handle to the iSCSI layer connection. exchanges 362 * connected ep_handle to the iSCSI layer connection. exchanges
372 * connection pointers */ 363 * connection pointers */
@@ -401,6 +392,19 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
401 iser_conn->ib_conn = NULL; 392 iser_conn->ib_conn = NULL;
402} 393}
403 394
395static int
396iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn)
397{
398 struct iscsi_conn *conn = cls_conn->dd_data;
399 int err;
400
401 err = iser_conn_set_full_featured_mode(conn);
402 if (err)
403 return err;
404
405 return iscsi_conn_start(cls_conn);
406}
407
404static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) 408static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
405{ 409{
406 struct Scsi_Host *shost = iscsi_session_to_shost(cls_session); 410 struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
@@ -573,9 +577,10 @@ iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
573 577
574 err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr, 578 err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr,
575 non_blocking); 579 non_blocking);
576 if (err) 580 if (err) {
581 iscsi_destroy_endpoint(ep);
577 return ERR_PTR(err); 582 return ERR_PTR(err);
578 583 }
579 return ep; 584 return ep;
580} 585}
581 586
@@ -627,59 +632,6 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
627 iser_conn_terminate(ib_conn); 632 iser_conn_terminate(ib_conn);
628} 633}
629 634
630static umode_t iser_attr_is_visible(int param_type, int param)
631{
632 switch (param_type) {
633 case ISCSI_HOST_PARAM:
634 switch (param) {
635 case ISCSI_HOST_PARAM_NETDEV_NAME:
636 case ISCSI_HOST_PARAM_HWADDRESS:
637 case ISCSI_HOST_PARAM_INITIATOR_NAME:
638 return S_IRUGO;
639 default:
640 return 0;
641 }
642 case ISCSI_PARAM:
643 switch (param) {
644 case ISCSI_PARAM_MAX_RECV_DLENGTH:
645 case ISCSI_PARAM_MAX_XMIT_DLENGTH:
646 case ISCSI_PARAM_HDRDGST_EN:
647 case ISCSI_PARAM_DATADGST_EN:
648 case ISCSI_PARAM_CONN_ADDRESS:
649 case ISCSI_PARAM_CONN_PORT:
650 case ISCSI_PARAM_EXP_STATSN:
651 case ISCSI_PARAM_PERSISTENT_ADDRESS:
652 case ISCSI_PARAM_PERSISTENT_PORT:
653 case ISCSI_PARAM_PING_TMO:
654 case ISCSI_PARAM_RECV_TMO:
655 case ISCSI_PARAM_INITIAL_R2T_EN:
656 case ISCSI_PARAM_MAX_R2T:
657 case ISCSI_PARAM_IMM_DATA_EN:
658 case ISCSI_PARAM_FIRST_BURST:
659 case ISCSI_PARAM_MAX_BURST:
660 case ISCSI_PARAM_PDU_INORDER_EN:
661 case ISCSI_PARAM_DATASEQ_INORDER_EN:
662 case ISCSI_PARAM_TARGET_NAME:
663 case ISCSI_PARAM_TPGT:
664 case ISCSI_PARAM_USERNAME:
665 case ISCSI_PARAM_PASSWORD:
666 case ISCSI_PARAM_USERNAME_IN:
667 case ISCSI_PARAM_PASSWORD_IN:
668 case ISCSI_PARAM_FAST_ABORT:
669 case ISCSI_PARAM_ABORT_TMO:
670 case ISCSI_PARAM_LU_RESET_TMO:
671 case ISCSI_PARAM_TGT_RESET_TMO:
672 case ISCSI_PARAM_IFACE_NAME:
673 case ISCSI_PARAM_INITIATOR_NAME:
674 return S_IRUGO;
675 default:
676 return 0;
677 }
678 }
679
680 return 0;
681}
682
683static struct scsi_host_template iscsi_iser_sht = { 635static struct scsi_host_template iscsi_iser_sht = {
684 .module = THIS_MODULE, 636 .module = THIS_MODULE,
685 .name = "iSCSI Initiator over iSER, v." DRV_VER, 637 .name = "iSCSI Initiator over iSER, v." DRV_VER,
@@ -701,6 +653,32 @@ static struct iscsi_transport iscsi_iser_transport = {
701 .owner = THIS_MODULE, 653 .owner = THIS_MODULE,
702 .name = "iser", 654 .name = "iser",
703 .caps = CAP_RECOVERY_L0 | CAP_MULTI_R2T, 655 .caps = CAP_RECOVERY_L0 | CAP_MULTI_R2T,
656 .param_mask = ISCSI_MAX_RECV_DLENGTH |
657 ISCSI_MAX_XMIT_DLENGTH |
658 ISCSI_HDRDGST_EN |
659 ISCSI_DATADGST_EN |
660 ISCSI_INITIAL_R2T_EN |
661 ISCSI_MAX_R2T |
662 ISCSI_IMM_DATA_EN |
663 ISCSI_FIRST_BURST |
664 ISCSI_MAX_BURST |
665 ISCSI_PDU_INORDER_EN |
666 ISCSI_DATASEQ_INORDER_EN |
667 ISCSI_CONN_PORT |
668 ISCSI_CONN_ADDRESS |
669 ISCSI_EXP_STATSN |
670 ISCSI_PERSISTENT_PORT |
671 ISCSI_PERSISTENT_ADDRESS |
672 ISCSI_TARGET_NAME | ISCSI_TPGT |
673 ISCSI_USERNAME | ISCSI_PASSWORD |
674 ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN |
675 ISCSI_FAST_ABORT | ISCSI_ABORT_TMO |
676 ISCSI_LU_RESET_TMO | ISCSI_TGT_RESET_TMO |
677 ISCSI_PING_TMO | ISCSI_RECV_TMO |
678 ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME,
679 .host_param_mask = ISCSI_HOST_HWADDRESS |
680 ISCSI_HOST_NETDEV_NAME |
681 ISCSI_HOST_INITIATOR_NAME,
704 /* session management */ 682 /* session management */
705 .create_session = iscsi_iser_session_create, 683 .create_session = iscsi_iser_session_create,
706 .destroy_session = iscsi_iser_session_destroy, 684 .destroy_session = iscsi_iser_session_destroy,
@@ -708,12 +686,11 @@ static struct iscsi_transport iscsi_iser_transport = {
708 .create_conn = iscsi_iser_conn_create, 686 .create_conn = iscsi_iser_conn_create,
709 .bind_conn = iscsi_iser_conn_bind, 687 .bind_conn = iscsi_iser_conn_bind,
710 .destroy_conn = iscsi_iser_conn_destroy, 688 .destroy_conn = iscsi_iser_conn_destroy,
711 .attr_is_visible = iser_attr_is_visible,
712 .set_param = iscsi_iser_set_param, 689 .set_param = iscsi_iser_set_param,
713 .get_conn_param = iscsi_conn_get_param, 690 .get_conn_param = iscsi_conn_get_param,
714 .get_ep_param = iscsi_iser_get_ep_param, 691 .get_ep_param = iscsi_iser_get_ep_param,
715 .get_session_param = iscsi_session_get_param, 692 .get_session_param = iscsi_session_get_param,
716 .start_conn = iscsi_conn_start, 693 .start_conn = iscsi_iser_conn_start,
717 .stop_conn = iscsi_iser_conn_stop, 694 .stop_conn = iscsi_iser_conn_stop,
718 /* iscsi host params */ 695 /* iscsi host params */
719 .get_host_param = iscsi_host_get_param, 696 .get_host_param = iscsi_host_get_param,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index ef7d3be46c3..db6f3ce9f3b 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -177,7 +177,6 @@ struct iser_data_buf {
177 177
178/* fwd declarations */ 178/* fwd declarations */
179struct iser_device; 179struct iser_device;
180struct iser_cq_desc;
181struct iscsi_iser_conn; 180struct iscsi_iser_conn;
182struct iscsi_iser_task; 181struct iscsi_iser_task;
183struct iscsi_endpoint; 182struct iscsi_endpoint;
@@ -227,21 +226,16 @@ struct iser_rx_desc {
227 char pad[ISER_RX_PAD_SIZE]; 226 char pad[ISER_RX_PAD_SIZE];
228} __attribute__((packed)); 227} __attribute__((packed));
229 228
230#define ISER_MAX_CQ 4
231
232struct iser_device { 229struct iser_device {
233 struct ib_device *ib_device; 230 struct ib_device *ib_device;
234 struct ib_pd *pd; 231 struct ib_pd *pd;
235 struct ib_cq *rx_cq[ISER_MAX_CQ]; 232 struct ib_cq *rx_cq;
236 struct ib_cq *tx_cq[ISER_MAX_CQ]; 233 struct ib_cq *tx_cq;
237 struct ib_mr *mr; 234 struct ib_mr *mr;
238 struct tasklet_struct cq_tasklet[ISER_MAX_CQ]; 235 struct tasklet_struct cq_tasklet;
239 struct ib_event_handler event_handler; 236 struct ib_event_handler event_handler;
240 struct list_head ig_list; /* entry in ig devices list */ 237 struct list_head ig_list; /* entry in ig devices list */
241 int refcount; 238 int refcount;
242 int cq_active_qps[ISER_MAX_CQ];
243 int cqs_used;
244 struct iser_cq_desc *cq_desc;
245}; 239};
246 240
247struct iser_conn { 241struct iser_conn {
@@ -263,8 +257,7 @@ struct iser_conn {
263 struct list_head conn_list; /* entry in ig conn list */ 257 struct list_head conn_list; /* entry in ig conn list */
264 258
265 char *login_buf; 259 char *login_buf;
266 char *login_req_buf, *login_resp_buf; 260 u64 login_dma;
267 u64 login_req_dma, login_resp_dma;
268 unsigned int rx_desc_head; 261 unsigned int rx_desc_head;
269 struct iser_rx_desc *rx_descs; 262 struct iser_rx_desc *rx_descs;
270 struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; 263 struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
@@ -284,6 +277,7 @@ struct iscsi_iser_task {
284 struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */ 277 struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */
285 struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/ 278 struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/
286 struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */ 279 struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */
280 int headers_initialized;
287}; 281};
288 282
289struct iser_page_vec { 283struct iser_page_vec {
@@ -293,11 +287,6 @@ struct iser_page_vec {
293 int data_size; 287 int data_size;
294}; 288};
295 289
296struct iser_cq_desc {
297 struct iser_device *device;
298 int cq_index;
299};
300
301struct iser_global { 290struct iser_global {
302 struct mutex device_list_mutex;/* */ 291 struct mutex device_list_mutex;/* */
303 struct list_head device_list; /* all iSER devices */ 292 struct list_head device_list; /* all iSER devices */
@@ -377,5 +366,4 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
377void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task); 366void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task);
378int iser_initialize_task_headers(struct iscsi_task *task, 367int iser_initialize_task_headers(struct iscsi_task *task,
379 struct iser_tx_desc *tx_desc); 368 struct iser_tx_desc *tx_desc);
380int iser_alloc_rx_descriptors(struct iser_conn *ib_conn);
381#endif 369#endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index a00ccd1ca33..f299de6b419 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -170,7 +170,7 @@ static void iser_create_send_desc(struct iser_conn *ib_conn,
170} 170}
171 171
172 172
173int iser_alloc_rx_descriptors(struct iser_conn *ib_conn) 173static int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
174{ 174{
175 int i, j; 175 int i, j;
176 u64 dma_addr; 176 u64 dma_addr;
@@ -220,6 +220,12 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn)
220 struct iser_rx_desc *rx_desc; 220 struct iser_rx_desc *rx_desc;
221 struct iser_device *device = ib_conn->device; 221 struct iser_device *device = ib_conn->device;
222 222
223 if (ib_conn->login_buf) {
224 ib_dma_unmap_single(device->ib_device, ib_conn->login_dma,
225 ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
226 kfree(ib_conn->login_buf);
227 }
228
223 if (!ib_conn->rx_descs) 229 if (!ib_conn->rx_descs)
224 return; 230 return;
225 231
@@ -230,24 +236,23 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn)
230 kfree(ib_conn->rx_descs); 236 kfree(ib_conn->rx_descs);
231} 237}
232 238
233static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) 239/**
240 * iser_conn_set_full_featured_mode - (iSER API)
241 */
242int iser_conn_set_full_featured_mode(struct iscsi_conn *conn)
234{ 243{
235 struct iscsi_iser_conn *iser_conn = conn->dd_data; 244 struct iscsi_iser_conn *iser_conn = conn->dd_data;
236 245
237 iser_dbg("req op %x flags %x\n", req->opcode, req->flags); 246 iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX);
238 /* check if this is the last login - going to full feature phase */
239 if ((req->flags & ISCSI_FULL_FEATURE_PHASE) != ISCSI_FULL_FEATURE_PHASE)
240 return 0;
241 247
242	/* 248	/* Check that there are no posted recv or send buffers left - */
243 * Check that there is one posted recv buffer (for the last login 249 /* they must be consumed during the login phase */
244 * response) and no posted send buffers left - they must have been 250 BUG_ON(iser_conn->ib_conn->post_recv_buf_count != 0);
245 * consumed during previous login phases. 251 BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);
246 */ 252
247 WARN_ON(iser_conn->ib_conn->post_recv_buf_count != 1); 253 if (iser_alloc_rx_descriptors(iser_conn->ib_conn))
248 WARN_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0); 254 return -ENOMEM;
249 255
250 iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX);
251 /* Initial post receive buffers */ 256 /* Initial post receive buffers */
252 if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX)) 257 if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX))
253 return -ENOMEM; 258 return -ENOMEM;
@@ -389,7 +394,6 @@ int iser_send_control(struct iscsi_conn *conn,
389 unsigned long data_seg_len; 394 unsigned long data_seg_len;
390 int err = 0; 395 int err = 0;
391 struct iser_device *device; 396 struct iser_device *device;
392 struct iser_conn *ib_conn = iser_conn->ib_conn;
393 397
394 /* build the tx desc regd header and add it to the tx desc dto */ 398 /* build the tx desc regd header and add it to the tx desc dto */
395 mdesc->type = ISCSI_TX_CONTROL; 399 mdesc->type = ISCSI_TX_CONTROL;
@@ -405,19 +409,9 @@ int iser_send_control(struct iscsi_conn *conn,
405 iser_err("data present on non login task!!!\n"); 409 iser_err("data present on non login task!!!\n");
406 goto send_control_error; 410 goto send_control_error;
407 } 411 }
408 412 memcpy(iser_conn->ib_conn->login_buf, task->data,
409 ib_dma_sync_single_for_cpu(device->ib_device,
410 ib_conn->login_req_dma, task->data_count,
411 DMA_TO_DEVICE);
412
413 memcpy(iser_conn->ib_conn->login_req_buf, task->data,
414 task->data_count); 413 task->data_count);
415 414 tx_dsg->addr = iser_conn->ib_conn->login_dma;
416 ib_dma_sync_single_for_device(device->ib_device,
417 ib_conn->login_req_dma, task->data_count,
418 DMA_TO_DEVICE);
419
420 tx_dsg->addr = iser_conn->ib_conn->login_req_dma;
421 tx_dsg->length = task->data_count; 415 tx_dsg->length = task->data_count;
422 tx_dsg->lkey = device->mr->lkey; 416 tx_dsg->lkey = device->mr->lkey;
423 mdesc->num_sge = 2; 417 mdesc->num_sge = 2;
@@ -427,9 +421,6 @@ int iser_send_control(struct iscsi_conn *conn,
427 err = iser_post_recvl(iser_conn->ib_conn); 421 err = iser_post_recvl(iser_conn->ib_conn);
428 if (err) 422 if (err)
429 goto send_control_error; 423 goto send_control_error;
430 err = iser_post_rx_bufs(conn, task->hdr);
431 if (err)
432 goto send_control_error;
433 } 424 }
434 425
435 err = iser_post_send(iser_conn->ib_conn, mdesc); 426 err = iser_post_send(iser_conn->ib_conn, mdesc);
@@ -454,8 +445,8 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
454 int rx_buflen, outstanding, count, err; 445 int rx_buflen, outstanding, count, err;
455 446
456 /* differentiate between login to all other PDUs */ 447 /* differentiate between login to all other PDUs */
457 if ((char *)rx_desc == ib_conn->login_resp_buf) { 448 if ((char *)rx_desc == ib_conn->login_buf) {
458 rx_dma = ib_conn->login_resp_dma; 449 rx_dma = ib_conn->login_dma;
459 rx_buflen = ISER_RX_LOGIN_SIZE; 450 rx_buflen = ISER_RX_LOGIN_SIZE;
460 } else { 451 } else {
461 rx_dma = rx_desc->dma_addr; 452 rx_dma = rx_desc->dma_addr;
@@ -482,7 +473,7 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
482 * for the posted rx bufs refcount to become zero handles everything */ 473 * for the posted rx bufs refcount to become zero handles everything */
483 conn->ib_conn->post_recv_buf_count--; 474 conn->ib_conn->post_recv_buf_count--;
484 475
485 if (rx_dma == ib_conn->login_resp_dma) 476 if (rx_dma == ib_conn->login_dma)
486 return; 477 return;
487 478
488 outstanding = ib_conn->post_recv_buf_count; 479 outstanding = ib_conn->post_recv_buf_count;
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 2033a928d34..fb88d6896b6 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -73,11 +73,11 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
73 73
74 p = mem; 74 p = mem;
75 for_each_sg(sgl, sg, data->size, i) { 75 for_each_sg(sgl, sg, data->size, i) {
76 from = kmap_atomic(sg_page(sg)); 76 from = kmap_atomic(sg_page(sg), KM_USER0);
77 memcpy(p, 77 memcpy(p,
78 from + sg->offset, 78 from + sg->offset,
79 sg->length); 79 sg->length);
80 kunmap_atomic(from); 80 kunmap_atomic(from, KM_USER0);
81 p += sg->length; 81 p += sg->length;
82 } 82 }
83 } 83 }
@@ -133,11 +133,11 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
133 133
134 p = mem; 134 p = mem;
135 for_each_sg(sgl, sg, sg_size, i) { 135 for_each_sg(sgl, sg, sg_size, i) {
136 to = kmap_atomic(sg_page(sg)); 136 to = kmap_atomic(sg_page(sg), KM_SOFTIRQ0);
137 memcpy(to + sg->offset, 137 memcpy(to + sg->offset,
138 p, 138 p,
139 sg->length); 139 sg->length);
140 kunmap_atomic(to); 140 kunmap_atomic(to, KM_SOFTIRQ0);
141 p += sg->length; 141 p += sg->length;
142 } 142 }
143 } 143 }
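
Both hunks are the same bounce-buffer copy, moved back to the two-argument kmap_atomic() API, where the KM_* slot has to match the calling context (KM_USER0 in process context, KM_SOFTIRQ0 in the completion path). A self-contained sketch of the copy-out direction under that old API:

    #include <linux/highmem.h>
    #include <linux/scatterlist.h>

    static void sg_copy_out_old_api(struct scatterlist *sgl, int nents,
                                    char *dst)
    {
            struct scatterlist *sg;
            int i;

            for_each_sg(sgl, sg, nents, i) {
                    /* short-lived atomic mapping; slot must match context */
                    char *src = kmap_atomic(sg_page(sg), KM_USER0);

                    memcpy(dst, src + sg->offset, sg->length);
                    kunmap_atomic(src, KM_USER0);
                    dst += sg->length;
            }
    }
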
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 95a49affee4..ede1475bee0 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -70,50 +70,32 @@ static void iser_event_handler(struct ib_event_handler *handler,
70 */ 70 */
71static int iser_create_device_ib_res(struct iser_device *device) 71static int iser_create_device_ib_res(struct iser_device *device)
72{ 72{
73 int i, j;
74 struct iser_cq_desc *cq_desc;
75
76 device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
77 iser_err("using %d CQs, device %s supports %d vectors\n", device->cqs_used,
78 device->ib_device->name, device->ib_device->num_comp_vectors);
79
80 device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used,
81 GFP_KERNEL);
82 if (device->cq_desc == NULL)
83 goto cq_desc_err;
84 cq_desc = device->cq_desc;
85
86 device->pd = ib_alloc_pd(device->ib_device); 73 device->pd = ib_alloc_pd(device->ib_device);
87 if (IS_ERR(device->pd)) 74 if (IS_ERR(device->pd))
88 goto pd_err; 75 goto pd_err;
89 76
90 for (i = 0; i < device->cqs_used; i++) { 77 device->rx_cq = ib_create_cq(device->ib_device,
91 cq_desc[i].device = device; 78 iser_cq_callback,
92 cq_desc[i].cq_index = i; 79 iser_cq_event_callback,
80 (void *)device,
81 ISER_MAX_RX_CQ_LEN, 0);
82 if (IS_ERR(device->rx_cq))
83 goto rx_cq_err;
93 84
94 device->rx_cq[i] = ib_create_cq(device->ib_device, 85 device->tx_cq = ib_create_cq(device->ib_device,
95 iser_cq_callback, 86 NULL, iser_cq_event_callback,
96 iser_cq_event_callback, 87 (void *)device,
97 (void *)&cq_desc[i], 88 ISER_MAX_TX_CQ_LEN, 0);
98 ISER_MAX_RX_CQ_LEN, i);
99 if (IS_ERR(device->rx_cq[i]))
100 goto cq_err;
101 89
102 device->tx_cq[i] = ib_create_cq(device->ib_device, 90 if (IS_ERR(device->tx_cq))
103 NULL, iser_cq_event_callback, 91 goto tx_cq_err;
104 (void *)&cq_desc[i],
105 ISER_MAX_TX_CQ_LEN, i);
106 92
107 if (IS_ERR(device->tx_cq[i])) 93 if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP))
108 goto cq_err; 94 goto cq_arm_err;
109 95
110 if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP)) 96 tasklet_init(&device->cq_tasklet,
111 goto cq_err; 97 iser_cq_tasklet_fn,
112 98 (unsigned long)device);
113 tasklet_init(&device->cq_tasklet[i],
114 iser_cq_tasklet_fn,
115 (unsigned long)&cq_desc[i]);
116 }
117 99
118 device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | 100 device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
119 IB_ACCESS_REMOTE_WRITE | 101 IB_ACCESS_REMOTE_WRITE |
@@ -131,19 +113,14 @@ static int iser_create_device_ib_res(struct iser_device *device)
131handler_err: 113handler_err:
132 ib_dereg_mr(device->mr); 114 ib_dereg_mr(device->mr);
133dma_mr_err: 115dma_mr_err:
134 for (j = 0; j < device->cqs_used; j++) 116 tasklet_kill(&device->cq_tasklet);
135 tasklet_kill(&device->cq_tasklet[j]); 117cq_arm_err:
136cq_err: 118 ib_destroy_cq(device->tx_cq);
137 for (j = 0; j < i; j++) { 119tx_cq_err:
138 if (device->tx_cq[j]) 120 ib_destroy_cq(device->rx_cq);
139 ib_destroy_cq(device->tx_cq[j]); 121rx_cq_err:
140 if (device->rx_cq[j])
141 ib_destroy_cq(device->rx_cq[j]);
142 }
143 ib_dealloc_pd(device->pd); 122 ib_dealloc_pd(device->pd);
144pd_err: 123pd_err:
145 kfree(device->cq_desc);
146cq_desc_err:
147 iser_err("failed to allocate an IB resource\n"); 124 iser_err("failed to allocate an IB resource\n");
148 return -1; 125 return -1;
149} 126}
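
The multi-CQ code being removed spread QPs across one CQ per completion vector and, at connect time, picked the least-loaded queue. That selection step (removed in a following hunk) amounts to a linear scan over per-CQ QP counts; a sketch of what the cq_active_qps[] bookkeeping supported:

    /* pick the completion queue with the fewest attached QPs */
    static int pick_min_cq(const int *cq_active_qps, int cqs_used)
    {
            int i, min_index = 0;

            for (i = 1; i < cqs_used; i++)
                    if (cq_active_qps[i] < cq_active_qps[min_index])
                            min_index = i;
            return min_index;
    }
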
@@ -154,24 +131,18 @@ cq_desc_err:
154 */ 131 */
155static void iser_free_device_ib_res(struct iser_device *device) 132static void iser_free_device_ib_res(struct iser_device *device)
156{ 133{
157 int i;
158 BUG_ON(device->mr == NULL); 134 BUG_ON(device->mr == NULL);
159 135
160 for (i = 0; i < device->cqs_used; i++) { 136 tasklet_kill(&device->cq_tasklet);
161 tasklet_kill(&device->cq_tasklet[i]);
162 (void)ib_destroy_cq(device->tx_cq[i]);
163 (void)ib_destroy_cq(device->rx_cq[i]);
164 device->tx_cq[i] = NULL;
165 device->rx_cq[i] = NULL;
166 }
167
168 (void)ib_unregister_event_handler(&device->event_handler); 137 (void)ib_unregister_event_handler(&device->event_handler);
169 (void)ib_dereg_mr(device->mr); 138 (void)ib_dereg_mr(device->mr);
139 (void)ib_destroy_cq(device->tx_cq);
140 (void)ib_destroy_cq(device->rx_cq);
170 (void)ib_dealloc_pd(device->pd); 141 (void)ib_dealloc_pd(device->pd);
171 142
172 kfree(device->cq_desc);
173
174 device->mr = NULL; 143 device->mr = NULL;
144 device->tx_cq = NULL;
145 device->rx_cq = NULL;
175 device->pd = NULL; 146 device->pd = NULL;
176} 147}
177 148
@@ -184,40 +155,20 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
184{ 155{
185 struct iser_device *device; 156 struct iser_device *device;
186 struct ib_qp_init_attr init_attr; 157 struct ib_qp_init_attr init_attr;
187 int req_err, resp_err, ret = -ENOMEM; 158 int ret = -ENOMEM;
188 struct ib_fmr_pool_param params; 159 struct ib_fmr_pool_param params;
189 int index, min_index = 0;
190 160
191 BUG_ON(ib_conn->device == NULL); 161 BUG_ON(ib_conn->device == NULL);
192 162
193 device = ib_conn->device; 163 device = ib_conn->device;
194 164
195 ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN + 165 ib_conn->login_buf = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
196 ISER_RX_LOGIN_SIZE, GFP_KERNEL);
197 if (!ib_conn->login_buf) 166 if (!ib_conn->login_buf)
198 goto out_err; 167 goto out_err;
199 168
200 ib_conn->login_req_buf = ib_conn->login_buf; 169 ib_conn->login_dma = ib_dma_map_single(ib_conn->device->ib_device,
201 ib_conn->login_resp_buf = ib_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN; 170 (void *)ib_conn->login_buf, ISER_RX_LOGIN_SIZE,
202 171 DMA_FROM_DEVICE);
203 ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
204 (void *)ib_conn->login_req_buf,
205 ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
206
207 ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
208 (void *)ib_conn->login_resp_buf,
209 ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
210
211 req_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_req_dma);
212 resp_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_resp_dma);
213
214 if (req_err || resp_err) {
215 if (req_err)
216 ib_conn->login_req_dma = 0;
217 if (resp_err)
218 ib_conn->login_resp_dma = 0;
219 goto out_err;
220 }
221 172
222 ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) + 173 ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
223 (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)), 174 (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
@@ -250,20 +201,10 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
250 201
251 memset(&init_attr, 0, sizeof init_attr); 202 memset(&init_attr, 0, sizeof init_attr);
252 203
253 mutex_lock(&ig.connlist_mutex);
254 /* select the CQ with the minimal number of usages */
255 for (index = 0; index < device->cqs_used; index++)
256 if (device->cq_active_qps[index] <
257 device->cq_active_qps[min_index])
258 min_index = index;
259 device->cq_active_qps[min_index]++;
260 mutex_unlock(&ig.connlist_mutex);
261 iser_err("cq index %d used for ib_conn %p\n", min_index, ib_conn);
262
263 init_attr.event_handler = iser_qp_event_callback; 204 init_attr.event_handler = iser_qp_event_callback;
264 init_attr.qp_context = (void *)ib_conn; 205 init_attr.qp_context = (void *)ib_conn;
265 init_attr.send_cq = device->tx_cq[min_index]; 206 init_attr.send_cq = device->tx_cq;
266 init_attr.recv_cq = device->rx_cq[min_index]; 207 init_attr.recv_cq = device->rx_cq;
267 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; 208 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
268 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; 209 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
269 init_attr.cap.max_send_sge = 2; 210 init_attr.cap.max_send_sge = 2;
@@ -292,7 +233,6 @@ out_err:
292 */ 233 */
293static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id) 234static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id)
294{ 235{
295 int cq_index;
296 BUG_ON(ib_conn == NULL); 236 BUG_ON(ib_conn == NULL);
297 237
298 iser_err("freeing conn %p cma_id %p fmr pool %p qp %p\n", 238 iser_err("freeing conn %p cma_id %p fmr pool %p qp %p\n",
@@ -303,12 +243,9 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id)
303 if (ib_conn->fmr_pool != NULL) 243 if (ib_conn->fmr_pool != NULL)
304 ib_destroy_fmr_pool(ib_conn->fmr_pool); 244 ib_destroy_fmr_pool(ib_conn->fmr_pool);
305 245
306 if (ib_conn->qp != NULL) { 246 if (ib_conn->qp != NULL)
307 cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index;
308 ib_conn->device->cq_active_qps[cq_index]--;
309
310 rdma_destroy_qp(ib_conn->cma_id); 247 rdma_destroy_qp(ib_conn->cma_id);
311 } 248
312 /* if cma handler context, the caller acts s.t the cma destroy the id */ 249 /* if cma handler context, the caller acts s.t the cma destroy the id */
313 if (ib_conn->cma_id != NULL && can_destroy_id) 250 if (ib_conn->cma_id != NULL && can_destroy_id)
314 rdma_destroy_id(ib_conn->cma_id); 251 rdma_destroy_id(ib_conn->cma_id);
@@ -318,18 +255,6 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id)
318 ib_conn->cma_id = NULL; 255 ib_conn->cma_id = NULL;
319 kfree(ib_conn->page_vec); 256 kfree(ib_conn->page_vec);
320 257
321 if (ib_conn->login_buf) {
322 if (ib_conn->login_req_dma)
323 ib_dma_unmap_single(ib_conn->device->ib_device,
324 ib_conn->login_req_dma,
325 ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
326 if (ib_conn->login_resp_dma)
327 ib_dma_unmap_single(ib_conn->device->ib_device,
328 ib_conn->login_resp_dma,
329 ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
330 kfree(ib_conn->login_buf);
331 }
332
333 return 0; 258 return 0;
334} 259}
335 260
@@ -657,9 +582,8 @@ id_failure:
657 ib_conn->cma_id = NULL; 582 ib_conn->cma_id = NULL;
658addr_failure: 583addr_failure:
659 ib_conn->state = ISER_CONN_DOWN; 584 ib_conn->state = ISER_CONN_DOWN;
660 iser_conn_put(ib_conn, 1); /* deref ib conn's cma id */
661connect_failure: 585connect_failure:
662 iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */ 586 iser_conn_release(ib_conn, 1);
663 return err; 587 return err;
664} 588}
665 589
@@ -734,11 +658,11 @@ int iser_post_recvl(struct iser_conn *ib_conn)
734 struct ib_sge sge; 658 struct ib_sge sge;
735 int ib_ret; 659 int ib_ret;
736 660
737 sge.addr = ib_conn->login_resp_dma; 661 sge.addr = ib_conn->login_dma;
738 sge.length = ISER_RX_LOGIN_SIZE; 662 sge.length = ISER_RX_LOGIN_SIZE;
739 sge.lkey = ib_conn->device->mr->lkey; 663 sge.lkey = ib_conn->device->mr->lkey;
740 664
741 rx_wr.wr_id = (unsigned long)ib_conn->login_resp_buf; 665 rx_wr.wr_id = (unsigned long)ib_conn->login_buf;
742 rx_wr.sg_list = &sge; 666 rx_wr.sg_list = &sge;
743 rx_wr.num_sge = 1; 667 rx_wr.num_sge = 1;
744 rx_wr.next = NULL; 668 rx_wr.next = NULL;
@@ -835,9 +759,9 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc,
835 } 759 }
836} 760}
837 761
838static int iser_drain_tx_cq(struct iser_device *device, int cq_index) 762static int iser_drain_tx_cq(struct iser_device *device)
839{ 763{
840 struct ib_cq *cq = device->tx_cq[cq_index]; 764 struct ib_cq *cq = device->tx_cq;
841 struct ib_wc wc; 765 struct ib_wc wc;
842 struct iser_tx_desc *tx_desc; 766 struct iser_tx_desc *tx_desc;
843 struct iser_conn *ib_conn; 767 struct iser_conn *ib_conn;
@@ -866,10 +790,8 @@ static int iser_drain_tx_cq(struct iser_device *device, int cq_index)
866 790
867static void iser_cq_tasklet_fn(unsigned long data) 791static void iser_cq_tasklet_fn(unsigned long data)
868{ 792{
869 struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)data; 793 struct iser_device *device = (struct iser_device *)data;
870 struct iser_device *device = cq_desc->device; 794 struct ib_cq *cq = device->rx_cq;
871 int cq_index = cq_desc->cq_index;
872 struct ib_cq *cq = device->rx_cq[cq_index];
873 struct ib_wc wc; 795 struct ib_wc wc;
874 struct iser_rx_desc *desc; 796 struct iser_rx_desc *desc;
875 unsigned long xfer_len; 797 unsigned long xfer_len;
@@ -897,21 +819,19 @@ static void iser_cq_tasklet_fn(unsigned long data)
897 } 819 }
898 completed_rx++; 820 completed_rx++;
899 if (!(completed_rx & 63)) 821 if (!(completed_rx & 63))
900 completed_tx += iser_drain_tx_cq(device, cq_index); 822 completed_tx += iser_drain_tx_cq(device);
901 } 823 }
902 /* #warning "it is assumed here that arming CQ only once its empty" * 824 /* #warning "it is assumed here that arming CQ only once its empty" *
903 * " would not cause interrupts to be missed" */ 825 * " would not cause interrupts to be missed" */
904 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 826 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
905 827
906 completed_tx += iser_drain_tx_cq(device, cq_index); 828 completed_tx += iser_drain_tx_cq(device);
907 iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx); 829 iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
908} 830}
909 831
910static void iser_cq_callback(struct ib_cq *cq, void *cq_context) 832static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
911{ 833{
912 struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)cq_context; 834 struct iser_device *device = (struct iser_device *)cq_context;
913 struct iser_device *device = cq_desc->device;
914 int cq_index = cq_desc->cq_index;
915 835
916 tasklet_schedule(&device->cq_tasklet[cq_index]); 836 tasklet_schedule(&device->cq_tasklet);
917} 837}
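
With the CQ array gone, iser_cq_tasklet_fn() is back to the single-queue shape, and the in-code #warning spells out the assumption it rests on: the CQ is re-armed only once it polls empty, and a final drain after the re-arm catches completions that slipped in during the window. A hedged user-space analogue of that drain, re-arm, drain-again ordering, using C11 atomics (all names invented; this models the pattern, not the verbs API):

#include <stdatomic.h>
#include <stdbool.h>

struct evq {
	atomic_int  pending;	/* events waiting; stands in for the CQ   */
	atomic_bool armed;	/* stands in for ib_req_notify_cq() state */
};

static int evq_drain(struct evq *q)
{
	int n = 0, v;

	for (;;) {
		v = atomic_load(&q->pending);
		if (v == 0)
			break;
		/* claim one event; retry if a producer raced us */
		if (atomic_compare_exchange_weak(&q->pending, &v, v - 1))
			n++;
	}
	return n;
}

static void evq_handler(struct evq *q)
{
	evq_drain(q);			/* 1. drain what is visible now    */
	atomic_store(&q->armed, true);	/* 2. re-arm the notification      */
	evq_drain(q);			/* 3. catch events that landed in  */
					/*    the window before the re-arm */
}

Without step 3, an event arriving between the last poll of step 1 and the re-arm in step 2 would neither be consumed nor trigger a fresh callback.
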
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index d5088ce7829..0bfa545675b 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -30,8 +30,6 @@
30 * SOFTWARE. 30 * SOFTWARE.
31 */ 31 */
32 32
33#define pr_fmt(fmt) PFX fmt
34
35#include <linux/module.h> 33#include <linux/module.h>
36#include <linux/init.h> 34#include <linux/init.h>
37#include <linux/slab.h> 35#include <linux/slab.h>
@@ -167,7 +165,7 @@ static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
167 165
168static void srp_qp_event(struct ib_event *event, void *context) 166static void srp_qp_event(struct ib_event *event, void *context)
169{ 167{
170 pr_debug("QP event %d\n", event->event); 168 printk(KERN_ERR PFX "QP event %d\n", event->event);
171} 169}
172 170
173static int srp_init_qp(struct srp_target_port *target, 171static int srp_init_qp(struct srp_target_port *target,
@@ -222,29 +220,27 @@ static int srp_new_cm_id(struct srp_target_port *target)
222static int srp_create_target_ib(struct srp_target_port *target) 220static int srp_create_target_ib(struct srp_target_port *target)
223{ 221{
224 struct ib_qp_init_attr *init_attr; 222 struct ib_qp_init_attr *init_attr;
225 struct ib_cq *recv_cq, *send_cq;
226 struct ib_qp *qp;
227 int ret; 223 int ret;
228 224
229 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); 225 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
230 if (!init_attr) 226 if (!init_attr)
231 return -ENOMEM; 227 return -ENOMEM;
232 228
233 recv_cq = ib_create_cq(target->srp_host->srp_dev->dev, 229 target->recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
234 srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0); 230 srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0);
235 if (IS_ERR(recv_cq)) { 231 if (IS_ERR(target->recv_cq)) {
236 ret = PTR_ERR(recv_cq); 232 ret = PTR_ERR(target->recv_cq);
237 goto err; 233 goto err;
238 } 234 }
239 235
240 send_cq = ib_create_cq(target->srp_host->srp_dev->dev, 236 target->send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
241 srp_send_completion, NULL, target, SRP_SQ_SIZE, 0); 237 srp_send_completion, NULL, target, SRP_SQ_SIZE, 0);
242 if (IS_ERR(send_cq)) { 238 if (IS_ERR(target->send_cq)) {
243 ret = PTR_ERR(send_cq); 239 ret = PTR_ERR(target->send_cq);
244 goto err_recv_cq; 240 goto err_recv_cq;
245 } 241 }
246 242
247 ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP); 243 ib_req_notify_cq(target->recv_cq, IB_CQ_NEXT_COMP);
248 244
249 init_attr->event_handler = srp_qp_event; 245 init_attr->event_handler = srp_qp_event;
250 init_attr->cap.max_send_wr = SRP_SQ_SIZE; 246 init_attr->cap.max_send_wr = SRP_SQ_SIZE;
@@ -253,41 +249,30 @@ static int srp_create_target_ib(struct srp_target_port *target)
253 init_attr->cap.max_send_sge = 1; 249 init_attr->cap.max_send_sge = 1;
254 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; 250 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
255 init_attr->qp_type = IB_QPT_RC; 251 init_attr->qp_type = IB_QPT_RC;
256 init_attr->send_cq = send_cq; 252 init_attr->send_cq = target->send_cq;
257 init_attr->recv_cq = recv_cq; 253 init_attr->recv_cq = target->recv_cq;
258 254
259 qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr); 255 target->qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr);
260 if (IS_ERR(qp)) { 256 if (IS_ERR(target->qp)) {
261 ret = PTR_ERR(qp); 257 ret = PTR_ERR(target->qp);
262 goto err_send_cq; 258 goto err_send_cq;
263 } 259 }
264 260
265 ret = srp_init_qp(target, qp); 261 ret = srp_init_qp(target, target->qp);
266 if (ret) 262 if (ret)
267 goto err_qp; 263 goto err_qp;
268 264
269 if (target->qp)
270 ib_destroy_qp(target->qp);
271 if (target->recv_cq)
272 ib_destroy_cq(target->recv_cq);
273 if (target->send_cq)
274 ib_destroy_cq(target->send_cq);
275
276 target->qp = qp;
277 target->recv_cq = recv_cq;
278 target->send_cq = send_cq;
279
280 kfree(init_attr); 265 kfree(init_attr);
281 return 0; 266 return 0;
282 267
283err_qp: 268err_qp:
284 ib_destroy_qp(qp); 269 ib_destroy_qp(target->qp);
285 270
286err_send_cq: 271err_send_cq:
287 ib_destroy_cq(send_cq); 272 ib_destroy_cq(target->send_cq);
288 273
289err_recv_cq: 274err_recv_cq:
290 ib_destroy_cq(recv_cq); 275 ib_destroy_cq(target->recv_cq);
291 276
292err: 277err:
293 kfree(init_attr); 278 kfree(init_attr);
@@ -302,9 +287,6 @@ static void srp_free_target_ib(struct srp_target_port *target)
302 ib_destroy_cq(target->send_cq); 287 ib_destroy_cq(target->send_cq);
303 ib_destroy_cq(target->recv_cq); 288 ib_destroy_cq(target->recv_cq);
304 289
305 target->qp = NULL;
306 target->send_cq = target->recv_cq = NULL;
307
308 for (i = 0; i < SRP_RQ_SIZE; ++i) 290 for (i = 0; i < SRP_RQ_SIZE; ++i)
309 srp_free_iu(target->srp_host, target->rx_ring[i]); 291 srp_free_iu(target->srp_host, target->rx_ring[i]);
310 for (i = 0; i < SRP_SQ_SIZE; ++i) 292 for (i = 0; i < SRP_SQ_SIZE; ++i)
@@ -444,50 +426,34 @@ static int srp_send_req(struct srp_target_port *target)
444 return status; 426 return status;
445} 427}
446 428
447static bool srp_queue_remove_work(struct srp_target_port *target) 429static void srp_disconnect_target(struct srp_target_port *target)
448{ 430{
449 bool changed = false; 431 /* XXX should send SRP_I_LOGOUT request */
450 432
451 spin_lock_irq(&target->lock); 433 init_completion(&target->done);
452 if (target->state != SRP_TARGET_REMOVED) { 434 if (ib_send_cm_dreq(target->cm_id, NULL, 0)) {
453 target->state = SRP_TARGET_REMOVED; 435 shost_printk(KERN_DEBUG, target->scsi_host,
454 changed = true; 436 PFX "Sending CM DREQ failed\n");
437 return;
455 } 438 }
456 spin_unlock_irq(&target->lock); 439 wait_for_completion(&target->done);
457
458 if (changed)
459 queue_work(system_long_wq, &target->remove_work);
460
461 return changed;
462} 440}
463 441
464static bool srp_change_conn_state(struct srp_target_port *target, 442static bool srp_change_state(struct srp_target_port *target,
465 bool connected) 443 enum srp_target_state old,
444 enum srp_target_state new)
466{ 445{
467 bool changed = false; 446 bool changed = false;
468 447
469 spin_lock_irq(&target->lock); 448 spin_lock_irq(&target->lock);
470 if (target->connected != connected) { 449 if (target->state == old) {
471 target->connected = connected; 450 target->state = new;
472 changed = true; 451 changed = true;
473 } 452 }
474 spin_unlock_irq(&target->lock); 453 spin_unlock_irq(&target->lock);
475
476 return changed; 454 return changed;
477} 455}
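
srp_change_state() replaces the old connected flag with an explicit compare-and-transition: the state moves from old to new only if it still holds old when the lock is taken, and the return value tells the caller whether it won the race. A pthread sketch of the same helper (the enum values mirror ib_srp.h below, the rest is illustrative):

#include <pthread.h>
#include <stdbool.h>

enum tgt_state { TGT_LIVE, TGT_CONNECTING, TGT_DEAD, TGT_REMOVED };

struct tgt {
	pthread_mutex_t lock;
	enum tgt_state  state;
};

static bool tgt_change_state(struct tgt *t, enum tgt_state old,
			     enum tgt_state new)
{
	bool changed = false;

	pthread_mutex_lock(&t->lock);
	if (t->state == old) {		/* transition only from the	*/
		t->state = new;		/* expected starting state	*/
		changed = true;
	}
	pthread_mutex_unlock(&t->lock);
	return changed;
}

Only one of several racing paths (reconnect, removal) can win a given transition such as LIVE to CONNECTING; the losers see false and back off, which is why srp_reconnect_target() below returns -EAGAIN when it loses.
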
478 456
479static void srp_disconnect_target(struct srp_target_port *target)
480{
481 if (srp_change_conn_state(target, false)) {
482 /* XXX should send SRP_I_LOGOUT request */
483
484 if (ib_send_cm_dreq(target->cm_id, NULL, 0)) {
485 shost_printk(KERN_DEBUG, target->scsi_host,
486 PFX "Sending CM DREQ failed\n");
487 }
488 }
489}
490
491static void srp_free_req_data(struct srp_target_port *target) 457static void srp_free_req_data(struct srp_target_port *target)
492{ 458{
493 struct ib_device *ibdev = target->srp_host->srp_dev->dev; 459 struct ib_device *ibdev = target->srp_host->srp_dev->dev;
@@ -506,65 +472,31 @@ static void srp_free_req_data(struct srp_target_port *target)
506 } 472 }
507} 473}
508 474
509/** 475static void srp_remove_work(struct work_struct *work)
510 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
511 * @shost: SCSI host whose attributes to remove from sysfs.
512 *
513 * Note: Any attributes defined in the host template and that did not exist
514 * before invocation of this function will be ignored.
515 */
516static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
517{ 476{
518 struct device_attribute **attr; 477 struct srp_target_port *target =
478 container_of(work, struct srp_target_port, work);
519 479
520 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr) 480 if (!srp_change_state(target, SRP_TARGET_DEAD, SRP_TARGET_REMOVED))
521 device_remove_file(&shost->shost_dev, *attr); 481 return;
522}
523 482
524static void srp_remove_target(struct srp_target_port *target) 483 spin_lock(&target->srp_host->target_lock);
525{ 484 list_del(&target->list);
526 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 485 spin_unlock(&target->srp_host->target_lock);
527 486
528 srp_del_scsi_host_attr(target->scsi_host);
529 srp_remove_host(target->scsi_host); 487 srp_remove_host(target->scsi_host);
530 scsi_remove_host(target->scsi_host); 488 scsi_remove_host(target->scsi_host);
531 srp_disconnect_target(target);
532 ib_destroy_cm_id(target->cm_id); 489 ib_destroy_cm_id(target->cm_id);
533 srp_free_target_ib(target); 490 srp_free_target_ib(target);
534 srp_free_req_data(target); 491 srp_free_req_data(target);
535 scsi_host_put(target->scsi_host); 492 scsi_host_put(target->scsi_host);
536} 493}
537 494
538static void srp_remove_work(struct work_struct *work)
539{
540 struct srp_target_port *target =
541 container_of(work, struct srp_target_port, remove_work);
542
543 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
544
545 spin_lock(&target->srp_host->target_lock);
546 list_del(&target->list);
547 spin_unlock(&target->srp_host->target_lock);
548
549 srp_remove_target(target);
550}
551
552static void srp_rport_delete(struct srp_rport *rport)
553{
554 struct srp_target_port *target = rport->lld_data;
555
556 srp_queue_remove_work(target);
557}
558
559static int srp_connect_target(struct srp_target_port *target) 495static int srp_connect_target(struct srp_target_port *target)
560{ 496{
561 int retries = 3; 497 int retries = 3;
562 int ret; 498 int ret;
563 499
564 WARN_ON_ONCE(target->connected);
565
566 target->qp_in_error = false;
567
568 ret = srp_lookup_path(target); 500 ret = srp_lookup_path(target);
569 if (ret) 501 if (ret)
570 return ret; 502 return ret;
@@ -584,7 +516,6 @@ static int srp_connect_target(struct srp_target_port *target)
584 */ 516 */
585 switch (target->status) { 517 switch (target->status) {
586 case 0: 518 case 0:
587 srp_change_conn_state(target, true);
588 return 0; 519 return 0;
589 520
590 case SRP_PORT_REDIRECT: 521 case SRP_PORT_REDIRECT:
@@ -637,74 +568,35 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
637 scmnd->sc_data_direction); 568 scmnd->sc_data_direction);
638} 569}
639 570
640/** 571static void srp_remove_req(struct srp_target_port *target,
641 * srp_claim_req - Take ownership of the scmnd associated with a request. 572 struct srp_request *req, s32 req_lim_delta)
642 * @target: SRP target port.
643 * @req: SRP request.
644 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
645 * ownership of @req->scmnd if it equals @scmnd.
646 *
647 * Return value:
648 * Either NULL or a pointer to the SCSI command the caller became owner of.
649 */
650static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target,
651 struct srp_request *req,
652 struct scsi_cmnd *scmnd)
653{ 573{
654 unsigned long flags; 574 unsigned long flags;
655 575
656 spin_lock_irqsave(&target->lock, flags); 576 srp_unmap_data(req->scmnd, target, req);
657 if (!scmnd) {
658 scmnd = req->scmnd;
659 req->scmnd = NULL;
660 } else if (req->scmnd == scmnd) {
661 req->scmnd = NULL;
662 } else {
663 scmnd = NULL;
664 }
665 spin_unlock_irqrestore(&target->lock, flags);
666
667 return scmnd;
668}
669
670/**
671 * srp_free_req() - Unmap data and add request to the free request list.
672 */
673static void srp_free_req(struct srp_target_port *target,
674 struct srp_request *req, struct scsi_cmnd *scmnd,
675 s32 req_lim_delta)
676{
677 unsigned long flags;
678
679 srp_unmap_data(scmnd, target, req);
680
681 spin_lock_irqsave(&target->lock, flags); 577 spin_lock_irqsave(&target->lock, flags);
682 target->req_lim += req_lim_delta; 578 target->req_lim += req_lim_delta;
579 req->scmnd = NULL;
683 list_add_tail(&req->list, &target->free_reqs); 580 list_add_tail(&req->list, &target->free_reqs);
684 spin_unlock_irqrestore(&target->lock, flags); 581 spin_unlock_irqrestore(&target->lock, flags);
685} 582}
686 583
687static void srp_reset_req(struct srp_target_port *target, struct srp_request *req) 584static void srp_reset_req(struct srp_target_port *target, struct srp_request *req)
688{ 585{
689 struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL); 586 req->scmnd->result = DID_RESET << 16;
690 587 req->scmnd->scsi_done(req->scmnd);
691 if (scmnd) { 588 srp_remove_req(target, req, 0);
692 srp_free_req(target, req, scmnd, 0);
693 scmnd->result = DID_RESET << 16;
694 scmnd->scsi_done(scmnd);
695 }
696} 589}
697 590
698static int srp_reconnect_target(struct srp_target_port *target) 591static int srp_reconnect_target(struct srp_target_port *target)
699{ 592{
700 struct Scsi_Host *shost = target->scsi_host; 593 struct ib_qp_attr qp_attr;
594 struct ib_wc wc;
701 int i, ret; 595 int i, ret;
702 596
703 if (target->state != SRP_TARGET_LIVE) 597 if (!srp_change_state(target, SRP_TARGET_LIVE, SRP_TARGET_CONNECTING))
704 return -EAGAIN; 598 return -EAGAIN;
705 599
706 scsi_target_block(&shost->shost_gendev);
707
708 srp_disconnect_target(target); 600 srp_disconnect_target(target);
709 /* 601 /*
710 * Now get a new local CM ID so that we avoid confusing the 602 * Now get a new local CM ID so that we avoid confusing the
@@ -712,11 +604,21 @@ static int srp_reconnect_target(struct srp_target_port *target)
712 */ 604 */
713 ret = srp_new_cm_id(target); 605 ret = srp_new_cm_id(target);
714 if (ret) 606 if (ret)
715 goto unblock; 607 goto err;
716 608
717 ret = srp_create_target_ib(target); 609 qp_attr.qp_state = IB_QPS_RESET;
610 ret = ib_modify_qp(target->qp, &qp_attr, IB_QP_STATE);
611 if (ret)
612 goto err;
613
614 ret = srp_init_qp(target, target->qp);
718 if (ret) 615 if (ret)
719 goto unblock; 616 goto err;
617
618 while (ib_poll_cq(target->recv_cq, 1, &wc) > 0)
619 ; /* nothing */
620 while (ib_poll_cq(target->send_cq, 1, &wc) > 0)
621 ; /* nothing */
720 622
721 for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { 623 for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
722 struct srp_request *req = &target->req_ring[i]; 624 struct srp_request *req = &target->req_ring[i];
@@ -728,16 +630,13 @@ static int srp_reconnect_target(struct srp_target_port *target)
728 for (i = 0; i < SRP_SQ_SIZE; ++i) 630 for (i = 0; i < SRP_SQ_SIZE; ++i)
729 list_add(&target->tx_ring[i]->list, &target->free_tx); 631 list_add(&target->tx_ring[i]->list, &target->free_tx);
730 632
633 target->qp_in_error = 0;
731 ret = srp_connect_target(target); 634 ret = srp_connect_target(target);
732
733unblock:
734 scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING :
735 SDEV_TRANSPORT_OFFLINE);
736
737 if (ret) 635 if (ret)
738 goto err; 636 goto err;
739 637
740 shost_printk(KERN_INFO, target->scsi_host, PFX "reconnect succeeded\n"); 638 if (!srp_change_state(target, SRP_TARGET_CONNECTING, SRP_TARGET_LIVE))
639 ret = -EAGAIN;
741 640
742 return ret; 641 return ret;
743 642
@@ -750,8 +649,17 @@ err:
750 * However, we have to defer the real removal because we 649 * However, we have to defer the real removal because we
751 * are in the context of the SCSI error handler now, which 650 * are in the context of the SCSI error handler now, which
752 * will deadlock if we call scsi_remove_host(). 651 * will deadlock if we call scsi_remove_host().
652 *
653 * Schedule our work inside the lock to avoid a race with
654 * the flush_scheduled_work() in srp_remove_one().
753 */ 655 */
754 srp_queue_remove_work(target); 656 spin_lock_irq(&target->lock);
657 if (target->state == SRP_TARGET_CONNECTING) {
658 target->state = SRP_TARGET_DEAD;
659 INIT_WORK(&target->work, srp_remove_work);
660 queue_work(ib_wq, &target->work);
661 }
662 spin_unlock_irq(&target->lock);
755 663
756 return ret; 664 return ret;
757} 665}
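
The tail of srp_reconnect_target() shows why the queueing happens inside the lock: the error path runs in SCSI error-handler context, where calling scsi_remove_host() directly would deadlock, so it marks the target SRP_TARGET_DEAD and defers the real teardown to a work item, and it does the check-and-queue atomically so the flush in srp_remove_one() cannot race past a half-scheduled removal. A compressed pthread analogue of the defer pattern (all names hypothetical):

#include <pthread.h>

enum tgt_state { TGT_LIVE, TGT_CONNECTING, TGT_DEAD, TGT_REMOVED };

struct tgt {
	pthread_mutex_t lock;
	enum tgt_state  state;
	int             work_queued;	/* stands in for queue_work() */
};

static void tgt_fail_from_eh(struct tgt *t)
{
	pthread_mutex_lock(&t->lock);
	if (t->state == TGT_CONNECTING) {
		t->state = TGT_DEAD;
		t->work_queued = 1;	/* kernel: queue_work(ib_wq, &work) */
	}
	pthread_mutex_unlock(&t->lock);
	/* the scsi_remove_host() equivalent runs later, in the worker's
	 * context, never in this (error handler) context */
}
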
@@ -1147,18 +1055,11 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
1147 complete(&target->tsk_mgmt_done); 1055 complete(&target->tsk_mgmt_done);
1148 } else { 1056 } else {
1149 req = &target->req_ring[rsp->tag]; 1057 req = &target->req_ring[rsp->tag];
1150 scmnd = srp_claim_req(target, req, NULL); 1058 scmnd = req->scmnd;
1151 if (!scmnd) { 1059 if (!scmnd)
1152 shost_printk(KERN_ERR, target->scsi_host, 1060 shost_printk(KERN_ERR, target->scsi_host,
1153 "Null scmnd for RSP w/tag %016llx\n", 1061 "Null scmnd for RSP w/tag %016llx\n",
1154 (unsigned long long) rsp->tag); 1062 (unsigned long long) rsp->tag);
1155
1156 spin_lock_irqsave(&target->lock, flags);
1157 target->req_lim += be32_to_cpu(rsp->req_lim_delta);
1158 spin_unlock_irqrestore(&target->lock, flags);
1159
1160 return;
1161 }
1162 scmnd->result = rsp->status; 1063 scmnd->result = rsp->status;
1163 1064
1164 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) { 1065 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
@@ -1173,9 +1074,7 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
1173 else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER)) 1074 else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER))
1174 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt)); 1075 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1175 1076
1176 srp_free_req(target, req, scmnd, 1077 srp_remove_req(target, req, be32_to_cpu(rsp->req_lim_delta));
1177 be32_to_cpu(rsp->req_lim_delta));
1178
1179 scmnd->host_scribble = NULL; 1078 scmnd->host_scribble = NULL;
1180 scmnd->scsi_done(scmnd); 1079 scmnd->scsi_done(scmnd);
1181 } 1080 }
@@ -1298,19 +1197,6 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
1298 PFX "Recv failed with error code %d\n", res); 1197 PFX "Recv failed with error code %d\n", res);
1299} 1198}
1300 1199
1301static void srp_handle_qp_err(enum ib_wc_status wc_status,
1302 enum ib_wc_opcode wc_opcode,
1303 struct srp_target_port *target)
1304{
1305 if (target->connected && !target->qp_in_error) {
1306 shost_printk(KERN_ERR, target->scsi_host,
1307 PFX "failed %s status %d\n",
1308 wc_opcode & IB_WC_RECV ? "receive" : "send",
1309 wc_status);
1310 }
1311 target->qp_in_error = true;
1312}
1313
1314static void srp_recv_completion(struct ib_cq *cq, void *target_ptr) 1200static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
1315{ 1201{
1316 struct srp_target_port *target = target_ptr; 1202 struct srp_target_port *target = target_ptr;
@@ -1318,11 +1204,15 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
1318 1204
1319 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 1205 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1320 while (ib_poll_cq(cq, 1, &wc) > 0) { 1206 while (ib_poll_cq(cq, 1, &wc) > 0) {
1321 if (likely(wc.status == IB_WC_SUCCESS)) { 1207 if (wc.status) {
1322 srp_handle_recv(target, &wc); 1208 shost_printk(KERN_ERR, target->scsi_host,
1323 } else { 1209 PFX "failed receive status %d\n",
1324 srp_handle_qp_err(wc.status, wc.opcode, target); 1210 wc.status);
1211 target->qp_in_error = 1;
1212 break;
1325 } 1213 }
1214
1215 srp_handle_recv(target, &wc);
1326 } 1216 }
1327} 1217}
1328 1218
@@ -1333,12 +1223,16 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
1333 struct srp_iu *iu; 1223 struct srp_iu *iu;
1334 1224
1335 while (ib_poll_cq(cq, 1, &wc) > 0) { 1225 while (ib_poll_cq(cq, 1, &wc) > 0) {
1336 if (likely(wc.status == IB_WC_SUCCESS)) { 1226 if (wc.status) {
1337 iu = (struct srp_iu *) (uintptr_t) wc.wr_id; 1227 shost_printk(KERN_ERR, target->scsi_host,
1338 list_add(&iu->list, &target->free_tx); 1228 PFX "failed send status %d\n",
1339 } else { 1229 wc.status);
1340 srp_handle_qp_err(wc.status, wc.opcode, target); 1230 target->qp_in_error = 1;
1231 break;
1341 } 1232 }
1233
1234 iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
1235 list_add(&iu->list, &target->free_tx);
1342 } 1236 }
1343} 1237}
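
srp_recv_completion() and srp_send_completion() now share one shape: poll one work completion at a time, and on the first nonzero status mark qp_in_error and stop draining instead of processing the entry. A generic sketch of that loop over an array of completions (the types here are invented stand-ins, not the ib_wc layout):

#include <stddef.h>

struct wc   { int status; void *cookie; };	/* stands in for ib_wc */
struct conn { int qp_in_error; };

/* Returns the number of good completions handled. */
static size_t drain_completions(struct conn *c, const struct wc *wcs,
				size_t n, void (*recycle)(void *))
{
	size_t i;

	for (i = 0; i < n; i++) {
		if (wcs[i].status) {		/* nonzero: failed completion */
			c->qp_in_error = 1;	/* poison the connection...   */
			break;			/* ...and stop draining       */
		}
		recycle(wcs[i].cookie);		/* e.g. iu back to free_tx    */
	}
	return i;
}

Once qp_in_error is set, recovery is left entirely to srp_reconnect_target(); the handlers themselves never retry.
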
1344 1238
@@ -1352,6 +1246,16 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
1352 unsigned long flags; 1246 unsigned long flags;
1353 int len; 1247 int len;
1354 1248
1249 if (target->state == SRP_TARGET_CONNECTING)
1250 goto err;
1251
1252 if (target->state == SRP_TARGET_DEAD ||
1253 target->state == SRP_TARGET_REMOVED) {
1254 scmnd->result = DID_BAD_TARGET << 16;
1255 scmnd->scsi_done(scmnd);
1256 return 0;
1257 }
1258
1355 spin_lock_irqsave(&target->lock, flags); 1259 spin_lock_irqsave(&target->lock, flags);
1356 iu = __srp_get_tx_iu(target, SRP_IU_CMD); 1260 iu = __srp_get_tx_iu(target, SRP_IU_CMD);
1357 if (!iu) 1261 if (!iu)
@@ -1408,6 +1312,7 @@ err_iu:
1408err_unlock: 1312err_unlock:
1409 spin_unlock_irqrestore(&target->lock, flags); 1313 spin_unlock_irqrestore(&target->lock, flags);
1410 1314
1315err:
1411 return SCSI_MLQUEUE_HOST_BUSY; 1316 return SCSI_MLQUEUE_HOST_BUSY;
1412} 1317}
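
The new checks at the top of srp_queuecommand() gate dispatch on target state: CONNECTING bounces the command back to the midlayer as SCSI_MLQUEUE_HOST_BUSY via the new err label, while DEAD or REMOVED completes it immediately with DID_BAD_TARGET. A sketch of the decision as a standalone helper (the verdict enum is invented for illustration):

enum tgt_state { TGT_LIVE, TGT_CONNECTING, TGT_DEAD, TGT_REMOVED };

enum verdict { DISPATCH, RETRY_LATER, FAIL_BAD_TARGET };

static enum verdict gate_command(enum tgt_state state)
{
	switch (state) {
	case TGT_CONNECTING:
		return RETRY_LATER;	/* SCSI_MLQUEUE_HOST_BUSY: the   */
					/* midlayer will resubmit later  */
	case TGT_DEAD:
	case TGT_REMOVED:
		return FAIL_BAD_TARGET;	/* DID_BAD_TARGET << 16: fail    */
					/* fast, target is going away    */
	default:
		return DISPATCH;	/* build and post the IU         */
	}
}
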
1413 1318
@@ -1449,33 +1354,6 @@ err:
1449 return -ENOMEM; 1354 return -ENOMEM;
1450} 1355}
1451 1356
1452static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
1453{
1454 uint64_t T_tr_ns, max_compl_time_ms;
1455 uint32_t rq_tmo_jiffies;
1456
1457 /*
1458 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
1459 * table 91), both the QP timeout and the retry count have to be set
1460 * for RC QP's during the RTR to RTS transition.
1461 */
1462 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
1463 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
1464
1465 /*
1466 * Set target->rq_tmo_jiffies to one second more than the largest time
1467 * it can take before an error completion is generated. See also
1468 * C9-140..142 in the IBTA spec for more information about how to
1469 * convert the QP Local ACK Timeout value to nanoseconds.
1470 */
1471 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
1472 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
1473 do_div(max_compl_time_ms, NSEC_PER_MSEC);
1474 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
1475
1476 return rq_tmo_jiffies;
1477}
1478
1479static void srp_cm_rep_handler(struct ib_cm_id *cm_id, 1357static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
1480 struct srp_login_rsp *lrsp, 1358 struct srp_login_rsp *lrsp,
1481 struct srp_target_port *target) 1359 struct srp_target_port *target)
@@ -1535,8 +1413,6 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
1535 if (ret) 1413 if (ret)
1536 goto error_free; 1414 goto error_free;
1537 1415
1538 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
1539
1540 ret = ib_modify_qp(target->qp, qp_attr, attr_mask); 1416 ret = ib_modify_qp(target->qp, qp_attr, attr_mask);
1541 if (ret) 1417 if (ret)
1542 goto error_free; 1418 goto error_free;
@@ -1658,7 +1534,6 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1658 case IB_CM_DREQ_RECEIVED: 1534 case IB_CM_DREQ_RECEIVED:
1659 shost_printk(KERN_WARNING, target->scsi_host, 1535 shost_printk(KERN_WARNING, target->scsi_host,
1660 PFX "DREQ received - connection closed\n"); 1536 PFX "DREQ received - connection closed\n");
1661 srp_change_conn_state(target, false);
1662 if (ib_send_cm_drep(cm_id, NULL, 0)) 1537 if (ib_send_cm_drep(cm_id, NULL, 0))
1663 shost_printk(KERN_ERR, target->scsi_host, 1538 shost_printk(KERN_ERR, target->scsi_host,
1664 PFX "Sending CM DREP failed\n"); 1539 PFX "Sending CM DREP failed\n");
@@ -1668,6 +1543,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1668 shost_printk(KERN_ERR, target->scsi_host, 1543 shost_printk(KERN_ERR, target->scsi_host,
1669 PFX "connection closed\n"); 1544 PFX "connection closed\n");
1670 1545
1546 comp = 1;
1671 target->status = 0; 1547 target->status = 0;
1672 break; 1548 break;
1673 1549
@@ -1695,6 +1571,10 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
1695 struct srp_iu *iu; 1571 struct srp_iu *iu;
1696 struct srp_tsk_mgmt *tsk_mgmt; 1572 struct srp_tsk_mgmt *tsk_mgmt;
1697 1573
1574 if (target->state == SRP_TARGET_DEAD ||
1575 target->state == SRP_TARGET_REMOVED)
1576 return -1;
1577
1698 init_completion(&target->tsk_mgmt_done); 1578 init_completion(&target->tsk_mgmt_done);
1699 1579
1700 spin_lock_irq(&target->lock); 1580 spin_lock_irq(&target->lock);
@@ -1733,18 +1613,25 @@ static int srp_abort(struct scsi_cmnd *scmnd)
1733{ 1613{
1734 struct srp_target_port *target = host_to_target(scmnd->device->host); 1614 struct srp_target_port *target = host_to_target(scmnd->device->host);
1735 struct srp_request *req = (struct srp_request *) scmnd->host_scribble; 1615 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
1616 int ret = SUCCESS;
1736 1617
1737 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); 1618 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
1738 1619
1739 if (!req || target->qp_in_error || !srp_claim_req(target, req, scmnd)) 1620 if (!req || target->qp_in_error)
1621 return FAILED;
1622 if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
1623 SRP_TSK_ABORT_TASK))
1740 return FAILED; 1624 return FAILED;
1741 srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
1742 SRP_TSK_ABORT_TASK);
1743 srp_free_req(target, req, scmnd, 0);
1744 scmnd->result = DID_ABORT << 16;
1745 scmnd->scsi_done(scmnd);
1746 1625
1747 return SUCCESS; 1626 if (req->scmnd) {
1627 if (!target->tsk_mgmt_status) {
1628 srp_remove_req(target, req, 0);
1629 scmnd->result = DID_ABORT << 16;
1630 } else
1631 ret = FAILED;
1632 }
1633
1634 return ret;
1748} 1635}
1749 1636
1750static int srp_reset_device(struct scsi_cmnd *scmnd) 1637static int srp_reset_device(struct scsi_cmnd *scmnd)
@@ -1784,26 +1671,15 @@ static int srp_reset_host(struct scsi_cmnd *scmnd)
1784 return ret; 1671 return ret;
1785} 1672}
1786 1673
1787static int srp_slave_configure(struct scsi_device *sdev)
1788{
1789 struct Scsi_Host *shost = sdev->host;
1790 struct srp_target_port *target = host_to_target(shost);
1791 struct request_queue *q = sdev->request_queue;
1792 unsigned long timeout;
1793
1794 if (sdev->type == TYPE_DISK) {
1795 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
1796 blk_queue_rq_timeout(q, timeout);
1797 }
1798
1799 return 0;
1800}
1801
1802static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, 1674static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
1803 char *buf) 1675 char *buf)
1804{ 1676{
1805 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 1677 struct srp_target_port *target = host_to_target(class_to_shost(dev));
1806 1678
1679 if (target->state == SRP_TARGET_DEAD ||
1680 target->state == SRP_TARGET_REMOVED)
1681 return -ENODEV;
1682
1807 return sprintf(buf, "0x%016llx\n", 1683 return sprintf(buf, "0x%016llx\n",
1808 (unsigned long long) be64_to_cpu(target->id_ext)); 1684 (unsigned long long) be64_to_cpu(target->id_ext));
1809} 1685}
@@ -1813,6 +1689,10 @@ static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
1813{ 1689{
1814 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 1690 struct srp_target_port *target = host_to_target(class_to_shost(dev));
1815 1691
1692 if (target->state == SRP_TARGET_DEAD ||
1693 target->state == SRP_TARGET_REMOVED)
1694 return -ENODEV;
1695
1816 return sprintf(buf, "0x%016llx\n", 1696 return sprintf(buf, "0x%016llx\n",
1817 (unsigned long long) be64_to_cpu(target->ioc_guid)); 1697 (unsigned long long) be64_to_cpu(target->ioc_guid));
1818} 1698}
@@ -1822,6 +1702,10 @@ static ssize_t show_service_id(struct device *dev,
1822{ 1702{
1823 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 1703 struct srp_target_port *target = host_to_target(class_to_shost(dev));
1824 1704
1705 if (target->state == SRP_TARGET_DEAD ||
1706 target->state == SRP_TARGET_REMOVED)
1707 return -ENODEV;
1708
1825 return sprintf(buf, "0x%016llx\n", 1709 return sprintf(buf, "0x%016llx\n",
1826 (unsigned long long) be64_to_cpu(target->service_id)); 1710 (unsigned long long) be64_to_cpu(target->service_id));
1827} 1711}
@@ -1831,6 +1715,10 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
1831{ 1715{
1832 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 1716 struct srp_target_port *target = host_to_target(class_to_shost(dev));
1833 1717
1718 if (target->state == SRP_TARGET_DEAD ||
1719 target->state == SRP_TARGET_REMOVED)
1720 return -ENODEV;
1721
1834 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey)); 1722 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey));
1835} 1723}
1836 1724
@@ -1839,6 +1727,10 @@ static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
1839{ 1727{
1840 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 1728 struct srp_target_port *target = host_to_target(class_to_shost(dev));
1841 1729
1730 if (target->state == SRP_TARGET_DEAD ||
1731 target->state == SRP_TARGET_REMOVED)
1732 return -ENODEV;
1733
1842 return sprintf(buf, "%pI6\n", target->path.dgid.raw); 1734 return sprintf(buf, "%pI6\n", target->path.dgid.raw);
1843} 1735}
1844 1736
@@ -1847,6 +1739,10 @@ static ssize_t show_orig_dgid(struct device *dev,
1847{ 1739{
1848 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 1740 struct srp_target_port *target = host_to_target(class_to_shost(dev));
1849 1741
1742 if (target->state == SRP_TARGET_DEAD ||
1743 target->state == SRP_TARGET_REMOVED)
1744 return -ENODEV;
1745
1850 return sprintf(buf, "%pI6\n", target->orig_dgid); 1746 return sprintf(buf, "%pI6\n", target->orig_dgid);
1851} 1747}
1852 1748
@@ -1855,6 +1751,10 @@ static ssize_t show_req_lim(struct device *dev,
1855{ 1751{
1856 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 1752 struct srp_target_port *target = host_to_target(class_to_shost(dev));
1857 1753
1754 if (target->state == SRP_TARGET_DEAD ||
1755 target->state == SRP_TARGET_REMOVED)
1756 return -ENODEV;
1757
1858 return sprintf(buf, "%d\n", target->req_lim); 1758 return sprintf(buf, "%d\n", target->req_lim);
1859} 1759}
1860 1760
@@ -1863,6 +1763,10 @@ static ssize_t show_zero_req_lim(struct device *dev,
1863{ 1763{
1864 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 1764 struct srp_target_port *target = host_to_target(class_to_shost(dev));
1865 1765
1766 if (target->state == SRP_TARGET_DEAD ||
1767 target->state == SRP_TARGET_REMOVED)
1768 return -ENODEV;
1769
1866 return sprintf(buf, "%d\n", target->zero_req_lim); 1770 return sprintf(buf, "%d\n", target->zero_req_lim);
1867} 1771}
1868 1772
@@ -1931,7 +1835,6 @@ static struct scsi_host_template srp_template = {
1931 .module = THIS_MODULE, 1835 .module = THIS_MODULE,
1932 .name = "InfiniBand SRP initiator", 1836 .name = "InfiniBand SRP initiator",
1933 .proc_name = DRV_NAME, 1837 .proc_name = DRV_NAME,
1934 .slave_configure = srp_slave_configure,
1935 .info = srp_target_info, 1838 .info = srp_target_info,
1936 .queuecommand = srp_queuecommand, 1839 .queuecommand = srp_queuecommand,
1937 .eh_abort_handler = srp_abort, 1840 .eh_abort_handler = srp_abort,
@@ -1965,14 +1868,11 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
1965 return PTR_ERR(rport); 1868 return PTR_ERR(rport);
1966 } 1869 }
1967 1870
1968 rport->lld_data = target;
1969
1970 spin_lock(&host->target_lock); 1871 spin_lock(&host->target_lock);
1971 list_add_tail(&target->list, &host->target_list); 1872 list_add_tail(&target->list, &host->target_list);
1972 spin_unlock(&host->target_lock); 1873 spin_unlock(&host->target_lock);
1973 1874
1974 target->state = SRP_TARGET_LIVE; 1875 target->state = SRP_TARGET_LIVE;
1975 target->connected = false;
1976 1876
1977 scsi_scan_target(&target->scsi_host->shost_gendev, 1877 scsi_scan_target(&target->scsi_host->shost_gendev,
1978 0, target->scsi_id, SCAN_WILD_CARD, 0); 1878 0, target->scsi_id, SCAN_WILD_CARD, 0);
@@ -2089,7 +1989,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2089 goto out; 1989 goto out;
2090 } 1990 }
2091 if (strlen(p) != 32) { 1991 if (strlen(p) != 32) {
2092 pr_warn("bad dest GID parameter '%s'\n", p); 1992 printk(KERN_WARNING PFX "bad dest GID parameter '%s'\n", p);
2093 kfree(p); 1993 kfree(p);
2094 goto out; 1994 goto out;
2095 } 1995 }
@@ -2104,7 +2004,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2104 2004
2105 case SRP_OPT_PKEY: 2005 case SRP_OPT_PKEY:
2106 if (match_hex(args, &token)) { 2006 if (match_hex(args, &token)) {
2107 pr_warn("bad P_Key parameter '%s'\n", p); 2007 printk(KERN_WARNING PFX "bad P_Key parameter '%s'\n", p);
2108 goto out; 2008 goto out;
2109 } 2009 }
2110 target->path.pkey = cpu_to_be16(token); 2010 target->path.pkey = cpu_to_be16(token);
@@ -2123,7 +2023,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2123 2023
2124 case SRP_OPT_MAX_SECT: 2024 case SRP_OPT_MAX_SECT:
2125 if (match_int(args, &token)) { 2025 if (match_int(args, &token)) {
2126 pr_warn("bad max sect parameter '%s'\n", p); 2026 printk(KERN_WARNING PFX "bad max sect parameter '%s'\n", p);
2127 goto out; 2027 goto out;
2128 } 2028 }
2129 target->scsi_host->max_sectors = token; 2029 target->scsi_host->max_sectors = token;
@@ -2131,8 +2031,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2131 2031
2132 case SRP_OPT_MAX_CMD_PER_LUN: 2032 case SRP_OPT_MAX_CMD_PER_LUN:
2133 if (match_int(args, &token)) { 2033 if (match_int(args, &token)) {
2134 pr_warn("bad max cmd_per_lun parameter '%s'\n", 2034 printk(KERN_WARNING PFX "bad max cmd_per_lun parameter '%s'\n", p);
2135 p);
2136 goto out; 2035 goto out;
2137 } 2036 }
2138 target->scsi_host->cmd_per_lun = min(token, SRP_CMD_SQ_SIZE); 2037 target->scsi_host->cmd_per_lun = min(token, SRP_CMD_SQ_SIZE);
@@ -2140,14 +2039,14 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2140 2039
2141 case SRP_OPT_IO_CLASS: 2040 case SRP_OPT_IO_CLASS:
2142 if (match_hex(args, &token)) { 2041 if (match_hex(args, &token)) {
2143 pr_warn("bad IO class parameter '%s'\n", p); 2042 printk(KERN_WARNING PFX "bad IO class parameter '%s' \n", p);
2144 goto out; 2043 goto out;
2145 } 2044 }
2146 if (token != SRP_REV10_IB_IO_CLASS && 2045 if (token != SRP_REV10_IB_IO_CLASS &&
2147 token != SRP_REV16A_IB_IO_CLASS) { 2046 token != SRP_REV16A_IB_IO_CLASS) {
2148 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n", 2047 printk(KERN_WARNING PFX "unknown IO class parameter value"
2149 token, SRP_REV10_IB_IO_CLASS, 2048 " %x specified (use %x or %x).\n",
2150 SRP_REV16A_IB_IO_CLASS); 2049 token, SRP_REV10_IB_IO_CLASS, SRP_REV16A_IB_IO_CLASS);
2151 goto out; 2050 goto out;
2152 } 2051 }
2153 target->io_class = token; 2052 target->io_class = token;
@@ -2165,8 +2064,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2165 2064
2166 case SRP_OPT_CMD_SG_ENTRIES: 2065 case SRP_OPT_CMD_SG_ENTRIES:
2167 if (match_int(args, &token) || token < 1 || token > 255) { 2066 if (match_int(args, &token) || token < 1 || token > 255) {
2168 pr_warn("bad max cmd_sg_entries parameter '%s'\n", 2067 printk(KERN_WARNING PFX "bad max cmd_sg_entries parameter '%s'\n", p);
2169 p);
2170 goto out; 2068 goto out;
2171 } 2069 }
2172 target->cmd_sg_cnt = token; 2070 target->cmd_sg_cnt = token;
@@ -2174,7 +2072,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2174 2072
2175 case SRP_OPT_ALLOW_EXT_SG: 2073 case SRP_OPT_ALLOW_EXT_SG:
2176 if (match_int(args, &token)) { 2074 if (match_int(args, &token)) {
2177 pr_warn("bad allow_ext_sg parameter '%s'\n", p); 2075 printk(KERN_WARNING PFX "bad allow_ext_sg parameter '%s'\n", p);
2178 goto out; 2076 goto out;
2179 } 2077 }
2180 target->allow_ext_sg = !!token; 2078 target->allow_ext_sg = !!token;
@@ -2183,16 +2081,15 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2183 case SRP_OPT_SG_TABLESIZE: 2081 case SRP_OPT_SG_TABLESIZE:
2184 if (match_int(args, &token) || token < 1 || 2082 if (match_int(args, &token) || token < 1 ||
2185 token > SCSI_MAX_SG_CHAIN_SEGMENTS) { 2083 token > SCSI_MAX_SG_CHAIN_SEGMENTS) {
2186 pr_warn("bad max sg_tablesize parameter '%s'\n", 2084 printk(KERN_WARNING PFX "bad max sg_tablesize parameter '%s'\n", p);
2187 p);
2188 goto out; 2085 goto out;
2189 } 2086 }
2190 target->sg_tablesize = token; 2087 target->sg_tablesize = token;
2191 break; 2088 break;
2192 2089
2193 default: 2090 default:
2194 pr_warn("unknown parameter or missing value '%s' in target creation request\n", 2091 printk(KERN_WARNING PFX "unknown parameter or missing value "
2195 p); 2092 "'%s' in target creation request\n", p);
2196 goto out; 2093 goto out;
2197 } 2094 }
2198 } 2095 }
@@ -2203,8 +2100,9 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2203 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i) 2100 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
2204 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) && 2101 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
2205 !(srp_opt_tokens[i].token & opt_mask)) 2102 !(srp_opt_tokens[i].token & opt_mask))
2206 pr_warn("target creation request is missing parameter '%s'\n", 2103 printk(KERN_WARNING PFX "target creation request is "
2207 srp_opt_tokens[i].pattern); 2104 "missing parameter '%s'\n",
2105 srp_opt_tokens[i].pattern);
2208 2106
2209out: 2107out:
2210 kfree(options); 2108 kfree(options);
@@ -2251,7 +2149,7 @@ static ssize_t srp_create_target(struct device *dev,
2251 2149
2252 if (!host->srp_dev->fmr_pool && !target->allow_ext_sg && 2150 if (!host->srp_dev->fmr_pool && !target->allow_ext_sg &&
2253 target->cmd_sg_cnt < target->sg_tablesize) { 2151 target->cmd_sg_cnt < target->sg_tablesize) {
2254 pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); 2152 printk(KERN_WARNING PFX "No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
2255 target->sg_tablesize = target->cmd_sg_cnt; 2153 target->sg_tablesize = target->cmd_sg_cnt;
2256 } 2154 }
2257 2155
@@ -2262,7 +2160,6 @@ static ssize_t srp_create_target(struct device *dev,
2262 sizeof (struct srp_indirect_buf) + 2160 sizeof (struct srp_indirect_buf) +
2263 target->cmd_sg_cnt * sizeof (struct srp_direct_buf); 2161 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
2264 2162
2265 INIT_WORK(&target->remove_work, srp_remove_work);
2266 spin_lock_init(&target->lock); 2163 spin_lock_init(&target->lock);
2267 INIT_LIST_HEAD(&target->free_tx); 2164 INIT_LIST_HEAD(&target->free_tx);
2268 INIT_LIST_HEAD(&target->free_reqs); 2165 INIT_LIST_HEAD(&target->free_reqs);
@@ -2307,6 +2204,7 @@ static ssize_t srp_create_target(struct device *dev,
2307 if (ret) 2204 if (ret)
2308 goto err_free_ib; 2205 goto err_free_ib;
2309 2206
2207 target->qp_in_error = 0;
2310 ret = srp_connect_target(target); 2208 ret = srp_connect_target(target);
2311 if (ret) { 2209 if (ret) {
2312 shost_printk(KERN_ERR, target->scsi_host, 2210 shost_printk(KERN_ERR, target->scsi_host,
@@ -2411,7 +2309,8 @@ static void srp_add_one(struct ib_device *device)
2411 return; 2309 return;
2412 2310
2413 if (ib_query_device(device, dev_attr)) { 2311 if (ib_query_device(device, dev_attr)) {
2414 pr_warn("Query device failed for %s\n", device->name); 2312 printk(KERN_WARNING PFX "Query device failed for %s\n",
2313 device->name);
2415 goto free_attr; 2314 goto free_attr;
2416 } 2315 }
2417 2316
@@ -2496,7 +2395,8 @@ static void srp_remove_one(struct ib_device *device)
2496{ 2395{
2497 struct srp_device *srp_dev; 2396 struct srp_device *srp_dev;
2498 struct srp_host *host, *tmp_host; 2397 struct srp_host *host, *tmp_host;
2499 struct srp_target_port *target; 2398 LIST_HEAD(target_list);
2399 struct srp_target_port *target, *tmp_target;
2500 2400
2501 srp_dev = ib_get_client_data(device, &srp_client); 2401 srp_dev = ib_get_client_data(device, &srp_client);
2502 2402
@@ -2509,17 +2409,34 @@ static void srp_remove_one(struct ib_device *device)
2509 wait_for_completion(&host->released); 2409 wait_for_completion(&host->released);
2510 2410
2511 /* 2411 /*
2512 * Remove all target ports. 2412 * Mark all target ports as removed, so we stop queueing
2413 * commands and don't try to reconnect.
2513 */ 2414 */
2514 spin_lock(&host->target_lock); 2415 spin_lock(&host->target_lock);
2515 list_for_each_entry(target, &host->target_list, list) 2416 list_for_each_entry(target, &host->target_list, list) {
2516 srp_queue_remove_work(target); 2417 spin_lock_irq(&target->lock);
2418 target->state = SRP_TARGET_REMOVED;
2419 spin_unlock_irq(&target->lock);
2420 }
2517 spin_unlock(&host->target_lock); 2421 spin_unlock(&host->target_lock);
2518 2422
2519 /* 2423 /*
2520 * Wait for target port removal tasks. 2424 * Wait for any reconnection tasks that may have
2425 * started before we marked our target ports as
2426 * removed, and any target port removal tasks.
2521 */ 2427 */
2522 flush_workqueue(system_long_wq); 2428 flush_workqueue(ib_wq);
2429
2430 list_for_each_entry_safe(target, tmp_target,
2431 &host->target_list, list) {
2432 srp_remove_host(target->scsi_host);
2433 scsi_remove_host(target->scsi_host);
2434 srp_disconnect_target(target);
2435 ib_destroy_cm_id(target->cm_id);
2436 srp_free_target_ib(target);
2437 srp_free_req_data(target);
2438 scsi_host_put(target->scsi_host);
2439 }
2523 2440
2524 kfree(host); 2441 kfree(host);
2525 } 2442 }
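
srp_remove_one() becomes a three-step shutdown: mark every target SRP_TARGET_REMOVED under both locks so queuecommand and reconnect stop touching them, flush ib_wq so any already-queued reconnect or removal work drains, and only then walk the list and tear each target down. A rough pthread analogue of the ordering (pthread_join stands in for flush_workqueue; everything here is a simplified model):

#include <pthread.h>

struct tgt {
	pthread_mutex_t lock;
	int             removed;
	struct tgt     *next;
};

static void teardown(struct tgt *t)
{
	(void)t;	/* disconnect, destroy cm_id, free IB resources */
}

static void remove_all(struct tgt *list, pthread_t worker)
{
	struct tgt *t;

	for (t = list; t; t = t->next) {	/* 1. mark under the lock  */
		pthread_mutex_lock(&t->lock);
		t->removed = 1;
		pthread_mutex_unlock(&t->lock);
	}

	pthread_join(worker, NULL);		/* 2. drain in-flight work */
						/*    (flush_workqueue)    */

	for (t = list; t; t = t->next)		/* 3. now safe to free     */
		teardown(t);
}

Freeing before the flush would let a still-running work item dereference a dead target, which is exactly the race the marking-then-flushing order closes.
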
@@ -2533,7 +2450,6 @@ static void srp_remove_one(struct ib_device *device)
2533} 2450}
2534 2451
2535static struct srp_function_template ib_srp_transport_functions = { 2452static struct srp_function_template ib_srp_transport_functions = {
2536 .rport_delete = srp_rport_delete,
2537}; 2453};
2538 2454
2539static int __init srp_init_module(void) 2455static int __init srp_init_module(void)
@@ -2543,7 +2459,7 @@ static int __init srp_init_module(void)
2543 BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *)); 2459 BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
2544 2460
2545 if (srp_sg_tablesize) { 2461 if (srp_sg_tablesize) {
2546 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n"); 2462 printk(KERN_WARNING PFX "srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
2547 if (!cmd_sg_entries) 2463 if (!cmd_sg_entries)
2548 cmd_sg_entries = srp_sg_tablesize; 2464 cmd_sg_entries = srp_sg_tablesize;
2549 } 2465 }
@@ -2552,15 +2468,14 @@ static int __init srp_init_module(void)
2552 cmd_sg_entries = SRP_DEF_SG_TABLESIZE; 2468 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
2553 2469
2554 if (cmd_sg_entries > 255) { 2470 if (cmd_sg_entries > 255) {
2555 pr_warn("Clamping cmd_sg_entries to 255\n"); 2471 printk(KERN_WARNING PFX "Clamping cmd_sg_entries to 255\n");
2556 cmd_sg_entries = 255; 2472 cmd_sg_entries = 255;
2557 } 2473 }
2558 2474
2559 if (!indirect_sg_entries) 2475 if (!indirect_sg_entries)
2560 indirect_sg_entries = cmd_sg_entries; 2476 indirect_sg_entries = cmd_sg_entries;
2561 else if (indirect_sg_entries < cmd_sg_entries) { 2477 else if (indirect_sg_entries < cmd_sg_entries) {
2562 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n", 2478 printk(KERN_WARNING PFX "Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n", cmd_sg_entries);
2563 cmd_sg_entries);
2564 indirect_sg_entries = cmd_sg_entries; 2479 indirect_sg_entries = cmd_sg_entries;
2565 } 2480 }
2566 2481
@@ -2571,7 +2486,7 @@ static int __init srp_init_module(void)
2571 2486
2572 ret = class_register(&srp_class); 2487 ret = class_register(&srp_class);
2573 if (ret) { 2488 if (ret) {
2574 pr_err("couldn't register class infiniband_srp\n"); 2489 printk(KERN_ERR PFX "couldn't register class infiniband_srp\n");
2575 srp_release_transport(ib_srp_transport_template); 2490 srp_release_transport(ib_srp_transport_template);
2576 return ret; 2491 return ret;
2577 } 2492 }
@@ -2580,7 +2495,7 @@ static int __init srp_init_module(void)
2580 2495
2581 ret = ib_register_client(&srp_client); 2496 ret = ib_register_client(&srp_client);
2582 if (ret) { 2497 if (ret) {
2583 pr_err("couldn't register IB client\n"); 2498 printk(KERN_ERR PFX "couldn't register IB client\n");
2584 srp_release_transport(ib_srp_transport_template); 2499 srp_release_transport(ib_srp_transport_template);
2585 ib_sa_unregister_client(&srp_sa_client); 2500 ib_sa_unregister_client(&srp_sa_client);
2586 class_unregister(&srp_class); 2501 class_unregister(&srp_class);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index de2d0b3c0bf..020caf0c378 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -80,7 +80,9 @@ enum {
80 80
81enum srp_target_state { 81enum srp_target_state {
82 SRP_TARGET_LIVE, 82 SRP_TARGET_LIVE,
83 SRP_TARGET_REMOVED, 83 SRP_TARGET_CONNECTING,
84 SRP_TARGET_DEAD,
85 SRP_TARGET_REMOVED
84}; 86};
85 87
86enum srp_iu_type { 88enum srp_iu_type {
@@ -161,9 +163,6 @@ struct srp_target_port {
161 struct ib_sa_query *path_query; 163 struct ib_sa_query *path_query;
162 int path_query_id; 164 int path_query_id;
163 165
164 u32 rq_tmo_jiffies;
165 bool connected;
166
167 struct ib_cm_id *cm_id; 166 struct ib_cm_id *cm_id;
168 167
169 int max_ti_iu_len; 168 int max_ti_iu_len;
@@ -174,12 +173,12 @@ struct srp_target_port {
174 struct srp_iu *rx_ring[SRP_RQ_SIZE]; 173 struct srp_iu *rx_ring[SRP_RQ_SIZE];
175 struct srp_request req_ring[SRP_CMD_SQ_SIZE]; 174 struct srp_request req_ring[SRP_CMD_SQ_SIZE];
176 175
177 struct work_struct remove_work; 176 struct work_struct work;
178 177
179 struct list_head list; 178 struct list_head list;
180 struct completion done; 179 struct completion done;
181 int status; 180 int status;
182 bool qp_in_error; 181 int qp_in_error;
183 182
184 struct completion tsk_mgmt_done; 183 struct completion tsk_mgmt_done;
185 u8 tsk_mgmt_status; 184 u8 tsk_mgmt_status;
diff --git a/drivers/infiniband/ulp/srpt/Kconfig b/drivers/infiniband/ulp/srpt/Kconfig
deleted file mode 100644
index 31ee83d528d..00000000000
--- a/drivers/infiniband/ulp/srpt/Kconfig
+++ /dev/null
@@ -1,12 +0,0 @@
1config INFINIBAND_SRPT
2 tristate "InfiniBand SCSI RDMA Protocol target support"
3 depends on INFINIBAND && TARGET_CORE
4 ---help---
5
6 Support for the SCSI RDMA Protocol (SRP) Target driver. The
7 SRP protocol is a protocol that allows an initiator to access
8 a block storage device on another host (target) over a network
9 that supports the RDMA protocol. Currently the RDMA protocol is
10 supported by InfiniBand and by iWarp network hardware. More
11 information about the SRP protocol can be found on the website
12 of the INCITS T10 technical committee (http://www.t10.org/).
diff --git a/drivers/infiniband/ulp/srpt/Makefile b/drivers/infiniband/ulp/srpt/Makefile
deleted file mode 100644
index e3ee4bdfffa..00000000000
--- a/drivers/infiniband/ulp/srpt/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
1ccflags-y := -Idrivers/target
2obj-$(CONFIG_INFINIBAND_SRPT) += ib_srpt.o
diff --git a/drivers/infiniband/ulp/srpt/ib_dm_mad.h b/drivers/infiniband/ulp/srpt/ib_dm_mad.h
deleted file mode 100644
index fb1de1f6f29..00000000000
--- a/drivers/infiniband/ulp/srpt/ib_dm_mad.h
+++ /dev/null
@@ -1,139 +0,0 @@
1/*
2 * Copyright (c) 2006 - 2009 Mellanox Technology Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33
34#ifndef IB_DM_MAD_H
35#define IB_DM_MAD_H
36
37#include <linux/types.h>
38
39#include <rdma/ib_mad.h>
40
41enum {
42 /*
43 * See also section 13.4.7 Status Field, table 115 MAD Common Status
44 * Field Bit Values and also section 16.3.1.1 Status Field in the
45 * InfiniBand Architecture Specification.
46 */
47 DM_MAD_STATUS_UNSUP_METHOD = 0x0008,
48 DM_MAD_STATUS_UNSUP_METHOD_ATTR = 0x000c,
49 DM_MAD_STATUS_INVALID_FIELD = 0x001c,
50 DM_MAD_STATUS_NO_IOC = 0x0100,
51
52 /*
53 * See also the Device Management chapter, section 16.3.3 Attributes,
54 * table 279 Device Management Attributes in the InfiniBand
55 * Architecture Specification.
56 */
57 DM_ATTR_CLASS_PORT_INFO = 0x01,
58 DM_ATTR_IOU_INFO = 0x10,
59 DM_ATTR_IOC_PROFILE = 0x11,
60 DM_ATTR_SVC_ENTRIES = 0x12
61};
62
63struct ib_dm_hdr {
64 u8 reserved[28];
65};
66
67/*
68 * Structure of management datagram sent by the SRP target implementation.
69 * Contains a management datagram header, reliable multi-packet transaction
70 * protocol (RMPP) header and ib_dm_hdr. Notes:
71 * - The SRP target implementation does not use RMPP or ib_dm_hdr when sending
72 * management datagrams.
73 * - The header size must be exactly 64 bytes (IB_MGMT_DEVICE_HDR), since this
74 * is the header size that is passed to ib_create_send_mad() in ib_srpt.c.
75 * - The maximum supported size for a management datagram when not using RMPP
76 * is 256 bytes -- 64 bytes header and 192 (IB_MGMT_DEVICE_DATA) bytes data.
77 */
78struct ib_dm_mad {
79 struct ib_mad_hdr mad_hdr;
80 struct ib_rmpp_hdr rmpp_hdr;
81 struct ib_dm_hdr dm_hdr;
82 u8 data[IB_MGMT_DEVICE_DATA];
83};
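A quick consistency check on the note above: struct ib_dm_hdr contributes 28 bytes, and the common MAD and RMPP headers from <rdma/ib_mad.h> are 24 and 12 bytes respectively, so the three leading members add up to 24 + 12 + 28 = 64 bytes, i.e. exactly IB_MGMT_DEVICE_HDR; together with the 192-byte (IB_MGMT_DEVICE_DATA) data array this fills the 256-byte maximum for a non-RMPP management datagram.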
84
85/*
86 * IOUnitInfo as defined in section 16.3.3.3 IOUnitInfo of the InfiniBand
87 * Architecture Specification.
88 */
89struct ib_dm_iou_info {
90 __be16 change_id;
91 u8 max_controllers;
92 u8 op_rom;
93 u8 controller_list[128];
94};
95
96/*
97 * IOControllerprofile as defined in section 16.3.3.4 IOControllerProfile of
98 * the InfiniBand Architecture Specification.
99 */
100struct ib_dm_ioc_profile {
101 __be64 guid;
102 __be32 vendor_id;
103 __be32 device_id;
104 __be16 device_version;
105 __be16 reserved1;
106 __be32 subsys_vendor_id;
107 __be32 subsys_device_id;
108 __be16 io_class;
109 __be16 io_subclass;
110 __be16 protocol;
111 __be16 protocol_version;
112 __be16 service_conn;
113 __be16 initiators_supported;
114 __be16 send_queue_depth;
115 u8 reserved2;
116 u8 rdma_read_depth;
117 __be32 send_size;
118 __be32 rdma_size;
119 u8 op_cap_mask;
120 u8 svc_cap_mask;
121 u8 num_svc_entries;
122 u8 reserved3[9];
123 u8 id_string[64];
124};
125
126struct ib_dm_svc_entry {
127 u8 name[40];
128 __be64 id;
129};
130
131/*
132 * See also section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
133 * Specification. See also section B.7, table B.8 in the T10 SRP r16a document.
134 */
135struct ib_dm_svc_entries {
136 struct ib_dm_svc_entry service_entries[4];
137};
138
139#endif
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
deleted file mode 100644
index c09d41b1a2f..00000000000
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ /dev/null
@@ -1,4018 +0,0 @@
1/*
2 * Copyright (c) 2006 - 2009 Mellanox Technology Inc. All rights reserved.
3 * Copyright (C) 2008 - 2011 Bart Van Assche <bvanassche@acm.org>.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 */
34
35#include <linux/module.h>
36#include <linux/init.h>
37#include <linux/slab.h>
38#include <linux/err.h>
39#include <linux/ctype.h>
40#include <linux/kthread.h>
41#include <linux/string.h>
42#include <linux/delay.h>
43#include <linux/atomic.h>
44#include <scsi/scsi_tcq.h>
45#include <target/configfs_macros.h>
46#include <target/target_core_base.h>
47#include <target/target_core_fabric_configfs.h>
48#include <target/target_core_fabric.h>
49#include <target/target_core_configfs.h>
50#include "ib_srpt.h"
51
52/* Name of this kernel module. */
53#define DRV_NAME "ib_srpt"
54#define DRV_VERSION "2.0.0"
55#define DRV_RELDATE "2011-02-14"
56
57#define SRPT_ID_STRING "Linux SRP target"
58
59#undef pr_fmt
60#define pr_fmt(fmt) DRV_NAME " " fmt
61
62MODULE_AUTHOR("Vu Pham and Bart Van Assche");
63MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
64 "v" DRV_VERSION " (" DRV_RELDATE ")");
65MODULE_LICENSE("Dual BSD/GPL");
66
67/*
68 * Global Variables
69 */
70
71static u64 srpt_service_guid;
72static DEFINE_SPINLOCK(srpt_dev_lock); /* Protects srpt_dev_list. */
73static LIST_HEAD(srpt_dev_list); /* List of srpt_device structures. */
74
75static unsigned srp_max_req_size = DEFAULT_MAX_REQ_SIZE;
76module_param(srp_max_req_size, int, 0444);
77MODULE_PARM_DESC(srp_max_req_size,
78 "Maximum size of SRP request messages in bytes.");
79
80static int srpt_srq_size = DEFAULT_SRPT_SRQ_SIZE;
81module_param(srpt_srq_size, int, 0444);
82MODULE_PARM_DESC(srpt_srq_size,
83 "Shared receive queue (SRQ) size.");
84
85static int srpt_get_u64_x(char *buffer, struct kernel_param *kp)
86{
87 return sprintf(buffer, "0x%016llx", *(u64 *)kp->arg);
88}
89module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid,
90 0444);
91MODULE_PARM_DESC(srpt_service_guid,
 92		 "Use this value for ioc_guid, id_ext and cm_listen_id"
 93		 " instead of the node_guid of the first HCA.");
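As with any module_param, these three parameters can be set at load time, e.g. modprobe ib_srpt srp_max_req_size=4096 srpt_srq_size=2048 (illustrative values only), and since all of them are registered with mode 0444 their current values can be read back under /sys/module/ib_srpt/parameters/.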
94
95static struct ib_client srpt_client;
96static struct target_fabric_configfs *srpt_target;
97static void srpt_release_channel(struct srpt_rdma_ch *ch);
98static int srpt_queue_status(struct se_cmd *cmd);
99
100/**
101 * opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE.
102 */
103static inline
104enum dma_data_direction opposite_dma_dir(enum dma_data_direction dir)
105{
106 switch (dir) {
107 case DMA_TO_DEVICE: return DMA_FROM_DEVICE;
108 case DMA_FROM_DEVICE: return DMA_TO_DEVICE;
109 default: return dir;
110 }
111}
112
113/**
114 * srpt_sdev_name() - Return the name associated with the HCA.
115 *
116 * Examples are ib0, ib1, ...
117 */
118static inline const char *srpt_sdev_name(struct srpt_device *sdev)
119{
120 return sdev->device->name;
121}
122
123static enum rdma_ch_state srpt_get_ch_state(struct srpt_rdma_ch *ch)
124{
125 unsigned long flags;
126 enum rdma_ch_state state;
127
128 spin_lock_irqsave(&ch->spinlock, flags);
129 state = ch->state;
130 spin_unlock_irqrestore(&ch->spinlock, flags);
131 return state;
132}
133
134static enum rdma_ch_state
135srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new_state)
136{
137 unsigned long flags;
138 enum rdma_ch_state prev;
139
140 spin_lock_irqsave(&ch->spinlock, flags);
141 prev = ch->state;
142 ch->state = new_state;
143 spin_unlock_irqrestore(&ch->spinlock, flags);
144 return prev;
145}
146
147/**
148 * srpt_test_and_set_ch_state() - Test and set the channel state.
149 *
150 * Returns true if and only if the channel state has been set to the new state.
151 */
152static bool
153srpt_test_and_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state old,
154 enum rdma_ch_state new)
155{
156 unsigned long flags;
157 enum rdma_ch_state prev;
158
159 spin_lock_irqsave(&ch->spinlock, flags);
160 prev = ch->state;
161 if (prev == old)
162 ch->state = new;
163 spin_unlock_irqrestore(&ch->spinlock, flags);
164 return prev == old;
165}
166
167/**
168 * srpt_event_handler() - Asynchronous IB event callback function.
169 *
170 * Callback function called by the InfiniBand core when an asynchronous IB
171 * event occurs. This callback may occur in interrupt context. See also
172 * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand
173 * Architecture Specification.
174 */
175static void srpt_event_handler(struct ib_event_handler *handler,
176 struct ib_event *event)
177{
178 struct srpt_device *sdev;
179 struct srpt_port *sport;
180
181 sdev = ib_get_client_data(event->device, &srpt_client);
182 if (!sdev || sdev->device != event->device)
183 return;
184
185 pr_debug("ASYNC event= %d on device= %s\n", event->event,
186 srpt_sdev_name(sdev));
187
188 switch (event->event) {
189 case IB_EVENT_PORT_ERR:
190 if (event->element.port_num <= sdev->device->phys_port_cnt) {
191 sport = &sdev->port[event->element.port_num - 1];
192 sport->lid = 0;
193 sport->sm_lid = 0;
194 }
195 break;
196 case IB_EVENT_PORT_ACTIVE:
197 case IB_EVENT_LID_CHANGE:
198 case IB_EVENT_PKEY_CHANGE:
199 case IB_EVENT_SM_CHANGE:
200 case IB_EVENT_CLIENT_REREGISTER:
201 /* Refresh port data asynchronously. */
202 if (event->element.port_num <= sdev->device->phys_port_cnt) {
203 sport = &sdev->port[event->element.port_num - 1];
204 if (!sport->lid && !sport->sm_lid)
205 schedule_work(&sport->work);
206 }
207 break;
208 default:
209 printk(KERN_ERR "received unrecognized IB event %d\n",
210 event->event);
211 break;
212 }
213}
214
215/**
216 * srpt_srq_event() - SRQ event callback function.
217 */
218static void srpt_srq_event(struct ib_event *event, void *ctx)
219{
220 printk(KERN_INFO "SRQ event %d\n", event->event);
221}
222
223/**
224 * srpt_qp_event() - QP event callback function.
225 */
226static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
227{
228 pr_debug("QP event %d on cm_id=%p sess_name=%s state=%d\n",
229 event->event, ch->cm_id, ch->sess_name, srpt_get_ch_state(ch));
230
231 switch (event->event) {
232 case IB_EVENT_COMM_EST:
233 ib_cm_notify(ch->cm_id, event->event);
234 break;
235 case IB_EVENT_QP_LAST_WQE_REACHED:
236 if (srpt_test_and_set_ch_state(ch, CH_DRAINING,
237 CH_RELEASING))
238 srpt_release_channel(ch);
239 else
240 pr_debug("%s: state %d - ignored LAST_WQE.\n",
241 ch->sess_name, srpt_get_ch_state(ch));
242 break;
243 default:
244 printk(KERN_ERR "received unrecognized IB QP event %d\n",
245 event->event);
246 break;
247 }
248}
249
250/**
251 * srpt_set_ioc() - Helper function for initializing an IOUnitInfo structure.
252 *
253 * @slot: one-based slot number.
254 * @value: four-bit value.
255 *
 256 * Copies the lowest four bits of value into element slot of the array of
 257 * four-bit elements called c_list (controller list). The index slot is one-based.
258 */
259static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
260{
261 u16 id;
262 u8 tmp;
263
264 id = (slot - 1) / 2;
265 if (slot & 0x1) {
266 tmp = c_list[id] & 0xf;
267 c_list[id] = (value << 4) | tmp;
268 } else {
269 tmp = c_list[id] & 0xf0;
270 c_list[id] = (value & 0xf) | tmp;
271 }
272}
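A minimal userspace sketch of the nibble packing implemented by srpt_set_ioc() (illustrative only, not part of the driver): odd one-based slots land in the high nibble and even slots in the low nibble of byte (slot - 1) / 2.

#include <stdio.h>
#include <stdint.h>

/* Same packing rule as srpt_set_ioc(): two one-based slots per byte. */
static void set_ioc(uint8_t *c_list, unsigned int slot, uint8_t value)
{
	unsigned int id = (slot - 1) / 2;

	if (slot & 1)
		c_list[id] = (value << 4) | (c_list[id] & 0x0f);
	else
		c_list[id] = (c_list[id] & 0xf0) | (value & 0x0f);
}

int main(void)
{
	uint8_t c_list[2] = { 0, 0 };

	set_ioc(c_list, 1, 1);				/* slot 1: present */
	set_ioc(c_list, 2, 0);				/* slot 2: empty */
	printf("%02x %02x\n", c_list[0], c_list[1]);	/* prints "10 00" */
	return 0;
}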
273
274/**
275 * srpt_get_class_port_info() - Copy ClassPortInfo to a management datagram.
276 *
277 * See also section 16.3.3.1 ClassPortInfo in the InfiniBand Architecture
278 * Specification.
279 */
280static void srpt_get_class_port_info(struct ib_dm_mad *mad)
281{
282 struct ib_class_port_info *cif;
283
284 cif = (struct ib_class_port_info *)mad->data;
285 memset(cif, 0, sizeof *cif);
286 cif->base_version = 1;
287 cif->class_version = 1;
288 cif->resp_time_value = 20;
289
290 mad->mad_hdr.status = 0;
291}
292
293/**
294 * srpt_get_iou() - Write IOUnitInfo to a management datagram.
295 *
296 * See also section 16.3.3.3 IOUnitInfo in the InfiniBand Architecture
297 * Specification. See also section B.7, table B.6 in the SRP r16a document.
298 */
299static void srpt_get_iou(struct ib_dm_mad *mad)
300{
301 struct ib_dm_iou_info *ioui;
302 u8 slot;
303 int i;
304
305 ioui = (struct ib_dm_iou_info *)mad->data;
306 ioui->change_id = __constant_cpu_to_be16(1);
307 ioui->max_controllers = 16;
308
309 /* set present for slot 1 and empty for the rest */
310 srpt_set_ioc(ioui->controller_list, 1, 1);
311 for (i = 1, slot = 2; i < 16; i++, slot++)
312 srpt_set_ioc(ioui->controller_list, slot, 0);
313
314 mad->mad_hdr.status = 0;
315}
316
317/**
318 * srpt_get_ioc() - Write IOControllerprofile to a management datagram.
319 *
320 * See also section 16.3.3.4 IOControllerProfile in the InfiniBand
321 * Architecture Specification. See also section B.7, table B.7 in the SRP
322 * r16a document.
323 */
324static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
325 struct ib_dm_mad *mad)
326{
327 struct srpt_device *sdev = sport->sdev;
328 struct ib_dm_ioc_profile *iocp;
329
330 iocp = (struct ib_dm_ioc_profile *)mad->data;
331
332 if (!slot || slot > 16) {
333 mad->mad_hdr.status
334 = __constant_cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
335 return;
336 }
337
338 if (slot > 2) {
339 mad->mad_hdr.status
340 = __constant_cpu_to_be16(DM_MAD_STATUS_NO_IOC);
341 return;
342 }
343
344 memset(iocp, 0, sizeof *iocp);
345 strcpy(iocp->id_string, SRPT_ID_STRING);
346 iocp->guid = cpu_to_be64(srpt_service_guid);
347 iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
348 iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
349 iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
350 iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
351 iocp->subsys_device_id = 0x0;
352 iocp->io_class = __constant_cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
353 iocp->io_subclass = __constant_cpu_to_be16(SRP_IO_SUBCLASS);
354 iocp->protocol = __constant_cpu_to_be16(SRP_PROTOCOL);
355 iocp->protocol_version = __constant_cpu_to_be16(SRP_PROTOCOL_VERSION);
356 iocp->send_queue_depth = cpu_to_be16(sdev->srq_size);
357 iocp->rdma_read_depth = 4;
358 iocp->send_size = cpu_to_be32(srp_max_req_size);
359 iocp->rdma_size = cpu_to_be32(min(sport->port_attrib.srp_max_rdma_size,
360 1U << 24));
361 iocp->num_svc_entries = 1;
362 iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC |
363 SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC;
364
365 mad->mad_hdr.status = 0;
366}
367
368/**
369 * srpt_get_svc_entries() - Write ServiceEntries to a management datagram.
370 *
371 * See also section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
372 * Specification. See also section B.7, table B.8 in the SRP r16a document.
373 */
374static void srpt_get_svc_entries(u64 ioc_guid,
375 u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
376{
377 struct ib_dm_svc_entries *svc_entries;
378
379 WARN_ON(!ioc_guid);
380
381 if (!slot || slot > 16) {
382 mad->mad_hdr.status
383 = __constant_cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
384 return;
385 }
386
387 if (slot > 2 || lo > hi || hi > 1) {
388 mad->mad_hdr.status
389 = __constant_cpu_to_be16(DM_MAD_STATUS_NO_IOC);
390 return;
391 }
392
393 svc_entries = (struct ib_dm_svc_entries *)mad->data;
394 memset(svc_entries, 0, sizeof *svc_entries);
395 svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid);
396 snprintf(svc_entries->service_entries[0].name,
397 sizeof(svc_entries->service_entries[0].name),
398 "%s%016llx",
399 SRP_SERVICE_NAME_PREFIX,
400 ioc_guid);
401
402 mad->mad_hdr.status = 0;
403}
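For example, with an ioc_guid of 0x0002c90300a44be0 (an illustrative value) the single service entry built above gets id 0x0002c90300a44be0 and a name consisting of SRP_SERVICE_NAME_PREFIX followed by the GUID printed as sixteen zero-padded hex digits, per the "%s%016llx" format.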
404
405/**
406 * srpt_mgmt_method_get() - Process a received management datagram.
407 * @sp: source port through which the MAD has been received.
408 * @rq_mad: received MAD.
409 * @rsp_mad: response MAD.
410 */
411static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
412 struct ib_dm_mad *rsp_mad)
413{
414 u16 attr_id;
415 u32 slot;
416 u8 hi, lo;
417
418 attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id);
419 switch (attr_id) {
420 case DM_ATTR_CLASS_PORT_INFO:
421 srpt_get_class_port_info(rsp_mad);
422 break;
423 case DM_ATTR_IOU_INFO:
424 srpt_get_iou(rsp_mad);
425 break;
426 case DM_ATTR_IOC_PROFILE:
427 slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
428 srpt_get_ioc(sp, slot, rsp_mad);
429 break;
430 case DM_ATTR_SVC_ENTRIES:
431 slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
432 hi = (u8) ((slot >> 8) & 0xff);
433 lo = (u8) (slot & 0xff);
434 slot = (u16) ((slot >> 16) & 0xffff);
435 srpt_get_svc_entries(srpt_service_guid,
436 slot, hi, lo, rsp_mad);
437 break;
438 default:
439 rsp_mad->mad_hdr.status =
440 __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
441 break;
442 }
443}
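A worked example of the DM_ATTR_SVC_ENTRIES decoding above: for an attr_mod of 0x00010100, hi = 0x01, lo = 0x00 and slot = 0x0001, i.e. bits 31..16 of the attribute modifier select the one-based IOC slot while bytes 1 and 0 carry the highest and lowest requested service-entry indices.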
444
445/**
446 * srpt_mad_send_handler() - Post MAD-send callback function.
447 */
448static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
449 struct ib_mad_send_wc *mad_wc)
450{
451 ib_destroy_ah(mad_wc->send_buf->ah);
452 ib_free_send_mad(mad_wc->send_buf);
453}
454
455/**
456 * srpt_mad_recv_handler() - MAD reception callback function.
457 */
458static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
459 struct ib_mad_recv_wc *mad_wc)
460{
461 struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
462 struct ib_ah *ah;
463 struct ib_mad_send_buf *rsp;
464 struct ib_dm_mad *dm_mad;
465
466 if (!mad_wc || !mad_wc->recv_buf.mad)
467 return;
468
469 ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
470 mad_wc->recv_buf.grh, mad_agent->port_num);
471 if (IS_ERR(ah))
472 goto err;
473
474 BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR);
475
476 rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
477 mad_wc->wc->pkey_index, 0,
478 IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA,
479 GFP_KERNEL);
480 if (IS_ERR(rsp))
481 goto err_rsp;
482
483 rsp->ah = ah;
484
485 dm_mad = rsp->mad;
486 memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
487 dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
488 dm_mad->mad_hdr.status = 0;
489
490 switch (mad_wc->recv_buf.mad->mad_hdr.method) {
491 case IB_MGMT_METHOD_GET:
492 srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad);
493 break;
494 case IB_MGMT_METHOD_SET:
495 dm_mad->mad_hdr.status =
496 __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
497 break;
498 default:
499 dm_mad->mad_hdr.status =
500 __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
501 break;
502 }
503
504 if (!ib_post_send_mad(rsp, NULL)) {
505 ib_free_recv_mad(mad_wc);
506 /* will destroy_ah & free_send_mad in send completion */
507 return;
508 }
509
510 ib_free_send_mad(rsp);
511
512err_rsp:
513 ib_destroy_ah(ah);
514err:
515 ib_free_recv_mad(mad_wc);
516}
517
518/**
519 * srpt_refresh_port() - Configure a HCA port.
520 *
521 * Enable InfiniBand management datagram processing, update the cached sm_lid,
522 * lid and gid values, and register a callback function for processing MADs
523 * on the specified port.
524 *
525 * Note: It is safe to call this function more than once for the same port.
526 */
527static int srpt_refresh_port(struct srpt_port *sport)
528{
529 struct ib_mad_reg_req reg_req;
530 struct ib_port_modify port_modify;
531 struct ib_port_attr port_attr;
532 int ret;
533
534 memset(&port_modify, 0, sizeof port_modify);
535 port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
536 port_modify.clr_port_cap_mask = 0;
537
538 ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
539 if (ret)
540 goto err_mod_port;
541
542 ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
543 if (ret)
544 goto err_query_port;
545
546 sport->sm_lid = port_attr.sm_lid;
547 sport->lid = port_attr.lid;
548
549 ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
550 if (ret)
551 goto err_query_port;
552
553 if (!sport->mad_agent) {
554 memset(&reg_req, 0, sizeof reg_req);
555 reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
556 reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
557 set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
558 set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);
559
560 sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
561 sport->port,
562 IB_QPT_GSI,
563 &reg_req, 0,
564 srpt_mad_send_handler,
565 srpt_mad_recv_handler,
566 sport);
567 if (IS_ERR(sport->mad_agent)) {
568 ret = PTR_ERR(sport->mad_agent);
569 sport->mad_agent = NULL;
570 goto err_query_port;
571 }
572 }
573
574 return 0;
575
576err_query_port:
577
578 port_modify.set_port_cap_mask = 0;
579 port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
580 ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
581
582err_mod_port:
583
584 return ret;
585}
586
587/**
588 * srpt_unregister_mad_agent() - Unregister MAD callback functions.
589 *
590 * Note: It is safe to call this function more than once for the same device.
591 */
592static void srpt_unregister_mad_agent(struct srpt_device *sdev)
593{
594 struct ib_port_modify port_modify = {
595 .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
596 };
597 struct srpt_port *sport;
598 int i;
599
600 for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
601 sport = &sdev->port[i - 1];
602 WARN_ON(sport->port != i);
603 if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
604 printk(KERN_ERR "disabling MAD processing failed.\n");
605 if (sport->mad_agent) {
606 ib_unregister_mad_agent(sport->mad_agent);
607 sport->mad_agent = NULL;
608 }
609 }
610}
611
612/**
613 * srpt_alloc_ioctx() - Allocate an SRPT I/O context structure.
614 */
615static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev,
616 int ioctx_size, int dma_size,
617 enum dma_data_direction dir)
618{
619 struct srpt_ioctx *ioctx;
620
621 ioctx = kmalloc(ioctx_size, GFP_KERNEL);
622 if (!ioctx)
623 goto err;
624
625 ioctx->buf = kmalloc(dma_size, GFP_KERNEL);
626 if (!ioctx->buf)
627 goto err_free_ioctx;
628
629 ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf, dma_size, dir);
630 if (ib_dma_mapping_error(sdev->device, ioctx->dma))
631 goto err_free_buf;
632
633 return ioctx;
634
635err_free_buf:
636 kfree(ioctx->buf);
637err_free_ioctx:
638 kfree(ioctx);
639err:
640 return NULL;
641}
642
643/**
644 * srpt_free_ioctx() - Free an SRPT I/O context structure.
645 */
646static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx,
647 int dma_size, enum dma_data_direction dir)
648{
649 if (!ioctx)
650 return;
651
652 ib_dma_unmap_single(sdev->device, ioctx->dma, dma_size, dir);
653 kfree(ioctx->buf);
654 kfree(ioctx);
655}
656
657/**
658 * srpt_alloc_ioctx_ring() - Allocate a ring of SRPT I/O context structures.
659 * @sdev: Device to allocate the I/O context ring for.
660 * @ring_size: Number of elements in the I/O context ring.
661 * @ioctx_size: I/O context size.
662 * @dma_size: DMA buffer size.
663 * @dir: DMA data direction.
664 */
665static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev,
666 int ring_size, int ioctx_size,
667 int dma_size, enum dma_data_direction dir)
668{
669 struct srpt_ioctx **ring;
670 int i;
671
672 WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx)
673 && ioctx_size != sizeof(struct srpt_send_ioctx));
674
675 ring = kmalloc(ring_size * sizeof(ring[0]), GFP_KERNEL);
676 if (!ring)
677 goto out;
678 for (i = 0; i < ring_size; ++i) {
679 ring[i] = srpt_alloc_ioctx(sdev, ioctx_size, dma_size, dir);
680 if (!ring[i])
681 goto err;
682 ring[i]->index = i;
683 }
684 goto out;
685
686err:
687 while (--i >= 0)
688 srpt_free_ioctx(sdev, ring[i], dma_size, dir);
689 kfree(ring);
690 ring = NULL;
691out:
692 return ring;
693}
694
695/**
696 * srpt_free_ioctx_ring() - Free the ring of SRPT I/O context structures.
697 */
698static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring,
699 struct srpt_device *sdev, int ring_size,
700 int dma_size, enum dma_data_direction dir)
701{
702 int i;
703
704 for (i = 0; i < ring_size; ++i)
705 srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir);
706 kfree(ioctx_ring);
707}
708
709/**
710 * srpt_get_cmd_state() - Get the state of a SCSI command.
711 */
712static enum srpt_command_state srpt_get_cmd_state(struct srpt_send_ioctx *ioctx)
713{
714 enum srpt_command_state state;
715 unsigned long flags;
716
717 BUG_ON(!ioctx);
718
719 spin_lock_irqsave(&ioctx->spinlock, flags);
720 state = ioctx->state;
721 spin_unlock_irqrestore(&ioctx->spinlock, flags);
722 return state;
723}
724
725/**
726 * srpt_set_cmd_state() - Set the state of a SCSI command.
727 *
728 * Does not modify the state of aborted commands. Returns the previous command
729 * state.
730 */
731static enum srpt_command_state srpt_set_cmd_state(struct srpt_send_ioctx *ioctx,
732 enum srpt_command_state new)
733{
734 enum srpt_command_state previous;
735 unsigned long flags;
736
737 BUG_ON(!ioctx);
738
739 spin_lock_irqsave(&ioctx->spinlock, flags);
740 previous = ioctx->state;
741 if (previous != SRPT_STATE_DONE)
742 ioctx->state = new;
743 spin_unlock_irqrestore(&ioctx->spinlock, flags);
744
745 return previous;
746}
747
748/**
749 * srpt_test_and_set_cmd_state() - Test and set the state of a command.
750 *
751 * Returns true if and only if the previous command state was equal to 'old'.
752 */
753static bool srpt_test_and_set_cmd_state(struct srpt_send_ioctx *ioctx,
754 enum srpt_command_state old,
755 enum srpt_command_state new)
756{
757 enum srpt_command_state previous;
758 unsigned long flags;
759
760 WARN_ON(!ioctx);
761 WARN_ON(old == SRPT_STATE_DONE);
762 WARN_ON(new == SRPT_STATE_NEW);
763
764 spin_lock_irqsave(&ioctx->spinlock, flags);
765 previous = ioctx->state;
766 if (previous == old)
767 ioctx->state = new;
768 spin_unlock_irqrestore(&ioctx->spinlock, flags);
769 return previous == old;
770}
771
772/**
773 * srpt_post_recv() - Post an IB receive request.
774 */
775static int srpt_post_recv(struct srpt_device *sdev,
776 struct srpt_recv_ioctx *ioctx)
777{
778 struct ib_sge list;
779 struct ib_recv_wr wr, *bad_wr;
780
781 BUG_ON(!sdev);
782 wr.wr_id = encode_wr_id(SRPT_RECV, ioctx->ioctx.index);
783
784 list.addr = ioctx->ioctx.dma;
785 list.length = srp_max_req_size;
786 list.lkey = sdev->mr->lkey;
787
788 wr.next = NULL;
789 wr.sg_list = &list;
790 wr.num_sge = 1;
791
792 return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
793}
794
795/**
796 * srpt_post_send() - Post an IB send request.
797 *
798 * Returns zero upon success and a non-zero value upon failure.
799 */
800static int srpt_post_send(struct srpt_rdma_ch *ch,
801 struct srpt_send_ioctx *ioctx, int len)
802{
803 struct ib_sge list;
804 struct ib_send_wr wr, *bad_wr;
805 struct srpt_device *sdev = ch->sport->sdev;
806 int ret;
807
808 atomic_inc(&ch->req_lim);
809
810 ret = -ENOMEM;
811 if (unlikely(atomic_dec_return(&ch->sq_wr_avail) < 0)) {
812 printk(KERN_WARNING "IB send queue full (needed 1)\n");
813 goto out;
814 }
815
816 ib_dma_sync_single_for_device(sdev->device, ioctx->ioctx.dma, len,
817 DMA_TO_DEVICE);
818
819 list.addr = ioctx->ioctx.dma;
820 list.length = len;
821 list.lkey = sdev->mr->lkey;
822
823 wr.next = NULL;
824 wr.wr_id = encode_wr_id(SRPT_SEND, ioctx->ioctx.index);
825 wr.sg_list = &list;
826 wr.num_sge = 1;
827 wr.opcode = IB_WR_SEND;
828 wr.send_flags = IB_SEND_SIGNALED;
829
830 ret = ib_post_send(ch->qp, &wr, &bad_wr);
831
832out:
833 if (ret < 0) {
834 atomic_inc(&ch->sq_wr_avail);
835 atomic_dec(&ch->req_lim);
836 }
837 return ret;
838}
839
840/**
841 * srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request.
842 * @ioctx: Pointer to the I/O context associated with the request.
843 * @srp_cmd: Pointer to the SRP_CMD request data.
844 * @dir: Pointer to the variable to which the transfer direction will be
845 * written.
846 * @data_len: Pointer to the variable to which the total data length of all
847 * descriptors in the SRP_CMD request will be written.
848 *
 849 * This function initializes ioctx->n_rbuf and ioctx->rbufs.
850 *
851 * Returns -EINVAL when the SRP_CMD request contains inconsistent descriptors;
852 * -ENOMEM when memory allocation fails and zero upon success.
853 */
854static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
855 struct srp_cmd *srp_cmd,
856 enum dma_data_direction *dir, u64 *data_len)
857{
858 struct srp_indirect_buf *idb;
859 struct srp_direct_buf *db;
860 unsigned add_cdb_offset;
861 int ret;
862
863 /*
864 * The pointer computations below will only be compiled correctly
865 * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check
866 * whether srp_cmd::add_data has been declared as a byte pointer.
867 */
868 BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0)
869 && !__same_type(srp_cmd->add_data[0], (u8)0));
870
871 BUG_ON(!dir);
872 BUG_ON(!data_len);
873
874 ret = 0;
875 *data_len = 0;
876
877 /*
878 * The lower four bits of the buffer format field contain the DATA-IN
879 * buffer descriptor format, and the highest four bits contain the
880 * DATA-OUT buffer descriptor format.
881 */
882 *dir = DMA_NONE;
883 if (srp_cmd->buf_fmt & 0xf)
884 /* DATA-IN: transfer data from target to initiator (read). */
885 *dir = DMA_FROM_DEVICE;
886 else if (srp_cmd->buf_fmt >> 4)
887 /* DATA-OUT: transfer data from initiator to target (write). */
888 *dir = DMA_TO_DEVICE;
889
890 /*
891 * According to the SRP spec, the lower two bits of the 'ADDITIONAL
892 * CDB LENGTH' field are reserved and the size in bytes of this field
893 * is four times the value specified in bits 3..7. Hence the "& ~3".
894 */
895 add_cdb_offset = srp_cmd->add_cdb_len & ~3;
896 if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
897 ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
898 ioctx->n_rbuf = 1;
899 ioctx->rbufs = &ioctx->single_rbuf;
900
901 db = (struct srp_direct_buf *)(srp_cmd->add_data
902 + add_cdb_offset);
903 memcpy(ioctx->rbufs, db, sizeof *db);
904 *data_len = be32_to_cpu(db->len);
905 } else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) ||
906 ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) {
907 idb = (struct srp_indirect_buf *)(srp_cmd->add_data
908 + add_cdb_offset);
909
910 ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;
911
912 if (ioctx->n_rbuf >
913 (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
914 printk(KERN_ERR "received unsupported SRP_CMD request"
915 " type (%u out + %u in != %u / %zu)\n",
916 srp_cmd->data_out_desc_cnt,
917 srp_cmd->data_in_desc_cnt,
918 be32_to_cpu(idb->table_desc.len),
919 sizeof(*db));
920 ioctx->n_rbuf = 0;
921 ret = -EINVAL;
922 goto out;
923 }
924
925 if (ioctx->n_rbuf == 1)
926 ioctx->rbufs = &ioctx->single_rbuf;
927 else {
928 ioctx->rbufs =
929 kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
930 if (!ioctx->rbufs) {
931 ioctx->n_rbuf = 0;
932 ret = -ENOMEM;
933 goto out;
934 }
935 }
936
937 db = idb->desc_list;
938 memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
939 *data_len = be32_to_cpu(idb->len);
940 }
941out:
942 return ret;
943}
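Two worked examples of the descriptor parsing above (illustrative numbers): when the low nibble of buf_fmt equals SRP_DATA_DESC_DIRECT, the command describes a DATA-IN transfer, so *dir becomes DMA_FROM_DEVICE and the single direct descriptor is copied into ioctx->single_rbuf; in the indirect case, a table_desc.len of 48 bytes yields n_rbuf = 48 / sizeof(struct srp_direct_buf) = 48 / 16 = 3 descriptors, which must not exceed data_out_desc_cnt + data_in_desc_cnt.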
944
945/**
946 * srpt_init_ch_qp() - Initialize queue pair attributes.
947 *
 948 * Initializes the attributes of queue pair 'qp' by allowing local write,
949 * remote read and remote write. Also transitions 'qp' to state IB_QPS_INIT.
950 */
951static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
952{
953 struct ib_qp_attr *attr;
954 int ret;
955
956 attr = kzalloc(sizeof *attr, GFP_KERNEL);
957 if (!attr)
958 return -ENOMEM;
959
960 attr->qp_state = IB_QPS_INIT;
961 attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
962 IB_ACCESS_REMOTE_WRITE;
963 attr->port_num = ch->sport->port;
964 attr->pkey_index = 0;
965
966 ret = ib_modify_qp(qp, attr,
967 IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
968 IB_QP_PKEY_INDEX);
969
970 kfree(attr);
971 return ret;
972}
973
974/**
975 * srpt_ch_qp_rtr() - Change the state of a channel to 'ready to receive' (RTR).
976 * @ch: channel of the queue pair.
977 * @qp: queue pair to change the state of.
978 *
979 * Returns zero upon success and a negative value upon failure.
980 *
981 * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system.
982 * If this structure ever becomes larger, it might be necessary to allocate
983 * it dynamically instead of on the stack.
984 */
985static int srpt_ch_qp_rtr(struct srpt_rdma_ch *ch, struct ib_qp *qp)
986{
987 struct ib_qp_attr qp_attr;
988 int attr_mask;
989 int ret;
990
991 qp_attr.qp_state = IB_QPS_RTR;
992 ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
993 if (ret)
994 goto out;
995
996 qp_attr.max_dest_rd_atomic = 4;
997
998 ret = ib_modify_qp(qp, &qp_attr, attr_mask);
999
1000out:
1001 return ret;
1002}
1003
1004/**
1005 * srpt_ch_qp_rts() - Change the state of a channel to 'ready to send' (RTS).
1006 * @ch: channel of the queue pair.
1007 * @qp: queue pair to change the state of.
1008 *
1009 * Returns zero upon success and a negative value upon failure.
1010 *
1011 * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system.
1012 * If this structure ever becomes larger, it might be necessary to allocate
1013 * it dynamically instead of on the stack.
1014 */
1015static int srpt_ch_qp_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp)
1016{
1017 struct ib_qp_attr qp_attr;
1018 int attr_mask;
1019 int ret;
1020
1021 qp_attr.qp_state = IB_QPS_RTS;
1022 ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
1023 if (ret)
1024 goto out;
1025
1026 qp_attr.max_rd_atomic = 4;
1027
1028 ret = ib_modify_qp(qp, &qp_attr, attr_mask);
1029
1030out:
1031 return ret;
1032}
1033
1034/**
1035 * srpt_ch_qp_err() - Set the channel queue pair state to 'error'.
1036 */
1037static int srpt_ch_qp_err(struct srpt_rdma_ch *ch)
1038{
1039 struct ib_qp_attr qp_attr;
1040
1041 qp_attr.qp_state = IB_QPS_ERR;
1042 return ib_modify_qp(ch->qp, &qp_attr, IB_QP_STATE);
1043}
1044
1045/**
1046 * srpt_unmap_sg_to_ib_sge() - Unmap an IB SGE list.
1047 */
1048static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1049 struct srpt_send_ioctx *ioctx)
1050{
1051 struct scatterlist *sg;
1052 enum dma_data_direction dir;
1053
1054 BUG_ON(!ch);
1055 BUG_ON(!ioctx);
1056 BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius);
1057
1058 while (ioctx->n_rdma)
1059 kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
1060
1061 kfree(ioctx->rdma_ius);
1062 ioctx->rdma_ius = NULL;
1063
1064 if (ioctx->mapped_sg_count) {
1065 sg = ioctx->sg;
1066 WARN_ON(!sg);
1067 dir = ioctx->cmd.data_direction;
1068 BUG_ON(dir == DMA_NONE);
1069 ib_dma_unmap_sg(ch->sport->sdev->device, sg, ioctx->sg_cnt,
1070 opposite_dma_dir(dir));
1071 ioctx->mapped_sg_count = 0;
1072 }
1073}
1074
1075/**
1076 * srpt_map_sg_to_ib_sge() - Map an SG list to an IB SGE list.
1077 */
1078static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1079 struct srpt_send_ioctx *ioctx)
1080{
1081 struct se_cmd *cmd;
1082 struct scatterlist *sg, *sg_orig;
1083 int sg_cnt;
1084 enum dma_data_direction dir;
1085 struct rdma_iu *riu;
1086 struct srp_direct_buf *db;
1087 dma_addr_t dma_addr;
1088 struct ib_sge *sge;
1089 u64 raddr;
1090 u32 rsize;
1091 u32 tsize;
1092 u32 dma_len;
1093 int count, nrdma;
1094 int i, j, k;
1095
1096 BUG_ON(!ch);
1097 BUG_ON(!ioctx);
1098 cmd = &ioctx->cmd;
1099 dir = cmd->data_direction;
1100 BUG_ON(dir == DMA_NONE);
1101
1102 ioctx->sg = sg = sg_orig = cmd->t_data_sg;
1103 ioctx->sg_cnt = sg_cnt = cmd->t_data_nents;
1104
1105 count = ib_dma_map_sg(ch->sport->sdev->device, sg, sg_cnt,
1106 opposite_dma_dir(dir));
1107 if (unlikely(!count))
1108 return -EAGAIN;
1109
1110 ioctx->mapped_sg_count = count;
1111
1112 if (ioctx->rdma_ius && ioctx->n_rdma_ius)
1113 nrdma = ioctx->n_rdma_ius;
1114 else {
1115 nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE
1116 + ioctx->n_rbuf;
1117
1118 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu, GFP_KERNEL);
1119 if (!ioctx->rdma_ius)
1120 goto free_mem;
1121
1122 ioctx->n_rdma_ius = nrdma;
1123 }
1124
1125 db = ioctx->rbufs;
1126 tsize = cmd->data_length;
1127 dma_len = sg_dma_len(&sg[0]);
1128 riu = ioctx->rdma_ius;
1129
1130 /*
 1131	 * For each remote descriptor, calculate the number of ib_sge entries
 1132	 * required. If at most SRPT_DEF_SG_PER_WQE ib_sge entries are needed
 1133	 * per RDMA operation, one rdma_iu (and hence one RDMA work request)
 1134	 * per remote descriptor suffices; otherwise, allocate extra rdma_iu
 1135	 * structures to carry the additional ib_sge entries in further RDMA
 1136	 * work requests.
1137 */
1138 for (i = 0, j = 0;
1139 j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1140 rsize = be32_to_cpu(db->len);
1141 raddr = be64_to_cpu(db->va);
1142 riu->raddr = raddr;
1143 riu->rkey = be32_to_cpu(db->key);
1144 riu->sge_cnt = 0;
1145
1146 /* calculate how many sge required for this remote_buf */
1147 while (rsize > 0 && tsize > 0) {
1148
1149 if (rsize >= dma_len) {
1150 tsize -= dma_len;
1151 rsize -= dma_len;
1152 raddr += dma_len;
1153
1154 if (tsize > 0) {
1155 ++j;
1156 if (j < count) {
1157 sg = sg_next(sg);
1158 dma_len = sg_dma_len(sg);
1159 }
1160 }
1161 } else {
1162 tsize -= rsize;
1163 dma_len -= rsize;
1164 rsize = 0;
1165 }
1166
1167 ++riu->sge_cnt;
1168
1169 if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
1170 ++ioctx->n_rdma;
1171 riu->sge =
1172 kmalloc(riu->sge_cnt * sizeof *riu->sge,
1173 GFP_KERNEL);
1174 if (!riu->sge)
1175 goto free_mem;
1176
1177 ++riu;
1178 riu->sge_cnt = 0;
1179 riu->raddr = raddr;
1180 riu->rkey = be32_to_cpu(db->key);
1181 }
1182 }
1183
1184 ++ioctx->n_rdma;
1185 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
1186 GFP_KERNEL);
1187 if (!riu->sge)
1188 goto free_mem;
1189 }
1190
1191 db = ioctx->rbufs;
1192 tsize = cmd->data_length;
1193 riu = ioctx->rdma_ius;
1194 sg = sg_orig;
1195 dma_len = sg_dma_len(&sg[0]);
1196 dma_addr = sg_dma_address(&sg[0]);
1197
 1198	/* This second loop maps the scatterlist addresses to the rdma_iu ib_sge entries. */
1199 for (i = 0, j = 0;
1200 j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1201 rsize = be32_to_cpu(db->len);
1202 sge = riu->sge;
1203 k = 0;
1204
1205 while (rsize > 0 && tsize > 0) {
1206 sge->addr = dma_addr;
1207 sge->lkey = ch->sport->sdev->mr->lkey;
1208
1209 if (rsize >= dma_len) {
1210 sge->length =
1211 (tsize < dma_len) ? tsize : dma_len;
1212 tsize -= dma_len;
1213 rsize -= dma_len;
1214
1215 if (tsize > 0) {
1216 ++j;
1217 if (j < count) {
1218 sg = sg_next(sg);
1219 dma_len = sg_dma_len(sg);
1220 dma_addr = sg_dma_address(sg);
1221 }
1222 }
1223 } else {
1224 sge->length = (tsize < rsize) ? tsize : rsize;
1225 tsize -= rsize;
1226 dma_len -= rsize;
1227 dma_addr += rsize;
1228 rsize = 0;
1229 }
1230
1231 ++k;
1232 if (k == riu->sge_cnt && rsize > 0 && tsize > 0) {
1233 ++riu;
1234 sge = riu->sge;
1235 k = 0;
1236 } else if (rsize > 0 && tsize > 0)
1237 ++sge;
1238 }
1239 }
1240
1241 return 0;
1242
1243free_mem:
1244 srpt_unmap_sg_to_ib_sge(ch, ioctx);
1245
1246 return -ENOMEM;
1247}
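To make the first-pass sizing above concrete (illustrative numbers; assuming SRPT_DEF_SG_PER_WQE were 16): with count = 20 mapped scatterlist entries and n_rbuf = 2, nrdma = (20 + 16 - 1) / 16 + 2 = 4 rdma_iu slots are reserved, and the second pass then only fills in the ib_sge arrays that the first pass allocated.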
1248
1249/**
1250 * srpt_get_send_ioctx() - Obtain an I/O context for sending to the initiator.
1251 */
1252static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
1253{
1254 struct srpt_send_ioctx *ioctx;
1255 unsigned long flags;
1256
1257 BUG_ON(!ch);
1258
1259 ioctx = NULL;
1260 spin_lock_irqsave(&ch->spinlock, flags);
1261 if (!list_empty(&ch->free_list)) {
1262 ioctx = list_first_entry(&ch->free_list,
1263 struct srpt_send_ioctx, free_list);
1264 list_del(&ioctx->free_list);
1265 }
1266 spin_unlock_irqrestore(&ch->spinlock, flags);
1267
1268 if (!ioctx)
1269 return ioctx;
1270
1271 BUG_ON(ioctx->ch != ch);
1272 spin_lock_init(&ioctx->spinlock);
1273 ioctx->state = SRPT_STATE_NEW;
1274 ioctx->n_rbuf = 0;
1275 ioctx->rbufs = NULL;
1276 ioctx->n_rdma = 0;
1277 ioctx->n_rdma_ius = 0;
1278 ioctx->rdma_ius = NULL;
1279 ioctx->mapped_sg_count = 0;
1280 init_completion(&ioctx->tx_done);
1281 ioctx->queue_status_only = false;
1282 /*
1283 * transport_init_se_cmd() does not initialize all fields, so do it
1284 * here.
1285 */
1286 memset(&ioctx->cmd, 0, sizeof(ioctx->cmd));
1287 memset(&ioctx->sense_data, 0, sizeof(ioctx->sense_data));
1288
1289 return ioctx;
1290}
1291
1292/**
1293 * srpt_abort_cmd() - Abort a SCSI command.
1294 * @ioctx: I/O context associated with the SCSI command.
 1295 *
1296 */
1297static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
1298{
1299 enum srpt_command_state state;
1300 unsigned long flags;
1301
1302 BUG_ON(!ioctx);
1303
1304 /*
1305 * If the command is in a state where the target core is waiting for
1306 * the ib_srpt driver, change the state to the next state. Changing
1307 * the state of the command from SRPT_STATE_NEED_DATA to
1308 * SRPT_STATE_DATA_IN ensures that srpt_xmit_response() will call this
1309 * function a second time.
1310 */
1311
1312 spin_lock_irqsave(&ioctx->spinlock, flags);
1313 state = ioctx->state;
1314 switch (state) {
1315 case SRPT_STATE_NEED_DATA:
1316 ioctx->state = SRPT_STATE_DATA_IN;
1317 break;
1318 case SRPT_STATE_DATA_IN:
1319 case SRPT_STATE_CMD_RSP_SENT:
1320 case SRPT_STATE_MGMT_RSP_SENT:
1321 ioctx->state = SRPT_STATE_DONE;
1322 break;
1323 default:
1324 break;
1325 }
1326 spin_unlock_irqrestore(&ioctx->spinlock, flags);
1327
1328 if (state == SRPT_STATE_DONE) {
1329 struct srpt_rdma_ch *ch = ioctx->ch;
1330
1331 BUG_ON(ch->sess == NULL);
1332
1333 target_put_sess_cmd(ch->sess, &ioctx->cmd);
1334 goto out;
1335 }
1336
1337 pr_debug("Aborting cmd with state %d and tag %lld\n", state,
1338 ioctx->tag);
1339
1340 switch (state) {
1341 case SRPT_STATE_NEW:
1342 case SRPT_STATE_DATA_IN:
1343 case SRPT_STATE_MGMT:
1344 /*
1345 * Do nothing - defer abort processing until
1346 * srpt_queue_response() is invoked.
1347 */
1348 WARN_ON(!transport_check_aborted_status(&ioctx->cmd, false));
1349 break;
1350 case SRPT_STATE_NEED_DATA:
1351 /* DMA_TO_DEVICE (write) - RDMA read error. */
1352
1353 /* XXX(hch): this is a horrible layering violation.. */
1354 spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags);
1355 ioctx->cmd.transport_state |= CMD_T_LUN_STOP;
1356 ioctx->cmd.transport_state &= ~CMD_T_ACTIVE;
1357 spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags);
1358
1359 complete(&ioctx->cmd.transport_lun_stop_comp);
1360 break;
1361 case SRPT_STATE_CMD_RSP_SENT:
1362 /*
1363 * SRP_RSP sending failed or the SRP_RSP send completion has
1364 * not been received in time.
1365 */
1366 srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx);
1367 spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags);
1368 ioctx->cmd.transport_state |= CMD_T_LUN_STOP;
1369 spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags);
1370 target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd);
1371 break;
1372 case SRPT_STATE_MGMT_RSP_SENT:
1373 srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
1374 target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd);
1375 break;
1376 default:
1377 WARN_ON("ERROR: unexpected command state");
1378 break;
1379 }
1380
1381out:
1382 return state;
1383}
1384
1385/**
1386 * srpt_handle_send_err_comp() - Process an IB_WC_SEND error completion.
1387 */
1388static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id)
1389{
1390 struct srpt_send_ioctx *ioctx;
1391 enum srpt_command_state state;
1392 struct se_cmd *cmd;
1393 u32 index;
1394
1395 atomic_inc(&ch->sq_wr_avail);
1396
1397 index = idx_from_wr_id(wr_id);
1398 ioctx = ch->ioctx_ring[index];
1399 state = srpt_get_cmd_state(ioctx);
1400 cmd = &ioctx->cmd;
1401
1402 WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
1403 && state != SRPT_STATE_MGMT_RSP_SENT
1404 && state != SRPT_STATE_NEED_DATA
1405 && state != SRPT_STATE_DONE);
1406
1407 /* If SRP_RSP sending failed, undo the ch->req_lim change. */
1408 if (state == SRPT_STATE_CMD_RSP_SENT
1409 || state == SRPT_STATE_MGMT_RSP_SENT)
1410 atomic_dec(&ch->req_lim);
1411
1412 srpt_abort_cmd(ioctx);
1413}
1414
1415/**
1416 * srpt_handle_send_comp() - Process an IB send completion notification.
1417 */
1418static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
1419 struct srpt_send_ioctx *ioctx)
1420{
1421 enum srpt_command_state state;
1422
1423 atomic_inc(&ch->sq_wr_avail);
1424
1425 state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
1426
1427 if (WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
1428 && state != SRPT_STATE_MGMT_RSP_SENT
1429 && state != SRPT_STATE_DONE))
1430 pr_debug("state = %d\n", state);
1431
1432 if (state != SRPT_STATE_DONE) {
1433 srpt_unmap_sg_to_ib_sge(ch, ioctx);
1434 transport_generic_free_cmd(&ioctx->cmd, 0);
1435 } else {
1436 printk(KERN_ERR "IB completion has been received too late for"
1437 " wr_id = %u.\n", ioctx->ioctx.index);
1438 }
1439}
1440
1441/**
1442 * srpt_handle_rdma_comp() - Process an IB RDMA completion notification.
1443 *
 1444 * XXX: what is now target_execute_cmd() used to be asynchronous, so
 1445 * unmapping the data transferred via IB RDMA had to be postponed until
 1446 * the check_stop_free() callback. None of this is necessary anymore and
 1447 * it should be cleaned up.
1448 */
1449static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
1450 struct srpt_send_ioctx *ioctx,
1451 enum srpt_opcode opcode)
1452{
1453 WARN_ON(ioctx->n_rdma <= 0);
1454 atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
1455
1456 if (opcode == SRPT_RDMA_READ_LAST) {
1457 if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
1458 SRPT_STATE_DATA_IN))
1459 target_execute_cmd(&ioctx->cmd);
1460 else
1461 printk(KERN_ERR "%s[%d]: wrong state = %d\n", __func__,
1462 __LINE__, srpt_get_cmd_state(ioctx));
1463 } else if (opcode == SRPT_RDMA_ABORT) {
1464 ioctx->rdma_aborted = true;
1465 } else {
1466 WARN(true, "unexpected opcode %d\n", opcode);
1467 }
1468}
1469
1470/**
1471 * srpt_handle_rdma_err_comp() - Process an IB RDMA error completion.
1472 */
1473static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
1474 struct srpt_send_ioctx *ioctx,
1475 enum srpt_opcode opcode)
1476{
1477 struct se_cmd *cmd;
1478 enum srpt_command_state state;
1479 unsigned long flags;
1480
1481 cmd = &ioctx->cmd;
1482 state = srpt_get_cmd_state(ioctx);
1483 switch (opcode) {
1484 case SRPT_RDMA_READ_LAST:
1485 if (ioctx->n_rdma <= 0) {
1486 printk(KERN_ERR "Received invalid RDMA read"
1487 " error completion with idx %d\n",
1488 ioctx->ioctx.index);
1489 break;
1490 }
1491 atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
1492 if (state == SRPT_STATE_NEED_DATA)
1493 srpt_abort_cmd(ioctx);
1494 else
1495 printk(KERN_ERR "%s[%d]: wrong state = %d\n",
1496 __func__, __LINE__, state);
1497 break;
1498 case SRPT_RDMA_WRITE_LAST:
1499 spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags);
1500 ioctx->cmd.transport_state |= CMD_T_LUN_STOP;
1501 spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags);
1502 break;
1503 default:
1504 printk(KERN_ERR "%s[%d]: opcode = %u\n", __func__,
1505 __LINE__, opcode);
1506 break;
1507 }
1508}
1509
1510/**
1511 * srpt_build_cmd_rsp() - Build an SRP_RSP response.
1512 * @ch: RDMA channel through which the request has been received.
1513 * @ioctx: I/O context associated with the SRP_CMD request. The response will
1514 * be built in the buffer ioctx->buf points at and hence this function will
1515 * overwrite the request data.
1516 * @tag: tag of the request for which this response is being generated.
1517 * @status: value for the STATUS field of the SRP_RSP information unit.
1518 *
1519 * Returns the size in bytes of the SRP_RSP response.
1520 *
1521 * An SRP_RSP response contains a SCSI status or service response. See also
1522 * section 6.9 in the SRP r16a document for the format of an SRP_RSP
1523 * response. See also SPC-2 for more information about sense data.
1524 */
1525static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
1526 struct srpt_send_ioctx *ioctx, u64 tag,
1527 int status)
1528{
1529 struct srp_rsp *srp_rsp;
1530 const u8 *sense_data;
1531 int sense_data_len, max_sense_len;
1532
1533 /*
1534 * The lowest bit of all SAM-3 status codes is zero (see also
1535 * paragraph 5.3 in SAM-3).
1536 */
1537 WARN_ON(status & 1);
1538
1539 srp_rsp = ioctx->ioctx.buf;
1540 BUG_ON(!srp_rsp);
1541
1542 sense_data = ioctx->sense_data;
1543 sense_data_len = ioctx->cmd.scsi_sense_length;
1544 WARN_ON(sense_data_len > sizeof(ioctx->sense_data));
1545
1546 memset(srp_rsp, 0, sizeof *srp_rsp);
1547 srp_rsp->opcode = SRP_RSP;
1548 srp_rsp->req_lim_delta =
1549 __constant_cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0));
1550 srp_rsp->tag = tag;
1551 srp_rsp->status = status;
1552
1553 if (sense_data_len) {
1554 BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp));
1555 max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp);
1556 if (sense_data_len > max_sense_len) {
1557 printk(KERN_WARNING "truncated sense data from %d to %d"
1558 " bytes\n", sense_data_len, max_sense_len);
1559 sense_data_len = max_sense_len;
1560 }
1561
1562 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
1563 srp_rsp->sense_data_len = cpu_to_be32(sense_data_len);
1564 memcpy(srp_rsp + 1, sense_data, sense_data_len);
1565 }
1566
1567 return sizeof(*srp_rsp) + sense_data_len;
1568}
1569
1570/**
1571 * srpt_build_tskmgmt_rsp() - Build a task management response.
1572 * @ch: RDMA channel through which the request has been received.
1573 * @ioctx: I/O context in which the SRP_RSP response will be built.
1574 * @rsp_code: RSP_CODE that will be stored in the response.
1575 * @tag: Tag of the request for which this response is being generated.
1576 *
1577 * Returns the size in bytes of the SRP_RSP response.
1578 *
1579 * An SRP_RSP response contains a SCSI status or service response. See also
1580 * section 6.9 in the SRP r16a document for the format of an SRP_RSP
1581 * response.
1582 */
1583static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
1584 struct srpt_send_ioctx *ioctx,
1585 u8 rsp_code, u64 tag)
1586{
1587 struct srp_rsp *srp_rsp;
1588 int resp_data_len;
1589 int resp_len;
1590
1591 resp_data_len = (rsp_code == SRP_TSK_MGMT_SUCCESS) ? 0 : 4;
1592 resp_len = sizeof(*srp_rsp) + resp_data_len;
1593
1594 srp_rsp = ioctx->ioctx.buf;
1595 BUG_ON(!srp_rsp);
1596 memset(srp_rsp, 0, sizeof *srp_rsp);
1597
1598 srp_rsp->opcode = SRP_RSP;
1599 srp_rsp->req_lim_delta = __constant_cpu_to_be32(1
1600 + atomic_xchg(&ch->req_lim_delta, 0));
1601 srp_rsp->tag = tag;
1602
1603 if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
1604 srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
1605 srp_rsp->resp_data_len = cpu_to_be32(resp_data_len);
1606 srp_rsp->data[3] = rsp_code;
1607 }
1608
1609 return resp_len;
1610}
1611
1612#define NO_SUCH_LUN ((uint64_t)-1LL)
1613
1614/*
1615 * SCSI LUN addressing method. See also SAM-2 and the section about
1616 * eight byte LUNs.
1617 */
1618enum scsi_lun_addr_method {
1619 SCSI_LUN_ADDR_METHOD_PERIPHERAL = 0,
1620 SCSI_LUN_ADDR_METHOD_FLAT = 1,
1621 SCSI_LUN_ADDR_METHOD_LUN = 2,
1622 SCSI_LUN_ADDR_METHOD_EXTENDED_LUN = 3,
1623};
1624
1625/*
1626 * srpt_unpack_lun() - Convert from network LUN to linear LUN.
1627 *
 1628 * Convert a 2-byte, 4-byte, 6-byte or 8-byte LUN structure in network byte
1629 * order (big endian) to a linear LUN. Supports three LUN addressing methods:
1630 * peripheral, flat and logical unit. See also SAM-2, section 4.9.4 (page 40).
1631 */
1632static uint64_t srpt_unpack_lun(const uint8_t *lun, int len)
1633{
1634 uint64_t res = NO_SUCH_LUN;
1635 int addressing_method;
1636
1637 if (unlikely(len < 2)) {
1638 printk(KERN_ERR "Illegal LUN length %d, expected 2 bytes or "
1639 "more", len);
1640 goto out;
1641 }
1642
1643 switch (len) {
1644 case 8:
1645 if ((*((__be64 *)lun) &
1646 __constant_cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0)
1647 goto out_err;
1648 break;
1649 case 4:
1650 if (*((__be16 *)&lun[2]) != 0)
1651 goto out_err;
1652 break;
1653 case 6:
1654 if (*((__be32 *)&lun[2]) != 0)
1655 goto out_err;
1656 break;
1657 case 2:
1658 break;
1659 default:
1660 goto out_err;
1661 }
1662
1663 addressing_method = (*lun) >> 6; /* highest two bits of byte 0 */
1664 switch (addressing_method) {
1665 case SCSI_LUN_ADDR_METHOD_PERIPHERAL:
1666 case SCSI_LUN_ADDR_METHOD_FLAT:
1667 case SCSI_LUN_ADDR_METHOD_LUN:
1668 res = *(lun + 1) | (((*lun) & 0x3f) << 8);
1669 break;
1670
1671 case SCSI_LUN_ADDR_METHOD_EXTENDED_LUN:
1672 default:
1673 printk(KERN_ERR "Unimplemented LUN addressing method %u",
1674 addressing_method);
1675 break;
1676 }
1677
1678out:
1679 return res;
1680
1681out_err:
1682 printk(KERN_ERR "Support for multi-level LUNs has not yet been"
1683 " implemented");
1684 goto out;
1685}
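Two worked examples of the conversion above: the two-byte LUN 0x00 0x05 uses the peripheral addressing method (0x00 >> 6 == 0) and unpacks to LUN 5, while 0x41 0x05 uses the flat method (0x41 >> 6 == 1) and unpacks to ((0x41 & 0x3f) << 8) | 0x05 = 0x105.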
1686
1687static int srpt_check_stop_free(struct se_cmd *cmd)
1688{
1689 struct srpt_send_ioctx *ioctx = container_of(cmd,
1690 struct srpt_send_ioctx, cmd);
1691
1692 return target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd);
1693}
1694
1695/**
1696 * srpt_handle_cmd() - Process SRP_CMD.
1697 */
1698static int srpt_handle_cmd(struct srpt_rdma_ch *ch,
1699 struct srpt_recv_ioctx *recv_ioctx,
1700 struct srpt_send_ioctx *send_ioctx)
1701{
1702 struct se_cmd *cmd;
1703 struct srp_cmd *srp_cmd;
1704 uint64_t unpacked_lun;
1705 u64 data_len;
1706 enum dma_data_direction dir;
1707 sense_reason_t ret;
1708 int rc;
1709
1710 BUG_ON(!send_ioctx);
1711
1712 srp_cmd = recv_ioctx->ioctx.buf;
1713 cmd = &send_ioctx->cmd;
1714 send_ioctx->tag = srp_cmd->tag;
1715
1716 switch (srp_cmd->task_attr) {
1717 case SRP_CMD_SIMPLE_Q:
1718 cmd->sam_task_attr = MSG_SIMPLE_TAG;
1719 break;
1720 case SRP_CMD_ORDERED_Q:
1721 default:
1722 cmd->sam_task_attr = MSG_ORDERED_TAG;
1723 break;
1724 case SRP_CMD_HEAD_OF_Q:
1725 cmd->sam_task_attr = MSG_HEAD_TAG;
1726 break;
1727 case SRP_CMD_ACA:
1728 cmd->sam_task_attr = MSG_ACA_TAG;
1729 break;
1730 }
1731
1732 if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) {
1733 printk(KERN_ERR "0x%llx: parsing SRP descriptor table failed.\n",
1734 srp_cmd->tag);
1735 ret = TCM_INVALID_CDB_FIELD;
1736 goto send_sense;
1737 }
1738
1739 unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_cmd->lun,
1740 sizeof(srp_cmd->lun));
1741 rc = target_submit_cmd(cmd, ch->sess, srp_cmd->cdb,
1742 &send_ioctx->sense_data[0], unpacked_lun, data_len,
1743 MSG_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF);
1744 if (rc != 0) {
1745 ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
1746 goto send_sense;
1747 }
1748 return 0;
1749
1750send_sense:
1751 transport_send_check_condition_and_sense(cmd, ret, 0);
1752 return -1;
1753}
1754
1755/**
1756 * srpt_rx_mgmt_fn_tag() - Process a task management function by tag.
1757 * @ch: RDMA channel of the task management request.
1758 * @fn: Task management function to perform.
1759 * @req_tag: Tag of the SRP task management request.
1760 * @mgmt_ioctx: I/O context of the task management request.
1761 *
1762 * Returns zero if the target core will process the task management
1763 * request asynchronously.
1764 *
1765 * Note: It is assumed that the initiator serializes tag-based task management
1766 * requests.
1767 */
1768static int srpt_rx_mgmt_fn_tag(struct srpt_send_ioctx *ioctx, u64 tag)
1769{
1770 struct srpt_device *sdev;
1771 struct srpt_rdma_ch *ch;
1772 struct srpt_send_ioctx *target;
1773 int ret, i;
1774
1775 ret = -EINVAL;
1776 ch = ioctx->ch;
1777 BUG_ON(!ch);
1778 BUG_ON(!ch->sport);
1779 sdev = ch->sport->sdev;
1780 BUG_ON(!sdev);
1781 spin_lock_irq(&sdev->spinlock);
1782 for (i = 0; i < ch->rq_size; ++i) {
1783 target = ch->ioctx_ring[i];
1784 if (target->cmd.se_lun == ioctx->cmd.se_lun &&
1785 target->tag == tag &&
1786 srpt_get_cmd_state(target) != SRPT_STATE_DONE) {
1787 ret = 0;
1788 /* now let the target core abort &target->cmd; */
1789 break;
1790 }
1791 }
1792 spin_unlock_irq(&sdev->spinlock);
1793 return ret;
1794}
1795
1796static int srp_tmr_to_tcm(int fn)
1797{
1798 switch (fn) {
1799 case SRP_TSK_ABORT_TASK:
1800 return TMR_ABORT_TASK;
1801 case SRP_TSK_ABORT_TASK_SET:
1802 return TMR_ABORT_TASK_SET;
1803 case SRP_TSK_CLEAR_TASK_SET:
1804 return TMR_CLEAR_TASK_SET;
1805 case SRP_TSK_LUN_RESET:
1806 return TMR_LUN_RESET;
1807 case SRP_TSK_CLEAR_ACA:
1808 return TMR_CLEAR_ACA;
1809 default:
1810 return -1;
1811 }
1812}
1813
1814/**
1815 * srpt_handle_tsk_mgmt() - Process an SRP_TSK_MGMT information unit.
1816 *
 1817 * Hands the task management request off to the target core; on failure a
 1818 * check condition is reported back to the initiator.
1818 *
1819 * For more information about SRP_TSK_MGMT information units, see also section
1820 * 6.7 in the SRP r16a document.
1821 */
1822static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
1823 struct srpt_recv_ioctx *recv_ioctx,
1824 struct srpt_send_ioctx *send_ioctx)
1825{
1826 struct srp_tsk_mgmt *srp_tsk;
1827 struct se_cmd *cmd;
1828 struct se_session *sess = ch->sess;
1829 uint64_t unpacked_lun;
1830 uint32_t tag = 0;
1831 int tcm_tmr;
1832 int rc;
1833
1834 BUG_ON(!send_ioctx);
1835
1836 srp_tsk = recv_ioctx->ioctx.buf;
1837 cmd = &send_ioctx->cmd;
1838
1839 pr_debug("recv tsk_mgmt fn %d for task_tag %lld and cmd tag %lld"
1840 " cm_id %p sess %p\n", srp_tsk->tsk_mgmt_func,
1841 srp_tsk->task_tag, srp_tsk->tag, ch->cm_id, ch->sess);
1842
1843 srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT);
1844 send_ioctx->tag = srp_tsk->tag;
1845 tcm_tmr = srp_tmr_to_tcm(srp_tsk->tsk_mgmt_func);
1846 if (tcm_tmr < 0) {
1847 send_ioctx->cmd.se_tmr_req->response =
1848 TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED;
1849 goto fail;
1850 }
1851 unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_tsk->lun,
1852 sizeof(srp_tsk->lun));
1853
1854 if (srp_tsk->tsk_mgmt_func == SRP_TSK_ABORT_TASK) {
1855 rc = srpt_rx_mgmt_fn_tag(send_ioctx, srp_tsk->task_tag);
1856 if (rc < 0) {
1857 send_ioctx->cmd.se_tmr_req->response =
1858 TMR_TASK_DOES_NOT_EXIST;
1859 goto fail;
1860 }
1861 tag = srp_tsk->task_tag;
1862 }
1863 rc = target_submit_tmr(&send_ioctx->cmd, sess, NULL, unpacked_lun,
1864 srp_tsk, tcm_tmr, GFP_KERNEL, tag,
1865 TARGET_SCF_ACK_KREF);
1866 if (rc != 0) {
1867 send_ioctx->cmd.se_tmr_req->response = TMR_FUNCTION_REJECTED;
1868 goto fail;
1869 }
1870 return;
1871fail:
1872	transport_send_check_condition_and_sense(cmd, 0, 0); /* XXX */
1873}
1874
1875/**
1876 * srpt_handle_new_iu() - Process a newly received information unit.
1877 * @ch: RDMA channel through which the information unit has been received.
1878 * @recv_ioctx: Receive I/O context associated with the information unit.
1879 * @send_ioctx: Send I/O context to use for the reply, or NULL to allocate one.
1880 */
1880static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
1881 struct srpt_recv_ioctx *recv_ioctx,
1882 struct srpt_send_ioctx *send_ioctx)
1883{
1884 struct srp_cmd *srp_cmd;
1885 enum rdma_ch_state ch_state;
1886
1887 BUG_ON(!ch);
1888 BUG_ON(!recv_ioctx);
1889
1890 ib_dma_sync_single_for_cpu(ch->sport->sdev->device,
1891 recv_ioctx->ioctx.dma, srp_max_req_size,
1892 DMA_FROM_DEVICE);
1893
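	/*
	 * Information units that arrive while the channel is still connecting
	 * are parked on cmd_wait_list; srpt_cm_rtu_recv() replays them once
	 * the connection has been established.
	 */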
1894 ch_state = srpt_get_ch_state(ch);
1895 if (unlikely(ch_state == CH_CONNECTING)) {
1896 list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list);
1897 goto out;
1898 }
1899
1900 if (unlikely(ch_state != CH_LIVE))
1901 goto out;
1902
1903 srp_cmd = recv_ioctx->ioctx.buf;
1904 if (srp_cmd->opcode == SRP_CMD || srp_cmd->opcode == SRP_TSK_MGMT) {
1905 if (!send_ioctx)
1906 send_ioctx = srpt_get_send_ioctx(ch);
1907 if (unlikely(!send_ioctx)) {
1908 list_add_tail(&recv_ioctx->wait_list,
1909 &ch->cmd_wait_list);
1910 goto out;
1911 }
1912 }
1913
1914 switch (srp_cmd->opcode) {
1915 case SRP_CMD:
1916 srpt_handle_cmd(ch, recv_ioctx, send_ioctx);
1917 break;
1918 case SRP_TSK_MGMT:
1919 srpt_handle_tsk_mgmt(ch, recv_ioctx, send_ioctx);
1920 break;
1921 case SRP_I_LOGOUT:
1922 printk(KERN_ERR "Not yet implemented: SRP_I_LOGOUT\n");
1923 break;
1924 case SRP_CRED_RSP:
1925 pr_debug("received SRP_CRED_RSP\n");
1926 break;
1927 case SRP_AER_RSP:
1928 pr_debug("received SRP_AER_RSP\n");
1929 break;
1930 case SRP_RSP:
1931 printk(KERN_ERR "Received SRP_RSP\n");
1932 break;
1933 default:
1934 printk(KERN_ERR "received IU with unknown opcode 0x%x\n",
1935 srp_cmd->opcode);
1936 break;
1937 }
1938
1939 srpt_post_recv(ch->sport->sdev, recv_ioctx);
1940out:
1941 return;
1942}
1943
1944static void srpt_process_rcv_completion(struct ib_cq *cq,
1945 struct srpt_rdma_ch *ch,
1946 struct ib_wc *wc)
1947{
1948 struct srpt_device *sdev = ch->sport->sdev;
1949 struct srpt_recv_ioctx *ioctx;
1950 u32 index;
1951
1952 index = idx_from_wr_id(wc->wr_id);
1953 if (wc->status == IB_WC_SUCCESS) {
1954 int req_lim;
1955
1956 req_lim = atomic_dec_return(&ch->req_lim);
1957 if (unlikely(req_lim < 0))
1958 printk(KERN_ERR "req_lim = %d < 0\n", req_lim);
1959 ioctx = sdev->ioctx_ring[index];
1960 srpt_handle_new_iu(ch, ioctx, NULL);
1961 } else {
1962 printk(KERN_INFO "receiving failed for idx %u with status %d\n",
1963 index, wc->status);
1964 }
1965}
1966
1967/**
1968 * srpt_process_send_completion() - Process an IB send completion.
1969 *
1970 * Note: Although this has not yet been observed during tests, at least in
1971 * theory it is possible that the srpt_get_send_ioctx() call invoked by
1972 * srpt_handle_new_iu() fails. This is possible because the req_lim_delta
1973 * value in each response is set to one, and it is possible that this response
1974 * makes the initiator send a new request before the send completion for that
1975 * response has been processed. This could e.g. happen if the call to
1976 * srpt_put_send_ioctx() is delayed because of a higher priority interrupt or
1977 * if IB retransmission causes generation of the send completion to be
1978 * delayed. Incoming information units for which srpt_get_send_ioctx() fails
1979 * are queued on cmd_wait_list. The code below processes these delayed
1980 * requests one at a time.
1981 */
1982static void srpt_process_send_completion(struct ib_cq *cq,
1983 struct srpt_rdma_ch *ch,
1984 struct ib_wc *wc)
1985{
1986 struct srpt_send_ioctx *send_ioctx;
1987 uint32_t index;
1988 enum srpt_opcode opcode;
1989
1990 index = idx_from_wr_id(wc->wr_id);
1991 opcode = opcode_from_wr_id(wc->wr_id);
1992 send_ioctx = ch->ioctx_ring[index];
1993 if (wc->status == IB_WC_SUCCESS) {
1994 if (opcode == SRPT_SEND)
1995 srpt_handle_send_comp(ch, send_ioctx);
1996 else {
1997 WARN_ON(opcode != SRPT_RDMA_ABORT &&
1998 wc->opcode != IB_WC_RDMA_READ);
1999 srpt_handle_rdma_comp(ch, send_ioctx, opcode);
2000 }
2001 } else {
2002 if (opcode == SRPT_SEND) {
2003 printk(KERN_INFO "sending response for idx %u failed"
2004 " with status %d\n", index, wc->status);
2005 srpt_handle_send_err_comp(ch, wc->wr_id);
2006 } else if (opcode != SRPT_RDMA_MID) {
2007			printk(KERN_INFO "RDMA with opcode %d for idx %u failed"
2008			       " with status %d\n", opcode, index, wc->status);
2009 srpt_handle_rdma_err_comp(ch, send_ioctx, opcode);
2010 }
2011 }
2012
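	/*
	 * A send completion frees up a send I/O context, so this is the
	 * natural point to retry information units that were parked on
	 * cmd_wait_list because no send context was available on arrival.
	 */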
2013 while (unlikely(opcode == SRPT_SEND
2014 && !list_empty(&ch->cmd_wait_list)
2015 && srpt_get_ch_state(ch) == CH_LIVE
2016 && (send_ioctx = srpt_get_send_ioctx(ch)) != NULL)) {
2017 struct srpt_recv_ioctx *recv_ioctx;
2018
2019 recv_ioctx = list_first_entry(&ch->cmd_wait_list,
2020 struct srpt_recv_ioctx,
2021 wait_list);
2022 list_del(&recv_ioctx->wait_list);
2023 srpt_handle_new_iu(ch, recv_ioctx, send_ioctx);
2024 }
2025}
2026
2027static void srpt_process_completion(struct ib_cq *cq, struct srpt_rdma_ch *ch)
2028{
2029 struct ib_wc *const wc = ch->wc;
2030 int i, n;
2031
2032 WARN_ON(cq != ch->cq);
2033
2034 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
2035 while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) {
2036 for (i = 0; i < n; i++) {
2037 if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV)
2038 srpt_process_rcv_completion(cq, ch, &wc[i]);
2039 else
2040 srpt_process_send_completion(cq, ch, &wc[i]);
2041 }
2042 }
2043}
2044
2045/**
2046 * srpt_completion() - IB completion queue callback function.
2047 *
2048 * Notes:
2049 * - It is guaranteed that a completion handler will never be invoked
2050 * concurrently on two different CPUs for the same completion queue. See also
2051 * Documentation/infiniband/core_locking.txt and the implementation of
2052 * handle_edge_irq() in kernel/irq/chip.c.
2053 * - When threaded IRQs are enabled, completion handlers are invoked in thread
2054 * context instead of interrupt context.
2055 */
2056static void srpt_completion(struct ib_cq *cq, void *ctx)
2057{
2058 struct srpt_rdma_ch *ch = ctx;
2059
2060 wake_up_interruptible(&ch->wait_queue);
2061}
2062
2063static int srpt_compl_thread(void *arg)
2064{
2065 struct srpt_rdma_ch *ch;
2066
2067 /* Hibernation / freezing of the SRPT kernel thread is not supported. */
2068 current->flags |= PF_NOFREEZE;
2069
2070 ch = arg;
2071 BUG_ON(!ch);
2072 printk(KERN_INFO "Session %s: kernel thread %s (PID %d) started\n",
2073 ch->sess_name, ch->thread->comm, current->pid);
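	/*
	 * The wait condition below deliberately uses the comma operator:
	 * every wakeup first drains the completion queue via
	 * srpt_process_completion() and only the kthread_should_stop() value
	 * decides whether the loop terminates.
	 */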
2074 while (!kthread_should_stop()) {
2075 wait_event_interruptible(ch->wait_queue,
2076 (srpt_process_completion(ch->cq, ch),
2077 kthread_should_stop()));
2078 }
2079 printk(KERN_INFO "Session %s: kernel thread %s (PID %d) stopped\n",
2080 ch->sess_name, ch->thread->comm, current->pid);
2081 return 0;
2082}
2083
2084/**
2085 * srpt_create_ch_ib() - Create receive and send completion queues.
2086 */
2087static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
2088{
2089 struct ib_qp_init_attr *qp_init;
2090 struct srpt_port *sport = ch->sport;
2091 struct srpt_device *sdev = sport->sdev;
2092 u32 srp_sq_size = sport->port_attrib.srp_sq_size;
2093 int ret;
2094
2095 WARN_ON(ch->rq_size < 1);
2096
2097 ret = -ENOMEM;
2098 qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
2099 if (!qp_init)
2100 goto out;
2101
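	/*
	 * Size the single completion queue so that it can hold both receive
	 * completions (up to rq_size entries, fed by the device-wide SRQ) and
	 * send/RDMA completions (up to srp_sq_size entries).
	 */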
2102 ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch,
2103 ch->rq_size + srp_sq_size, 0);
2104 if (IS_ERR(ch->cq)) {
2105 ret = PTR_ERR(ch->cq);
2106 printk(KERN_ERR "failed to create CQ cqe= %d ret= %d\n",
2107 ch->rq_size + srp_sq_size, ret);
2108 goto out;
2109 }
2110
2111 qp_init->qp_context = (void *)ch;
2112 qp_init->event_handler
2113 = (void(*)(struct ib_event *, void*))srpt_qp_event;
2114 qp_init->send_cq = ch->cq;
2115 qp_init->recv_cq = ch->cq;
2116 qp_init->srq = sdev->srq;
2117 qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
2118 qp_init->qp_type = IB_QPT_RC;
2119 qp_init->cap.max_send_wr = srp_sq_size;
2120 qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
2121
2122 ch->qp = ib_create_qp(sdev->pd, qp_init);
2123 if (IS_ERR(ch->qp)) {
2124 ret = PTR_ERR(ch->qp);
2125 printk(KERN_ERR "failed to create_qp ret= %d\n", ret);
2126 goto err_destroy_cq;
2127 }
2128
2129 atomic_set(&ch->sq_wr_avail, qp_init->cap.max_send_wr);
2130
2131 pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d cm_id= %p\n",
2132 __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
2133 qp_init->cap.max_send_wr, ch->cm_id);
2134
2135 ret = srpt_init_ch_qp(ch, ch->qp);
2136 if (ret)
2137 goto err_destroy_qp;
2138
2139 init_waitqueue_head(&ch->wait_queue);
2140
2141 pr_debug("creating thread for session %s\n", ch->sess_name);
2142
2143	ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl");
2144	if (IS_ERR(ch->thread)) {
2145		printk(KERN_ERR "failed to create kernel thread %ld\n",
2146		       PTR_ERR(ch->thread));
	ret = PTR_ERR(ch->thread);
2147		ch->thread = NULL;
2148		goto err_destroy_qp;
2149	}
2150
2151out:
2152 kfree(qp_init);
2153 return ret;
2154
2155err_destroy_qp:
2156 ib_destroy_qp(ch->qp);
2157err_destroy_cq:
2158 ib_destroy_cq(ch->cq);
2159 goto out;
2160}
2161
2162static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch)
2163{
2164 if (ch->thread)
2165 kthread_stop(ch->thread);
2166
2167 ib_destroy_qp(ch->qp);
2168 ib_destroy_cq(ch->cq);
2169}
2170
2171/**
2172 * __srpt_close_ch() - Initiate closing of an RDMA channel.
2173 *
2174 * Move the channel into the CH_DISCONNECTING state and send the CM messages
2175 * (REJ or DREQ) that ensure its resources are deallocated at an appropriate time.
2176 *
2177 * Note: The caller must hold ch->sport->sdev->spinlock.
2178 */
2179static void __srpt_close_ch(struct srpt_rdma_ch *ch)
2180{
2181 struct srpt_device *sdev;
2182 enum rdma_ch_state prev_state;
2183 unsigned long flags;
2184
2185 sdev = ch->sport->sdev;
2186
2187 spin_lock_irqsave(&ch->spinlock, flags);
2188 prev_state = ch->state;
2189 switch (prev_state) {
2190 case CH_CONNECTING:
2191 case CH_LIVE:
2192 ch->state = CH_DISCONNECTING;
2193 break;
2194 default:
2195 break;
2196 }
2197 spin_unlock_irqrestore(&ch->spinlock, flags);
2198
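	/*
	 * For a channel that was still connecting a CM REJ is sent first and,
	 * via the fall-through below, a DREQ as well; for an established
	 * channel only the DREQ is needed.
	 */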
2199 switch (prev_state) {
2200 case CH_CONNECTING:
2201 ib_send_cm_rej(ch->cm_id, IB_CM_REJ_NO_RESOURCES, NULL, 0,
2202 NULL, 0);
2203 /* fall through */
2204 case CH_LIVE:
2205 if (ib_send_cm_dreq(ch->cm_id, NULL, 0) < 0)
2206 printk(KERN_ERR "sending CM DREQ failed.\n");
2207 break;
2208 case CH_DISCONNECTING:
2209 break;
2210 case CH_DRAINING:
2211 case CH_RELEASING:
2212 break;
2213 }
2214}
2215
2216/**
2217 * srpt_close_ch() - Close an RDMA channel.
2218 */
2219static void srpt_close_ch(struct srpt_rdma_ch *ch)
2220{
2221 struct srpt_device *sdev;
2222
2223 sdev = ch->sport->sdev;
2224 spin_lock_irq(&sdev->spinlock);
2225 __srpt_close_ch(ch);
2226 spin_unlock_irq(&sdev->spinlock);
2227}
2228
2229/**
2230 * srpt_drain_channel() - Drain a channel by resetting the IB queue pair.
2231 * @cm_id: Pointer to the CM ID of the channel to be drained.
2232 *
2233 * Note: Must be called from inside srpt_cm_handler to avoid a race between
2234 * accessing sdev->spinlock and the call to kfree(sdev) in srpt_remove_one()
2235 * (the caller of srpt_cm_handler holds the cm_id spinlock; srpt_remove_one()
2236 * waits until all target sessions for the associated IB device have been
2237 * unregistered and target session registration involves a call to
2238 * ib_destroy_cm_id(), which locks the cm_id spinlock and hence waits until
2239 * this function has finished).
2240 */
2241static void srpt_drain_channel(struct ib_cm_id *cm_id)
2242{
2243 struct srpt_device *sdev;
2244 struct srpt_rdma_ch *ch;
2245 int ret;
2246 bool do_reset = false;
2247
2248 WARN_ON_ONCE(irqs_disabled());
2249
2250 sdev = cm_id->context;
2251 BUG_ON(!sdev);
2252 spin_lock_irq(&sdev->spinlock);
2253 list_for_each_entry(ch, &sdev->rch_list, list) {
2254 if (ch->cm_id == cm_id) {
2255 do_reset = srpt_test_and_set_ch_state(ch,
2256 CH_CONNECTING, CH_DRAINING) ||
2257 srpt_test_and_set_ch_state(ch,
2258 CH_LIVE, CH_DRAINING) ||
2259 srpt_test_and_set_ch_state(ch,
2260 CH_DISCONNECTING, CH_DRAINING);
2261 break;
2262 }
2263 }
2264 spin_unlock_irq(&sdev->spinlock);
2265
2266 if (do_reset) {
2267 ret = srpt_ch_qp_err(ch);
2268 if (ret < 0)
2269 printk(KERN_ERR "Setting queue pair in error state"
2270 " failed: %d\n", ret);
2271 }
2272}
2273
2274/**
2275 * srpt_find_channel() - Look up an RDMA channel.
2276 * @cm_id: Pointer to the CM ID of the channel to be looked up.
2277 *
2278 * Return NULL if no matching RDMA channel has been found.
2279 */
2280static struct srpt_rdma_ch *srpt_find_channel(struct srpt_device *sdev,
2281 struct ib_cm_id *cm_id)
2282{
2283 struct srpt_rdma_ch *ch;
2284 bool found;
2285
2286 WARN_ON_ONCE(irqs_disabled());
2287 BUG_ON(!sdev);
2288
2289 found = false;
2290 spin_lock_irq(&sdev->spinlock);
2291 list_for_each_entry(ch, &sdev->rch_list, list) {
2292 if (ch->cm_id == cm_id) {
2293 found = true;
2294 break;
2295 }
2296 }
2297 spin_unlock_irq(&sdev->spinlock);
2298
2299 return found ? ch : NULL;
2300}
2301
2302/**
2303 * srpt_release_channel() - Release channel resources.
2304 *
2305 * Schedules the actual release because:
2306 * - Calling ib_destroy_cm_id() from inside an IB CM callback would
2307 *   trigger a deadlock.
2308 * - It is not safe to call TCM transport_* functions from interrupt context.
2309 */
2310static void srpt_release_channel(struct srpt_rdma_ch *ch)
2311{
2312 schedule_work(&ch->release_work);
2313}
2314
2315static void srpt_release_channel_work(struct work_struct *w)
2316{
2317 struct srpt_rdma_ch *ch;
2318 struct srpt_device *sdev;
2319 struct se_session *se_sess;
2320
2321 ch = container_of(w, struct srpt_rdma_ch, release_work);
2322 pr_debug("ch = %p; ch->sess = %p; release_done = %p\n", ch, ch->sess,
2323 ch->release_done);
2324
2325 sdev = ch->sport->sdev;
2326 BUG_ON(!sdev);
2327
2328 se_sess = ch->sess;
2329 BUG_ON(!se_sess);
2330
2331 target_wait_for_sess_cmds(se_sess, 0);
2332
2333 transport_deregister_session_configfs(se_sess);
2334 transport_deregister_session(se_sess);
2335 ch->sess = NULL;
2336
2337 srpt_destroy_ch_ib(ch);
2338
2339 srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
2340 ch->sport->sdev, ch->rq_size,
2341 ch->rsp_size, DMA_TO_DEVICE);
2342
2343 spin_lock_irq(&sdev->spinlock);
2344 list_del(&ch->list);
2345 spin_unlock_irq(&sdev->spinlock);
2346
2347 ib_destroy_cm_id(ch->cm_id);
2348
2349 if (ch->release_done)
2350 complete(ch->release_done);
2351
2352 wake_up(&sdev->ch_releaseQ);
2353
2354 kfree(ch);
2355}
2356
2357static struct srpt_node_acl *__srpt_lookup_acl(struct srpt_port *sport,
2358 u8 i_port_id[16])
2359{
2360 struct srpt_node_acl *nacl;
2361
2362 list_for_each_entry(nacl, &sport->port_acl_list, list)
2363 if (memcmp(nacl->i_port_id, i_port_id,
2364 sizeof(nacl->i_port_id)) == 0)
2365 return nacl;
2366
2367 return NULL;
2368}
2369
2370static struct srpt_node_acl *srpt_lookup_acl(struct srpt_port *sport,
2371 u8 i_port_id[16])
2372{
2373 struct srpt_node_acl *nacl;
2374
2375 spin_lock_irq(&sport->port_acl_lock);
2376 nacl = __srpt_lookup_acl(sport, i_port_id);
2377 spin_unlock_irq(&sport->port_acl_lock);
2378
2379 return nacl;
2380}
2381
2382/**
2383 * srpt_cm_req_recv() - Process the event IB_CM_REQ_RECEIVED.
2384 *
2385 * Ownership of the cm_id is transferred to the target session if this
2386 * function returns zero. Otherwise the caller remains the owner of the cm_id.
2387 */
2388static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
2389 struct ib_cm_req_event_param *param,
2390 void *private_data)
2391{
2392 struct srpt_device *sdev = cm_id->context;
2393 struct srpt_port *sport = &sdev->port[param->port - 1];
2394 struct srp_login_req *req;
2395 struct srp_login_rsp *rsp;
2396 struct srp_login_rej *rej;
2397 struct ib_cm_rep_param *rep_param;
2398 struct srpt_rdma_ch *ch, *tmp_ch;
2399 struct srpt_node_acl *nacl;
2400 u32 it_iu_len;
2401 int i;
2402 int ret = 0;
2403
2404 WARN_ON_ONCE(irqs_disabled());
2405
2406 if (WARN_ON(!sdev || !private_data))
2407 return -EINVAL;
2408
2409 req = (struct srp_login_req *)private_data;
2410
2411 it_iu_len = be32_to_cpu(req->req_it_iu_len);
2412
2413 printk(KERN_INFO "Received SRP_LOGIN_REQ with i_port_id 0x%llx:0x%llx,"
2414 " t_port_id 0x%llx:0x%llx and it_iu_len %d on port %d"
2415 " (guid=0x%llx:0x%llx)\n",
2416 be64_to_cpu(*(__be64 *)&req->initiator_port_id[0]),
2417 be64_to_cpu(*(__be64 *)&req->initiator_port_id[8]),
2418 be64_to_cpu(*(__be64 *)&req->target_port_id[0]),
2419 be64_to_cpu(*(__be64 *)&req->target_port_id[8]),
2420 it_iu_len,
2421 param->port,
2422 be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]),
2423 be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8]));
2424
2425 rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
2426 rej = kzalloc(sizeof *rej, GFP_KERNEL);
2427 rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);
2428
2429 if (!rsp || !rej || !rep_param) {
2430 ret = -ENOMEM;
2431 goto out;
2432 }
2433
2434 if (it_iu_len > srp_max_req_size || it_iu_len < 64) {
2435 rej->reason = __constant_cpu_to_be32(
2436 SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
2437 ret = -EINVAL;
2438 printk(KERN_ERR "rejected SRP_LOGIN_REQ because its"
2439 " length (%d bytes) is out of range (%d .. %d)\n",
2440 it_iu_len, 64, srp_max_req_size);
2441 goto reject;
2442 }
2443
2444 if (!sport->enabled) {
2445 rej->reason = __constant_cpu_to_be32(
2446 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2447 ret = -EINVAL;
2448 printk(KERN_ERR "rejected SRP_LOGIN_REQ because the target port"
2449 " has not yet been enabled\n");
2450 goto reject;
2451 }
2452
2453 if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) {
2454 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
2455
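		/*
		 * SRP_MULTICHAN_SINGLE: close any existing channel between
		 * the same initiator and target ports and report it as
		 * terminated in the login response.
		 */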
2456 spin_lock_irq(&sdev->spinlock);
2457
2458 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
2459 if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
2460 && !memcmp(ch->t_port_id, req->target_port_id, 16)
2461 && param->port == ch->sport->port
2462 && param->listen_id == ch->sport->sdev->cm_id
2463 && ch->cm_id) {
2464 enum rdma_ch_state ch_state;
2465
2466 ch_state = srpt_get_ch_state(ch);
2467 if (ch_state != CH_CONNECTING
2468 && ch_state != CH_LIVE)
2469 continue;
2470
2471 /* found an existing channel */
2472 pr_debug("Found existing channel %s"
2473 " cm_id= %p state= %d\n",
2474 ch->sess_name, ch->cm_id, ch_state);
2475
2476 __srpt_close_ch(ch);
2477
2478 rsp->rsp_flags =
2479 SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
2480 }
2481 }
2482
2483 spin_unlock_irq(&sdev->spinlock);
2484
2485 } else
2486 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
2487
2488 if (*(__be64 *)req->target_port_id != cpu_to_be64(srpt_service_guid)
2489 || *(__be64 *)(req->target_port_id + 8) !=
2490 cpu_to_be64(srpt_service_guid)) {
2491 rej->reason = __constant_cpu_to_be32(
2492 SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
2493		ret = -EINVAL;
2494 printk(KERN_ERR "rejected SRP_LOGIN_REQ because it"
2495 " has an invalid target port identifier.\n");
2496 goto reject;
2497 }
2498
2499 ch = kzalloc(sizeof *ch, GFP_KERNEL);
2500 if (!ch) {
2501 rej->reason = __constant_cpu_to_be32(
2502 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2503 printk(KERN_ERR "rejected SRP_LOGIN_REQ because no memory.\n");
2504 ret = -ENOMEM;
2505 goto reject;
2506 }
2507
2508 INIT_WORK(&ch->release_work, srpt_release_channel_work);
2509 memcpy(ch->i_port_id, req->initiator_port_id, 16);
2510 memcpy(ch->t_port_id, req->target_port_id, 16);
2511 ch->sport = &sdev->port[param->port - 1];
2512 ch->cm_id = cm_id;
2513 /*
2514 * Avoid QUEUE_FULL conditions by limiting the number of buffers used
2515 * for the SRP protocol to the command queue size.
2516 */
2517 ch->rq_size = SRPT_RQ_SIZE;
2518 spin_lock_init(&ch->spinlock);
2519 ch->state = CH_CONNECTING;
2520 INIT_LIST_HEAD(&ch->cmd_wait_list);
2521 ch->rsp_size = ch->sport->port_attrib.srp_max_rsp_size;
2522
2523 ch->ioctx_ring = (struct srpt_send_ioctx **)
2524 srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size,
2525 sizeof(*ch->ioctx_ring[0]),
2526 ch->rsp_size, DMA_TO_DEVICE);
2527	if (!ch->ioctx_ring) {
		rej->reason = __constant_cpu_to_be32(
				SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
		ret = -ENOMEM;
2528		goto free_ch;
	}
2529
2530 INIT_LIST_HEAD(&ch->free_list);
2531 for (i = 0; i < ch->rq_size; i++) {
2532 ch->ioctx_ring[i]->ch = ch;
2533 list_add_tail(&ch->ioctx_ring[i]->free_list, &ch->free_list);
2534 }
2535
2536 ret = srpt_create_ch_ib(ch);
2537 if (ret) {
2538 rej->reason = __constant_cpu_to_be32(
2539 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2540 printk(KERN_ERR "rejected SRP_LOGIN_REQ because creating"
2541 " a new RDMA channel failed.\n");
2542 goto free_ring;
2543 }
2544
2545 ret = srpt_ch_qp_rtr(ch, ch->qp);
2546 if (ret) {
2547 rej->reason = __constant_cpu_to_be32(
2548 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2549 printk(KERN_ERR "rejected SRP_LOGIN_REQ because enabling"
2550 " RTR failed (error code = %d)\n", ret);
2551 goto destroy_ib;
2552 }
2553	/*
2554	 * Use the initiator port identifier as the session name.
2555	 */
2556 snprintf(ch->sess_name, sizeof(ch->sess_name), "0x%016llx%016llx",
2557 be64_to_cpu(*(__be64 *)ch->i_port_id),
2558 be64_to_cpu(*(__be64 *)(ch->i_port_id + 8)));
2559
2560 pr_debug("registering session %s\n", ch->sess_name);
2561
2562 nacl = srpt_lookup_acl(sport, ch->i_port_id);
2563	if (!nacl) {
2564		printk(KERN_INFO "Rejected login because no ACL has been"
2565		       " configured yet for initiator %s.\n", ch->sess_name);
2566		rej->reason = __constant_cpu_to_be32(
2567			SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED);
		ret = -EINVAL;
2568		goto destroy_ib;
2569	}
2570
2571	ch->sess = transport_init_session();
2572	if (IS_ERR(ch->sess)) {
2573		rej->reason = __constant_cpu_to_be32(
2574				SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2575		pr_debug("Failed to create session\n");
		ret = PTR_ERR(ch->sess);
		ch->sess = NULL;
		/* The session has not been registered yet. */
2576		goto destroy_ib;
2577	}
2578 ch->sess->se_node_acl = &nacl->nacl;
2579 transport_register_session(&sport->port_tpg_1, &nacl->nacl, ch->sess, ch);
2580
2581 pr_debug("Establish connection sess=%p name=%s cm_id=%p\n", ch->sess,
2582 ch->sess_name, ch->cm_id);
2583
2584 /* create srp_login_response */
2585 rsp->opcode = SRP_LOGIN_RSP;
2586 rsp->tag = req->tag;
2587 rsp->max_it_iu_len = req->req_it_iu_len;
2588 rsp->max_ti_iu_len = req->req_it_iu_len;
2589 ch->max_ti_iu_len = it_iu_len;
2590 rsp->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT
2591 | SRP_BUF_FORMAT_INDIRECT);
2592 rsp->req_lim_delta = cpu_to_be32(ch->rq_size);
2593 atomic_set(&ch->req_lim, ch->rq_size);
2594 atomic_set(&ch->req_lim_delta, 0);
2595
2596 /* create cm reply */
2597 rep_param->qp_num = ch->qp->qp_num;
2598 rep_param->private_data = (void *)rsp;
2599 rep_param->private_data_len = sizeof *rsp;
2600 rep_param->rnr_retry_count = 7;
2601 rep_param->flow_control = 1;
2602 rep_param->failover_accepted = 0;
2603 rep_param->srq = 1;
2604 rep_param->responder_resources = 4;
2605 rep_param->initiator_depth = 4;
2606
2607 ret = ib_send_cm_rep(cm_id, rep_param);
2608 if (ret) {
2609 printk(KERN_ERR "sending SRP_LOGIN_REQ response failed"
2610 " (error code = %d)\n", ret);
2611 goto release_channel;
2612 }
2613
2614 spin_lock_irq(&sdev->spinlock);
2615 list_add_tail(&ch->list, &sdev->rch_list);
2616 spin_unlock_irq(&sdev->spinlock);
2617
2618 goto out;
2619
2620release_channel:
2621 srpt_set_ch_state(ch, CH_RELEASING);
2622 transport_deregister_session_configfs(ch->sess);
2623
2624deregister_session:
2625 transport_deregister_session(ch->sess);
2626 ch->sess = NULL;
2627
2628destroy_ib:
2629 srpt_destroy_ch_ib(ch);
2630
2631free_ring:
2632 srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
2633 ch->sport->sdev, ch->rq_size,
2634 ch->rsp_size, DMA_TO_DEVICE);
2635free_ch:
2636 kfree(ch);
2637
2638reject:
2639 rej->opcode = SRP_LOGIN_REJ;
2640 rej->tag = req->tag;
2641 rej->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT
2642 | SRP_BUF_FORMAT_INDIRECT);
2643
2644 ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
2645 (void *)rej, sizeof *rej);
2646
2647out:
2648 kfree(rep_param);
2649 kfree(rsp);
2650 kfree(rej);
2651
2652 return ret;
2653}
2654
2655static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
2656{
2657 printk(KERN_INFO "Received IB REJ for cm_id %p.\n", cm_id);
2658 srpt_drain_channel(cm_id);
2659}
2660
2661/**
2662 * srpt_cm_rtu_recv() - Process an IB_CM_RTU_RECEIVED or USER_ESTABLISHED event.
2663 *
2664 * An IB_CM_RTU_RECEIVED message indicates that the connection is established
2665 * and that the recipient may begin transmitting (RTU = ready to use).
2666 */
2667static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
2668{
2669 struct srpt_rdma_ch *ch;
2670 int ret;
2671
2672 ch = srpt_find_channel(cm_id->context, cm_id);
2673 BUG_ON(!ch);
2674
2675 if (srpt_test_and_set_ch_state(ch, CH_CONNECTING, CH_LIVE)) {
2676 struct srpt_recv_ioctx *ioctx, *ioctx_tmp;
2677
2678 ret = srpt_ch_qp_rts(ch, ch->qp);
2679
2680 list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
2681 wait_list) {
2682 list_del(&ioctx->wait_list);
2683 srpt_handle_new_iu(ch, ioctx, NULL);
2684 }
2685 if (ret)
2686 srpt_close_ch(ch);
2687 }
2688}
2689
2690static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
2691{
2692 printk(KERN_INFO "Received IB TimeWait exit for cm_id %p.\n", cm_id);
2693 srpt_drain_channel(cm_id);
2694}
2695
2696static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
2697{
2698 printk(KERN_INFO "Received IB REP error for cm_id %p.\n", cm_id);
2699 srpt_drain_channel(cm_id);
2700}
2701
2702/**
2703 * srpt_cm_dreq_recv() - Process reception of a DREQ message.
2704 */
2705static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
2706{
2707 struct srpt_rdma_ch *ch;
2708 unsigned long flags;
2709 bool send_drep = false;
2710
2711 ch = srpt_find_channel(cm_id->context, cm_id);
2712 BUG_ON(!ch);
2713
2714 pr_debug("cm_id= %p ch->state= %d\n", cm_id, srpt_get_ch_state(ch));
2715
2716 spin_lock_irqsave(&ch->spinlock, flags);
2717 switch (ch->state) {
2718 case CH_CONNECTING:
2719 case CH_LIVE:
2720 send_drep = true;
2721 ch->state = CH_DISCONNECTING;
2722 break;
2723 case CH_DISCONNECTING:
2724 case CH_DRAINING:
2725 case CH_RELEASING:
2726 WARN(true, "unexpected channel state %d\n", ch->state);
2727 break;
2728 }
2729 spin_unlock_irqrestore(&ch->spinlock, flags);
2730
2731 if (send_drep) {
2732 if (ib_send_cm_drep(ch->cm_id, NULL, 0) < 0)
2733 printk(KERN_ERR "Sending IB DREP failed.\n");
2734 printk(KERN_INFO "Received DREQ and sent DREP for session %s.\n",
2735 ch->sess_name);
2736 }
2737}
2738
2739/**
2740 * srpt_cm_drep_recv() - Process reception of a DREP message.
2741 */
2742static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
2743{
2744 printk(KERN_INFO "Received InfiniBand DREP message for cm_id %p.\n",
2745 cm_id);
2746 srpt_drain_channel(cm_id);
2747}
2748
2749/**
2750 * srpt_cm_handler() - IB connection manager callback function.
2751 *
2752 * A non-zero return value will cause the caller to destroy the CM ID.
2753 *
2754 * Note: srpt_cm_handler() must only return a non-zero value when transferring
2755 * ownership of the cm_id to a channel by srpt_cm_req_recv() failed. Returning
2756 * a non-zero value in any other case will trigger a race with the
2757 * ib_destroy_cm_id() call in srpt_release_channel().
2758 */
2759static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2760{
2761 int ret;
2762
2763 ret = 0;
2764 switch (event->event) {
2765 case IB_CM_REQ_RECEIVED:
2766 ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
2767 event->private_data);
2768 break;
2769 case IB_CM_REJ_RECEIVED:
2770 srpt_cm_rej_recv(cm_id);
2771 break;
2772 case IB_CM_RTU_RECEIVED:
2773 case IB_CM_USER_ESTABLISHED:
2774 srpt_cm_rtu_recv(cm_id);
2775 break;
2776 case IB_CM_DREQ_RECEIVED:
2777 srpt_cm_dreq_recv(cm_id);
2778 break;
2779 case IB_CM_DREP_RECEIVED:
2780 srpt_cm_drep_recv(cm_id);
2781 break;
2782 case IB_CM_TIMEWAIT_EXIT:
2783 srpt_cm_timewait_exit(cm_id);
2784 break;
2785 case IB_CM_REP_ERROR:
2786 srpt_cm_rep_error(cm_id);
2787 break;
2788 case IB_CM_DREQ_ERROR:
2789 printk(KERN_INFO "Received IB DREQ ERROR event.\n");
2790 break;
2791 case IB_CM_MRA_RECEIVED:
2792 printk(KERN_INFO "Received IB MRA event\n");
2793 break;
2794 default:
2795 printk(KERN_ERR "received unrecognized IB CM event %d\n",
2796 event->event);
2797 break;
2798 }
2799
2800 return ret;
2801}
2802
2803/**
2804 * srpt_perform_rdmas() - Perform IB RDMA.
2805 *
2806 * Returns zero upon success or a negative number upon failure.
2807 */
2808static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
2809 struct srpt_send_ioctx *ioctx)
2810{
2811 struct ib_send_wr wr;
2812 struct ib_send_wr *bad_wr;
2813 struct rdma_iu *riu;
2814 int i;
2815 int ret;
2816 int sq_wr_avail;
2817 enum dma_data_direction dir;
2818 const int n_rdma = ioctx->n_rdma;
2819
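	/*
	 * Writes (DMA_TO_DEVICE) are implemented as RDMA READs issued by the
	 * target, so n_rdma send queue slots are reserved up front; the error
	 * path at the end of this function returns them if posting fails.
	 */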
2820 dir = ioctx->cmd.data_direction;
2821 if (dir == DMA_TO_DEVICE) {
2822 /* write */
2823 ret = -ENOMEM;
2824 sq_wr_avail = atomic_sub_return(n_rdma, &ch->sq_wr_avail);
2825 if (sq_wr_avail < 0) {
2826 printk(KERN_WARNING "IB send queue full (needed %d)\n",
2827 n_rdma);
2828 goto out;
2829 }
2830 }
2831
2832 ioctx->rdma_aborted = false;
2833 ret = 0;
2834 riu = ioctx->rdma_ius;
2835 memset(&wr, 0, sizeof wr);
2836
2837 for (i = 0; i < n_rdma; ++i, ++riu) {
2838 if (dir == DMA_FROM_DEVICE) {
2839 wr.opcode = IB_WR_RDMA_WRITE;
2840 wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
2841 SRPT_RDMA_WRITE_LAST :
2842 SRPT_RDMA_MID,
2843 ioctx->ioctx.index);
2844 } else {
2845 wr.opcode = IB_WR_RDMA_READ;
2846 wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
2847 SRPT_RDMA_READ_LAST :
2848 SRPT_RDMA_MID,
2849 ioctx->ioctx.index);
2850 }
2851 wr.next = NULL;
2852 wr.wr.rdma.remote_addr = riu->raddr;
2853 wr.wr.rdma.rkey = riu->rkey;
2854 wr.num_sge = riu->sge_cnt;
2855 wr.sg_list = riu->sge;
2856
2857 /* only get completion event for the last rdma write */
2858 if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE)
2859 wr.send_flags = IB_SEND_SIGNALED;
2860
2861 ret = ib_post_send(ch->qp, &wr, &bad_wr);
2862 if (ret)
2863 break;
2864 }
2865
2866 if (ret)
2867		printk(KERN_ERR "%s[%d]: ib_post_send() returned %d for %d/%d\n",
2868		       __func__, __LINE__, ret, i, n_rdma);
2869	if (ret && i > 0) {
2870		wr.num_sge = 0;
2871		wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index);
2872		wr.send_flags = IB_SEND_SIGNALED;
2873		while (ch->state == CH_LIVE &&
2874			ib_post_send(ch->qp, &wr, &bad_wr) != 0) {
2875			printk(KERN_INFO "Trying to abort failed RDMA transfer [%d]\n",
2876				ioctx->ioctx.index);
2877			msleep(1000);
2878		}
2879		while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) {
2880			printk(KERN_INFO "Waiting until RDMA abort finished [%d]\n",
2881				ioctx->ioctx.index);
2882 msleep(1000);
2883 }
2884 }
2885out:
2886 if (unlikely(dir == DMA_TO_DEVICE && ret < 0))
2887 atomic_add(n_rdma, &ch->sq_wr_avail);
2888 return ret;
2889}
2890
2891/**
2892 * srpt_xfer_data() - Start data transfer from initiator to target.
2893 */
2894static int srpt_xfer_data(struct srpt_rdma_ch *ch,
2895 struct srpt_send_ioctx *ioctx)
2896{
2897 int ret;
2898
2899 ret = srpt_map_sg_to_ib_sge(ch, ioctx);
2900 if (ret) {
2901 printk(KERN_ERR "%s[%d] ret=%d\n", __func__, __LINE__, ret);
2902 goto out;
2903 }
2904
2905 ret = srpt_perform_rdmas(ch, ioctx);
2906 if (ret) {
2907 if (ret == -EAGAIN || ret == -ENOMEM)
2908 printk(KERN_INFO "%s[%d] queue full -- ret=%d\n",
2909 __func__, __LINE__, ret);
2910 else
2911 printk(KERN_ERR "%s[%d] fatal error -- ret=%d\n",
2912 __func__, __LINE__, ret);
2913 goto out_unmap;
2914 }
2915
2916out:
2917 return ret;
2918out_unmap:
2919 srpt_unmap_sg_to_ib_sge(ch, ioctx);
2920 goto out;
2921}
2922
2923static int srpt_write_pending_status(struct se_cmd *se_cmd)
2924{
2925 struct srpt_send_ioctx *ioctx;
2926
2927 ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd);
2928 return srpt_get_cmd_state(ioctx) == SRPT_STATE_NEED_DATA;
2929}
2930
2931/*
2932 * srpt_write_pending() - Start data transfer from initiator to target (write).
2933 */
2934static int srpt_write_pending(struct se_cmd *se_cmd)
2935{
2936 struct srpt_rdma_ch *ch;
2937 struct srpt_send_ioctx *ioctx;
2938 enum srpt_command_state new_state;
2939 enum rdma_ch_state ch_state;
2940 int ret;
2941
2942 ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd);
2943
2944 new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA);
2945 WARN_ON(new_state == SRPT_STATE_DONE);
2946
2947 ch = ioctx->ch;
2948 BUG_ON(!ch);
2949
2950 ch_state = srpt_get_ch_state(ch);
2951 switch (ch_state) {
2952 case CH_CONNECTING:
2953 WARN(true, "unexpected channel state %d\n", ch_state);
2954 ret = -EINVAL;
2955 goto out;
2956 case CH_LIVE:
2957 break;
2958 case CH_DISCONNECTING:
2959 case CH_DRAINING:
2960 case CH_RELEASING:
2961 pr_debug("cmd with tag %lld: channel disconnecting\n",
2962 ioctx->tag);
2963 srpt_set_cmd_state(ioctx, SRPT_STATE_DATA_IN);
2964 ret = -EINVAL;
2965 goto out;
2966 }
2967 ret = srpt_xfer_data(ch, ioctx);
2968
2969out:
2970 return ret;
2971}
2972
2973static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status)
2974{
2975 switch (tcm_mgmt_status) {
2976 case TMR_FUNCTION_COMPLETE:
2977 return SRP_TSK_MGMT_SUCCESS;
2978 case TMR_FUNCTION_REJECTED:
2979 return SRP_TSK_MGMT_FUNC_NOT_SUPP;
2980 }
2981 return SRP_TSK_MGMT_FAILED;
2982}
2983
2984/**
2985 * srpt_queue_response() - Transmits the response to a SCSI command.
2986 *
2987 * Callback function called by the TCM core. Must not block since it can be
2988 * invoked in the context of the IB completion handler.
2989 */
2990static int srpt_queue_response(struct se_cmd *cmd)
2991{
2992 struct srpt_rdma_ch *ch;
2993 struct srpt_send_ioctx *ioctx;
2994 enum srpt_command_state state;
2995 unsigned long flags;
2996 int ret;
2997 enum dma_data_direction dir;
2998 int resp_len;
2999 u8 srp_tm_status;
3000
3001 ret = 0;
3002
3003 ioctx = container_of(cmd, struct srpt_send_ioctx, cmd);
3004 ch = ioctx->ch;
3005 BUG_ON(!ch);
3006
3007 spin_lock_irqsave(&ioctx->spinlock, flags);
3008 state = ioctx->state;
3009 switch (state) {
3010 case SRPT_STATE_NEW:
3011 case SRPT_STATE_DATA_IN:
3012 ioctx->state = SRPT_STATE_CMD_RSP_SENT;
3013 break;
3014 case SRPT_STATE_MGMT:
3015 ioctx->state = SRPT_STATE_MGMT_RSP_SENT;
3016 break;
3017 default:
3018 WARN(true, "ch %p; cmd %d: unexpected command state %d\n",
3019 ch, ioctx->ioctx.index, ioctx->state);
3020 break;
3021 }
3022 spin_unlock_irqrestore(&ioctx->spinlock, flags);
3023
3024 if (unlikely(transport_check_aborted_status(&ioctx->cmd, false)
3025 || WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))) {
3026 atomic_inc(&ch->req_lim_delta);
3027 srpt_abort_cmd(ioctx);
3028 goto out;
3029 }
3030
3031 dir = ioctx->cmd.data_direction;
3032
3033 /* For read commands, transfer the data to the initiator. */
3034 if (dir == DMA_FROM_DEVICE && ioctx->cmd.data_length &&
3035 !ioctx->queue_status_only) {
3036 ret = srpt_xfer_data(ch, ioctx);
3037 if (ret) {
3038 printk(KERN_ERR "xfer_data failed for tag %llu\n",
3039 ioctx->tag);
3040 goto out;
3041 }
3042 }
3043
3044 if (state != SRPT_STATE_MGMT)
3045 resp_len = srpt_build_cmd_rsp(ch, ioctx, ioctx->tag,
3046 cmd->scsi_status);
3047 else {
3048 srp_tm_status
3049 = tcm_to_srp_tsk_mgmt_status(cmd->se_tmr_req->response);
3050 resp_len = srpt_build_tskmgmt_rsp(ch, ioctx, srp_tm_status,
3051 ioctx->tag);
3052 }
3053 ret = srpt_post_send(ch, ioctx, resp_len);
3054 if (ret) {
3055 printk(KERN_ERR "sending cmd response failed for tag %llu\n",
3056 ioctx->tag);
3057 srpt_unmap_sg_to_ib_sge(ch, ioctx);
3058 srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
3059 target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd);
3060 }
3061
3062out:
3063 return ret;
3064}
3065
3066static int srpt_queue_status(struct se_cmd *cmd)
3067{
3068 struct srpt_send_ioctx *ioctx;
3069
3070 ioctx = container_of(cmd, struct srpt_send_ioctx, cmd);
3071 BUG_ON(ioctx->sense_data != cmd->sense_buffer);
3072 if (cmd->se_cmd_flags &
3073 (SCF_TRANSPORT_TASK_SENSE | SCF_EMULATED_TASK_SENSE))
3074 WARN_ON(cmd->scsi_status != SAM_STAT_CHECK_CONDITION);
3075 ioctx->queue_status_only = true;
3076 return srpt_queue_response(cmd);
3077}
3078
3079static void srpt_refresh_port_work(struct work_struct *work)
3080{
3081 struct srpt_port *sport = container_of(work, struct srpt_port, work);
3082
3083 srpt_refresh_port(sport);
3084}
3085
3086static int srpt_ch_list_empty(struct srpt_device *sdev)
3087{
3088 int res;
3089
3090 spin_lock_irq(&sdev->spinlock);
3091 res = list_empty(&sdev->rch_list);
3092 spin_unlock_irq(&sdev->spinlock);
3093
3094 return res;
3095}
3096
3097/**
3098 * srpt_release_sdev() - Free the channel resources associated with a target.
3099 */
3100static int srpt_release_sdev(struct srpt_device *sdev)
3101{
3102 struct srpt_rdma_ch *ch, *tmp_ch;
3103 int res;
3104
3105 WARN_ON_ONCE(irqs_disabled());
3106
3107 BUG_ON(!sdev);
3108
3109 spin_lock_irq(&sdev->spinlock);
3110 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list)
3111 __srpt_close_ch(ch);
3112 spin_unlock_irq(&sdev->spinlock);
3113
3114 res = wait_event_interruptible(sdev->ch_releaseQ,
3115 srpt_ch_list_empty(sdev));
3116 if (res)
3117 printk(KERN_ERR "%s: interrupted.\n", __func__);
3118
3119 return 0;
3120}
3121
3122static struct srpt_port *__srpt_lookup_port(const char *name)
3123{
3124 struct ib_device *dev;
3125 struct srpt_device *sdev;
3126 struct srpt_port *sport;
3127 int i;
3128
3129 list_for_each_entry(sdev, &srpt_dev_list, list) {
3130 dev = sdev->device;
3131 if (!dev)
3132 continue;
3133
3134 for (i = 0; i < dev->phys_port_cnt; i++) {
3135 sport = &sdev->port[i];
3136
3137 if (!strcmp(sport->port_guid, name))
3138 return sport;
3139 }
3140 }
3141
3142 return NULL;
3143}
3144
3145static struct srpt_port *srpt_lookup_port(const char *name)
3146{
3147 struct srpt_port *sport;
3148
3149 spin_lock(&srpt_dev_lock);
3150 sport = __srpt_lookup_port(name);
3151 spin_unlock(&srpt_dev_lock);
3152
3153 return sport;
3154}
3155
3156/**
3157 * srpt_add_one() - InfiniBand device addition callback function.
3158 */
3159static void srpt_add_one(struct ib_device *device)
3160{
3161 struct srpt_device *sdev;
3162 struct srpt_port *sport;
3163 struct ib_srq_init_attr srq_attr;
3164 int i;
3165
3166 pr_debug("device = %p, device->dma_ops = %p\n", device,
3167 device->dma_ops);
3168
3169 sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
3170 if (!sdev)
3171 goto err;
3172
3173 sdev->device = device;
3174 INIT_LIST_HEAD(&sdev->rch_list);
3175 init_waitqueue_head(&sdev->ch_releaseQ);
3176 spin_lock_init(&sdev->spinlock);
3177
3178 if (ib_query_device(device, &sdev->dev_attr))
3179 goto free_dev;
3180
3181 sdev->pd = ib_alloc_pd(device);
3182 if (IS_ERR(sdev->pd))
3183 goto free_dev;
3184
3185 sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
3186 if (IS_ERR(sdev->mr))
3187 goto err_pd;
3188
3189 sdev->srq_size = min(srpt_srq_size, sdev->dev_attr.max_srq_wr);
3190
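	/*
	 * A single shared receive queue (SRQ) serves all channels of this
	 * HCA, so receive buffers only have to be allocated and posted once
	 * per device instead of once per connection.
	 */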
3191 srq_attr.event_handler = srpt_srq_event;
3192 srq_attr.srq_context = (void *)sdev;
3193 srq_attr.attr.max_wr = sdev->srq_size;
3194 srq_attr.attr.max_sge = 1;
3195 srq_attr.attr.srq_limit = 0;
3196 srq_attr.srq_type = IB_SRQT_BASIC;
3197
3198 sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
3199 if (IS_ERR(sdev->srq))
3200 goto err_mr;
3201
3202 pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n",
3203 __func__, sdev->srq_size, sdev->dev_attr.max_srq_wr,
3204 device->name);
3205
3206 if (!srpt_service_guid)
3207 srpt_service_guid = be64_to_cpu(device->node_guid);
3208
3209 sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
3210 if (IS_ERR(sdev->cm_id))
3211 goto err_srq;
3212
3213 /* print out target login information */
3214 pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx,"
3215 "pkey=ffff,service_id=%016llx\n", srpt_service_guid,
3216 srpt_service_guid, srpt_service_guid);
3217
3218	/*
3219	 * We do not have a consistent service_id (i.e. also id_ext of target_id)
3220	 * to identify this target. We currently use the GUID of the first HCA
3221	 * in the system as service_id; therefore, the target_id will change
3222	 * if this HCA goes bad and is replaced by a different HCA.
3223	 */
3224 if (ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0, NULL))
3225 goto err_cm;
3226
3227 INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
3228 srpt_event_handler);
3229 if (ib_register_event_handler(&sdev->event_handler))
3230 goto err_cm;
3231
3232 sdev->ioctx_ring = (struct srpt_recv_ioctx **)
3233 srpt_alloc_ioctx_ring(sdev, sdev->srq_size,
3234 sizeof(*sdev->ioctx_ring[0]),
3235 srp_max_req_size, DMA_FROM_DEVICE);
3236 if (!sdev->ioctx_ring)
3237 goto err_event;
3238
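	/* Pre-post all receive buffers on the shared receive queue. */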
3239 for (i = 0; i < sdev->srq_size; ++i)
3240 srpt_post_recv(sdev, sdev->ioctx_ring[i]);
3241
3242 WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port));
3243
3244 for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
3245 sport = &sdev->port[i - 1];
3246 sport->sdev = sdev;
3247 sport->port = i;
3248 sport->port_attrib.srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE;
3249 sport->port_attrib.srp_max_rsp_size = DEFAULT_MAX_RSP_SIZE;
3250 sport->port_attrib.srp_sq_size = DEF_SRPT_SQ_SIZE;
3251 INIT_WORK(&sport->work, srpt_refresh_port_work);
3252 INIT_LIST_HEAD(&sport->port_acl_list);
3253 spin_lock_init(&sport->port_acl_lock);
3254
3255 if (srpt_refresh_port(sport)) {
3256 printk(KERN_ERR "MAD registration failed for %s-%d.\n",
3257 srpt_sdev_name(sdev), i);
3258 goto err_ring;
3259 }
3260 snprintf(sport->port_guid, sizeof(sport->port_guid),
3261 "0x%016llx%016llx",
3262 be64_to_cpu(sport->gid.global.subnet_prefix),
3263 be64_to_cpu(sport->gid.global.interface_id));
3264 }
3265
3266 spin_lock(&srpt_dev_lock);
3267 list_add_tail(&sdev->list, &srpt_dev_list);
3268 spin_unlock(&srpt_dev_lock);
3269
3270out:
3271 ib_set_client_data(device, &srpt_client, sdev);
3272 pr_debug("added %s.\n", device->name);
3273 return;
3274
3275err_ring:
3276 srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev,
3277 sdev->srq_size, srp_max_req_size,
3278 DMA_FROM_DEVICE);
3279err_event:
3280 ib_unregister_event_handler(&sdev->event_handler);
3281err_cm:
3282 ib_destroy_cm_id(sdev->cm_id);
3283err_srq:
3284 ib_destroy_srq(sdev->srq);
3285err_mr:
3286 ib_dereg_mr(sdev->mr);
3287err_pd:
3288 ib_dealloc_pd(sdev->pd);
3289free_dev:
3290 kfree(sdev);
3291err:
3292 sdev = NULL;
3293 printk(KERN_INFO "%s(%s) failed.\n", __func__, device->name);
3294 goto out;
3295}
3296
3297/**
3298 * srpt_remove_one() - InfiniBand device removal callback function.
3299 */
3300static void srpt_remove_one(struct ib_device *device)
3301{
3302 struct srpt_device *sdev;
3303 int i;
3304
3305 sdev = ib_get_client_data(device, &srpt_client);
3306 if (!sdev) {
3307 printk(KERN_INFO "%s(%s): nothing to do.\n", __func__,
3308 device->name);
3309 return;
3310 }
3311
3312 srpt_unregister_mad_agent(sdev);
3313
3314 ib_unregister_event_handler(&sdev->event_handler);
3315
3316 /* Cancel any work queued by the just unregistered IB event handler. */
3317 for (i = 0; i < sdev->device->phys_port_cnt; i++)
3318 cancel_work_sync(&sdev->port[i].work);
3319
3320 ib_destroy_cm_id(sdev->cm_id);
3321
3322 /*
3323 * Unregistering a target must happen after destroying sdev->cm_id
3324 * such that no new SRP_LOGIN_REQ information units can arrive while
3325 * destroying the target.
3326 */
3327 spin_lock(&srpt_dev_lock);
3328 list_del(&sdev->list);
3329 spin_unlock(&srpt_dev_lock);
3330 srpt_release_sdev(sdev);
3331
3332 ib_destroy_srq(sdev->srq);
3333 ib_dereg_mr(sdev->mr);
3334 ib_dealloc_pd(sdev->pd);
3335
3336 srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev,
3337 sdev->srq_size, srp_max_req_size, DMA_FROM_DEVICE);
3338 sdev->ioctx_ring = NULL;
3339 kfree(sdev);
3340}
3341
3342static struct ib_client srpt_client = {
3343 .name = DRV_NAME,
3344 .add = srpt_add_one,
3345 .remove = srpt_remove_one
3346};
3347
3348static int srpt_check_true(struct se_portal_group *se_tpg)
3349{
3350 return 1;
3351}
3352
3353static int srpt_check_false(struct se_portal_group *se_tpg)
3354{
3355 return 0;
3356}
3357
3358static char *srpt_get_fabric_name(void)
3359{
3360 return "srpt";
3361}
3362
3363static u8 srpt_get_fabric_proto_ident(struct se_portal_group *se_tpg)
3364{
3365 return SCSI_TRANSPORTID_PROTOCOLID_SRP;
3366}
3367
3368static char *srpt_get_fabric_wwn(struct se_portal_group *tpg)
3369{
3370 struct srpt_port *sport = container_of(tpg, struct srpt_port, port_tpg_1);
3371
3372 return sport->port_guid;
3373}
3374
3375static u16 srpt_get_tag(struct se_portal_group *tpg)
3376{
3377 return 1;
3378}
3379
3380static u32 srpt_get_default_depth(struct se_portal_group *se_tpg)
3381{
3382 return 1;
3383}
3384
3385static u32 srpt_get_pr_transport_id(struct se_portal_group *se_tpg,
3386 struct se_node_acl *se_nacl,
3387 struct t10_pr_registration *pr_reg,
3388 int *format_code, unsigned char *buf)
3389{
3390 struct srpt_node_acl *nacl;
3391 struct spc_rdma_transport_id *tr_id;
3392
3393 nacl = container_of(se_nacl, struct srpt_node_acl, nacl);
3394 tr_id = (void *)buf;
3395 tr_id->protocol_identifier = SCSI_TRANSPORTID_PROTOCOLID_SRP;
3396 memcpy(tr_id->i_port_id, nacl->i_port_id, sizeof(tr_id->i_port_id));
3397 return sizeof(*tr_id);
3398}
3399
3400static u32 srpt_get_pr_transport_id_len(struct se_portal_group *se_tpg,
3401 struct se_node_acl *se_nacl,
3402 struct t10_pr_registration *pr_reg,
3403 int *format_code)
3404{
3405 *format_code = 0;
3406 return sizeof(struct spc_rdma_transport_id);
3407}
3408
3409static char *srpt_parse_pr_out_transport_id(struct se_portal_group *se_tpg,
3410 const char *buf, u32 *out_tid_len,
3411 char **port_nexus_ptr)
3412{
3413 struct spc_rdma_transport_id *tr_id;
3414
3415 *port_nexus_ptr = NULL;
3416 *out_tid_len = sizeof(struct spc_rdma_transport_id);
3417 tr_id = (void *)buf;
3418 return (char *)tr_id->i_port_id;
3419}
3420
3421static struct se_node_acl *srpt_alloc_fabric_acl(struct se_portal_group *se_tpg)
3422{
3423 struct srpt_node_acl *nacl;
3424
3425 nacl = kzalloc(sizeof(struct srpt_node_acl), GFP_KERNEL);
3426 if (!nacl) {
3427 printk(KERN_ERR "Unable to allocate struct srpt_node_acl\n");
3428 return NULL;
3429 }
3430
3431 return &nacl->nacl;
3432}
3433
3434static void srpt_release_fabric_acl(struct se_portal_group *se_tpg,
3435 struct se_node_acl *se_nacl)
3436{
3437 struct srpt_node_acl *nacl;
3438
3439 nacl = container_of(se_nacl, struct srpt_node_acl, nacl);
3440 kfree(nacl);
3441}
3442
3443static u32 srpt_tpg_get_inst_index(struct se_portal_group *se_tpg)
3444{
3445 return 1;
3446}
3447
3448static void srpt_release_cmd(struct se_cmd *se_cmd)
3449{
3450 struct srpt_send_ioctx *ioctx = container_of(se_cmd,
3451 struct srpt_send_ioctx, cmd);
3452 struct srpt_rdma_ch *ch = ioctx->ch;
3453 unsigned long flags;
3454
3455 WARN_ON(ioctx->state != SRPT_STATE_DONE);
3456 WARN_ON(ioctx->mapped_sg_count != 0);
3457
3458 if (ioctx->n_rbuf > 1) {
3459 kfree(ioctx->rbufs);
3460 ioctx->rbufs = NULL;
3461 ioctx->n_rbuf = 0;
3462 }
3463
3464 spin_lock_irqsave(&ch->spinlock, flags);
3465 list_add(&ioctx->free_list, &ch->free_list);
3466 spin_unlock_irqrestore(&ch->spinlock, flags);
3467}
3468
3469/**
3470 * srpt_shutdown_session() - Whether or not a session may be shut down.
3471 */
3472static int srpt_shutdown_session(struct se_session *se_sess)
3473{
3474 return true;
3475}
3476
3477/**
3478 * srpt_close_session() - Forcibly close a session.
3479 *
3480 * Callback function invoked by the TCM core to clean up sessions associated
3481 * with a node ACL when the user invokes
3482 * rmdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id
3483 */
3484static void srpt_close_session(struct se_session *se_sess)
3485{
3486 DECLARE_COMPLETION_ONSTACK(release_done);
3487 struct srpt_rdma_ch *ch;
3488 struct srpt_device *sdev;
3489 int res;
3490
3491 ch = se_sess->fabric_sess_ptr;
3492 WARN_ON(ch->sess != se_sess);
3493
3494 pr_debug("ch %p state %d\n", ch, srpt_get_ch_state(ch));
3495
3496 sdev = ch->sport->sdev;
3497 spin_lock_irq(&sdev->spinlock);
3498 BUG_ON(ch->release_done);
3499 ch->release_done = &release_done;
3500 __srpt_close_ch(ch);
3501 spin_unlock_irq(&sdev->spinlock);
3502
3503 res = wait_for_completion_timeout(&release_done, 60 * HZ);
3504 WARN_ON(res <= 0);
3505}
3506
3507/**
3508 * srpt_sess_get_index() - Return the value of scsiAttIntrPortIndex (SCSI-MIB).
3509 *
3510 * A quote from RFC 4455 (SCSI-MIB) about this MIB object:
3511 * This object represents an arbitrary integer used to uniquely identify a
3512 * particular attached remote initiator port to a particular SCSI target port
3513 * within a particular SCSI target device within a particular SCSI instance.
3514 */
3515static u32 srpt_sess_get_index(struct se_session *se_sess)
3516{
3517 return 0;
3518}
3519
3520static void srpt_set_default_node_attrs(struct se_node_acl *nacl)
3521{
3522}
3523
3524static u32 srpt_get_task_tag(struct se_cmd *se_cmd)
3525{
3526 struct srpt_send_ioctx *ioctx;
3527
3528 ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd);
3529 return ioctx->tag;
3530}
3531
3532/* Note: only used from inside debug printk's by the TCM core. */
3533static int srpt_get_tcm_cmd_state(struct se_cmd *se_cmd)
3534{
3535 struct srpt_send_ioctx *ioctx;
3536
3537 ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd);
3538 return srpt_get_cmd_state(ioctx);
3539}
3540
3541/**
3542 * srpt_parse_i_port_id() - Parse an initiator port ID.
3543 * @name: ASCII representation of a 128-bit initiator port ID.
3544 * @i_port_id: Binary 128-bit port ID.
3545 */
3546static int srpt_parse_i_port_id(u8 i_port_id[16], const char *name)
3547{
3548 const char *p;
3549 unsigned len, count, leading_zero_bytes;
3550 int ret, rc;
3551
3552 p = name;
3553 if (strnicmp(p, "0x", 2) == 0)
3554 p += 2;
3555 ret = -EINVAL;
3556 len = strlen(p);
3557 if (len % 2)
3558 goto out;
3559	count = min(len / 2, 16U);
3560	leading_zero_bytes = 16 - count;
3561	memset(i_port_id, 0, leading_zero_bytes);
3562	rc = hex2bin(i_port_id + leading_zero_bytes, p, count);
3563	if (rc < 0) {
3564		pr_debug("hex2bin failed for srpt_parse_i_port_id: %d\n", rc);
		goto out;
	}
3565	ret = 0;
3566out:
3567 return ret;
3568}
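/*
 * Example (hypothetical initiator port ID): parsing the string
 * "0x0002c90300a06452000000000000000a", with or without the "0x" prefix,
 * yields the sixteen bytes 00 02 c9 03 00 a0 64 52 00 00 00 00 00 00 00 0a
 * in i_port_id[]; shorter strings are padded with leading zero bytes.
 */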
3569
3570/*
3571 * configfs callback function invoked for
3572 * mkdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id
3573 */
3574static struct se_node_acl *srpt_make_nodeacl(struct se_portal_group *tpg,
3575 struct config_group *group,
3576 const char *name)
3577{
3578 struct srpt_port *sport = container_of(tpg, struct srpt_port, port_tpg_1);
3579 struct se_node_acl *se_nacl, *se_nacl_new;
3580 struct srpt_node_acl *nacl;
3581 int ret = 0;
3582 u32 nexus_depth = 1;
3583 u8 i_port_id[16];
3584
3585 if (srpt_parse_i_port_id(i_port_id, name) < 0) {
3586 printk(KERN_ERR "invalid initiator port ID %s\n", name);
3587 ret = -EINVAL;
3588 goto err;
3589 }
3590
3591 se_nacl_new = srpt_alloc_fabric_acl(tpg);
3592 if (!se_nacl_new) {
3593 ret = -ENOMEM;
3594 goto err;
3595 }
3596	/*
3597	 * nacl_new may be released by core_tpg_add_initiator_node_acl()
3598	 * when converting a node ACL from demo mode to explicit mode.
3599	 */
3600 se_nacl = core_tpg_add_initiator_node_acl(tpg, se_nacl_new, name,
3601 nexus_depth);
3602 if (IS_ERR(se_nacl)) {
3603 ret = PTR_ERR(se_nacl);
3604 goto err;
3605 }
3606 /* Locate our struct srpt_node_acl and set sdev and i_port_id. */
3607 nacl = container_of(se_nacl, struct srpt_node_acl, nacl);
3608 memcpy(&nacl->i_port_id[0], &i_port_id[0], 16);
3609 nacl->sport = sport;
3610
3611 spin_lock_irq(&sport->port_acl_lock);
3612 list_add_tail(&nacl->list, &sport->port_acl_list);
3613 spin_unlock_irq(&sport->port_acl_lock);
3614
3615 return se_nacl;
3616err:
3617 return ERR_PTR(ret);
3618}
3619
3620/*
3621 * configfs callback function invoked for
3622 * rmdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id
3623 */
3624static void srpt_drop_nodeacl(struct se_node_acl *se_nacl)
3625{
3626 struct srpt_node_acl *nacl;
3627 struct srpt_device *sdev;
3628 struct srpt_port *sport;
3629
3630 nacl = container_of(se_nacl, struct srpt_node_acl, nacl);
3631 sport = nacl->sport;
3632 sdev = sport->sdev;
3633 spin_lock_irq(&sport->port_acl_lock);
3634 list_del(&nacl->list);
3635 spin_unlock_irq(&sport->port_acl_lock);
3636 core_tpg_del_initiator_node_acl(&sport->port_tpg_1, se_nacl, 1);
3637 srpt_release_fabric_acl(NULL, se_nacl);
3638}
3639
3640static ssize_t srpt_tpg_attrib_show_srp_max_rdma_size(
3641 struct se_portal_group *se_tpg,
3642 char *page)
3643{
3644 struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
3645
3646 return sprintf(page, "%u\n", sport->port_attrib.srp_max_rdma_size);
3647}
3648
3649static ssize_t srpt_tpg_attrib_store_srp_max_rdma_size(
3650 struct se_portal_group *se_tpg,
3651 const char *page,
3652 size_t count)
3653{
3654 struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
3655 unsigned long val;
3656 int ret;
3657
3658 ret = strict_strtoul(page, 0, &val);
3659 if (ret < 0) {
3660 pr_err("strict_strtoul() failed with ret: %d\n", ret);
3661 return -EINVAL;
3662 }
3663 if (val > MAX_SRPT_RDMA_SIZE) {
3664 pr_err("val: %lu exceeds MAX_SRPT_RDMA_SIZE: %d\n", val,
3665 MAX_SRPT_RDMA_SIZE);
3666 return -EINVAL;
3667 }
3668 if (val < DEFAULT_MAX_RDMA_SIZE) {
3669 pr_err("val: %lu smaller than DEFAULT_MAX_RDMA_SIZE: %d\n",
3670 val, DEFAULT_MAX_RDMA_SIZE);
3671 return -EINVAL;
3672 }
3673 sport->port_attrib.srp_max_rdma_size = val;
3674
3675 return count;
3676}
3677
3678TF_TPG_ATTRIB_ATTR(srpt, srp_max_rdma_size, S_IRUGO | S_IWUSR);
3679
3680static ssize_t srpt_tpg_attrib_show_srp_max_rsp_size(
3681 struct se_portal_group *se_tpg,
3682 char *page)
3683{
3684 struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
3685
3686 return sprintf(page, "%u\n", sport->port_attrib.srp_max_rsp_size);
3687}
3688
3689static ssize_t srpt_tpg_attrib_store_srp_max_rsp_size(
3690 struct se_portal_group *se_tpg,
3691 const char *page,
3692 size_t count)
3693{
3694 struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
3695 unsigned long val;
3696 int ret;
3697
3698 ret = strict_strtoul(page, 0, &val);
3699 if (ret < 0) {
3700 pr_err("strict_strtoul() failed with ret: %d\n", ret);
3701 return -EINVAL;
3702 }
3703 if (val > MAX_SRPT_RSP_SIZE) {
3704 pr_err("val: %lu exceeds MAX_SRPT_RSP_SIZE: %d\n", val,
3705 MAX_SRPT_RSP_SIZE);
3706 return -EINVAL;
3707 }
3708 if (val < MIN_MAX_RSP_SIZE) {
3709 pr_err("val: %lu smaller than MIN_MAX_RSP_SIZE: %d\n", val,
3710 MIN_MAX_RSP_SIZE);
3711 return -EINVAL;
3712 }
3713 sport->port_attrib.srp_max_rsp_size = val;
3714
3715 return count;
3716}
3717
3718TF_TPG_ATTRIB_ATTR(srpt, srp_max_rsp_size, S_IRUGO | S_IWUSR);
3719
3720static ssize_t srpt_tpg_attrib_show_srp_sq_size(
3721 struct se_portal_group *se_tpg,
3722 char *page)
3723{
3724 struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
3725
3726 return sprintf(page, "%u\n", sport->port_attrib.srp_sq_size);
3727}
3728
3729static ssize_t srpt_tpg_attrib_store_srp_sq_size(
3730 struct se_portal_group *se_tpg,
3731 const char *page,
3732 size_t count)
3733{
3734 struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
3735 unsigned long val;
3736 int ret;
3737
3738 ret = strict_strtoul(page, 0, &val);
3739 if (ret < 0) {
3740 pr_err("strict_strtoul() failed with ret: %d\n", ret);
3741 return -EINVAL;
3742 }
3743 if (val > MAX_SRPT_SRQ_SIZE) {
3744 pr_err("val: %lu exceeds MAX_SRPT_SRQ_SIZE: %d\n", val,
3745 MAX_SRPT_SRQ_SIZE);
3746 return -EINVAL;
3747 }
3748 if (val < MIN_SRPT_SRQ_SIZE) {
3749 pr_err("val: %lu smaller than MIN_SRPT_SRQ_SIZE: %d\n", val,
3750 MIN_SRPT_SRQ_SIZE);
3751 return -EINVAL;
3752 }
3753 sport->port_attrib.srp_sq_size = val;
3754
3755 return count;
3756}
3757
3758TF_TPG_ATTRIB_ATTR(srpt, srp_sq_size, S_IRUGO | S_IWUSR);
3759
3760static struct configfs_attribute *srpt_tpg_attrib_attrs[] = {
3761 &srpt_tpg_attrib_srp_max_rdma_size.attr,
3762 &srpt_tpg_attrib_srp_max_rsp_size.attr,
3763 &srpt_tpg_attrib_srp_sq_size.attr,
3764 NULL,
3765};
3766
3767static ssize_t srpt_tpg_show_enable(
3768 struct se_portal_group *se_tpg,
3769 char *page)
3770{
3771 struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
3772
3773	return snprintf(page, PAGE_SIZE, "%d\n", sport->enabled ? 1 : 0);
3774}
3775
3776static ssize_t srpt_tpg_store_enable(
3777 struct se_portal_group *se_tpg,
3778 const char *page,
3779 size_t count)
3780{
3781 struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
3782 unsigned long tmp;
3783 int ret;
3784
3785 ret = strict_strtoul(page, 0, &tmp);
3786 if (ret < 0) {
3787 printk(KERN_ERR "Unable to extract srpt_tpg_store_enable\n");
3788 return -EINVAL;
3789 }
3790
3791 if ((tmp != 0) && (tmp != 1)) {
3792 printk(KERN_ERR "Illegal value for srpt_tpg_store_enable: %lu\n", tmp);
3793 return -EINVAL;
3794 }
3795 if (tmp == 1)
3796 sport->enabled = true;
3797 else
3798 sport->enabled = false;
3799
3800 return count;
3801}
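/*
 * Example (hypothetical $port and $tpg directory names; the layout follows
 * the configfs paths documented elsewhere in this file):
 *   echo 1 > /sys/kernel/config/target/srpt/$port/$tpg/enable
 */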
3802
3803TF_TPG_BASE_ATTR(srpt, enable, S_IRUGO | S_IWUSR);
3804
3805static struct configfs_attribute *srpt_tpg_attrs[] = {
3806 &srpt_tpg_enable.attr,
3807 NULL,
3808};
3809
3810/**
3811 * configfs callback invoked for
3812 * mkdir /sys/kernel/config/target/$driver/$port/$tpg
3813 */
3814static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
3815 struct config_group *group,
3816 const char *name)
3817{
3818 struct srpt_port *sport = container_of(wwn, struct srpt_port, port_wwn);
3819 int res;
3820
3821 /* Initialize sport->port_wwn and sport->port_tpg_1 */
3822 res = core_tpg_register(&srpt_target->tf_ops, &sport->port_wwn,
3823 &sport->port_tpg_1, sport, TRANSPORT_TPG_TYPE_NORMAL);
3824 if (res)
3825 return ERR_PTR(res);
3826
3827 return &sport->port_tpg_1;
3828}
3829
3830/**
3831 * configfs callback invoked for
3832 * rmdir /sys/kernel/config/target/$driver/$port/$tpg
3833 */
3834static void srpt_drop_tpg(struct se_portal_group *tpg)
3835{
3836 struct srpt_port *sport = container_of(tpg,
3837 struct srpt_port, port_tpg_1);
3838
3839 sport->enabled = false;
3840 core_tpg_deregister(&sport->port_tpg_1);
3841}
3842
3843/**
3844 * configfs callback invoked for
3845 * mkdir /sys/kernel/config/target/$driver/$port
3846 */
3847static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf,
3848 struct config_group *group,
3849 const char *name)
3850{
3851 struct srpt_port *sport;
3852 int ret;
3853
3854 sport = srpt_lookup_port(name);
3855 pr_debug("make_tport(%s)\n", name);
3856 ret = -EINVAL;
3857 if (!sport)
3858 goto err;
3859
3860 return &sport->port_wwn;
3861
3862err:
3863 return ERR_PTR(ret);
3864}
3865
3866/**
3867 * configfs callback invoked for
3868 * rmdir /sys/kernel/config/target/$driver/$port
3869 */
3870static void srpt_drop_tport(struct se_wwn *wwn)
3871{
3872 struct srpt_port *sport = container_of(wwn, struct srpt_port, port_wwn);
3873
3874	pr_debug("drop_tport(%s)\n", config_item_name(&sport->port_wwn.wwn_group.cg_item));
3875}
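Taken together, the four make/drop callbacks above implement the configfs
hierarchy referenced in their comments; schematically (reusing the
$driver/$port/$tpg placeholders, where $port must be a port name that
srpt_lookup_port() recognizes):

	/sys/kernel/config/target/srpt              fabric registered via srpt_template
	/sys/kernel/config/target/srpt/$port        srpt_make_tport() / srpt_drop_tport()
	/sys/kernel/config/target/srpt/$port/$tpg   srpt_make_tpg()   / srpt_drop_tpg()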
3876
3877static ssize_t srpt_wwn_show_attr_version(struct target_fabric_configfs *tf,
3878 char *buf)
3879{
3880 return scnprintf(buf, PAGE_SIZE, "%s\n", DRV_VERSION);
3881}
3882
3883TF_WWN_ATTR_RO(srpt, version);
3884
3885static struct configfs_attribute *srpt_wwn_attrs[] = {
3886 &srpt_wwn_version.attr,
3887 NULL,
3888};
3889
3890static struct target_core_fabric_ops srpt_template = {
3891 .get_fabric_name = srpt_get_fabric_name,
3892 .get_fabric_proto_ident = srpt_get_fabric_proto_ident,
3893 .tpg_get_wwn = srpt_get_fabric_wwn,
3894 .tpg_get_tag = srpt_get_tag,
3895 .tpg_get_default_depth = srpt_get_default_depth,
3896 .tpg_get_pr_transport_id = srpt_get_pr_transport_id,
3897 .tpg_get_pr_transport_id_len = srpt_get_pr_transport_id_len,
3898 .tpg_parse_pr_out_transport_id = srpt_parse_pr_out_transport_id,
3899 .tpg_check_demo_mode = srpt_check_false,
3900 .tpg_check_demo_mode_cache = srpt_check_true,
3901 .tpg_check_demo_mode_write_protect = srpt_check_true,
3902 .tpg_check_prod_mode_write_protect = srpt_check_false,
3903 .tpg_alloc_fabric_acl = srpt_alloc_fabric_acl,
3904 .tpg_release_fabric_acl = srpt_release_fabric_acl,
3905 .tpg_get_inst_index = srpt_tpg_get_inst_index,
3906 .release_cmd = srpt_release_cmd,
3907 .check_stop_free = srpt_check_stop_free,
3908 .shutdown_session = srpt_shutdown_session,
3909 .close_session = srpt_close_session,
3910 .sess_get_index = srpt_sess_get_index,
3911 .sess_get_initiator_sid = NULL,
3912 .write_pending = srpt_write_pending,
3913 .write_pending_status = srpt_write_pending_status,
3914 .set_default_node_attributes = srpt_set_default_node_attrs,
3915 .get_task_tag = srpt_get_task_tag,
3916 .get_cmd_state = srpt_get_tcm_cmd_state,
3917 .queue_data_in = srpt_queue_response,
3918 .queue_status = srpt_queue_status,
3919 .queue_tm_rsp = srpt_queue_response,
3920 /*
3921 * Setup function pointers for generic logic in
3922 * target_core_fabric_configfs.c
3923 */
3924 .fabric_make_wwn = srpt_make_tport,
3925 .fabric_drop_wwn = srpt_drop_tport,
3926 .fabric_make_tpg = srpt_make_tpg,
3927 .fabric_drop_tpg = srpt_drop_tpg,
3928 .fabric_post_link = NULL,
3929 .fabric_pre_unlink = NULL,
3930 .fabric_make_np = NULL,
3931 .fabric_drop_np = NULL,
3932 .fabric_make_nodeacl = srpt_make_nodeacl,
3933 .fabric_drop_nodeacl = srpt_drop_nodeacl,
3934};
3935
3936/**
3937 * srpt_init_module() - Kernel module initialization.
3938 *
3939 * Note: Since ib_register_client() registers callback functions, and since at
3940 * least one of these callback functions (srpt_add_one()) calls target core
3941 * functions, this driver must be registered with the target core before
3942 * ib_register_client() is called.
3943 */
3944static int __init srpt_init_module(void)
3945{
3946 int ret;
3947
3948 ret = -EINVAL;
3949 if (srp_max_req_size < MIN_MAX_REQ_SIZE) {
3950 printk(KERN_ERR "invalid value %d for kernel module parameter"
3951 " srp_max_req_size -- must be at least %d.\n",
3952 srp_max_req_size, MIN_MAX_REQ_SIZE);
3953 goto out;
3954 }
3955
3956 if (srpt_srq_size < MIN_SRPT_SRQ_SIZE
3957 || srpt_srq_size > MAX_SRPT_SRQ_SIZE) {
3958 printk(KERN_ERR "invalid value %d for kernel module parameter"
3959 " srpt_srq_size -- must be in the range [%d..%d].\n",
3960 srpt_srq_size, MIN_SRPT_SRQ_SIZE, MAX_SRPT_SRQ_SIZE);
3961 goto out;
3962 }
3963
3964 srpt_target = target_fabric_configfs_init(THIS_MODULE, "srpt");
3965 if (IS_ERR(srpt_target)) {
3966		printk(KERN_ERR "target_fabric_configfs_init() failed\n");
3967 ret = PTR_ERR(srpt_target);
3968 goto out;
3969 }
3970
3971 srpt_target->tf_ops = srpt_template;
3972
3973 /*
3974 * Set up default attribute lists.
3975 */
3976 srpt_target->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = srpt_wwn_attrs;
3977 srpt_target->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = srpt_tpg_attrs;
3978 srpt_target->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = srpt_tpg_attrib_attrs;
3979 srpt_target->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;
3980 srpt_target->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
3981 srpt_target->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
3982 srpt_target->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
3983 srpt_target->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
3984 srpt_target->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
3985
3986 ret = target_fabric_configfs_register(srpt_target);
3987 if (ret < 0) {
3988		printk(KERN_ERR "target_fabric_configfs_register() failed\n");
3989 goto out_free_target;
3990 }
3991
3992 ret = ib_register_client(&srpt_client);
3993 if (ret) {
3994 printk(KERN_ERR "couldn't register IB client\n");
3995 goto out_unregister_target;
3996 }
3997
3998 return 0;
3999
4000out_unregister_target:
4001 target_fabric_configfs_deregister(srpt_target);
4002 srpt_target = NULL;
4003out_free_target:
4004 if (srpt_target)
4005 target_fabric_configfs_free(srpt_target);
4006out:
4007 return ret;
4008}
4009
4010static void __exit srpt_cleanup_module(void)
4011{
4012 ib_unregister_client(&srpt_client);
4013 target_fabric_configfs_deregister(srpt_target);
4014 srpt_target = NULL;
4015}
4016
4017module_init(srpt_init_module);
4018module_exit(srpt_cleanup_module);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
deleted file mode 100644
index 4caf55cda7b..00000000000
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ /dev/null
@@ -1,442 +0,0 @@
1/*
2 * Copyright (c) 2006 - 2009 Mellanox Technology Inc. All rights reserved.
3 * Copyright (C) 2009 - 2010 Bart Van Assche <bvanassche@acm.org>.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 */
34
35#ifndef IB_SRPT_H
36#define IB_SRPT_H
37
38#include <linux/types.h>
39#include <linux/list.h>
40#include <linux/wait.h>
41
42#include <rdma/ib_verbs.h>
43#include <rdma/ib_sa.h>
44#include <rdma/ib_cm.h>
45
46#include <scsi/srp.h>
47
48#include "ib_dm_mad.h"
49
50/*
51 * The prefix the ServiceName field must start with in the device management
52 * ServiceEntries attribute pair. See also the SRP specification.
53 */
54#define SRP_SERVICE_NAME_PREFIX "SRP.T10:"
55
56enum {
57 /*
58 * SRP IOControllerProfile attributes for SRP target ports that have
59 * not been defined in <scsi/srp.h>. Source: section B.7, table B.7
60 * in the SRP specification.
61 */
62 SRP_PROTOCOL = 0x0108,
63 SRP_PROTOCOL_VERSION = 0x0001,
64 SRP_IO_SUBCLASS = 0x609e,
65 SRP_SEND_TO_IOC = 0x01,
66 SRP_SEND_FROM_IOC = 0x02,
67 SRP_RDMA_READ_FROM_IOC = 0x08,
68 SRP_RDMA_WRITE_FROM_IOC = 0x20,
69
70 /*
71 * srp_login_cmd.req_flags bitmasks. See also table 9 in the SRP
72 * specification.
73 */
74 SRP_MTCH_ACTION = 0x03, /* MULTI-CHANNEL ACTION */
75 SRP_LOSOLNT = 0x10, /* logout solicited notification */
76 SRP_CRSOLNT = 0x20, /* credit request solicited notification */
77 SRP_AESOLNT = 0x40, /* asynchronous event solicited notification */
78
79 /*
80 * srp_cmd.sol_nt / srp_tsk_mgmt.sol_not bitmasks. See also tables
81 * 18 and 20 in the SRP specification.
82 */
83 SRP_SCSOLNT = 0x02, /* SCSOLNT = successful solicited notification */
84 SRP_UCSOLNT = 0x04, /* UCSOLNT = unsuccessful solicited notification */
85
86 /*
87 * srp_rsp.sol_not / srp_t_logout.sol_not bitmasks. See also tables
88 * 16 and 22 in the SRP specification.
89 */
90 SRP_SOLNT = 0x01, /* SOLNT = solicited notification */
91
92 /* See also table 24 in the SRP specification. */
93 SRP_TSK_MGMT_SUCCESS = 0x00,
94 SRP_TSK_MGMT_FUNC_NOT_SUPP = 0x04,
95 SRP_TSK_MGMT_FAILED = 0x05,
96
97 /* See also table 21 in the SRP specification. */
98 SRP_CMD_SIMPLE_Q = 0x0,
99 SRP_CMD_HEAD_OF_Q = 0x1,
100 SRP_CMD_ORDERED_Q = 0x2,
101 SRP_CMD_ACA = 0x4,
102
103 SRP_LOGIN_RSP_MULTICHAN_NO_CHAN = 0x0,
104 SRP_LOGIN_RSP_MULTICHAN_TERMINATED = 0x1,
105 SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2,
106
107 SRPT_DEF_SG_TABLESIZE = 128,
108 SRPT_DEF_SG_PER_WQE = 16,
109
110 MIN_SRPT_SQ_SIZE = 16,
111 DEF_SRPT_SQ_SIZE = 4096,
112 SRPT_RQ_SIZE = 128,
113 MIN_SRPT_SRQ_SIZE = 4,
114 DEFAULT_SRPT_SRQ_SIZE = 4095,
115 MAX_SRPT_SRQ_SIZE = 65535,
116 MAX_SRPT_RDMA_SIZE = 1U << 24,
117 MAX_SRPT_RSP_SIZE = 1024,
118
119 MIN_MAX_REQ_SIZE = 996,
120 DEFAULT_MAX_REQ_SIZE
121 = sizeof(struct srp_cmd)/*48*/
122 + sizeof(struct srp_indirect_buf)/*20*/
123 + 128 * sizeof(struct srp_direct_buf)/*16*/,
124
125 MIN_MAX_RSP_SIZE = sizeof(struct srp_rsp)/*36*/ + 4,
126 DEFAULT_MAX_RSP_SIZE = 256, /* leaves 220 bytes for sense data */
127
128 DEFAULT_MAX_RDMA_SIZE = 65536,
129};
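Using the structure sizes noted in the inline comments, DEFAULT_MAX_REQ_SIZE
works out to 48 + 20 + 128 * 16 = 2116 bytes, i.e. an SRP_CMD information unit
plus an indirect descriptor table holding 128 direct buffer descriptors.
Likewise DEFAULT_MAX_RSP_SIZE = 256 leaves 256 - 36 = 220 bytes for sense
data, as its comment says. A compile-time check of that arithmetic could look
like this (purely illustrative, not part of the original header, and assuming
BUILD_BUG_ON() is visible via <linux/bug.h>):

	/* Illustrative only: verify the arithmetic behind the defaults above. */
	static inline void srpt_check_default_sizes(void)
	{
		BUILD_BUG_ON(DEFAULT_MAX_REQ_SIZE != 48 + 20 + 128 * 16); /* 2116 */
		BUILD_BUG_ON(DEFAULT_MAX_RSP_SIZE - 36 != 220); /* sense data room */
	}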
130
131enum srpt_opcode {
132 SRPT_RECV,
133 SRPT_SEND,
134 SRPT_RDMA_MID,
135 SRPT_RDMA_ABORT,
136 SRPT_RDMA_READ_LAST,
137 SRPT_RDMA_WRITE_LAST,
138};
139
140static inline u64 encode_wr_id(u8 opcode, u32 idx)
141{
142 return ((u64)opcode << 32) | idx;
143}
144static inline enum srpt_opcode opcode_from_wr_id(u64 wr_id)
145{
146 return wr_id >> 32;
147}
148static inline u32 idx_from_wr_id(u64 wr_id)
149{
150 return (u32)wr_id;
151}
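These helpers pack the completion opcode into the upper 32 bits of the 64-bit
IB work request ID and the ring index into the lower 32 bits, so both survive
the trip through the completion queue. A round-trip with made-up values:

	/* Illustrative round-trip; the opcode and index values are arbitrary. */
	u64 wr_id = encode_wr_id(SRPT_RDMA_READ_LAST, 42);

	WARN_ON(opcode_from_wr_id(wr_id) != SRPT_RDMA_READ_LAST);
	WARN_ON(idx_from_wr_id(wr_id) != 42);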
152
153struct rdma_iu {
154 u64 raddr;
155 u32 rkey;
156 struct ib_sge *sge;
157 u32 sge_cnt;
158 int mem_id;
159};
160
161/**
162 * enum srpt_command_state - SCSI command state managed by SRPT.
163 * @SRPT_STATE_NEW: New command arrived and is being processed.
164 * @SRPT_STATE_NEED_DATA: Processing a write or bidir command and waiting
165 * for data arrival.
166 * @SRPT_STATE_DATA_IN: Data for the write or bidir command arrived and is
167 * being processed.
168 * @SRPT_STATE_CMD_RSP_SENT: SRP_RSP for SRP_CMD has been sent.
169 * @SRPT_STATE_MGMT: Processing a SCSI task management command.
170 * @SRPT_STATE_MGMT_RSP_SENT: SRP_RSP for SRP_TSK_MGMT has been sent.
171 * @SRPT_STATE_DONE: Command processing finished successfully, command
172 * processing has been aborted or command processing
173 * failed.
174 */
175enum srpt_command_state {
176 SRPT_STATE_NEW = 0,
177 SRPT_STATE_NEED_DATA = 1,
178 SRPT_STATE_DATA_IN = 2,
179 SRPT_STATE_CMD_RSP_SENT = 3,
180 SRPT_STATE_MGMT = 4,
181 SRPT_STATE_MGMT_RSP_SENT = 5,
182 SRPT_STATE_DONE = 6,
183};
184
185/**
186 * struct srpt_ioctx - Shared SRPT I/O context information.
187 * @buf: Pointer to the buffer.
188 * @dma: DMA address of the buffer.
189 * @index: Index of the I/O context in its ioctx_ring array.
190 */
191struct srpt_ioctx {
192 void *buf;
193 dma_addr_t dma;
194 uint32_t index;
195};
196
197/**
198 * struct srpt_recv_ioctx - SRPT receive I/O context.
199 * @ioctx: See above.
200 * @wait_list: Node for insertion in srpt_rdma_ch.cmd_wait_list.
201 */
202struct srpt_recv_ioctx {
203 struct srpt_ioctx ioctx;
204 struct list_head wait_list;
205};
206
207/**
208 * struct srpt_send_ioctx - SRPT send I/O context.
209 * @ioctx: See above.
210 * @ch: Channel pointer.
211 * @free_list: Node in srpt_rdma_ch.free_list.
212 * @n_rbuf: Number of data buffers in the received SRP command.
213 * @rbufs: Pointer to SRP data buffer array.
214 * @single_rbuf: SRP data buffer if the command has only a single buffer.
215 * @sg: Pointer to sg-list associated with this I/O context.
216 * @sg_cnt: SG-list size.
217 * @mapped_sg_count: ib_dma_map_sg() return value.
218 * @n_rdma_ius: Number of elements in the rdma_ius array.
219 * @rdma_ius: Array with information about the RDMA mapping.
220 * @tag: Tag of the received SRP information unit.
221 * @spinlock: Protects 'state'.
222 * @state: I/O context state.
223 * @rdma_aborted: If initiating a multipart RDMA transfer failed, whether
224 * the already initiated transfers have finished.
225 * @cmd: Target core command data structure.
226 * @sense_data: SCSI sense data.
227 */
228struct srpt_send_ioctx {
229 struct srpt_ioctx ioctx;
230 struct srpt_rdma_ch *ch;
231 struct rdma_iu *rdma_ius;
232 struct srp_direct_buf *rbufs;
233 struct srp_direct_buf single_rbuf;
234 struct scatterlist *sg;
235 struct list_head free_list;
236 spinlock_t spinlock;
237 enum srpt_command_state state;
238 bool rdma_aborted;
239 struct se_cmd cmd;
240 struct completion tx_done;
241 u64 tag;
242 int sg_cnt;
243 int mapped_sg_count;
244 u16 n_rdma_ius;
245 u8 n_rdma;
246 u8 n_rbuf;
247 bool queue_status_only;
248 u8 sense_data[SCSI_SENSE_BUFFERSIZE];
249};
250
251/**
252 * enum rdma_ch_state - SRP channel state.
253 * @CH_CONNECTING: QP is in RTR state; waiting for RTU.
254 * @CH_LIVE: QP is in RTS state.
255 * @CH_DISCONNECTING: Either a DREQ has been received and a DREP is being
256 *                    awaited, or a DREQ has been sent and a DREP is being
257 *                    awaited.
258 * @CH_DRAINING: QP is in ERR state; waiting for last WQE event.
259 * @CH_RELEASING: Last WQE event has been received; releasing resources.
260 */
261enum rdma_ch_state {
262 CH_CONNECTING,
263 CH_LIVE,
264 CH_DISCONNECTING,
265 CH_DRAINING,
266 CH_RELEASING
267};
268
269/**
270 * struct srpt_rdma_ch - RDMA channel.
271 * @wait_queue: Allows the kernel thread to wait for more work.
272 * @thread: Kernel thread that processes the IB queues associated with
273 * the channel.
274 * @cm_id: IB CM ID associated with the channel.
275 * @qp: IB queue pair used for communicating over this channel.
276 * @cq: IB completion queue for this channel.
277 * @rq_size: IB receive queue size.
278 * @rsp_size: IB response message size in bytes.
279 * @sq_wr_avail: number of work requests available in the send queue.
280 * @sport: pointer to the information of the HCA port used by this
281 * channel.
282 * @i_port_id: 128-bit initiator port identifier copied from SRP_LOGIN_REQ.
283 * @t_port_id: 128-bit target port identifier copied from SRP_LOGIN_REQ.
284 * @max_ti_iu_len: maximum target-to-initiator information unit length.
285 * @req_lim: request limit: maximum number of requests that may be sent
286 * by the initiator without having received a response.
287 * @req_lim_delta: Number of credits not yet sent back to the initiator.
288 * @spinlock: Protects free_list and state.
289 * @free_list: Head of list with free send I/O contexts.
290 * @state: channel state. See also enum rdma_ch_state.
291 * @ioctx_ring: Send ring.
292 * @wc: IB work completion array for srpt_process_completion().
293 * @list: Node for insertion in the srpt_device.rch_list list.
294 * @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This
296 * list contains struct srpt_recv_ioctx elements and is protected
296 * against concurrent modification by the cm_id spinlock.
297 * @sess: Session information associated with this SRP channel.
298 * @sess_name: Session name.
299 * @release_work: Allows scheduling of srpt_release_channel().
300 * @release_done: Enables waiting for srpt_release_channel() completion.
301 */
302struct srpt_rdma_ch {
303 wait_queue_head_t wait_queue;
304 struct task_struct *thread;
305 struct ib_cm_id *cm_id;
306 struct ib_qp *qp;
307 struct ib_cq *cq;
308 int rq_size;
309 u32 rsp_size;
310 atomic_t sq_wr_avail;
311 struct srpt_port *sport;
312 u8 i_port_id[16];
313 u8 t_port_id[16];
314 int max_ti_iu_len;
315 atomic_t req_lim;
316 atomic_t req_lim_delta;
317 spinlock_t spinlock;
318 struct list_head free_list;
319 enum rdma_ch_state state;
320 struct srpt_send_ioctx **ioctx_ring;
321 struct ib_wc wc[16];
322 struct list_head list;
323 struct list_head cmd_wait_list;
324 struct se_session *sess;
325 u8 sess_name[36];
326 struct work_struct release_work;
327 struct completion *release_done;
328};
329
330/**
331 * struct srpt_port_attrib - Attributes for SRPT port
332 * @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections.
333 * @srp_max_rsp_size: Maximum size of SRP response messages in bytes.
334 * @srp_sq_size: Send queue size used for new RDMA channels.
335 */
336struct srpt_port_attrib {
337 u32 srp_max_rdma_size;
338 u32 srp_max_rsp_size;
339 u32 srp_sq_size;
340};
341
342/**
343 * struct srpt_port - Information associated by SRPT with a single IB port.
344 * @sdev: backpointer to the HCA information.
345 * @mad_agent: per-port management datagram processing information.
346 * @enabled: Whether or not this target port is enabled.
347 * @port_guid: ASCII representation of Port GUID
348 * @port: one-based port number.
349 * @sm_lid: cached value of the port's sm_lid.
350 * @lid: cached value of the port's lid.
351 * @gid: cached value of the port's gid.
352 * @port_acl_lock: Spinlock protecting port_acl_list.
353 * @work: work structure for refreshing the aforementioned cached values.
354 * @port_tpg_1: Data for target portal group 1.
355 * @port_wwn: Target core WWN data.
356 * @port_acl_list: Head of the list with all node ACLs for this port.
357 */
358struct srpt_port {
359 struct srpt_device *sdev;
360 struct ib_mad_agent *mad_agent;
361 bool enabled;
362 u8 port_guid[64];
363 u8 port;
364 u16 sm_lid;
365 u16 lid;
366 union ib_gid gid;
367 spinlock_t port_acl_lock;
368 struct work_struct work;
369 struct se_portal_group port_tpg_1;
370 struct se_wwn port_wwn;
371 struct list_head port_acl_list;
372 struct srpt_port_attrib port_attrib;
373};
374
375/**
376 * struct srpt_device - Information associated by SRPT with a single HCA.
377 * @device: Backpointer to the struct ib_device managed by the IB core.
378 * @pd: IB protection domain.
379 * @mr: L_Key (local key) with write access to all local memory.
380 * @srq: Per-HCA SRQ (shared receive queue).
381 * @cm_id: Connection identifier.
382 * @dev_attr: Attributes of the InfiniBand device as obtained during the
383 * ib_client.add() callback.
384 * @srq_size: SRQ size.
385 * @ioctx_ring: Receive I/O context ring for the per-HCA SRQ.
386 * @rch_list: Per-device channel list -- see also srpt_rdma_ch.list.
387 * @ch_releaseQ: Enables waiting for removal from rch_list.
388 * @spinlock: Protects rch_list and tpg.
389 * @port: Information about the ports owned by this HCA.
390 * @event_handler: Per-HCA asynchronous IB event handler.
391 * @list: Node in srpt_dev_list.
392 */
393struct srpt_device {
394 struct ib_device *device;
395 struct ib_pd *pd;
396 struct ib_mr *mr;
397 struct ib_srq *srq;
398 struct ib_cm_id *cm_id;
399 struct ib_device_attr dev_attr;
400 int srq_size;
401 struct srpt_recv_ioctx **ioctx_ring;
402 struct list_head rch_list;
403 wait_queue_head_t ch_releaseQ;
404 spinlock_t spinlock;
405 struct srpt_port port[2];
406 struct ib_event_handler event_handler;
407 struct list_head list;
408};
409
410/**
411 * struct srpt_node_acl - Per-initiator ACL data (managed via configfs).
412 * @i_port_id: 128-bit SRP initiator port ID.
413 * @sport: port information.
414 * @nacl: Target core node ACL information.
415 * @list: Element of the per-HCA ACL list.
416 */
417struct srpt_node_acl {
418 u8 i_port_id[16];
419 struct srpt_port *sport;
420 struct se_node_acl nacl;
421 struct list_head list;
422};
423
424/*
425 * SRP-related SCSI persistent reservation definitions.
426 *
427 * See also SPC4r28, section 7.6.1 (Protocol specific parameters introduction).
428 * See also SPC4r28, section 7.6.4.5 (TransportID for initiator ports using
429 * SCSI over an RDMA interface).
430 */
431
432enum {
433 SCSI_TRANSPORTID_PROTOCOLID_SRP = 4,
434};
435
436struct spc_rdma_transport_id {
437 uint8_t protocol_identifier;
438 uint8_t reserved[7];
439 uint8_t i_port_id[16];
440};
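Per the SPC-4 sections cited above, this TransportID is a fixed 24-byte
layout: a one-byte protocol identifier (4 for SRP), seven reserved bytes,
then the 16-byte initiator port ID. A minimal sketch of filling one in
(hypothetical helper, not part of the driver):

	/* Hypothetical helper: build an SRP TransportID for an initiator. */
	static void spc_fill_srp_transport_id(struct spc_rdma_transport_id *tid,
					      const uint8_t i_port_id[16])
	{
		memset(tid, 0, sizeof(*tid));	/* zeroes the reserved bytes */
		tid->protocol_identifier = SCSI_TRANSPORTID_PROTOCOLID_SRP;
		memcpy(tid->i_port_id, i_port_id, sizeof(tid->i_port_id));
	}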
441
442#endif /* IB_SRPT_H */