aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-07-12 19:45:40 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-07-12 19:45:40 -0400
commit 0cdf6990e992902ae59cbc625d28cb41390f378e (patch)
tree 0c01cf792be5f36ea34064036005f424ab95a571 /drivers/infiniband
parent de081fa517fed81b0369f2e90ca87c30182879c8 (diff)
parent cec7c893d8654723028f09d33341e42673558057 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (76 commits)
  IB: Update MAINTAINERS with Hal's new email address
  IB/mlx4: Implement query SRQ
  IB/mlx4: Implement query QP
  IB/cm: Send no match if a SIDR REQ does not match a listen
  IB/cm: Fix handling of duplicate SIDR REQs
  IB/cm: cm_msgs.h should include ib_cm.h
  IB/cm: Include HCA ACK delay in local ACK timeout
  IB/cm: Use spin_lock_irq() instead of spin_lock_irqsave() when possible
  IB/sa: Make sure SA queries use default P_Key
  IPoIB: Recycle loopback skbs instead of freeing and reallocating
  IB/mthca: Replace memset(<addr>, 0, PAGE_SIZE) with clear_page(<addr>)
  IPoIB/cm: Fix warning if IPV6 is not enabled
  IB/core: Take sizeof the correct pointer when calling kmalloc()
  IB/ehca: Improve latency by unlocking after triggering the hardware
  IB/ehca: Notify consumers of LID/PKEY/SM changes after nondisruptive events
  IB/ehca: Return QP pointer in poll_cq()
  IB/ehca: Change idr spinlocks into rwlocks
  IB/ehca: Refactor sync between completions and destroy_cq using atomic_t
  IB/ehca: Lock renaming, static initializers
  IB/ehca: Report RDMA atomic attributes in query_qp()
  ...
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/Kconfig15
-rw-r--r--drivers/infiniband/core/agent.c19
-rw-r--r--drivers/infiniband/core/cm.c247
-rw-r--r--drivers/infiniband/core/cm_msgs.h1
-rw-r--r--drivers/infiniband/core/cma.c1
-rw-r--r--drivers/infiniband/core/mad.c50
-rw-r--r--drivers/infiniband/core/multicast.c2
-rw-r--r--drivers/infiniband/core/sa.h2
-rw-r--r--drivers/infiniband/core/sa_query.c87
-rw-r--r--drivers/infiniband/core/smi.c16
-rw-r--r--drivers/infiniband/core/smi.h2
-rw-r--r--drivers/infiniband/core/sysfs.c2
-rw-r--r--drivers/infiniband/core/ucm.c1
-rw-r--r--drivers/infiniband/core/umem.c1
-rw-r--r--drivers/infiniband/hw/amso1100/Kconfig2
-rw-r--r--drivers/infiniband/hw/cxgb3/Kconfig2
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c6
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_wr.h3
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c108
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.h1
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c7
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c7
-rw-r--r--drivers/infiniband/hw/ehca/Kconfig2
-rw-r--r--drivers/infiniband/hw/ehca/ehca_av.c6
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes.h75
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes_pSeries.h4
-rw-r--r--drivers/infiniband/hw/ehca/ehca_cq.c50
-rw-r--r--drivers/infiniband/hw/ehca/ehca_hca.c61
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.c140
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.h1
-rw-r--r--drivers/infiniband/hw/ehca/ehca_iverbs.h18
-rw-r--r--drivers/infiniband/hw/ehca/ehca_main.c98
-rw-r--r--drivers/infiniband/hw/ehca/ehca_qp.c751
-rw-r--r--drivers/infiniband/hw/ehca/ehca_reqs.c85
-rw-r--r--drivers/infiniband/hw/ehca/ehca_tools.h1
-rw-r--r--drivers/infiniband/hw/ehca/ehca_uverbs.c13
-rw-r--r--drivers/infiniband/hw/ehca/hcp_if.c58
-rw-r--r--drivers/infiniband/hw/ehca/hcp_if.h1
-rw-r--r--drivers/infiniband/hw/ehca/hipz_hw.h19
-rw-r--r--drivers/infiniband/hw/ehca/ipz_pt_fn.h28
-rw-r--r--drivers/infiniband/hw/ipath/Kconfig2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_common.h33
-rw-r--r--drivers/infiniband/hw/ipath/ipath_cq.c7
-rw-r--r--drivers/infiniband/hw/ipath/ipath_debug.h2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_diag.c41
-rw-r--r--drivers/infiniband/hw/ipath/ipath_driver.c187
-rw-r--r--drivers/infiniband/hw/ipath/ipath_eeprom.c303
-rw-r--r--drivers/infiniband/hw/ipath/ipath_file_ops.c205
-rw-r--r--drivers/infiniband/hw/ipath/ipath_fs.c9
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6110.c101
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6120.c92
-rw-r--r--drivers/infiniband/hw/ipath/ipath_init_chip.c26
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c141
-rw-r--r--drivers/infiniband/hw/ipath/ipath_kernel.h85
-rw-r--r--drivers/infiniband/hw/ipath/ipath_keys.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_layer.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_layer.h2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mad.c11
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mmap.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mr.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_qp.c19
-rw-r--r--drivers/infiniband/hw/ipath/ipath_rc.c116
-rw-r--r--drivers/infiniband/hw/ipath/ipath_registers.h2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ruc.c36
-rw-r--r--drivers/infiniband/hw/ipath/ipath_srq.c4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_stats.c25
-rw-r--r--drivers/infiniband/hw/ipath/ipath_sysfs.c43
-rw-r--r--drivers/infiniband/hw/ipath/ipath_uc.c9
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ud.c6
-rw-r--r--drivers/infiniband/hw/ipath/ipath_user_pages.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.c29
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.h3
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs_mcast.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_wc_ppc64.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_wc_x86_64.c29
-rw-r--r--drivers/infiniband/hw/mlx4/Kconfig1
-rw-r--r--drivers/infiniband/hw/mlx4/main.c6
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h4
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c137
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c18
-rw-r--r--drivers/infiniband/hw/mthca/Kconfig2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_allocator.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_eq.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/Kconfig2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c33
-rw-r--r--drivers/infiniband/ulp/iser/Kconfig2
-rw-r--r--drivers/infiniband/ulp/srp/Kconfig2
88 files changed, 2738 insertions, 1052 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 994decc7bcf2..a193dfbf99d2 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -1,14 +1,14 @@
1menu "InfiniBand support" 1menuconfig INFINIBAND
2 depends on HAS_IOMEM
3
4config INFINIBAND
5 depends on PCI || BROKEN
6 tristate "InfiniBand support" 2 tristate "InfiniBand support"
3 depends on PCI || BROKEN
4 depends on HAS_IOMEM
7 ---help--- 5 ---help---
8 Core support for InfiniBand (IB). Make sure to also select 6 Core support for InfiniBand (IB). Make sure to also select
9 any protocols you wish to use as well as drivers for your 7 any protocols you wish to use as well as drivers for your
10 InfiniBand hardware. 8 InfiniBand hardware.
11 9
10if INFINIBAND
11
12config INFINIBAND_USER_MAD 12config INFINIBAND_USER_MAD
13 tristate "InfiniBand userspace MAD support" 13 tristate "InfiniBand userspace MAD support"
14 depends on INFINIBAND 14 depends on INFINIBAND
@@ -20,7 +20,6 @@ config INFINIBAND_USER_MAD
20 20
21config INFINIBAND_USER_ACCESS 21config INFINIBAND_USER_ACCESS
22 tristate "InfiniBand userspace access (verbs and CM)" 22 tristate "InfiniBand userspace access (verbs and CM)"
23 depends on INFINIBAND
24 ---help--- 23 ---help---
25 Userspace InfiniBand access support. This enables the 24 Userspace InfiniBand access support. This enables the
26 kernel side of userspace verbs and the userspace 25 kernel side of userspace verbs and the userspace
@@ -37,7 +36,7 @@ config INFINIBAND_USER_MEM
37 36
38config INFINIBAND_ADDR_TRANS 37config INFINIBAND_ADDR_TRANS
39 bool 38 bool
40 depends on INFINIBAND && INET 39 depends on INET
41 default y 40 default y
42 41
43source "drivers/infiniband/hw/mthca/Kconfig" 42source "drivers/infiniband/hw/mthca/Kconfig"
@@ -54,4 +53,4 @@ source "drivers/infiniband/ulp/srp/Kconfig"
54 53
55source "drivers/infiniband/ulp/iser/Kconfig" 54source "drivers/infiniband/ulp/iser/Kconfig"
56 55
57endmenu 56endif # INFINIBAND
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index ecd1a3057c61..db2633e4aae6 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -3,7 +3,7 @@
3 * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved. 3 * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved.
4 * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. 4 * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved.
5 * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. 5 * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. 6 * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved.
7 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 7 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
8 * 8 *
9 * This software is available to you under a choice of one of two 9 * This software is available to you under a choice of one of two
@@ -34,7 +34,6 @@
34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 * SOFTWARE. 35 * SOFTWARE.
36 * 36 *
37 * $Id: agent.c 1389 2004-12-27 22:56:47Z roland $
38 */ 37 */
39 38
40#include <linux/slab.h> 39#include <linux/slab.h>
@@ -42,6 +41,7 @@
42 41
43#include "agent.h" 42#include "agent.h"
44#include "smi.h" 43#include "smi.h"
44#include "mad_priv.h"
45 45
46#define SPFX "ib_agent: " 46#define SPFX "ib_agent: "
47 47
@@ -87,8 +87,13 @@ int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
87 struct ib_mad_send_buf *send_buf; 87 struct ib_mad_send_buf *send_buf;
88 struct ib_ah *ah; 88 struct ib_ah *ah;
89 int ret; 89 int ret;
90 struct ib_mad_send_wr_private *mad_send_wr;
91
92 if (device->node_type == RDMA_NODE_IB_SWITCH)
93 port_priv = ib_get_agent_port(device, 0);
94 else
95 port_priv = ib_get_agent_port(device, port_num);
90 96
91 port_priv = ib_get_agent_port(device, port_num);
92 if (!port_priv) { 97 if (!port_priv) {
93 printk(KERN_ERR SPFX "Unable to find port agent\n"); 98 printk(KERN_ERR SPFX "Unable to find port agent\n");
94 return -ENODEV; 99 return -ENODEV;
@@ -113,6 +118,14 @@ int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
113 118
114 memcpy(send_buf->mad, mad, sizeof *mad); 119 memcpy(send_buf->mad, mad, sizeof *mad);
115 send_buf->ah = ah; 120 send_buf->ah = ah;
121
122 if (device->node_type == RDMA_NODE_IB_SWITCH) {
123 mad_send_wr = container_of(send_buf,
124 struct ib_mad_send_wr_private,
125 send_buf);
126 mad_send_wr->send_wr.wr.ud.port_num = port_num;
127 }
128
116 if ((ret = ib_post_send_mad(send_buf, NULL))) { 129 if ((ret = ib_post_send_mad(send_buf, NULL))) {
117 printk(KERN_ERR SPFX "ib_post_send_mad error:%d\n", ret); 130 printk(KERN_ERR SPFX "ib_post_send_mad error:%d\n", ret);
118 goto err2; 131 goto err2;
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 40c004a2697e..9820c67ba47d 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -87,6 +87,7 @@ struct cm_port {
87struct cm_device { 87struct cm_device {
88 struct list_head list; 88 struct list_head list;
89 struct ib_device *device; 89 struct ib_device *device;
90 u8 ack_delay;
90 struct cm_port port[0]; 91 struct cm_port port[0];
91}; 92};
92 93
@@ -95,7 +96,7 @@ struct cm_av {
95 union ib_gid dgid; 96 union ib_gid dgid;
96 struct ib_ah_attr ah_attr; 97 struct ib_ah_attr ah_attr;
97 u16 pkey_index; 98 u16 pkey_index;
98 u8 packet_life_time; 99 u8 timeout;
99}; 100};
100 101
101struct cm_work { 102struct cm_work {
@@ -154,6 +155,7 @@ struct cm_id_private {
154 u8 retry_count; 155 u8 retry_count;
155 u8 rnr_retry_count; 156 u8 rnr_retry_count;
156 u8 service_timeout; 157 u8 service_timeout;
158 u8 target_ack_delay;
157 159
158 struct list_head work_list; 160 struct list_head work_list;
159 atomic_t work_count; 161 atomic_t work_count;
@@ -293,7 +295,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
293 av->port = port; 295 av->port = port;
294 ib_init_ah_from_path(cm_dev->device, port->port_num, path, 296 ib_init_ah_from_path(cm_dev->device, port->port_num, path,
295 &av->ah_attr); 297 &av->ah_attr);
296 av->packet_life_time = path->packet_life_time; 298 av->timeout = path->packet_life_time + 1;
297 return 0; 299 return 0;
298} 300}
299 301
@@ -318,12 +320,10 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
318 320
319static void cm_free_id(__be32 local_id) 321static void cm_free_id(__be32 local_id)
320{ 322{
321 unsigned long flags; 323 spin_lock_irq(&cm.lock);
322
323 spin_lock_irqsave(&cm.lock, flags);
324 idr_remove(&cm.local_id_table, 324 idr_remove(&cm.local_id_table,
325 (__force int) (local_id ^ cm.random_id_operand)); 325 (__force int) (local_id ^ cm.random_id_operand));
326 spin_unlock_irqrestore(&cm.lock, flags); 326 spin_unlock_irq(&cm.lock);
327} 327}
328 328
329static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) 329static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
@@ -345,11 +345,10 @@ static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
345static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id) 345static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
346{ 346{
347 struct cm_id_private *cm_id_priv; 347 struct cm_id_private *cm_id_priv;
348 unsigned long flags;
349 348
350 spin_lock_irqsave(&cm.lock, flags); 349 spin_lock_irq(&cm.lock);
351 cm_id_priv = cm_get_id(local_id, remote_id); 350 cm_id_priv = cm_get_id(local_id, remote_id);
352 spin_unlock_irqrestore(&cm.lock, flags); 351 spin_unlock_irq(&cm.lock);
353 352
354 return cm_id_priv; 353 return cm_id_priv;
355} 354}
@@ -646,6 +645,25 @@ static inline int cm_convert_to_ms(int iba_time)
646 return 1 << max(iba_time - 8, 0); 645 return 1 << max(iba_time - 8, 0);
647} 646}
648 647
648/*
649 * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
650 * Because of how ack_timeout is stored, adding one doubles the timeout.
651 * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
652 * increment it (round up) only if the other is within 50%.
653 */
654static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
655{
656 int ack_timeout = packet_life_time + 1;
657
658 if (ack_timeout >= ca_ack_delay)
659 ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
660 else
661 ack_timeout = ca_ack_delay +
662 (ack_timeout >= (ca_ack_delay - 1));
663
664 return min(31, ack_timeout);
665}
666
649static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) 667static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
650{ 668{
651 if (timewait_info->inserted_remote_id) { 669 if (timewait_info->inserted_remote_id) {
@@ -689,7 +707,7 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
689 * timewait before notifying the user that we've exited timewait. 707 * timewait before notifying the user that we've exited timewait.
690 */ 708 */
691 cm_id_priv->id.state = IB_CM_TIMEWAIT; 709 cm_id_priv->id.state = IB_CM_TIMEWAIT;
692 wait_time = cm_convert_to_ms(cm_id_priv->av.packet_life_time + 1); 710 wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
693 queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, 711 queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
694 msecs_to_jiffies(wait_time)); 712 msecs_to_jiffies(wait_time));
695 cm_id_priv->timewait_info = NULL; 713 cm_id_priv->timewait_info = NULL;
@@ -713,31 +731,30 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
713{ 731{
714 struct cm_id_private *cm_id_priv; 732 struct cm_id_private *cm_id_priv;
715 struct cm_work *work; 733 struct cm_work *work;
716 unsigned long flags;
717 734
718 cm_id_priv = container_of(cm_id, struct cm_id_private, id); 735 cm_id_priv = container_of(cm_id, struct cm_id_private, id);
719retest: 736retest:
720 spin_lock_irqsave(&cm_id_priv->lock, flags); 737 spin_lock_irq(&cm_id_priv->lock);
721 switch (cm_id->state) { 738 switch (cm_id->state) {
722 case IB_CM_LISTEN: 739 case IB_CM_LISTEN:
723 cm_id->state = IB_CM_IDLE; 740 cm_id->state = IB_CM_IDLE;
724 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 741 spin_unlock_irq(&cm_id_priv->lock);
725 spin_lock_irqsave(&cm.lock, flags); 742 spin_lock_irq(&cm.lock);
726 rb_erase(&cm_id_priv->service_node, &cm.listen_service_table); 743 rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
727 spin_unlock_irqrestore(&cm.lock, flags); 744 spin_unlock_irq(&cm.lock);
728 break; 745 break;
729 case IB_CM_SIDR_REQ_SENT: 746 case IB_CM_SIDR_REQ_SENT:
730 cm_id->state = IB_CM_IDLE; 747 cm_id->state = IB_CM_IDLE;
731 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); 748 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
732 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 749 spin_unlock_irq(&cm_id_priv->lock);
733 break; 750 break;
734 case IB_CM_SIDR_REQ_RCVD: 751 case IB_CM_SIDR_REQ_RCVD:
735 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 752 spin_unlock_irq(&cm_id_priv->lock);
736 cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT); 753 cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
737 break; 754 break;
738 case IB_CM_REQ_SENT: 755 case IB_CM_REQ_SENT:
739 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); 756 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
740 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 757 spin_unlock_irq(&cm_id_priv->lock);
741 ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT, 758 ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
742 &cm_id_priv->id.device->node_guid, 759 &cm_id_priv->id.device->node_guid,
743 sizeof cm_id_priv->id.device->node_guid, 760 sizeof cm_id_priv->id.device->node_guid,
@@ -747,9 +764,9 @@ retest:
747 if (err == -ENOMEM) { 764 if (err == -ENOMEM) {
748 /* Do not reject to allow future retries. */ 765 /* Do not reject to allow future retries. */
749 cm_reset_to_idle(cm_id_priv); 766 cm_reset_to_idle(cm_id_priv);
750 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 767 spin_unlock_irq(&cm_id_priv->lock);
751 } else { 768 } else {
752 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 769 spin_unlock_irq(&cm_id_priv->lock);
753 ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, 770 ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
754 NULL, 0, NULL, 0); 771 NULL, 0, NULL, 0);
755 } 772 }
@@ -762,25 +779,25 @@ retest:
762 case IB_CM_MRA_REQ_SENT: 779 case IB_CM_MRA_REQ_SENT:
763 case IB_CM_REP_RCVD: 780 case IB_CM_REP_RCVD:
764 case IB_CM_MRA_REP_SENT: 781 case IB_CM_MRA_REP_SENT:
765 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 782 spin_unlock_irq(&cm_id_priv->lock);
766 ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, 783 ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
767 NULL, 0, NULL, 0); 784 NULL, 0, NULL, 0);
768 break; 785 break;
769 case IB_CM_ESTABLISHED: 786 case IB_CM_ESTABLISHED:
770 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 787 spin_unlock_irq(&cm_id_priv->lock);
771 ib_send_cm_dreq(cm_id, NULL, 0); 788 ib_send_cm_dreq(cm_id, NULL, 0);
772 goto retest; 789 goto retest;
773 case IB_CM_DREQ_SENT: 790 case IB_CM_DREQ_SENT:
774 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); 791 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
775 cm_enter_timewait(cm_id_priv); 792 cm_enter_timewait(cm_id_priv);
776 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 793 spin_unlock_irq(&cm_id_priv->lock);
777 break; 794 break;
778 case IB_CM_DREQ_RCVD: 795 case IB_CM_DREQ_RCVD:
779 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 796 spin_unlock_irq(&cm_id_priv->lock);
780 ib_send_cm_drep(cm_id, NULL, 0); 797 ib_send_cm_drep(cm_id, NULL, 0);
781 break; 798 break;
782 default: 799 default:
783 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 800 spin_unlock_irq(&cm_id_priv->lock);
784 break; 801 break;
785 } 802 }
786 803
@@ -912,7 +929,8 @@ static void cm_format_req(struct cm_req_msg *req_msg,
912 cm_req_set_primary_sl(req_msg, param->primary_path->sl); 929 cm_req_set_primary_sl(req_msg, param->primary_path->sl);
913 cm_req_set_primary_subnet_local(req_msg, 1); /* local only... */ 930 cm_req_set_primary_subnet_local(req_msg, 1); /* local only... */
914 cm_req_set_primary_local_ack_timeout(req_msg, 931 cm_req_set_primary_local_ack_timeout(req_msg,
915 min(31, param->primary_path->packet_life_time + 1)); 932 cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
933 param->primary_path->packet_life_time));
916 934
917 if (param->alternate_path) { 935 if (param->alternate_path) {
918 req_msg->alt_local_lid = param->alternate_path->slid; 936 req_msg->alt_local_lid = param->alternate_path->slid;
@@ -927,7 +945,8 @@ static void cm_format_req(struct cm_req_msg *req_msg,
927 cm_req_set_alt_sl(req_msg, param->alternate_path->sl); 945 cm_req_set_alt_sl(req_msg, param->alternate_path->sl);
928 cm_req_set_alt_subnet_local(req_msg, 1); /* local only... */ 946 cm_req_set_alt_subnet_local(req_msg, 1); /* local only... */
929 cm_req_set_alt_local_ack_timeout(req_msg, 947 cm_req_set_alt_local_ack_timeout(req_msg,
930 min(31, param->alternate_path->packet_life_time + 1)); 948 cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
949 param->alternate_path->packet_life_time));
931 } 950 }
932 951
933 if (param->private_data && param->private_data_len) 952 if (param->private_data && param->private_data_len)
@@ -1169,7 +1188,6 @@ static void cm_format_req_event(struct cm_work *work,
1169static void cm_process_work(struct cm_id_private *cm_id_priv, 1188static void cm_process_work(struct cm_id_private *cm_id_priv,
1170 struct cm_work *work) 1189 struct cm_work *work)
1171{ 1190{
1172 unsigned long flags;
1173 int ret; 1191 int ret;
1174 1192
1175 /* We will typically only have the current event to report. */ 1193 /* We will typically only have the current event to report. */
@@ -1177,9 +1195,9 @@ static void cm_process_work(struct cm_id_private *cm_id_priv,
1177 cm_free_work(work); 1195 cm_free_work(work);
1178 1196
1179 while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) { 1197 while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
1180 spin_lock_irqsave(&cm_id_priv->lock, flags); 1198 spin_lock_irq(&cm_id_priv->lock);
1181 work = cm_dequeue_work(cm_id_priv); 1199 work = cm_dequeue_work(cm_id_priv);
1182 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1200 spin_unlock_irq(&cm_id_priv->lock);
1183 BUG_ON(!work); 1201 BUG_ON(!work);
1184 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, 1202 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
1185 &work->cm_event); 1203 &work->cm_event);
@@ -1250,7 +1268,6 @@ static void cm_dup_req_handler(struct cm_work *work,
1250 struct cm_id_private *cm_id_priv) 1268 struct cm_id_private *cm_id_priv)
1251{ 1269{
1252 struct ib_mad_send_buf *msg = NULL; 1270 struct ib_mad_send_buf *msg = NULL;
1253 unsigned long flags;
1254 int ret; 1271 int ret;
1255 1272
1256 /* Quick state check to discard duplicate REQs. */ 1273 /* Quick state check to discard duplicate REQs. */
@@ -1261,7 +1278,7 @@ static void cm_dup_req_handler(struct cm_work *work,
1261 if (ret) 1278 if (ret)
1262 return; 1279 return;
1263 1280
1264 spin_lock_irqsave(&cm_id_priv->lock, flags); 1281 spin_lock_irq(&cm_id_priv->lock);
1265 switch (cm_id_priv->id.state) { 1282 switch (cm_id_priv->id.state) {
1266 case IB_CM_MRA_REQ_SENT: 1283 case IB_CM_MRA_REQ_SENT:
1267 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, 1284 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
@@ -1276,14 +1293,14 @@ static void cm_dup_req_handler(struct cm_work *work,
1276 default: 1293 default:
1277 goto unlock; 1294 goto unlock;
1278 } 1295 }
1279 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1296 spin_unlock_irq(&cm_id_priv->lock);
1280 1297
1281 ret = ib_post_send_mad(msg, NULL); 1298 ret = ib_post_send_mad(msg, NULL);
1282 if (ret) 1299 if (ret)
1283 goto free; 1300 goto free;
1284 return; 1301 return;
1285 1302
1286unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1303unlock: spin_unlock_irq(&cm_id_priv->lock);
1287free: cm_free_msg(msg); 1304free: cm_free_msg(msg);
1288} 1305}
1289 1306
@@ -1293,17 +1310,16 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
1293 struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv; 1310 struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
1294 struct cm_timewait_info *timewait_info; 1311 struct cm_timewait_info *timewait_info;
1295 struct cm_req_msg *req_msg; 1312 struct cm_req_msg *req_msg;
1296 unsigned long flags;
1297 1313
1298 req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; 1314 req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1299 1315
1300 /* Check for possible duplicate REQ. */ 1316 /* Check for possible duplicate REQ. */
1301 spin_lock_irqsave(&cm.lock, flags); 1317 spin_lock_irq(&cm.lock);
1302 timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info); 1318 timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
1303 if (timewait_info) { 1319 if (timewait_info) {
1304 cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, 1320 cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
1305 timewait_info->work.remote_id); 1321 timewait_info->work.remote_id);
1306 spin_unlock_irqrestore(&cm.lock, flags); 1322 spin_unlock_irq(&cm.lock);
1307 if (cur_cm_id_priv) { 1323 if (cur_cm_id_priv) {
1308 cm_dup_req_handler(work, cur_cm_id_priv); 1324 cm_dup_req_handler(work, cur_cm_id_priv);
1309 cm_deref_id(cur_cm_id_priv); 1325 cm_deref_id(cur_cm_id_priv);
@@ -1315,7 +1331,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
1315 timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info); 1331 timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
1316 if (timewait_info) { 1332 if (timewait_info) {
1317 cm_cleanup_timewait(cm_id_priv->timewait_info); 1333 cm_cleanup_timewait(cm_id_priv->timewait_info);
1318 spin_unlock_irqrestore(&cm.lock, flags); 1334 spin_unlock_irq(&cm.lock);
1319 cm_issue_rej(work->port, work->mad_recv_wc, 1335 cm_issue_rej(work->port, work->mad_recv_wc,
1320 IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ, 1336 IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
1321 NULL, 0); 1337 NULL, 0);
@@ -1328,7 +1344,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
1328 req_msg->private_data); 1344 req_msg->private_data);
1329 if (!listen_cm_id_priv) { 1345 if (!listen_cm_id_priv) {
1330 cm_cleanup_timewait(cm_id_priv->timewait_info); 1346 cm_cleanup_timewait(cm_id_priv->timewait_info);
1331 spin_unlock_irqrestore(&cm.lock, flags); 1347 spin_unlock_irq(&cm.lock);
1332 cm_issue_rej(work->port, work->mad_recv_wc, 1348 cm_issue_rej(work->port, work->mad_recv_wc,
1333 IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ, 1349 IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
1334 NULL, 0); 1350 NULL, 0);
@@ -1338,7 +1354,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
1338 atomic_inc(&cm_id_priv->refcount); 1354 atomic_inc(&cm_id_priv->refcount);
1339 cm_id_priv->id.state = IB_CM_REQ_RCVD; 1355 cm_id_priv->id.state = IB_CM_REQ_RCVD;
1340 atomic_inc(&cm_id_priv->work_count); 1356 atomic_inc(&cm_id_priv->work_count);
1341 spin_unlock_irqrestore(&cm.lock, flags); 1357 spin_unlock_irq(&cm.lock);
1342out: 1358out:
1343 return listen_cm_id_priv; 1359 return listen_cm_id_priv;
1344} 1360}
@@ -1440,7 +1456,8 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
1440 cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn)); 1456 cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
1441 rep_msg->resp_resources = param->responder_resources; 1457 rep_msg->resp_resources = param->responder_resources;
1442 rep_msg->initiator_depth = param->initiator_depth; 1458 rep_msg->initiator_depth = param->initiator_depth;
1443 cm_rep_set_target_ack_delay(rep_msg, param->target_ack_delay); 1459 cm_rep_set_target_ack_delay(rep_msg,
1460 cm_id_priv->av.port->cm_dev->ack_delay);
1444 cm_rep_set_failover(rep_msg, param->failover_accepted); 1461 cm_rep_set_failover(rep_msg, param->failover_accepted);
1445 cm_rep_set_flow_ctrl(rep_msg, param->flow_control); 1462 cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
1446 cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count); 1463 cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
@@ -1591,7 +1608,6 @@ static void cm_dup_rep_handler(struct cm_work *work)
1591 struct cm_id_private *cm_id_priv; 1608 struct cm_id_private *cm_id_priv;
1592 struct cm_rep_msg *rep_msg; 1609 struct cm_rep_msg *rep_msg;
1593 struct ib_mad_send_buf *msg = NULL; 1610 struct ib_mad_send_buf *msg = NULL;
1594 unsigned long flags;
1595 int ret; 1611 int ret;
1596 1612
1597 rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad; 1613 rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
@@ -1604,7 +1620,7 @@ static void cm_dup_rep_handler(struct cm_work *work)
1604 if (ret) 1620 if (ret)
1605 goto deref; 1621 goto deref;
1606 1622
1607 spin_lock_irqsave(&cm_id_priv->lock, flags); 1623 spin_lock_irq(&cm_id_priv->lock);
1608 if (cm_id_priv->id.state == IB_CM_ESTABLISHED) 1624 if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
1609 cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, 1625 cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1610 cm_id_priv->private_data, 1626 cm_id_priv->private_data,
@@ -1616,14 +1632,14 @@ static void cm_dup_rep_handler(struct cm_work *work)
1616 cm_id_priv->private_data_len); 1632 cm_id_priv->private_data_len);
1617 else 1633 else
1618 goto unlock; 1634 goto unlock;
1619 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1635 spin_unlock_irq(&cm_id_priv->lock);
1620 1636
1621 ret = ib_post_send_mad(msg, NULL); 1637 ret = ib_post_send_mad(msg, NULL);
1622 if (ret) 1638 if (ret)
1623 goto free; 1639 goto free;
1624 goto deref; 1640 goto deref;
1625 1641
1626unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1642unlock: spin_unlock_irq(&cm_id_priv->lock);
1627free: cm_free_msg(msg); 1643free: cm_free_msg(msg);
1628deref: cm_deref_id(cm_id_priv); 1644deref: cm_deref_id(cm_id_priv);
1629} 1645}
@@ -1632,7 +1648,6 @@ static int cm_rep_handler(struct cm_work *work)
1632{ 1648{
1633 struct cm_id_private *cm_id_priv; 1649 struct cm_id_private *cm_id_priv;
1634 struct cm_rep_msg *rep_msg; 1650 struct cm_rep_msg *rep_msg;
1635 unsigned long flags;
1636 int ret; 1651 int ret;
1637 1652
1638 rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; 1653 rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1644,13 +1659,13 @@ static int cm_rep_handler(struct cm_work *work)
1644 1659
1645 cm_format_rep_event(work); 1660 cm_format_rep_event(work);
1646 1661
1647 spin_lock_irqsave(&cm_id_priv->lock, flags); 1662 spin_lock_irq(&cm_id_priv->lock);
1648 switch (cm_id_priv->id.state) { 1663 switch (cm_id_priv->id.state) {
1649 case IB_CM_REQ_SENT: 1664 case IB_CM_REQ_SENT:
1650 case IB_CM_MRA_REQ_RCVD: 1665 case IB_CM_MRA_REQ_RCVD:
1651 break; 1666 break;
1652 default: 1667 default:
1653 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1668 spin_unlock_irq(&cm_id_priv->lock);
1654 ret = -EINVAL; 1669 ret = -EINVAL;
1655 goto error; 1670 goto error;
1656 } 1671 }
@@ -1663,7 +1678,7 @@ static int cm_rep_handler(struct cm_work *work)
1663 /* Check for duplicate REP. */ 1678 /* Check for duplicate REP. */
1664 if (cm_insert_remote_id(cm_id_priv->timewait_info)) { 1679 if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
1665 spin_unlock(&cm.lock); 1680 spin_unlock(&cm.lock);
1666 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1681 spin_unlock_irq(&cm_id_priv->lock);
1667 ret = -EINVAL; 1682 ret = -EINVAL;
1668 goto error; 1683 goto error;
1669 } 1684 }
@@ -1673,7 +1688,7 @@ static int cm_rep_handler(struct cm_work *work)
1673 &cm.remote_id_table); 1688 &cm.remote_id_table);
1674 cm_id_priv->timewait_info->inserted_remote_id = 0; 1689 cm_id_priv->timewait_info->inserted_remote_id = 0;
1675 spin_unlock(&cm.lock); 1690 spin_unlock(&cm.lock);
1676 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1691 spin_unlock_irq(&cm_id_priv->lock);
1677 cm_issue_rej(work->port, work->mad_recv_wc, 1692 cm_issue_rej(work->port, work->mad_recv_wc,
1678 IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP, 1693 IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
1679 NULL, 0); 1694 NULL, 0);
@@ -1689,6 +1704,13 @@ static int cm_rep_handler(struct cm_work *work)
1689 cm_id_priv->responder_resources = rep_msg->initiator_depth; 1704 cm_id_priv->responder_resources = rep_msg->initiator_depth;
1690 cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg); 1705 cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
1691 cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); 1706 cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1707 cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
1708 cm_id_priv->av.timeout =
1709 cm_ack_timeout(cm_id_priv->target_ack_delay,
1710 cm_id_priv->av.timeout - 1);
1711 cm_id_priv->alt_av.timeout =
1712 cm_ack_timeout(cm_id_priv->target_ack_delay,
1713 cm_id_priv->alt_av.timeout - 1);
1692 1714
1693 /* todo: handle peer_to_peer */ 1715 /* todo: handle peer_to_peer */
1694 1716
@@ -1696,7 +1718,7 @@ static int cm_rep_handler(struct cm_work *work)
1696 ret = atomic_inc_and_test(&cm_id_priv->work_count); 1718 ret = atomic_inc_and_test(&cm_id_priv->work_count);
1697 if (!ret) 1719 if (!ret)
1698 list_add_tail(&work->list, &cm_id_priv->work_list); 1720 list_add_tail(&work->list, &cm_id_priv->work_list);
1699 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1721 spin_unlock_irq(&cm_id_priv->lock);
1700 1722
1701 if (ret) 1723 if (ret)
1702 cm_process_work(cm_id_priv, work); 1724 cm_process_work(cm_id_priv, work);
@@ -1712,7 +1734,6 @@ error:
1712static int cm_establish_handler(struct cm_work *work) 1734static int cm_establish_handler(struct cm_work *work)
1713{ 1735{
1714 struct cm_id_private *cm_id_priv; 1736 struct cm_id_private *cm_id_priv;
1715 unsigned long flags;
1716 int ret; 1737 int ret;
1717 1738
1718 /* See comment in cm_establish about lookup. */ 1739 /* See comment in cm_establish about lookup. */
@@ -1720,9 +1741,9 @@ static int cm_establish_handler(struct cm_work *work)
1720 if (!cm_id_priv) 1741 if (!cm_id_priv)
1721 return -EINVAL; 1742 return -EINVAL;
1722 1743
1723 spin_lock_irqsave(&cm_id_priv->lock, flags); 1744 spin_lock_irq(&cm_id_priv->lock);
1724 if (cm_id_priv->id.state != IB_CM_ESTABLISHED) { 1745 if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
1725 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1746 spin_unlock_irq(&cm_id_priv->lock);
1726 goto out; 1747 goto out;
1727 } 1748 }
1728 1749
@@ -1730,7 +1751,7 @@ static int cm_establish_handler(struct cm_work *work)
1730 ret = atomic_inc_and_test(&cm_id_priv->work_count); 1751 ret = atomic_inc_and_test(&cm_id_priv->work_count);
1731 if (!ret) 1752 if (!ret)
1732 list_add_tail(&work->list, &cm_id_priv->work_list); 1753 list_add_tail(&work->list, &cm_id_priv->work_list);
1733 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1754 spin_unlock_irq(&cm_id_priv->lock);
1734 1755
1735 if (ret) 1756 if (ret)
1736 cm_process_work(cm_id_priv, work); 1757 cm_process_work(cm_id_priv, work);
@@ -1746,7 +1767,6 @@ static int cm_rtu_handler(struct cm_work *work)
1746{ 1767{
1747 struct cm_id_private *cm_id_priv; 1768 struct cm_id_private *cm_id_priv;
1748 struct cm_rtu_msg *rtu_msg; 1769 struct cm_rtu_msg *rtu_msg;
1749 unsigned long flags;
1750 int ret; 1770 int ret;
1751 1771
1752 rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad; 1772 rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1757,10 +1777,10 @@ static int cm_rtu_handler(struct cm_work *work)
1757 1777
1758 work->cm_event.private_data = &rtu_msg->private_data; 1778 work->cm_event.private_data = &rtu_msg->private_data;
1759 1779
1760 spin_lock_irqsave(&cm_id_priv->lock, flags); 1780 spin_lock_irq(&cm_id_priv->lock);
1761 if (cm_id_priv->id.state != IB_CM_REP_SENT && 1781 if (cm_id_priv->id.state != IB_CM_REP_SENT &&
1762 cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) { 1782 cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
1763 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1783 spin_unlock_irq(&cm_id_priv->lock);
1764 goto out; 1784 goto out;
1765 } 1785 }
1766 cm_id_priv->id.state = IB_CM_ESTABLISHED; 1786 cm_id_priv->id.state = IB_CM_ESTABLISHED;
@@ -1769,7 +1789,7 @@ static int cm_rtu_handler(struct cm_work *work)
1769 ret = atomic_inc_and_test(&cm_id_priv->work_count); 1789 ret = atomic_inc_and_test(&cm_id_priv->work_count);
1770 if (!ret) 1790 if (!ret)
1771 list_add_tail(&work->list, &cm_id_priv->work_list); 1791 list_add_tail(&work->list, &cm_id_priv->work_list);
1772 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1792 spin_unlock_irq(&cm_id_priv->lock);
1773 1793
1774 if (ret) 1794 if (ret)
1775 cm_process_work(cm_id_priv, work); 1795 cm_process_work(cm_id_priv, work);
@@ -1932,7 +1952,6 @@ static int cm_dreq_handler(struct cm_work *work)
1932 struct cm_id_private *cm_id_priv; 1952 struct cm_id_private *cm_id_priv;
1933 struct cm_dreq_msg *dreq_msg; 1953 struct cm_dreq_msg *dreq_msg;
1934 struct ib_mad_send_buf *msg = NULL; 1954 struct ib_mad_send_buf *msg = NULL;
1935 unsigned long flags;
1936 int ret; 1955 int ret;
1937 1956
1938 dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad; 1957 dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1945,7 +1964,7 @@ static int cm_dreq_handler(struct cm_work *work)
1945 1964
1946 work->cm_event.private_data = &dreq_msg->private_data; 1965 work->cm_event.private_data = &dreq_msg->private_data;
1947 1966
1948 spin_lock_irqsave(&cm_id_priv->lock, flags); 1967 spin_lock_irq(&cm_id_priv->lock);
1949 if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg)) 1968 if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
1950 goto unlock; 1969 goto unlock;
1951 1970
@@ -1964,7 +1983,7 @@ static int cm_dreq_handler(struct cm_work *work)
1964 cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, 1983 cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
1965 cm_id_priv->private_data, 1984 cm_id_priv->private_data,
1966 cm_id_priv->private_data_len); 1985 cm_id_priv->private_data_len);
1967 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1986 spin_unlock_irq(&cm_id_priv->lock);
1968 1987
1969 if (ib_post_send_mad(msg, NULL)) 1988 if (ib_post_send_mad(msg, NULL))
1970 cm_free_msg(msg); 1989 cm_free_msg(msg);
@@ -1977,7 +1996,7 @@ static int cm_dreq_handler(struct cm_work *work)
1977 ret = atomic_inc_and_test(&cm_id_priv->work_count); 1996 ret = atomic_inc_and_test(&cm_id_priv->work_count);
1978 if (!ret) 1997 if (!ret)
1979 list_add_tail(&work->list, &cm_id_priv->work_list); 1998 list_add_tail(&work->list, &cm_id_priv->work_list);
1980 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1999 spin_unlock_irq(&cm_id_priv->lock);
1981 2000
1982 if (ret) 2001 if (ret)
1983 cm_process_work(cm_id_priv, work); 2002 cm_process_work(cm_id_priv, work);
@@ -1985,7 +2004,7 @@ static int cm_dreq_handler(struct cm_work *work)
1985 cm_deref_id(cm_id_priv); 2004 cm_deref_id(cm_id_priv);
1986 return 0; 2005 return 0;
1987 2006
1988unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2007unlock: spin_unlock_irq(&cm_id_priv->lock);
1989deref: cm_deref_id(cm_id_priv); 2008deref: cm_deref_id(cm_id_priv);
1990 return -EINVAL; 2009 return -EINVAL;
1991} 2010}
@@ -1994,7 +2013,6 @@ static int cm_drep_handler(struct cm_work *work)
1994{ 2013{
1995 struct cm_id_private *cm_id_priv; 2014 struct cm_id_private *cm_id_priv;
1996 struct cm_drep_msg *drep_msg; 2015 struct cm_drep_msg *drep_msg;
1997 unsigned long flags;
1998 int ret; 2016 int ret;
1999 2017
2000 drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad; 2018 drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -2005,10 +2023,10 @@ static int cm_drep_handler(struct cm_work *work)
2005 2023
2006 work->cm_event.private_data = &drep_msg->private_data; 2024 work->cm_event.private_data = &drep_msg->private_data;
2007 2025
2008 spin_lock_irqsave(&cm_id_priv->lock, flags); 2026 spin_lock_irq(&cm_id_priv->lock);
2009 if (cm_id_priv->id.state != IB_CM_DREQ_SENT && 2027 if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
2010 cm_id_priv->id.state != IB_CM_DREQ_RCVD) { 2028 cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
2011 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2029 spin_unlock_irq(&cm_id_priv->lock);
2012 goto out; 2030 goto out;
2013 } 2031 }
2014 cm_enter_timewait(cm_id_priv); 2032 cm_enter_timewait(cm_id_priv);
@@ -2017,7 +2035,7 @@ static int cm_drep_handler(struct cm_work *work)
2017 ret = atomic_inc_and_test(&cm_id_priv->work_count); 2035 ret = atomic_inc_and_test(&cm_id_priv->work_count);
2018 if (!ret) 2036 if (!ret)
2019 list_add_tail(&work->list, &cm_id_priv->work_list); 2037 list_add_tail(&work->list, &cm_id_priv->work_list);
2020 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2038 spin_unlock_irq(&cm_id_priv->lock);
2021 2039
2022 if (ret) 2040 if (ret)
2023 cm_process_work(cm_id_priv, work); 2041 cm_process_work(cm_id_priv, work);
@@ -2107,17 +2125,16 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
2107{ 2125{
2108 struct cm_timewait_info *timewait_info; 2126 struct cm_timewait_info *timewait_info;
2109 struct cm_id_private *cm_id_priv; 2127 struct cm_id_private *cm_id_priv;
2110 unsigned long flags;
2111 __be32 remote_id; 2128 __be32 remote_id;
2112 2129
2113 remote_id = rej_msg->local_comm_id; 2130 remote_id = rej_msg->local_comm_id;
2114 2131
2115 if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) { 2132 if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
2116 spin_lock_irqsave(&cm.lock, flags); 2133 spin_lock_irq(&cm.lock);
2117 timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari), 2134 timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari),
2118 remote_id); 2135 remote_id);
2119 if (!timewait_info) { 2136 if (!timewait_info) {
2120 spin_unlock_irqrestore(&cm.lock, flags); 2137 spin_unlock_irq(&cm.lock);
2121 return NULL; 2138 return NULL;
2122 } 2139 }
2123 cm_id_priv = idr_find(&cm.local_id_table, (__force int) 2140 cm_id_priv = idr_find(&cm.local_id_table, (__force int)
@@ -2129,7 +2146,7 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
2129 else 2146 else
2130 cm_id_priv = NULL; 2147 cm_id_priv = NULL;
2131 } 2148 }
2132 spin_unlock_irqrestore(&cm.lock, flags); 2149 spin_unlock_irq(&cm.lock);
2133 } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ) 2150 } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
2134 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0); 2151 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
2135 else 2152 else
@@ -2142,7 +2159,6 @@ static int cm_rej_handler(struct cm_work *work)
2142{ 2159{
2143 struct cm_id_private *cm_id_priv; 2160 struct cm_id_private *cm_id_priv;
2144 struct cm_rej_msg *rej_msg; 2161 struct cm_rej_msg *rej_msg;
2145 unsigned long flags;
2146 int ret; 2162 int ret;
2147 2163
2148 rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad; 2164 rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -2152,7 +2168,7 @@ static int cm_rej_handler(struct cm_work *work)
2152 2168
2153 cm_format_rej_event(work); 2169 cm_format_rej_event(work);
2154 2170
2155 spin_lock_irqsave(&cm_id_priv->lock, flags); 2171 spin_lock_irq(&cm_id_priv->lock);
2156 switch (cm_id_priv->id.state) { 2172 switch (cm_id_priv->id.state) {
2157 case IB_CM_REQ_SENT: 2173 case IB_CM_REQ_SENT:
2158 case IB_CM_MRA_REQ_RCVD: 2174 case IB_CM_MRA_REQ_RCVD:
@@ -2176,7 +2192,7 @@ static int cm_rej_handler(struct cm_work *work)
2176 cm_enter_timewait(cm_id_priv); 2192 cm_enter_timewait(cm_id_priv);
2177 break; 2193 break;
2178 default: 2194 default:
2179 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2195 spin_unlock_irq(&cm_id_priv->lock);
2180 ret = -EINVAL; 2196 ret = -EINVAL;
2181 goto out; 2197 goto out;
2182 } 2198 }
@@ -2184,7 +2200,7 @@ static int cm_rej_handler(struct cm_work *work)
2184 ret = atomic_inc_and_test(&cm_id_priv->work_count); 2200 ret = atomic_inc_and_test(&cm_id_priv->work_count);
2185 if (!ret) 2201 if (!ret)
2186 list_add_tail(&work->list, &cm_id_priv->work_list); 2202 list_add_tail(&work->list, &cm_id_priv->work_list);
2187 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2203 spin_unlock_irq(&cm_id_priv->lock);
2188 2204
2189 if (ret) 2205 if (ret)
2190 cm_process_work(cm_id_priv, work); 2206 cm_process_work(cm_id_priv, work);
@@ -2295,7 +2311,6 @@ static int cm_mra_handler(struct cm_work *work)
2295{ 2311{
2296 struct cm_id_private *cm_id_priv; 2312 struct cm_id_private *cm_id_priv;
2297 struct cm_mra_msg *mra_msg; 2313 struct cm_mra_msg *mra_msg;
2298 unsigned long flags;
2299 int timeout, ret; 2314 int timeout, ret;
2300 2315
2301 mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad; 2316 mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -2307,9 +2322,9 @@ static int cm_mra_handler(struct cm_work *work)
2307 work->cm_event.param.mra_rcvd.service_timeout = 2322 work->cm_event.param.mra_rcvd.service_timeout =
2308 cm_mra_get_service_timeout(mra_msg); 2323 cm_mra_get_service_timeout(mra_msg);
2309 timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + 2324 timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
2310 cm_convert_to_ms(cm_id_priv->av.packet_life_time); 2325 cm_convert_to_ms(cm_id_priv->av.timeout);
2311 2326
2312 spin_lock_irqsave(&cm_id_priv->lock, flags); 2327 spin_lock_irq(&cm_id_priv->lock);
2313 switch (cm_id_priv->id.state) { 2328 switch (cm_id_priv->id.state) {
2314 case IB_CM_REQ_SENT: 2329 case IB_CM_REQ_SENT:
2315 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ || 2330 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
@@ -2342,7 +2357,7 @@ static int cm_mra_handler(struct cm_work *work)
2342 ret = atomic_inc_and_test(&cm_id_priv->work_count); 2357 ret = atomic_inc_and_test(&cm_id_priv->work_count);
2343 if (!ret) 2358 if (!ret)
2344 list_add_tail(&work->list, &cm_id_priv->work_list); 2359 list_add_tail(&work->list, &cm_id_priv->work_list);
2345 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2360 spin_unlock_irq(&cm_id_priv->lock);
2346 2361
2347 if (ret) 2362 if (ret)
2348 cm_process_work(cm_id_priv, work); 2363 cm_process_work(cm_id_priv, work);
@@ -2350,7 +2365,7 @@ static int cm_mra_handler(struct cm_work *work)
2350 cm_deref_id(cm_id_priv); 2365 cm_deref_id(cm_id_priv);
2351 return 0; 2366 return 0;
2352out: 2367out:
2353 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2368 spin_unlock_irq(&cm_id_priv->lock);
2354 cm_deref_id(cm_id_priv); 2369 cm_deref_id(cm_id_priv);
2355 return -EINVAL; 2370 return -EINVAL;
2356} 2371}
@@ -2379,7 +2394,8 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg,
2379 cm_lap_set_sl(lap_msg, alternate_path->sl); 2394 cm_lap_set_sl(lap_msg, alternate_path->sl);
2380 cm_lap_set_subnet_local(lap_msg, 1); /* local only... */ 2395 cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
2381 cm_lap_set_local_ack_timeout(lap_msg, 2396 cm_lap_set_local_ack_timeout(lap_msg,
2382 min(31, alternate_path->packet_life_time + 1)); 2397 cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
2398 alternate_path->packet_life_time));
2383 2399
2384 if (private_data && private_data_len) 2400 if (private_data && private_data_len)
2385 memcpy(lap_msg->private_data, private_data, private_data_len); 2401 memcpy(lap_msg->private_data, private_data, private_data_len);
@@ -2410,6 +2426,9 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
2410 ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av); 2426 ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
2411 if (ret) 2427 if (ret)
2412 goto out; 2428 goto out;
2429 cm_id_priv->alt_av.timeout =
2430 cm_ack_timeout(cm_id_priv->target_ack_delay,
2431 cm_id_priv->alt_av.timeout - 1);
2413 2432
2414 ret = cm_alloc_msg(cm_id_priv, &msg); 2433 ret = cm_alloc_msg(cm_id_priv, &msg);
2415 if (ret) 2434 if (ret)
@@ -2465,7 +2484,6 @@ static int cm_lap_handler(struct cm_work *work)
2465 struct cm_lap_msg *lap_msg; 2484 struct cm_lap_msg *lap_msg;
2466 struct ib_cm_lap_event_param *param; 2485 struct ib_cm_lap_event_param *param;
2467 struct ib_mad_send_buf *msg = NULL; 2486 struct ib_mad_send_buf *msg = NULL;
2468 unsigned long flags;
2469 int ret; 2487 int ret;
2470 2488
2471 /* todo: verify LAP request and send reject APR if invalid. */ 2489 /* todo: verify LAP request and send reject APR if invalid. */
@@ -2480,7 +2498,7 @@ static int cm_lap_handler(struct cm_work *work)
2480 cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg); 2498 cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
2481 work->cm_event.private_data = &lap_msg->private_data; 2499 work->cm_event.private_data = &lap_msg->private_data;
2482 2500
2483 spin_lock_irqsave(&cm_id_priv->lock, flags); 2501 spin_lock_irq(&cm_id_priv->lock);
2484 if (cm_id_priv->id.state != IB_CM_ESTABLISHED) 2502 if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
2485 goto unlock; 2503 goto unlock;
2486 2504
@@ -2497,7 +2515,7 @@ static int cm_lap_handler(struct cm_work *work)
2497 cm_id_priv->service_timeout, 2515 cm_id_priv->service_timeout,
2498 cm_id_priv->private_data, 2516 cm_id_priv->private_data,
2499 cm_id_priv->private_data_len); 2517 cm_id_priv->private_data_len);
2500 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2518 spin_unlock_irq(&cm_id_priv->lock);
2501 2519
2502 if (ib_post_send_mad(msg, NULL)) 2520 if (ib_post_send_mad(msg, NULL))
2503 cm_free_msg(msg); 2521 cm_free_msg(msg);
@@ -2515,7 +2533,7 @@ static int cm_lap_handler(struct cm_work *work)
2515 ret = atomic_inc_and_test(&cm_id_priv->work_count); 2533 ret = atomic_inc_and_test(&cm_id_priv->work_count);
2516 if (!ret) 2534 if (!ret)
2517 list_add_tail(&work->list, &cm_id_priv->work_list); 2535 list_add_tail(&work->list, &cm_id_priv->work_list);
2518 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2536 spin_unlock_irq(&cm_id_priv->lock);
2519 2537
2520 if (ret) 2538 if (ret)
2521 cm_process_work(cm_id_priv, work); 2539 cm_process_work(cm_id_priv, work);
@@ -2523,7 +2541,7 @@ static int cm_lap_handler(struct cm_work *work)
2523 cm_deref_id(cm_id_priv); 2541 cm_deref_id(cm_id_priv);
2524 return 0; 2542 return 0;
2525 2543
2526unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2544unlock: spin_unlock_irq(&cm_id_priv->lock);
2527deref: cm_deref_id(cm_id_priv); 2545deref: cm_deref_id(cm_id_priv);
2528 return -EINVAL; 2546 return -EINVAL;
2529} 2547}
@@ -2598,7 +2616,6 @@ static int cm_apr_handler(struct cm_work *work)
2598{ 2616{
2599 struct cm_id_private *cm_id_priv; 2617 struct cm_id_private *cm_id_priv;
2600 struct cm_apr_msg *apr_msg; 2618 struct cm_apr_msg *apr_msg;
2601 unsigned long flags;
2602 int ret; 2619 int ret;
2603 2620
2604 apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad; 2621 apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -2612,11 +2629,11 @@ static int cm_apr_handler(struct cm_work *work)
2612 work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length; 2629 work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
2613 work->cm_event.private_data = &apr_msg->private_data; 2630 work->cm_event.private_data = &apr_msg->private_data;
2614 2631
2615 spin_lock_irqsave(&cm_id_priv->lock, flags); 2632 spin_lock_irq(&cm_id_priv->lock);
2616 if (cm_id_priv->id.state != IB_CM_ESTABLISHED || 2633 if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
2617 (cm_id_priv->id.lap_state != IB_CM_LAP_SENT && 2634 (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
2618 cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) { 2635 cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
2619 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2636 spin_unlock_irq(&cm_id_priv->lock);
2620 goto out; 2637 goto out;
2621 } 2638 }
2622 cm_id_priv->id.lap_state = IB_CM_LAP_IDLE; 2639 cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
@@ -2626,7 +2643,7 @@ static int cm_apr_handler(struct cm_work *work)
2626 ret = atomic_inc_and_test(&cm_id_priv->work_count); 2643 ret = atomic_inc_and_test(&cm_id_priv->work_count);
2627 if (!ret) 2644 if (!ret)
2628 list_add_tail(&work->list, &cm_id_priv->work_list); 2645 list_add_tail(&work->list, &cm_id_priv->work_list);
2629 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2646 spin_unlock_irq(&cm_id_priv->lock);
2630 2647
2631 if (ret) 2648 if (ret)
2632 cm_process_work(cm_id_priv, work); 2649 cm_process_work(cm_id_priv, work);
@@ -2761,7 +2778,6 @@ static int cm_sidr_req_handler(struct cm_work *work)
2761 struct cm_id_private *cm_id_priv, *cur_cm_id_priv; 2778 struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
2762 struct cm_sidr_req_msg *sidr_req_msg; 2779 struct cm_sidr_req_msg *sidr_req_msg;
2763 struct ib_wc *wc; 2780 struct ib_wc *wc;
2764 unsigned long flags;
2765 2781
2766 cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL); 2782 cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL);
2767 if (IS_ERR(cm_id)) 2783 if (IS_ERR(cm_id))
@@ -2778,27 +2794,26 @@ static int cm_sidr_req_handler(struct cm_work *work)
2778 work->mad_recv_wc->recv_buf.grh, 2794 work->mad_recv_wc->recv_buf.grh,
2779 &cm_id_priv->av); 2795 &cm_id_priv->av);
2780 cm_id_priv->id.remote_id = sidr_req_msg->request_id; 2796 cm_id_priv->id.remote_id = sidr_req_msg->request_id;
2781 cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
2782 cm_id_priv->tid = sidr_req_msg->hdr.tid; 2797 cm_id_priv->tid = sidr_req_msg->hdr.tid;
2783 atomic_inc(&cm_id_priv->work_count); 2798 atomic_inc(&cm_id_priv->work_count);
2784 2799
2785 spin_lock_irqsave(&cm.lock, flags); 2800 spin_lock_irq(&cm.lock);
2786 cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv); 2801 cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
2787 if (cur_cm_id_priv) { 2802 if (cur_cm_id_priv) {
2788 spin_unlock_irqrestore(&cm.lock, flags); 2803 spin_unlock_irq(&cm.lock);
2789 goto out; /* Duplicate message. */ 2804 goto out; /* Duplicate message. */
2790 } 2805 }
2806 cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
2791 cur_cm_id_priv = cm_find_listen(cm_id->device, 2807 cur_cm_id_priv = cm_find_listen(cm_id->device,
2792 sidr_req_msg->service_id, 2808 sidr_req_msg->service_id,
2793 sidr_req_msg->private_data); 2809 sidr_req_msg->private_data);
2794 if (!cur_cm_id_priv) { 2810 if (!cur_cm_id_priv) {
2795 rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); 2811 spin_unlock_irq(&cm.lock);
2796 spin_unlock_irqrestore(&cm.lock, flags); 2812 cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
2797 /* todo: reply with no match */
2798 goto out; /* No match. */ 2813 goto out; /* No match. */
2799 } 2814 }
2800 atomic_inc(&cur_cm_id_priv->refcount); 2815 atomic_inc(&cur_cm_id_priv->refcount);
2801 spin_unlock_irqrestore(&cm.lock, flags); 2816 spin_unlock_irq(&cm.lock);
2802 2817
2803 cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler; 2818 cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
2804 cm_id_priv->id.context = cur_cm_id_priv->id.context; 2819 cm_id_priv->id.context = cur_cm_id_priv->id.context;
@@ -2899,7 +2914,6 @@ static int cm_sidr_rep_handler(struct cm_work *work)
2899{ 2914{
2900 struct cm_sidr_rep_msg *sidr_rep_msg; 2915 struct cm_sidr_rep_msg *sidr_rep_msg;
2901 struct cm_id_private *cm_id_priv; 2916 struct cm_id_private *cm_id_priv;
2902 unsigned long flags;
2903 2917
2904 sidr_rep_msg = (struct cm_sidr_rep_msg *) 2918 sidr_rep_msg = (struct cm_sidr_rep_msg *)
2905 work->mad_recv_wc->recv_buf.mad; 2919 work->mad_recv_wc->recv_buf.mad;
@@ -2907,14 +2921,14 @@ static int cm_sidr_rep_handler(struct cm_work *work)
2907 if (!cm_id_priv) 2921 if (!cm_id_priv)
2908 return -EINVAL; /* Unmatched reply. */ 2922 return -EINVAL; /* Unmatched reply. */
2909 2923
2910 spin_lock_irqsave(&cm_id_priv->lock, flags); 2924 spin_lock_irq(&cm_id_priv->lock);
2911 if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) { 2925 if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
2912 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2926 spin_unlock_irq(&cm_id_priv->lock);
2913 goto out; 2927 goto out;
2914 } 2928 }
2915 cm_id_priv->id.state = IB_CM_IDLE; 2929 cm_id_priv->id.state = IB_CM_IDLE;
2916 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); 2930 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2917 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2931 spin_unlock_irq(&cm_id_priv->lock);
2918 2932
2919 cm_format_sidr_rep_event(work); 2933 cm_format_sidr_rep_event(work);
2920 cm_process_work(cm_id_priv, work); 2934 cm_process_work(cm_id_priv, work);
@@ -2930,14 +2944,13 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
2930 struct cm_id_private *cm_id_priv; 2944 struct cm_id_private *cm_id_priv;
2931 struct ib_cm_event cm_event; 2945 struct ib_cm_event cm_event;
2932 enum ib_cm_state state; 2946 enum ib_cm_state state;
2933 unsigned long flags;
2934 int ret; 2947 int ret;
2935 2948
2936 memset(&cm_event, 0, sizeof cm_event); 2949 memset(&cm_event, 0, sizeof cm_event);
2937 cm_id_priv = msg->context[0]; 2950 cm_id_priv = msg->context[0];
2938 2951
2939 /* Discard old sends or ones without a response. */ 2952 /* Discard old sends or ones without a response. */
2940 spin_lock_irqsave(&cm_id_priv->lock, flags); 2953 spin_lock_irq(&cm_id_priv->lock);
2941 state = (enum ib_cm_state) (unsigned long) msg->context[1]; 2954 state = (enum ib_cm_state) (unsigned long) msg->context[1];
2942 if (msg != cm_id_priv->msg || state != cm_id_priv->id.state) 2955 if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
2943 goto discard; 2956 goto discard;
@@ -2964,7 +2977,7 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
2964 default: 2977 default:
2965 goto discard; 2978 goto discard;
2966 } 2979 }
2967 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2980 spin_unlock_irq(&cm_id_priv->lock);
2968 cm_event.param.send_status = wc_status; 2981 cm_event.param.send_status = wc_status;
2969 2982
2970 /* No other events can occur on the cm_id at this point. */ 2983 /* No other events can occur on the cm_id at this point. */
@@ -2974,7 +2987,7 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
2974 ib_destroy_cm_id(&cm_id_priv->id); 2987 ib_destroy_cm_id(&cm_id_priv->id);
2975 return; 2988 return;
2976discard: 2989discard:
2977 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2990 spin_unlock_irq(&cm_id_priv->lock);
2978 cm_free_msg(msg); 2991 cm_free_msg(msg);
2979} 2992}
2980 2993
@@ -3269,8 +3282,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
3269 *qp_attr_mask |= IB_QP_ALT_PATH; 3282 *qp_attr_mask |= IB_QP_ALT_PATH;
3270 qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; 3283 qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3271 qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; 3284 qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3272 qp_attr->alt_timeout = 3285 qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3273 cm_id_priv->alt_av.packet_life_time + 1;
3274 qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; 3286 qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3275 } 3287 }
3276 ret = 0; 3288 ret = 0;
@@ -3308,8 +3320,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
3308 *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | 3320 *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
3309 IB_QP_RNR_RETRY | 3321 IB_QP_RNR_RETRY |
3310 IB_QP_MAX_QP_RD_ATOMIC; 3322 IB_QP_MAX_QP_RD_ATOMIC;
3311 qp_attr->timeout = 3323 qp_attr->timeout = cm_id_priv->av.timeout;
3312 cm_id_priv->av.packet_life_time + 1;
3313 qp_attr->retry_cnt = cm_id_priv->retry_count; 3324 qp_attr->retry_cnt = cm_id_priv->retry_count;
3314 qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; 3325 qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
3315 qp_attr->max_rd_atomic = 3326 qp_attr->max_rd_atomic =
@@ -3323,8 +3334,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
3323 *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE; 3334 *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
3324 qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; 3335 qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3325 qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; 3336 qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3326 qp_attr->alt_timeout = 3337 qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3327 cm_id_priv->alt_av.packet_life_time + 1;
3328 qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; 3338 qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3329 qp_attr->path_mig_state = IB_MIG_REARM; 3339 qp_attr->path_mig_state = IB_MIG_REARM;
3330 } 3340 }
@@ -3364,6 +3374,16 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
3364} 3374}
3365EXPORT_SYMBOL(ib_cm_init_qp_attr); 3375EXPORT_SYMBOL(ib_cm_init_qp_attr);
3366 3376
3377void cm_get_ack_delay(struct cm_device *cm_dev)
3378{
3379 struct ib_device_attr attr;
3380
3381 if (ib_query_device(cm_dev->device, &attr))
3382 cm_dev->ack_delay = 0; /* acks will rely on packet life time */
3383 else
3384 cm_dev->ack_delay = attr.local_ca_ack_delay;
3385}
3386
3367static void cm_add_one(struct ib_device *device) 3387static void cm_add_one(struct ib_device *device)
3368{ 3388{
3369 struct cm_device *cm_dev; 3389 struct cm_device *cm_dev;
@@ -3388,6 +3408,7 @@ static void cm_add_one(struct ib_device *device)
3388 return; 3408 return;
3389 3409
3390 cm_dev->device = device; 3410 cm_dev->device = device;
3411 cm_get_ack_delay(cm_dev);
3391 3412
3392 set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); 3413 set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
3393 for (i = 1; i <= device->phys_port_cnt; i++) { 3414 for (i = 1; i <= device->phys_port_cnt; i++) {
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index 4d3aee90c249..aec9c7af825d 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -35,6 +35,7 @@
35#define CM_MSGS_H 35#define CM_MSGS_H
36 36
37#include <rdma/ib_mad.h> 37#include <rdma/ib_mad.h>
38#include <rdma/ib_cm.h>
38 39
39/* 40/*
40 * Parameters to routines below should be in network-byte order, and values 41 * Parameters to routines below should be in network-byte order, and values
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 32a0e66d2a23..23af7a032a03 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2326,7 +2326,6 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
2326 rep.private_data_len = conn_param->private_data_len; 2326 rep.private_data_len = conn_param->private_data_len;
2327 rep.responder_resources = conn_param->responder_resources; 2327 rep.responder_resources = conn_param->responder_resources;
2328 rep.initiator_depth = conn_param->initiator_depth; 2328 rep.initiator_depth = conn_param->initiator_depth;
2329 rep.target_ack_delay = CMA_CM_RESPONSE_TIMEOUT;
2330 rep.failover_accepted = 0; 2329 rep.failover_accepted = 0;
2331 rep.flow_control = conn_param->flow_control; 2330 rep.flow_control = conn_param->flow_control;
2332 rep.rnr_retry_count = conn_param->rnr_retry_count; 2331 rep.rnr_retry_count = conn_param->rnr_retry_count;
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 85ccf13b8041..6b8faca02f8a 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -675,10 +675,16 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
675 struct ib_mad_port_private *port_priv; 675 struct ib_mad_port_private *port_priv;
676 struct ib_mad_agent_private *recv_mad_agent = NULL; 676 struct ib_mad_agent_private *recv_mad_agent = NULL;
677 struct ib_device *device = mad_agent_priv->agent.device; 677 struct ib_device *device = mad_agent_priv->agent.device;
678 u8 port_num = mad_agent_priv->agent.port_num; 678 u8 port_num;
679 struct ib_wc mad_wc; 679 struct ib_wc mad_wc;
680 struct ib_send_wr *send_wr = &mad_send_wr->send_wr; 680 struct ib_send_wr *send_wr = &mad_send_wr->send_wr;
681 681
682 if (device->node_type == RDMA_NODE_IB_SWITCH &&
683 smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
684 port_num = send_wr->wr.ud.port_num;
685 else
686 port_num = mad_agent_priv->agent.port_num;
687
682 /* 688 /*
683 * Directed route handling starts if the initial LID routed part of 689 * Directed route handling starts if the initial LID routed part of
684 * a request or the ending LID routed part of a response is empty. 690 * a request or the ending LID routed part of a response is empty.
@@ -1839,6 +1845,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
1839 struct ib_mad_private *recv, *response; 1845 struct ib_mad_private *recv, *response;
1840 struct ib_mad_list_head *mad_list; 1846 struct ib_mad_list_head *mad_list;
1841 struct ib_mad_agent_private *mad_agent; 1847 struct ib_mad_agent_private *mad_agent;
1848 int port_num;
1842 1849
1843 response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); 1850 response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
1844 if (!response) 1851 if (!response)
@@ -1872,25 +1879,50 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
1872 if (!validate_mad(&recv->mad.mad, qp_info->qp->qp_num)) 1879 if (!validate_mad(&recv->mad.mad, qp_info->qp->qp_num))
1873 goto out; 1880 goto out;
1874 1881
1882 if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH)
1883 port_num = wc->port_num;
1884 else
1885 port_num = port_priv->port_num;
1886
1875 if (recv->mad.mad.mad_hdr.mgmt_class == 1887 if (recv->mad.mad.mad_hdr.mgmt_class ==
1876 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { 1888 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
1889 enum smi_forward_action retsmi;
1890
1877 if (smi_handle_dr_smp_recv(&recv->mad.smp, 1891 if (smi_handle_dr_smp_recv(&recv->mad.smp,
1878 port_priv->device->node_type, 1892 port_priv->device->node_type,
1879 port_priv->port_num, 1893 port_num,
1880 port_priv->device->phys_port_cnt) == 1894 port_priv->device->phys_port_cnt) ==
1881 IB_SMI_DISCARD) 1895 IB_SMI_DISCARD)
1882 goto out; 1896 goto out;
1883 1897
1884 if (smi_check_forward_dr_smp(&recv->mad.smp) == IB_SMI_LOCAL) 1898 retsmi = smi_check_forward_dr_smp(&recv->mad.smp);
1899 if (retsmi == IB_SMI_LOCAL)
1885 goto local; 1900 goto local;
1886 1901
1887 if (smi_handle_dr_smp_send(&recv->mad.smp, 1902 if (retsmi == IB_SMI_SEND) { /* don't forward */
1888 port_priv->device->node_type, 1903 if (smi_handle_dr_smp_send(&recv->mad.smp,
1889 port_priv->port_num) == IB_SMI_DISCARD) 1904 port_priv->device->node_type,
1890 goto out; 1905 port_num) == IB_SMI_DISCARD)
1906 goto out;
1907
1908 if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD)
1909 goto out;
1910 } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) {
1911 /* forward case for switches */
1912 memcpy(response, recv, sizeof(*response));
1913 response->header.recv_wc.wc = &response->header.wc;
1914 response->header.recv_wc.recv_buf.mad = &response->mad.mad;
1915 response->header.recv_wc.recv_buf.grh = &response->grh;
1916
1917 if (!agent_send_response(&response->mad.mad,
1918 &response->grh, wc,
1919 port_priv->device,
1920 smi_get_fwd_port(&recv->mad.smp),
1921 qp_info->qp->qp_num))
1922 response = NULL;
1891 1923
1892 if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD)
1893 goto out; 1924 goto out;
1925 }
1894 } 1926 }
1895 1927
1896local: 1928local:
@@ -1919,7 +1951,7 @@ local:
1919 agent_send_response(&response->mad.mad, 1951 agent_send_response(&response->mad.mad,
1920 &recv->grh, wc, 1952 &recv->grh, wc,
1921 port_priv->device, 1953 port_priv->device,
1922 port_priv->port_num, 1954 port_num,
1923 qp_info->qp->qp_num); 1955 qp_info->qp->qp_num);
1924 goto out; 1956 goto out;
1925 } 1957 }
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 1e13ab42b70b..15b4c4d3606d 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 Intel Corporation.  All rights reserved. 2 * Copyright (c) 2006 Intel Corporation. All rights reserved.
3 * 3 *
4 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/core/sa.h b/drivers/infiniband/core/sa.h
index 24c93fd320fb..b1d4bbf4ce5c 100644
--- a/drivers/infiniband/core/sa.h
+++ b/drivers/infiniband/core/sa.h
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Voltaire, Inc.  All rights reserved. 3 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
4 * Copyright (c) 2006 Intel Corporation. All rights reserved. 4 * Copyright (c) 2006 Intel Corporation. All rights reserved.
5 * 5 *
6 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 6469406ea9d8..20ab6b3e484d 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Voltaire, Inc.  All rights reserved. 3 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
4 * Copyright (c) 2006 Intel Corporation. All rights reserved. 4 * Copyright (c) 2006 Intel Corporation. All rights reserved.
5 * 5 *
6 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
@@ -56,6 +56,7 @@ MODULE_LICENSE("Dual BSD/GPL");
56struct ib_sa_sm_ah { 56struct ib_sa_sm_ah {
57 struct ib_ah *ah; 57 struct ib_ah *ah;
58 struct kref ref; 58 struct kref ref;
59 u16 pkey_index;
59 u8 src_path_mask; 60 u8 src_path_mask;
60}; 61};
61 62
@@ -382,6 +383,13 @@ static void update_sm_ah(struct work_struct *work)
382 kref_init(&new_ah->ref); 383 kref_init(&new_ah->ref);
383 new_ah->src_path_mask = (1 << port_attr.lmc) - 1; 384 new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
384 385
386 new_ah->pkey_index = 0;
387 if (ib_find_pkey(port->agent->device, port->port_num,
388 IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index) &&
389 ib_find_pkey(port->agent->device, port->port_num,
390 IB_DEFAULT_PKEY_PARTIAL, &new_ah->pkey_index))
391 printk(KERN_ERR "Couldn't find index for default PKey\n");
392
385 memset(&ah_attr, 0, sizeof ah_attr); 393 memset(&ah_attr, 0, sizeof ah_attr);
386 ah_attr.dlid = port_attr.sm_lid; 394 ah_attr.dlid = port_attr.sm_lid;
387 ah_attr.sl = port_attr.sm_sl; 395 ah_attr.sl = port_attr.sm_sl;
@@ -512,6 +520,35 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
512} 520}
513EXPORT_SYMBOL(ib_init_ah_from_path); 521EXPORT_SYMBOL(ib_init_ah_from_path);
514 522
523static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
524{
525 unsigned long flags;
526
527 spin_lock_irqsave(&query->port->ah_lock, flags);
528 kref_get(&query->port->sm_ah->ref);
529 query->sm_ah = query->port->sm_ah;
530 spin_unlock_irqrestore(&query->port->ah_lock, flags);
531
532 query->mad_buf = ib_create_send_mad(query->port->agent, 1,
533 query->sm_ah->pkey_index,
534 0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
535 gfp_mask);
536 if (!query->mad_buf) {
537 kref_put(&query->sm_ah->ref, free_sm_ah);
538 return -ENOMEM;
539 }
540
541 query->mad_buf->ah = query->sm_ah->ah;
542
543 return 0;
544}
545
546static void free_mad(struct ib_sa_query *query)
547{
548 ib_free_send_mad(query->mad_buf);
549 kref_put(&query->sm_ah->ref, free_sm_ah);
550}
551
515static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) 552static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
516{ 553{
517 unsigned long flags; 554 unsigned long flags;
@@ -548,20 +585,11 @@ retry:
548 query->mad_buf->context[0] = query; 585 query->mad_buf->context[0] = query;
549 query->id = id; 586 query->id = id;
550 587
551 spin_lock_irqsave(&query->port->ah_lock, flags);
552 kref_get(&query->port->sm_ah->ref);
553 query->sm_ah = query->port->sm_ah;
554 spin_unlock_irqrestore(&query->port->ah_lock, flags);
555
556 query->mad_buf->ah = query->sm_ah->ah;
557
558 ret = ib_post_send_mad(query->mad_buf, NULL); 588 ret = ib_post_send_mad(query->mad_buf, NULL);
559 if (ret) { 589 if (ret) {
560 spin_lock_irqsave(&idr_lock, flags); 590 spin_lock_irqsave(&idr_lock, flags);
561 idr_remove(&query_idr, id); 591 idr_remove(&query_idr, id);
562 spin_unlock_irqrestore(&idr_lock, flags); 592 spin_unlock_irqrestore(&idr_lock, flags);
563
564 kref_put(&query->sm_ah->ref, free_sm_ah);
565 } 593 }
566 594
567 /* 595 /*
@@ -647,13 +675,10 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
647 if (!query) 675 if (!query)
648 return -ENOMEM; 676 return -ENOMEM;
649 677
650 query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, 678 query->sa_query.port = port;
651 0, IB_MGMT_SA_HDR, 679 ret = alloc_mad(&query->sa_query, gfp_mask);
652 IB_MGMT_SA_DATA, gfp_mask); 680 if (ret)
653 if (!query->sa_query.mad_buf) {
654 ret = -ENOMEM;
655 goto err1; 681 goto err1;
656 }
657 682
658 ib_sa_client_get(client); 683 ib_sa_client_get(client);
659 query->sa_query.client = client; 684 query->sa_query.client = client;
@@ -665,7 +690,6 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
665 690
666 query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL; 691 query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
667 query->sa_query.release = ib_sa_path_rec_release; 692 query->sa_query.release = ib_sa_path_rec_release;
668 query->sa_query.port = port;
669 mad->mad_hdr.method = IB_MGMT_METHOD_GET; 693 mad->mad_hdr.method = IB_MGMT_METHOD_GET;
670 mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); 694 mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
671 mad->sa_hdr.comp_mask = comp_mask; 695 mad->sa_hdr.comp_mask = comp_mask;
@@ -683,7 +707,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
683err2: 707err2:
684 *sa_query = NULL; 708 *sa_query = NULL;
685 ib_sa_client_put(query->sa_query.client); 709 ib_sa_client_put(query->sa_query.client);
686 ib_free_send_mad(query->sa_query.mad_buf); 710 free_mad(&query->sa_query);
687 711
688err1: 712err1:
689 kfree(query); 713 kfree(query);
@@ -773,13 +797,10 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
773 if (!query) 797 if (!query)
774 return -ENOMEM; 798 return -ENOMEM;
775 799
776 query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, 800 query->sa_query.port = port;
777 0, IB_MGMT_SA_HDR, 801 ret = alloc_mad(&query->sa_query, gfp_mask);
778 IB_MGMT_SA_DATA, gfp_mask); 802 if (ret)
779 if (!query->sa_query.mad_buf) {
780 ret = -ENOMEM;
781 goto err1; 803 goto err1;
782 }
783 804
784 ib_sa_client_get(client); 805 ib_sa_client_get(client);
785 query->sa_query.client = client; 806 query->sa_query.client = client;
@@ -791,7 +812,6 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
791 812
792 query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL; 813 query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
793 query->sa_query.release = ib_sa_service_rec_release; 814 query->sa_query.release = ib_sa_service_rec_release;
794 query->sa_query.port = port;
795 mad->mad_hdr.method = method; 815 mad->mad_hdr.method = method;
796 mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC); 816 mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
797 mad->sa_hdr.comp_mask = comp_mask; 817 mad->sa_hdr.comp_mask = comp_mask;
@@ -810,7 +830,7 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
810err2: 830err2:
811 *sa_query = NULL; 831 *sa_query = NULL;
812 ib_sa_client_put(query->sa_query.client); 832 ib_sa_client_put(query->sa_query.client);
813 ib_free_send_mad(query->sa_query.mad_buf); 833 free_mad(&query->sa_query);
814 834
815err1: 835err1:
816 kfree(query); 836 kfree(query);
@@ -869,13 +889,10 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
869 if (!query) 889 if (!query)
870 return -ENOMEM; 890 return -ENOMEM;
871 891
872 query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, 892 query->sa_query.port = port;
873 0, IB_MGMT_SA_HDR, 893 ret = alloc_mad(&query->sa_query, gfp_mask);
874 IB_MGMT_SA_DATA, gfp_mask); 894 if (ret)
875 if (!query->sa_query.mad_buf) {
876 ret = -ENOMEM;
877 goto err1; 895 goto err1;
878 }
879 896
880 ib_sa_client_get(client); 897 ib_sa_client_get(client);
881 query->sa_query.client = client; 898 query->sa_query.client = client;
@@ -887,7 +904,6 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
887 904
888 query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL; 905 query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
889 query->sa_query.release = ib_sa_mcmember_rec_release; 906 query->sa_query.release = ib_sa_mcmember_rec_release;
890 query->sa_query.port = port;
891 mad->mad_hdr.method = method; 907 mad->mad_hdr.method = method;
892 mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); 908 mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
893 mad->sa_hdr.comp_mask = comp_mask; 909 mad->sa_hdr.comp_mask = comp_mask;
@@ -906,7 +922,7 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
906err2: 922err2:
907 *sa_query = NULL; 923 *sa_query = NULL;
908 ib_sa_client_put(query->sa_query.client); 924 ib_sa_client_put(query->sa_query.client);
909 ib_free_send_mad(query->sa_query.mad_buf); 925 free_mad(&query->sa_query);
910 926
911err1: 927err1:
912 kfree(query); 928 kfree(query);
@@ -939,8 +955,7 @@ static void send_handler(struct ib_mad_agent *agent,
939 idr_remove(&query_idr, query->id); 955 idr_remove(&query_idr, query->id);
940 spin_unlock_irqrestore(&idr_lock, flags); 956 spin_unlock_irqrestore(&idr_lock, flags);
941 957
942 ib_free_send_mad(mad_send_wc->send_buf); 958 free_mad(query);
943 kref_put(&query->sm_ah->ref, free_sm_ah);
944 ib_sa_client_put(query->client); 959 ib_sa_client_put(query->client);
945 query->release(query); 960 query->release(query);
946} 961}
diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c
index 2bca753eb622..87236753bce9 100644
--- a/drivers/infiniband/core/smi.c
+++ b/drivers/infiniband/core/smi.c
@@ -192,7 +192,7 @@ enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
192 } 192 }
193 /* smp->hop_ptr updated when sending */ 193 /* smp->hop_ptr updated when sending */
194 return (node_type == RDMA_NODE_IB_SWITCH ? 194 return (node_type == RDMA_NODE_IB_SWITCH ?
195 IB_SMI_HANDLE: IB_SMI_DISCARD); 195 IB_SMI_HANDLE : IB_SMI_DISCARD);
196 } 196 }
197 197
198 /* C14-13:4 -- hop_ptr = 0 -> give to SM */ 198 /* C14-13:4 -- hop_ptr = 0 -> give to SM */
@@ -211,7 +211,7 @@ enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp)
211 if (!ib_get_smp_direction(smp)) { 211 if (!ib_get_smp_direction(smp)) {
212 /* C14-9:2 -- intermediate hop */ 212 /* C14-9:2 -- intermediate hop */
213 if (hop_ptr && hop_ptr < hop_cnt) 213 if (hop_ptr && hop_ptr < hop_cnt)
214 return IB_SMI_SEND; 214 return IB_SMI_FORWARD;
215 215
216 /* C14-9:3 -- at the end of the DR segment of path */ 216 /* C14-9:3 -- at the end of the DR segment of path */
217 if (hop_ptr == hop_cnt) 217 if (hop_ptr == hop_cnt)
@@ -224,7 +224,7 @@ enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp)
224 } else { 224 } else {
225 /* C14-13:2 -- intermediate hop */ 225 /* C14-13:2 -- intermediate hop */
226 if (2 <= hop_ptr && hop_ptr <= hop_cnt) 226 if (2 <= hop_ptr && hop_ptr <= hop_cnt)
227 return IB_SMI_SEND; 227 return IB_SMI_FORWARD;
228 228
229 /* C14-13:3 -- at the end of the DR segment of path */ 229 /* C14-13:3 -- at the end of the DR segment of path */
230 if (hop_ptr == 1) 230 if (hop_ptr == 1)
@@ -233,3 +233,13 @@ enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp)
233 } 233 }
234 return IB_SMI_LOCAL; 234 return IB_SMI_LOCAL;
235} 235}
236
237/*
238 * Return the forwarding port number from initial_path for outgoing SMP and
239 * from return_path for returning SMP
240 */
241int smi_get_fwd_port(struct ib_smp *smp)
242{
243 return (!ib_get_smp_direction(smp) ? smp->initial_path[smp->hop_ptr+1] :
244 smp->return_path[smp->hop_ptr-1]);
245}
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
index 9a4b349efc30..1cfc2984434f 100644
--- a/drivers/infiniband/core/smi.h
+++ b/drivers/infiniband/core/smi.h
@@ -48,10 +48,12 @@ enum smi_action {
48enum smi_forward_action { 48enum smi_forward_action {
49 IB_SMI_LOCAL, /* SMP should be completed up the stack */ 49 IB_SMI_LOCAL, /* SMP should be completed up the stack */
50 IB_SMI_SEND, /* received DR SMP should be forwarded to the send queue */ 50 IB_SMI_SEND, /* received DR SMP should be forwarded to the send queue */
51 IB_SMI_FORWARD /* SMP should be forwarded (for switches only) */
51}; 52};
52 53
53enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, 54enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
54 int port_num, int phys_port_cnt); 55 int port_num, int phys_port_cnt);
56int smi_get_fwd_port(struct ib_smp *smp);
55extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp); 57extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp);
56extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, 58extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
57 u8 node_type, int port_num); 59 u8 node_type, int port_num);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index bf9b99292048..70b77ae67422 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -311,7 +311,7 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
311 return sprintf(buf, "N/A (no PMA)\n"); 311 return sprintf(buf, "N/A (no PMA)\n");
312 312
313 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); 313 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
314 out_mad = kmalloc(sizeof *in_mad, GFP_KERNEL); 314 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
315 if (!in_mad || !out_mad) { 315 if (!in_mad || !out_mad) {
316 ret = -ENOMEM; 316 ret = -ENOMEM;
317 goto out; 317 goto out;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 2586a3ee8eba..424983f5b1ee 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -823,7 +823,6 @@ static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file,
823 param.private_data_len = cmd.len; 823 param.private_data_len = cmd.len;
824 param.responder_resources = cmd.responder_resources; 824 param.responder_resources = cmd.responder_resources;
825 param.initiator_depth = cmd.initiator_depth; 825 param.initiator_depth = cmd.initiator_depth;
826 param.target_ack_delay = cmd.target_ack_delay;
827 param.failover_accepted = cmd.failover_accepted; 826 param.failover_accepted = cmd.failover_accepted;
828 param.flow_control = cmd.flow_control; 827 param.flow_control = cmd.flow_control;
829 param.rnr_retry_count = cmd.rnr_retry_count; 828 param.rnr_retry_count = cmd.rnr_retry_count;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index d40652a80151..26d0470eef6e 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -121,6 +121,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
121 121
122 cur_base = addr & PAGE_MASK; 122 cur_base = addr & PAGE_MASK;
123 123
124 ret = 0;
124 while (npages) { 125 while (npages) {
125 ret = get_user_pages(current, current->mm, cur_base, 126 ret = get_user_pages(current, current->mm, cur_base,
126 min_t(int, npages, 127 min_t(int, npages,
diff --git a/drivers/infiniband/hw/amso1100/Kconfig b/drivers/infiniband/hw/amso1100/Kconfig
index 809cb14ac6de..e6ce5f209e47 100644
--- a/drivers/infiniband/hw/amso1100/Kconfig
+++ b/drivers/infiniband/hw/amso1100/Kconfig
@@ -1,6 +1,6 @@
1config INFINIBAND_AMSO1100 1config INFINIBAND_AMSO1100
2 tristate "Ammasso 1100 HCA support" 2 tristate "Ammasso 1100 HCA support"
3 depends on PCI && INET && INFINIBAND 3 depends on PCI && INET
4 ---help--- 4 ---help---
5 This is a low-level driver for the Ammasso 1100 host 5 This is a low-level driver for the Ammasso 1100 host
6 channel adapter (HCA). 6 channel adapter (HCA).
diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig
index 77977f55dca3..2acec3fadf69 100644
--- a/drivers/infiniband/hw/cxgb3/Kconfig
+++ b/drivers/infiniband/hw/cxgb3/Kconfig
@@ -1,6 +1,6 @@
1config INFINIBAND_CXGB3 1config INFINIBAND_CXGB3
2 tristate "Chelsio RDMA Driver" 2 tristate "Chelsio RDMA Driver"
3 depends on CHELSIO_T3 && INFINIBAND && INET 3 depends on CHELSIO_T3 && INET
4 select GENERIC_ALLOCATOR 4 select GENERIC_ALLOCATOR
5 ---help--- 5 ---help---
6 This is an iWARP/RDMA driver for the Chelsio T3 1GbE and 6 This is an iWARP/RDMA driver for the Chelsio T3 1GbE and
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 76049afc7655..1518b41482ae 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -144,7 +144,7 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
144 } 144 }
145 wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe)); 145 wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
146 memset(wqe, 0, sizeof(*wqe)); 146 memset(wqe, 0, sizeof(*wqe));
147 build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 1, qpid, 7); 147 build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0, qpid, 7);
148 wqe->flags = cpu_to_be32(MODQP_WRITE_EC); 148 wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
149 sge_cmd = qpid << 8 | 3; 149 sge_cmd = qpid << 8 | 3;
150 wqe->sge_cmd = cpu_to_be64(sge_cmd); 150 wqe->sge_cmd = cpu_to_be64(sge_cmd);
@@ -548,7 +548,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
548 V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32; 548 V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32;
549 wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe)); 549 wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
550 memset(wqe, 0, sizeof(*wqe)); 550 memset(wqe, 0, sizeof(*wqe));
551 build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 1, 551 build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0,
552 T3_CTL_QP_TID, 7); 552 T3_CTL_QP_TID, 7);
553 wqe->flags = cpu_to_be32(MODQP_WRITE_EC); 553 wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
554 sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3; 554 sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
@@ -833,7 +833,7 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
833 wqe->ird = cpu_to_be32(attr->ird); 833 wqe->ird = cpu_to_be32(attr->ird);
834 wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr); 834 wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
835 wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size); 835 wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size);
836 wqe->rsvd = 0; 836 wqe->irs = cpu_to_be32(attr->irs);
837 skb->priority = 0; /* 0=>ToeQ; 1=>CtrlQ */ 837 skb->priority = 0; /* 0=>ToeQ; 1=>CtrlQ */
838 return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb)); 838 return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
839} 839}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index ff7290eacefb..c84d4ac49355 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -294,6 +294,7 @@ struct t3_rdma_init_attr {
294 u64 qp_dma_addr; 294 u64 qp_dma_addr;
295 u32 qp_dma_size; 295 u32 qp_dma_size;
296 u32 flags; 296 u32 flags;
297 u32 irs;
297}; 298};
298 299
299struct t3_rdma_init_wr { 300struct t3_rdma_init_wr {
@@ -314,7 +315,7 @@ struct t3_rdma_init_wr {
314 __be32 ird; 315 __be32 ird;
315 __be64 qp_dma_addr; /* 7 */ 316 __be64 qp_dma_addr; /* 7 */
316 __be32 qp_dma_size; /* 8 */ 317 __be32 qp_dma_size; /* 8 */
317 u32 rsvd; 318 u32 irs;
318}; 319};
319 320
320struct t3_genbit { 321struct t3_genbit {
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index b2faff5abce8..3b41dc0c39dd 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -254,8 +254,6 @@ static void release_ep_resources(struct iwch_ep *ep)
254 cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid); 254 cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
255 dst_release(ep->dst); 255 dst_release(ep->dst);
256 l2t_release(L2DATA(ep->com.tdev), ep->l2t); 256 l2t_release(L2DATA(ep->com.tdev), ep->l2t);
257 if (ep->com.tdev->type == T3B)
258 release_tid(ep->com.tdev, ep->hwtid, NULL);
259 put_ep(&ep->com); 257 put_ep(&ep->com);
260} 258}
261 259
@@ -515,7 +513,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
515 req->len = htonl(len); 513 req->len = htonl(len);
516 req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | 514 req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
517 V_TX_SNDBUF(snd_win>>15)); 515 V_TX_SNDBUF(snd_win>>15));
518 req->flags = htonl(F_TX_IMM_ACK|F_TX_INIT); 516 req->flags = htonl(F_TX_INIT);
519 req->sndseq = htonl(ep->snd_seq); 517 req->sndseq = htonl(ep->snd_seq);
520 BUG_ON(ep->mpa_skb); 518 BUG_ON(ep->mpa_skb);
521 ep->mpa_skb = skb; 519 ep->mpa_skb = skb;
@@ -566,7 +564,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
566 req->len = htonl(mpalen); 564 req->len = htonl(mpalen);
567 req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | 565 req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
568 V_TX_SNDBUF(snd_win>>15)); 566 V_TX_SNDBUF(snd_win>>15));
569 req->flags = htonl(F_TX_IMM_ACK|F_TX_INIT); 567 req->flags = htonl(F_TX_INIT);
570 req->sndseq = htonl(ep->snd_seq); 568 req->sndseq = htonl(ep->snd_seq);
571 BUG_ON(ep->mpa_skb); 569 BUG_ON(ep->mpa_skb);
572 ep->mpa_skb = skb; 570 ep->mpa_skb = skb;
@@ -618,7 +616,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
618 req->len = htonl(len); 616 req->len = htonl(len);
619 req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | 617 req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
620 V_TX_SNDBUF(snd_win>>15)); 618 V_TX_SNDBUF(snd_win>>15));
621 req->flags = htonl(F_TX_MORE | F_TX_IMM_ACK | F_TX_INIT); 619 req->flags = htonl(F_TX_INIT);
622 req->sndseq = htonl(ep->snd_seq); 620 req->sndseq = htonl(ep->snd_seq);
623 ep->mpa_skb = skb; 621 ep->mpa_skb = skb;
624 state_set(&ep->com, MPA_REP_SENT); 622 state_set(&ep->com, MPA_REP_SENT);
@@ -641,6 +639,7 @@ static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
641 cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid); 639 cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid);
642 640
643 ep->snd_seq = ntohl(req->snd_isn); 641 ep->snd_seq = ntohl(req->snd_isn);
642 ep->rcv_seq = ntohl(req->rcv_isn);
644 643
645 set_emss(ep, ntohs(req->tcp_opt)); 644 set_emss(ep, ntohs(req->tcp_opt));
646 645
@@ -1023,6 +1022,9 @@ static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1023 skb_pull(skb, sizeof(*hdr)); 1022 skb_pull(skb, sizeof(*hdr));
1024 skb_trim(skb, dlen); 1023 skb_trim(skb, dlen);
1025 1024
1025 ep->rcv_seq += dlen;
1026 BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen));
1027
1026 switch (state_read(&ep->com)) { 1028 switch (state_read(&ep->com)) {
1027 case MPA_REQ_SENT: 1029 case MPA_REQ_SENT:
1028 process_mpa_reply(ep, skb); 1030 process_mpa_reply(ep, skb);
@@ -1060,7 +1062,6 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1060 struct iwch_ep *ep = ctx; 1062 struct iwch_ep *ep = ctx;
1061 struct cpl_wr_ack *hdr = cplhdr(skb); 1063 struct cpl_wr_ack *hdr = cplhdr(skb);
1062 unsigned int credits = ntohs(hdr->credits); 1064 unsigned int credits = ntohs(hdr->credits);
1063 enum iwch_qp_attr_mask mask;
1064 1065
1065 PDBG("%s ep %p credits %u\n", __FUNCTION__, ep, credits); 1066 PDBG("%s ep %p credits %u\n", __FUNCTION__, ep, credits);
1066 1067
@@ -1072,30 +1073,6 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1072 ep->mpa_skb = NULL; 1073 ep->mpa_skb = NULL;
1073 dst_confirm(ep->dst); 1074 dst_confirm(ep->dst);
1074 if (state_read(&ep->com) == MPA_REP_SENT) { 1075 if (state_read(&ep->com) == MPA_REP_SENT) {
1075 struct iwch_qp_attributes attrs;
1076
1077 /* bind QP to EP and move to RTS */
1078 attrs.mpa_attr = ep->mpa_attr;
1079 attrs.max_ird = ep->ord;
1080 attrs.max_ord = ep->ord;
1081 attrs.llp_stream_handle = ep;
1082 attrs.next_state = IWCH_QP_STATE_RTS;
1083
1084 /* bind QP and TID with INIT_WR */
1085 mask = IWCH_QP_ATTR_NEXT_STATE |
1086 IWCH_QP_ATTR_LLP_STREAM_HANDLE |
1087 IWCH_QP_ATTR_MPA_ATTR |
1088 IWCH_QP_ATTR_MAX_IRD |
1089 IWCH_QP_ATTR_MAX_ORD;
1090
1091 ep->com.rpl_err = iwch_modify_qp(ep->com.qp->rhp,
1092 ep->com.qp, mask, &attrs, 1);
1093
1094 if (!ep->com.rpl_err) {
1095 state_set(&ep->com, FPDU_MODE);
1096 established_upcall(ep);
1097 }
1098
1099 ep->com.rpl_done = 1; 1076 ep->com.rpl_done = 1;
1100 PDBG("waking up ep %p\n", ep); 1077 PDBG("waking up ep %p\n", ep);
1101 wake_up(&ep->com.waitq); 1078 wake_up(&ep->com.waitq);
@@ -1124,6 +1101,15 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1124 return CPL_RET_BUF_DONE; 1101 return CPL_RET_BUF_DONE;
1125} 1102}
1126 1103
1104/*
1105 * Return whether a failed active open has allocated a TID
1106 */
1107static inline int act_open_has_tid(int status)
1108{
1109 return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
1110 status != CPL_ERR_ARP_MISS;
1111}
1112
1127static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) 1113static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1128{ 1114{
1129 struct iwch_ep *ep = ctx; 1115 struct iwch_ep *ep = ctx;
@@ -1133,7 +1119,7 @@ static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1133 status2errno(rpl->status)); 1119 status2errno(rpl->status));
1134 connect_reply_upcall(ep, status2errno(rpl->status)); 1120 connect_reply_upcall(ep, status2errno(rpl->status));
1135 state_set(&ep->com, DEAD); 1121 state_set(&ep->com, DEAD);
1136 if (ep->com.tdev->type == T3B) 1122 if (ep->com.tdev->type == T3B && act_open_has_tid(rpl->status))
1137 release_tid(ep->com.tdev, GET_TID(rpl), NULL); 1123 release_tid(ep->com.tdev, GET_TID(rpl), NULL);
1138 cxgb3_free_atid(ep->com.tdev, ep->atid); 1124 cxgb3_free_atid(ep->com.tdev, ep->atid);
1139 dst_release(ep->dst); 1125 dst_release(ep->dst);
@@ -1378,6 +1364,7 @@ static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1378 1364
1379 PDBG("%s ep %p\n", __FUNCTION__, ep); 1365 PDBG("%s ep %p\n", __FUNCTION__, ep);
1380 ep->snd_seq = ntohl(req->snd_isn); 1366 ep->snd_seq = ntohl(req->snd_isn);
1367 ep->rcv_seq = ntohl(req->rcv_isn);
1381 1368
1382 set_emss(ep, ntohs(req->tcp_opt)); 1369 set_emss(ep, ntohs(req->tcp_opt));
1383 1370
@@ -1485,6 +1472,13 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1485 int ret; 1472 int ret;
1486 int state; 1473 int state;
1487 1474
1475 if (is_neg_adv_abort(req->status)) {
1476 PDBG("%s neg_adv_abort ep %p tid %d\n", __FUNCTION__, ep,
1477 ep->hwtid);
1478 t3_l2t_send_event(ep->com.tdev, ep->l2t);
1479 return CPL_RET_BUF_DONE;
1480 }
1481
1488 /* 1482 /*
1489 * We get 2 peer aborts from the HW. The first one must 1483 * We get 2 peer aborts from the HW. The first one must
1490 * be ignored except for scribbling that we need one more. 1484 * be ignored except for scribbling that we need one more.
@@ -1494,13 +1488,6 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1494 return CPL_RET_BUF_DONE; 1488 return CPL_RET_BUF_DONE;
1495 } 1489 }
1496 1490
1497 if (is_neg_adv_abort(req->status)) {
1498 PDBG("%s neg_adv_abort ep %p tid %d\n", __FUNCTION__, ep,
1499 ep->hwtid);
1500 t3_l2t_send_event(ep->com.tdev, ep->l2t);
1501 return CPL_RET_BUF_DONE;
1502 }
1503
1504 state = state_read(&ep->com); 1491 state = state_read(&ep->com);
1505 PDBG("%s ep %p state %u\n", __FUNCTION__, ep, state); 1492 PDBG("%s ep %p state %u\n", __FUNCTION__, ep, state);
1506 switch (state) { 1493 switch (state) {
@@ -1732,10 +1719,8 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1732 struct iwch_qp *qp = get_qhp(h, conn_param->qpn); 1719 struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
1733 1720
1734 PDBG("%s ep %p tid %u\n", __FUNCTION__, ep, ep->hwtid); 1721 PDBG("%s ep %p tid %u\n", __FUNCTION__, ep, ep->hwtid);
1735 if (state_read(&ep->com) == DEAD) { 1722 if (state_read(&ep->com) == DEAD)
1736 put_ep(&ep->com);
1737 return -ECONNRESET; 1723 return -ECONNRESET;
1738 }
1739 1724
1740 BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); 1725 BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
1741 BUG_ON(!qp); 1726 BUG_ON(!qp);
@@ -1755,17 +1740,8 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1755 ep->ird = conn_param->ird; 1740 ep->ird = conn_param->ird;
1756 ep->ord = conn_param->ord; 1741 ep->ord = conn_param->ord;
1757 PDBG("%s %d ird %d ord %d\n", __FUNCTION__, __LINE__, ep->ird, ep->ord); 1742 PDBG("%s %d ird %d ord %d\n", __FUNCTION__, __LINE__, ep->ird, ep->ord);
1743
1758 get_ep(&ep->com); 1744 get_ep(&ep->com);
1759 err = send_mpa_reply(ep, conn_param->private_data,
1760 conn_param->private_data_len);
1761 if (err) {
1762 ep->com.cm_id = NULL;
1763 ep->com.qp = NULL;
1764 cm_id->rem_ref(cm_id);
1765 abort_connection(ep, NULL, GFP_KERNEL);
1766 put_ep(&ep->com);
1767 return err;
1768 }
1769 1745
1770 /* bind QP to EP and move to RTS */ 1746 /* bind QP to EP and move to RTS */
1771 attrs.mpa_attr = ep->mpa_attr; 1747 attrs.mpa_attr = ep->mpa_attr;
@@ -1783,16 +1759,28 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1783 1759
1784 err = iwch_modify_qp(ep->com.qp->rhp, 1760 err = iwch_modify_qp(ep->com.qp->rhp,
1785 ep->com.qp, mask, &attrs, 1); 1761 ep->com.qp, mask, &attrs, 1);
1762 if (err)
1763 goto err;
1786 1764
1787 if (err) { 1765 err = send_mpa_reply(ep, conn_param->private_data,
1788 ep->com.cm_id = NULL; 1766 conn_param->private_data_len);
1789 ep->com.qp = NULL; 1767 if (err)
1790 cm_id->rem_ref(cm_id); 1768 goto err;
1791 abort_connection(ep, NULL, GFP_KERNEL); 1769
1792 } else { 1770 /* wait for wr_ack */
1793 state_set(&ep->com, FPDU_MODE); 1771 wait_event(ep->com.waitq, ep->com.rpl_done);
1794 established_upcall(ep); 1772 err = ep->com.rpl_err;
1795 } 1773 if (err)
1774 goto err;
1775
1776 state_set(&ep->com, FPDU_MODE);
1777 established_upcall(ep);
1778 put_ep(&ep->com);
1779 return 0;
1780err:
1781 ep->com.cm_id = NULL;
1782 ep->com.qp = NULL;
1783 cm_id->rem_ref(cm_id);
1796 put_ep(&ep->com); 1784 put_ep(&ep->com);
1797 return err; 1785 return err;
1798} 1786}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index 21a388c313cf..6107e7cd9b57 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -175,6 +175,7 @@ struct iwch_ep {
175 unsigned int atid; 175 unsigned int atid;
176 u32 hwtid; 176 u32 hwtid;
177 u32 snd_seq; 177 u32 snd_seq;
178 u32 rcv_seq;
178 struct l2t_entry *l2t; 179 struct l2t_entry *l2t;
179 struct dst_entry *dst; 180 struct dst_entry *dst;
180 struct sk_buff *mpa_skb; 181 struct sk_buff *mpa_skb;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index e7c2c3948037..f0c777589374 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1163,9 +1163,10 @@ int iwch_register_device(struct iwch_dev *dev)
1163 dev->ibdev.post_recv = iwch_post_receive; 1163 dev->ibdev.post_recv = iwch_post_receive;
1164 1164
1165 1165
1166 dev->ibdev.iwcm = 1166 dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
1167 (struct iw_cm_verbs *) kmalloc(sizeof(struct iw_cm_verbs), 1167 if (!dev->ibdev.iwcm)
1168 GFP_KERNEL); 1168 return -ENOMEM;
1169
1169 dev->ibdev.iwcm->connect = iwch_connect; 1170 dev->ibdev.iwcm->connect = iwch_connect;
1170 dev->ibdev.iwcm->accept = iwch_accept_cr; 1171 dev->ibdev.iwcm->accept = iwch_accept_cr;
1171 dev->ibdev.iwcm->reject = iwch_reject_cr; 1172 dev->ibdev.iwcm->reject = iwch_reject_cr;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 714dddbc9a98..dd89b6b91f9c 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -628,9 +628,9 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
628 /* immediate data starts here. */ 628 /* immediate data starts here. */
629 term = (struct terminate_message *)wqe->send.sgl; 629 term = (struct terminate_message *)wqe->send.sgl;
630 build_term_codes(rsp_msg, &term->layer_etype, &term->ecode); 630 build_term_codes(rsp_msg, &term->layer_etype, &term->ecode);
631 build_fw_riwrh((void *)wqe, T3_WR_SEND, 631 wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_SEND) |
632 T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 1, 632 V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
633 qhp->ep->hwtid, 5); 633 wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid));
634 skb->priority = CPL_PRIORITY_DATA; 634 skb->priority = CPL_PRIORITY_DATA;
635 return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); 635 return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
636} 636}
@@ -732,6 +732,7 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
732 init_attr.qp_dma_addr = qhp->wq.dma_addr; 732 init_attr.qp_dma_addr = qhp->wq.dma_addr;
733 init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); 733 init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
734 init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0; 734 init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
735 init_attr.irs = qhp->ep->rcv_seq;
735 PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d " 736 PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
736 "flags 0x%x qpcaps 0x%x\n", __FUNCTION__, 737 "flags 0x%x qpcaps 0x%x\n", __FUNCTION__,
737 init_attr.rq_addr, init_attr.rq_size, 738 init_attr.rq_addr, init_attr.rq_size,
diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig
index 1a854598e0e6..59f807d8d58e 100644
--- a/drivers/infiniband/hw/ehca/Kconfig
+++ b/drivers/infiniband/hw/ehca/Kconfig
@@ -1,6 +1,6 @@
1config INFINIBAND_EHCA 1config INFINIBAND_EHCA
2 tristate "eHCA support" 2 tristate "eHCA support"
3 depends on IBMEBUS && INFINIBAND 3 depends on IBMEBUS
4 ---help--- 4 ---help---
5 This driver supports the IBM pSeries eHCA InfiniBand adapter. 5 This driver supports the IBM pSeries eHCA InfiniBand adapter.
6 6
diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c
index 0d6e2c4bb245..3cd6bf3402d1 100644
--- a/drivers/infiniband/hw/ehca/ehca_av.c
+++ b/drivers/infiniband/hw/ehca/ehca_av.c
@@ -118,7 +118,7 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
118 } 118 }
119 memcpy(&av->av.grh.word_1, &gid, sizeof(gid)); 119 memcpy(&av->av.grh.word_1, &gid, sizeof(gid));
120 } 120 }
121 av->av.pmtu = EHCA_MAX_MTU; 121 av->av.pmtu = shca->max_mtu;
122 122
123 /* dgid comes in grh.word_3 */ 123 /* dgid comes in grh.word_3 */
124 memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid, 124 memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid,
@@ -137,6 +137,8 @@ int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
137 struct ehca_av *av; 137 struct ehca_av *av;
138 struct ehca_ud_av new_ehca_av; 138 struct ehca_ud_av new_ehca_av;
139 struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd); 139 struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
140 struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca,
141 ib_device);
140 u32 cur_pid = current->tgid; 142 u32 cur_pid = current->tgid;
141 143
142 if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && 144 if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
@@ -192,7 +194,7 @@ int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
192 memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid)); 194 memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid));
193 } 195 }
194 196
195 new_ehca_av.pmtu = EHCA_MAX_MTU; 197 new_ehca_av.pmtu = shca->max_mtu;
196 198
197 memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid, 199 memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid,
198 sizeof(ah_attr->grh.dgid)); 200 sizeof(ah_attr->grh.dgid));
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 1d286d3cc2d5..daf823ea1ace 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -5,6 +5,7 @@
5 * 5 *
6 * Authors: Heiko J Schick <schickhj@de.ibm.com> 6 * Authors: Heiko J Schick <schickhj@de.ibm.com>
7 * Christoph Raisch <raisch@de.ibm.com> 7 * Christoph Raisch <raisch@de.ibm.com>
8 * Joachim Fenkes <fenkes@de.ibm.com>
8 * 9 *
9 * Copyright (c) 2005 IBM Corporation 10 * Copyright (c) 2005 IBM Corporation
10 * 11 *
@@ -86,11 +87,17 @@ struct ehca_eq {
86 struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE]; 87 struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE];
87}; 88};
88 89
90struct ehca_sma_attr {
91 u16 lid, lmc, sm_sl, sm_lid;
92 u16 pkey_tbl_len, pkeys[16];
93};
94
89struct ehca_sport { 95struct ehca_sport {
90 struct ib_cq *ibcq_aqp1; 96 struct ib_cq *ibcq_aqp1;
91 struct ib_qp *ibqp_aqp1; 97 struct ib_qp *ibqp_aqp1;
92 enum ib_rate rate; 98 enum ib_rate rate;
93 enum ib_port_state port_state; 99 enum ib_port_state port_state;
100 struct ehca_sma_attr saved_attr;
94}; 101};
95 102
96struct ehca_shca { 103struct ehca_shca {
@@ -107,6 +114,8 @@ struct ehca_shca {
107 struct ehca_pd *pd; 114 struct ehca_pd *pd;
108 struct h_galpas galpas; 115 struct h_galpas galpas;
109 struct mutex modify_mutex; 116 struct mutex modify_mutex;
117 u64 hca_cap;
118 int max_mtu;
110}; 119};
111 120
112struct ehca_pd { 121struct ehca_pd {
@@ -115,9 +124,20 @@ struct ehca_pd {
115 u32 ownpid; 124 u32 ownpid;
116}; 125};
117 126
127enum ehca_ext_qp_type {
128 EQPT_NORMAL = 0,
129 EQPT_LLQP = 1,
130 EQPT_SRQBASE = 2,
131 EQPT_SRQ = 3,
132};
133
118struct ehca_qp { 134struct ehca_qp {
119 struct ib_qp ib_qp; 135 union {
136 struct ib_qp ib_qp;
137 struct ib_srq ib_srq;
138 };
120 u32 qp_type; 139 u32 qp_type;
140 enum ehca_ext_qp_type ext_type;
121 struct ipz_queue ipz_squeue; 141 struct ipz_queue ipz_squeue;
122 struct ipz_queue ipz_rqueue; 142 struct ipz_queue ipz_rqueue;
123 struct h_galpas galpas; 143 struct h_galpas galpas;
@@ -140,6 +160,10 @@ struct ehca_qp {
140 u32 mm_count_galpa; 160 u32 mm_count_galpa;
141}; 161};
142 162
163#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
164#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ)
165#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE)
166
143/* must be power of 2 */ 167/* must be power of 2 */
144#define QP_HASHTAB_LEN 8 168#define QP_HASHTAB_LEN 8
145 169
@@ -156,8 +180,8 @@ struct ehca_cq {
156 spinlock_t cb_lock; 180 spinlock_t cb_lock;
157 struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; 181 struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
158 struct list_head entry; 182 struct list_head entry;
159 u32 nr_callbacks; /* #events assigned to cpu by scaling code */ 183 u32 nr_callbacks; /* #events assigned to cpu by scaling code */
160 u32 nr_events; /* #events seen */ 184 atomic_t nr_events; /* #events seen */
161 wait_queue_head_t wait_completion; 185 wait_queue_head_t wait_completion;
162 spinlock_t task_lock; 186 spinlock_t task_lock;
163 u32 ownpid; 187 u32 ownpid;
@@ -275,9 +299,8 @@ void ehca_cleanup_av_cache(void);
275int ehca_init_mrmw_cache(void); 299int ehca_init_mrmw_cache(void);
276void ehca_cleanup_mrmw_cache(void); 300void ehca_cleanup_mrmw_cache(void);
277 301
278extern spinlock_t ehca_qp_idr_lock; 302extern rwlock_t ehca_qp_idr_lock;
279extern spinlock_t ehca_cq_idr_lock; 303extern rwlock_t ehca_cq_idr_lock;
280extern spinlock_t hcall_lock;
281extern struct idr ehca_qp_idr; 304extern struct idr ehca_qp_idr;
282extern struct idr ehca_cq_idr; 305extern struct idr ehca_cq_idr;
283 306
@@ -305,6 +328,7 @@ struct ehca_create_qp_resp {
305 u32 qp_num; 328 u32 qp_num;
306 u32 token; 329 u32 token;
307 u32 qp_type; 330 u32 qp_type;
331 u32 ext_type;
308 u32 qkey; 332 u32 qkey;
309 /* qp_num assigned by ehca: sqp0/1 may have got different numbers */ 333 /* qp_num assigned by ehca: sqp0/1 may have got different numbers */
310 u32 real_qp_num; 334 u32 real_qp_num;
@@ -320,14 +344,42 @@ struct ehca_alloc_cq_parms {
320 struct ipz_eq_handle eq_handle; 344 struct ipz_eq_handle eq_handle;
321}; 345};
322 346
347enum ehca_service_type {
348 ST_RC = 0,
349 ST_UC = 1,
350 ST_RD = 2,
351 ST_UD = 3,
352};
353
354enum ehca_ll_comp_flags {
355 LLQP_SEND_COMP = 0x20,
356 LLQP_RECV_COMP = 0x40,
357 LLQP_COMP_MASK = 0x60,
358};
359
323struct ehca_alloc_qp_parms { 360struct ehca_alloc_qp_parms {
324 int servicetype; 361/* input parameters */
362 enum ehca_service_type servicetype;
325 int sigtype; 363 int sigtype;
326 int daqp_ctrl; 364 enum ehca_ext_qp_type ext_type;
327 int max_send_sge; 365 enum ehca_ll_comp_flags ll_comp_flags;
328 int max_recv_sge; 366
367 int max_send_wr, max_recv_wr;
368 int max_send_sge, max_recv_sge;
329 int ud_av_l_key_ctl; 369 int ud_av_l_key_ctl;
330 370
371 u32 token;
372 struct ipz_eq_handle eq_handle;
373 struct ipz_pd pd;
374 struct ipz_cq_handle send_cq_handle, recv_cq_handle;
375
376 u32 srq_qpn, srq_token, srq_limit;
377
378/* output parameters */
379 u32 real_qp_num;
380 struct ipz_qp_handle qp_handle;
381 struct h_galpas galpas;
382
331 u16 act_nr_send_wqes; 383 u16 act_nr_send_wqes;
332 u16 act_nr_recv_wqes; 384 u16 act_nr_recv_wqes;
333 u8 act_nr_recv_sges; 385 u8 act_nr_recv_sges;
@@ -335,9 +387,6 @@ struct ehca_alloc_qp_parms {
335 387
336 u32 nr_rq_pages; 388 u32 nr_rq_pages;
337 u32 nr_sq_pages; 389 u32 nr_sq_pages;
338
339 struct ipz_eq_handle ipz_eq_handle;
340 struct ipz_pd pd;
341}; 390};
342 391
343int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); 392int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
index 5665f213b81a..fb3df5c271e7 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
@@ -228,8 +228,8 @@ struct hcp_modify_qp_control_block {
228#define MQPCB_QP_NUMBER EHCA_BMASK_IBM(8,31) 228#define MQPCB_QP_NUMBER EHCA_BMASK_IBM(8,31)
229#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48,48) 229#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48,48)
230#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31,31) 230#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31,31)
231#define MQPCB_MASK_CURR_SQR_LIMIT EHCA_BMASK_IBM(49,49) 231#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49,49)
232#define MQPCB_CURR_SQR_LIMIT EHCA_BMASK_IBM(15,31) 232#define MQPCB_CURR_SRQ_LIMIT EHCA_BMASK_IBM(16,31)
233#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50,50) 233#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50,50)
234#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51,51) 234#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51,51)
235 235
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 67f0670fe3b1..01d4a148bd71 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -56,11 +56,11 @@ int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp)
56{ 56{
57 unsigned int qp_num = qp->real_qp_num; 57 unsigned int qp_num = qp->real_qp_num;
58 unsigned int key = qp_num & (QP_HASHTAB_LEN-1); 58 unsigned int key = qp_num & (QP_HASHTAB_LEN-1);
59 unsigned long spl_flags; 59 unsigned long flags;
60 60
61 spin_lock_irqsave(&cq->spinlock, spl_flags); 61 spin_lock_irqsave(&cq->spinlock, flags);
62 hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]); 62 hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]);
63 spin_unlock_irqrestore(&cq->spinlock, spl_flags); 63 spin_unlock_irqrestore(&cq->spinlock, flags);
64 64
65 ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x", 65 ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x",
66 cq->cq_number, qp_num); 66 cq->cq_number, qp_num);
@@ -74,9 +74,9 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
74 unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); 74 unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
75 struct hlist_node *iter; 75 struct hlist_node *iter;
76 struct ehca_qp *qp; 76 struct ehca_qp *qp;
77 unsigned long spl_flags; 77 unsigned long flags;
78 78
79 spin_lock_irqsave(&cq->spinlock, spl_flags); 79 spin_lock_irqsave(&cq->spinlock, flags);
80 hlist_for_each(iter, &cq->qp_hashtab[key]) { 80 hlist_for_each(iter, &cq->qp_hashtab[key]) {
81 qp = hlist_entry(iter, struct ehca_qp, list_entries); 81 qp = hlist_entry(iter, struct ehca_qp, list_entries);
82 if (qp->real_qp_num == real_qp_num) { 82 if (qp->real_qp_num == real_qp_num) {
@@ -88,7 +88,7 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
88 break; 88 break;
89 } 89 }
90 } 90 }
91 spin_unlock_irqrestore(&cq->spinlock, spl_flags); 91 spin_unlock_irqrestore(&cq->spinlock, flags);
92 if (ret) 92 if (ret)
93 ehca_err(cq->ib_cq.device, 93 ehca_err(cq->ib_cq.device,
94 "qp not found cq_num=%x real_qp_num=%x", 94 "qp not found cq_num=%x real_qp_num=%x",
@@ -146,6 +146,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
146 spin_lock_init(&my_cq->spinlock); 146 spin_lock_init(&my_cq->spinlock);
147 spin_lock_init(&my_cq->cb_lock); 147 spin_lock_init(&my_cq->cb_lock);
148 spin_lock_init(&my_cq->task_lock); 148 spin_lock_init(&my_cq->task_lock);
149 atomic_set(&my_cq->nr_events, 0);
149 init_waitqueue_head(&my_cq->wait_completion); 150 init_waitqueue_head(&my_cq->wait_completion);
150 my_cq->ownpid = current->tgid; 151 my_cq->ownpid = current->tgid;
151 152
@@ -162,9 +163,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
162 goto create_cq_exit1; 163 goto create_cq_exit1;
163 } 164 }
164 165
165 spin_lock_irqsave(&ehca_cq_idr_lock, flags); 166 write_lock_irqsave(&ehca_cq_idr_lock, flags);
166 ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token); 167 ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token);
167 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); 168 write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
168 169
169 } while (ret == -EAGAIN); 170 } while (ret == -EAGAIN);
170 171
@@ -293,9 +294,9 @@ create_cq_exit3:
293 "cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret); 294 "cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret);
294 295
295create_cq_exit2: 296create_cq_exit2:
296 spin_lock_irqsave(&ehca_cq_idr_lock, flags); 297 write_lock_irqsave(&ehca_cq_idr_lock, flags);
297 idr_remove(&ehca_cq_idr, my_cq->token); 298 idr_remove(&ehca_cq_idr, my_cq->token);
298 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); 299 write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
299 300
300create_cq_exit1: 301create_cq_exit1:
301 kmem_cache_free(cq_cache, my_cq); 302 kmem_cache_free(cq_cache, my_cq);
@@ -303,16 +304,6 @@ create_cq_exit1:
303 return cq; 304 return cq;
304} 305}
305 306
306static int get_cq_nr_events(struct ehca_cq *my_cq)
307{
308 int ret;
309 unsigned long flags;
310 spin_lock_irqsave(&ehca_cq_idr_lock, flags);
311 ret = my_cq->nr_events;
312 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
313 return ret;
314}
315
316int ehca_destroy_cq(struct ib_cq *cq) 307int ehca_destroy_cq(struct ib_cq *cq)
317{ 308{
318 u64 h_ret; 309 u64 h_ret;
@@ -339,17 +330,18 @@ int ehca_destroy_cq(struct ib_cq *cq)
339 } 330 }
340 } 331 }
341 332
342 spin_lock_irqsave(&ehca_cq_idr_lock, flags); 333 /*
343 while (my_cq->nr_events) { 334 * remove the CQ from the idr first to make sure
344 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); 335 * no more interrupt tasklets will touch this CQ
345 wait_event(my_cq->wait_completion, !get_cq_nr_events(my_cq)); 336 */
346 spin_lock_irqsave(&ehca_cq_idr_lock, flags); 337 write_lock_irqsave(&ehca_cq_idr_lock, flags);
347 /* recheck nr_events to assure no cqe has just arrived */
348 }
349
350 idr_remove(&ehca_cq_idr, my_cq->token); 338 idr_remove(&ehca_cq_idr, my_cq->token);
351 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); 339 write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
340
341 /* now wait until all pending events have completed */
342 wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events));
352 343
344 /* nobody's using our CQ any longer -- we can destroy it */
353 h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); 345 h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
354 if (h_ret == H_R_STATE) { 346 if (h_ret == H_R_STATE) {
355 /* cq in err: read err data and destroy it forcibly */ 347 /* cq in err: read err data and destroy it forcibly */
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index 32b55a4f0e5b..bbd3c6a5822f 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -45,11 +45,25 @@
45 45
46int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) 46int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
47{ 47{
48 int ret = 0; 48 int i, ret = 0;
49 struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, 49 struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
50 ib_device); 50 ib_device);
51 struct hipz_query_hca *rblock; 51 struct hipz_query_hca *rblock;
52 52
53 static const u32 cap_mapping[] = {
54 IB_DEVICE_RESIZE_MAX_WR, HCA_CAP_WQE_RESIZE,
55 IB_DEVICE_BAD_PKEY_CNTR, HCA_CAP_BAD_P_KEY_CTR,
56 IB_DEVICE_BAD_QKEY_CNTR, HCA_CAP_Q_KEY_VIOL_CTR,
57 IB_DEVICE_RAW_MULTI, HCA_CAP_RAW_PACKET_MCAST,
58 IB_DEVICE_AUTO_PATH_MIG, HCA_CAP_AUTO_PATH_MIG,
59 IB_DEVICE_CHANGE_PHY_PORT, HCA_CAP_SQD_RTS_PORT_CHANGE,
60 IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK,
61 IB_DEVICE_CURR_QP_STATE_MOD, HCA_CAP_CUR_QP_STATE_MOD,
62 IB_DEVICE_SHUTDOWN_PORT, HCA_CAP_SHUTDOWN_PORT,
63 IB_DEVICE_INIT_TYPE, HCA_CAP_INIT_TYPE,
64 IB_DEVICE_PORT_ACTIVE_EVENT, HCA_CAP_PORT_ACTIVE_EVENT,
65 };
66
53 rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); 67 rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
54 if (!rblock) { 68 if (!rblock) {
55 ehca_err(&shca->ib_device, "Can't allocate rblock memory."); 69 ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
@@ -96,6 +110,13 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
96 props->max_total_mcast_qp_attach 110 props->max_total_mcast_qp_attach
97 = min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX); 111 = min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX);
98 112
113 /* translate device capabilities */
114 props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID |
115 IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ;
116 for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2)
117 if (rblock->hca_cap_indicators & cap_mapping[i + 1])
118 props->device_cap_flags |= cap_mapping[i];
119
99query_device1: 120query_device1:
100 ehca_free_fw_ctrlblock(rblock); 121 ehca_free_fw_ctrlblock(rblock);
101 122
@@ -172,6 +193,40 @@ query_port1:
172 return ret; 193 return ret;
173} 194}
174 195
196int ehca_query_sma_attr(struct ehca_shca *shca,
197 u8 port, struct ehca_sma_attr *attr)
198{
199 int ret = 0;
200 struct hipz_query_port *rblock;
201
202 rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
203 if (!rblock) {
204 ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
205 return -ENOMEM;
206 }
207
208 if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
209 ehca_err(&shca->ib_device, "Can't query port properties");
210 ret = -EINVAL;
211 goto query_sma_attr1;
212 }
213
214 memset(attr, 0, sizeof(struct ehca_sma_attr));
215
216 attr->lid = rblock->lid;
217 attr->lmc = rblock->lmc;
218 attr->sm_sl = rblock->sm_sl;
219 attr->sm_lid = rblock->sm_lid;
220
221 attr->pkey_tbl_len = rblock->pkey_tbl_len;
222 memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys));
223
224query_sma_attr1:
225 ehca_free_fw_ctrlblock(rblock);
226
227 return ret;
228}
229
175int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) 230int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
176{ 231{
177 int ret = 0; 232 int ret = 0;
@@ -261,7 +316,7 @@ int ehca_modify_port(struct ib_device *ibdev,
261 } 316 }
262 317
263 if (mutex_lock_interruptible(&shca->modify_mutex)) 318 if (mutex_lock_interruptible(&shca->modify_mutex))
264 return -ERESTARTSYS; 319 return -ERESTARTSYS;
265 320
266 rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); 321 rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
267 if (!rblock) { 322 if (!rblock) {
@@ -290,7 +345,7 @@ modify_port2:
290 ehca_free_fw_ctrlblock(rblock); 345 ehca_free_fw_ctrlblock(rblock);
291 346
292modify_port1: 347modify_port1:
293 mutex_unlock(&shca->modify_mutex); 348 mutex_unlock(&shca->modify_mutex);
294 349
295 return ret; 350 return ret;
296} 351}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 100329ba3343..96eba3830754 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -5,6 +5,8 @@
5 * 5 *
6 * Authors: Heiko J Schick <schickhj@de.ibm.com> 6 * Authors: Heiko J Schick <schickhj@de.ibm.com>
7 * Khadija Souissi <souissi@de.ibm.com> 7 * Khadija Souissi <souissi@de.ibm.com>
8 * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
9 * Joachim Fenkes <fenkes@de.ibm.com>
8 * 10 *
9 * Copyright (c) 2005 IBM Corporation 11 * Copyright (c) 2005 IBM Corporation
10 * 12 *
@@ -59,6 +61,7 @@
59#define NEQE_EVENT_CODE EHCA_BMASK_IBM(2,7) 61#define NEQE_EVENT_CODE EHCA_BMASK_IBM(2,7)
60#define NEQE_PORT_NUMBER EHCA_BMASK_IBM(8,15) 62#define NEQE_PORT_NUMBER EHCA_BMASK_IBM(8,15)
61#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16) 63#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16)
64#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16,16)
62 65
63#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63) 66#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63)
64#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7) 67#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7)
@@ -178,12 +181,11 @@ static void qp_event_callback(struct ehca_shca *shca,
178{ 181{
179 struct ib_event event; 182 struct ib_event event;
180 struct ehca_qp *qp; 183 struct ehca_qp *qp;
181 unsigned long flags;
182 u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe); 184 u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
183 185
184 spin_lock_irqsave(&ehca_qp_idr_lock, flags); 186 read_lock(&ehca_qp_idr_lock);
185 qp = idr_find(&ehca_qp_idr, token); 187 qp = idr_find(&ehca_qp_idr, token);
186 spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); 188 read_unlock(&ehca_qp_idr_lock);
187 189
188 190
189 if (!qp) 191 if (!qp)
@@ -207,18 +209,22 @@ static void cq_event_callback(struct ehca_shca *shca,
207 u64 eqe) 209 u64 eqe)
208{ 210{
209 struct ehca_cq *cq; 211 struct ehca_cq *cq;
210 unsigned long flags;
211 u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe); 212 u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
212 213
213 spin_lock_irqsave(&ehca_cq_idr_lock, flags); 214 read_lock(&ehca_cq_idr_lock);
214 cq = idr_find(&ehca_cq_idr, token); 215 cq = idr_find(&ehca_cq_idr, token);
215 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); 216 if (cq)
217 atomic_inc(&cq->nr_events);
218 read_unlock(&ehca_cq_idr_lock);
216 219
217 if (!cq) 220 if (!cq)
218 return; 221 return;
219 222
220 ehca_error_data(shca, cq, cq->ipz_cq_handle.handle); 223 ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
221 224
225 if (atomic_dec_and_test(&cq->nr_events))
226 wake_up(&cq->wait_completion);
227
222 return; 228 return;
223} 229}
224 230
@@ -281,30 +287,61 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe)
281 return; 287 return;
282} 288}
283 289
284static void parse_ec(struct ehca_shca *shca, u64 eqe) 290static void dispatch_port_event(struct ehca_shca *shca, int port_num,
291 enum ib_event_type type, const char *msg)
285{ 292{
286 struct ib_event event; 293 struct ib_event event;
294
295 ehca_info(&shca->ib_device, "port %d %s.", port_num, msg);
296 event.device = &shca->ib_device;
297 event.event = type;
298 event.element.port_num = port_num;
299 ib_dispatch_event(&event);
300}
301
302static void notify_port_conf_change(struct ehca_shca *shca, int port_num)
303{
304 struct ehca_sma_attr new_attr;
305 struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr;
306
307 ehca_query_sma_attr(shca, port_num, &new_attr);
308
309 if (new_attr.sm_sl != old_attr->sm_sl ||
310 new_attr.sm_lid != old_attr->sm_lid)
311 dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE,
312 "SM changed");
313
314 if (new_attr.lid != old_attr->lid ||
315 new_attr.lmc != old_attr->lmc)
316 dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE,
317 "LID changed");
318
319 if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len ||
320 memcmp(new_attr.pkeys, old_attr->pkeys,
321 sizeof(u16) * new_attr.pkey_tbl_len))
322 dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE,
323 "P_Key changed");
324
325 *old_attr = new_attr;
326}
327
328static void parse_ec(struct ehca_shca *shca, u64 eqe)
329{
287 u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); 330 u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
288 u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); 331 u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
289 332
290 switch (ec) { 333 switch (ec) {
291 case 0x30: /* port availability change */ 334 case 0x30: /* port availability change */
292 if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { 335 if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
293 ehca_info(&shca->ib_device,
294 "port %x is active.", port);
295 event.device = &shca->ib_device;
296 event.event = IB_EVENT_PORT_ACTIVE;
297 event.element.port_num = port;
298 shca->sport[port - 1].port_state = IB_PORT_ACTIVE; 336 shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
299 ib_dispatch_event(&event); 337 dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
338 "is active");
339 ehca_query_sma_attr(shca, port,
340 &shca->sport[port - 1].saved_attr);
300 } else { 341 } else {
301 ehca_info(&shca->ib_device,
302 "port %x is inactive.", port);
303 event.device = &shca->ib_device;
304 event.event = IB_EVENT_PORT_ERR;
305 event.element.port_num = port;
306 shca->sport[port - 1].port_state = IB_PORT_DOWN; 342 shca->sport[port - 1].port_state = IB_PORT_DOWN;
307 ib_dispatch_event(&event); 343 dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
344 "is inactive");
308 } 345 }
309 break; 346 break;
310 case 0x31: 347 case 0x31:
@@ -312,24 +349,19 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
312 * disruptive change is caused by 349 * disruptive change is caused by
313 * LID, PKEY or SM change 350 * LID, PKEY or SM change
314 */ 351 */
315 ehca_warn(&shca->ib_device, 352 if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) {
316 "disruptive port %x configuration change", port); 353 ehca_warn(&shca->ib_device, "disruptive port "
317 354 "%d configuration change", port);
318 ehca_info(&shca->ib_device, 355
319 "port %x is inactive.", port); 356 shca->sport[port - 1].port_state = IB_PORT_DOWN;
320 event.device = &shca->ib_device; 357 dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
321 event.event = IB_EVENT_PORT_ERR; 358 "is inactive");
322 event.element.port_num = port; 359
323 shca->sport[port - 1].port_state = IB_PORT_DOWN; 360 shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
324 ib_dispatch_event(&event); 361 dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
325 362 "is active");
326 ehca_info(&shca->ib_device, 363 } else
327 "port %x is active.", port); 364 notify_port_conf_change(shca, port);
328 event.device = &shca->ib_device;
329 event.event = IB_EVENT_PORT_ACTIVE;
330 event.element.port_num = port;
331 shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
332 ib_dispatch_event(&event);
333 break; 365 break;
334 case 0x32: /* adapter malfunction */ 366 case 0x32: /* adapter malfunction */
335 ehca_err(&shca->ib_device, "Adapter malfunction."); 367 ehca_err(&shca->ib_device, "Adapter malfunction.");
@@ -404,7 +436,6 @@ static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
404{ 436{
405 u64 eqe_value; 437 u64 eqe_value;
406 u32 token; 438 u32 token;
407 unsigned long flags;
408 struct ehca_cq *cq; 439 struct ehca_cq *cq;
409 440
410 eqe_value = eqe->entry; 441 eqe_value = eqe->entry;
@@ -412,27 +443,24 @@ static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
412 if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { 443 if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
413 ehca_dbg(&shca->ib_device, "Got completion event"); 444 ehca_dbg(&shca->ib_device, "Got completion event");
414 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); 445 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
415 spin_lock_irqsave(&ehca_cq_idr_lock, flags); 446 read_lock(&ehca_cq_idr_lock);
416 cq = idr_find(&ehca_cq_idr, token); 447 cq = idr_find(&ehca_cq_idr, token);
448 if (cq)
449 atomic_inc(&cq->nr_events);
450 read_unlock(&ehca_cq_idr_lock);
417 if (cq == NULL) { 451 if (cq == NULL) {
418 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
419 ehca_err(&shca->ib_device, 452 ehca_err(&shca->ib_device,
420 "Invalid eqe for non-existing cq token=%x", 453 "Invalid eqe for non-existing cq token=%x",
421 token); 454 token);
422 return; 455 return;
423 } 456 }
424 reset_eq_pending(cq); 457 reset_eq_pending(cq);
425 cq->nr_events++;
426 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
427 if (ehca_scaling_code) 458 if (ehca_scaling_code)
428 queue_comp_task(cq); 459 queue_comp_task(cq);
429 else { 460 else {
430 comp_event_callback(cq); 461 comp_event_callback(cq);
431 spin_lock_irqsave(&ehca_cq_idr_lock, flags); 462 if (atomic_dec_and_test(&cq->nr_events))
432 cq->nr_events--;
433 if (!cq->nr_events)
434 wake_up(&cq->wait_completion); 463 wake_up(&cq->wait_completion);
435 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
436 } 464 }
437 } else { 465 } else {
438 ehca_dbg(&shca->ib_device, "Got non completion event"); 466 ehca_dbg(&shca->ib_device, "Got non completion event");
@@ -476,17 +504,17 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
476 eqe_value = eqe_cache[eqe_cnt].eqe->entry; 504 eqe_value = eqe_cache[eqe_cnt].eqe->entry;
477 if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { 505 if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
478 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); 506 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
479 spin_lock(&ehca_cq_idr_lock); 507 read_lock(&ehca_cq_idr_lock);
480 eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token); 508 eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
509 if (eqe_cache[eqe_cnt].cq)
510 atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events);
511 read_unlock(&ehca_cq_idr_lock);
481 if (!eqe_cache[eqe_cnt].cq) { 512 if (!eqe_cache[eqe_cnt].cq) {
482 spin_unlock(&ehca_cq_idr_lock);
483 ehca_err(&shca->ib_device, 513 ehca_err(&shca->ib_device,
484 "Invalid eqe for non-existing cq " 514 "Invalid eqe for non-existing cq "
485 "token=%x", token); 515 "token=%x", token);
486 continue; 516 continue;
487 } 517 }
488 eqe_cache[eqe_cnt].cq->nr_events++;
489 spin_unlock(&ehca_cq_idr_lock);
490 } else 518 } else
491 eqe_cache[eqe_cnt].cq = NULL; 519 eqe_cache[eqe_cnt].cq = NULL;
492 eqe_cnt++; 520 eqe_cnt++;
@@ -517,11 +545,8 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
517 else { 545 else {
518 struct ehca_cq *cq = eq->eqe_cache[i].cq; 546 struct ehca_cq *cq = eq->eqe_cache[i].cq;
519 comp_event_callback(cq); 547 comp_event_callback(cq);
520 spin_lock(&ehca_cq_idr_lock); 548 if (atomic_dec_and_test(&cq->nr_events))
521 cq->nr_events--;
522 if (!cq->nr_events)
523 wake_up(&cq->wait_completion); 549 wake_up(&cq->wait_completion);
524 spin_unlock(&ehca_cq_idr_lock);
525 } 550 }
526 } else { 551 } else {
527 ehca_dbg(&shca->ib_device, "Got non completion event"); 552 ehca_dbg(&shca->ib_device, "Got non completion event");
@@ -621,13 +646,10 @@ static void run_comp_task(struct ehca_cpu_comp_task* cct)
621 while (!list_empty(&cct->cq_list)) { 646 while (!list_empty(&cct->cq_list)) {
622 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); 647 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
623 spin_unlock_irqrestore(&cct->task_lock, flags); 648 spin_unlock_irqrestore(&cct->task_lock, flags);
624 comp_event_callback(cq);
625 649
626 spin_lock_irqsave(&ehca_cq_idr_lock, flags); 650 comp_event_callback(cq);
627 cq->nr_events--; 651 if (atomic_dec_and_test(&cq->nr_events))
628 if (!cq->nr_events)
629 wake_up(&cq->wait_completion); 652 wake_up(&cq->wait_completion);
630 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
631 653
632 spin_lock_irqsave(&cct->task_lock, flags); 654 spin_lock_irqsave(&cct->task_lock, flags);
633 spin_lock(&cq->task_lock); 655 spin_lock(&cq->task_lock);
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h
index 6ed06ee033ed..3346cb06cea6 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.h
+++ b/drivers/infiniband/hw/ehca/ehca_irq.h
@@ -47,7 +47,6 @@ struct ehca_shca;
47 47
48#include <linux/interrupt.h> 48#include <linux/interrupt.h>
49#include <linux/types.h> 49#include <linux/types.h>
50#include <asm/atomic.h>
51 50
52int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource); 51int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource);
53 52
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index 37e7fe0908cf..77aeca6a2c2f 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -49,6 +49,9 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props);
49int ehca_query_port(struct ib_device *ibdev, u8 port, 49int ehca_query_port(struct ib_device *ibdev, u8 port,
50 struct ib_port_attr *props); 50 struct ib_port_attr *props);
51 51
52int ehca_query_sma_attr(struct ehca_shca *shca, u8 port,
53 struct ehca_sma_attr *attr);
54
52int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey); 55int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey);
53 56
54int ehca_query_gid(struct ib_device *ibdev, u8 port, int index, 57int ehca_query_gid(struct ib_device *ibdev, u8 port, int index,
@@ -154,6 +157,21 @@ int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
154int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, 157int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
155 struct ib_recv_wr **bad_recv_wr); 158 struct ib_recv_wr **bad_recv_wr);
156 159
160int ehca_post_srq_recv(struct ib_srq *srq,
161 struct ib_recv_wr *recv_wr,
162 struct ib_recv_wr **bad_recv_wr);
163
164struct ib_srq *ehca_create_srq(struct ib_pd *pd,
165 struct ib_srq_init_attr *init_attr,
166 struct ib_udata *udata);
167
168int ehca_modify_srq(struct ib_srq *srq, struct ib_srq_attr *attr,
169 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
170
171int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
172
173int ehca_destroy_srq(struct ib_srq *srq);
174
157u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp, 175u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp,
158 struct ib_qp_init_attr *qp_init_attr); 176 struct ib_qp_init_attr *qp_init_attr);
159 177
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index c3f99f33b49c..28ba2dd24216 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -94,17 +94,15 @@ MODULE_PARM_DESC(poll_all_eqs,
94MODULE_PARM_DESC(static_rate, 94MODULE_PARM_DESC(static_rate,
95 "set permanent static rate (default: disabled)"); 95 "set permanent static rate (default: disabled)");
96MODULE_PARM_DESC(scaling_code, 96MODULE_PARM_DESC(scaling_code,
97 "set scaling code (0: disabled, 1: enabled/default)"); 97 "set scaling code (0: disabled/default, 1: enabled)");
98 98
99spinlock_t ehca_qp_idr_lock; 99DEFINE_RWLOCK(ehca_qp_idr_lock);
100spinlock_t ehca_cq_idr_lock; 100DEFINE_RWLOCK(ehca_cq_idr_lock);
101spinlock_t hcall_lock;
102DEFINE_IDR(ehca_qp_idr); 101DEFINE_IDR(ehca_qp_idr);
103DEFINE_IDR(ehca_cq_idr); 102DEFINE_IDR(ehca_cq_idr);
104 103
105 104static LIST_HEAD(shca_list); /* list of all registered ehcas */
106static struct list_head shca_list; /* list of all registered ehcas */ 105static DEFINE_SPINLOCK(shca_list_lock);
107static spinlock_t shca_list_lock;
108 106
109static struct timer_list poll_eqs_timer; 107static struct timer_list poll_eqs_timer;
110 108
@@ -205,11 +203,35 @@ static void ehca_destroy_slab_caches(void)
205#define EHCA_HCAAVER EHCA_BMASK_IBM(32,39) 203#define EHCA_HCAAVER EHCA_BMASK_IBM(32,39)
206#define EHCA_REVID EHCA_BMASK_IBM(40,63) 204#define EHCA_REVID EHCA_BMASK_IBM(40,63)
207 205
206static struct cap_descr {
207 u64 mask;
208 char *descr;
209} hca_cap_descr[] = {
210 { HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" },
211 { HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" },
212 { HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" },
213 { HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" },
214 { HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" },
215 { HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" },
216 { HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" },
217 { HCA_CAP_PORT_ACTIVE_EVENT, "HCA_CAP_PORT_ACTIVE_EVENT" },
218 { HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" },
219 { HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" },
220 { HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" },
221 { HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" },
222 { HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" },
223 { HCA_CAP_SRQ, "HCA_CAP_SRQ" },
224 { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" },
225 { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" },
226 { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" },
227};
228
208int ehca_sense_attributes(struct ehca_shca *shca) 229int ehca_sense_attributes(struct ehca_shca *shca)
209{ 230{
210 int ret = 0; 231 int i, ret = 0;
211 u64 h_ret; 232 u64 h_ret;
212 struct hipz_query_hca *rblock; 233 struct hipz_query_hca *rblock;
234 struct hipz_query_port *port;
213 235
214 rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); 236 rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
215 if (!rblock) { 237 if (!rblock) {
@@ -222,7 +244,7 @@ int ehca_sense_attributes(struct ehca_shca *shca)
222 ehca_gen_err("Cannot query device properties. h_ret=%lx", 244 ehca_gen_err("Cannot query device properties. h_ret=%lx",
223 h_ret); 245 h_ret);
224 ret = -EPERM; 246 ret = -EPERM;
225 goto num_ports1; 247 goto sense_attributes1;
226 } 248 }
227 249
228 if (ehca_nr_ports == 1) 250 if (ehca_nr_ports == 1)
@@ -242,18 +264,44 @@ int ehca_sense_attributes(struct ehca_shca *shca)
242 ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid); 264 ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid);
243 265
244 if ((hcaaver == 1) && (revid == 0)) 266 if ((hcaaver == 1) && (revid == 0))
245 shca->hw_level = 0; 267 shca->hw_level = 0x11;
246 else if ((hcaaver == 1) && (revid == 1)) 268 else if ((hcaaver == 1) && (revid == 1))
247 shca->hw_level = 1; 269 shca->hw_level = 0x12;
248 else if ((hcaaver == 1) && (revid == 2)) 270 else if ((hcaaver == 1) && (revid == 2))
249 shca->hw_level = 2; 271 shca->hw_level = 0x13;
272 else if ((hcaaver == 2) && (revid == 0))
273 shca->hw_level = 0x21;
274 else if ((hcaaver == 2) && (revid == 0x10))
275 shca->hw_level = 0x22;
276 else {
277 ehca_gen_warn("unknown hardware version"
278 " - assuming default level");
279 shca->hw_level = 0x22;
280 }
250 } 281 }
251 ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); 282 ehca_gen_dbg(" ... hardware level=%x", shca->hw_level);
252 283
253 shca->sport[0].rate = IB_RATE_30_GBPS; 284 shca->sport[0].rate = IB_RATE_30_GBPS;
254 shca->sport[1].rate = IB_RATE_30_GBPS; 285 shca->sport[1].rate = IB_RATE_30_GBPS;
255 286
256num_ports1: 287 shca->hca_cap = rblock->hca_cap_indicators;
288 ehca_gen_dbg(" ... HCA capabilities:");
289 for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++)
290 if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
291 ehca_gen_dbg(" %s", hca_cap_descr[i].descr);
292
293 port = (struct hipz_query_port *) rblock;
294 h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
295 if (h_ret != H_SUCCESS) {
296 ehca_gen_err("Cannot query port properties. h_ret=%lx",
297 h_ret);
298 ret = -EPERM;
299 goto sense_attributes1;
300 }
301
302 shca->max_mtu = port->max_mtu;
303
304sense_attributes1:
257 ehca_free_fw_ctrlblock(rblock); 305 ehca_free_fw_ctrlblock(rblock);
258 return ret; 306 return ret;
259} 307}
@@ -293,7 +341,7 @@ int ehca_init_device(struct ehca_shca *shca)
293 strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); 341 strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
294 shca->ib_device.owner = THIS_MODULE; 342 shca->ib_device.owner = THIS_MODULE;
295 343
296 shca->ib_device.uverbs_abi_ver = 6; 344 shca->ib_device.uverbs_abi_ver = 7;
297 shca->ib_device.uverbs_cmd_mask = 345 shca->ib_device.uverbs_cmd_mask =
298 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 346 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
299 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 347 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
@@ -361,6 +409,20 @@ int ehca_init_device(struct ehca_shca *shca)
361 /* shca->ib_device.process_mad = ehca_process_mad; */ 409 /* shca->ib_device.process_mad = ehca_process_mad; */
362 shca->ib_device.mmap = ehca_mmap; 410 shca->ib_device.mmap = ehca_mmap;
363 411
412 if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
413 shca->ib_device.uverbs_cmd_mask |=
414 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
415 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
416 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
417 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
418
419 shca->ib_device.create_srq = ehca_create_srq;
420 shca->ib_device.modify_srq = ehca_modify_srq;
421 shca->ib_device.query_srq = ehca_query_srq;
422 shca->ib_device.destroy_srq = ehca_destroy_srq;
423 shca->ib_device.post_srq_recv = ehca_post_srq_recv;
424 }
425
364 return ret; 426 return ret;
365} 427}
366 428
@@ -800,14 +862,6 @@ int __init ehca_module_init(void)
800 862
801 printk(KERN_INFO "eHCA Infiniband Device Driver " 863 printk(KERN_INFO "eHCA Infiniband Device Driver "
802 "(Rel.: SVNEHCA_0023)\n"); 864 "(Rel.: SVNEHCA_0023)\n");
803 idr_init(&ehca_qp_idr);
804 idr_init(&ehca_cq_idr);
805 spin_lock_init(&ehca_qp_idr_lock);
806 spin_lock_init(&ehca_cq_idr_lock);
807 spin_lock_init(&hcall_lock);
808
809 INIT_LIST_HEAD(&shca_list);
810 spin_lock_init(&shca_list_lock);
811 865
812 if ((ret = ehca_create_comp_pool())) { 866 if ((ret = ehca_create_comp_pool())) {
813 ehca_gen_err("Cannot create comp pool."); 867 ehca_gen_err("Cannot create comp pool.");
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index b5bc787c77b6..74671250303f 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -3,7 +3,9 @@
3 * 3 *
4 * QP functions 4 * QP functions
5 * 5 *
6 * Authors: Waleri Fomin <fomin@de.ibm.com> 6 * Authors: Joachim Fenkes <fenkes@de.ibm.com>
7 * Stefan Roscher <stefan.roscher@de.ibm.com>
8 * Waleri Fomin <fomin@de.ibm.com>
7 * Hoang-Nam Nguyen <hnguyen@de.ibm.com> 9 * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
8 * Reinhard Ernst <rernst@de.ibm.com> 10 * Reinhard Ernst <rernst@de.ibm.com>
9 * Heiko J Schick <schickhj@de.ibm.com> 11 * Heiko J Schick <schickhj@de.ibm.com>
@@ -234,13 +236,6 @@ static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate,
234 return index; 236 return index;
235} 237}
236 238
237enum ehca_service_type {
238 ST_RC = 0,
239 ST_UC = 1,
240 ST_RD = 2,
241 ST_UD = 3
242};
243
244/* 239/*
245 * ibqptype2servicetype returns hcp service type corresponding to given 240 * ibqptype2servicetype returns hcp service type corresponding to given
246 * ib qp type used by create_qp() 241 * ib qp type used by create_qp()
@@ -268,15 +263,34 @@ static inline int ibqptype2servicetype(enum ib_qp_type ibqptype)
268} 263}
269 264
270/* 265/*
271 * init_qp_queues initializes/constructs r/squeue and registers queue pages. 266 * init userspace queue info from ipz_queue data
272 */ 267 */
273static inline int init_qp_queues(struct ehca_shca *shca, 268static inline void queue2resp(struct ipzu_queue_resp *resp,
274 struct ehca_qp *my_qp, 269 struct ipz_queue *queue)
275 int nr_sq_pages, 270{
276 int nr_rq_pages, 271 resp->qe_size = queue->qe_size;
277 int swqe_size, 272 resp->act_nr_of_sg = queue->act_nr_of_sg;
278 int rwqe_size, 273 resp->queue_length = queue->queue_length;
279 int nr_send_sges, int nr_receive_sges) 274 resp->pagesize = queue->pagesize;
275 resp->toggle_state = queue->toggle_state;
276}
277
278static inline int ll_qp_msg_size(int nr_sge)
279{
280 return 128 << nr_sge;
281}
282
283/*
284 * init_qp_queue initializes/constructs r/squeue and registers queue pages.
285 */
286static inline int init_qp_queue(struct ehca_shca *shca,
287 struct ehca_qp *my_qp,
288 struct ipz_queue *queue,
289 int q_type,
290 u64 expected_hret,
291 int nr_q_pages,
292 int wqe_size,
293 int nr_sges)
280{ 294{
281 int ret, cnt, ipz_rc; 295 int ret, cnt, ipz_rc;
282 void *vpage; 296 void *vpage;
@@ -284,127 +298,93 @@ static inline int init_qp_queues(struct ehca_shca *shca,
284 struct ib_device *ib_dev = &shca->ib_device; 298 struct ib_device *ib_dev = &shca->ib_device;
285 struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle; 299 struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle;
286 300
287 ipz_rc = ipz_queue_ctor(&my_qp->ipz_squeue, 301 if (!nr_q_pages)
288 nr_sq_pages, 302 return 0;
289 EHCA_PAGESIZE, swqe_size, nr_send_sges); 303
304 ipz_rc = ipz_queue_ctor(queue, nr_q_pages, EHCA_PAGESIZE,
305 wqe_size, nr_sges);
290 if (!ipz_rc) { 306 if (!ipz_rc) {
291 ehca_err(ib_dev,"Cannot allocate page for squeue. ipz_rc=%x", 307 ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%x",
292 ipz_rc); 308 ipz_rc);
293 return -EBUSY; 309 return -EBUSY;
294 } 310 }
295 311
296 ipz_rc = ipz_queue_ctor(&my_qp->ipz_rqueue, 312 /* register queue pages */
297 nr_rq_pages, 313 for (cnt = 0; cnt < nr_q_pages; cnt++) {
298 EHCA_PAGESIZE, rwqe_size, nr_receive_sges); 314 vpage = ipz_qpageit_get_inc(queue);
299 if (!ipz_rc) {
300 ehca_err(ib_dev, "Cannot allocate page for rqueue. ipz_rc=%x",
301 ipz_rc);
302 ret = -EBUSY;
303 goto init_qp_queues0;
304 }
305 /* register SQ pages */
306 for (cnt = 0; cnt < nr_sq_pages; cnt++) {
307 vpage = ipz_qpageit_get_inc(&my_qp->ipz_squeue);
308 if (!vpage) { 315 if (!vpage) {
309 ehca_err(ib_dev, "SQ ipz_qpageit_get_inc() " 316 ehca_err(ib_dev, "ipz_qpageit_get_inc() "
310 "failed p_vpage= %p", vpage); 317 "failed p_vpage= %p", vpage);
311 ret = -EINVAL; 318 ret = -EINVAL;
312 goto init_qp_queues1; 319 goto init_qp_queue1;
313 } 320 }
314 rpage = virt_to_abs(vpage); 321 rpage = virt_to_abs(vpage);
315 322
316 h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, 323 h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
317 my_qp->ipz_qp_handle, 324 my_qp->ipz_qp_handle,
318 &my_qp->pf, 0, 0, 325 NULL, 0, q_type,
319 rpage, 1, 326 rpage, 1,
320 my_qp->galpas.kernel); 327 my_qp->galpas.kernel);
321 if (h_ret < H_SUCCESS) { 328 if (cnt == (nr_q_pages - 1)) { /* last page! */
322 ehca_err(ib_dev, "SQ hipz_qp_register_rpage()" 329 if (h_ret != expected_hret) {
323 " failed rc=%lx", h_ret); 330 ehca_err(ib_dev, "hipz_qp_register_rpage() "
324 ret = ehca2ib_return_code(h_ret);
325 goto init_qp_queues1;
326 }
327 }
328
329 ipz_qeit_reset(&my_qp->ipz_squeue);
330
331 /* register RQ pages */
332 for (cnt = 0; cnt < nr_rq_pages; cnt++) {
333 vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
334 if (!vpage) {
335 ehca_err(ib_dev, "RQ ipz_qpageit_get_inc() "
336 "failed p_vpage = %p", vpage);
337 ret = -EINVAL;
338 goto init_qp_queues1;
339 }
340
341 rpage = virt_to_abs(vpage);
342
343 h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
344 my_qp->ipz_qp_handle,
345 &my_qp->pf, 0, 1,
346 rpage, 1,my_qp->galpas.kernel);
347 if (h_ret < H_SUCCESS) {
348 ehca_err(ib_dev, "RQ hipz_qp_register_rpage() failed "
349 "rc=%lx", h_ret);
350 ret = ehca2ib_return_code(h_ret);
351 goto init_qp_queues1;
352 }
353 if (cnt == (nr_rq_pages - 1)) { /* last page! */
354 if (h_ret != H_SUCCESS) {
355 ehca_err(ib_dev, "RQ hipz_qp_register_rpage() "
356 "h_ret= %lx ", h_ret); 331 "h_ret= %lx ", h_ret);
357 ret = ehca2ib_return_code(h_ret); 332 ret = ehca2ib_return_code(h_ret);
358 goto init_qp_queues1; 333 goto init_qp_queue1;
359 } 334 }
360 vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue); 335 vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
361 if (vpage) { 336 if (vpage) {
362 ehca_err(ib_dev, "ipz_qpageit_get_inc() " 337 ehca_err(ib_dev, "ipz_qpageit_get_inc() "
363 "should not succeed vpage=%p", vpage); 338 "should not succeed vpage=%p", vpage);
364 ret = -EINVAL; 339 ret = -EINVAL;
365 goto init_qp_queues1; 340 goto init_qp_queue1;
366 } 341 }
367 } else { 342 } else {
368 if (h_ret != H_PAGE_REGISTERED) { 343 if (h_ret != H_PAGE_REGISTERED) {
369 ehca_err(ib_dev, "RQ hipz_qp_register_rpage() " 344 ehca_err(ib_dev, "hipz_qp_register_rpage() "
370 "h_ret= %lx ", h_ret); 345 "h_ret= %lx ", h_ret);
371 ret = ehca2ib_return_code(h_ret); 346 ret = ehca2ib_return_code(h_ret);
372 goto init_qp_queues1; 347 goto init_qp_queue1;
373 } 348 }
374 } 349 }
375 } 350 }
376 351
377 ipz_qeit_reset(&my_qp->ipz_rqueue); 352 ipz_qeit_reset(queue);
378 353
379 return 0; 354 return 0;
380 355
381init_qp_queues1: 356init_qp_queue1:
382 ipz_queue_dtor(&my_qp->ipz_rqueue); 357 ipz_queue_dtor(queue);
383init_qp_queues0:
384 ipz_queue_dtor(&my_qp->ipz_squeue);
385 return ret; 358 return ret;
386} 359}
387 360
388struct ib_qp *ehca_create_qp(struct ib_pd *pd, 361/*
389 struct ib_qp_init_attr *init_attr, 362 * Create an ib_qp struct that is either a QP or an SRQ, depending on
390 struct ib_udata *udata) 363 * the value of the is_srq parameter. If init_attr and srq_init_attr share
364 * fields, the field out of init_attr is used.
365 */
366struct ehca_qp *internal_create_qp(struct ib_pd *pd,
367 struct ib_qp_init_attr *init_attr,
368 struct ib_srq_init_attr *srq_init_attr,
369 struct ib_udata *udata, int is_srq)
391{ 370{
392 static int da_rc_msg_size[]={ 128, 256, 512, 1024, 2048, 4096 };
393 static int da_ud_sq_msg_size[]={ 128, 384, 896, 1920, 3968 };
394 struct ehca_qp *my_qp; 371 struct ehca_qp *my_qp;
395 struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); 372 struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
396 struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, 373 struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
397 ib_device); 374 ib_device);
398 struct ib_ucontext *context = NULL; 375 struct ib_ucontext *context = NULL;
399 u64 h_ret; 376 u64 h_ret;
400 int max_send_sge, max_recv_sge, ret; 377 int is_llqp = 0, has_srq = 0;
378 int qp_type, max_send_sge, max_recv_sge, ret;
401 379
402 /* h_call's out parameters */ 380 /* h_call's out parameters */
403 struct ehca_alloc_qp_parms parms; 381 struct ehca_alloc_qp_parms parms;
404 u32 swqe_size = 0, rwqe_size = 0; 382 u32 swqe_size = 0, rwqe_size = 0, ib_qp_num;
405 u8 daqp_completion, isdaqp;
406 unsigned long flags; 383 unsigned long flags;
407 384
385 memset(&parms, 0, sizeof(parms));
386 qp_type = init_attr->qp_type;
387
408 if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR && 388 if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR &&
409 init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { 389 init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
410 ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed", 390 ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed",
@@ -412,41 +392,98 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
412 return ERR_PTR(-EINVAL); 392 return ERR_PTR(-EINVAL);
413 } 393 }
414 394
415 /* save daqp completion bits */ 395 /* save LLQP info */
416 daqp_completion = init_attr->qp_type & 0x60; 396 if (qp_type & 0x80) {
417 /* save daqp bit */ 397 is_llqp = 1;
418 isdaqp = (init_attr->qp_type & 0x80) ? 1 : 0; 398 parms.ext_type = EQPT_LLQP;
419 init_attr->qp_type = init_attr->qp_type & 0x1F; 399 parms.ll_comp_flags = qp_type & LLQP_COMP_MASK;
400 }
401 qp_type &= 0x1F;
402 init_attr->qp_type &= 0x1F;
420 403
421 if (init_attr->qp_type != IB_QPT_UD && 404 /* handle SRQ base QPs */
422 init_attr->qp_type != IB_QPT_SMI && 405 if (init_attr->srq) {
423 init_attr->qp_type != IB_QPT_GSI && 406 struct ehca_qp *my_srq =
424 init_attr->qp_type != IB_QPT_UC && 407 container_of(init_attr->srq, struct ehca_qp, ib_srq);
425 init_attr->qp_type != IB_QPT_RC) { 408
426 ehca_err(pd->device, "wrong QP Type=%x", init_attr->qp_type); 409 has_srq = 1;
427 return ERR_PTR(-EINVAL); 410 parms.ext_type = EQPT_SRQBASE;
411 parms.srq_qpn = my_srq->real_qp_num;
412 parms.srq_token = my_srq->token;
428 } 413 }
429 if ((init_attr->qp_type != IB_QPT_RC && init_attr->qp_type != IB_QPT_UD) 414
430 && isdaqp) { 415 if (is_llqp && has_srq) {
431 ehca_err(pd->device, "unsupported LL QP Type=%x", 416 ehca_err(pd->device, "LLQPs can't have an SRQ");
432 init_attr->qp_type);
433 return ERR_PTR(-EINVAL); 417 return ERR_PTR(-EINVAL);
434 } else if (init_attr->qp_type == IB_QPT_RC && isdaqp && 418 }
435 (init_attr->cap.max_send_wr > 255 || 419
436 init_attr->cap.max_recv_wr > 255 )) { 420 /* handle SRQs */
437 ehca_err(pd->device, "Invalid Number of max_sq_wr =%x " 421 if (is_srq) {
438 "or max_rq_wr=%x for QP Type=%x", 422 parms.ext_type = EQPT_SRQ;
439 init_attr->cap.max_send_wr, 423 parms.srq_limit = srq_init_attr->attr.srq_limit;
440 init_attr->cap.max_recv_wr,init_attr->qp_type); 424 if (init_attr->cap.max_recv_sge > 3) {
441 return ERR_PTR(-EINVAL); 425 ehca_err(pd->device, "no more than three SGEs "
442 } else if (init_attr->qp_type == IB_QPT_UD && isdaqp && 426 "supported for SRQ pd=%p max_sge=%x",
443 init_attr->cap.max_send_wr > 255) { 427 pd, init_attr->cap.max_recv_sge);
444 ehca_err(pd->device, 428 return ERR_PTR(-EINVAL);
445 "Invalid Number of max_send_wr=%x for UD QP_TYPE=%x", 429 }
446 init_attr->cap.max_send_wr, init_attr->qp_type); 430 }
431
432 /* check QP type */
433 if (qp_type != IB_QPT_UD &&
434 qp_type != IB_QPT_UC &&
435 qp_type != IB_QPT_RC &&
436 qp_type != IB_QPT_SMI &&
437 qp_type != IB_QPT_GSI) {
438 ehca_err(pd->device, "wrong QP Type=%x", qp_type);
447 return ERR_PTR(-EINVAL); 439 return ERR_PTR(-EINVAL);
448 } 440 }
449 441
442 if (is_llqp) {
443 switch (qp_type) {
444 case IB_QPT_RC:
445 if ((init_attr->cap.max_send_wr > 255) ||
446 (init_attr->cap.max_recv_wr > 255)) {
447 ehca_err(pd->device,
448 "Invalid Number of max_sq_wr=%x "
449 "or max_rq_wr=%x for RC LLQP",
450 init_attr->cap.max_send_wr,
451 init_attr->cap.max_recv_wr);
452 return ERR_PTR(-EINVAL);
453 }
454 break;
455 case IB_QPT_UD:
456 if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) {
457 ehca_err(pd->device, "UD LLQP not supported "
458 "by this adapter");
459 return ERR_PTR(-ENOSYS);
460 }
461 if (!(init_attr->cap.max_send_sge <= 5
462 && init_attr->cap.max_send_sge >= 1
463 && init_attr->cap.max_recv_sge <= 5
464 && init_attr->cap.max_recv_sge >= 1)) {
465 ehca_err(pd->device,
466 "Invalid Number of max_send_sge=%x "
467 "or max_recv_sge=%x for UD LLQP",
468 init_attr->cap.max_send_sge,
469 init_attr->cap.max_recv_sge);
470 return ERR_PTR(-EINVAL);
471 } else if (init_attr->cap.max_send_wr > 255) {
472 ehca_err(pd->device,
473 "Invalid Number of "
474 "ax_send_wr=%x for UD QP_TYPE=%x",
475 init_attr->cap.max_send_wr, qp_type);
476 return ERR_PTR(-EINVAL);
477 }
478 break;
479 default:
480 ehca_err(pd->device, "unsupported LL QP Type=%x",
481 qp_type);
482 return ERR_PTR(-EINVAL);
483 break;
484 }
485 }
486
450 if (pd->uobject && udata) 487 if (pd->uobject && udata)
451 context = pd->uobject->context; 488 context = pd->uobject->context;
452 489
@@ -456,16 +493,17 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
456 return ERR_PTR(-ENOMEM); 493 return ERR_PTR(-ENOMEM);
457 } 494 }
458 495
459 memset (&parms, 0, sizeof(struct ehca_alloc_qp_parms));
460 spin_lock_init(&my_qp->spinlock_s); 496 spin_lock_init(&my_qp->spinlock_s);
461 spin_lock_init(&my_qp->spinlock_r); 497 spin_lock_init(&my_qp->spinlock_r);
498 my_qp->qp_type = qp_type;
499 my_qp->ext_type = parms.ext_type;
462 500
463 my_qp->recv_cq = 501 if (init_attr->recv_cq)
464 container_of(init_attr->recv_cq, struct ehca_cq, ib_cq); 502 my_qp->recv_cq =
465 my_qp->send_cq = 503 container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
466 container_of(init_attr->send_cq, struct ehca_cq, ib_cq); 504 if (init_attr->send_cq)
467 505 my_qp->send_cq =
468 my_qp->init_attr = *init_attr; 506 container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
469 507
470 do { 508 do {
471 if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) { 509 if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) {
@@ -474,9 +512,9 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
474 goto create_qp_exit0; 512 goto create_qp_exit0;
475 } 513 }
476 514
477 spin_lock_irqsave(&ehca_qp_idr_lock, flags); 515 write_lock_irqsave(&ehca_qp_idr_lock, flags);
478 ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token); 516 ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token);
479 spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); 517 write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
480 518
481 } while (ret == -EAGAIN); 519 } while (ret == -EAGAIN);
482 520
@@ -486,10 +524,10 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
486 goto create_qp_exit0; 524 goto create_qp_exit0;
487 } 525 }
488 526
489 parms.servicetype = ibqptype2servicetype(init_attr->qp_type); 527 parms.servicetype = ibqptype2servicetype(qp_type);
490 if (parms.servicetype < 0) { 528 if (parms.servicetype < 0) {
491 ret = -EINVAL; 529 ret = -EINVAL;
492 ehca_err(pd->device, "Invalid qp_type=%x", init_attr->qp_type); 530 ehca_err(pd->device, "Invalid qp_type=%x", qp_type);
493 goto create_qp_exit0; 531 goto create_qp_exit0;
494 } 532 }
495 533
@@ -501,21 +539,25 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
501 /* UD_AV CIRCUMVENTION */ 539 /* UD_AV CIRCUMVENTION */
502 max_send_sge = init_attr->cap.max_send_sge; 540 max_send_sge = init_attr->cap.max_send_sge;
503 max_recv_sge = init_attr->cap.max_recv_sge; 541 max_recv_sge = init_attr->cap.max_recv_sge;
504 if (IB_QPT_UD == init_attr->qp_type || 542 if (parms.servicetype == ST_UD && !is_llqp) {
505 IB_QPT_GSI == init_attr->qp_type ||
506 IB_QPT_SMI == init_attr->qp_type) {
507 max_send_sge += 2; 543 max_send_sge += 2;
508 max_recv_sge += 2; 544 max_recv_sge += 2;
509 } 545 }
510 546
511 parms.ipz_eq_handle = shca->eq.ipz_eq_handle; 547 parms.token = my_qp->token;
512 parms.daqp_ctrl = isdaqp | daqp_completion; 548 parms.eq_handle = shca->eq.ipz_eq_handle;
513 parms.pd = my_pd->fw_pd; 549 parms.pd = my_pd->fw_pd;
514 parms.max_recv_sge = max_recv_sge; 550 if (my_qp->send_cq)
515 parms.max_send_sge = max_send_sge; 551 parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle;
552 if (my_qp->recv_cq)
553 parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle;
516 554
517 h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, my_qp, &parms); 555 parms.max_send_wr = init_attr->cap.max_send_wr;
556 parms.max_recv_wr = init_attr->cap.max_recv_wr;
557 parms.max_send_sge = max_send_sge;
558 parms.max_recv_sge = max_recv_sge;
518 559
560 h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms);
519 if (h_ret != H_SUCCESS) { 561 if (h_ret != H_SUCCESS) {
520 ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lx", 562 ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lx",
521 h_ret); 563 h_ret);
@@ -523,18 +565,20 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
523 goto create_qp_exit1; 565 goto create_qp_exit1;
524 } 566 }
525 567
526 my_qp->ib_qp.qp_num = my_qp->real_qp_num; 568 ib_qp_num = my_qp->real_qp_num = parms.real_qp_num;
569 my_qp->ipz_qp_handle = parms.qp_handle;
570 my_qp->galpas = parms.galpas;
527 571
528 switch (init_attr->qp_type) { 572 switch (qp_type) {
529 case IB_QPT_RC: 573 case IB_QPT_RC:
530 if (isdaqp == 0) { 574 if (!is_llqp) {
531 swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[ 575 swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
532 (parms.act_nr_send_sges)]); 576 (parms.act_nr_send_sges)]);
533 rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[ 577 rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
534 (parms.act_nr_recv_sges)]); 578 (parms.act_nr_recv_sges)]);
535 } else { /* for daqp we need to use msg size, not wqe size */ 579 } else { /* for LLQP we need to use msg size, not wqe size */
536 swqe_size = da_rc_msg_size[max_send_sge]; 580 swqe_size = ll_qp_msg_size(max_send_sge);
537 rwqe_size = da_rc_msg_size[max_recv_sge]; 581 rwqe_size = ll_qp_msg_size(max_recv_sge);
538 parms.act_nr_send_sges = 1; 582 parms.act_nr_send_sges = 1;
539 parms.act_nr_recv_sges = 1; 583 parms.act_nr_recv_sges = 1;
540 } 584 }
@@ -549,29 +593,27 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
549 case IB_QPT_UD: 593 case IB_QPT_UD:
550 case IB_QPT_GSI: 594 case IB_QPT_GSI:
551 case IB_QPT_SMI: 595 case IB_QPT_SMI:
552 /* UD circumvention */ 596 if (is_llqp) {
553 parms.act_nr_recv_sges -= 2; 597 swqe_size = ll_qp_msg_size(parms.act_nr_send_sges);
554 parms.act_nr_send_sges -= 2; 598 rwqe_size = ll_qp_msg_size(parms.act_nr_recv_sges);
555 if (isdaqp) {
556 swqe_size = da_ud_sq_msg_size[max_send_sge];
557 rwqe_size = da_rc_msg_size[max_recv_sge];
558 parms.act_nr_send_sges = 1; 599 parms.act_nr_send_sges = 1;
559 parms.act_nr_recv_sges = 1; 600 parms.act_nr_recv_sges = 1;
560 } else { 601 } else {
602 /* UD circumvention */
603 parms.act_nr_send_sges -= 2;
604 parms.act_nr_recv_sges -= 2;
561 swqe_size = offsetof(struct ehca_wqe, 605 swqe_size = offsetof(struct ehca_wqe,
562 u.ud_av.sg_list[parms.act_nr_send_sges]); 606 u.ud_av.sg_list[parms.act_nr_send_sges]);
563 rwqe_size = offsetof(struct ehca_wqe, 607 rwqe_size = offsetof(struct ehca_wqe,
564 u.ud_av.sg_list[parms.act_nr_recv_sges]); 608 u.ud_av.sg_list[parms.act_nr_recv_sges]);
565 } 609 }
566 610
567 if (IB_QPT_GSI == init_attr->qp_type || 611 if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) {
568 IB_QPT_SMI == init_attr->qp_type) {
569 parms.act_nr_send_wqes = init_attr->cap.max_send_wr; 612 parms.act_nr_send_wqes = init_attr->cap.max_send_wr;
570 parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr; 613 parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr;
571 parms.act_nr_send_sges = init_attr->cap.max_send_sge; 614 parms.act_nr_send_sges = init_attr->cap.max_send_sge;
572 parms.act_nr_recv_sges = init_attr->cap.max_recv_sge; 615 parms.act_nr_recv_sges = init_attr->cap.max_recv_sge;
573 my_qp->ib_qp.qp_num = 616 ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1;
574 (init_attr->qp_type == IB_QPT_SMI) ? 0 : 1;
575 } 617 }
576 618
577 break; 619 break;
@@ -580,108 +622,234 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
580 break; 622 break;
581 } 623 }
582 624
583 /* initializes r/squeue and registers queue pages */ 625 /* initialize r/squeue and register queue pages */
584 ret = init_qp_queues(shca, my_qp, 626 if (HAS_SQ(my_qp)) {
585 parms.nr_sq_pages, parms.nr_rq_pages, 627 ret = init_qp_queue(
586 swqe_size, rwqe_size, 628 shca, my_qp, &my_qp->ipz_squeue, 0,
587 parms.act_nr_send_sges, parms.act_nr_recv_sges); 629 HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS,
588 if (ret) { 630 parms.nr_sq_pages, swqe_size,
589 ehca_err(pd->device, 631 parms.act_nr_send_sges);
590 "Couldn't initialize r/squeue and pages ret=%x", ret); 632 if (ret) {
591 goto create_qp_exit2; 633 ehca_err(pd->device, "Couldn't initialize squeue "
634 "and pages ret=%x", ret);
635 goto create_qp_exit2;
636 }
592 } 637 }
593 638
594 my_qp->ib_qp.pd = &my_pd->ib_pd; 639 if (HAS_RQ(my_qp)) {
595 my_qp->ib_qp.device = my_pd->ib_pd.device; 640 ret = init_qp_queue(
641 shca, my_qp, &my_qp->ipz_rqueue, 1,
642 H_SUCCESS, parms.nr_rq_pages, rwqe_size,
643 parms.act_nr_recv_sges);
644 if (ret) {
645 ehca_err(pd->device, "Couldn't initialize rqueue "
646 "and pages ret=%x", ret);
647 goto create_qp_exit3;
648 }
649 }
596 650
597 my_qp->ib_qp.recv_cq = init_attr->recv_cq; 651 if (is_srq) {
598 my_qp->ib_qp.send_cq = init_attr->send_cq; 652 my_qp->ib_srq.pd = &my_pd->ib_pd;
653 my_qp->ib_srq.device = my_pd->ib_pd.device;
599 654
600 my_qp->ib_qp.qp_type = init_attr->qp_type; 655 my_qp->ib_srq.srq_context = init_attr->qp_context;
656 my_qp->ib_srq.event_handler = init_attr->event_handler;
657 } else {
658 my_qp->ib_qp.qp_num = ib_qp_num;
659 my_qp->ib_qp.pd = &my_pd->ib_pd;
660 my_qp->ib_qp.device = my_pd->ib_pd.device;
661
662 my_qp->ib_qp.recv_cq = init_attr->recv_cq;
663 my_qp->ib_qp.send_cq = init_attr->send_cq;
601 664
602 my_qp->qp_type = init_attr->qp_type; 665 my_qp->ib_qp.qp_type = qp_type;
603 my_qp->ib_qp.srq = init_attr->srq; 666 my_qp->ib_qp.srq = init_attr->srq;
604 667
605 my_qp->ib_qp.qp_context = init_attr->qp_context; 668 my_qp->ib_qp.qp_context = init_attr->qp_context;
606 my_qp->ib_qp.event_handler = init_attr->event_handler; 669 my_qp->ib_qp.event_handler = init_attr->event_handler;
670 }
607 671
608 init_attr->cap.max_inline_data = 0; /* not supported yet */ 672 init_attr->cap.max_inline_data = 0; /* not supported yet */
609 init_attr->cap.max_recv_sge = parms.act_nr_recv_sges; 673 init_attr->cap.max_recv_sge = parms.act_nr_recv_sges;
610 init_attr->cap.max_recv_wr = parms.act_nr_recv_wqes; 674 init_attr->cap.max_recv_wr = parms.act_nr_recv_wqes;
611 init_attr->cap.max_send_sge = parms.act_nr_send_sges; 675 init_attr->cap.max_send_sge = parms.act_nr_send_sges;
612 init_attr->cap.max_send_wr = parms.act_nr_send_wqes; 676 init_attr->cap.max_send_wr = parms.act_nr_send_wqes;
677 my_qp->init_attr = *init_attr;
613 678
614 /* NOTE: define_apq0() not supported yet */ 679 /* NOTE: define_apq0() not supported yet */
615 if (init_attr->qp_type == IB_QPT_GSI) { 680 if (qp_type == IB_QPT_GSI) {
616 h_ret = ehca_define_sqp(shca, my_qp, init_attr); 681 h_ret = ehca_define_sqp(shca, my_qp, init_attr);
617 if (h_ret != H_SUCCESS) { 682 if (h_ret != H_SUCCESS) {
618 ehca_err(pd->device, "ehca_define_sqp() failed rc=%lx", 683 ehca_err(pd->device, "ehca_define_sqp() failed rc=%lx",
619 h_ret); 684 h_ret);
620 ret = ehca2ib_return_code(h_ret); 685 ret = ehca2ib_return_code(h_ret);
621 goto create_qp_exit3; 686 goto create_qp_exit4;
622 } 687 }
623 } 688 }
624 if (init_attr->send_cq) { 689
625 struct ehca_cq *cq = container_of(init_attr->send_cq, 690 if (my_qp->send_cq) {
626 struct ehca_cq, ib_cq); 691 ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp);
627 ret = ehca_cq_assign_qp(cq, my_qp);
628 if (ret) { 692 if (ret) {
629 ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x", 693 ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x",
630 ret); 694 ret);
631 goto create_qp_exit3; 695 goto create_qp_exit4;
632 } 696 }
633 my_qp->send_cq = cq;
634 } 697 }
698
635 /* copy queues, galpa data to user space */ 699 /* copy queues, galpa data to user space */
636 if (context && udata) { 700 if (context && udata) {
637 struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue;
638 struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue;
639 struct ehca_create_qp_resp resp; 701 struct ehca_create_qp_resp resp;
640 memset(&resp, 0, sizeof(resp)); 702 memset(&resp, 0, sizeof(resp));
641 703
642 resp.qp_num = my_qp->real_qp_num; 704 resp.qp_num = my_qp->real_qp_num;
643 resp.token = my_qp->token; 705 resp.token = my_qp->token;
644 resp.qp_type = my_qp->qp_type; 706 resp.qp_type = my_qp->qp_type;
707 resp.ext_type = my_qp->ext_type;
645 resp.qkey = my_qp->qkey; 708 resp.qkey = my_qp->qkey;
646 resp.real_qp_num = my_qp->real_qp_num; 709 resp.real_qp_num = my_qp->real_qp_num;
647 /* rqueue properties */ 710 if (HAS_SQ(my_qp))
648 resp.ipz_rqueue.qe_size = ipz_rqueue->qe_size; 711 queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue);
649 resp.ipz_rqueue.act_nr_of_sg = ipz_rqueue->act_nr_of_sg; 712 if (HAS_RQ(my_qp))
650 resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length; 713 queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue);
651 resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize; 714
652 resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state;
653 /* squeue properties */
654 resp.ipz_squeue.qe_size = ipz_squeue->qe_size;
655 resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg;
656 resp.ipz_squeue.queue_length = ipz_squeue->queue_length;
657 resp.ipz_squeue.pagesize = ipz_squeue->pagesize;
658 resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state;
659 if (ib_copy_to_udata(udata, &resp, sizeof resp)) { 715 if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
660 ehca_err(pd->device, "Copy to udata failed"); 716 ehca_err(pd->device, "Copy to udata failed");
661 ret = -EINVAL; 717 ret = -EINVAL;
662 goto create_qp_exit3; 718 goto create_qp_exit4;
663 } 719 }
664 } 720 }
665 721
666 return &my_qp->ib_qp; 722 return my_qp;
723
724create_qp_exit4:
725 if (HAS_RQ(my_qp))
726 ipz_queue_dtor(&my_qp->ipz_rqueue);
667 727
668create_qp_exit3: 728create_qp_exit3:
669 ipz_queue_dtor(&my_qp->ipz_rqueue); 729 if (HAS_SQ(my_qp))
670 ipz_queue_dtor(&my_qp->ipz_squeue); 730 ipz_queue_dtor(&my_qp->ipz_squeue);
671 731
672create_qp_exit2: 732create_qp_exit2:
673 hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); 733 hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
674 734
675create_qp_exit1: 735create_qp_exit1:
676 spin_lock_irqsave(&ehca_qp_idr_lock, flags); 736 write_lock_irqsave(&ehca_qp_idr_lock, flags);
677 idr_remove(&ehca_qp_idr, my_qp->token); 737 idr_remove(&ehca_qp_idr, my_qp->token);
678 spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); 738 write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
679 739
680create_qp_exit0: 740create_qp_exit0:
681 kmem_cache_free(qp_cache, my_qp); 741 kmem_cache_free(qp_cache, my_qp);
682 return ERR_PTR(ret); 742 return ERR_PTR(ret);
683} 743}
684 744
745struct ib_qp *ehca_create_qp(struct ib_pd *pd,
746 struct ib_qp_init_attr *qp_init_attr,
747 struct ib_udata *udata)
748{
749 struct ehca_qp *ret;
750
751 ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0);
752 return IS_ERR(ret) ? (struct ib_qp *) ret : &ret->ib_qp;
753}
754
755int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
756 struct ib_uobject *uobject);
757
758struct ib_srq *ehca_create_srq(struct ib_pd *pd,
759 struct ib_srq_init_attr *srq_init_attr,
760 struct ib_udata *udata)
761{
762 struct ib_qp_init_attr qp_init_attr;
763 struct ehca_qp *my_qp;
764 struct ib_srq *ret;
765 struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
766 ib_device);
767 struct hcp_modify_qp_control_block *mqpcb;
768 u64 hret, update_mask;
769
770 /* For common attributes, internal_create_qp() takes its info
771 * out of qp_init_attr, so copy all common attrs there.
772 */
773 memset(&qp_init_attr, 0, sizeof(qp_init_attr));
774 qp_init_attr.event_handler = srq_init_attr->event_handler;
775 qp_init_attr.qp_context = srq_init_attr->srq_context;
776 qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
777 qp_init_attr.qp_type = IB_QPT_RC;
778 qp_init_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr;
779 qp_init_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge;
780
781 my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1);
782 if (IS_ERR(my_qp))
783 return (struct ib_srq *) my_qp;
784
785 /* copy back return values */
786 srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr;
787 srq_init_attr->attr.max_sge = qp_init_attr.cap.max_recv_sge;
788
789 /* drive SRQ into RTR state */
790 mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
791 if (!mqpcb) {
792 ehca_err(pd->device, "Could not get zeroed page for mqpcb "
793 "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
794 ret = ERR_PTR(-ENOMEM);
795 goto create_srq1;
796 }
797
798 mqpcb->qp_state = EHCA_QPS_INIT;
799 mqpcb->prim_phys_port = 1;
800 update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
801 hret = hipz_h_modify_qp(shca->ipz_hca_handle,
802 my_qp->ipz_qp_handle,
803 &my_qp->pf,
804 update_mask,
805 mqpcb, my_qp->galpas.kernel);
806 if (hret != H_SUCCESS) {
807 ehca_err(pd->device, "Could not modify SRQ to INIT"
808 "ehca_qp=%p qp_num=%x hret=%lx",
809 my_qp, my_qp->real_qp_num, hret);
810 goto create_srq2;
811 }
812
813 mqpcb->qp_enable = 1;
814 update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
815 hret = hipz_h_modify_qp(shca->ipz_hca_handle,
816 my_qp->ipz_qp_handle,
817 &my_qp->pf,
818 update_mask,
819 mqpcb, my_qp->galpas.kernel);
820 if (hret != H_SUCCESS) {
821 ehca_err(pd->device, "Could not enable SRQ"
822 "ehca_qp=%p qp_num=%x hret=%lx",
823 my_qp, my_qp->real_qp_num, hret);
824 goto create_srq2;
825 }
826
827 mqpcb->qp_state = EHCA_QPS_RTR;
828 update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
829 hret = hipz_h_modify_qp(shca->ipz_hca_handle,
830 my_qp->ipz_qp_handle,
831 &my_qp->pf,
832 update_mask,
833 mqpcb, my_qp->galpas.kernel);
834 if (hret != H_SUCCESS) {
835 ehca_err(pd->device, "Could not modify SRQ to RTR"
836 "ehca_qp=%p qp_num=%x hret=%lx",
837 my_qp, my_qp->real_qp_num, hret);
838 goto create_srq2;
839 }
840
841 return &my_qp->ib_srq;
842
843create_srq2:
844 ret = ERR_PTR(ehca2ib_return_code(hret));
845 ehca_free_fw_ctrlblock(mqpcb);
846
847create_srq1:
848 internal_destroy_qp(pd->device, my_qp, my_qp->ib_srq.uobject);
849
850 return ret;
851}
852
685/* 853/*
686 * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts 854 * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts
687 * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe 855 * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe
@@ -765,7 +933,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
765 u64 h_ret; 933 u64 h_ret;
766 int bad_wqe_cnt = 0; 934 int bad_wqe_cnt = 0;
767 int squeue_locked = 0; 935 int squeue_locked = 0;
768 unsigned long spl_flags = 0; 936 unsigned long flags = 0;
769 937
770 /* do query_qp to obtain current attr values */ 938 /* do query_qp to obtain current attr values */
771 mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); 939 mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
@@ -886,6 +1054,17 @@ static int internal_modify_qp(struct ib_qp *ibqp,
886 "ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x", 1054 "ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x",
887 my_qp, ibqp->qp_num, statetrans); 1055 my_qp, ibqp->qp_num, statetrans);
888 1056
1057 /* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set
1058 * in non-LL UD QPs.
1059 */
1060 if ((my_qp->qp_type == IB_QPT_UD) &&
1061 (my_qp->ext_type != EQPT_LLQP) &&
1062 (statetrans == IB_QPST_INIT2RTR) &&
1063 (shca->hw_level >= 0x22)) {
1064 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
1065 mqpcb->send_grh_flag = 1;
1066 }
1067
889 /* sqe -> rts: set purge bit of bad wqe before actual trans */ 1068 /* sqe -> rts: set purge bit of bad wqe before actual trans */
890 if ((my_qp->qp_type == IB_QPT_UD || 1069 if ((my_qp->qp_type == IB_QPT_UD ||
891 my_qp->qp_type == IB_QPT_GSI || 1070 my_qp->qp_type == IB_QPT_GSI ||
@@ -895,7 +1074,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
895 if (!ibqp->uobject) { 1074 if (!ibqp->uobject) {
896 struct ehca_wqe *wqe; 1075 struct ehca_wqe *wqe;
897 /* lock send queue */ 1076 /* lock send queue */
898 spin_lock_irqsave(&my_qp->spinlock_s, spl_flags); 1077 spin_lock_irqsave(&my_qp->spinlock_s, flags);
899 squeue_locked = 1; 1078 squeue_locked = 1;
900 /* mark next free wqe */ 1079 /* mark next free wqe */
901 wqe = (struct ehca_wqe*) 1080 wqe = (struct ehca_wqe*)
@@ -1181,7 +1360,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
1181 1360
1182modify_qp_exit2: 1361modify_qp_exit2:
1183 if (squeue_locked) { /* this means: sqe -> rts */ 1362 if (squeue_locked) { /* this means: sqe -> rts */
1184 spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags); 1363 spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
1185 my_qp->sqerr_purgeflag = 1; 1364 my_qp->sqerr_purgeflag = 1;
1186 } 1365 }
1187 1366
@@ -1312,6 +1491,9 @@ int ehca_query_qp(struct ib_qp *qp,
1312 qp_attr->alt_port_num = qpcb->alt_phys_port; 1491 qp_attr->alt_port_num = qpcb->alt_phys_port;
1313 qp_attr->alt_timeout = qpcb->timeout_al; 1492 qp_attr->alt_timeout = qpcb->timeout_al;
1314 1493
1494 qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res;
1495 qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp;
1496
1315 /* primary av */ 1497 /* primary av */
1316 qp_attr->ah_attr.sl = qpcb->service_level; 1498 qp_attr->ah_attr.sl = qpcb->service_level;
1317 1499
@@ -1367,53 +1549,170 @@ query_qp_exit1:
1367 return ret; 1549 return ret;
1368} 1550}
1369 1551
1370int ehca_destroy_qp(struct ib_qp *ibqp) 1552int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
1553 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
1371{ 1554{
1372 struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); 1555 struct ehca_qp *my_qp =
1373 struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, 1556 container_of(ibsrq, struct ehca_qp, ib_srq);
1557 struct ehca_pd *my_pd =
1558 container_of(ibsrq->pd, struct ehca_pd, ib_pd);
1559 struct ehca_shca *shca =
1560 container_of(ibsrq->pd->device, struct ehca_shca, ib_device);
1561 struct hcp_modify_qp_control_block *mqpcb;
1562 u64 update_mask;
1563 u64 h_ret;
1564 int ret = 0;
1565
1566 u32 cur_pid = current->tgid;
1567 if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
1568 my_pd->ownpid != cur_pid) {
1569 ehca_err(ibsrq->pd->device, "Invalid caller pid=%x ownpid=%x",
1570 cur_pid, my_pd->ownpid);
1571 return -EINVAL;
1572 }
1573
1574 mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
1575 if (!mqpcb) {
1576 ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb "
1577 "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
1578 return -ENOMEM;
1579 }
1580
1581 update_mask = 0;
1582 if (attr_mask & IB_SRQ_LIMIT) {
1583 attr_mask &= ~IB_SRQ_LIMIT;
1584 update_mask |=
1585 EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1)
1586 | EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1);
1587 mqpcb->curr_srq_limit =
1588 EHCA_BMASK_SET(MQPCB_CURR_SRQ_LIMIT, attr->srq_limit);
1589 mqpcb->qp_aff_asyn_ev_log_reg =
1590 EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1);
1591 }
1592
1593 /* by now, all bits in attr_mask should have been cleared */
1594 if (attr_mask) {
1595 ehca_err(ibsrq->device, "invalid attribute mask bits set "
1596 "attr_mask=%x", attr_mask);
1597 ret = -EINVAL;
1598 goto modify_srq_exit0;
1599 }
1600
1601 if (ehca_debug_level)
1602 ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
1603
1604 h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle,
1605 NULL, update_mask, mqpcb,
1606 my_qp->galpas.kernel);
1607
1608 if (h_ret != H_SUCCESS) {
1609 ret = ehca2ib_return_code(h_ret);
1610 ehca_err(ibsrq->device, "hipz_h_modify_qp() failed rc=%lx "
1611 "ehca_qp=%p qp_num=%x",
1612 h_ret, my_qp, my_qp->real_qp_num);
1613 }
1614
1615modify_srq_exit0:
1616 ehca_free_fw_ctrlblock(mqpcb);
1617
1618 return ret;
1619}
1620
1621int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr)
1622{
1623 struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq);
1624 struct ehca_pd *my_pd = container_of(srq->pd, struct ehca_pd, ib_pd);
1625 struct ehca_shca *shca = container_of(srq->device, struct ehca_shca,
1374 ib_device); 1626 ib_device);
1627 struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
1628 struct hcp_modify_qp_control_block *qpcb;
1629 u32 cur_pid = current->tgid;
1630 int ret = 0;
1631 u64 h_ret;
1632
1633 if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
1634 my_pd->ownpid != cur_pid) {
1635 ehca_err(srq->device, "Invalid caller pid=%x ownpid=%x",
1636 cur_pid, my_pd->ownpid);
1637 return -EINVAL;
1638 }
1639
1640 qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
1641 if (!qpcb) {
1642 ehca_err(srq->device, "Out of memory for qpcb "
1643 "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num);
1644 return -ENOMEM;
1645 }
1646
1647 h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle,
1648 NULL, qpcb, my_qp->galpas.kernel);
1649
1650 if (h_ret != H_SUCCESS) {
1651 ret = ehca2ib_return_code(h_ret);
1652 ehca_err(srq->device, "hipz_h_query_qp() failed "
1653 "ehca_qp=%p qp_num=%x h_ret=%lx",
1654 my_qp, my_qp->real_qp_num, h_ret);
1655 goto query_srq_exit1;
1656 }
1657
1658 srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1;
1659 srq_attr->srq_limit = EHCA_BMASK_GET(
1660 MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit);
1661
1662 if (ehca_debug_level)
1663 ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
1664
1665query_srq_exit1:
1666 ehca_free_fw_ctrlblock(qpcb);
1667
1668 return ret;
1669}
1670
1671int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
1672 struct ib_uobject *uobject)
1673{
1674 struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device);
1375 struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, 1675 struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
1376 ib_pd); 1676 ib_pd);
1377 u32 cur_pid = current->tgid; 1677 u32 cur_pid = current->tgid;
1378 u32 qp_num = ibqp->qp_num; 1678 u32 qp_num = my_qp->real_qp_num;
1379 int ret; 1679 int ret;
1380 u64 h_ret; 1680 u64 h_ret;
1381 u8 port_num; 1681 u8 port_num;
1382 enum ib_qp_type qp_type; 1682 enum ib_qp_type qp_type;
1383 unsigned long flags; 1683 unsigned long flags;
1384 1684
1385 if (ibqp->uobject) { 1685 if (uobject) {
1386 if (my_qp->mm_count_galpa || 1686 if (my_qp->mm_count_galpa ||
1387 my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { 1687 my_qp->mm_count_rqueue || my_qp->mm_count_squeue) {
1388 ehca_err(ibqp->device, "Resources still referenced in " 1688 ehca_err(dev, "Resources still referenced in "
1389 "user space qp_num=%x", ibqp->qp_num); 1689 "user space qp_num=%x", qp_num);
1390 return -EINVAL; 1690 return -EINVAL;
1391 } 1691 }
1392 if (my_pd->ownpid != cur_pid) { 1692 if (my_pd->ownpid != cur_pid) {
1393 ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x", 1693 ehca_err(dev, "Invalid caller pid=%x ownpid=%x",
1394 cur_pid, my_pd->ownpid); 1694 cur_pid, my_pd->ownpid);
1395 return -EINVAL; 1695 return -EINVAL;
1396 } 1696 }
1397 } 1697 }
1398 1698
1399 if (my_qp->send_cq) { 1699 if (my_qp->send_cq) {
1400 ret = ehca_cq_unassign_qp(my_qp->send_cq, 1700 ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num);
1401 my_qp->real_qp_num);
1402 if (ret) { 1701 if (ret) {
1403 ehca_err(ibqp->device, "Couldn't unassign qp from " 1702 ehca_err(dev, "Couldn't unassign qp from "
1404 "send_cq ret=%x qp_num=%x cq_num=%x", ret, 1703 "send_cq ret=%x qp_num=%x cq_num=%x", ret,
1405 my_qp->ib_qp.qp_num, my_qp->send_cq->cq_number); 1704 qp_num, my_qp->send_cq->cq_number);
1406 return ret; 1705 return ret;
1407 } 1706 }
1408 } 1707 }
1409 1708
1410 spin_lock_irqsave(&ehca_qp_idr_lock, flags); 1709 write_lock_irqsave(&ehca_qp_idr_lock, flags);
1411 idr_remove(&ehca_qp_idr, my_qp->token); 1710 idr_remove(&ehca_qp_idr, my_qp->token);
1412 spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); 1711 write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
1413 1712
1414 h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); 1713 h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
1415 if (h_ret != H_SUCCESS) { 1714 if (h_ret != H_SUCCESS) {
1416 ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx " 1715 ehca_err(dev, "hipz_h_destroy_qp() failed rc=%lx "
1417 "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num); 1716 "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num);
1418 return ehca2ib_return_code(h_ret); 1717 return ehca2ib_return_code(h_ret);
1419 } 1718 }
@@ -1424,7 +1723,7 @@ int ehca_destroy_qp(struct ib_qp *ibqp)
1424 /* no support for IB_QPT_SMI yet */ 1723 /* no support for IB_QPT_SMI yet */
1425 if (qp_type == IB_QPT_GSI) { 1724 if (qp_type == IB_QPT_GSI) {
1426 struct ib_event event; 1725 struct ib_event event;
1427 ehca_info(ibqp->device, "device %s: port %x is inactive.", 1726 ehca_info(dev, "device %s: port %x is inactive.",
1428 shca->ib_device.name, port_num); 1727 shca->ib_device.name, port_num);
1429 event.device = &shca->ib_device; 1728 event.device = &shca->ib_device;
1430 event.event = IB_EVENT_PORT_ERR; 1729 event.event = IB_EVENT_PORT_ERR;
@@ -1433,12 +1732,28 @@ int ehca_destroy_qp(struct ib_qp *ibqp)
1433 ib_dispatch_event(&event); 1732 ib_dispatch_event(&event);
1434 } 1733 }
1435 1734
1436 ipz_queue_dtor(&my_qp->ipz_rqueue); 1735 if (HAS_RQ(my_qp))
1437 ipz_queue_dtor(&my_qp->ipz_squeue); 1736 ipz_queue_dtor(&my_qp->ipz_rqueue);
1737 if (HAS_SQ(my_qp))
1738 ipz_queue_dtor(&my_qp->ipz_squeue);
1438 kmem_cache_free(qp_cache, my_qp); 1739 kmem_cache_free(qp_cache, my_qp);
1439 return 0; 1740 return 0;
1440} 1741}
1441 1742
1743int ehca_destroy_qp(struct ib_qp *qp)
1744{
1745 return internal_destroy_qp(qp->device,
1746 container_of(qp, struct ehca_qp, ib_qp),
1747 qp->uobject);
1748}
1749
1750int ehca_destroy_srq(struct ib_srq *srq)
1751{
1752 return internal_destroy_qp(srq->device,
1753 container_of(srq, struct ehca_qp, ib_srq),
1754 srq->uobject);
1755}
1756
1442int ehca_init_qp_cache(void) 1757int ehca_init_qp_cache(void)
1443{ 1758{
1444 qp_cache = kmem_cache_create("ehca_cache_qp", 1759 qp_cache = kmem_cache_create("ehca_cache_qp",
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index caec9dee09e1..61da65e6e5e0 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -3,8 +3,9 @@
3 * 3 *
4 * post_send/recv, poll_cq, req_notify 4 * post_send/recv, poll_cq, req_notify
5 * 5 *
6 * Authors: Waleri Fomin <fomin@de.ibm.com> 6 * Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
7 * Hoang-Nam Nguyen <hnguyen@de.ibm.com> 7 * Waleri Fomin <fomin@de.ibm.com>
8 * Joachim Fenkes <fenkes@de.ibm.com>
8 * Reinhard Ernst <rernst@de.ibm.com> 9 * Reinhard Ernst <rernst@de.ibm.com>
9 * 10 *
10 * Copyright (c) 2005 IBM Corporation 11 * Copyright (c) 2005 IBM Corporation
@@ -362,10 +363,10 @@ int ehca_post_send(struct ib_qp *qp,
362 struct ehca_wqe *wqe_p; 363 struct ehca_wqe *wqe_p;
363 int wqe_cnt = 0; 364 int wqe_cnt = 0;
364 int ret = 0; 365 int ret = 0;
365 unsigned long spl_flags; 366 unsigned long flags;
366 367
367 /* LOCK the QUEUE */ 368 /* LOCK the QUEUE */
368 spin_lock_irqsave(&my_qp->spinlock_s, spl_flags); 369 spin_lock_irqsave(&my_qp->spinlock_s, flags);
369 370
370 /* loop processes list of send reqs */ 371 /* loop processes list of send reqs */
371 for (cur_send_wr = send_wr; cur_send_wr != NULL; 372 for (cur_send_wr = send_wr; cur_send_wr != NULL;
@@ -406,26 +407,31 @@ int ehca_post_send(struct ib_qp *qp,
406 } /* eof for cur_send_wr */ 407 } /* eof for cur_send_wr */
407 408
408post_send_exit0: 409post_send_exit0:
409 /* UNLOCK the QUEUE */
410 spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags);
411 iosync(); /* serialize GAL register access */ 410 iosync(); /* serialize GAL register access */
412 hipz_update_sqa(my_qp, wqe_cnt); 411 hipz_update_sqa(my_qp, wqe_cnt);
412 spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
413 return ret; 413 return ret;
414} 414}
415 415
416int ehca_post_recv(struct ib_qp *qp, 416static int internal_post_recv(struct ehca_qp *my_qp,
417 struct ib_recv_wr *recv_wr, 417 struct ib_device *dev,
418 struct ib_recv_wr **bad_recv_wr) 418 struct ib_recv_wr *recv_wr,
419 struct ib_recv_wr **bad_recv_wr)
419{ 420{
420 struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
421 struct ib_recv_wr *cur_recv_wr; 421 struct ib_recv_wr *cur_recv_wr;
422 struct ehca_wqe *wqe_p; 422 struct ehca_wqe *wqe_p;
423 int wqe_cnt = 0; 423 int wqe_cnt = 0;
424 int ret = 0; 424 int ret = 0;
425 unsigned long spl_flags; 425 unsigned long flags;
426
427 if (unlikely(!HAS_RQ(my_qp))) {
428 ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",
429 my_qp, my_qp->real_qp_num, my_qp->ext_type);
430 return -ENODEV;
431 }
426 432
427 /* LOCK the QUEUE */ 433 /* LOCK the QUEUE */
428 spin_lock_irqsave(&my_qp->spinlock_r, spl_flags); 434 spin_lock_irqsave(&my_qp->spinlock_r, flags);
429 435
430 /* loop processes list of send reqs */ 436 /* loop processes list of send reqs */
431 for (cur_recv_wr = recv_wr; cur_recv_wr != NULL; 437 for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
@@ -439,8 +445,8 @@ int ehca_post_recv(struct ib_qp *qp,
439 *bad_recv_wr = cur_recv_wr; 445 *bad_recv_wr = cur_recv_wr;
440 if (wqe_cnt == 0) { 446 if (wqe_cnt == 0) {
441 ret = -ENOMEM; 447 ret = -ENOMEM;
442 ehca_err(qp->device, "Too many posted WQEs " 448 ehca_err(dev, "Too many posted WQEs "
443 "qp_num=%x", qp->qp_num); 449 "qp_num=%x", my_qp->real_qp_num);
444 } 450 }
445 goto post_recv_exit0; 451 goto post_recv_exit0;
446 } 452 }
@@ -455,23 +461,39 @@ int ehca_post_recv(struct ib_qp *qp,
455 *bad_recv_wr = cur_recv_wr; 461 *bad_recv_wr = cur_recv_wr;
456 if (wqe_cnt == 0) { 462 if (wqe_cnt == 0) {
457 ret = -EINVAL; 463 ret = -EINVAL;
458 ehca_err(qp->device, "Could not write WQE " 464 ehca_err(dev, "Could not write WQE "
459 "qp_num=%x", qp->qp_num); 465 "qp_num=%x", my_qp->real_qp_num);
460 } 466 }
461 goto post_recv_exit0; 467 goto post_recv_exit0;
462 } 468 }
463 wqe_cnt++; 469 wqe_cnt++;
464 ehca_gen_dbg("ehca_qp=%p qp_num=%x wqe_cnt=%d", 470 ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d",
465 my_qp, qp->qp_num, wqe_cnt); 471 my_qp, my_qp->real_qp_num, wqe_cnt);
466 } /* eof for cur_recv_wr */ 472 } /* eof for cur_recv_wr */
467 473
468post_recv_exit0: 474post_recv_exit0:
469 spin_unlock_irqrestore(&my_qp->spinlock_r, spl_flags);
470 iosync(); /* serialize GAL register access */ 475 iosync(); /* serialize GAL register access */
471 hipz_update_rqa(my_qp, wqe_cnt); 476 hipz_update_rqa(my_qp, wqe_cnt);
477 spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
472 return ret; 478 return ret;
473} 479}
474 480
481int ehca_post_recv(struct ib_qp *qp,
482 struct ib_recv_wr *recv_wr,
483 struct ib_recv_wr **bad_recv_wr)
484{
485 return internal_post_recv(container_of(qp, struct ehca_qp, ib_qp),
486 qp->device, recv_wr, bad_recv_wr);
487}
488
489int ehca_post_srq_recv(struct ib_srq *srq,
490 struct ib_recv_wr *recv_wr,
491 struct ib_recv_wr **bad_recv_wr)
492{
493 return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq),
494 srq->device, recv_wr, bad_recv_wr);
495}
496
475/* 497/*
476 * ib_wc_opcode table converts ehca wc opcode to ib 498 * ib_wc_opcode table converts ehca wc opcode to ib
477 * Since we use zero to indicate invalid opcode, the actual ib opcode must 499 * Since we use zero to indicate invalid opcode, the actual ib opcode must
@@ -494,6 +516,7 @@ static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
494 int ret = 0; 516 int ret = 0;
495 struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); 517 struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
496 struct ehca_cqe *cqe; 518 struct ehca_cqe *cqe;
519 struct ehca_qp *my_qp;
497 int cqe_count = 0; 520 int cqe_count = 0;
498 521
499poll_cq_one_read_cqe: 522poll_cq_one_read_cqe:
@@ -513,7 +536,7 @@ poll_cq_one_read_cqe:
513 if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { 536 if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {
514 struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number); 537 struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number);
515 int purgeflag; 538 int purgeflag;
516 unsigned long spl_flags; 539 unsigned long flags;
517 if (!qp) { 540 if (!qp) {
518 ehca_err(cq->device, "cq_num=%x qp_num=%x " 541 ehca_err(cq->device, "cq_num=%x qp_num=%x "
519 "could not find qp -> ignore cqe", 542 "could not find qp -> ignore cqe",
@@ -523,9 +546,9 @@ poll_cq_one_read_cqe:
523 /* ignore this purged cqe */ 546 /* ignore this purged cqe */
524 goto poll_cq_one_read_cqe; 547 goto poll_cq_one_read_cqe;
525 } 548 }
526 spin_lock_irqsave(&qp->spinlock_s, spl_flags); 549 spin_lock_irqsave(&qp->spinlock_s, flags);
527 purgeflag = qp->sqerr_purgeflag; 550 purgeflag = qp->sqerr_purgeflag;
528 spin_unlock_irqrestore(&qp->spinlock_s, spl_flags); 551 spin_unlock_irqrestore(&qp->spinlock_s, flags);
529 552
530 if (purgeflag) { 553 if (purgeflag) {
531 ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x " 554 ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x "
@@ -545,7 +568,7 @@ poll_cq_one_read_cqe:
545 } 568 }
546 569
547 /* tracing cqe */ 570 /* tracing cqe */
548 if (ehca_debug_level) { 571 if (unlikely(ehca_debug_level)) {
549 ehca_dbg(cq->device, 572 ehca_dbg(cq->device,
550 "Received COMPLETION ehca_cq=%p cq_num=%x -----", 573 "Received COMPLETION ehca_cq=%p cq_num=%x -----",
551 my_cq, my_cq->cq_number); 574 my_cq, my_cq->cq_number);
@@ -579,7 +602,11 @@ poll_cq_one_read_cqe:
579 } else 602 } else
580 wc->status = IB_WC_SUCCESS; 603 wc->status = IB_WC_SUCCESS;
581 604
582 wc->qp = NULL; 605 read_lock(&ehca_qp_idr_lock);
606 my_qp = idr_find(&ehca_qp_idr, cqe->qp_token);
607 wc->qp = &my_qp->ib_qp;
608 read_unlock(&ehca_qp_idr_lock);
609
583 wc->byte_len = cqe->nr_bytes_transferred; 610 wc->byte_len = cqe->nr_bytes_transferred;
584 wc->pkey_index = cqe->pkey_index; 611 wc->pkey_index = cqe->pkey_index;
585 wc->slid = cqe->rlid; 612 wc->slid = cqe->rlid;
@@ -589,7 +616,7 @@ poll_cq_one_read_cqe:
589 wc->imm_data = cpu_to_be32(cqe->immediate_data); 616 wc->imm_data = cpu_to_be32(cqe->immediate_data);
590 wc->sl = cqe->service_level; 617 wc->sl = cqe->service_level;
591 618
592 if (wc->status != IB_WC_SUCCESS) 619 if (unlikely(wc->status != IB_WC_SUCCESS))
593 ehca_dbg(cq->device, 620 ehca_dbg(cq->device,
594 "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe " 621 "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe "
595 "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx " 622 "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx "
@@ -610,7 +637,7 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
610 int nr; 637 int nr;
611 struct ib_wc *current_wc = wc; 638 struct ib_wc *current_wc = wc;
612 int ret = 0; 639 int ret = 0;
613 unsigned long spl_flags; 640 unsigned long flags;
614 641
615 if (num_entries < 1) { 642 if (num_entries < 1) {
616 ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " 643 ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
@@ -619,14 +646,14 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
619 goto poll_cq_exit0; 646 goto poll_cq_exit0;
620 } 647 }
621 648
622 spin_lock_irqsave(&my_cq->spinlock, spl_flags); 649 spin_lock_irqsave(&my_cq->spinlock, flags);
623 for (nr = 0; nr < num_entries; nr++) { 650 for (nr = 0; nr < num_entries; nr++) {
624 ret = ehca_poll_cq_one(cq, current_wc); 651 ret = ehca_poll_cq_one(cq, current_wc);
625 if (ret) 652 if (ret)
626 break; 653 break;
627 current_wc++; 654 current_wc++;
628 } /* eof for nr */ 655 } /* eof for nr */
629 spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); 656 spin_unlock_irqrestore(&my_cq->spinlock, flags);
630 if (ret == -EAGAIN || !ret) 657 if (ret == -EAGAIN || !ret)
631 ret = nr; 658 ret = nr;
632 659
@@ -637,7 +664,6 @@ poll_cq_exit0:
637int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags) 664int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags)
638{ 665{
639 struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); 666 struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
640 unsigned long spl_flags;
641 int ret = 0; 667 int ret = 0;
642 668
643 switch (notify_flags & IB_CQ_SOLICITED_MASK) { 669 switch (notify_flags & IB_CQ_SOLICITED_MASK) {
@@ -652,6 +678,7 @@ int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags)
652 } 678 }
653 679
654 if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { 680 if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
681 unsigned long spl_flags;
655 spin_lock_irqsave(&my_cq->spinlock, spl_flags); 682 spin_lock_irqsave(&my_cq->spinlock, spl_flags);
656 ret = ipz_qeit_is_valid(&my_cq->ipz_queue); 683 ret = ipz_qeit_is_valid(&my_cq->ipz_queue);
657 spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); 684 spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h
index 973c4b591545..03b185f873da 100644
--- a/drivers/infiniband/hw/ehca/ehca_tools.h
+++ b/drivers/infiniband/hw/ehca/ehca_tools.h
@@ -59,6 +59,7 @@
59#include <linux/cpu.h> 59#include <linux/cpu.h>
60#include <linux/device.h> 60#include <linux/device.h>
61 61
62#include <asm/atomic.h>
62#include <asm/abs_addr.h> 63#include <asm/abs_addr.h>
63#include <asm/ibmebus.h> 64#include <asm/ibmebus.h>
64#include <asm/io.h> 65#include <asm/io.h>
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c
index 73db920b6945..3031b3bb56f9 100644
--- a/drivers/infiniband/hw/ehca/ehca_uverbs.c
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -253,16 +253,16 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
253 u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */ 253 u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
254 u32 cur_pid = current->tgid; 254 u32 cur_pid = current->tgid;
255 u32 ret; 255 u32 ret;
256 unsigned long flags;
257 struct ehca_cq *cq; 256 struct ehca_cq *cq;
258 struct ehca_qp *qp; 257 struct ehca_qp *qp;
259 struct ehca_pd *pd; 258 struct ehca_pd *pd;
259 struct ib_uobject *uobject;
260 260
261 switch (q_type) { 261 switch (q_type) {
262 case 1: /* CQ */ 262 case 1: /* CQ */
263 spin_lock_irqsave(&ehca_cq_idr_lock, flags); 263 read_lock(&ehca_cq_idr_lock);
264 cq = idr_find(&ehca_cq_idr, idr_handle); 264 cq = idr_find(&ehca_cq_idr, idr_handle);
265 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); 265 read_unlock(&ehca_cq_idr_lock);
266 266
267 /* make sure this mmap really belongs to the authorized user */ 267 /* make sure this mmap really belongs to the authorized user */
268 if (!cq) 268 if (!cq)
@@ -288,9 +288,9 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
288 break; 288 break;
289 289
290 case 2: /* QP */ 290 case 2: /* QP */
291 spin_lock_irqsave(&ehca_qp_idr_lock, flags); 291 read_lock(&ehca_qp_idr_lock);
292 qp = idr_find(&ehca_qp_idr, idr_handle); 292 qp = idr_find(&ehca_qp_idr, idr_handle);
293 spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); 293 read_unlock(&ehca_qp_idr_lock);
294 294
295 /* make sure this mmap really belongs to the authorized user */ 295 /* make sure this mmap really belongs to the authorized user */
296 if (!qp) 296 if (!qp)
@@ -304,7 +304,8 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
304 return -ENOMEM; 304 return -ENOMEM;
305 } 305 }
306 306
307 if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context) 307 uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject;
308 if (!uobject || uobject->context != context)
308 return -EINVAL; 309 return -EINVAL;
309 310
310 ret = ehca_mmap_qp(vma, qp, rsrc_type); 311 ret = ehca_mmap_qp(vma, qp, rsrc_type);
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index 5766ae3a2029..4776a8b0feec 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -5,6 +5,7 @@
5 * 5 *
6 * Authors: Christoph Raisch <raisch@de.ibm.com> 6 * Authors: Christoph Raisch <raisch@de.ibm.com>
7 * Hoang-Nam Nguyen <hnguyen@de.ibm.com> 7 * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
8 * Joachim Fenkes <fenkes@de.ibm.com>
8 * Gerd Bayer <gerd.bayer@de.ibm.com> 9 * Gerd Bayer <gerd.bayer@de.ibm.com>
9 * Waleri Fomin <fomin@de.ibm.com> 10 * Waleri Fomin <fomin@de.ibm.com>
10 * 11 *
@@ -62,6 +63,12 @@
62#define H_ALL_RES_QP_MAX_SEND_SGE EHCA_BMASK_IBM(32, 39) 63#define H_ALL_RES_QP_MAX_SEND_SGE EHCA_BMASK_IBM(32, 39)
63#define H_ALL_RES_QP_MAX_RECV_SGE EHCA_BMASK_IBM(40, 47) 64#define H_ALL_RES_QP_MAX_RECV_SGE EHCA_BMASK_IBM(40, 47)
64 65
66#define H_ALL_RES_QP_UD_AV_LKEY EHCA_BMASK_IBM(32, 63)
67#define H_ALL_RES_QP_SRQ_QP_TOKEN EHCA_BMASK_IBM(0, 31)
68#define H_ALL_RES_QP_SRQ_QP_HANDLE EHCA_BMASK_IBM(0, 64)
69#define H_ALL_RES_QP_SRQ_LIMIT EHCA_BMASK_IBM(48, 63)
70#define H_ALL_RES_QP_SRQ_QPN EHCA_BMASK_IBM(40, 63)
71
65#define H_ALL_RES_QP_ACT_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) 72#define H_ALL_RES_QP_ACT_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31)
66#define H_ALL_RES_QP_ACT_OUTST_RECV_WR EHCA_BMASK_IBM(48, 63) 73#define H_ALL_RES_QP_ACT_OUTST_RECV_WR EHCA_BMASK_IBM(48, 63)
67#define H_ALL_RES_QP_ACT_SEND_SGE EHCA_BMASK_IBM(8, 15) 74#define H_ALL_RES_QP_ACT_SEND_SGE EHCA_BMASK_IBM(8, 15)
@@ -74,10 +81,7 @@
74#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48) 81#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48)
75#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49) 82#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49)
76 83
77/* direct access qp controls */ 84static DEFINE_SPINLOCK(hcall_lock);
78#define DAQP_CTRL_ENABLE 0x01
79#define DAQP_CTRL_SEND_COMP 0x20
80#define DAQP_CTRL_RECV_COMP 0x40
81 85
82static u32 get_longbusy_msecs(int longbusy_rc) 86static u32 get_longbusy_msecs(int longbusy_rc)
83{ 87{
@@ -155,7 +159,7 @@ static long ehca_plpar_hcall9(unsigned long opcode,
155{ 159{
156 long ret; 160 long ret;
157 int i, sleep_msecs, lock_is_set = 0; 161 int i, sleep_msecs, lock_is_set = 0;
158 unsigned long flags; 162 unsigned long flags = 0;
159 163
160 ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx " 164 ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
161 "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx", 165 "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx",
@@ -284,53 +288,53 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
284} 288}
285 289
286u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, 290u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
287 struct ehca_qp *qp,
288 struct ehca_alloc_qp_parms *parms) 291 struct ehca_alloc_qp_parms *parms)
289{ 292{
290 u64 ret; 293 u64 ret;
291 u64 allocate_controls; 294 u64 allocate_controls, max_r10_reg, r11, r12;
292 u64 max_r10_reg;
293 u64 outs[PLPAR_HCALL9_BUFSIZE]; 295 u64 outs[PLPAR_HCALL9_BUFSIZE];
294 u16 max_nr_receive_wqes = qp->init_attr.cap.max_recv_wr + 1;
295 u16 max_nr_send_wqes = qp->init_attr.cap.max_send_wr + 1;
296 int daqp_ctrl = parms->daqp_ctrl;
297 296
298 allocate_controls = 297 allocate_controls =
299 EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, 298 EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type)
300 (daqp_ctrl & DAQP_CTRL_ENABLE) ? 1 : 0)
301 | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0) 299 | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0)
302 | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype) 300 | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype)
303 | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype) 301 | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype)
304 | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING, 302 | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING,
305 (daqp_ctrl & DAQP_CTRL_RECV_COMP) ? 1 : 0) 303 !!(parms->ll_comp_flags & LLQP_RECV_COMP))
306 | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING, 304 | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING,
307 (daqp_ctrl & DAQP_CTRL_SEND_COMP) ? 1 : 0) 305 !!(parms->ll_comp_flags & LLQP_SEND_COMP))
308 | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL, 306 | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL,
309 parms->ud_av_l_key_ctl) 307 parms->ud_av_l_key_ctl)
310 | EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1); 308 | EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1);
311 309
312 max_r10_reg = 310 max_r10_reg =
313 EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR, 311 EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR,
314 max_nr_send_wqes) 312 parms->max_send_wr + 1)
315 | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR, 313 | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR,
316 max_nr_receive_wqes) 314 parms->max_recv_wr + 1)
317 | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE, 315 | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE,
318 parms->max_send_sge) 316 parms->max_send_sge)
319 | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE, 317 | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
320 parms->max_recv_sge); 318 parms->max_recv_sge);
321 319
320 r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token);
321
322 if (parms->ext_type == EQPT_SRQ)
323 r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit);
324 else
325 r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn);
326
322 ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, 327 ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
323 adapter_handle.handle, /* r4 */ 328 adapter_handle.handle, /* r4 */
324 allocate_controls, /* r5 */ 329 allocate_controls, /* r5 */
325 qp->send_cq->ipz_cq_handle.handle, 330 parms->send_cq_handle.handle,
326 qp->recv_cq->ipz_cq_handle.handle, 331 parms->recv_cq_handle.handle,
327 parms->ipz_eq_handle.handle, 332 parms->eq_handle.handle,
328 ((u64)qp->token << 32) | parms->pd.value, 333 ((u64)parms->token << 32) | parms->pd.value,
329 max_r10_reg, /* r10 */ 334 max_r10_reg, r11, r12);
330 parms->ud_av_l_key_ctl, /* r11 */ 335
331 0); 336 parms->qp_handle.handle = outs[0];
332 qp->ipz_qp_handle.handle = outs[0]; 337 parms->real_qp_num = (u32)outs[1];
333 qp->real_qp_num = (u32)outs[1];
334 parms->act_nr_send_wqes = 338 parms->act_nr_send_wqes =
335 (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]); 339 (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
336 parms->act_nr_recv_wqes = 340 parms->act_nr_recv_wqes =
@@ -345,7 +349,7 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
345 (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); 349 (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
346 350
347 if (ret == H_SUCCESS) 351 if (ret == H_SUCCESS)
348 hcp_galpas_ctor(&qp->galpas, outs[6], outs[6]); 352 hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]);
349 353
350 if (ret == H_NOT_ENOUGH_RESOURCES) 354 if (ret == H_NOT_ENOUGH_RESOURCES)
351 ehca_gen_err("Not enough resources. ret=%lx", ret); 355 ehca_gen_err("Not enough resources. ret=%lx", ret);
diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h
index 2869f7dd6196..60ce02b70663 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.h
+++ b/drivers/infiniband/hw/ehca/hcp_if.h
@@ -78,7 +78,6 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
78 * initialize resources, create empty QPPTs (2 rings). 78 * initialize resources, create empty QPPTs (2 rings).
79 */ 79 */
80u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, 80u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
81 struct ehca_qp *qp,
82 struct ehca_alloc_qp_parms *parms); 81 struct ehca_alloc_qp_parms *parms);
83 82
84u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, 83u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h
index fad91368dc5a..dad6dea5636b 100644
--- a/drivers/infiniband/hw/ehca/hipz_hw.h
+++ b/drivers/infiniband/hw/ehca/hipz_hw.h
@@ -163,6 +163,7 @@ struct hipz_qptemm {
163 163
164#define QPX_SQADDER EHCA_BMASK_IBM(48,63) 164#define QPX_SQADDER EHCA_BMASK_IBM(48,63)
165#define QPX_RQADDER EHCA_BMASK_IBM(48,63) 165#define QPX_RQADDER EHCA_BMASK_IBM(48,63)
166#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3,3)
166 167
167#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm,x) 168#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm,x)
168 169
@@ -360,6 +361,24 @@ struct hipz_query_hca {
360 u32 max_neq; 361 u32 max_neq;
361} __attribute__ ((packed)); 362} __attribute__ ((packed));
362 363
364#define HCA_CAP_AH_PORT_NR_CHECK EHCA_BMASK_IBM( 0, 0)
365#define HCA_CAP_ATOMIC EHCA_BMASK_IBM( 1, 1)
366#define HCA_CAP_AUTO_PATH_MIG EHCA_BMASK_IBM( 2, 2)
367#define HCA_CAP_BAD_P_KEY_CTR EHCA_BMASK_IBM( 3, 3)
368#define HCA_CAP_SQD_RTS_PORT_CHANGE EHCA_BMASK_IBM( 4, 4)
369#define HCA_CAP_CUR_QP_STATE_MOD EHCA_BMASK_IBM( 5, 5)
370#define HCA_CAP_INIT_TYPE EHCA_BMASK_IBM( 6, 6)
371#define HCA_CAP_PORT_ACTIVE_EVENT EHCA_BMASK_IBM( 7, 7)
372#define HCA_CAP_Q_KEY_VIOL_CTR EHCA_BMASK_IBM( 8, 8)
373#define HCA_CAP_WQE_RESIZE EHCA_BMASK_IBM( 9, 9)
374#define HCA_CAP_RAW_PACKET_MCAST EHCA_BMASK_IBM(10, 10)
375#define HCA_CAP_SHUTDOWN_PORT EHCA_BMASK_IBM(11, 11)
376#define HCA_CAP_RC_LL_QP EHCA_BMASK_IBM(12, 12)
377#define HCA_CAP_SRQ EHCA_BMASK_IBM(13, 13)
378#define HCA_CAP_UD_LL_QP EHCA_BMASK_IBM(16, 16)
379#define HCA_CAP_RESIZE_MR EHCA_BMASK_IBM(17, 17)
380#define HCA_CAP_MINI_QP EHCA_BMASK_IBM(18, 18)
381
363/* query port response block */ 382/* query port response block */
364struct hipz_query_port { 383struct hipz_query_port {
365 u32 state; 384 u32 state;
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
index 57f141a36bce..007f0882fd40 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
@@ -105,7 +105,6 @@ void *ipz_qpageit_get_inc(struct ipz_queue *queue);
105 * step in struct ipz_queue, will wrap in ringbuffer 105 * step in struct ipz_queue, will wrap in ringbuffer
106 * returns address (kv) of Queue Entry BEFORE increment 106 * returns address (kv) of Queue Entry BEFORE increment
107 * warning don't use in parallel with ipz_qpageit_get_inc() 107 * warning don't use in parallel with ipz_qpageit_get_inc()
108 * warning unpredictable results may occur if steps>act_nr_of_queue_entries
109 */ 108 */
110static inline void *ipz_qeit_get_inc(struct ipz_queue *queue) 109static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
111{ 110{
@@ -121,31 +120,24 @@ static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
121} 120}
122 121
123/* 122/*
123 * return a bool indicating whether current Queue Entry is valid
124 */
125static inline int ipz_qeit_is_valid(struct ipz_queue *queue)
126{
127 struct ehca_cqe *cqe = ipz_qeit_get(queue);
128 return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1));
129}
130
131/*
124 * return current Queue Entry, increment Queue Entry iterator by one 132 * return current Queue Entry, increment Queue Entry iterator by one
125 * step in struct ipz_queue, will wrap in ringbuffer 133 * step in struct ipz_queue, will wrap in ringbuffer
126 * returns address (kv) of Queue Entry BEFORE increment 134 * returns address (kv) of Queue Entry BEFORE increment
127 * returns 0 and does not increment, if wrong valid state 135 * returns 0 and does not increment, if wrong valid state
128 * warning don't use in parallel with ipz_qpageit_get_inc() 136 * warning don't use in parallel with ipz_qpageit_get_inc()
129 * warning unpredictable results may occur if steps>act_nr_of_queue_entries
130 */ 137 */
131static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue) 138static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue)
132{ 139{
133 struct ehca_cqe *cqe = ipz_qeit_get(queue); 140 return ipz_qeit_is_valid(queue) ? ipz_qeit_get_inc(queue) : NULL;
134 u32 cqe_flags = cqe->cqe_flags;
135
136 if ((cqe_flags >> 7) != (queue->toggle_state & 1))
137 return NULL;
138
139 ipz_qeit_get_inc(queue);
140 return cqe;
141}
142
143static inline int ipz_qeit_is_valid(struct ipz_queue *queue)
144{
145 struct ehca_cqe *cqe = ipz_qeit_get(queue);
146 u32 cqe_flags = cqe->cqe_flags;
147
148 return cqe_flags >> 7 == (queue->toggle_state & 1);
149} 141}
150 142
151/* 143/*
diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig
index 90c14543677d..044da5828a78 100644
--- a/drivers/infiniband/hw/ipath/Kconfig
+++ b/drivers/infiniband/hw/ipath/Kconfig
@@ -1,6 +1,6 @@
1config INFINIBAND_IPATH 1config INFINIBAND_IPATH
2 tristate "QLogic InfiniPath Driver" 2 tristate "QLogic InfiniPath Driver"
3 depends on (PCI_MSI || HT_IRQ) && 64BIT && INFINIBAND && NET 3 depends on (PCI_MSI || HT_IRQ) && 64BIT && NET
4 ---help--- 4 ---help---
5 This is a driver for QLogic InfiniPath host channel adapters, 5 This is a driver for QLogic InfiniPath host channel adapters,
6 including InfiniBand verbs support. This driver allows these 6 including InfiniBand verbs support. This driver allows these
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 10c008f22ba6..b4b786d0dfca 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -189,8 +189,7 @@ typedef enum _ipath_ureg {
189#define IPATH_RUNTIME_FORCE_WC_ORDER 0x4 189#define IPATH_RUNTIME_FORCE_WC_ORDER 0x4
190#define IPATH_RUNTIME_RCVHDR_COPY 0x8 190#define IPATH_RUNTIME_RCVHDR_COPY 0x8
191#define IPATH_RUNTIME_MASTER 0x10 191#define IPATH_RUNTIME_MASTER 0x10
192#define IPATH_RUNTIME_PBC_REWRITE 0x20 192/* 0x20 and 0x40 are no longer used, but are reserved for ABI compatibility */
193#define IPATH_RUNTIME_LOOSE_DMA_ALIGN 0x40
194 193
195/* 194/*
196 * This structure is returned by ipath_userinit() immediately after 195 * This structure is returned by ipath_userinit() immediately after
@@ -432,8 +431,15 @@ struct ipath_user_info {
432#define IPATH_CMD_UNUSED_1 25 431#define IPATH_CMD_UNUSED_1 25
433#define IPATH_CMD_UNUSED_2 26 432#define IPATH_CMD_UNUSED_2 26
434#define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */ 433#define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */
434#define IPATH_CMD_POLL_TYPE 28 /* set the kind of polling we want */
435 435
436#define IPATH_CMD_MAX 27 436#define IPATH_CMD_MAX 28
437
438/*
439 * Poll types
440 */
441#define IPATH_POLL_TYPE_URGENT 0x01
442#define IPATH_POLL_TYPE_OVERFLOW 0x02
437 443
438struct ipath_port_info { 444struct ipath_port_info {
439 __u32 num_active; /* number of active units */ 445 __u32 num_active; /* number of active units */
@@ -474,6 +480,8 @@ struct ipath_cmd {
474 __u16 part_key; 480 __u16 part_key;
475 /* user address of __u32 bitmask of active slaves */ 481 /* user address of __u32 bitmask of active slaves */
476 __u64 slave_mask_addr; 482 __u64 slave_mask_addr;
483 /* type of polling we want */
484 __u16 poll_type;
477 } cmd; 485 } cmd;
478}; 486};
479 487
@@ -502,13 +510,30 @@ struct __ipath_sendpkt {
502 struct ipath_iovec sps_iov[4]; 510 struct ipath_iovec sps_iov[4];
503}; 511};
504 512
505/* Passed into diag data special file's ->write method. */ 513/*
514 * diagnostics can send a packet by "writing" one of the following
515 * two structs to diag data special file
516 * The first is the legacy version for backward compatibility
517 */
506struct ipath_diag_pkt { 518struct ipath_diag_pkt {
507 __u32 unit; 519 __u32 unit;
508 __u64 data; 520 __u64 data;
509 __u32 len; 521 __u32 len;
510}; 522};
511 523
524/* The second diag_pkt struct is the expanded version that allows
525 * more control over the packet, specifically, by allowing a custom
526 * pbc (+ extra) qword, so that special modes and deliberate
527 * changes to CRCs can be used. The elements were also re-ordered
528 * for better alignment and to avoid padding issues.
529 */
530struct ipath_diag_xpkt {
531 __u64 data;
532 __u64 pbc_wd;
533 __u32 unit;
534 __u32 len;
535};
536
512/* 537/*
513 * Data layout in I2C flash (for GUID, etc.) 538 * Data layout in I2C flash (for GUID, etc.)
514 * All fields are little-endian binary unless otherwise stated 539 * All fields are little-endian binary unless otherwise stated
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index 3e9241badba0..a6f04d27ec57 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -90,6 +90,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
90 wc->queue[head].sl = entry->sl; 90 wc->queue[head].sl = entry->sl;
91 wc->queue[head].dlid_path_bits = entry->dlid_path_bits; 91 wc->queue[head].dlid_path_bits = entry->dlid_path_bits;
92 wc->queue[head].port_num = entry->port_num; 92 wc->queue[head].port_num = entry->port_num;
93 /* Make sure queue entry is written before the head index. */
94 smp_wmb();
93 wc->head = next; 95 wc->head = next;
94 96
95 if (cq->notify == IB_CQ_NEXT_COMP || 97 if (cq->notify == IB_CQ_NEXT_COMP ||
@@ -139,7 +141,8 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
139 141
140 if (tail == wc->head) 142 if (tail == wc->head)
141 break; 143 break;
142 144 /* Make sure entry is read after head index is read. */
145 smp_rmb();
143 qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table, 146 qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table,
144 wc->queue[tail].qp_num); 147 wc->queue[tail].qp_num);
145 entry->qp = &qp->ibqp; 148 entry->qp = &qp->ibqp;
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index 42bfbdb0d3e6..19c56e6491eb 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 63e8368b0e95..a698f1949d10 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -323,13 +323,14 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
323{ 323{
324 u32 __iomem *piobuf; 324 u32 __iomem *piobuf;
325 u32 plen, clen, pbufn; 325 u32 plen, clen, pbufn;
326 struct ipath_diag_pkt dp; 326 struct ipath_diag_pkt odp;
327 struct ipath_diag_xpkt dp;
327 u32 *tmpbuf = NULL; 328 u32 *tmpbuf = NULL;
328 struct ipath_devdata *dd; 329 struct ipath_devdata *dd;
329 ssize_t ret = 0; 330 ssize_t ret = 0;
330 u64 val; 331 u64 val;
331 332
332 if (count < sizeof(dp)) { 333 if (count != sizeof(dp)) {
333 ret = -EINVAL; 334 ret = -EINVAL;
334 goto bail; 335 goto bail;
335 } 336 }
@@ -339,6 +340,29 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
339 goto bail; 340 goto bail;
340 } 341 }
341 342
343 /*
344 * Due to padding/alignment issues (lessened with new struct)
345 * the old and new structs are the same length. We need to
346 * disambiguate them, which we can do because odp.len has never
347 * been less than the total of LRH+BTH+DETH so far, while
348 * dp.unit (same offset) is unlikely to get that high.
349 * Similarly, dp.data, the pointer to user at the same offset
350 * as odp.unit, is almost certainly at least one (512byte)page
351 * "above" NULL. The if-block below can be omitted if compatibility
352 * between a new driver and older diagnostic code is unimportant.
353 * compatibility the other direction (new diags, old driver) is
354 * handled in the diagnostic code, with a warning.
355 */
356 if (dp.unit >= 20 && dp.data < 512) {
357 /* very probable version mismatch. Fix it up */
358 memcpy(&odp, &dp, sizeof(odp));
359 /* We got a legacy dp, copy elements to dp */
360 dp.unit = odp.unit;
361 dp.data = odp.data;
362 dp.len = odp.len;
363 dp.pbc_wd = 0; /* Indicate we need to compute PBC wd */
364 }
365
342 /* send count must be an exact number of dwords */ 366 /* send count must be an exact number of dwords */
343 if (dp.len & 3) { 367 if (dp.len & 3) {
344 ret = -EINVAL; 368 ret = -EINVAL;
@@ -371,9 +395,10 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
371 ret = -ENODEV; 395 ret = -ENODEV;
372 goto bail; 396 goto bail;
373 } 397 }
398 /* Check link state, but not if we have custom PBC */
374 val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK; 399 val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
375 if (val != IPATH_IBSTATE_INIT && val != IPATH_IBSTATE_ARM && 400 if (!dp.pbc_wd && val != IPATH_IBSTATE_INIT &&
376 val != IPATH_IBSTATE_ACTIVE) { 401 val != IPATH_IBSTATE_ARM && val != IPATH_IBSTATE_ACTIVE) {
377 ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n", 402 ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
378 dd->ipath_unit, (unsigned long long) val); 403 dd->ipath_unit, (unsigned long long) val);
379 ret = -EINVAL; 404 ret = -EINVAL;
@@ -419,9 +444,13 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
419 ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n", 444 ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
420 dd->ipath_unit, plen - 1, pbufn); 445 dd->ipath_unit, plen - 1, pbufn);
421 446
447 if (dp.pbc_wd == 0)
448 /* Legacy operation, use computed pbc_wd */
449 dp.pbc_wd = plen;
450
422 /* we have to flush after the PBC for correctness on some cpus 451 /* we have to flush after the PBC for correctness on some cpus
423 * or WC buffer can be written out of order */ 452 * or WC buffer can be written out of order */
424 writeq(plen, piobuf); 453 writeq(dp.pbc_wd, piobuf);
425 ipath_flush_wc(); 454 ipath_flush_wc();
426 /* copy all but the trigger word, then flush, so it's written 455 /* copy all but the trigger word, then flush, so it's written
427 * to chip before trigger word, then write trigger word, then 456 * to chip before trigger word, then write trigger word, then
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 834e86f6c04e..9361f5ab8bd6 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -104,6 +104,9 @@ static int __devinit ipath_init_one(struct pci_dev *,
104#define PCI_DEVICE_ID_INFINIPATH_HT 0xd 104#define PCI_DEVICE_ID_INFINIPATH_HT 0xd
105#define PCI_DEVICE_ID_INFINIPATH_PE800 0x10 105#define PCI_DEVICE_ID_INFINIPATH_PE800 0x10
106 106
107/* Number of seconds before our card status check... */
108#define STATUS_TIMEOUT 60
109
107static const struct pci_device_id ipath_pci_tbl[] = { 110static const struct pci_device_id ipath_pci_tbl[] = {
108 { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) }, 111 { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
109 { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) }, 112 { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) },
@@ -119,6 +122,18 @@ static struct pci_driver ipath_driver = {
119 .id_table = ipath_pci_tbl, 122 .id_table = ipath_pci_tbl,
120}; 123};
121 124
125static void ipath_check_status(struct work_struct *work)
126{
127 struct ipath_devdata *dd = container_of(work, struct ipath_devdata,
128 status_work.work);
129
130 /*
131 * If we don't have any interrupts, let the user know and
132 * don't bother checking again.
133 */
134 if (dd->ipath_int_counter == 0)
135 dev_err(&dd->pcidev->dev, "No interrupts detected.\n");
136}
122 137
123static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev, 138static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
124 u32 *bar0, u32 *bar1) 139 u32 *bar0, u32 *bar1)
@@ -187,6 +202,8 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
187 dd->pcidev = pdev; 202 dd->pcidev = pdev;
188 pci_set_drvdata(pdev, dd); 203 pci_set_drvdata(pdev, dd);
189 204
205 INIT_DELAYED_WORK(&dd->status_work, ipath_check_status);
206
190 list_add(&dd->ipath_list, &ipath_dev_list); 207 list_add(&dd->ipath_list, &ipath_dev_list);
191 208
192bail_unlock: 209bail_unlock:
@@ -504,6 +521,9 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
504 ipath_diag_add(dd); 521 ipath_diag_add(dd);
505 ipath_register_ib_device(dd); 522 ipath_register_ib_device(dd);
506 523
524 /* Check that card status in STATUS_TIMEOUT seconds. */
525 schedule_delayed_work(&dd->status_work, HZ * STATUS_TIMEOUT);
526
507 goto bail; 527 goto bail;
508 528
509bail_irqsetup: 529bail_irqsetup:
@@ -631,6 +651,9 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev)
631 */ 651 */
632 ipath_shutdown_device(dd); 652 ipath_shutdown_device(dd);
633 653
654 cancel_delayed_work(&dd->status_work);
655 flush_scheduled_work();
656
634 if (dd->verbs_dev) 657 if (dd->verbs_dev)
635 ipath_unregister_ib_device(dd->verbs_dev); 658 ipath_unregister_ib_device(dd->verbs_dev);
636 659
@@ -699,9 +722,9 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
699 u64 sendctrl, sendorig; 722 u64 sendctrl, sendorig;
700 723
701 ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first); 724 ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
702 sendorig = dd->ipath_sendctrl | INFINIPATH_S_DISARM; 725 sendorig = dd->ipath_sendctrl;
703 for (i = first; i < last; i++) { 726 for (i = first; i < last; i++) {
704 sendctrl = sendorig | 727 sendctrl = sendorig | INFINIPATH_S_DISARM |
705 (i << INFINIPATH_S_DISARMPIOBUF_SHIFT); 728 (i << INFINIPATH_S_DISARMPIOBUF_SHIFT);
706 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 729 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
707 sendctrl); 730 sendctrl);
@@ -712,12 +735,12 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
712 * while we were looping; no critical bits that would require 735 * while we were looping; no critical bits that would require
713 * locking. 736 * locking.
714 * 737 *
715 * Write a 0, and then the original value, reading scratch in 738 * disable PIOAVAILUPD, then re-enable, reading scratch in
716 * between. This seems to avoid a chip timing race that causes 739 * between. This seems to avoid a chip timing race that causes
717 * pioavail updates to memory to stop. 740 * pioavail updates to memory to stop.
718 */ 741 */
719 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 742 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
720 0); 743 sendorig & ~IPATH_S_PIOBUFAVAILUPD);
721 sendorig = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); 744 sendorig = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
722 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 745 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
723 dd->ipath_sendctrl); 746 dd->ipath_sendctrl);
@@ -1014,14 +1037,10 @@ void ipath_kreceive(struct ipath_devdata *dd)
1014 goto bail; 1037 goto bail;
1015 } 1038 }
1016 1039
1017 /* There is already a thread processing this queue. */
1018 if (test_and_set_bit(0, &dd->ipath_rcv_pending))
1019 goto bail;
1020
1021 l = dd->ipath_port0head; 1040 l = dd->ipath_port0head;
1022 hdrqtail = (u32) le64_to_cpu(*dd->ipath_hdrqtailptr); 1041 hdrqtail = (u32) le64_to_cpu(*dd->ipath_hdrqtailptr);
1023 if (l == hdrqtail) 1042 if (l == hdrqtail)
1024 goto done; 1043 goto bail;
1025 1044
1026reloop: 1045reloop:
1027 for (i = 0; l != hdrqtail; i++) { 1046 for (i = 0; l != hdrqtail; i++) {
@@ -1156,10 +1175,6 @@ reloop:
1156 ipath_stats.sps_avgpkts_call = 1175 ipath_stats.sps_avgpkts_call =
1157 ipath_stats.sps_port0pkts / ++totcalls; 1176 ipath_stats.sps_port0pkts / ++totcalls;
1158 1177
1159done:
1160 clear_bit(0, &dd->ipath_rcv_pending);
1161 smp_mb__after_clear_bit();
1162
1163bail:; 1178bail:;
1164} 1179}
1165 1180
@@ -1589,6 +1604,35 @@ int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd)
1589 return ret; 1604 return ret;
1590} 1605}
1591 1606
1607
1608/*
1609 * Flush all sends that might be in the ready to send state, as well as any
1610 * that are in the process of being sent. Used whenever we need to be
1611 * sure the send side is idle. Cleans up all buffer state by canceling
1612 * all pio buffers, and issuing an abort, which cleans up anything in the
1613 * launch fifo. The cancel is superfluous on some chip versions, but
1614 * it's safer to always do it.
1615 * PIOAvail bits are updated by the chip as if normal send had happened.
1616 */
1617void ipath_cancel_sends(struct ipath_devdata *dd)
1618{
1619 ipath_dbg("Cancelling all in-progress send buffers\n");
1620 dd->ipath_lastcancel = jiffies+HZ/2; /* skip armlaunch errs a bit */
1621 /*
1622 * the abort bit is auto-clearing. We read scratch to be sure
1623 * that cancels and the abort have taken effect in the chip.
1624 */
1625 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1626 INFINIPATH_S_ABORT);
1627 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1628 ipath_disarm_piobufs(dd, 0,
1629 (unsigned)(dd->ipath_piobcnt2k + dd->ipath_piobcnt4k));
1630
1631 /* and again, be sure all have hit the chip */
1632 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1633}
1634
1635
1592static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) 1636static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
1593{ 1637{
1594 static const char *what[4] = { 1638 static const char *what[4] = {
@@ -1610,14 +1654,8 @@ static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
1610 INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]); 1654 INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
1611 /* flush all queued sends when going to DOWN or INIT, to be sure that 1655 /* flush all queued sends when going to DOWN or INIT, to be sure that
1612 * they don't block MAD packets */ 1656 * they don't block MAD packets */
1613 if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) { 1657 if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT)
1614 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 1658 ipath_cancel_sends(dd);
1615 INFINIPATH_S_ABORT);
1616 ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
1617 (unsigned)(dd->ipath_piobcnt2k +
1618 dd->ipath_piobcnt4k) -
1619 dd->ipath_lastport_piobuf);
1620 }
1621 1659
1622 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, 1660 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1623 dd->ipath_ibcctrl | which); 1661 dd->ipath_ibcctrl | which);
@@ -1839,6 +1877,87 @@ void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
1839 ipath_write_kreg(dd, where, value); 1877 ipath_write_kreg(dd, where, value);
1840} 1878}
1841 1879
1880/*
1881 * Following deal with the "obviously simple" task of overriding the state
1882 * of the LEDS, which normally indicate link physical and logical status.
1883 * The complications arise in dealing with different hardware mappings
1884 * and the board-dependent routine being called from interrupts.
1885 * and then there's the requirement to _flash_ them.
1886 */
1887#define LED_OVER_FREQ_SHIFT 8
1888#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
1889/* Below is "non-zero" to force override, but both actual LEDs are off */
1890#define LED_OVER_BOTH_OFF (8)
1891
1892void ipath_run_led_override(unsigned long opaque)
1893{
1894 struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
1895 int timeoff;
1896 int pidx;
1897 u64 lstate, ltstate, val;
1898
1899 if (!(dd->ipath_flags & IPATH_INITTED))
1900 return;
1901
1902 pidx = dd->ipath_led_override_phase++ & 1;
1903 dd->ipath_led_override = dd->ipath_led_override_vals[pidx];
1904 timeoff = dd->ipath_led_override_timeoff;
1905
1906 /*
1907 * below potentially restores the LED values per current status,
1908 * should also possibly setup the traffic-blink register,
1909 * but leave that to per-chip functions.
1910 */
1911 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
1912 ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
1913 INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
1914 lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) &
1915 INFINIPATH_IBCS_LINKSTATE_MASK;
1916
1917 dd->ipath_f_setextled(dd, lstate, ltstate);
1918 mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
1919}
1920
1921void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val)
1922{
1923 int timeoff, freq;
1924
1925 if (!(dd->ipath_flags & IPATH_INITTED))
1926 return;
1927
1928 /* First check if we are blinking. If not, use 1HZ polling */
1929 timeoff = HZ;
1930 freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
1931
1932 if (freq) {
1933 /* For blink, set each phase from one nybble of val */
1934 dd->ipath_led_override_vals[0] = val & 0xF;
1935 dd->ipath_led_override_vals[1] = (val >> 4) & 0xF;
1936 timeoff = (HZ << 4)/freq;
1937 } else {
1938 /* Non-blink set both phases the same. */
1939 dd->ipath_led_override_vals[0] = val & 0xF;
1940 dd->ipath_led_override_vals[1] = val & 0xF;
1941 }
1942 dd->ipath_led_override_timeoff = timeoff;
1943
1944 /*
1945 * If the timer has not already been started, do so. Use a "quick"
1946 * timeout so the function will be called soon, to look at our request.
1947 */
1948 if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) {
1949 /* Need to start timer */
1950 init_timer(&dd->ipath_led_override_timer);
1951 dd->ipath_led_override_timer.function =
1952 ipath_run_led_override;
1953 dd->ipath_led_override_timer.data = (unsigned long) dd;
1954 dd->ipath_led_override_timer.expires = jiffies + 1;
1955 add_timer(&dd->ipath_led_override_timer);
1956 } else {
1957 atomic_dec(&dd->ipath_led_override_timer_active);
1958 }
1959}
1960
1842/** 1961/**
1843 * ipath_shutdown_device - shut down a device 1962 * ipath_shutdown_device - shut down a device
1844 * @dd: the infinipath device 1963 * @dd: the infinipath device
@@ -1879,17 +1998,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
1879 */ 1998 */
1880 udelay(5); 1999 udelay(5);
1881 2000
1882 /*
1883 * abort any armed or launched PIO buffers that didn't go. (self
1884 * clearing). Will cause any packet currently being transmitted to
1885 * go out with an EBP, and may also cause a short packet error on
1886 * the receiver.
1887 */
1888 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1889 INFINIPATH_S_ABORT);
1890
1891 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE << 2001 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
1892 INFINIPATH_IBCC_LINKINITCMD_SHIFT); 2002 INFINIPATH_IBCC_LINKINITCMD_SHIFT);
2003 ipath_cancel_sends(dd);
1893 2004
1894 /* disable IBC */ 2005 /* disable IBC */
1895 dd->ipath_control &= ~INFINIPATH_C_LINKENABLE; 2006 dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
@@ -1902,7 +2013,6 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
1902 * Turn the LEDs off explictly for the same reason. 2013 * Turn the LEDs off explictly for the same reason.
1903 */ 2014 */
1904 dd->ipath_f_quiet_serdes(dd); 2015 dd->ipath_f_quiet_serdes(dd);
1905 dd->ipath_f_setextled(dd, 0, 0);
1906 2016
1907 if (dd->ipath_stats_timer_active) { 2017 if (dd->ipath_stats_timer_active) {
1908 del_timer_sync(&dd->ipath_stats_timer); 2018 del_timer_sync(&dd->ipath_stats_timer);
@@ -1918,6 +2028,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
1918 ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED); 2028 ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
1919 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); 2029 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
1920 ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); 2030 ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
2031
2032 ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
2033 ipath_update_eeprom_log(dd);
1921} 2034}
1922 2035
1923/** 2036/**
@@ -2078,6 +2191,16 @@ int ipath_reset_device(int unit)
2078 goto bail; 2191 goto bail;
2079 } 2192 }
2080 2193
2194 if (atomic_read(&dd->ipath_led_override_timer_active)) {
2195 /* Need to stop LED timer, _then_ shut off LEDs */
2196 del_timer_sync(&dd->ipath_led_override_timer);
2197 atomic_set(&dd->ipath_led_override_timer_active, 0);
2198 }
2199
2200 /* Shut off LEDs after we are sure timer is not running */
2201 dd->ipath_led_override = LED_OVER_BOTH_OFF;
2202 dd->ipath_f_setextled(dd, 0, 0);
2203
2081 dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit); 2204 dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);
2082 2205
2083 if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) { 2206 if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
index 030185f90ee2..6b9147964a4f 100644
--- a/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -95,39 +95,37 @@ static int i2c_gpio_set(struct ipath_devdata *dd,
95 enum i2c_type line, 95 enum i2c_type line,
96 enum i2c_state new_line_state) 96 enum i2c_state new_line_state)
97{ 97{
98 u64 read_val, write_val, mask, *gpioval; 98 u64 out_mask, dir_mask, *gpioval;
99 unsigned long flags = 0;
99 100
100 gpioval = &dd->ipath_gpio_out; 101 gpioval = &dd->ipath_gpio_out;
101 read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
102 if (line == i2c_line_scl)
103 mask = dd->ipath_gpio_scl;
104 else
105 mask = dd->ipath_gpio_sda;
106 102
107 if (new_line_state == i2c_line_high) 103 if (line == i2c_line_scl) {
104 dir_mask = dd->ipath_gpio_scl;
105 out_mask = (1UL << dd->ipath_gpio_scl_num);
106 } else {
107 dir_mask = dd->ipath_gpio_sda;
108 out_mask = (1UL << dd->ipath_gpio_sda_num);
109 }
110
111 spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
112 if (new_line_state == i2c_line_high) {
108 /* tri-state the output rather than force high */ 113 /* tri-state the output rather than force high */
109 write_val = read_val & ~mask; 114 dd->ipath_extctrl &= ~dir_mask;
110 else 115 } else {
111 /* config line to be an output */ 116 /* config line to be an output */
112 write_val = read_val | mask; 117 dd->ipath_extctrl |= dir_mask;
113 ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val); 118 }
119 ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
114 120
115 /* set high and verify */ 121 /* set output as well (no real verify) */
116 if (new_line_state == i2c_line_high) 122 if (new_line_state == i2c_line_high)
117 write_val = 0x1UL; 123 *gpioval |= out_mask;
118 else 124 else
119 write_val = 0x0UL; 125 *gpioval &= ~out_mask;
120 126
121 if (line == i2c_line_scl) {
122 write_val <<= dd->ipath_gpio_scl_num;
123 *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_scl_num);
124 *gpioval |= write_val;
125 } else {
126 write_val <<= dd->ipath_gpio_sda_num;
127 *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_sda_num);
128 *gpioval |= write_val;
129 }
130 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval); 127 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);
128 spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
131 129
132 return 0; 130 return 0;
133} 131}
@@ -145,8 +143,9 @@ static int i2c_gpio_get(struct ipath_devdata *dd,
145 enum i2c_type line, 143 enum i2c_type line,
146 enum i2c_state *curr_statep) 144 enum i2c_state *curr_statep)
147{ 145{
148 u64 read_val, write_val, mask; 146 u64 read_val, mask;
149 int ret; 147 int ret;
148 unsigned long flags = 0;
150 149
151 /* check args */ 150 /* check args */
152 if (curr_statep == NULL) { 151 if (curr_statep == NULL) {
@@ -154,15 +153,21 @@ static int i2c_gpio_get(struct ipath_devdata *dd,
154 goto bail; 153 goto bail;
155 } 154 }
156 155
157 read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
158 /* config line to be an input */ 156 /* config line to be an input */
159 if (line == i2c_line_scl) 157 if (line == i2c_line_scl)
160 mask = dd->ipath_gpio_scl; 158 mask = dd->ipath_gpio_scl;
161 else 159 else
162 mask = dd->ipath_gpio_sda; 160 mask = dd->ipath_gpio_sda;
163 write_val = read_val & ~mask; 161
164 ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val); 162 spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
163 dd->ipath_extctrl &= ~mask;
164 ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
165 /*
166 * Below is very unlikely to reflect true input state if Output
167 * Enable actually changed.
168 */
165 read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus); 169 read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
170 spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
166 171
167 if (read_val & mask) 172 if (read_val & mask)
168 *curr_statep = i2c_line_high; 173 *curr_statep = i2c_line_high;
@@ -192,6 +197,7 @@ static void i2c_wait_for_writes(struct ipath_devdata *dd)
192 197
193static void scl_out(struct ipath_devdata *dd, u8 bit) 198static void scl_out(struct ipath_devdata *dd, u8 bit)
194{ 199{
200 udelay(1);
195 i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low); 201 i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low);
196 202
197 i2c_wait_for_writes(dd); 203 i2c_wait_for_writes(dd);
@@ -314,12 +320,18 @@ static int eeprom_reset(struct ipath_devdata *dd)
314 int clock_cycles_left = 9; 320 int clock_cycles_left = 9;
315 u64 *gpioval = &dd->ipath_gpio_out; 321 u64 *gpioval = &dd->ipath_gpio_out;
316 int ret; 322 int ret;
323 unsigned long flags;
317 324
318 eeprom_init = 1; 325 spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
326 /* Make sure shadows are consistent */
327 dd->ipath_extctrl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
319 *gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out); 328 *gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out);
329 spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
330
320 ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg " 331 ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg "
321 "is %llx\n", (unsigned long long) *gpioval); 332 "is %llx\n", (unsigned long long) *gpioval);
322 333
334 eeprom_init = 1;
323 /* 335 /*
324 * This is to get the i2c into a known state, by first going low, 336 * This is to get the i2c into a known state, by first going low,
325 * then tristate sda (and then tristate scl as first thing 337 * then tristate sda (and then tristate scl as first thing
@@ -355,8 +367,8 @@ bail:
355 * @len: number of bytes to receive 367 * @len: number of bytes to receive
356 */ 368 */
357 369
358int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset, 370static int ipath_eeprom_internal_read(struct ipath_devdata *dd,
359 void *buffer, int len) 371 u8 eeprom_offset, void *buffer, int len)
360{ 372{
361 /* compiler complains unless initialized */ 373 /* compiler complains unless initialized */
362 u8 single_byte = 0; 374 u8 single_byte = 0;
@@ -406,6 +418,7 @@ bail:
406 return ret; 418 return ret;
407} 419}
408 420
421
409/** 422/**
410 * ipath_eeprom_write - writes data to the eeprom via I2C 423 * ipath_eeprom_write - writes data to the eeprom via I2C
411 * @dd: the infinipath device 424 * @dd: the infinipath device
@@ -413,8 +426,8 @@ bail:
413 * @buffer: data to write 426 * @buffer: data to write
414 * @len: number of bytes to write 427 * @len: number of bytes to write
415 */ 428 */
416int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset, 429int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset,
417 const void *buffer, int len) 430 const void *buffer, int len)
418{ 431{
419 u8 single_byte; 432 u8 single_byte;
420 int sub_len; 433 int sub_len;
@@ -488,6 +501,38 @@ bail:
488 return ret; 501 return ret;
489} 502}
490 503
504/*
505 * The public entry-points ipath_eeprom_read() and ipath_eeprom_write()
506 * are now just wrappers around the internal functions.
507 */
508int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
509 void *buff, int len)
510{
511 int ret;
512
513 ret = down_interruptible(&dd->ipath_eep_sem);
514 if (!ret) {
515 ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len);
516 up(&dd->ipath_eep_sem);
517 }
518
519 return ret;
520}
521
522int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
523 const void *buff, int len)
524{
525 int ret;
526
527 ret = down_interruptible(&dd->ipath_eep_sem);
528 if (!ret) {
529 ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len);
530 up(&dd->ipath_eep_sem);
531 }
532
533 return ret;
534}
535
491static u8 flash_csum(struct ipath_flash *ifp, int adjust) 536static u8 flash_csum(struct ipath_flash *ifp, int adjust)
492{ 537{
493 u8 *ip = (u8 *) ifp; 538 u8 *ip = (u8 *) ifp;
@@ -515,7 +560,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
515 void *buf; 560 void *buf;
516 struct ipath_flash *ifp; 561 struct ipath_flash *ifp;
517 __be64 guid; 562 __be64 guid;
518 int len; 563 int len, eep_stat;
519 u8 csum, *bguid; 564 u8 csum, *bguid;
520 int t = dd->ipath_unit; 565 int t = dd->ipath_unit;
521 struct ipath_devdata *dd0 = ipath_lookup(0); 566 struct ipath_devdata *dd0 = ipath_lookup(0);
@@ -559,7 +604,11 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
559 goto bail; 604 goto bail;
560 } 605 }
561 606
562 if (ipath_eeprom_read(dd, 0, buf, len)) { 607 down(&dd->ipath_eep_sem);
608 eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len);
609 up(&dd->ipath_eep_sem);
610
611 if (eep_stat) {
563 ipath_dev_err(dd, "Failed reading GUID from eeprom\n"); 612 ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
564 goto done; 613 goto done;
565 } 614 }
@@ -634,8 +683,192 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
634 ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n", 683 ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
635 (unsigned long long) be64_to_cpu(dd->ipath_guid)); 684 (unsigned long long) be64_to_cpu(dd->ipath_guid));
636 685
686 memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT);
687 /*
688 * Power-on (actually "active") hours are kept as little-endian value
689 * in EEPROM, but as seconds in a (possibly as small as 24-bit)
690 * atomic_t while running.
691 */
692 atomic_set(&dd->ipath_active_time, 0);
693 dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8);
694
637done: 695done:
638 vfree(buf); 696 vfree(buf);
639 697
640bail:; 698bail:;
641} 699}
700
701/**
702 * ipath_update_eeprom_log - copy active-time and error counters to eeprom
703 * @dd: the infinipath device
704 *
705 * Although the time is kept as seconds in the ipath_devdata struct, it is
706 * rounded to hours for re-write, as we have only 16 bits in EEPROM.
707 * First-cut code reads whole (expected) struct ipath_flash, modifies,
708 * re-writes. Future direction: read/write only what we need, assuming
709 * that the EEPROM had to have been "good enough" for driver init, and
710 * if not, we aren't making it worse.
711 *
712 */
713
714int ipath_update_eeprom_log(struct ipath_devdata *dd)
715{
716 void *buf;
717 struct ipath_flash *ifp;
718 int len, hi_water;
719 uint32_t new_time, new_hrs;
720 u8 csum;
721 int ret, idx;
722 unsigned long flags;
723
724 /* first, check if we actually need to do anything. */
725 ret = 0;
726 for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
727 if (dd->ipath_eep_st_new_errs[idx]) {
728 ret = 1;
729 break;
730 }
731 }
732 new_time = atomic_read(&dd->ipath_active_time);
733
734 if (ret == 0 && new_time < 3600)
735 return 0;
736
737 /*
738 * The quick-check above determined that there is something worthy
739 * of logging, so get current contents and do a more detailed idea.
740 */
741 len = offsetof(struct ipath_flash, if_future);
742 buf = vmalloc(len);
743 ret = 1;
744 if (!buf) {
745 ipath_dev_err(dd, "Couldn't allocate memory to read %u "
746 "bytes from eeprom for logging\n", len);
747 goto bail;
748 }
749
750 /* Grab semaphore and read current EEPROM. If we get an
751 * error, let go, but if not, keep it until we finish write.
752 */
753 ret = down_interruptible(&dd->ipath_eep_sem);
754 if (ret) {
755 ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n");
756 goto free_bail;
757 }
758 ret = ipath_eeprom_internal_read(dd, 0, buf, len);
759 if (ret) {
760 up(&dd->ipath_eep_sem);
761 ipath_dev_err(dd, "Unable read EEPROM for logging\n");
762 goto free_bail;
763 }
764 ifp = (struct ipath_flash *)buf;
765
766 csum = flash_csum(ifp, 0);
767 if (csum != ifp->if_csum) {
768 up(&dd->ipath_eep_sem);
769 ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n",
770 csum, ifp->if_csum);
771 ret = 1;
772 goto free_bail;
773 }
774 hi_water = 0;
775 spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
776 for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
777 int new_val = dd->ipath_eep_st_new_errs[idx];
778 if (new_val) {
779 /*
780 * If we have seen any errors, add to EEPROM values
781 * We need to saturate at 0xFF (255) and we also
782 * would need to adjust the checksum if we were
783 * trying to minimize EEPROM traffic
784 * Note that we add to actual current count in EEPROM,
785 * in case it was altered while we were running.
786 */
787 new_val += ifp->if_errcntp[idx];
788 if (new_val > 0xFF)
789 new_val = 0xFF;
790 if (ifp->if_errcntp[idx] != new_val) {
791 ifp->if_errcntp[idx] = new_val;
792 hi_water = offsetof(struct ipath_flash,
793 if_errcntp) + idx;
794 }
795 /*
796 * update our shadow (used to minimize EEPROM
797 * traffic), to match what we are about to write.
798 */
799 dd->ipath_eep_st_errs[idx] = new_val;
800 dd->ipath_eep_st_new_errs[idx] = 0;
801 }
802 }
803 /*
804 * now update active-time. We would like to round to the nearest hour
805 * but unless atomic_t are sure to be proper signed ints we cannot,
806 * because we need to account for what we "transfer" to EEPROM and
807 * if we log an hour at 31 minutes, then we would need to set
808 * active_time to -29 to accurately count the _next_ hour.
809 */
810 if (new_time > 3600) {
811 new_hrs = new_time / 3600;
812 atomic_sub((new_hrs * 3600), &dd->ipath_active_time);
813 new_hrs += dd->ipath_eep_hrs;
814 if (new_hrs > 0xFFFF)
815 new_hrs = 0xFFFF;
816 dd->ipath_eep_hrs = new_hrs;
817 if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) {
818 ifp->if_powerhour[0] = new_hrs & 0xFF;
819 hi_water = offsetof(struct ipath_flash, if_powerhour);
820 }
821 if ((new_hrs >> 8) != ifp->if_powerhour[1]) {
822 ifp->if_powerhour[1] = new_hrs >> 8;
823 hi_water = offsetof(struct ipath_flash, if_powerhour)
824 + 1;
825 }
826 }
827 /*
828 * There is a tiny possibility that we could somehow fail to write
829 * the EEPROM after updating our shadows, but problems from holding
830 * the spinlock too long are a much bigger issue.
831 */
832 spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
833 if (hi_water) {
834 /* we made some change to the data, uopdate cksum and write */
835 csum = flash_csum(ifp, 1);
836 ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1);
837 }
838 up(&dd->ipath_eep_sem);
839 if (ret)
840 ipath_dev_err(dd, "Failed updating EEPROM\n");
841
842free_bail:
843 vfree(buf);
844bail:
845 return ret;
846
847}
848
849/**
850 * ipath_inc_eeprom_err - increment one of the four error counters
851 * that are logged to EEPROM.
852 * @dd: the infinipath device
853 * @eidx: 0..3, the counter to increment
854 * @incr: how much to add
855 *
856 * Each counter is 8-bits, and saturates at 255 (0xFF). They
857 * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log()
858 * is called, but it can only be called in a context that allows sleep.
859 * This function can be called even at interrupt level.
860 */
861
862void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr)
863{
864 uint new_val;
865 unsigned long flags;
866
867 spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
868 new_val = dd->ipath_eep_st_new_errs[eidx] + incr;
869 if (new_val > 255)
870 new_val = 255;
871 dd->ipath_eep_st_new_errs[eidx] = new_val;
872 spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
873 return;
874}
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 1272aaf2a785..33ab0d6b80ff 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -396,7 +396,8 @@ static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
396 "TID %u, vaddr %lx, physaddr %llx pgp %p\n", 396 "TID %u, vaddr %lx, physaddr %llx pgp %p\n",
397 tid, vaddr, (unsigned long long) physaddr, 397 tid, vaddr, (unsigned long long) physaddr,
398 pagep[i]); 398 pagep[i]);
399 dd->ipath_f_put_tid(dd, &tidbase[tid], 1, physaddr); 399 dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED,
400 physaddr);
400 /* 401 /*
401 * don't check this tid in ipath_portshadow, since we 402 * don't check this tid in ipath_portshadow, since we
402 * just filled it in; start with the next one. 403 * just filled it in; start with the next one.
@@ -422,7 +423,8 @@ static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
422 if (dd->ipath_pageshadow[porttid + tid]) { 423 if (dd->ipath_pageshadow[porttid + tid]) {
423 ipath_cdbg(VERBOSE, "Freeing TID %u\n", 424 ipath_cdbg(VERBOSE, "Freeing TID %u\n",
424 tid); 425 tid);
425 dd->ipath_f_put_tid(dd, &tidbase[tid], 1, 426 dd->ipath_f_put_tid(dd, &tidbase[tid],
427 RCVHQ_RCV_TYPE_EXPECTED,
426 dd->ipath_tidinvalid); 428 dd->ipath_tidinvalid);
427 pci_unmap_page(dd->pcidev, 429 pci_unmap_page(dd->pcidev,
428 dd->ipath_physshadow[porttid + tid], 430 dd->ipath_physshadow[porttid + tid],
@@ -538,7 +540,8 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
538 if (dd->ipath_pageshadow[porttid + tid]) { 540 if (dd->ipath_pageshadow[porttid + tid]) {
539 ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n", 541 ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
540 pd->port_pid, tid); 542 pd->port_pid, tid);
541 dd->ipath_f_put_tid(dd, &tidbase[tid], 1, 543 dd->ipath_f_put_tid(dd, &tidbase[tid],
544 RCVHQ_RCV_TYPE_EXPECTED,
542 dd->ipath_tidinvalid); 545 dd->ipath_tidinvalid);
543 pci_unmap_page(dd->pcidev, 546 pci_unmap_page(dd->pcidev,
544 dd->ipath_physshadow[porttid + tid], 547 dd->ipath_physshadow[porttid + tid],
@@ -921,7 +924,8 @@ static int ipath_create_user_egr(struct ipath_portdata *pd)
921 (u64 __iomem *) 924 (u64 __iomem *)
922 ((char __iomem *) 925 ((char __iomem *)
923 dd->ipath_kregbase + 926 dd->ipath_kregbase +
924 dd->ipath_rcvegrbase), 0, pa); 927 dd->ipath_rcvegrbase),
928 RCVHQ_RCV_TYPE_EAGER, pa);
925 pa += egrsize; 929 pa += egrsize;
926 } 930 }
927 cond_resched(); /* don't hog the cpu */ 931 cond_resched(); /* don't hog the cpu */
@@ -1337,68 +1341,133 @@ bail:
1337 return ret; 1341 return ret;
1338} 1342}
1339 1343
1340static unsigned int ipath_poll(struct file *fp, 1344static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
1341 struct poll_table_struct *pt) 1345 struct file *fp,
1346 struct poll_table_struct *pt)
1342{ 1347{
1343 struct ipath_portdata *pd;
1344 u32 head, tail;
1345 int bit;
1346 unsigned pollflag = 0; 1348 unsigned pollflag = 0;
1347 struct ipath_devdata *dd; 1349 struct ipath_devdata *dd;
1348 1350
1349 pd = port_fp(fp);
1350 if (!pd)
1351 goto bail;
1352 dd = pd->port_dd; 1351 dd = pd->port_dd;
1353 1352
1354 bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT; 1353 if (test_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag)) {
1355 set_bit(bit, &dd->ipath_rcvctrl); 1354 pollflag |= POLLERR;
1355 clear_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag);
1356 }
1356 1357
1357 /* 1358 if (test_bit(IPATH_PORT_WAITING_URG, &pd->int_flag)) {
1358 * Before blocking, make sure that head is still == tail, 1359 pollflag |= POLLIN | POLLRDNORM;
1359 * reading from the chip, so we can be sure the interrupt 1360 clear_bit(IPATH_PORT_WAITING_URG, &pd->int_flag);
1360 * enable has made it to the chip. If not equal, disable 1361 }
1361 * interrupt again and return immediately. This avoids races,
1362 * and the overhead of the chip read doesn't matter much at
1363 * this point, since we are waiting for something anyway.
1364 */
1365 1362
1366 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 1363 if (!pollflag) {
1367 dd->ipath_rcvctrl); 1364 set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag);
1365 if (pd->poll_type & IPATH_POLL_TYPE_OVERFLOW)
1366 set_bit(IPATH_PORT_WAITING_OVERFLOW,
1367 &pd->port_flag);
1368
1369 poll_wait(fp, &pd->port_wait, pt);
1370 }
1371
1372 return pollflag;
1373}
1374
1375static unsigned int ipath_poll_next(struct ipath_portdata *pd,
1376 struct file *fp,
1377 struct poll_table_struct *pt)
1378{
1379 u32 head, tail;
1380 unsigned pollflag = 0;
1381 struct ipath_devdata *dd;
1382
1383 dd = pd->port_dd;
1368 1384
1369 head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port); 1385 head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
1370 tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); 1386 tail = *(volatile u64 *)pd->port_rcvhdrtail_kvaddr;
1371 1387
1372 if (tail == head) { 1388 if (test_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag)) {
1389 pollflag |= POLLERR;
1390 clear_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag);
1391 }
1392
1393 if (tail != head ||
1394 test_bit(IPATH_PORT_WAITING_RCV, &pd->int_flag)) {
1395 pollflag |= POLLIN | POLLRDNORM;
1396 clear_bit(IPATH_PORT_WAITING_RCV, &pd->int_flag);
1397 }
1398
1399 if (!pollflag) {
1373 set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); 1400 set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
1401 if (pd->poll_type & IPATH_POLL_TYPE_OVERFLOW)
1402 set_bit(IPATH_PORT_WAITING_OVERFLOW,
1403 &pd->port_flag);
1404
1405 set_bit(pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT,
1406 &dd->ipath_rcvctrl);
1407
1408 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1409 dd->ipath_rcvctrl);
1410
1374 if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */ 1411 if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
1375 (void)ipath_write_ureg(dd, ur_rcvhdrhead, 1412 ipath_write_ureg(dd, ur_rcvhdrhead,
1376 dd->ipath_rhdrhead_intr_off 1413 dd->ipath_rhdrhead_intr_off | head,
1377 | head, pd->port_port); 1414 pd->port_port);
1378 poll_wait(fp, &pd->port_wait, pt);
1379 1415
1380 if (test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) { 1416 poll_wait(fp, &pd->port_wait, pt);
1381 /* timed out, no packets received */
1382 clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
1383 pd->port_rcvwait_to++;
1384 }
1385 else
1386 pollflag = POLLIN | POLLRDNORM;
1387 }
1388 else {
1389 /* it's already happened; don't do wait_event overhead */
1390 pollflag = POLLIN | POLLRDNORM;
1391 pd->port_rcvnowait++;
1392 } 1417 }
1393 1418
1394 clear_bit(bit, &dd->ipath_rcvctrl); 1419 return pollflag;
1395 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 1420}
1396 dd->ipath_rcvctrl); 1421
1422static unsigned int ipath_poll(struct file *fp,
1423 struct poll_table_struct *pt)
1424{
1425 struct ipath_portdata *pd;
1426 unsigned pollflag;
1427
1428 pd = port_fp(fp);
1429 if (!pd)
1430 pollflag = 0;
1431 else if (pd->poll_type & IPATH_POLL_TYPE_URGENT)
1432 pollflag = ipath_poll_urgent(pd, fp, pt);
1433 else
1434 pollflag = ipath_poll_next(pd, fp, pt);
1397 1435
1398bail:
1399 return pollflag; 1436 return pollflag;
1400} 1437}
1401 1438
1439static int ipath_supports_subports(int user_swmajor, int user_swminor)
1440{
1441 /* no subport implementation prior to software version 1.3 */
1442 return (user_swmajor > 1) || (user_swminor >= 3);
1443}
1444
1445static int ipath_compatible_subports(int user_swmajor, int user_swminor)
1446{
1447 /* this code is written long-hand for clarity */
1448 if (IPATH_USER_SWMAJOR != user_swmajor) {
1449 /* no promise of compatibility if major mismatch */
1450 return 0;
1451 }
1452 if (IPATH_USER_SWMAJOR == 1) {
1453 switch (IPATH_USER_SWMINOR) {
1454 case 0:
1455 case 1:
1456 case 2:
1457 /* no subport implementation so cannot be compatible */
1458 return 0;
1459 case 3:
1460 /* 3 is only compatible with itself */
1461 return user_swminor == 3;
1462 default:
1463 /* >= 4 are compatible (or are expected to be) */
1464 return user_swminor >= 4;
1465 }
1466 }
1467 /* make no promises yet for future major versions */
1468 return 0;
1469}
1470
1402static int init_subports(struct ipath_devdata *dd, 1471static int init_subports(struct ipath_devdata *dd,
1403 struct ipath_portdata *pd, 1472 struct ipath_portdata *pd,
1404 const struct ipath_user_info *uinfo) 1473 const struct ipath_user_info *uinfo)
@@ -1408,20 +1477,32 @@ static int init_subports(struct ipath_devdata *dd,
1408 size_t size; 1477 size_t size;
1409 1478
1410 /* 1479 /*
1411 * If the user is requesting zero or one port, 1480 * If the user is requesting zero subports,
1412 * skip the subport allocation. 1481 * skip the subport allocation.
1413 */ 1482 */
1414 if (uinfo->spu_subport_cnt <= 1) 1483 if (uinfo->spu_subport_cnt <= 0)
1484 goto bail;
1485
1486 /* Self-consistency check for ipath_compatible_subports() */
1487 if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) &&
1488 !ipath_compatible_subports(IPATH_USER_SWMAJOR,
1489 IPATH_USER_SWMINOR)) {
1490 dev_info(&dd->pcidev->dev,
1491 "Inconsistent ipath_compatible_subports()\n");
1415 goto bail; 1492 goto bail;
1493 }
1416 1494
1417 /* Old user binaries don't know about new subport implementation */ 1495 /* Check for subport compatibility */
1418 if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) { 1496 if (!ipath_compatible_subports(uinfo->spu_userversion >> 16,
1497 uinfo->spu_userversion & 0xffff)) {
1419 dev_info(&dd->pcidev->dev, 1498 dev_info(&dd->pcidev->dev,
1420 "Mismatched user minor version (%d) and driver " 1499 "Mismatched user version (%d.%d) and driver "
1421 "minor version (%d) while port sharing. Ensure " 1500 "version (%d.%d) while port sharing. Ensure "
1422 "that driver and library are from the same " 1501 "that driver and library are from the same "
1423 "release.\n", 1502 "release.\n",
1503 (int) (uinfo->spu_userversion >> 16),
1424 (int) (uinfo->spu_userversion & 0xffff), 1504 (int) (uinfo->spu_userversion & 0xffff),
1505 IPATH_USER_SWMAJOR,
1425 IPATH_USER_SWMINOR); 1506 IPATH_USER_SWMINOR);
1426 goto bail; 1507 goto bail;
1427 } 1508 }
@@ -1725,14 +1806,13 @@ static int ipath_open(struct inode *in, struct file *fp)
1725 return fp->private_data ? 0 : -ENOMEM; 1806 return fp->private_data ? 0 : -ENOMEM;
1726} 1807}
1727 1808
1728
1729/* Get port early, so can set affinity prior to memory allocation */ 1809/* Get port early, so can set affinity prior to memory allocation */
1730static int ipath_assign_port(struct file *fp, 1810static int ipath_assign_port(struct file *fp,
1731 const struct ipath_user_info *uinfo) 1811 const struct ipath_user_info *uinfo)
1732{ 1812{
1733 int ret; 1813 int ret;
1734 int i_minor; 1814 int i_minor;
1735 unsigned swminor; 1815 unsigned swmajor, swminor;
1736 1816
1737 /* Check to be sure we haven't already initialized this file */ 1817 /* Check to be sure we haven't already initialized this file */
1738 if (port_fp(fp)) { 1818 if (port_fp(fp)) {
@@ -1741,7 +1821,8 @@ static int ipath_assign_port(struct file *fp,
1741 } 1821 }
1742 1822
1743 /* for now, if major version is different, bail */ 1823 /* for now, if major version is different, bail */
1744 if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) { 1824 swmajor = uinfo->spu_userversion >> 16;
1825 if (swmajor != IPATH_USER_SWMAJOR) {
1745 ipath_dbg("User major version %d not same as driver " 1826 ipath_dbg("User major version %d not same as driver "
1746 "major %d\n", uinfo->spu_userversion >> 16, 1827 "major %d\n", uinfo->spu_userversion >> 16,
1747 IPATH_USER_SWMAJOR); 1828 IPATH_USER_SWMAJOR);
@@ -1756,7 +1837,8 @@ static int ipath_assign_port(struct file *fp,
1756 1837
1757 mutex_lock(&ipath_mutex); 1838 mutex_lock(&ipath_mutex);
1758 1839
1759 if (swminor == IPATH_USER_SWMINOR && uinfo->spu_subport_cnt && 1840 if (ipath_compatible_subports(swmajor, swminor) &&
1841 uinfo->spu_subport_cnt &&
1760 (ret = find_shared_port(fp, uinfo))) { 1842 (ret = find_shared_port(fp, uinfo))) {
1761 mutex_unlock(&ipath_mutex); 1843 mutex_unlock(&ipath_mutex);
1762 if (ret > 0) 1844 if (ret > 0)
@@ -2020,7 +2102,8 @@ static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
2020 info.port = pd->port_port; 2102 info.port = pd->port_port;
2021 info.subport = subport; 2103 info.subport = subport;
2022 /* Don't return new fields if old library opened the port. */ 2104 /* Don't return new fields if old library opened the port. */
2023 if ((pd->userversion & 0xffff) == IPATH_USER_SWMINOR) { 2105 if (ipath_supports_subports(pd->userversion >> 16,
2106 pd->userversion & 0xffff)) {
2024 /* Number of user ports available for this device. */ 2107 /* Number of user ports available for this device. */
2025 info.num_ports = pd->port_dd->ipath_cfgports - 1; 2108 info.num_ports = pd->port_dd->ipath_cfgports - 1;
2026 info.num_subports = pd->port_subport_cnt; 2109 info.num_subports = pd->port_subport_cnt;
@@ -2123,6 +2206,11 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
2123 src = NULL; 2206 src = NULL;
2124 dest = NULL; 2207 dest = NULL;
2125 break; 2208 break;
2209 case IPATH_CMD_POLL_TYPE:
2210 copy = sizeof(cmd.cmd.poll_type);
2211 dest = &cmd.cmd.poll_type;
2212 src = &ucmd->cmd.poll_type;
2213 break;
2126 default: 2214 default:
2127 ret = -EINVAL; 2215 ret = -EINVAL;
2128 goto bail; 2216 goto bail;
@@ -2195,6 +2283,9 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
2195 case IPATH_CMD_PIOAVAILUPD: 2283 case IPATH_CMD_PIOAVAILUPD:
2196 ret = ipath_force_pio_avail_update(pd->port_dd); 2284 ret = ipath_force_pio_avail_update(pd->port_dd);
2197 break; 2285 break;
2286 case IPATH_CMD_POLL_TYPE:
2287 pd->poll_type = cmd.cmd.poll_type;
2288 break;
2198 } 2289 }
2199 2290
2200 if (ret >= 0) 2291 if (ret >= 0)
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index ebd5c7bd2cdb..2e689b974e1f 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -257,9 +257,14 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
257 /* Notimpl InitType (actually, an SMA decision) */ 257 /* Notimpl InitType (actually, an SMA decision) */
258 /* VLHighLimit is 0 (only one VL) */ 258 /* VLHighLimit is 0 (only one VL) */
259 ; /* VLArbitrationHighCap is 0 (only one VL) */ 259 ; /* VLArbitrationHighCap is 0 (only one VL) */
260 /*
261 * Note: the chips support a maximum MTU of 4096, but the driver
262 * hasn't implemented this feature yet, so set the maximum
263 * to 2048.
264 */
260 portinfo[10] = /* VLArbitrationLowCap is 0 (only one VL) */ 265 portinfo[10] = /* VLArbitrationLowCap is 0 (only one VL) */
261 /* InitTypeReply is SMA decision */ 266 /* InitTypeReply is SMA decision */
262 (5 << 16) /* MTUCap 4096 */ 267 (4 << 16) /* MTUCap 2048 */
263 | (7 << 13) /* VLStallCount */ 268 | (7 << 13) /* VLStallCount */
264 | (0x1f << 8) /* HOQLife */ 269 | (0x1f << 8) /* HOQLife */
265 | (1 << 4) 270 | (1 << 4)
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 4171198fc202..650745d83fac 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -36,6 +36,7 @@
36 * HT chip. 36 * HT chip.
37 */ 37 */
38 38
39#include <linux/vmalloc.h>
39#include <linux/pci.h> 40#include <linux/pci.h>
40#include <linux/delay.h> 41#include <linux/delay.h>
41#include <linux/htirq.h> 42#include <linux/htirq.h>
@@ -439,6 +440,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
439 u32 bits, ctrl; 440 u32 bits, ctrl;
440 int isfatal = 0; 441 int isfatal = 0;
441 char bitsmsg[64]; 442 char bitsmsg[64];
443 int log_idx;
442 444
443 hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); 445 hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
444 446
@@ -467,6 +469,11 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
467 469
468 hwerrs &= dd->ipath_hwerrmask; 470 hwerrs &= dd->ipath_hwerrmask;
469 471
472 /* We log some errors to EEPROM, check if we have any of those. */
473 for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
474 if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
475 ipath_inc_eeprom_err(dd, log_idx, 1);
476
470 /* 477 /*
471 * make sure we get this much out, unless told to be quiet, 478 * make sure we get this much out, unless told to be quiet,
472 * it's a parity error we may recover from, 479 * it's a parity error we may recover from,
@@ -502,9 +509,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
502 if (!hwerrs) { 509 if (!hwerrs) {
503 ipath_dbg("Clearing freezemode on ignored or " 510 ipath_dbg("Clearing freezemode on ignored or "
504 "recovered hardware error\n"); 511 "recovered hardware error\n");
505 ctrl &= ~INFINIPATH_C_FREEZEMODE; 512 ipath_clear_freeze(dd);
506 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
507 ctrl);
508 } 513 }
509 } 514 }
510 515
@@ -672,10 +677,16 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
672 if (n) 677 if (n)
673 snprintf(name, namelen, "%s", n); 678 snprintf(name, namelen, "%s", n);
674 679
680 if (dd->ipath_boardrev != 6 && dd->ipath_boardrev != 7 &&
681 dd->ipath_boardrev != 11) {
682 ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name);
683 ret = 1;
684 goto bail;
685 }
675 if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || 686 if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
676 dd->ipath_minrev > 3)) { 687 dd->ipath_minrev > 4)) {
677 /* 688 /*
678 * This version of the driver only supports Rev 3.2 and 3.3 689 * This version of the driver only supports Rev 3.2 - 3.4
679 */ 690 */
680 ipath_dev_err(dd, 691 ipath_dev_err(dd,
681 "Unsupported InfiniPath hardware revision %u.%u!\n", 692 "Unsupported InfiniPath hardware revision %u.%u!\n",
@@ -689,36 +700,11 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
689 * copies 700 * copies
690 */ 701 */
691 dd->ipath_flags |= IPATH_32BITCOUNTERS; 702 dd->ipath_flags |= IPATH_32BITCOUNTERS;
703 dd->ipath_flags |= IPATH_GPIO_INTR;
692 if (dd->ipath_htspeed != 800) 704 if (dd->ipath_htspeed != 800)
693 ipath_dev_err(dd, 705 ipath_dev_err(dd,
694 "Incorrectly configured for HT @ %uMHz\n", 706 "Incorrectly configured for HT @ %uMHz\n",
695 dd->ipath_htspeed); 707 dd->ipath_htspeed);
696 if (dd->ipath_boardrev == 7 || dd->ipath_boardrev == 11 ||
697 dd->ipath_boardrev == 6)
698 dd->ipath_flags |= IPATH_GPIO_INTR;
699 else
700 dd->ipath_flags |= IPATH_POLL_RX_INTR;
701 if (dd->ipath_boardrev == 8) { /* LS/X-1 */
702 u64 val;
703 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
704 if (val & INFINIPATH_EXTS_SERDESSEL) {
705 /*
706 * hardware disabled
707 *
708 * This means that the chip is hardware disabled,
709 * and will not be able to bring up the link,
710 * in any case. We special case this and abort
711 * early, to avoid later messages. We also set
712 * the DISABLED status bit
713 */
714 ipath_dbg("Unit %u is hardware-disabled\n",
715 dd->ipath_unit);
716 *dd->ipath_statusp |= IPATH_STATUS_DISABLED;
717 /* this value is handled differently */
718 ret = 2;
719 goto bail;
720 }
721 }
722 ret = 0; 708 ret = 0;
723 709
724bail: 710bail:
@@ -1058,12 +1044,24 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
1058 u64 lst, u64 ltst) 1044 u64 lst, u64 ltst)
1059{ 1045{
1060 u64 extctl; 1046 u64 extctl;
1047 unsigned long flags = 0;
1061 1048
1062 /* the diags use the LED to indicate diag info, so we leave 1049 /* the diags use the LED to indicate diag info, so we leave
1063 * the external LED alone when the diags are running */ 1050 * the external LED alone when the diags are running */
1064 if (ipath_diag_inuse) 1051 if (ipath_diag_inuse)
1065 return; 1052 return;
1066 1053
1054 /* Allow override of LED display for, e.g. Locating system in rack */
1055 if (dd->ipath_led_override) {
1056 ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
1057 ? INFINIPATH_IBCS_LT_STATE_LINKUP
1058 : INFINIPATH_IBCS_LT_STATE_DISABLED;
1059 lst = (dd->ipath_led_override & IPATH_LED_LOG)
1060 ? INFINIPATH_IBCS_L_STATE_ACTIVE
1061 : INFINIPATH_IBCS_L_STATE_DOWN;
1062 }
1063
1064 spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
1067 /* 1065 /*
1068 * start by setting both LED control bits to off, then turn 1066 * start by setting both LED control bits to off, then turn
1069 * on the appropriate bit(s). 1067 * on the appropriate bit(s).
@@ -1092,6 +1090,7 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
1092 } 1090 }
1093 dd->ipath_extctrl = extctl; 1091 dd->ipath_extctrl = extctl;
1094 ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl); 1092 ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
1093 spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
1095} 1094}
1096 1095
1097static void ipath_init_ht_variables(struct ipath_devdata *dd) 1096static void ipath_init_ht_variables(struct ipath_devdata *dd)
@@ -1157,6 +1156,22 @@ static void ipath_init_ht_variables(struct ipath_devdata *dd)
1157 1156
1158 dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; 1157 dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
1159 dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; 1158 dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
1159
1160 /*
1161 * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
1162 * 2 is Some Misc, 3 is reserved for future.
1163 */
1164 dd->ipath_eep_st_masks[0].hwerrs_to_log =
1165 INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
1166 INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
1167
1168 dd->ipath_eep_st_masks[1].hwerrs_to_log =
1169 INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
1170 INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
1171
1172 dd->ipath_eep_st_masks[2].errs_to_log =
1173 INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;
1174
1160} 1175}
1161 1176
1162/** 1177/**
@@ -1372,7 +1387,7 @@ static void ipath_ht_quiet_serdes(struct ipath_devdata *dd)
1372 * ipath_pe_put_tid - write a TID in chip 1387 * ipath_pe_put_tid - write a TID in chip
1373 * @dd: the infinipath device 1388 * @dd: the infinipath device
1374 * @tidptr: pointer to the expected TID (in chip) to udpate 1389 * @tidptr: pointer to the expected TID (in chip) to udpate
1375 * @tidtype: 0 for eager, 1 for expected 1390 * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
1376 * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing 1391 * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
1377 * 1392 *
1378 * This exists as a separate routine to allow for special locking etc. 1393 * This exists as a separate routine to allow for special locking etc.
@@ -1393,7 +1408,7 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
1393 "40 bits, using only 40!!!\n", pa); 1408 "40 bits, using only 40!!!\n", pa);
1394 pa &= INFINIPATH_RT_ADDR_MASK; 1409 pa &= INFINIPATH_RT_ADDR_MASK;
1395 } 1410 }
1396 if (type == 0) 1411 if (type == RCVHQ_RCV_TYPE_EAGER)
1397 pa |= dd->ipath_tidtemplate; 1412 pa |= dd->ipath_tidtemplate;
1398 else { 1413 else {
1399 /* in words (fixed, full page). */ 1414 /* in words (fixed, full page). */
@@ -1433,7 +1448,8 @@ static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port)
1433 port * dd->ipath_rcvtidcnt * 1448 port * dd->ipath_rcvtidcnt *
1434 sizeof(*tidbase)); 1449 sizeof(*tidbase));
1435 for (i = 0; i < dd->ipath_rcvtidcnt; i++) 1450 for (i = 0; i < dd->ipath_rcvtidcnt; i++)
1436 ipath_ht_put_tid(dd, &tidbase[i], 1, dd->ipath_tidinvalid); 1451 ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
1452 dd->ipath_tidinvalid);
1437 1453
1438 tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + 1454 tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
1439 dd->ipath_rcvegrbase + 1455 dd->ipath_rcvegrbase +
@@ -1441,7 +1457,8 @@ static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port)
1441 sizeof(*tidbase)); 1457 sizeof(*tidbase));
1442 1458
1443 for (i = 0; i < dd->ipath_rcvegrcnt; i++) 1459 for (i = 0; i < dd->ipath_rcvegrcnt; i++)
1444 ipath_ht_put_tid(dd, &tidbase[i], 0, dd->ipath_tidinvalid); 1460 ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
1461 dd->ipath_tidinvalid);
1445} 1462}
1446 1463
1447/** 1464/**
@@ -1528,11 +1545,6 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
1528 writel(16, piobuf); 1545 writel(16, piobuf);
1529 piobuf += pioincr; 1546 piobuf += pioincr;
1530 } 1547 }
1531 /*
1532 * self-clearing
1533 */
1534 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1535 INFINIPATH_S_ABORT);
1536 1548
1537 ipath_get_eeprom_info(dd); 1549 ipath_get_eeprom_info(dd);
1538 if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && 1550 if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
@@ -1543,8 +1555,10 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
1543 * with 128, rather than 112. 1555 * with 128, rather than 112.
1544 */ 1556 */
1545 dd->ipath_flags |= IPATH_GPIO_INTR; 1557 dd->ipath_flags |= IPATH_GPIO_INTR;
1546 dd->ipath_flags &= ~IPATH_POLL_RX_INTR; 1558 } else
1547 } 1559 ipath_dev_err(dd, "Unsupported InfiniPath serial "
1560 "number %.16s!\n", dd->ipath_serial);
1561
1548 return 0; 1562 return 0;
1549} 1563}
1550 1564
@@ -1561,7 +1575,6 @@ static int ipath_ht_txe_recover(struct ipath_devdata *dd)
1561 } 1575 }
1562 dev_info(&dd->pcidev->dev, 1576 dev_info(&dd->pcidev->dev,
1563 "Recovering from TXE PIO parity error\n"); 1577 "Recovering from TXE PIO parity error\n");
1564 ipath_disarm_senderrbufs(dd, 1);
1565 return 1; 1578 return 1;
1566} 1579}
1567 1580
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 4e2e3dfeb2c8..9868ccda5f26 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -296,13 +296,6 @@ static const struct ipath_cregs ipath_pe_cregs = {
296#define IPATH_GPIO_SCL (1ULL << \ 296#define IPATH_GPIO_SCL (1ULL << \
297 (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) 297 (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
298 298
299/*
300 * Rev2 silicon allows suppressing check for ArmLaunch errors.
301 * this can speed up short packet sends on systems that do
302 * not guaranteee write-order.
303 */
304#define INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR (1ULL<<63)
305
306/* 6120 specific hardware errors... */ 299/* 6120 specific hardware errors... */
307static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = { 300static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
308 INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"), 301 INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"),
@@ -347,6 +340,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
347 u32 bits, ctrl; 340 u32 bits, ctrl;
348 int isfatal = 0; 341 int isfatal = 0;
349 char bitsmsg[64]; 342 char bitsmsg[64];
343 int log_idx;
350 344
351 hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); 345 hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
352 if (!hwerrs) { 346 if (!hwerrs) {
@@ -374,6 +368,11 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
374 368
375 hwerrs &= dd->ipath_hwerrmask; 369 hwerrs &= dd->ipath_hwerrmask;
376 370
371 /* We log some errors to EEPROM, check if we have any of those. */
372 for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
373 if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
374 ipath_inc_eeprom_err(dd, log_idx, 1);
375
377 /* 376 /*
378 * make sure we get this much out, unless told to be quiet, 377 * make sure we get this much out, unless told to be quiet,
379 * or it's occurred within the last 5 seconds 378 * or it's occurred within the last 5 seconds
@@ -431,10 +430,12 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
431 *dd->ipath_statusp |= IPATH_STATUS_HWERROR; 430 *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
432 dd->ipath_flags &= ~IPATH_INITTED; 431 dd->ipath_flags &= ~IPATH_INITTED;
433 } else { 432 } else {
434 ipath_dbg("Clearing freezemode on ignored hardware " 433 static u32 freeze_cnt;
435 "error\n"); 434
436 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 435 freeze_cnt++;
437 dd->ipath_control); 436 ipath_dbg("Clearing freezemode on ignored or recovered "
437 "hardware error (%u)\n", freeze_cnt);
438 ipath_clear_freeze(dd);
438 } 439 }
439 } 440 }
440 441
@@ -680,17 +681,6 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
680 val |= dd->ipath_rx_pol_inv << 681 val |= dd->ipath_rx_pol_inv <<
681 INFINIPATH_XGXS_RX_POL_SHIFT; 682 INFINIPATH_XGXS_RX_POL_SHIFT;
682 } 683 }
683 if (dd->ipath_minrev >= 2) {
684 /* Rev 2. can tolerate multiple writes to PBC, and
685 * allowing them can provide lower latency on some
686 * CPUs, but this feature is off by default, only
687 * turned on by setting D63 of XGXSconfig reg.
688 * May want to make this conditional more
689 * fine-grained in future. This is not exactly
690 * related to XGXS, but where the bit ended up.
691 */
692 val |= INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR;
693 }
694 if (val != prev_val) 684 if (val != prev_val)
695 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); 685 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
696 686
@@ -791,12 +781,24 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
791 u64 ltst) 781 u64 ltst)
792{ 782{
793 u64 extctl; 783 u64 extctl;
784 unsigned long flags = 0;
794 785
795 /* the diags use the LED to indicate diag info, so we leave 786 /* the diags use the LED to indicate diag info, so we leave
796 * the external LED alone when the diags are running */ 787 * the external LED alone when the diags are running */
797 if (ipath_diag_inuse) 788 if (ipath_diag_inuse)
798 return; 789 return;
799 790
791 /* Allow override of LED display for, e.g. Locating system in rack */
792 if (dd->ipath_led_override) {
793 ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
794 ? INFINIPATH_IBCS_LT_STATE_LINKUP
795 : INFINIPATH_IBCS_LT_STATE_DISABLED;
796 lst = (dd->ipath_led_override & IPATH_LED_LOG)
797 ? INFINIPATH_IBCS_L_STATE_ACTIVE
798 : INFINIPATH_IBCS_L_STATE_DOWN;
799 }
800
801 spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
800 extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON | 802 extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
801 INFINIPATH_EXTC_LED2PRIPORT_ON); 803 INFINIPATH_EXTC_LED2PRIPORT_ON);
802 804
@@ -806,6 +808,7 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
806 extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON; 808 extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
807 dd->ipath_extctrl = extctl; 809 dd->ipath_extctrl = extctl;
808 ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl); 810 ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
811 spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
809} 812}
810 813
811/** 814/**
@@ -955,6 +958,27 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd)
955 958
956 dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; 959 dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
957 dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; 960 dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
961
962 /*
963 * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
964 * 2 is Some Misc, 3 is reserved for future.
965 */
966 dd->ipath_eep_st_masks[0].hwerrs_to_log =
967 INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
968 INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
969
970 /* Ignore errors in PIO/PBC on systems with unordered write-combining */
971 if (ipath_unordered_wc())
972 dd->ipath_eep_st_masks[0].hwerrs_to_log &= ~TXE_PIO_PARITY;
973
974 dd->ipath_eep_st_masks[1].hwerrs_to_log =
975 INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
976 INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
977
978 dd->ipath_eep_st_masks[2].errs_to_log =
979 INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;
980
981
958} 982}
959 983
960/* setup the MSI stuff again after a reset. I'd like to just call 984/* setup the MSI stuff again after a reset. I'd like to just call
@@ -1082,7 +1106,7 @@ bail:
1082 * ipath_pe_put_tid - write a TID in chip 1106 * ipath_pe_put_tid - write a TID in chip
1083 * @dd: the infinipath device 1107 * @dd: the infinipath device
1084 * @tidptr: pointer to the expected TID (in chip) to udpate 1108 * @tidptr: pointer to the expected TID (in chip) to udpate
1085 * @tidtype: 0 for eager, 1 for expected 1109 * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
1086 * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing 1110 * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
1087 * 1111 *
1088 * This exists as a separate routine to allow for special locking etc. 1112 * This exists as a separate routine to allow for special locking etc.
@@ -1108,7 +1132,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
1108 "BUG: Physical page address 0x%lx " 1132 "BUG: Physical page address 0x%lx "
1109 "has bits set in 31-29\n", pa); 1133 "has bits set in 31-29\n", pa);
1110 1134
1111 if (type == 0) 1135 if (type == RCVHQ_RCV_TYPE_EAGER)
1112 pa |= dd->ipath_tidtemplate; 1136 pa |= dd->ipath_tidtemplate;
1113 else /* for now, always full 4KB page */ 1137 else /* for now, always full 4KB page */
1114 pa |= 2 << 29; 1138 pa |= 2 << 29;
@@ -1132,7 +1156,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
1132 * ipath_pe_put_tid_2 - write a TID in chip, Revision 2 or higher 1156 * ipath_pe_put_tid_2 - write a TID in chip, Revision 2 or higher
1133 * @dd: the infinipath device 1157 * @dd: the infinipath device
1134 * @tidptr: pointer to the expected TID (in chip) to update 1158 * @tidptr: pointer to the expected TID (in chip) to update
1135 * @tidtype: 0 for eager, 1 for expected 1159 * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
1136 * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing 1160 * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
1137 * 1161 *
1138 * This exists as a separate routine to allow for selection of the 1162 * This exists as a separate routine to allow for selection of the
@@ -1157,7 +1181,7 @@ static void ipath_pe_put_tid_2(struct ipath_devdata *dd, u64 __iomem *tidptr,
1157 "BUG: Physical page address 0x%lx " 1181 "BUG: Physical page address 0x%lx "
1158 "has bits set in 31-29\n", pa); 1182 "has bits set in 31-29\n", pa);
1159 1183
1160 if (type == 0) 1184 if (type == RCVHQ_RCV_TYPE_EAGER)
1161 pa |= dd->ipath_tidtemplate; 1185 pa |= dd->ipath_tidtemplate;
1162 else /* for now, always full 4KB page */ 1186 else /* for now, always full 4KB page */
1163 pa |= 2 << 29; 1187 pa |= 2 << 29;
@@ -1196,7 +1220,8 @@ static void ipath_pe_clear_tids(struct ipath_devdata *dd, unsigned port)
1196 port * dd->ipath_rcvtidcnt * sizeof(*tidbase)); 1220 port * dd->ipath_rcvtidcnt * sizeof(*tidbase));
1197 1221
1198 for (i = 0; i < dd->ipath_rcvtidcnt; i++) 1222 for (i = 0; i < dd->ipath_rcvtidcnt; i++)
1199 ipath_pe_put_tid(dd, &tidbase[i], 0, tidinv); 1223 ipath_pe_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
1224 tidinv);
1200 1225
1201 tidbase = (u64 __iomem *) 1226 tidbase = (u64 __iomem *)
1202 ((char __iomem *)(dd->ipath_kregbase) + 1227 ((char __iomem *)(dd->ipath_kregbase) +
@@ -1204,7 +1229,8 @@ static void ipath_pe_clear_tids(struct ipath_devdata *dd, unsigned port)
1204 port * dd->ipath_rcvegrcnt * sizeof(*tidbase)); 1229 port * dd->ipath_rcvegrcnt * sizeof(*tidbase));
1205 1230
1206 for (i = 0; i < dd->ipath_rcvegrcnt; i++) 1231 for (i = 0; i < dd->ipath_rcvegrcnt; i++)
1207 ipath_pe_put_tid(dd, &tidbase[i], 1, tidinv); 1232 ipath_pe_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
1233 tidinv);
1208} 1234}
1209 1235
1210/** 1236/**
@@ -1311,13 +1337,6 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
1311 1337
1312 dd = pd->port_dd; 1338 dd = pd->port_dd;
1313 1339
1314 if (dd != NULL && dd->ipath_minrev >= 2) {
1315 ipath_cdbg(PROC, "IBA6120 Rev2, allow multiple PBC write\n");
1316 kinfo->spi_runtime_flags |= IPATH_RUNTIME_PBC_REWRITE;
1317 ipath_cdbg(PROC, "IBA6120 Rev2, allow loose DMA alignment\n");
1318 kinfo->spi_runtime_flags |= IPATH_RUNTIME_LOOSE_DMA_ALIGN;
1319 }
1320
1321done: 1340done:
1322 kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE; 1341 kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
1323 return 0; 1342 return 0;
@@ -1354,7 +1373,6 @@ static int ipath_pe_txe_recover(struct ipath_devdata *dd)
1354 dev_info(&dd->pcidev->dev, 1373 dev_info(&dd->pcidev->dev,
1355 "Recovering from TXE PIO parity error\n"); 1374 "Recovering from TXE PIO parity error\n");
1356 } 1375 }
1357 ipath_disarm_senderrbufs(dd, 1);
1358 return 1; 1376 return 1;
1359} 1377}
1360 1378
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 7045ba689494..49951d583804 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -133,7 +133,8 @@ static int create_port0_egr(struct ipath_devdata *dd)
133 dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE); 133 dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE);
134 dd->ipath_f_put_tid(dd, e + (u64 __iomem *) 134 dd->ipath_f_put_tid(dd, e + (u64 __iomem *)
135 ((char __iomem *) dd->ipath_kregbase + 135 ((char __iomem *) dd->ipath_kregbase +
136 dd->ipath_rcvegrbase), 0, 136 dd->ipath_rcvegrbase),
137 RCVHQ_RCV_TYPE_EAGER,
137 dd->ipath_port0_skbinfo[e].phys); 138 dd->ipath_port0_skbinfo[e].phys);
138 } 139 }
139 140
@@ -310,7 +311,12 @@ static int init_chip_first(struct ipath_devdata *dd,
310 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize); 311 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize);
311 dd->ipath_piosize2k = val & ~0U; 312 dd->ipath_piosize2k = val & ~0U;
312 dd->ipath_piosize4k = val >> 32; 313 dd->ipath_piosize4k = val >> 32;
313 dd->ipath_ibmtu = 4096; /* default to largest legal MTU */ 314 /*
315 * Note: the chips support a maximum MTU of 4096, but the driver
316 * hasn't implemented this feature yet, so set the initial value
317 * to 2048.
318 */
319 dd->ipath_ibmtu = 2048;
314 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt); 320 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt);
315 dd->ipath_piobcnt2k = val & ~0U; 321 dd->ipath_piobcnt2k = val & ~0U;
316 dd->ipath_piobcnt4k = val >> 32; 322 dd->ipath_piobcnt4k = val >> 32;
@@ -340,6 +346,10 @@ static int init_chip_first(struct ipath_devdata *dd,
340 346
341 spin_lock_init(&dd->ipath_tid_lock); 347 spin_lock_init(&dd->ipath_tid_lock);
342 348
349 spin_lock_init(&dd->ipath_gpio_lock);
350 spin_lock_init(&dd->ipath_eep_st_lock);
351 sema_init(&dd->ipath_eep_sem, 1);
352
343done: 353done:
344 *pdp = pd; 354 *pdp = pd;
345 return ret; 355 return ret;
@@ -646,7 +656,7 @@ static int init_housekeeping(struct ipath_devdata *dd,
646 ret = dd->ipath_f_get_boardname(dd, boardn, sizeof boardn); 656 ret = dd->ipath_f_get_boardname(dd, boardn, sizeof boardn);
647 657
648 snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion), 658 snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion),
649 "Driver %u.%u, %s, InfiniPath%u %u.%u, PCI %u, " 659 "ChipABI %u.%u, %s, InfiniPath%u %u.%u, PCI %u, "
650 "SW Compat %u\n", 660 "SW Compat %u\n",
651 IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn, 661 IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn,
652 (unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) & 662 (unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) &
@@ -727,7 +737,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
727 uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0; 737 uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
728 if (ipath_kpiobufs == 0) { 738 if (ipath_kpiobufs == 0) {
729 /* not set by user (this is default) */ 739 /* not set by user (this is default) */
730 if (piobufs >= (uports * IPATH_MIN_USER_PORT_BUFCNT) + 32) 740 if (piobufs > 144)
731 kpiobufs = 32; 741 kpiobufs = 32;
732 else 742 else
733 kpiobufs = 16; 743 kpiobufs = 16;
@@ -767,6 +777,12 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
767 piobufs, dd->ipath_pbufsport, uports); 777 piobufs, dd->ipath_pbufsport, uports);
768 778
769 dd->ipath_f_early_init(dd); 779 dd->ipath_f_early_init(dd);
780 /*
781 * cancel any possible active sends from early driver load.
782 * Follows early_init because some chips have to initialize
783 * PIO buffers in early_init to avoid false parity errors.
784 */
785 ipath_cancel_sends(dd);
770 786
771 /* early_init sets rcvhdrentsize and rcvhdrsize, so this must be 787 /* early_init sets rcvhdrentsize and rcvhdrsize, so this must be
772 * done after early_init */ 788 * done after early_init */
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index a90d3b5699c4..47aa43428fbf 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -93,7 +93,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
93 93
94 if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) { 94 if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
95 int i; 95 int i;
96 if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG)) { 96 if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
97 dd->ipath_lastcancel > jiffies) {
97 __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG, 98 __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
98 "SendbufErrs %lx %lx", sbuf[0], 99 "SendbufErrs %lx %lx", sbuf[0],
99 sbuf[1]); 100 sbuf[1]);
@@ -108,7 +109,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
108 ipath_clrpiobuf(dd, i); 109 ipath_clrpiobuf(dd, i);
109 ipath_disarm_piobufs(dd, i, 1); 110 ipath_disarm_piobufs(dd, i, 1);
110 } 111 }
111 dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */ 112 /* ignore armlaunch errs for a bit */
113 dd->ipath_lastcancel = jiffies+3;
112 } 114 }
113} 115}
114 116
@@ -131,6 +133,17 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
131 INFINIPATH_E_INVALIDADDR) 133 INFINIPATH_E_INVALIDADDR)
132 134
133/* 135/*
136 * this is similar to E_SUM_ERRS, but can't ignore armlaunch, don't ignore
137 * errors not related to freeze and cancelling buffers. Can't ignore
138 * armlaunch because could get more while still cleaning up, and need
139 * to cancel those as they happen.
140 */
141#define E_SPKT_ERRS_IGNORE \
142 (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
143 INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SMINPKTLEN | \
144 INFINIPATH_E_SPKTLEN)
145
146/*
134 * these are errors that can occur when the link changes state while 147 * these are errors that can occur when the link changes state while
135 * a packet is being sent or received. This doesn't cover things 148 * a packet is being sent or received. This doesn't cover things
136 * like EBP or VCRC that can be the result of a sending having the 149 * like EBP or VCRC that can be the result of a sending having the
@@ -290,12 +303,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
290 * Flush all queued sends when link went to DOWN or INIT, 303 * Flush all queued sends when link went to DOWN or INIT,
291 * to be sure that they don't block SMA and other MAD packets 304 * to be sure that they don't block SMA and other MAD packets
292 */ 305 */
293 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 306 ipath_cancel_sends(dd);
294 INFINIPATH_S_ABORT);
295 ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
296 (unsigned)(dd->ipath_piobcnt2k +
297 dd->ipath_piobcnt4k) -
298 dd->ipath_lastport_piobuf);
299 } 307 }
300 else if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM || 308 else if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM ||
301 lstate == IPATH_IBSTATE_ACTIVE) { 309 lstate == IPATH_IBSTATE_ACTIVE) {
@@ -505,6 +513,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
505 int i, iserr = 0; 513 int i, iserr = 0;
506 int chkerrpkts = 0, noprint = 0; 514 int chkerrpkts = 0, noprint = 0;
507 unsigned supp_msgs; 515 unsigned supp_msgs;
516 int log_idx;
508 517
509 supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint); 518 supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint);
510 519
@@ -518,6 +527,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
518 if (errs & INFINIPATH_E_HARDWARE) { 527 if (errs & INFINIPATH_E_HARDWARE) {
519 /* reuse same msg buf */ 528 /* reuse same msg buf */
520 dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg); 529 dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
530 } else {
531 u64 mask;
532 for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) {
533 mask = dd->ipath_eep_st_masks[log_idx].errs_to_log;
534 if (errs & mask)
535 ipath_inc_eeprom_err(dd, log_idx, 1);
536 }
521 } 537 }
522 538
523 if (!noprint && (errs & ~dd->ipath_e_bitsextant)) 539 if (!noprint && (errs & ~dd->ipath_e_bitsextant))
@@ -675,6 +691,17 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
675 chkerrpkts = 1; 691 chkerrpkts = 1;
676 dd->ipath_lastrcvhdrqtails[i] = tl; 692 dd->ipath_lastrcvhdrqtails[i] = tl;
677 pd->port_hdrqfull++; 693 pd->port_hdrqfull++;
694 if (test_bit(IPATH_PORT_WAITING_OVERFLOW,
695 &pd->port_flag)) {
696 clear_bit(
697 IPATH_PORT_WAITING_OVERFLOW,
698 &pd->port_flag);
699 set_bit(
700 IPATH_PORT_WAITING_OVERFLOW,
701 &pd->int_flag);
702 wake_up_interruptible(
703 &pd->port_wait);
704 }
678 } 705 }
679 } 706 }
680 } 707 }
@@ -744,6 +771,72 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
744 return chkerrpkts; 771 return chkerrpkts;
745} 772}
746 773
774
775/*
776 * try to cleanup as much as possible for anything that might have gone
777 * wrong while in freeze mode, such as pio buffers being written by user
778 * processes (causing armlaunch), send errors due to going into freeze mode,
779 * etc., and try to avoid causing extra interrupts while doing so.
780 * Forcibly update the in-memory pioavail register copies after cleanup
781 * because the chip won't do it for anything changing while in freeze mode
782 * (we don't want to wait for the next pio buffer state change).
783 * Make sure that we don't lose any important interrupts by using the chip
784 * feature that says that writing 0 to a bit in *clear that is set in
785 * *status will cause an interrupt to be generated again (if allowed by
786 * the *mask value).
787 */
788void ipath_clear_freeze(struct ipath_devdata *dd)
789{
790 int i, im;
791 __le64 val;
792
793 /* disable error interrupts, to avoid confusion */
794 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
795
796 /*
797 * clear all sends, because they may have been
798 * completed by usercode while in freeze mode, and
799 * therefore would not be sent, and eventually
800 * might cause the process to run out of bufs
801 */
802 ipath_cancel_sends(dd);
803 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
804 dd->ipath_control);
805
806 /* ensure pio avail updates continue */
807 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
808 dd->ipath_sendctrl & ~IPATH_S_PIOBUFAVAILUPD);
809 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
810 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
811 dd->ipath_sendctrl);
812
813 /*
814 * We just enabled pioavailupdate, so dma copy is almost certainly
815 * not yet right, so read the registers directly. Similar to init
816 */
817 for (i = 0; i < dd->ipath_pioavregs; i++) {
818 /* deal with 6110 chip bug */
819 im = i > 3 ? ((i&1) ? i-1 : i+1) : i;
820 val = ipath_read_kreg64(dd, 0x1000+(im*sizeof(u64)));
821 dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i]
822 = le64_to_cpu(val);
823 }
824
825 /*
826 * force new interrupt if any hwerr, error or interrupt bits are
827 * still set, and clear "safe" send packet errors related to freeze
828 * and cancelling sends. Re-enable error interrupts before possible
829 * force of re-interrupt on pending interrupts.
830 */
831 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
832 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
833 E_SPKT_ERRS_IGNORE);
834 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
835 ~dd->ipath_maskederrs);
836 ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
837}
838
839
747/* this is separate to allow for better optimization of ipath_intr() */ 840/* this is separate to allow for better optimization of ipath_intr() */
748 841
749static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp) 842static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp)
@@ -872,14 +965,25 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
872 dd->ipath_i_rcvurg_mask); 965 dd->ipath_i_rcvurg_mask);
873 for (i = 1; i < dd->ipath_cfgports; i++) { 966 for (i = 1; i < dd->ipath_cfgports; i++) {
874 struct ipath_portdata *pd = dd->ipath_pd[i]; 967 struct ipath_portdata *pd = dd->ipath_pd[i];
875 if (portr & (1 << i) && pd && pd->port_cnt && 968 if (portr & (1 << i) && pd && pd->port_cnt) {
876 test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) { 969 if (test_bit(IPATH_PORT_WAITING_RCV,
877 clear_bit(IPATH_PORT_WAITING_RCV, 970 &pd->port_flag)) {
878 &pd->port_flag); 971 clear_bit(IPATH_PORT_WAITING_RCV,
879 clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT, 972 &pd->port_flag);
880 &dd->ipath_rcvctrl); 973 set_bit(IPATH_PORT_WAITING_RCV,
881 wake_up_interruptible(&pd->port_wait); 974 &pd->int_flag);
882 rcvdint = 1; 975 clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT,
976 &dd->ipath_rcvctrl);
977 wake_up_interruptible(&pd->port_wait);
978 rcvdint = 1;
979 } else if (test_bit(IPATH_PORT_WAITING_URG,
980 &pd->port_flag)) {
981 clear_bit(IPATH_PORT_WAITING_URG,
982 &pd->port_flag);
983 set_bit(IPATH_PORT_WAITING_URG,
984 &pd->int_flag);
985 wake_up_interruptible(&pd->port_wait);
986 }
883 } 987 }
884 } 988 }
885 if (rcvdint) { 989 if (rcvdint) {
@@ -905,6 +1009,9 @@ irqreturn_t ipath_intr(int irq, void *data)
905 1009
906 ipath_stats.sps_ints++; 1010 ipath_stats.sps_ints++;
907 1011
1012 if (dd->ipath_int_counter != (u32) -1)
1013 dd->ipath_int_counter++;
1014
908 if (!(dd->ipath_flags & IPATH_PRESENT)) { 1015 if (!(dd->ipath_flags & IPATH_PRESENT)) {
909 /* 1016 /*
910 * This return value is not great, but we do not want the 1017 * This return value is not great, but we do not want the
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 12194f3dd8cc..3105005fc9d2 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -1,7 +1,7 @@
1#ifndef _IPATH_KERNEL_H 1#ifndef _IPATH_KERNEL_H
2#define _IPATH_KERNEL_H 2#define _IPATH_KERNEL_H
3/* 3/*
4 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 4 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
5 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 5 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
6 * 6 *
7 * This software is available to you under a choice of one of two 7 * This software is available to you under a choice of one of two
@@ -57,6 +57,24 @@
57extern struct infinipath_stats ipath_stats; 57extern struct infinipath_stats ipath_stats;
58 58
59#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ 59#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ
60/*
61 * First-cut criterion for "device is active" is
62 * two thousand dwords combined Tx, Rx traffic per
63 * 5-second interval. SMA packets are 64 dwords,
64 * and occur "a few per second", presumably each way.
65 */
66#define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000)
67/*
68 * Struct used to indicate which errors are logged in each of the
69 * error-counters that are logged to EEPROM. A counter is incremented
70 * _once_ (saturating at 255) for each event with any bits set in
71 * the error or hwerror register masks below.
72 */
73#define IPATH_EEP_LOG_CNT (4)
74struct ipath_eep_log_mask {
75 u64 errs_to_log;
76 u64 hwerrs_to_log;
77};
60 78
61struct ipath_portdata { 79struct ipath_portdata {
62 void **port_rcvegrbuf; 80 void **port_rcvegrbuf;
@@ -109,6 +127,8 @@ struct ipath_portdata {
109 u32 port_tidcursor; 127 u32 port_tidcursor;
110 /* next expected TID to check */ 128 /* next expected TID to check */
111 unsigned long port_flag; 129 unsigned long port_flag;
130 /* what happened */
131 unsigned long int_flag;
112 /* WAIT_RCV that timed out, no interrupt */ 132 /* WAIT_RCV that timed out, no interrupt */
113 u32 port_rcvwait_to; 133 u32 port_rcvwait_to;
114 /* WAIT_PIO that timed out, no interrupt */ 134 /* WAIT_PIO that timed out, no interrupt */
@@ -137,6 +157,8 @@ struct ipath_portdata {
137 u32 userversion; 157 u32 userversion;
138 /* Bitmask of active slaves */ 158 /* Bitmask of active slaves */
139 u32 active_slaves; 159 u32 active_slaves;
160 /* Type of packets or conditions we want to poll for */
161 u16 poll_type;
140}; 162};
141 163
142struct sk_buff; 164struct sk_buff;
@@ -275,6 +297,8 @@ struct ipath_devdata {
275 u32 ipath_lastport_piobuf; 297 u32 ipath_lastport_piobuf;
276 /* is a stats timer active */ 298 /* is a stats timer active */
277 u32 ipath_stats_timer_active; 299 u32 ipath_stats_timer_active;
300 /* number of interrupts for this device -- saturates... */
301 u32 ipath_int_counter;
278 /* dwords sent read from counter */ 302 /* dwords sent read from counter */
279 u32 ipath_lastsword; 303 u32 ipath_lastsword;
280 /* dwords received read from counter */ 304 /* dwords received read from counter */
@@ -369,9 +393,6 @@ struct ipath_devdata {
369 struct class_device *diag_class_dev; 393 struct class_device *diag_class_dev;
370 /* timer used to prevent stats overflow, error throttling, etc. */ 394 /* timer used to prevent stats overflow, error throttling, etc. */
371 struct timer_list ipath_stats_timer; 395 struct timer_list ipath_stats_timer;
372 /* check for stale messages in rcv queue */
373 /* only allow one intr at a time. */
374 unsigned long ipath_rcv_pending;
375 void *ipath_dummy_hdrq; /* used after port close */ 396 void *ipath_dummy_hdrq; /* used after port close */
376 dma_addr_t ipath_dummy_hdrq_phys; 397 dma_addr_t ipath_dummy_hdrq_phys;
377 398
@@ -399,6 +420,8 @@ struct ipath_devdata {
399 u64 ipath_gpio_out; 420 u64 ipath_gpio_out;
400 /* shadow the gpio mask register */ 421 /* shadow the gpio mask register */
401 u64 ipath_gpio_mask; 422 u64 ipath_gpio_mask;
423 /* shadow the gpio output enable, etc... */
424 u64 ipath_extctrl;
402 /* kr_revision shadow */ 425 /* kr_revision shadow */
403 u64 ipath_revision; 426 u64 ipath_revision;
404 /* 427 /*
@@ -473,8 +496,6 @@ struct ipath_devdata {
473 u32 ipath_cregbase; 496 u32 ipath_cregbase;
474 /* shadow the control register contents */ 497 /* shadow the control register contents */
475 u32 ipath_control; 498 u32 ipath_control;
476 /* shadow the gpio output contents */
477 u32 ipath_extctrl;
478 /* PCI revision register (HTC rev on FPGA) */ 499 /* PCI revision register (HTC rev on FPGA) */
479 u32 ipath_pcirev; 500 u32 ipath_pcirev;
480 501
@@ -552,6 +573,9 @@ struct ipath_devdata {
552 u32 ipath_overrun_thresh_errs; 573 u32 ipath_overrun_thresh_errs;
553 u32 ipath_lli_errs; 574 u32 ipath_lli_errs;
554 575
576 /* status check work */
577 struct delayed_work status_work;
578
555 /* 579 /*
556 * Not all devices managed by a driver instance are the same 580 * Not all devices managed by a driver instance are the same
557 * type, so these fields must be per-device. 581 * type, so these fields must be per-device.
@@ -575,6 +599,37 @@ struct ipath_devdata {
575 u16 ipath_gpio_scl_num; 599 u16 ipath_gpio_scl_num;
576 u64 ipath_gpio_sda; 600 u64 ipath_gpio_sda;
577 u64 ipath_gpio_scl; 601 u64 ipath_gpio_scl;
602
603 /* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */
604 spinlock_t ipath_gpio_lock;
605
606 /* used to override LED behavior */
607 u8 ipath_led_override; /* Substituted for normal value, if non-zero */
608 u16 ipath_led_override_timeoff; /* delta to next timer event */
609 u8 ipath_led_override_vals[2]; /* Alternates per blink-frame */
610 u8 ipath_led_override_phase; /* Just counts, LSB picks from vals[] */
611 atomic_t ipath_led_override_timer_active;
612 /* Used to flash LEDs in override mode */
613 struct timer_list ipath_led_override_timer;
614
615 /* Support (including locks) for EEPROM logging of errors and time */
616 /* control access to actual counters, timer */
617 spinlock_t ipath_eep_st_lock;
618 /* control high-level access to EEPROM */
619 struct semaphore ipath_eep_sem;
620 /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */
621 uint64_t ipath_traffic_wds;
622 /* active time is kept in seconds, but logged in hours */
623 atomic_t ipath_active_time;
624 /* Below are nominal shadow of EEPROM, new since last EEPROM update */
625 uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT];
626 uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT];
627 uint16_t ipath_eep_hrs;
628 /*
629 * masks for which bits of errs, hwerrs that cause
630 * each of the counters to increment.
631 */
632 struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT];
578}; 633};
579 634
580/* Private data for file operations */ 635/* Private data for file operations */
@@ -592,6 +647,7 @@ int ipath_enable_wc(struct ipath_devdata *dd);
592void ipath_disable_wc(struct ipath_devdata *dd); 647void ipath_disable_wc(struct ipath_devdata *dd);
593int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp); 648int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
594void ipath_shutdown_device(struct ipath_devdata *); 649void ipath_shutdown_device(struct ipath_devdata *);
650void ipath_clear_freeze(struct ipath_devdata *);
595 651
596struct file_operations; 652struct file_operations;
597int ipath_cdev_init(int minor, char *name, const struct file_operations *fops, 653int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
@@ -627,6 +683,7 @@ int ipath_unordered_wc(void);
627 683
628void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first, 684void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first,
629 unsigned cnt); 685 unsigned cnt);
686void ipath_cancel_sends(struct ipath_devdata *);
630 687
631int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *); 688int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *);
632void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *); 689void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
@@ -685,7 +742,6 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
685 * are 64bit */ 742 * are 64bit */
686#define IPATH_32BITCOUNTERS 0x20000 743#define IPATH_32BITCOUNTERS 0x20000
687 /* can miss port0 rx interrupts */ 744 /* can miss port0 rx interrupts */
688#define IPATH_POLL_RX_INTR 0x40000
689#define IPATH_DISABLED 0x80000 /* administratively disabled */ 745#define IPATH_DISABLED 0x80000 /* administratively disabled */
690 /* Use GPIO interrupts for new counters */ 746 /* Use GPIO interrupts for new counters */
691#define IPATH_GPIO_ERRINTRS 0x100000 747#define IPATH_GPIO_ERRINTRS 0x100000
@@ -704,6 +760,10 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
704#define IPATH_PORT_WAITING_PIO 3 760#define IPATH_PORT_WAITING_PIO 3
705 /* master has not finished initializing */ 761 /* master has not finished initializing */
706#define IPATH_PORT_MASTER_UNINIT 4 762#define IPATH_PORT_MASTER_UNINIT 4
763 /* waiting for an urgent packet to arrive */
764#define IPATH_PORT_WAITING_URG 5
765 /* waiting for a header overflow */
766#define IPATH_PORT_WAITING_OVERFLOW 6
707 767
708/* free up any allocated data at closes */ 768/* free up any allocated data at closes */
709void ipath_free_data(struct ipath_portdata *dd); 769void ipath_free_data(struct ipath_portdata *dd);
@@ -713,10 +773,21 @@ u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *);
713void ipath_init_iba6120_funcs(struct ipath_devdata *); 773void ipath_init_iba6120_funcs(struct ipath_devdata *);
714void ipath_init_iba6110_funcs(struct ipath_devdata *); 774void ipath_init_iba6110_funcs(struct ipath_devdata *);
715void ipath_get_eeprom_info(struct ipath_devdata *); 775void ipath_get_eeprom_info(struct ipath_devdata *);
776int ipath_update_eeprom_log(struct ipath_devdata *dd);
777void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
716u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); 778u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
717void ipath_disarm_senderrbufs(struct ipath_devdata *, int); 779void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
718 780
719/* 781/*
782 * Set LED override, only the two LSBs have "public" meaning, but
783 * any non-zero value substitutes them for the Link and LinkTrain
784 * LED states.
785 */
786#define IPATH_LED_PHYS 1 /* Physical (linktraining) GREEN LED */
787#define IPATH_LED_LOG 2 /* Logical (link) YELLOW LED */
788void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val);
789
790/*
720 * number of words used for protocol header if not set by ipath_userinit(); 791 * number of words used for protocol header if not set by ipath_userinit();
721 */ 792 */
722#define IPATH_DFLT_RCVHDRSIZE 9 793#define IPATH_DFLT_RCVHDRSIZE 9
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index dd487c100f5b..85a4aefc6c03 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c
index 05a1d2b01d9d..82616b779e24 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.c
+++ b/drivers/infiniband/hw/ipath/ipath_layer.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h
index 3854a4eae684..415709c4d85b 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.h
+++ b/drivers/infiniband/hw/ipath/ipath_layer.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index 25908b02fbe5..d61c03044545 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -103,7 +103,7 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
103 /* This is already in network order */ 103 /* This is already in network order */
104 nip->sys_guid = to_idev(ibdev)->sys_image_guid; 104 nip->sys_guid = to_idev(ibdev)->sys_image_guid;
105 nip->node_guid = dd->ipath_guid; 105 nip->node_guid = dd->ipath_guid;
106 nip->port_guid = nip->sys_guid; 106 nip->port_guid = dd->ipath_guid;
107 nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd)); 107 nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd));
108 nip->device_id = cpu_to_be16(dd->ipath_deviceid); 108 nip->device_id = cpu_to_be16(dd->ipath_deviceid);
109 majrev = dd->ipath_majrev; 109 majrev = dd->ipath_majrev;
@@ -292,7 +292,12 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
292 /* pip->vl_arb_high_cap; // only one VL */ 292 /* pip->vl_arb_high_cap; // only one VL */
293 /* pip->vl_arb_low_cap; // only one VL */ 293 /* pip->vl_arb_low_cap; // only one VL */
294 /* InitTypeReply = 0 */ 294 /* InitTypeReply = 0 */
295 pip->inittypereply_mtucap = IB_MTU_4096; 295 /*
296 * Note: the chips support a maximum MTU of 4096, but the driver
297 * hasn't implemented this feature yet, so set the maximum value
298 * to 2048.
299 */
300 pip->inittypereply_mtucap = IB_MTU_2048;
296 // HCAs ignore VLStallCount and HOQLife 301 // HCAs ignore VLStallCount and HOQLife
297 /* pip->vlstallcnt_hoqlife; */ 302 /* pip->vlstallcnt_hoqlife; */
298 pip->operationalvl_pei_peo_fpi_fpo = 0x10; /* OVLs = 1 */ 303 pip->operationalvl_pei_peo_fpi_fpo = 0x10; /* OVLs = 1 */
diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c
index 937bc3396b53..fa830e22002f 100644
--- a/drivers/infiniband/hw/ipath/ipath_mmap.c
+++ b/drivers/infiniband/hw/ipath/ipath_mmap.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * 3 *
4 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index bdeef8d4f279..e442470a2375 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index bfef08ecd342..1324b35ff1f8 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -336,7 +336,7 @@ static void ipath_reset_qp(struct ipath_qp *qp)
336 qp->qkey = 0; 336 qp->qkey = 0;
337 qp->qp_access_flags = 0; 337 qp->qp_access_flags = 0;
338 qp->s_busy = 0; 338 qp->s_busy = 0;
339 qp->s_flags &= ~IPATH_S_SIGNAL_REQ_WR; 339 qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
340 qp->s_hdrwords = 0; 340 qp->s_hdrwords = 0;
341 qp->s_psn = 0; 341 qp->s_psn = 0;
342 qp->r_psn = 0; 342 qp->r_psn = 0;
@@ -507,16 +507,13 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
507 attr->port_num > ibqp->device->phys_port_cnt) 507 attr->port_num > ibqp->device->phys_port_cnt)
508 goto inval; 508 goto inval;
509 509
510 /*
511 * Note: the chips support a maximum MTU of 4096, but the driver
512 * hasn't implemented this feature yet, so don't allow Path MTU
513 * values greater than 2048.
514 */
510 if (attr_mask & IB_QP_PATH_MTU) 515 if (attr_mask & IB_QP_PATH_MTU)
511 if (attr->path_mtu > IB_MTU_4096) 516 if (attr->path_mtu > IB_MTU_2048)
512 goto inval;
513
514 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
515 if (attr->max_dest_rd_atomic > 1)
516 goto inval;
517
518 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
519 if (attr->max_rd_atomic > 1)
520 goto inval; 517 goto inval;
521 518
522 if (attr_mask & IB_QP_PATH_MIG_STATE) 519 if (attr_mask & IB_QP_PATH_MIG_STATE)
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 1915771fd038..46744ea2babd 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -125,8 +125,10 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
125 if (len > pmtu) { 125 if (len > pmtu) {
126 len = pmtu; 126 len = pmtu;
127 qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); 127 qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
128 } else 128 } else {
129 qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); 129 qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
130 e->sent = 1;
131 }
130 ohdr->u.aeth = ipath_compute_aeth(qp); 132 ohdr->u.aeth = ipath_compute_aeth(qp);
131 hwords++; 133 hwords++;
132 qp->s_ack_rdma_psn = e->psn; 134 qp->s_ack_rdma_psn = e->psn;
@@ -143,6 +145,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
143 cpu_to_be32(e->atomic_data); 145 cpu_to_be32(e->atomic_data);
144 hwords += sizeof(ohdr->u.at) / sizeof(u32); 146 hwords += sizeof(ohdr->u.at) / sizeof(u32);
145 bth2 = e->psn; 147 bth2 = e->psn;
148 e->sent = 1;
146 } 149 }
147 bth0 = qp->s_ack_state << 24; 150 bth0 = qp->s_ack_state << 24;
148 break; 151 break;
@@ -158,6 +161,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
158 ohdr->u.aeth = ipath_compute_aeth(qp); 161 ohdr->u.aeth = ipath_compute_aeth(qp);
159 hwords++; 162 hwords++;
160 qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); 163 qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
164 qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1;
161 } 165 }
162 bth0 = qp->s_ack_state << 24; 166 bth0 = qp->s_ack_state << 24;
163 bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; 167 bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
@@ -188,7 +192,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
188 } 192 }
189 qp->s_hdrwords = hwords; 193 qp->s_hdrwords = hwords;
190 qp->s_cur_size = len; 194 qp->s_cur_size = len;
191 *bth0p = bth0; 195 *bth0p = bth0 | (1 << 22); /* Set M bit */
192 *bth2p = bth2; 196 *bth2p = bth2;
193 return 1; 197 return 1;
194 198
@@ -240,7 +244,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
240 244
241 /* header size in 32-bit words LRH+BTH = (8+12)/4. */ 245 /* header size in 32-bit words LRH+BTH = (8+12)/4. */
242 hwords = 5; 246 hwords = 5;
243 bth0 = 0; 247 bth0 = 1 << 22; /* Set M bit */
244 248
245 /* Send a request. */ 249 /* Send a request. */
246 wqe = get_swqe_ptr(qp, qp->s_cur); 250 wqe = get_swqe_ptr(qp, qp->s_cur);
@@ -604,7 +608,7 @@ static void send_rc_ack(struct ipath_qp *qp)
604 } 608 }
605 /* read pkey_index w/o lock (it's atomic) */ 609 /* read pkey_index w/o lock (it's atomic) */
606 bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) | 610 bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) |
607 OP(ACKNOWLEDGE) << 24; 611 (OP(ACKNOWLEDGE) << 24) | (1 << 22);
608 if (qp->r_nak_state) 612 if (qp->r_nak_state)
609 ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | 613 ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
610 (qp->r_nak_state << 614 (qp->r_nak_state <<
@@ -806,13 +810,15 @@ static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
806 * Called at interrupt level with the QP s_lock held and interrupts disabled. 810 * Called at interrupt level with the QP s_lock held and interrupts disabled.
807 * Returns 1 if OK, 0 if current operation should be aborted (NAK). 811 * Returns 1 if OK, 0 if current operation should be aborted (NAK).
808 */ 812 */
809static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) 813static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
814 u64 val)
810{ 815{
811 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 816 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
812 struct ib_wc wc; 817 struct ib_wc wc;
813 struct ipath_swqe *wqe; 818 struct ipath_swqe *wqe;
814 int ret = 0; 819 int ret = 0;
815 u32 ack_psn; 820 u32 ack_psn;
821 int diff;
816 822
817 /* 823 /*
818 * Remove the QP from the timeout queue (or RNR timeout queue). 824 * Remove the QP from the timeout queue (or RNR timeout queue).
@@ -840,7 +846,19 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
840 * The MSN might be for a later WQE than the PSN indicates so 846 * The MSN might be for a later WQE than the PSN indicates so
841 * only complete WQEs that the PSN finishes. 847 * only complete WQEs that the PSN finishes.
842 */ 848 */
843 while (ipath_cmp24(ack_psn, wqe->lpsn) >= 0) { 849 while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) {
850 /*
851 * RDMA_READ_RESPONSE_ONLY is a special case since
852 * we want to generate completion events for everything
853 * before the RDMA read, copy the data, then generate
854 * the completion for the read.
855 */
856 if (wqe->wr.opcode == IB_WR_RDMA_READ &&
857 opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
858 diff == 0) {
859 ret = 1;
860 goto bail;
861 }
844 /* 862 /*
845 * If this request is a RDMA read or atomic, and the ACK is 863 * If this request is a RDMA read or atomic, and the ACK is
846 * for a later operation, this ACK NAKs the RDMA read or 864 * for a later operation, this ACK NAKs the RDMA read or
@@ -851,12 +869,10 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
851 * is sent but before the response is received. 869 * is sent but before the response is received.
852 */ 870 */
853 if ((wqe->wr.opcode == IB_WR_RDMA_READ && 871 if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
854 (opcode != OP(RDMA_READ_RESPONSE_LAST) || 872 (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
855 ipath_cmp24(ack_psn, wqe->lpsn) != 0)) ||
856 ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || 873 ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
857 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && 874 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
858 (opcode != OP(ATOMIC_ACKNOWLEDGE) || 875 (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
859 ipath_cmp24(wqe->psn, psn) != 0))) {
860 /* 876 /*
861 * The last valid PSN seen is the previous 877 * The last valid PSN seen is the previous
862 * request's. 878 * request's.
@@ -870,6 +886,9 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
870 */ 886 */
871 goto bail; 887 goto bail;
872 } 888 }
889 if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
890 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
891 *(u64 *) wqe->sg_list[0].vaddr = val;
873 if (qp->s_num_rd_atomic && 892 if (qp->s_num_rd_atomic &&
874 (wqe->wr.opcode == IB_WR_RDMA_READ || 893 (wqe->wr.opcode == IB_WR_RDMA_READ ||
875 wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || 894 wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
@@ -1079,6 +1098,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1079 int diff; 1098 int diff;
1080 u32 pad; 1099 u32 pad;
1081 u32 aeth; 1100 u32 aeth;
1101 u64 val;
1082 1102
1083 spin_lock_irqsave(&qp->s_lock, flags); 1103 spin_lock_irqsave(&qp->s_lock, flags);
1084 1104
@@ -1118,8 +1138,6 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1118 data += sizeof(__be32); 1138 data += sizeof(__be32);
1119 } 1139 }
1120 if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { 1140 if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
1121 u64 val;
1122
1123 if (!header_in_data) { 1141 if (!header_in_data) {
1124 __be32 *p = ohdr->u.at.atomic_ack_eth; 1142 __be32 *p = ohdr->u.at.atomic_ack_eth;
1125 1143
@@ -1127,12 +1145,13 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1127 be32_to_cpu(p[1]); 1145 be32_to_cpu(p[1]);
1128 } else 1146 } else
1129 val = be64_to_cpu(((__be64 *) data)[0]); 1147 val = be64_to_cpu(((__be64 *) data)[0]);
1130 *(u64 *) wqe->sg_list[0].vaddr = val; 1148 } else
1131 } 1149 val = 0;
1132 if (!do_rc_ack(qp, aeth, psn, opcode) || 1150 if (!do_rc_ack(qp, aeth, psn, opcode, val) ||
1133 opcode != OP(RDMA_READ_RESPONSE_FIRST)) 1151 opcode != OP(RDMA_READ_RESPONSE_FIRST))
1134 goto ack_done; 1152 goto ack_done;
1135 hdrsize += 4; 1153 hdrsize += 4;
1154 wqe = get_swqe_ptr(qp, qp->s_last);
1136 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) 1155 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1137 goto ack_op_err; 1156 goto ack_op_err;
1138 /* 1157 /*
@@ -1176,13 +1195,12 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1176 goto bail; 1195 goto bail;
1177 1196
1178 case OP(RDMA_READ_RESPONSE_ONLY): 1197 case OP(RDMA_READ_RESPONSE_ONLY):
1179 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { 1198 if (!header_in_data)
1180 dev->n_rdma_seq++; 1199 aeth = be32_to_cpu(ohdr->u.aeth);
1181 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); 1200 else
1201 aeth = be32_to_cpu(((__be32 *) data)[0]);
1202 if (!do_rc_ack(qp, aeth, psn, opcode, 0))
1182 goto ack_done; 1203 goto ack_done;
1183 }
1184 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1185 goto ack_op_err;
1186 /* Get the number of bytes the message was padded by. */ 1204 /* Get the number of bytes the message was padded by. */
1187 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; 1205 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1188 /* 1206 /*
@@ -1197,6 +1215,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1197 * have to be careful to copy the data to the right 1215 * have to be careful to copy the data to the right
1198 * location. 1216 * location.
1199 */ 1217 */
1218 wqe = get_swqe_ptr(qp, qp->s_last);
1200 qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, 1219 qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1201 wqe, psn, pmtu); 1220 wqe, psn, pmtu);
1202 goto read_last; 1221 goto read_last;
@@ -1230,7 +1249,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1230 data += sizeof(__be32); 1249 data += sizeof(__be32);
1231 } 1250 }
1232 ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen); 1251 ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
1233 (void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST)); 1252 (void) do_rc_ack(qp, aeth, psn,
1253 OP(RDMA_READ_RESPONSE_LAST), 0);
1234 goto ack_done; 1254 goto ack_done;
1235 } 1255 }
1236 1256
@@ -1344,8 +1364,11 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1344 e = NULL; 1364 e = NULL;
1345 break; 1365 break;
1346 } 1366 }
1347 if (ipath_cmp24(psn, e->psn) >= 0) 1367 if (ipath_cmp24(psn, e->psn) >= 0) {
1368 if (prev == qp->s_tail_ack_queue)
1369 old_req = 0;
1348 break; 1370 break;
1371 }
1349 } 1372 }
1350 switch (opcode) { 1373 switch (opcode) {
1351 case OP(RDMA_READ_REQUEST): { 1374 case OP(RDMA_READ_REQUEST): {
@@ -1460,6 +1483,22 @@ static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
1460 spin_unlock_irqrestore(&qp->s_lock, flags); 1483 spin_unlock_irqrestore(&qp->s_lock, flags);
1461} 1484}
1462 1485
1486static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
1487{
1488 unsigned long flags;
1489 unsigned next;
1490
1491 next = n + 1;
1492 if (next > IPATH_MAX_RDMA_ATOMIC)
1493 next = 0;
1494 spin_lock_irqsave(&qp->s_lock, flags);
1495 if (n == qp->s_tail_ack_queue) {
1496 qp->s_tail_ack_queue = next;
1497 qp->s_ack_state = OP(ACKNOWLEDGE);
1498 }
1499 spin_unlock_irqrestore(&qp->s_lock, flags);
1500}
1501
1463/** 1502/**
1464 * ipath_rc_rcv - process an incoming RC packet 1503 * ipath_rc_rcv - process an incoming RC packet
1465 * @dev: the device this packet came in on 1504 * @dev: the device this packet came in on
@@ -1672,6 +1711,9 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1672 case OP(RDMA_WRITE_FIRST): 1711 case OP(RDMA_WRITE_FIRST):
1673 case OP(RDMA_WRITE_ONLY): 1712 case OP(RDMA_WRITE_ONLY):
1674 case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): 1713 case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
1714 if (unlikely(!(qp->qp_access_flags &
1715 IB_ACCESS_REMOTE_WRITE)))
1716 goto nack_inv;
1675 /* consume RWQE */ 1717 /* consume RWQE */
1676 /* RETH comes after BTH */ 1718 /* RETH comes after BTH */
1677 if (!header_in_data) 1719 if (!header_in_data)
@@ -1701,9 +1743,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1701 qp->r_sge.sge.length = 0; 1743 qp->r_sge.sge.length = 0;
1702 qp->r_sge.sge.sge_length = 0; 1744 qp->r_sge.sge.sge_length = 0;
1703 } 1745 }
1704 if (unlikely(!(qp->qp_access_flags &
1705 IB_ACCESS_REMOTE_WRITE)))
1706 goto nack_acc;
1707 if (opcode == OP(RDMA_WRITE_FIRST)) 1746 if (opcode == OP(RDMA_WRITE_FIRST))
1708 goto send_middle; 1747 goto send_middle;
1709 else if (opcode == OP(RDMA_WRITE_ONLY)) 1748 else if (opcode == OP(RDMA_WRITE_ONLY))
@@ -1717,13 +1756,17 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1717 u32 len; 1756 u32 len;
1718 u8 next; 1757 u8 next;
1719 1758
1720 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) 1759 if (unlikely(!(qp->qp_access_flags &
1721 goto nack_acc; 1760 IB_ACCESS_REMOTE_READ)))
1761 goto nack_inv;
1722 next = qp->r_head_ack_queue + 1; 1762 next = qp->r_head_ack_queue + 1;
1723 if (next > IPATH_MAX_RDMA_ATOMIC) 1763 if (next > IPATH_MAX_RDMA_ATOMIC)
1724 next = 0; 1764 next = 0;
1725 if (unlikely(next == qp->s_tail_ack_queue)) 1765 if (unlikely(next == qp->s_tail_ack_queue)) {
1726 goto nack_inv; 1766 if (!qp->s_ack_queue[next].sent)
1767 goto nack_inv;
1768 ipath_update_ack_queue(qp, next);
1769 }
1727 e = &qp->s_ack_queue[qp->r_head_ack_queue]; 1770 e = &qp->s_ack_queue[qp->r_head_ack_queue];
1728 /* RETH comes after BTH */ 1771 /* RETH comes after BTH */
1729 if (!header_in_data) 1772 if (!header_in_data)
@@ -1758,6 +1801,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1758 e->rdma_sge.sge.sge_length = 0; 1801 e->rdma_sge.sge.sge_length = 0;
1759 } 1802 }
1760 e->opcode = opcode; 1803 e->opcode = opcode;
1804 e->sent = 0;
1761 e->psn = psn; 1805 e->psn = psn;
1762 /* 1806 /*
1763 * We need to increment the MSN here instead of when we 1807 * We need to increment the MSN here instead of when we
@@ -1789,12 +1833,15 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1789 1833
1790 if (unlikely(!(qp->qp_access_flags & 1834 if (unlikely(!(qp->qp_access_flags &
1791 IB_ACCESS_REMOTE_ATOMIC))) 1835 IB_ACCESS_REMOTE_ATOMIC)))
1792 goto nack_acc; 1836 goto nack_inv;
1793 next = qp->r_head_ack_queue + 1; 1837 next = qp->r_head_ack_queue + 1;
1794 if (next > IPATH_MAX_RDMA_ATOMIC) 1838 if (next > IPATH_MAX_RDMA_ATOMIC)
1795 next = 0; 1839 next = 0;
1796 if (unlikely(next == qp->s_tail_ack_queue)) 1840 if (unlikely(next == qp->s_tail_ack_queue)) {
1797 goto nack_inv; 1841 if (!qp->s_ack_queue[next].sent)
1842 goto nack_inv;
1843 ipath_update_ack_queue(qp, next);
1844 }
1798 if (!header_in_data) 1845 if (!header_in_data)
1799 ateth = &ohdr->u.atomic_eth; 1846 ateth = &ohdr->u.atomic_eth;
1800 else 1847 else
@@ -1819,6 +1866,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1819 be64_to_cpu(ateth->compare_data), 1866 be64_to_cpu(ateth->compare_data),
1820 sdata); 1867 sdata);
1821 e->opcode = opcode; 1868 e->opcode = opcode;
1869 e->sent = 0;
1822 e->psn = psn & IPATH_PSN_MASK; 1870 e->psn = psn & IPATH_PSN_MASK;
1823 qp->r_msn++; 1871 qp->r_msn++;
1824 qp->r_psn++; 1872 qp->r_psn++;
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index c182bcd62098..708eba3165d7 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index d9c2a9b15d86..85256747d8a1 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -194,6 +194,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
194 ret = 0; 194 ret = 0;
195 goto bail; 195 goto bail;
196 } 196 }
197 /* Make sure entry is read after head index is read. */
198 smp_rmb();
197 wqe = get_rwqe_ptr(rq, tail); 199 wqe = get_rwqe_ptr(rq, tail);
198 if (++tail >= rq->size) 200 if (++tail >= rq->size)
199 tail = 0; 201 tail = 0;
@@ -267,7 +269,7 @@ again:
267 spin_lock_irqsave(&sqp->s_lock, flags); 269 spin_lock_irqsave(&sqp->s_lock, flags);
268 270
269 if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) || 271 if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) ||
270 qp->s_rnr_timeout) { 272 sqp->s_rnr_timeout) {
271 spin_unlock_irqrestore(&sqp->s_lock, flags); 273 spin_unlock_irqrestore(&sqp->s_lock, flags);
272 goto done; 274 goto done;
273 } 275 }
@@ -319,12 +321,22 @@ again:
319 break; 321 break;
320 322
321 case IB_WR_RDMA_WRITE_WITH_IMM: 323 case IB_WR_RDMA_WRITE_WITH_IMM:
324 if (unlikely(!(qp->qp_access_flags &
325 IB_ACCESS_REMOTE_WRITE))) {
326 wc.status = IB_WC_REM_INV_REQ_ERR;
327 goto err;
328 }
322 wc.wc_flags = IB_WC_WITH_IMM; 329 wc.wc_flags = IB_WC_WITH_IMM;
323 wc.imm_data = wqe->wr.imm_data; 330 wc.imm_data = wqe->wr.imm_data;
324 if (!ipath_get_rwqe(qp, 1)) 331 if (!ipath_get_rwqe(qp, 1))
325 goto rnr_nak; 332 goto rnr_nak;
326 /* FALLTHROUGH */ 333 /* FALLTHROUGH */
327 case IB_WR_RDMA_WRITE: 334 case IB_WR_RDMA_WRITE:
335 if (unlikely(!(qp->qp_access_flags &
336 IB_ACCESS_REMOTE_WRITE))) {
337 wc.status = IB_WC_REM_INV_REQ_ERR;
338 goto err;
339 }
328 if (wqe->length == 0) 340 if (wqe->length == 0)
329 break; 341 break;
330 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length, 342 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
@@ -354,8 +366,10 @@ again:
354 366
355 case IB_WR_RDMA_READ: 367 case IB_WR_RDMA_READ:
356 if (unlikely(!(qp->qp_access_flags & 368 if (unlikely(!(qp->qp_access_flags &
357 IB_ACCESS_REMOTE_READ))) 369 IB_ACCESS_REMOTE_READ))) {
358 goto acc_err; 370 wc.status = IB_WC_REM_INV_REQ_ERR;
371 goto err;
372 }
359 if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, 373 if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
360 wqe->wr.wr.rdma.remote_addr, 374 wqe->wr.wr.rdma.remote_addr,
361 wqe->wr.wr.rdma.rkey, 375 wqe->wr.wr.rdma.rkey,
@@ -369,8 +383,10 @@ again:
369 case IB_WR_ATOMIC_CMP_AND_SWP: 383 case IB_WR_ATOMIC_CMP_AND_SWP:
370 case IB_WR_ATOMIC_FETCH_AND_ADD: 384 case IB_WR_ATOMIC_FETCH_AND_ADD:
371 if (unlikely(!(qp->qp_access_flags & 385 if (unlikely(!(qp->qp_access_flags &
372 IB_ACCESS_REMOTE_ATOMIC))) 386 IB_ACCESS_REMOTE_ATOMIC))) {
373 goto acc_err; 387 wc.status = IB_WC_REM_INV_REQ_ERR;
388 goto err;
389 }
374 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), 390 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
375 wqe->wr.wr.atomic.remote_addr, 391 wqe->wr.wr.atomic.remote_addr,
376 wqe->wr.wr.atomic.rkey, 392 wqe->wr.wr.atomic.rkey,
@@ -396,6 +412,8 @@ again:
396 412
397 if (len > sge->length) 413 if (len > sge->length)
398 len = sge->length; 414 len = sge->length;
415 if (len > sge->sge_length)
416 len = sge->sge_length;
399 BUG_ON(len == 0); 417 BUG_ON(len == 0);
400 ipath_copy_sge(&qp->r_sge, sge->vaddr, len); 418 ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
401 sge->vaddr += len; 419 sge->vaddr += len;
@@ -503,11 +521,9 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
503 * could be called. If we are still in the tasklet function, 521 * could be called. If we are still in the tasklet function,
504 * tasklet_hi_schedule() will not call us until the next time 522 * tasklet_hi_schedule() will not call us until the next time
505 * tasklet_hi_schedule() is called. 523 * tasklet_hi_schedule() is called.
506 * We clear the tasklet flag now since we are committing to return 524 * We leave the busy flag set so that another post send doesn't
507 * from the tasklet function. 525 * try to put the same QP on the piowait list again.
508 */ 526 */
509 clear_bit(IPATH_S_BUSY, &qp->s_busy);
510 tasklet_unlock(&qp->s_task);
511 want_buffer(dev->dd); 527 want_buffer(dev->dd);
512 dev->n_piowait++; 528 dev->n_piowait++;
513} 529}
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c
index 03acae66ba81..40c36ec19016 100644
--- a/drivers/infiniband/hw/ipath/ipath_srq.c
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -80,6 +80,8 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
80 wqe->num_sge = wr->num_sge; 80 wqe->num_sge = wr->num_sge;
81 for (i = 0; i < wr->num_sge; i++) 81 for (i = 0; i < wr->num_sge; i++)
82 wqe->sg_list[i] = wr->sg_list[i]; 82 wqe->sg_list[i] = wr->sg_list[i];
83 /* Make sure queue entry is written before the head index. */
84 smp_wmb();
83 wq->head = next; 85 wq->head = next;
84 spin_unlock_irqrestore(&srq->rq.lock, flags); 86 spin_unlock_irqrestore(&srq->rq.lock, flags);
85 } 87 }
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index d8b5e4cefe25..73ed17d03188 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -55,6 +55,7 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
55 u64 val64; 55 u64 val64;
56 unsigned long t0, t1; 56 unsigned long t0, t1;
57 u64 ret; 57 u64 ret;
58 unsigned long flags;
58 59
59 t0 = jiffies; 60 t0 = jiffies;
60 /* If fast increment counters are only 32 bits, snapshot them, 61 /* If fast increment counters are only 32 bits, snapshot them,
@@ -91,12 +92,18 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
91 if (creg == dd->ipath_cregs->cr_wordsendcnt) { 92 if (creg == dd->ipath_cregs->cr_wordsendcnt) {
92 if (val != dd->ipath_lastsword) { 93 if (val != dd->ipath_lastsword) {
93 dd->ipath_sword += val - dd->ipath_lastsword; 94 dd->ipath_sword += val - dd->ipath_lastsword;
95 spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
96 dd->ipath_traffic_wds += val - dd->ipath_lastsword;
97 spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
94 dd->ipath_lastsword = val; 98 dd->ipath_lastsword = val;
95 } 99 }
96 val64 = dd->ipath_sword; 100 val64 = dd->ipath_sword;
97 } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) { 101 } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
98 if (val != dd->ipath_lastrword) { 102 if (val != dd->ipath_lastrword) {
99 dd->ipath_rword += val - dd->ipath_lastrword; 103 dd->ipath_rword += val - dd->ipath_lastrword;
104 spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
105 dd->ipath_traffic_wds += val - dd->ipath_lastrword;
106 spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
100 dd->ipath_lastrword = val; 107 dd->ipath_lastrword = val;
101 } 108 }
102 val64 = dd->ipath_rword; 109 val64 = dd->ipath_rword;
@@ -200,6 +207,7 @@ void ipath_get_faststats(unsigned long opaque)
200 struct ipath_devdata *dd = (struct ipath_devdata *) opaque; 207 struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
201 u32 val; 208 u32 val;
202 static unsigned cnt; 209 static unsigned cnt;
210 unsigned long flags;
203 211
204 /* 212 /*
205 * don't access the chip while running diags, or memory diags can 213 * don't access the chip while running diags, or memory diags can
@@ -210,9 +218,20 @@ void ipath_get_faststats(unsigned long opaque)
210 /* but re-arm the timer, for diags case; won't hurt other */ 218 /* but re-arm the timer, for diags case; won't hurt other */
211 goto done; 219 goto done;
212 220
221 /*
222 * We now try to maintain an "active timer", based on traffic
223 * exceeding a threshold, so we need to check the word-counts
224 * even if they are 64-bit.
225 */
226 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
227 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
228 spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
229 if (dd->ipath_traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
230 atomic_add(5, &dd->ipath_active_time); /* S/B #define */
231 dd->ipath_traffic_wds = 0;
232 spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
233
213 if (dd->ipath_flags & IPATH_32BITCOUNTERS) { 234 if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
214 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
215 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
216 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); 235 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
217 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); 236 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
218 } 237 }
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index 4dc398d5e011..16238cd3a036 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -596,6 +596,43 @@ bail:
596 return ret; 596 return ret;
597} 597}
598 598
599static ssize_t store_led_override(struct device *dev,
600 struct device_attribute *attr,
601 const char *buf,
602 size_t count)
603{
604 struct ipath_devdata *dd = dev_get_drvdata(dev);
605 int ret;
606 u16 val;
607
608 ret = ipath_parse_ushort(buf, &val);
609 if (ret > 0)
610 ipath_set_led_override(dd, val);
611 else
612 ipath_dev_err(dd, "attempt to set invalid LED override\n");
613 return ret;
614}
615
616static ssize_t show_logged_errs(struct device *dev,
617 struct device_attribute *attr,
618 char *buf)
619{
620 struct ipath_devdata *dd = dev_get_drvdata(dev);
621 int idx, count;
622
623 /* force consistency with actual EEPROM */
624 if (ipath_update_eeprom_log(dd) != 0)
625 return -ENXIO;
626
627 count = 0;
628 for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
629 count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c",
630 dd->ipath_eep_st_errs[idx],
631 idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' ');
632 }
633
634 return count;
635}
599 636
600static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL); 637static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
601static DRIVER_ATTR(version, S_IRUGO, show_version, NULL); 638static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
@@ -625,6 +662,8 @@ static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
625static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); 662static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
626static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL); 663static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
627static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv); 664static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
665static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override);
666static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
628 667
629static struct attribute *dev_attributes[] = { 668static struct attribute *dev_attributes[] = {
630 &dev_attr_guid.attr, 669 &dev_attr_guid.attr,
@@ -641,6 +680,8 @@ static struct attribute *dev_attributes[] = {
641 &dev_attr_unit.attr, 680 &dev_attr_unit.attr,
642 &dev_attr_enabled.attr, 681 &dev_attr_enabled.attr,
643 &dev_attr_rx_pol_inv.attr, 682 &dev_attr_rx_pol_inv.attr,
683 &dev_attr_led_override.attr,
684 &dev_attr_logged_errors.attr,
644 NULL 685 NULL
645}; 686};
646 687
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index 1c2b03c2ef5e..8380fbc50d2c 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -58,7 +58,6 @@ static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
58 wc->port_num = 0; 58 wc->port_num = 0;
59 ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0); 59 ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0);
60 } 60 }
61 wqe = get_swqe_ptr(qp, qp->s_last);
62} 61}
63 62
64/** 63/**
@@ -87,7 +86,7 @@ int ipath_make_uc_req(struct ipath_qp *qp,
87 86
88 /* header size in 32-bit words LRH+BTH = (8+12)/4. */ 87 /* header size in 32-bit words LRH+BTH = (8+12)/4. */
89 hwords = 5; 88 hwords = 5;
90 bth0 = 0; 89 bth0 = 1 << 22; /* Set M bit */
91 90
92 /* Get the next send request. */ 91 /* Get the next send request. */
93 wqe = get_swqe_ptr(qp, qp->s_last); 92 wqe = get_swqe_ptr(qp, qp->s_last);
@@ -97,8 +96,10 @@ int ipath_make_uc_req(struct ipath_qp *qp,
97 * Signal the completion of the last send 96 * Signal the completion of the last send
98 * (if there is one). 97 * (if there is one).
99 */ 98 */
100 if (qp->s_last != qp->s_tail) 99 if (qp->s_last != qp->s_tail) {
101 complete_last_send(qp, wqe, &wc); 100 complete_last_send(qp, wqe, &wc);
101 wqe = get_swqe_ptr(qp, qp->s_last);
102 }
102 103
103 /* Check if send work queue is empty. */ 104 /* Check if send work queue is empty. */
104 if (qp->s_tail == qp->s_head) 105 if (qp->s_tail == qp->s_head)
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index a518f7c8fa83..f9a3338a5fb7 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -176,6 +176,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
176 dev->n_pkt_drops++; 176 dev->n_pkt_drops++;
177 goto bail_sge; 177 goto bail_sge;
178 } 178 }
179 /* Make sure entry is read after head index is read. */
180 smp_rmb();
179 wqe = get_rwqe_ptr(rq, tail); 181 wqe = get_rwqe_ptr(rq, tail);
180 if (++tail >= rq->size) 182 if (++tail >= rq->size)
181 tail = 0; 183 tail = 0;
@@ -231,6 +233,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
231 233
232 if (len > length) 234 if (len > length)
233 len = length; 235 len = length;
236 if (len > sge->sge_length)
237 len = sge->sge_length;
234 BUG_ON(len == 0); 238 BUG_ON(len == 0);
235 ipath_copy_sge(&rsge, sge->vaddr, len); 239 ipath_copy_sge(&rsge, sge->vaddr, len);
236 sge->vaddr += len; 240 sge->vaddr += len;
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c
index 8536aeb96af8..27034d38b3dd 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_pages.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index bb70845279b8..65f7181e9cf8 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -164,9 +164,11 @@ void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
164 while (length) { 164 while (length) {
165 u32 len = sge->length; 165 u32 len = sge->length;
166 166
167 BUG_ON(len == 0);
168 if (len > length) 167 if (len > length)
169 len = length; 168 len = length;
169 if (len > sge->sge_length)
170 len = sge->sge_length;
171 BUG_ON(len == 0);
170 memcpy(sge->vaddr, data, len); 172 memcpy(sge->vaddr, data, len);
171 sge->vaddr += len; 173 sge->vaddr += len;
172 sge->length -= len; 174 sge->length -= len;
@@ -202,9 +204,11 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
202 while (length) { 204 while (length) {
203 u32 len = sge->length; 205 u32 len = sge->length;
204 206
205 BUG_ON(len == 0);
206 if (len > length) 207 if (len > length)
207 len = length; 208 len = length;
209 if (len > sge->sge_length)
210 len = sge->sge_length;
211 BUG_ON(len == 0);
208 sge->vaddr += len; 212 sge->vaddr += len;
209 sge->length -= len; 213 sge->length -= len;
210 sge->sge_length -= len; 214 sge->sge_length -= len;
@@ -323,6 +327,8 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
323 wqe->num_sge = wr->num_sge; 327 wqe->num_sge = wr->num_sge;
324 for (i = 0; i < wr->num_sge; i++) 328 for (i = 0; i < wr->num_sge; i++)
325 wqe->sg_list[i] = wr->sg_list[i]; 329 wqe->sg_list[i] = wr->sg_list[i];
330 /* Make sure queue entry is written before the head index. */
331 smp_wmb();
326 wq->head = next; 332 wq->head = next;
327 spin_unlock_irqrestore(&qp->r_rq.lock, flags); 333 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
328 } 334 }
@@ -948,6 +954,7 @@ int ipath_ib_piobufavail(struct ipath_ibdev *dev)
948 qp = list_entry(dev->piowait.next, struct ipath_qp, 954 qp = list_entry(dev->piowait.next, struct ipath_qp,
949 piowait); 955 piowait);
950 list_del_init(&qp->piowait); 956 list_del_init(&qp->piowait);
957 clear_bit(IPATH_S_BUSY, &qp->s_busy);
951 tasklet_hi_schedule(&qp->s_task); 958 tasklet_hi_schedule(&qp->s_task);
952 } 959 }
953 spin_unlock_irqrestore(&dev->pending_lock, flags); 960 spin_unlock_irqrestore(&dev->pending_lock, flags);
@@ -981,6 +988,8 @@ static int ipath_query_device(struct ib_device *ibdev,
981 props->max_ah = ib_ipath_max_ahs; 988 props->max_ah = ib_ipath_max_ahs;
982 props->max_cqe = ib_ipath_max_cqes; 989 props->max_cqe = ib_ipath_max_cqes;
983 props->max_mr = dev->lk_table.max; 990 props->max_mr = dev->lk_table.max;
991 props->max_fmr = dev->lk_table.max;
992 props->max_map_per_fmr = 32767;
984 props->max_pd = ib_ipath_max_pds; 993 props->max_pd = ib_ipath_max_pds;
985 props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC; 994 props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
986 props->max_qp_init_rd_atom = 255; 995 props->max_qp_init_rd_atom = 255;
@@ -1051,7 +1060,12 @@ static int ipath_query_port(struct ib_device *ibdev,
1051 props->max_vl_num = 1; /* VLCap = VL0 */ 1060 props->max_vl_num = 1; /* VLCap = VL0 */
1052 props->init_type_reply = 0; 1061 props->init_type_reply = 0;
1053 1062
1054 props->max_mtu = IB_MTU_4096; 1063 /*
1064 * Note: the chips support a maximum MTU of 4096, but the driver
1065 * hasn't implemented this feature yet, so set the maximum value
1066 * to 2048.
1067 */
1068 props->max_mtu = IB_MTU_2048;
1055 switch (dev->dd->ipath_ibmtu) { 1069 switch (dev->dd->ipath_ibmtu) {
1056 case 4096: 1070 case 4096:
1057 mtu = IB_MTU_4096; 1071 mtu = IB_MTU_4096;
@@ -1361,13 +1375,6 @@ static void __verbs_timer(unsigned long arg)
1361{ 1375{
1362 struct ipath_devdata *dd = (struct ipath_devdata *) arg; 1376 struct ipath_devdata *dd = (struct ipath_devdata *) arg;
1363 1377
1364 /*
1365 * If port 0 receive packet interrupts are not available, or
1366 * can be missed, poll the receive queue
1367 */
1368 if (dd->ipath_flags & IPATH_POLL_RX_INTR)
1369 ipath_kreceive(dd);
1370
1371 /* Handle verbs layer timeouts. */ 1378 /* Handle verbs layer timeouts. */
1372 ipath_ib_timer(dd->verbs_dev); 1379 ipath_ib_timer(dd->verbs_dev);
1373 1380
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 088b837ebea8..f3d1f2cee6f8 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -321,6 +321,7 @@ struct ipath_sge_state {
321 */ 321 */
322struct ipath_ack_entry { 322struct ipath_ack_entry {
323 u8 opcode; 323 u8 opcode;
324 u8 sent;
324 u32 psn; 325 u32 psn;
325 union { 326 union {
326 struct ipath_sge_state rdma_sge; 327 struct ipath_sge_state rdma_sge;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
index dd691cfa5079..9e5abf9c309d 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
index 0095bb70f34e..1d7bd82a1fb1 100644
--- a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
+++ b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * 3 *
4 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
index 04696e62da87..3428acb0868c 100644
--- a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
+++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -63,12 +63,29 @@ int ipath_enable_wc(struct ipath_devdata *dd)
63 * of 2 address matching the length (which has to be a power of 2). 63 * of 2 address matching the length (which has to be a power of 2).
64 * For rev1, that means the base address, for rev2, it will be just 64 * For rev1, that means the base address, for rev2, it will be just
65 * the PIO buffers themselves. 65 * the PIO buffers themselves.
66 * For chips with two sets of buffers, the calculations are
67 * somewhat more complicated; we need to sum, and the piobufbase
68 * register has both offsets, 2K in low 32 bits, 4K in high 32 bits.
69 * The buffers are still packed, so a single range covers both.
66 */ 70 */
67 pioaddr = addr + dd->ipath_piobufbase; 71 if (dd->ipath_piobcnt2k && dd->ipath_piobcnt4k) { /* 2 sizes */
68 piolen = (dd->ipath_piobcnt2k + 72 unsigned long pio2kbase, pio4kbase;
69 dd->ipath_piobcnt4k) * 73 pio2kbase = dd->ipath_piobufbase & 0xffffffffUL;
70 ALIGN(dd->ipath_piobcnt2k + 74 pio4kbase = (dd->ipath_piobufbase >> 32) & 0xffffffffUL;
71 dd->ipath_piobcnt4k, dd->ipath_palign); 75 if (pio2kbase < pio4kbase) { /* all, for now */
76 pioaddr = addr + pio2kbase;
77 piolen = pio4kbase - pio2kbase +
78 dd->ipath_piobcnt4k * dd->ipath_4kalign;
79 } else {
80 pioaddr = addr + pio4kbase;
81 piolen = pio2kbase - pio4kbase +
82 dd->ipath_piobcnt2k * dd->ipath_palign;
83 }
84 } else { /* single buffer size (2K, currently) */
85 pioaddr = addr + dd->ipath_piobufbase;
86 piolen = dd->ipath_piobcnt2k * dd->ipath_palign +
87 dd->ipath_piobcnt4k * dd->ipath_4kalign;
88 }
72 89
73 for (bits = 0; !(piolen & (1ULL << bits)); bits++) 90 for (bits = 0; !(piolen & (1ULL << bits)); bits++)
74 /* do nothing */ ; 91 /* do nothing */ ;
diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig
index b8912cdb9663..4175a4bd0c78 100644
--- a/drivers/infiniband/hw/mlx4/Kconfig
+++ b/drivers/infiniband/hw/mlx4/Kconfig
@@ -1,6 +1,5 @@
1config MLX4_INFINIBAND 1config MLX4_INFINIBAND
2 tristate "Mellanox ConnectX HCA support" 2 tristate "Mellanox ConnectX HCA support"
3 depends on INFINIBAND
4 select MLX4_CORE 3 select MLX4_CORE
5 ---help--- 4 ---help---
6 This driver provides low-level InfiniBand support for 5 This driver provides low-level InfiniBand support for
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index c591616dccde..dde8fe9af47e 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -169,7 +169,7 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
169 props->phys_state = out_mad->data[33] >> 4; 169 props->phys_state = out_mad->data[33] >> 4;
170 props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); 170 props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
171 props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; 171 props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
172 props->max_msg_sz = 0x80000000; 172 props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
173 props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port]; 173 props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port];
174 props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); 174 props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
175 props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); 175 props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
@@ -523,11 +523,13 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
523 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 523 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
524 (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 524 (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
525 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 525 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
526 (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
526 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 527 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
527 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | 528 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
528 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | 529 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
529 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | 530 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
530 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | 531 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
532 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
531 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); 533 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
532 534
533 ibdev->ib_dev.query_device = mlx4_ib_query_device; 535 ibdev->ib_dev.query_device = mlx4_ib_query_device;
@@ -546,10 +548,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
546 ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah; 548 ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah;
547 ibdev->ib_dev.create_srq = mlx4_ib_create_srq; 549 ibdev->ib_dev.create_srq = mlx4_ib_create_srq;
548 ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq; 550 ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq;
551 ibdev->ib_dev.query_srq = mlx4_ib_query_srq;
549 ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq; 552 ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq;
550 ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv; 553 ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv;
551 ibdev->ib_dev.create_qp = mlx4_ib_create_qp; 554 ibdev->ib_dev.create_qp = mlx4_ib_create_qp;
552 ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp; 555 ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp;
556 ibdev->ib_dev.query_qp = mlx4_ib_query_qp;
553 ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp; 557 ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp;
554 ibdev->ib_dev.post_send = mlx4_ib_post_send; 558 ibdev->ib_dev.post_send = mlx4_ib_post_send;
555 ibdev->ib_dev.post_recv = mlx4_ib_post_recv; 559 ibdev->ib_dev.post_recv = mlx4_ib_post_recv;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 24ccadd6e4f8..705ff2fa237e 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -35,6 +35,7 @@
35 35
36#include <linux/compiler.h> 36#include <linux/compiler.h>
37#include <linux/list.h> 37#include <linux/list.h>
38#include <linux/mutex.h>
38 39
39#include <rdma/ib_verbs.h> 40#include <rdma/ib_verbs.h>
40#include <rdma/ib_umem.h> 41#include <rdma/ib_umem.h>
@@ -255,6 +256,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
255 struct ib_udata *udata); 256 struct ib_udata *udata);
256int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, 257int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
257 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); 258 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
259int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
258int mlx4_ib_destroy_srq(struct ib_srq *srq); 260int mlx4_ib_destroy_srq(struct ib_srq *srq);
259void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index); 261void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
260int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, 262int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
@@ -266,6 +268,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
266int mlx4_ib_destroy_qp(struct ib_qp *qp); 268int mlx4_ib_destroy_qp(struct ib_qp *qp);
267int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 269int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
268 int attr_mask, struct ib_udata *udata); 270 int attr_mask, struct ib_udata *udata);
271int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
272 struct ib_qp_init_attr *qp_init_attr);
269int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 273int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
270 struct ib_send_wr **bad_wr); 274 struct ib_send_wr **bad_wr);
271int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 275int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 28a08bdd1800..40042184ad58 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1455,3 +1455,140 @@ out:
1455 1455
1456 return err; 1456 return err;
1457} 1457}
1458
1459static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state)
1460{
1461 switch (mlx4_state) {
1462 case MLX4_QP_STATE_RST: return IB_QPS_RESET;
1463 case MLX4_QP_STATE_INIT: return IB_QPS_INIT;
1464 case MLX4_QP_STATE_RTR: return IB_QPS_RTR;
1465 case MLX4_QP_STATE_RTS: return IB_QPS_RTS;
1466 case MLX4_QP_STATE_SQ_DRAINING:
1467 case MLX4_QP_STATE_SQD: return IB_QPS_SQD;
1468 case MLX4_QP_STATE_SQER: return IB_QPS_SQE;
1469 case MLX4_QP_STATE_ERR: return IB_QPS_ERR;
1470 default: return -1;
1471 }
1472}
1473
1474static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state)
1475{
1476 switch (mlx4_mig_state) {
1477 case MLX4_QP_PM_ARMED: return IB_MIG_ARMED;
1478 case MLX4_QP_PM_REARM: return IB_MIG_REARM;
1479 case MLX4_QP_PM_MIGRATED: return IB_MIG_MIGRATED;
1480 default: return -1;
1481 }
1482}
1483
1484static int to_ib_qp_access_flags(int mlx4_flags)
1485{
1486 int ib_flags = 0;
1487
1488 if (mlx4_flags & MLX4_QP_BIT_RRE)
1489 ib_flags |= IB_ACCESS_REMOTE_READ;
1490 if (mlx4_flags & MLX4_QP_BIT_RWE)
1491 ib_flags |= IB_ACCESS_REMOTE_WRITE;
1492 if (mlx4_flags & MLX4_QP_BIT_RAE)
1493 ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
1494
1495 return ib_flags;
1496}
1497
1498static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr,
1499 struct mlx4_qp_path *path)
1500{
1501 memset(ib_ah_attr, 0, sizeof *path);
1502 ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1;
1503
1504 if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
1505 return;
1506
1507 ib_ah_attr->dlid = be16_to_cpu(path->rlid);
1508 ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf;
1509 ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;
1510 ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
1511 ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
1512 if (ib_ah_attr->ah_flags) {
1513 ib_ah_attr->grh.sgid_index = path->mgid_index;
1514 ib_ah_attr->grh.hop_limit = path->hop_limit;
1515 ib_ah_attr->grh.traffic_class =
1516 (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
1517 ib_ah_attr->grh.flow_label =
1518 be32_to_cpu(path->tclass_flowlabel) & 0xffffff;
1519 memcpy(ib_ah_attr->grh.dgid.raw,
1520 path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
1521 }
1522}
1523
1524int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
1525 struct ib_qp_init_attr *qp_init_attr)
1526{
1527 struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
1528 struct mlx4_ib_qp *qp = to_mqp(ibqp);
1529 struct mlx4_qp_context context;
1530 int mlx4_state;
1531 int err;
1532
1533 if (qp->state == IB_QPS_RESET) {
1534 qp_attr->qp_state = IB_QPS_RESET;
1535 goto done;
1536 }
1537
1538 err = mlx4_qp_query(dev->dev, &qp->mqp, &context);
1539 if (err)
1540 return -EINVAL;
1541
1542 mlx4_state = be32_to_cpu(context.flags) >> 28;
1543
1544 qp_attr->qp_state = to_ib_qp_state(mlx4_state);
1545 qp_attr->path_mtu = context.mtu_msgmax >> 5;
1546 qp_attr->path_mig_state =
1547 to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3);
1548 qp_attr->qkey = be32_to_cpu(context.qkey);
1549 qp_attr->rq_psn = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff;
1550 qp_attr->sq_psn = be32_to_cpu(context.next_send_psn) & 0xffffff;
1551 qp_attr->dest_qp_num = be32_to_cpu(context.remote_qpn) & 0xffffff;
1552 qp_attr->qp_access_flags =
1553 to_ib_qp_access_flags(be32_to_cpu(context.params2));
1554
1555 if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
1556 to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path);
1557 to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path);
1558 qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
1559 qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
1560 }
1561
1562 qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
1563 qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;
1564
1565 /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
1566 qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING;
1567
1568 qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context.params1) >> 21) & 0x7);
1569
1570 qp_attr->max_dest_rd_atomic =
1571 1 << ((be32_to_cpu(context.params2) >> 21) & 0x7);
1572 qp_attr->min_rnr_timer =
1573 (be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f;
1574 qp_attr->timeout = context.pri_path.ackto >> 3;
1575 qp_attr->retry_cnt = (be32_to_cpu(context.params1) >> 16) & 0x7;
1576 qp_attr->rnr_retry = (be32_to_cpu(context.params1) >> 13) & 0x7;
1577 qp_attr->alt_timeout = context.alt_path.ackto >> 3;
1578
1579done:
1580 qp_attr->cur_qp_state = qp_attr->qp_state;
1581 if (!ibqp->uobject) {
1582 qp_attr->cap.max_send_wr = qp->sq.wqe_cnt;
1583 qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
1584 qp_attr->cap.max_send_sge = qp->sq.max_gs;
1585 qp_attr->cap.max_recv_sge = qp->rq.max_gs;
1586 qp_attr->cap.max_inline_data = (1 << qp->sq.wqe_shift) -
1587 send_wqe_overhead(qp->ibqp.qp_type) -
1588 sizeof (struct mlx4_wqe_inline_seg);
1589 qp_init_attr->cap = qp_attr->cap;
1590 }
1591
1592 return 0;
1593}
1594
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 12fac1c8989d..408748fb5285 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -240,6 +240,24 @@ int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
240 return 0; 240 return 0;
241} 241}
242 242
243int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
244{
245 struct mlx4_ib_dev *dev = to_mdev(ibsrq->device);
246 struct mlx4_ib_srq *srq = to_msrq(ibsrq);
247 int ret;
248 int limit_watermark;
249
250 ret = mlx4_srq_query(dev->dev, &srq->msrq, &limit_watermark);
251 if (ret)
252 return ret;
253
254 srq_attr->srq_limit = be16_to_cpu(limit_watermark);
255 srq_attr->max_wr = srq->msrq.max - 1;
256 srq_attr->max_sge = srq->msrq.max_gs;
257
258 return 0;
259}
260
243int mlx4_ib_destroy_srq(struct ib_srq *srq) 261int mlx4_ib_destroy_srq(struct ib_srq *srq)
244{ 262{
245 struct mlx4_ib_dev *dev = to_mdev(srq->device); 263 struct mlx4_ib_dev *dev = to_mdev(srq->device);
diff --git a/drivers/infiniband/hw/mthca/Kconfig b/drivers/infiniband/hw/mthca/Kconfig
index 9aa5a4468a75..03efc074967e 100644
--- a/drivers/infiniband/hw/mthca/Kconfig
+++ b/drivers/infiniband/hw/mthca/Kconfig
@@ -1,6 +1,6 @@
1config INFINIBAND_MTHCA 1config INFINIBAND_MTHCA
2 tristate "Mellanox HCA support" 2 tristate "Mellanox HCA support"
3 depends on PCI && INFINIBAND 3 depends on PCI
4 ---help--- 4 ---help---
5 This is a low-level driver for Mellanox InfiniHost host 5 This is a low-level driver for Mellanox InfiniHost host
6 channel adapters (HCAs), including the MT23108 PCI-X HCA 6 channel adapters (HCAs), including the MT23108 PCI-X HCA
diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c
index f930e55b58fc..a76306709618 100644
--- a/drivers/infiniband/hw/mthca/mthca_allocator.c
+++ b/drivers/infiniband/hw/mthca/mthca_allocator.c
@@ -255,7 +255,7 @@ int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
255 dma_list[i] = t; 255 dma_list[i] = t;
256 pci_unmap_addr_set(&buf->page_list[i], mapping, t); 256 pci_unmap_addr_set(&buf->page_list[i], mapping, t);
257 257
258 memset(buf->page_list[i].buf, 0, PAGE_SIZE); 258 clear_page(buf->page_list[i].buf);
259 } 259 }
260 } 260 }
261 261
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 8ec9fa1ff9ea..8592b26dc4e1 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -522,7 +522,7 @@ static int mthca_create_eq(struct mthca_dev *dev,
522 dma_list[i] = t; 522 dma_list[i] = t;
523 pci_unmap_addr_set(&eq->page_list[i], mapping, t); 523 pci_unmap_addr_set(&eq->page_list[i], mapping, t);
524 524
525 memset(eq->page_list[i].buf, 0, PAGE_SIZE); 525 clear_page(eq->page_list[i].buf);
526 } 526 }
527 527
528 for (i = 0; i < eq->nent; ++i) 528 for (i = 0; i < eq->nent; ++i)
diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig
index af78ccc4ce71..1f76bad020f3 100644
--- a/drivers/infiniband/ulp/ipoib/Kconfig
+++ b/drivers/infiniband/ulp/ipoib/Kconfig
@@ -1,6 +1,6 @@
1config INFINIBAND_IPOIB 1config INFINIBAND_IPOIB
2 tristate "IP-over-InfiniBand" 2 tristate "IP-over-InfiniBand"
3 depends on INFINIBAND && NETDEVICES && INET && (IPV6 || IPV6=n) 3 depends on NETDEVICES && INET && (IPV6 || IPV6=n)
4 ---help--- 4 ---help---
5 Support for the IP-over-InfiniBand protocol (IPoIB). This 5 Support for the IP-over-InfiniBand protocol (IPoIB). This
6 transports IP packets over InfiniBand so you can use your IB 6 transports IP packets over InfiniBand so you can use your IB
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index ea74d1eaf004..08b4676a3820 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -281,7 +281,6 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
281 rep.private_data_len = sizeof data; 281 rep.private_data_len = sizeof data;
282 rep.flow_control = 0; 282 rep.flow_control = 0;
283 rep.rnr_retry_count = req->rnr_retry_count; 283 rep.rnr_retry_count = req->rnr_retry_count;
284 rep.target_ack_delay = 20; /* FIXME */
285 rep.srq = 1; 284 rep.srq = 1;
286 rep.qp_num = qp->qp_num; 285 rep.qp_num = qp->qp_num;
287 rep.starting_psn = psn; 286 rep.starting_psn = psn;
@@ -1148,7 +1147,6 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
1148{ 1147{
1149 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 1148 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
1150 cm.skb_task); 1149 cm.skb_task);
1151 struct net_device *dev = priv->dev;
1152 struct sk_buff *skb; 1150 struct sk_buff *skb;
1153 1151
1154 unsigned mtu = priv->mcast_mtu; 1152 unsigned mtu = priv->mcast_mtu;
@@ -1162,7 +1160,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
1162 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 1160 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
1163#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 1161#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1164 else if (skb->protocol == htons(ETH_P_IPV6)) 1162 else if (skb->protocol == htons(ETH_P_IPV6))
1165 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); 1163 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, priv->dev);
1166#endif 1164#endif
1167 dev_kfree_skb_any(skb); 1165 dev_kfree_skb_any(skb);
1168 spin_lock_irq(&priv->tx_lock); 1166 spin_lock_irq(&priv->tx_lock);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 8404f05b2b6e..10944888cffd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -197,6 +197,13 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
197 } 197 }
198 198
199 /* 199 /*
200 * Drop packets that this interface sent, ie multicast packets
201 * that the HCA has replicated.
202 */
203 if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)
204 goto repost;
205
206 /*
200 * If we can't allocate a new RX buffer, dump 207 * If we can't allocate a new RX buffer, dump
201 * this packet and reuse the old buffer. 208 * this packet and reuse the old buffer.
202 */ 209 */
@@ -213,24 +220,18 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
213 skb_put(skb, wc->byte_len); 220 skb_put(skb, wc->byte_len);
214 skb_pull(skb, IB_GRH_BYTES); 221 skb_pull(skb, IB_GRH_BYTES);
215 222
216 if (wc->slid != priv->local_lid || 223 skb->protocol = ((struct ipoib_header *) skb->data)->proto;
217 wc->src_qp != priv->qp->qp_num) { 224 skb_reset_mac_header(skb);
218 skb->protocol = ((struct ipoib_header *) skb->data)->proto; 225 skb_pull(skb, IPOIB_ENCAP_LEN);
219 skb_reset_mac_header(skb);
220 skb_pull(skb, IPOIB_ENCAP_LEN);
221 226
222 dev->last_rx = jiffies; 227 dev->last_rx = jiffies;
223 ++priv->stats.rx_packets; 228 ++priv->stats.rx_packets;
224 priv->stats.rx_bytes += skb->len; 229 priv->stats.rx_bytes += skb->len;
225 230
226 skb->dev = dev; 231 skb->dev = dev;
227 /* XXX get correct PACKET_ type here */ 232 /* XXX get correct PACKET_ type here */
228 skb->pkt_type = PACKET_HOST; 233 skb->pkt_type = PACKET_HOST;
229 netif_receive_skb(skb); 234 netif_receive_skb(skb);
230 } else {
231 ipoib_dbg_data(priv, "dropping loopback packet\n");
232 dev_kfree_skb_any(skb);
233 }
234 235
235repost: 236repost:
236 if (unlikely(ipoib_ib_post_receive(dev, wr_id))) 237 if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
diff --git a/drivers/infiniband/ulp/iser/Kconfig b/drivers/infiniband/ulp/iser/Kconfig
index aecbb9083f0c..fe604c8d2996 100644
--- a/drivers/infiniband/ulp/iser/Kconfig
+++ b/drivers/infiniband/ulp/iser/Kconfig
@@ -1,6 +1,6 @@
1config INFINIBAND_ISER 1config INFINIBAND_ISER
2 tristate "iSCSI Extensions for RDMA (iSER)" 2 tristate "iSCSI Extensions for RDMA (iSER)"
3 depends on INFINIBAND && SCSI && INET 3 depends on SCSI && INET
4 select SCSI_ISCSI_ATTRS 4 select SCSI_ISCSI_ATTRS
5 ---help--- 5 ---help---
6 Support for the iSCSI Extensions for RDMA (iSER) Protocol 6 Support for the iSCSI Extensions for RDMA (iSER) Protocol
diff --git a/drivers/infiniband/ulp/srp/Kconfig b/drivers/infiniband/ulp/srp/Kconfig
index 8fe3be4e9910..3432dce29520 100644
--- a/drivers/infiniband/ulp/srp/Kconfig
+++ b/drivers/infiniband/ulp/srp/Kconfig
@@ -1,6 +1,6 @@
1config INFINIBAND_SRP 1config INFINIBAND_SRP
2 tristate "InfiniBand SCSI RDMA Protocol" 2 tristate "InfiniBand SCSI RDMA Protocol"
3 depends on INFINIBAND && SCSI 3 depends on SCSI
4 ---help--- 4 ---help---
5 Support for the SCSI RDMA Protocol over InfiniBand. This 5 Support for the SCSI RDMA Protocol over InfiniBand. This
6 allows you to access storage devices that speak SRP over 6 allows you to access storage devices that speak SRP over