aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/infiniband/user_mad.txt8
-rw-r--r--drivers/Makefile1
-rw-r--r--drivers/infiniband/core/mad.c34
-rw-r--r--drivers/infiniband/core/sa_query.c24
-rw-r--r--drivers/infiniband/core/smi.c86
-rw-r--r--drivers/infiniband/core/smi.h34
-rw-r--r--drivers/infiniband/core/sysfs.c1
-rw-r--r--drivers/infiniband/core/ucm.c23
-rw-r--r--drivers/infiniband/core/ucma.c22
-rw-r--r--drivers/infiniband/core/user_mad.c20
-rw-r--r--drivers/infiniband/hw/amso1100/c2_provider.c1
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c1
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes.h1
-rw-r--r--drivers/infiniband/hw/ehca/ehca_hca.c55
-rw-r--r--drivers/infiniband/hw/ehca/ehca_main.c1
-rw-r--r--drivers/infiniband/hw/ehca/hcp_if.c24
-rw-r--r--drivers/infiniband/hw/ehca/hcp_if.h4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_common.h23
-rw-r--r--drivers/infiniband/hw/ipath/ipath_cq.c38
-rw-r--r--drivers/infiniband/hw/ipath/ipath_debug.h1
-rw-r--r--drivers/infiniband/hw/ipath/ipath_diag.c11
-rw-r--r--drivers/infiniband/hw/ipath/ipath_driver.c123
-rw-r--r--drivers/infiniband/hw/ipath/ipath_eeprom.c4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_file_ops.c287
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6110.c152
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6120.c73
-rw-r--r--drivers/infiniband/hw/ipath/ipath_init_chip.c86
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c100
-rw-r--r--drivers/infiniband/hw/ipath/ipath_kernel.h10
-rw-r--r--drivers/infiniband/hw/ipath/ipath_keys.c14
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mr.c12
-rw-r--r--drivers/infiniband/hw/ipath/ipath_qp.c133
-rw-r--r--drivers/infiniband/hw/ipath/ipath_rc.c920
-rw-r--r--drivers/infiniband/hw/ipath/ipath_registers.h22
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ruc.c63
-rw-r--r--drivers/infiniband/hw/ipath/ipath_stats.c16
-rw-r--r--drivers/infiniband/hw/ipath/ipath_uc.c6
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ud.c8
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.c15
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.h57
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c10
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mr.c6
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c7
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c64
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c8
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c12
47 files changed, 1620 insertions, 1002 deletions
diff --git a/Documentation/infiniband/user_mad.txt b/Documentation/infiniband/user_mad.txt
index 750fe5e80ebc..8ec54b974b67 100644
--- a/Documentation/infiniband/user_mad.txt
+++ b/Documentation/infiniband/user_mad.txt
@@ -91,6 +91,14 @@ Sending MADs
91 if (ret != sizeof *mad + mad_length) 91 if (ret != sizeof *mad + mad_length)
92 perror("write"); 92 perror("write");
93 93
94Transaction IDs
95
96 Users of the umad devices can use the lower 32 bits of the
97 transaction ID field (that is, the least significant half of the
98 field in network byte order) in MADs being sent to match
99 request/response pairs. The upper 32 bits are reserved for use by
100 the kernel and will be overwritten before a MAD is sent.
101
94Setting IsSM Capability Bit 102Setting IsSM Capability Bit
95 103
96 To set the IsSM capability bit for a port, simply open the 104 To set the IsSM capability bit for a port, simply open the
diff --git a/drivers/Makefile b/drivers/Makefile
index 3a718f51350e..920c975bb6d4 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -72,7 +72,6 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq/
72obj-$(CONFIG_MMC) += mmc/ 72obj-$(CONFIG_MMC) += mmc/
73obj-$(CONFIG_NEW_LEDS) += leds/ 73obj-$(CONFIG_NEW_LEDS) += leds/
74obj-$(CONFIG_INFINIBAND) += infiniband/ 74obj-$(CONFIG_INFINIBAND) += infiniband/
75obj-$(CONFIG_IPATH_CORE) += infiniband/
76obj-$(CONFIG_SGI_SN) += sn/ 75obj-$(CONFIG_SGI_SN) += sn/
77obj-y += firmware/ 76obj-y += firmware/
78obj-$(CONFIG_CRYPTO) += crypto/ 77obj-$(CONFIG_CRYPTO) += crypto/
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 13efd4170349..6edfecf1be72 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. 2 * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2005 Intel Corporation. All rights reserved. 3 * Copyright (c) 2005 Intel Corporation. All rights reserved.
4 * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. 4 * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
5 * 5 *
@@ -31,7 +31,6 @@
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 * 33 *
34 * $Id: mad.c 5596 2006-03-03 01:00:07Z sean.hefty $
35 */ 34 */
36#include <linux/dma-mapping.h> 35#include <linux/dma-mapping.h>
37#include <rdma/ib_cache.h> 36#include <rdma/ib_cache.h>
@@ -668,7 +667,7 @@ static void build_smp_wc(struct ib_qp *qp,
668static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, 667static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
669 struct ib_mad_send_wr_private *mad_send_wr) 668 struct ib_mad_send_wr_private *mad_send_wr)
670{ 669{
671 int ret; 670 int ret = 0;
672 struct ib_smp *smp = mad_send_wr->send_buf.mad; 671 struct ib_smp *smp = mad_send_wr->send_buf.mad;
673 unsigned long flags; 672 unsigned long flags;
674 struct ib_mad_local_private *local; 673 struct ib_mad_local_private *local;
@@ -688,14 +687,15 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
688 */ 687 */
689 if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) == 688 if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) ==
690 IB_LID_PERMISSIVE && 689 IB_LID_PERMISSIVE &&
691 !smi_handle_dr_smp_send(smp, device->node_type, port_num)) { 690 smi_handle_dr_smp_send(smp, device->node_type, port_num) ==
691 IB_SMI_DISCARD) {
692 ret = -EINVAL; 692 ret = -EINVAL;
693 printk(KERN_ERR PFX "Invalid directed route\n"); 693 printk(KERN_ERR PFX "Invalid directed route\n");
694 goto out; 694 goto out;
695 } 695 }
696
696 /* Check to post send on QP or process locally */ 697 /* Check to post send on QP or process locally */
697 ret = smi_check_local_smp(smp, device); 698 if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD)
698 if (!ret)
699 goto out; 699 goto out;
700 700
701 local = kmalloc(sizeof *local, GFP_ATOMIC); 701 local = kmalloc(sizeof *local, GFP_ATOMIC);
@@ -1874,18 +1874,22 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
1874 1874
1875 if (recv->mad.mad.mad_hdr.mgmt_class == 1875 if (recv->mad.mad.mad_hdr.mgmt_class ==
1876 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { 1876 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
1877 if (!smi_handle_dr_smp_recv(&recv->mad.smp, 1877 if (smi_handle_dr_smp_recv(&recv->mad.smp,
1878 port_priv->device->node_type, 1878 port_priv->device->node_type,
1879 port_priv->port_num, 1879 port_priv->port_num,
1880 port_priv->device->phys_port_cnt)) 1880 port_priv->device->phys_port_cnt) ==
1881 IB_SMI_DISCARD)
1881 goto out; 1882 goto out;
1882 if (!smi_check_forward_dr_smp(&recv->mad.smp)) 1883
1884 if (smi_check_forward_dr_smp(&recv->mad.smp) == IB_SMI_LOCAL)
1883 goto local; 1885 goto local;
1884 if (!smi_handle_dr_smp_send(&recv->mad.smp, 1886
1885 port_priv->device->node_type, 1887 if (smi_handle_dr_smp_send(&recv->mad.smp,
1886 port_priv->port_num)) 1888 port_priv->device->node_type,
1889 port_priv->port_num) == IB_SMI_DISCARD)
1887 goto out; 1890 goto out;
1888 if (!smi_check_local_smp(&recv->mad.smp, port_priv->device)) 1891
1892 if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD)
1889 goto out; 1893 goto out;
1890 } 1894 }
1891 1895
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 68db633711c5..9a7eaadb1688 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -57,6 +57,7 @@ MODULE_LICENSE("Dual BSD/GPL");
57struct ib_sa_sm_ah { 57struct ib_sa_sm_ah {
58 struct ib_ah *ah; 58 struct ib_ah *ah;
59 struct kref ref; 59 struct kref ref;
60 u8 src_path_mask;
60}; 61};
61 62
62struct ib_sa_port { 63struct ib_sa_port {
@@ -380,6 +381,7 @@ static void update_sm_ah(struct work_struct *work)
380 } 381 }
381 382
382 kref_init(&new_ah->ref); 383 kref_init(&new_ah->ref);
384 new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
383 385
384 memset(&ah_attr, 0, sizeof ah_attr); 386 memset(&ah_attr, 0, sizeof ah_attr);
385 ah_attr.dlid = port_attr.sm_lid; 387 ah_attr.dlid = port_attr.sm_lid;
@@ -460,6 +462,25 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
460} 462}
461EXPORT_SYMBOL(ib_sa_cancel_query); 463EXPORT_SYMBOL(ib_sa_cancel_query);
462 464
465static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
466{
467 struct ib_sa_device *sa_dev;
468 struct ib_sa_port *port;
469 unsigned long flags;
470 u8 src_path_mask;
471
472 sa_dev = ib_get_client_data(device, &sa_client);
473 if (!sa_dev)
474 return 0x7f;
475
476 port = &sa_dev->port[port_num - sa_dev->start_port];
477 spin_lock_irqsave(&port->ah_lock, flags);
478 src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f;
479 spin_unlock_irqrestore(&port->ah_lock, flags);
480
481 return src_path_mask;
482}
483
463int ib_init_ah_from_path(struct ib_device *device, u8 port_num, 484int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
464 struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr) 485 struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
465{ 486{
@@ -469,7 +490,8 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
469 memset(ah_attr, 0, sizeof *ah_attr); 490 memset(ah_attr, 0, sizeof *ah_attr);
470 ah_attr->dlid = be16_to_cpu(rec->dlid); 491 ah_attr->dlid = be16_to_cpu(rec->dlid);
471 ah_attr->sl = rec->sl; 492 ah_attr->sl = rec->sl;
472 ah_attr->src_path_bits = be16_to_cpu(rec->slid) & 0x7f; 493 ah_attr->src_path_bits = be16_to_cpu(rec->slid) &
494 get_src_path_mask(device, port_num);
473 ah_attr->port_num = port_num; 495 ah_attr->port_num = port_num;
474 ah_attr->static_rate = rec->rate; 496 ah_attr->static_rate = rec->rate;
475 497
diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c
index 54b81e17ad50..2bca753eb622 100644
--- a/drivers/infiniband/core/smi.c
+++ b/drivers/infiniband/core/smi.c
@@ -3,7 +3,7 @@
3 * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved. 3 * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved.
4 * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. 4 * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved.
5 * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. 5 * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. 6 * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved.
7 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 7 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
8 * 8 *
9 * This software is available to you under a choice of one of two 9 * This software is available to you under a choice of one of two
@@ -34,7 +34,6 @@
34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 * SOFTWARE. 35 * SOFTWARE.
36 * 36 *
37 * $Id: smi.c 1389 2004-12-27 22:56:47Z roland $
38 */ 37 */
39 38
40#include <rdma/ib_smi.h> 39#include <rdma/ib_smi.h>
@@ -44,9 +43,8 @@
44 * Fixup a directed route SMP for sending 43 * Fixup a directed route SMP for sending
45 * Return 0 if the SMP should be discarded 44 * Return 0 if the SMP should be discarded
46 */ 45 */
47int smi_handle_dr_smp_send(struct ib_smp *smp, 46enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
48 u8 node_type, 47 u8 node_type, int port_num)
49 int port_num)
50{ 48{
51 u8 hop_ptr, hop_cnt; 49 u8 hop_ptr, hop_cnt;
52 50
@@ -59,18 +57,18 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
59 if (hop_cnt && hop_ptr == 0) { 57 if (hop_cnt && hop_ptr == 0) {
60 smp->hop_ptr++; 58 smp->hop_ptr++;
61 return (smp->initial_path[smp->hop_ptr] == 59 return (smp->initial_path[smp->hop_ptr] ==
62 port_num); 60 port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
63 } 61 }
64 62
65 /* C14-9:2 */ 63 /* C14-9:2 */
66 if (hop_ptr && hop_ptr < hop_cnt) { 64 if (hop_ptr && hop_ptr < hop_cnt) {
67 if (node_type != RDMA_NODE_IB_SWITCH) 65 if (node_type != RDMA_NODE_IB_SWITCH)
68 return 0; 66 return IB_SMI_DISCARD;
69 67
70 /* smp->return_path set when received */ 68 /* smp->return_path set when received */
71 smp->hop_ptr++; 69 smp->hop_ptr++;
72 return (smp->initial_path[smp->hop_ptr] == 70 return (smp->initial_path[smp->hop_ptr] ==
73 port_num); 71 port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
74 } 72 }
75 73
76 /* C14-9:3 -- We're at the end of the DR segment of path */ 74 /* C14-9:3 -- We're at the end of the DR segment of path */
@@ -78,29 +76,30 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
78 /* smp->return_path set when received */ 76 /* smp->return_path set when received */
79 smp->hop_ptr++; 77 smp->hop_ptr++;
80 return (node_type == RDMA_NODE_IB_SWITCH || 78 return (node_type == RDMA_NODE_IB_SWITCH ||
81 smp->dr_dlid == IB_LID_PERMISSIVE); 79 smp->dr_dlid == IB_LID_PERMISSIVE ?
80 IB_SMI_HANDLE : IB_SMI_DISCARD);
82 } 81 }
83 82
84 /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ 83 /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
85 /* C14-9:5 -- Fail unreasonable hop pointer */ 84 /* C14-9:5 -- Fail unreasonable hop pointer */
86 return (hop_ptr == hop_cnt + 1); 85 return (hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD);
87 86
88 } else { 87 } else {
89 /* C14-13:1 */ 88 /* C14-13:1 */
90 if (hop_cnt && hop_ptr == hop_cnt + 1) { 89 if (hop_cnt && hop_ptr == hop_cnt + 1) {
91 smp->hop_ptr--; 90 smp->hop_ptr--;
92 return (smp->return_path[smp->hop_ptr] == 91 return (smp->return_path[smp->hop_ptr] ==
93 port_num); 92 port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
94 } 93 }
95 94
96 /* C14-13:2 */ 95 /* C14-13:2 */
97 if (2 <= hop_ptr && hop_ptr <= hop_cnt) { 96 if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
98 if (node_type != RDMA_NODE_IB_SWITCH) 97 if (node_type != RDMA_NODE_IB_SWITCH)
99 return 0; 98 return IB_SMI_DISCARD;
100 99
101 smp->hop_ptr--; 100 smp->hop_ptr--;
102 return (smp->return_path[smp->hop_ptr] == 101 return (smp->return_path[smp->hop_ptr] ==
103 port_num); 102 port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
104 } 103 }
105 104
106 /* C14-13:3 -- at the end of the DR segment of path */ 105 /* C14-13:3 -- at the end of the DR segment of path */
@@ -108,15 +107,16 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
108 smp->hop_ptr--; 107 smp->hop_ptr--;
109 /* C14-13:3 -- SMPs destined for SM shouldn't be here */ 108 /* C14-13:3 -- SMPs destined for SM shouldn't be here */
110 return (node_type == RDMA_NODE_IB_SWITCH || 109 return (node_type == RDMA_NODE_IB_SWITCH ||
111 smp->dr_slid == IB_LID_PERMISSIVE); 110 smp->dr_slid == IB_LID_PERMISSIVE ?
111 IB_SMI_HANDLE : IB_SMI_DISCARD);
112 } 112 }
113 113
114 /* C14-13:4 -- hop_ptr = 0 -> should have gone to SM */ 114 /* C14-13:4 -- hop_ptr = 0 -> should have gone to SM */
115 if (hop_ptr == 0) 115 if (hop_ptr == 0)
116 return 1; 116 return IB_SMI_HANDLE;
117 117
118 /* C14-13:5 -- Check for unreasonable hop pointer */ 118 /* C14-13:5 -- Check for unreasonable hop pointer */
119 return 0; 119 return IB_SMI_DISCARD;
120 } 120 }
121} 121}
122 122
@@ -124,10 +124,8 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
124 * Adjust information for a received SMP 124 * Adjust information for a received SMP
125 * Return 0 if the SMP should be dropped 125 * Return 0 if the SMP should be dropped
126 */ 126 */
127int smi_handle_dr_smp_recv(struct ib_smp *smp, 127enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
128 u8 node_type, 128 int port_num, int phys_port_cnt)
129 int port_num,
130 int phys_port_cnt)
131{ 129{
132 u8 hop_ptr, hop_cnt; 130 u8 hop_ptr, hop_cnt;
133 131
@@ -138,16 +136,17 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
138 if (!ib_get_smp_direction(smp)) { 136 if (!ib_get_smp_direction(smp)) {
139 /* C14-9:1 -- sender should have incremented hop_ptr */ 137 /* C14-9:1 -- sender should have incremented hop_ptr */
140 if (hop_cnt && hop_ptr == 0) 138 if (hop_cnt && hop_ptr == 0)
141 return 0; 139 return IB_SMI_DISCARD;
142 140
143 /* C14-9:2 -- intermediate hop */ 141 /* C14-9:2 -- intermediate hop */
144 if (hop_ptr && hop_ptr < hop_cnt) { 142 if (hop_ptr && hop_ptr < hop_cnt) {
145 if (node_type != RDMA_NODE_IB_SWITCH) 143 if (node_type != RDMA_NODE_IB_SWITCH)
146 return 0; 144 return IB_SMI_DISCARD;
147 145
148 smp->return_path[hop_ptr] = port_num; 146 smp->return_path[hop_ptr] = port_num;
149 /* smp->hop_ptr updated when sending */ 147 /* smp->hop_ptr updated when sending */
150 return (smp->initial_path[hop_ptr+1] <= phys_port_cnt); 148 return (smp->initial_path[hop_ptr+1] <= phys_port_cnt ?
149 IB_SMI_HANDLE : IB_SMI_DISCARD);
151 } 150 }
152 151
153 /* C14-9:3 -- We're at the end of the DR segment of path */ 152 /* C14-9:3 -- We're at the end of the DR segment of path */
@@ -157,12 +156,13 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
157 /* smp->hop_ptr updated when sending */ 156 /* smp->hop_ptr updated when sending */
158 157
159 return (node_type == RDMA_NODE_IB_SWITCH || 158 return (node_type == RDMA_NODE_IB_SWITCH ||
160 smp->dr_dlid == IB_LID_PERMISSIVE); 159 smp->dr_dlid == IB_LID_PERMISSIVE ?
160 IB_SMI_HANDLE : IB_SMI_DISCARD);
161 } 161 }
162 162
163 /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ 163 /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
164 /* C14-9:5 -- fail unreasonable hop pointer */ 164 /* C14-9:5 -- fail unreasonable hop pointer */
165 return (hop_ptr == hop_cnt + 1); 165 return (hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD);
166 166
167 } else { 167 } else {
168 168
@@ -170,16 +170,17 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
170 if (hop_cnt && hop_ptr == hop_cnt + 1) { 170 if (hop_cnt && hop_ptr == hop_cnt + 1) {
171 smp->hop_ptr--; 171 smp->hop_ptr--;
172 return (smp->return_path[smp->hop_ptr] == 172 return (smp->return_path[smp->hop_ptr] ==
173 port_num); 173 port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
174 } 174 }
175 175
176 /* C14-13:2 */ 176 /* C14-13:2 */
177 if (2 <= hop_ptr && hop_ptr <= hop_cnt) { 177 if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
178 if (node_type != RDMA_NODE_IB_SWITCH) 178 if (node_type != RDMA_NODE_IB_SWITCH)
179 return 0; 179 return IB_SMI_DISCARD;
180 180
181 /* smp->hop_ptr updated when sending */ 181 /* smp->hop_ptr updated when sending */
182 return (smp->return_path[hop_ptr-1] <= phys_port_cnt); 182 return (smp->return_path[hop_ptr-1] <= phys_port_cnt ?
183 IB_SMI_HANDLE : IB_SMI_DISCARD);
183 } 184 }
184 185
185 /* C14-13:3 -- We're at the end of the DR segment of path */ 186 /* C14-13:3 -- We're at the end of the DR segment of path */
@@ -187,23 +188,20 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
187 if (smp->dr_slid == IB_LID_PERMISSIVE) { 188 if (smp->dr_slid == IB_LID_PERMISSIVE) {
188 /* giving SMP to SM - update hop_ptr */ 189 /* giving SMP to SM - update hop_ptr */
189 smp->hop_ptr--; 190 smp->hop_ptr--;
190 return 1; 191 return IB_SMI_HANDLE;
191 } 192 }
192 /* smp->hop_ptr updated when sending */ 193 /* smp->hop_ptr updated when sending */
193 return (node_type == RDMA_NODE_IB_SWITCH); 194 return (node_type == RDMA_NODE_IB_SWITCH ?
195 IB_SMI_HANDLE: IB_SMI_DISCARD);
194 } 196 }
195 197
196 /* C14-13:4 -- hop_ptr = 0 -> give to SM */ 198 /* C14-13:4 -- hop_ptr = 0 -> give to SM */
197 /* C14-13:5 -- Check for unreasonable hop pointer */ 199 /* C14-13:5 -- Check for unreasonable hop pointer */
198 return (hop_ptr == 0); 200 return (hop_ptr == 0 ? IB_SMI_HANDLE : IB_SMI_DISCARD);
199 } 201 }
200} 202}
201 203
202/* 204enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp)
203 * Return 1 if the received DR SMP should be forwarded to the send queue
204 * Return 0 if the SMP should be completed up the stack
205 */
206int smi_check_forward_dr_smp(struct ib_smp *smp)
207{ 205{
208 u8 hop_ptr, hop_cnt; 206 u8 hop_ptr, hop_cnt;
209 207
@@ -213,23 +211,25 @@ int smi_check_forward_dr_smp(struct ib_smp *smp)
213 if (!ib_get_smp_direction(smp)) { 211 if (!ib_get_smp_direction(smp)) {
214 /* C14-9:2 -- intermediate hop */ 212 /* C14-9:2 -- intermediate hop */
215 if (hop_ptr && hop_ptr < hop_cnt) 213 if (hop_ptr && hop_ptr < hop_cnt)
216 return 1; 214 return IB_SMI_SEND;
217 215
218 /* C14-9:3 -- at the end of the DR segment of path */ 216 /* C14-9:3 -- at the end of the DR segment of path */
219 if (hop_ptr == hop_cnt) 217 if (hop_ptr == hop_cnt)
220 return (smp->dr_dlid == IB_LID_PERMISSIVE); 218 return (smp->dr_dlid == IB_LID_PERMISSIVE ?
219 IB_SMI_SEND : IB_SMI_LOCAL);
221 220
222 /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ 221 /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
223 if (hop_ptr == hop_cnt + 1) 222 if (hop_ptr == hop_cnt + 1)
224 return 1; 223 return IB_SMI_SEND;
225 } else { 224 } else {
226 /* C14-13:2 */ 225 /* C14-13:2 -- intermediate hop */
227 if (2 <= hop_ptr && hop_ptr <= hop_cnt) 226 if (2 <= hop_ptr && hop_ptr <= hop_cnt)
228 return 1; 227 return IB_SMI_SEND;
229 228
230 /* C14-13:3 -- at the end of the DR segment of path */ 229 /* C14-13:3 -- at the end of the DR segment of path */
231 if (hop_ptr == 1) 230 if (hop_ptr == 1)
232 return (smp->dr_slid != IB_LID_PERMISSIVE); 231 return (smp->dr_slid != IB_LID_PERMISSIVE ?
232 IB_SMI_SEND : IB_SMI_LOCAL);
233 } 233 }
234 return 0; 234 return IB_SMI_LOCAL;
235} 235}
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
index 3011bfd86dc5..9a4b349efc30 100644
--- a/drivers/infiniband/core/smi.h
+++ b/drivers/infiniband/core/smi.h
@@ -3,7 +3,7 @@
3 * Copyright (c) 2004 Infinicon Corporation. All rights reserved. 3 * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
4 * Copyright (c) 2004 Intel Corporation. All rights reserved. 4 * Copyright (c) 2004 Intel Corporation. All rights reserved.
5 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 5 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved. 6 * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved.
7 * 7 *
8 * This software is available to you under a choice of one of two 8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU 9 * licenses. You may choose to be licensed under the terms of the GNU
@@ -33,7 +33,6 @@
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE. 34 * SOFTWARE.
35 * 35 *
36 * $Id: smi.h 1389 2004-12-27 22:56:47Z roland $
37 */ 36 */
38 37
39#ifndef __SMI_H_ 38#ifndef __SMI_H_
@@ -41,26 +40,33 @@
41 40
42#include <rdma/ib_smi.h> 41#include <rdma/ib_smi.h>
43 42
44int smi_handle_dr_smp_recv(struct ib_smp *smp, 43enum smi_action {
45 u8 node_type, 44 IB_SMI_DISCARD,
46 int port_num, 45 IB_SMI_HANDLE
47 int phys_port_cnt); 46};
48extern int smi_check_forward_dr_smp(struct ib_smp *smp); 47
49extern int smi_handle_dr_smp_send(struct ib_smp *smp, 48enum smi_forward_action {
50 u8 node_type, 49 IB_SMI_LOCAL, /* SMP should be completed up the stack */
51 int port_num); 50 IB_SMI_SEND, /* received DR SMP should be forwarded to the send queue */
51};
52
53enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
54 int port_num, int phys_port_cnt);
55extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp);
56extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
57 u8 node_type, int port_num);
52 58
53/* 59/*
54 * Return 1 if the SMP should be handled by the local SMA/SM via process_mad 60 * Return 1 if the SMP should be handled by the local SMA/SM via process_mad
55 */ 61 */
56static inline int smi_check_local_smp(struct ib_smp *smp, 62static inline enum smi_action smi_check_local_smp(struct ib_smp *smp,
57 struct ib_device *device) 63 struct ib_device *device)
58{ 64{
59 /* C14-9:3 -- We're at the end of the DR segment of path */ 65 /* C14-9:3 -- We're at the end of the DR segment of path */
60 /* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */ 66 /* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */
61 return ((device->process_mad && 67 return ((device->process_mad &&
62 !ib_get_smp_direction(smp) && 68 !ib_get_smp_direction(smp) &&
63 (smp->hop_ptr == smp->hop_cnt + 1))); 69 (smp->hop_ptr == smp->hop_cnt + 1)) ?
70 IB_SMI_HANDLE : IB_SMI_DISCARD);
64} 71}
65
66#endif /* __SMI_H_ */ 72#endif /* __SMI_H_ */
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 000c086bf2e9..08c299ebf4a8 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -683,6 +683,7 @@ int ib_device_register_sysfs(struct ib_device *device)
683 683
684 class_dev->class = &ib_class; 684 class_dev->class = &ib_class;
685 class_dev->class_data = device; 685 class_dev->class_data = device;
686 class_dev->dev = device->dma_device;
686 strlcpy(class_dev->class_id, device->name, BUS_ID_SIZE); 687 strlcpy(class_dev->class_id, device->name, BUS_ID_SIZE);
687 688
688 INIT_LIST_HEAD(&device->port_list); 689 INIT_LIST_HEAD(&device->port_list);
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index ee51d79a7ad5..2586a3ee8eba 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -407,29 +407,18 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
407 407
408 mutex_lock(&file->file_mutex); 408 mutex_lock(&file->file_mutex);
409 while (list_empty(&file->events)) { 409 while (list_empty(&file->events)) {
410 mutex_unlock(&file->file_mutex);
410 411
411 if (file->filp->f_flags & O_NONBLOCK) { 412 if (file->filp->f_flags & O_NONBLOCK)
412 result = -EAGAIN; 413 return -EAGAIN;
413 break;
414 }
415 414
416 if (signal_pending(current)) { 415 if (wait_event_interruptible(file->poll_wait,
417 result = -ERESTARTSYS; 416 !list_empty(&file->events)))
418 break; 417 return -ERESTARTSYS;
419 }
420 418
421 prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE);
422
423 mutex_unlock(&file->file_mutex);
424 schedule();
425 mutex_lock(&file->file_mutex); 419 mutex_lock(&file->file_mutex);
426
427 finish_wait(&file->poll_wait, &wait);
428 } 420 }
429 421
430 if (result)
431 goto done;
432
433 uevent = list_entry(file->events.next, struct ib_ucm_event, file_list); 422 uevent = list_entry(file->events.next, struct ib_ucm_event, file_list);
434 423
435 if (ib_ucm_new_cm_id(uevent->resp.event)) { 424 if (ib_ucm_new_cm_id(uevent->resp.event)) {
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index c859134c1daa..53b4c94a7eb5 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -306,26 +306,18 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
306 306
307 mutex_lock(&file->mut); 307 mutex_lock(&file->mut);
308 while (list_empty(&file->event_list)) { 308 while (list_empty(&file->event_list)) {
309 if (file->filp->f_flags & O_NONBLOCK) { 309 mutex_unlock(&file->mut);
310 ret = -EAGAIN;
311 break;
312 }
313 310
314 if (signal_pending(current)) { 311 if (file->filp->f_flags & O_NONBLOCK)
315 ret = -ERESTARTSYS; 312 return -EAGAIN;
316 break; 313
317 } 314 if (wait_event_interruptible(file->poll_wait,
315 !list_empty(&file->event_list)))
316 return -ERESTARTSYS;
318 317
319 prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE);
320 mutex_unlock(&file->mut);
321 schedule();
322 mutex_lock(&file->mut); 318 mutex_lock(&file->mut);
323 finish_wait(&file->poll_wait, &wait);
324 } 319 }
325 320
326 if (ret)
327 goto done;
328
329 uevent = list_entry(file->event_list.next, struct ucma_event, list); 321 uevent = list_entry(file->event_list.next, struct ucma_event, list);
330 322
331 if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) { 323 if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index c069ebeba8e3..8199b83052a9 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -135,7 +135,7 @@ static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
135 135
136static DEFINE_SPINLOCK(port_lock); 136static DEFINE_SPINLOCK(port_lock);
137static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS]; 137static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS];
138static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS * 2); 138static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
139 139
140static void ib_umad_add_one(struct ib_device *device); 140static void ib_umad_add_one(struct ib_device *device);
141static void ib_umad_remove_one(struct ib_device *device); 141static void ib_umad_remove_one(struct ib_device *device);
@@ -231,12 +231,17 @@ static void recv_handler(struct ib_mad_agent *agent,
231 packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; 231 packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits;
232 packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); 232 packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
233 if (packet->mad.hdr.grh_present) { 233 if (packet->mad.hdr.grh_present) {
234 /* XXX parse GRH */ 234 struct ib_ah_attr ah_attr;
235 packet->mad.hdr.gid_index = 0; 235
236 packet->mad.hdr.hop_limit = 0; 236 ib_init_ah_from_wc(agent->device, agent->port_num,
237 packet->mad.hdr.traffic_class = 0; 237 mad_recv_wc->wc, mad_recv_wc->recv_buf.grh,
238 memset(packet->mad.hdr.gid, 0, 16); 238 &ah_attr);
239 packet->mad.hdr.flow_label = 0; 239
240 packet->mad.hdr.gid_index = ah_attr.grh.sgid_index;
241 packet->mad.hdr.hop_limit = ah_attr.grh.hop_limit;
242 packet->mad.hdr.traffic_class = ah_attr.grh.traffic_class;
243 memcpy(packet->mad.hdr.gid, &ah_attr.grh.dgid, 16);
244 packet->mad.hdr.flow_label = cpu_to_be32(ah_attr.grh.flow_label);
240 } 245 }
241 246
242 if (queue_packet(file, agent, packet)) 247 if (queue_packet(file, agent, packet))
@@ -473,6 +478,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
473 if (packet->mad.hdr.grh_present) { 478 if (packet->mad.hdr.grh_present) {
474 ah_attr.ah_flags = IB_AH_GRH; 479 ah_attr.ah_flags = IB_AH_GRH;
475 memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16); 480 memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16);
481 ah_attr.grh.sgid_index = packet->mad.hdr.gid_index;
476 ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label); 482 ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label);
477 ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit; 483 ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit;
478 ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; 484 ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class;
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index fef972752912..607c09bf764c 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -796,7 +796,6 @@ int c2_register_device(struct c2_dev *dev)
796 memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6); 796 memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6);
797 dev->ibdev.phys_port_cnt = 1; 797 dev->ibdev.phys_port_cnt = 1;
798 dev->ibdev.dma_device = &dev->pcidev->dev; 798 dev->ibdev.dma_device = &dev->pcidev->dev;
799 dev->ibdev.class_dev.dev = &dev->pcidev->dev;
800 dev->ibdev.query_device = c2_query_device; 799 dev->ibdev.query_device = c2_query_device;
801 dev->ibdev.query_port = c2_query_port; 800 dev->ibdev.query_port = c2_query_port;
802 dev->ibdev.modify_port = c2_modify_port; 801 dev->ibdev.modify_port = c2_modify_port;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 24e0df04f7db..af28a317016d 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1108,7 +1108,6 @@ int iwch_register_device(struct iwch_dev *dev)
1108 memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC)); 1108 memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
1109 dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports; 1109 dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
1110 dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev); 1110 dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev);
1111 dev->ibdev.class_dev.dev = &(dev->rdev.rnic_info.pdev->dev);
1112 dev->ibdev.query_device = iwch_query_device; 1111 dev->ibdev.query_device = iwch_query_device;
1113 dev->ibdev.query_port = iwch_query_port; 1112 dev->ibdev.query_port = iwch_query_port;
1114 dev->ibdev.modify_port = iwch_modify_port; 1113 dev->ibdev.modify_port = iwch_modify_port;
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 82ded44c6cee..10fb8fbafa0c 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -106,6 +106,7 @@ struct ehca_shca {
106 struct ehca_mr *maxmr; 106 struct ehca_mr *maxmr;
107 struct ehca_pd *pd; 107 struct ehca_pd *pd;
108 struct h_galpas galpas; 108 struct h_galpas galpas;
109 struct mutex modify_mutex;
109}; 110};
110 111
111struct ehca_pd { 112struct ehca_pd {
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index 30eb45df9f0b..32b55a4f0e5b 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -147,6 +147,7 @@ int ehca_query_port(struct ib_device *ibdev,
147 break; 147 break;
148 } 148 }
149 149
150 props->port_cap_flags = rblock->capability_mask;
150 props->gid_tbl_len = rblock->gid_tbl_len; 151 props->gid_tbl_len = rblock->gid_tbl_len;
151 props->max_msg_sz = rblock->max_msg_sz; 152 props->max_msg_sz = rblock->max_msg_sz;
152 props->bad_pkey_cntr = rblock->bad_pkey_cntr; 153 props->bad_pkey_cntr = rblock->bad_pkey_cntr;
@@ -236,10 +237,60 @@ query_gid1:
236 return ret; 237 return ret;
237} 238}
238 239
240const u32 allowed_port_caps = (
241 IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP |
242 IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP |
243 IB_PORT_VENDOR_CLASS_SUP);
244
239int ehca_modify_port(struct ib_device *ibdev, 245int ehca_modify_port(struct ib_device *ibdev,
240 u8 port, int port_modify_mask, 246 u8 port, int port_modify_mask,
241 struct ib_port_modify *props) 247 struct ib_port_modify *props)
242{ 248{
243 /* Not implemented yet */ 249 int ret = 0;
244 return -EFAULT; 250 struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device);
251 struct hipz_query_port *rblock;
252 u32 cap;
253 u64 hret;
254
255 if ((props->set_port_cap_mask | props->clr_port_cap_mask)
256 & ~allowed_port_caps) {
257 ehca_err(&shca->ib_device, "Non-changeable bits set in masks "
258 "set=%x clr=%x allowed=%x", props->set_port_cap_mask,
259 props->clr_port_cap_mask, allowed_port_caps);
260 return -EINVAL;
261 }
262
263 if (mutex_lock_interruptible(&shca->modify_mutex))
264 return -ERESTARTSYS;
265
266 rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
267 if (!rblock) {
268 ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
269 ret = -ENOMEM;
270 goto modify_port1;
271 }
272
273 if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
274 ehca_err(&shca->ib_device, "Can't query port properties");
275 ret = -EINVAL;
276 goto modify_port2;
277 }
278
279 cap = (rblock->capability_mask | props->set_port_cap_mask)
280 & ~props->clr_port_cap_mask;
281
282 hret = hipz_h_modify_port(shca->ipz_hca_handle, port,
283 cap, props->init_type, port_modify_mask);
284 if (hret != H_SUCCESS) {
285 ehca_err(&shca->ib_device, "Modify port failed hret=%lx", hret);
286 ret = -EINVAL;
287 }
288
289modify_port2:
290 ehca_free_fw_ctrlblock(rblock);
291
292modify_port1:
293 mutex_unlock(&shca->modify_mutex);
294
295 return ret;
245} 296}
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 059da9628bb5..3b23d677cb86 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -587,6 +587,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
587 ehca_gen_err("Cannot allocate shca memory."); 587 ehca_gen_err("Cannot allocate shca memory.");
588 return -ENOMEM; 588 return -ENOMEM;
589 } 589 }
590 mutex_init(&shca->modify_mutex);
590 591
591 shca->ibmebus_dev = dev; 592 shca->ibmebus_dev = dev;
592 shca->ipz_hca_handle.handle = *handle; 593 shca->ipz_hca_handle.handle = *handle;
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index 3fb46e67df87..b564fcd3b282 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -70,6 +70,10 @@
70#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31) 70#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31)
71#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63) 71#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63)
72 72
73#define H_MP_INIT_TYPE EHCA_BMASK_IBM(44, 47)
74#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48)
75#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49)
76
73/* direct access qp controls */ 77/* direct access qp controls */
74#define DAQP_CTRL_ENABLE 0x01 78#define DAQP_CTRL_ENABLE 0x01
75#define DAQP_CTRL_SEND_COMP 0x20 79#define DAQP_CTRL_SEND_COMP 0x20
@@ -364,6 +368,26 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
364 return ret; 368 return ret;
365} 369}
366 370
371u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
372 const u8 port_id, const u32 port_cap,
373 const u8 init_type, const int modify_mask)
374{
375 u64 port_attributes = port_cap;
376
377 if (modify_mask & IB_PORT_SHUTDOWN)
378 port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1);
379 if (modify_mask & IB_PORT_INIT_TYPE)
380 port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type);
381 if (modify_mask & IB_PORT_RESET_QKEY_CNTR)
382 port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1);
383
384 return ehca_plpar_hcall_norets(H_MODIFY_PORT,
385 adapter_handle.handle, /* r4 */
386 port_id, /* r5 */
387 port_attributes, /* r6 */
388 0, 0, 0, 0);
389}
390
367u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, 391u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
368 struct hipz_query_hca *query_hca_rblock) 392 struct hipz_query_hca *query_hca_rblock)
369{ 393{
diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h
index 587ebd470959..2869f7dd6196 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.h
+++ b/drivers/infiniband/hw/ehca/hcp_if.h
@@ -85,6 +85,10 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
85 const u8 port_id, 85 const u8 port_id,
86 struct hipz_query_port *query_port_response_block); 86 struct hipz_query_port *query_port_response_block);
87 87
88u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
89 const u8 port_id, const u32 port_cap,
90 const u8 init_type, const int modify_mask);
91
88u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, 92u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
89 struct hipz_query_hca *query_hca_rblock); 93 struct hipz_query_hca *query_hca_rblock);
90 94
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 54139d398181..10c008f22ba6 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -78,6 +78,8 @@
78#define IPATH_IB_LINKINIT 3 78#define IPATH_IB_LINKINIT 3
79#define IPATH_IB_LINKDOWN_SLEEP 4 79#define IPATH_IB_LINKDOWN_SLEEP 4
80#define IPATH_IB_LINKDOWN_DISABLE 5 80#define IPATH_IB_LINKDOWN_DISABLE 5
81#define IPATH_IB_LINK_LOOPBACK 6 /* enable local loopback */
82#define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */
81 83
82/* 84/*
83 * stats maintained by the driver. For now, at least, this is global 85 * stats maintained by the driver. For now, at least, this is global
@@ -316,11 +318,17 @@ struct ipath_base_info {
316 /* address of readonly memory copy of the rcvhdrq tail register. */ 318 /* address of readonly memory copy of the rcvhdrq tail register. */
317 __u64 spi_rcvhdr_tailaddr; 319 __u64 spi_rcvhdr_tailaddr;
318 320
319 /* shared memory pages for subports if IPATH_RUNTIME_MASTER is set */ 321 /* shared memory pages for subports if port is shared */
320 __u64 spi_subport_uregbase; 322 __u64 spi_subport_uregbase;
321 __u64 spi_subport_rcvegrbuf; 323 __u64 spi_subport_rcvegrbuf;
322 __u64 spi_subport_rcvhdr_base; 324 __u64 spi_subport_rcvhdr_base;
323 325
326 /* shared memory page for hardware port if it is shared */
327 __u64 spi_port_uregbase;
328 __u64 spi_port_rcvegrbuf;
329 __u64 spi_port_rcvhdr_base;
330 __u64 spi_port_rcvhdr_tailaddr;
331
324} __attribute__ ((aligned(8))); 332} __attribute__ ((aligned(8)));
325 333
326 334
@@ -344,7 +352,7 @@ struct ipath_base_info {
344 * may not be implemented; the user code must deal with this if it 352 * may not be implemented; the user code must deal with this if it
345 * cares, or it must abort after initialization reports the difference. 353 * cares, or it must abort after initialization reports the difference.
346 */ 354 */
347#define IPATH_USER_SWMINOR 3 355#define IPATH_USER_SWMINOR 5
348 356
349#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR) 357#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
350 358
@@ -418,11 +426,14 @@ struct ipath_user_info {
418#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */ 426#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */
419#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */ 427#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */
420#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */ 428#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */
421#define IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes */ 429#define __IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes (for old user code) */
422#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */ 430#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */
423#define IPATH_CMD_USER_INIT 24 /* set up userspace */ 431#define IPATH_CMD_USER_INIT 24 /* set up userspace */
432#define IPATH_CMD_UNUSED_1 25
433#define IPATH_CMD_UNUSED_2 26
434#define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */
424 435
425#define IPATH_CMD_MAX 24 436#define IPATH_CMD_MAX 27
426 437
427struct ipath_port_info { 438struct ipath_port_info {
428 __u32 num_active; /* number of active units */ 439 __u32 num_active; /* number of active units */
@@ -430,7 +441,7 @@ struct ipath_port_info {
430 __u16 port; /* port on unit assigned to caller */ 441 __u16 port; /* port on unit assigned to caller */
431 __u16 subport; /* subport on unit assigned to caller */ 442 __u16 subport; /* subport on unit assigned to caller */
432 __u16 num_ports; /* number of ports available on unit */ 443 __u16 num_ports; /* number of ports available on unit */
433 __u16 num_subports; /* number of subport slaves opened on port */ 444 __u16 num_subports; /* number of subports opened on port */
434}; 445};
435 446
436struct ipath_tid_info { 447struct ipath_tid_info {
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index 87462e0cb4d2..ea78e6dddc90 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -76,7 +76,20 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
76 } 76 }
77 return; 77 return;
78 } 78 }
79 wc->queue[head] = *entry; 79 wc->queue[head].wr_id = entry->wr_id;
80 wc->queue[head].status = entry->status;
81 wc->queue[head].opcode = entry->opcode;
82 wc->queue[head].vendor_err = entry->vendor_err;
83 wc->queue[head].byte_len = entry->byte_len;
84 wc->queue[head].imm_data = (__u32 __force)entry->imm_data;
85 wc->queue[head].qp_num = entry->qp->qp_num;
86 wc->queue[head].src_qp = entry->src_qp;
87 wc->queue[head].wc_flags = entry->wc_flags;
88 wc->queue[head].pkey_index = entry->pkey_index;
89 wc->queue[head].slid = entry->slid;
90 wc->queue[head].sl = entry->sl;
91 wc->queue[head].dlid_path_bits = entry->dlid_path_bits;
92 wc->queue[head].port_num = entry->port_num;
80 wc->head = next; 93 wc->head = next;
81 94
82 if (cq->notify == IB_CQ_NEXT_COMP || 95 if (cq->notify == IB_CQ_NEXT_COMP ||
@@ -122,9 +135,30 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
122 if (tail > (u32) cq->ibcq.cqe) 135 if (tail > (u32) cq->ibcq.cqe)
123 tail = (u32) cq->ibcq.cqe; 136 tail = (u32) cq->ibcq.cqe;
124 for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { 137 for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
138 struct ipath_qp *qp;
139
125 if (tail == wc->head) 140 if (tail == wc->head)
126 break; 141 break;
127 *entry = wc->queue[tail]; 142
143 qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table,
144 wc->queue[tail].qp_num);
145 entry->qp = &qp->ibqp;
146 if (atomic_dec_and_test(&qp->refcount))
147 wake_up(&qp->wait);
148
149 entry->wr_id = wc->queue[tail].wr_id;
150 entry->status = wc->queue[tail].status;
151 entry->opcode = wc->queue[tail].opcode;
152 entry->vendor_err = wc->queue[tail].vendor_err;
153 entry->byte_len = wc->queue[tail].byte_len;
154 entry->imm_data = wc->queue[tail].imm_data;
155 entry->src_qp = wc->queue[tail].src_qp;
156 entry->wc_flags = wc->queue[tail].wc_flags;
157 entry->pkey_index = wc->queue[tail].pkey_index;
158 entry->slid = wc->queue[tail].slid;
159 entry->sl = wc->queue[tail].sl;
160 entry->dlid_path_bits = wc->queue[tail].dlid_path_bits;
161 entry->port_num = wc->queue[tail].port_num;
128 if (tail >= cq->ibcq.cqe) 162 if (tail >= cq->ibcq.cqe)
129 tail = 0; 163 tail = 0;
130 else 164 else
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index df69f0d80b8b..42bfbdb0d3e6 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -57,6 +57,7 @@
57#define __IPATH_PROCDBG 0x100 57#define __IPATH_PROCDBG 0x100
58/* print mmap/nopage stuff, not using VDBG any more */ 58/* print mmap/nopage stuff, not using VDBG any more */
59#define __IPATH_MMDBG 0x200 59#define __IPATH_MMDBG 0x200
60#define __IPATH_ERRPKTDBG 0x400
60#define __IPATH_USER_SEND 0x1000 /* use user mode send */ 61#define __IPATH_USER_SEND 0x1000 /* use user mode send */
61#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */ 62#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */
62#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */ 63#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 0f13a2182cc7..63e8368b0e95 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -296,7 +296,7 @@ static int ipath_diag_open(struct inode *in, struct file *fp)
296 } 296 }
297 297
298 fp->private_data = dd; 298 fp->private_data = dd;
299 ipath_diag_inuse = 1; 299 ipath_diag_inuse = -2;
300 diag_set_link = 0; 300 diag_set_link = 0;
301 ret = 0; 301 ret = 0;
302 302
@@ -461,6 +461,8 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data,
461 else if ((count % 4) || (*off % 4)) 461 else if ((count % 4) || (*off % 4))
462 /* address or length is not 32-bit aligned, hence invalid */ 462 /* address or length is not 32-bit aligned, hence invalid */
463 ret = -EINVAL; 463 ret = -EINVAL;
464 else if (ipath_diag_inuse < 1 && (*off || count != 8))
465 ret = -EINVAL; /* prevent cat /dev/ipath_diag* */
464 else if ((count % 8) || (*off % 8)) 466 else if ((count % 8) || (*off % 8))
465 /* address or length not 64-bit aligned; do 32-bit reads */ 467 /* address or length not 64-bit aligned; do 32-bit reads */
466 ret = ipath_read_umem32(dd, data, kreg_base + *off, count); 468 ret = ipath_read_umem32(dd, data, kreg_base + *off, count);
@@ -470,6 +472,8 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data,
470 if (ret >= 0) { 472 if (ret >= 0) {
471 *off += count; 473 *off += count;
472 ret = count; 474 ret = count;
475 if (ipath_diag_inuse == -2)
476 ipath_diag_inuse++;
473 } 477 }
474 478
475 return ret; 479 return ret;
@@ -489,6 +493,9 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
489 else if ((count % 4) || (*off % 4)) 493 else if ((count % 4) || (*off % 4))
490 /* address or length is not 32-bit aligned, hence invalid */ 494 /* address or length is not 32-bit aligned, hence invalid */
491 ret = -EINVAL; 495 ret = -EINVAL;
496 else if ((ipath_diag_inuse == -1 && (*off || count != 8)) ||
497 ipath_diag_inuse == -2) /* read qw off 0, write qw off 0 */
498 ret = -EINVAL; /* before any other write allowed */
492 else if ((count % 8) || (*off % 8)) 499 else if ((count % 8) || (*off % 8))
493 /* address or length not 64-bit aligned; do 32-bit writes */ 500 /* address or length not 64-bit aligned; do 32-bit writes */
494 ret = ipath_write_umem32(dd, kreg_base + *off, data, count); 501 ret = ipath_write_umem32(dd, kreg_base + *off, data, count);
@@ -498,6 +505,8 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
498 if (ret >= 0) { 505 if (ret >= 0) {
499 *off += count; 506 *off += count;
500 ret = count; 507 ret = count;
508 if (ipath_diag_inuse == -1)
509 ipath_diag_inuse = 1; /* all read/write OK now */
501 } 510 }
502 511
503 return ret; 512 return ret;
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index ae7f21a0cdc0..e3a223209710 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -390,15 +390,23 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
390 390
391 /* setup the chip-specific functions, as early as possible. */ 391 /* setup the chip-specific functions, as early as possible. */
392 switch (ent->device) { 392 switch (ent->device) {
393#ifdef CONFIG_HT_IRQ
394 case PCI_DEVICE_ID_INFINIPATH_HT: 393 case PCI_DEVICE_ID_INFINIPATH_HT:
394#ifdef CONFIG_HT_IRQ
395 ipath_init_iba6110_funcs(dd); 395 ipath_init_iba6110_funcs(dd);
396 break; 396 break;
397#else
398 ipath_dev_err(dd, "QLogic HT device 0x%x cannot work if "
399 "CONFIG_HT_IRQ is not enabled\n", ent->device);
400 return -ENODEV;
397#endif 401#endif
398#ifdef CONFIG_PCI_MSI
399 case PCI_DEVICE_ID_INFINIPATH_PE800: 402 case PCI_DEVICE_ID_INFINIPATH_PE800:
403#ifdef CONFIG_PCI_MSI
400 ipath_init_iba6120_funcs(dd); 404 ipath_init_iba6120_funcs(dd);
401 break; 405 break;
406#else
407 ipath_dev_err(dd, "QLogic PCIE device 0x%x cannot work if "
408 "CONFIG_PCI_MSI is not enabled\n", ent->device);
409 return -ENODEV;
402#endif 410#endif
403 default: 411 default:
404 ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, " 412 ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
@@ -486,7 +494,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
486 494
487 ret = ipath_init_chip(dd, 0); /* do the chip-specific init */ 495 ret = ipath_init_chip(dd, 0); /* do the chip-specific init */
488 if (ret) 496 if (ret)
489 goto bail_iounmap; 497 goto bail_irqsetup;
490 498
491 ret = ipath_enable_wc(dd); 499 ret = ipath_enable_wc(dd);
492 500
@@ -505,6 +513,9 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
505 513
506 goto bail; 514 goto bail;
507 515
516bail_irqsetup:
517 if (pdev->irq) free_irq(pdev->irq, dd);
518
508bail_iounmap: 519bail_iounmap:
509 iounmap((volatile void __iomem *) dd->ipath_kregbase); 520 iounmap((volatile void __iomem *) dd->ipath_kregbase);
510 521
@@ -525,8 +536,6 @@ static void __devexit cleanup_device(struct ipath_devdata *dd)
525{ 536{
526 int port; 537 int port;
527 538
528 ipath_shutdown_device(dd);
529
530 if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { 539 if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
531 /* can't do anything more with chip; needs re-init */ 540 /* can't do anything more with chip; needs re-init */
532 *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT; 541 *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
@@ -594,8 +603,9 @@ static void __devexit cleanup_device(struct ipath_devdata *dd)
594 603
595 ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", 604 ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
596 dd->ipath_pageshadow); 605 dd->ipath_pageshadow);
597 vfree(dd->ipath_pageshadow); 606 tmpp = dd->ipath_pageshadow;
598 dd->ipath_pageshadow = NULL; 607 dd->ipath_pageshadow = NULL;
608 vfree(tmpp);
599 } 609 }
600 610
601 /* 611 /*
@@ -622,6 +632,12 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev)
622 632
623 ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd); 633 ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
624 634
635 /*
636 * disable the IB link early, to be sure no new packets arrive, which
637 * complicates the shutdown process
638 */
639 ipath_shutdown_device(dd);
640
625 if (dd->verbs_dev) 641 if (dd->verbs_dev)
626 ipath_unregister_ib_device(dd->verbs_dev); 642 ipath_unregister_ib_device(dd->verbs_dev);
627 643
@@ -754,9 +770,42 @@ static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
754 return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT; 770 return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
755} 771}
756 772
757void ipath_decode_err(char *buf, size_t blen, ipath_err_t err) 773/*
774 * Decode the error status into strings, deciding whether to always
775 * print * it or not depending on "normal packet errors" vs everything
776 * else. Return 1 if "real" errors, otherwise 0 if only packet
777 * errors, so caller can decide what to print with the string.
778 */
779int ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
758{ 780{
781 int iserr = 1;
759 *buf = '\0'; 782 *buf = '\0';
783 if (err & INFINIPATH_E_PKTERRS) {
784 if (!(err & ~INFINIPATH_E_PKTERRS))
785 iserr = 0; // if only packet errors.
786 if (ipath_debug & __IPATH_ERRPKTDBG) {
787 if (err & INFINIPATH_E_REBP)
788 strlcat(buf, "EBP ", blen);
789 if (err & INFINIPATH_E_RVCRC)
790 strlcat(buf, "VCRC ", blen);
791 if (err & INFINIPATH_E_RICRC) {
792 strlcat(buf, "CRC ", blen);
793 // clear for check below, so only once
794 err &= INFINIPATH_E_RICRC;
795 }
796 if (err & INFINIPATH_E_RSHORTPKTLEN)
797 strlcat(buf, "rshortpktlen ", blen);
798 if (err & INFINIPATH_E_SDROPPEDDATAPKT)
799 strlcat(buf, "sdroppeddatapkt ", blen);
800 if (err & INFINIPATH_E_SPKTLEN)
801 strlcat(buf, "spktlen ", blen);
802 }
803 if ((err & INFINIPATH_E_RICRC) &&
804 !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
805 strlcat(buf, "CRC ", blen);
806 if (!iserr)
807 goto done;
808 }
760 if (err & INFINIPATH_E_RHDRLEN) 809 if (err & INFINIPATH_E_RHDRLEN)
761 strlcat(buf, "rhdrlen ", blen); 810 strlcat(buf, "rhdrlen ", blen);
762 if (err & INFINIPATH_E_RBADTID) 811 if (err & INFINIPATH_E_RBADTID)
@@ -767,12 +816,12 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
767 strlcat(buf, "rhdr ", blen); 816 strlcat(buf, "rhdr ", blen);
768 if (err & INFINIPATH_E_RLONGPKTLEN) 817 if (err & INFINIPATH_E_RLONGPKTLEN)
769 strlcat(buf, "rlongpktlen ", blen); 818 strlcat(buf, "rlongpktlen ", blen);
770 if (err & INFINIPATH_E_RSHORTPKTLEN)
771 strlcat(buf, "rshortpktlen ", blen);
772 if (err & INFINIPATH_E_RMAXPKTLEN) 819 if (err & INFINIPATH_E_RMAXPKTLEN)
773 strlcat(buf, "rmaxpktlen ", blen); 820 strlcat(buf, "rmaxpktlen ", blen);
774 if (err & INFINIPATH_E_RMINPKTLEN) 821 if (err & INFINIPATH_E_RMINPKTLEN)
775 strlcat(buf, "rminpktlen ", blen); 822 strlcat(buf, "rminpktlen ", blen);
823 if (err & INFINIPATH_E_SMINPKTLEN)
824 strlcat(buf, "sminpktlen ", blen);
776 if (err & INFINIPATH_E_RFORMATERR) 825 if (err & INFINIPATH_E_RFORMATERR)
777 strlcat(buf, "rformaterr ", blen); 826 strlcat(buf, "rformaterr ", blen);
778 if (err & INFINIPATH_E_RUNSUPVL) 827 if (err & INFINIPATH_E_RUNSUPVL)
@@ -781,32 +830,20 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
781 strlcat(buf, "runexpchar ", blen); 830 strlcat(buf, "runexpchar ", blen);
782 if (err & INFINIPATH_E_RIBFLOW) 831 if (err & INFINIPATH_E_RIBFLOW)
783 strlcat(buf, "ribflow ", blen); 832 strlcat(buf, "ribflow ", blen);
784 if (err & INFINIPATH_E_REBP)
785 strlcat(buf, "EBP ", blen);
786 if (err & INFINIPATH_E_SUNDERRUN) 833 if (err & INFINIPATH_E_SUNDERRUN)
787 strlcat(buf, "sunderrun ", blen); 834 strlcat(buf, "sunderrun ", blen);
788 if (err & INFINIPATH_E_SPIOARMLAUNCH) 835 if (err & INFINIPATH_E_SPIOARMLAUNCH)
789 strlcat(buf, "spioarmlaunch ", blen); 836 strlcat(buf, "spioarmlaunch ", blen);
790 if (err & INFINIPATH_E_SUNEXPERRPKTNUM) 837 if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
791 strlcat(buf, "sunexperrpktnum ", blen); 838 strlcat(buf, "sunexperrpktnum ", blen);
792 if (err & INFINIPATH_E_SDROPPEDDATAPKT)
793 strlcat(buf, "sdroppeddatapkt ", blen);
794 if (err & INFINIPATH_E_SDROPPEDSMPPKT) 839 if (err & INFINIPATH_E_SDROPPEDSMPPKT)
795 strlcat(buf, "sdroppedsmppkt ", blen); 840 strlcat(buf, "sdroppedsmppkt ", blen);
796 if (err & INFINIPATH_E_SMAXPKTLEN) 841 if (err & INFINIPATH_E_SMAXPKTLEN)
797 strlcat(buf, "smaxpktlen ", blen); 842 strlcat(buf, "smaxpktlen ", blen);
798 if (err & INFINIPATH_E_SMINPKTLEN)
799 strlcat(buf, "sminpktlen ", blen);
800 if (err & INFINIPATH_E_SUNSUPVL) 843 if (err & INFINIPATH_E_SUNSUPVL)
801 strlcat(buf, "sunsupVL ", blen); 844 strlcat(buf, "sunsupVL ", blen);
802 if (err & INFINIPATH_E_SPKTLEN)
803 strlcat(buf, "spktlen ", blen);
804 if (err & INFINIPATH_E_INVALIDADDR) 845 if (err & INFINIPATH_E_INVALIDADDR)
805 strlcat(buf, "invalidaddr ", blen); 846 strlcat(buf, "invalidaddr ", blen);
806 if (err & INFINIPATH_E_RICRC)
807 strlcat(buf, "CRC ", blen);
808 if (err & INFINIPATH_E_RVCRC)
809 strlcat(buf, "VCRC ", blen);
810 if (err & INFINIPATH_E_RRCVEGRFULL) 847 if (err & INFINIPATH_E_RRCVEGRFULL)
811 strlcat(buf, "rcvegrfull ", blen); 848 strlcat(buf, "rcvegrfull ", blen);
812 if (err & INFINIPATH_E_RRCVHDRFULL) 849 if (err & INFINIPATH_E_RRCVHDRFULL)
@@ -819,6 +856,8 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
819 strlcat(buf, "hardware ", blen); 856 strlcat(buf, "hardware ", blen);
820 if (err & INFINIPATH_E_RESET) 857 if (err & INFINIPATH_E_RESET)
821 strlcat(buf, "reset ", blen); 858 strlcat(buf, "reset ", blen);
859done:
860 return iserr;
822} 861}
823 862
824/** 863/**
@@ -1662,6 +1701,22 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
1662 lstate = IPATH_LINKACTIVE; 1701 lstate = IPATH_LINKACTIVE;
1663 break; 1702 break;
1664 1703
1704 case IPATH_IB_LINK_LOOPBACK:
1705 dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");
1706 dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
1707 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1708 dd->ipath_ibcctrl);
1709 ret = 0;
1710 goto bail; // no state change to wait for
1711
1712 case IPATH_IB_LINK_EXTERNAL:
1713 dev_info(&dd->pcidev->dev, "Disabling IB local loopback (normal)\n");
1714 dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
1715 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1716 dd->ipath_ibcctrl);
1717 ret = 0;
1718 goto bail; // no state change to wait for
1719
1665 default: 1720 default:
1666 ipath_dbg("Invalid linkstate 0x%x requested\n", newstate); 1721 ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
1667 ret = -EINVAL; 1722 ret = -EINVAL;
@@ -1765,29 +1820,6 @@ int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
1765 return 0; 1820 return 0;
1766} 1821}
1767 1822
1768/**
1769 * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register
1770 * @dd: the infinipath device
1771 * @regno: the register number to read
1772 * @port: the port containing the register
1773 *
1774 * Registers that vary with the chip implementation constants (port)
1775 * use this routine.
1776 */
1777u64 ipath_read_kreg64_port(const struct ipath_devdata *dd, ipath_kreg regno,
1778 unsigned port)
1779{
1780 u16 where;
1781
1782 if (port < dd->ipath_portcnt &&
1783 (regno == dd->ipath_kregs->kr_rcvhdraddr ||
1784 regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
1785 where = regno + port;
1786 else
1787 where = -1;
1788
1789 return ipath_read_kreg64(dd, where);
1790}
1791 1823
1792/** 1824/**
1793 * ipath_write_kreg_port - write a device's per-port 64-bit kernel register 1825 * ipath_write_kreg_port - write a device's per-port 64-bit kernel register
@@ -1973,7 +2005,8 @@ static int __init infinipath_init(void)
1973{ 2005{
1974 int ret; 2006 int ret;
1975 2007
1976 ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version); 2008 if (ipath_debug & __IPATH_DBG)
2009 printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
1977 2010
1978 /* 2011 /*
1979 * These must be called before the driver is registered with 2012 * These must be called before the driver is registered with
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
index a4019a6b7560..030185f90ee2 100644
--- a/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -626,6 +626,10 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
626 } else 626 } else
627 memcpy(dd->ipath_serial, ifp->if_serial, 627 memcpy(dd->ipath_serial, ifp->if_serial,
628 sizeof ifp->if_serial); 628 sizeof ifp->if_serial);
629 if (!strstr(ifp->if_comment, "Tested successfully"))
630 ipath_dev_err(dd, "Board SN %s did not pass functional "
631 "test: %s\n", dd->ipath_serial,
632 ifp->if_comment);
629 633
630 ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n", 634 ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
631 (unsigned long long) be64_to_cpu(dd->ipath_guid)); 635 (unsigned long long) be64_to_cpu(dd->ipath_guid));
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 5d64ff875297..1272aaf2a785 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -41,12 +41,6 @@
41#include "ipath_kernel.h" 41#include "ipath_kernel.h"
42#include "ipath_common.h" 42#include "ipath_common.h"
43 43
44/*
45 * mmap64 doesn't allow all 64 bits for 32-bit applications
46 * so only use the low 43 bits.
47 */
48#define MMAP64_MASK 0x7FFFFFFFFFFUL
49
50static int ipath_open(struct inode *, struct file *); 44static int ipath_open(struct inode *, struct file *);
51static int ipath_close(struct inode *, struct file *); 45static int ipath_close(struct inode *, struct file *);
52static ssize_t ipath_write(struct file *, const char __user *, size_t, 46static ssize_t ipath_write(struct file *, const char __user *, size_t,
@@ -63,6 +57,24 @@ static const struct file_operations ipath_file_ops = {
63 .mmap = ipath_mmap 57 .mmap = ipath_mmap
64}; 58};
65 59
60/*
61 * Convert kernel virtual addresses to physical addresses so they don't
62 * potentially conflict with the chip addresses used as mmap offsets.
63 * It doesn't really matter what mmap offset we use as long as we can
64 * interpret it correctly.
65 */
66static u64 cvt_kvaddr(void *p)
67{
68 struct page *page;
69 u64 paddr = 0;
70
71 page = vmalloc_to_page(p);
72 if (page)
73 paddr = page_to_pfn(page) << PAGE_SHIFT;
74
75 return paddr;
76}
77
66static int ipath_get_base_info(struct file *fp, 78static int ipath_get_base_info(struct file *fp,
67 void __user *ubase, size_t ubase_size) 79 void __user *ubase, size_t ubase_size)
68{ 80{
@@ -87,7 +99,7 @@ static int ipath_get_base_info(struct file *fp,
87 sz = sizeof(*kinfo); 99 sz = sizeof(*kinfo);
88 /* If port sharing is not requested, allow the old size structure */ 100 /* If port sharing is not requested, allow the old size structure */
89 if (!shared) 101 if (!shared)
90 sz -= 3 * sizeof(u64); 102 sz -= 7 * sizeof(u64);
91 if (ubase_size < sz) { 103 if (ubase_size < sz) {
92 ipath_cdbg(PROC, 104 ipath_cdbg(PROC,
93 "Base size %zu, need %zu (version mismatch?)\n", 105 "Base size %zu, need %zu (version mismatch?)\n",
@@ -165,24 +177,41 @@ static int ipath_get_base_info(struct file *fp,
165 kinfo->spi_piobufbase = (u64) pd->port_piobufs + 177 kinfo->spi_piobufbase = (u64) pd->port_piobufs +
166 dd->ipath_palign * 178 dd->ipath_palign *
167 (dd->ipath_pbufsport - kinfo->spi_piocnt); 179 (dd->ipath_pbufsport - kinfo->spi_piocnt);
168 kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
169 dd->ipath_palign * pd->port_port;
170 } else { 180 } else {
171 unsigned slave = subport_fp(fp) - 1; 181 unsigned slave = subport_fp(fp) - 1;
172 182
173 kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt; 183 kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt;
174 kinfo->spi_piobufbase = (u64) pd->port_piobufs + 184 kinfo->spi_piobufbase = (u64) pd->port_piobufs +
175 dd->ipath_palign * kinfo->spi_piocnt * slave; 185 dd->ipath_palign * kinfo->spi_piocnt * slave;
176 kinfo->__spi_uregbase = ((u64) pd->subport_uregbase + 186 }
177 PAGE_SIZE * slave) & MMAP64_MASK; 187 if (shared) {
188 kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
189 dd->ipath_palign * pd->port_port;
190 kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs;
191 kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base;
192 kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr;
178 193
179 kinfo->spi_rcvhdr_base = ((u64) pd->subport_rcvhdr_base + 194 kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase +
180 pd->port_rcvhdrq_size * slave) & MMAP64_MASK; 195 PAGE_SIZE * subport_fp(fp));
181 kinfo->spi_rcvhdr_tailaddr = 196
182 (u64) pd->port_rcvhdrqtailaddr_phys & MMAP64_MASK; 197 kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base +
183 kinfo->spi_rcv_egrbufs = ((u64) pd->subport_rcvegrbuf + 198 pd->port_rcvhdrq_size * subport_fp(fp));
184 dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave) & 199 kinfo->spi_rcvhdr_tailaddr = 0;
185 MMAP64_MASK; 200 kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf +
201 pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size *
202 subport_fp(fp));
203
204 kinfo->spi_subport_uregbase =
205 cvt_kvaddr(pd->subport_uregbase);
206 kinfo->spi_subport_rcvegrbuf =
207 cvt_kvaddr(pd->subport_rcvegrbuf);
208 kinfo->spi_subport_rcvhdr_base =
209 cvt_kvaddr(pd->subport_rcvhdr_base);
210 ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
211 kinfo->spi_port, kinfo->spi_runtime_flags,
212 (unsigned long long) kinfo->spi_subport_uregbase,
213 (unsigned long long) kinfo->spi_subport_rcvegrbuf,
214 (unsigned long long) kinfo->spi_subport_rcvhdr_base);
186 } 215 }
187 216
188 kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) / 217 kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) /
@@ -199,20 +228,10 @@ static int ipath_get_base_info(struct file *fp,
199 228
200 if (master) { 229 if (master) {
201 kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER; 230 kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
202 kinfo->spi_subport_uregbase =
203 (u64) pd->subport_uregbase & MMAP64_MASK;
204 kinfo->spi_subport_rcvegrbuf =
205 (u64) pd->subport_rcvegrbuf & MMAP64_MASK;
206 kinfo->spi_subport_rcvhdr_base =
207 (u64) pd->subport_rcvhdr_base & MMAP64_MASK;
208 ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
209 kinfo->spi_port, kinfo->spi_runtime_flags,
210 (unsigned long long) kinfo->spi_subport_uregbase,
211 (unsigned long long) kinfo->spi_subport_rcvegrbuf,
212 (unsigned long long) kinfo->spi_subport_rcvhdr_base);
213 } 231 }
214 232
215 if (copy_to_user(ubase, kinfo, sizeof(*kinfo))) 233 sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
234 if (copy_to_user(ubase, kinfo, sz))
216 ret = -EFAULT; 235 ret = -EFAULT;
217 236
218bail: 237bail:
@@ -1132,67 +1151,55 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
1132 struct ipath_devdata *dd; 1151 struct ipath_devdata *dd;
1133 void *addr; 1152 void *addr;
1134 size_t size; 1153 size_t size;
1135 int ret; 1154 int ret = 0;
1136 1155
1137 /* If the port is not shared, all addresses should be physical */ 1156 /* If the port is not shared, all addresses should be physical */
1138 if (!pd->port_subport_cnt) { 1157 if (!pd->port_subport_cnt)
1139 ret = -EINVAL;
1140 goto bail; 1158 goto bail;
1141 }
1142 1159
1143 dd = pd->port_dd; 1160 dd = pd->port_dd;
1144 size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size; 1161 size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
1145 1162
1146 /* 1163 /*
1147 * Master has all the slave uregbase, rcvhdrq, and 1164 * Each process has all the subport uregbase, rcvhdrq, and
1148 * rcvegrbufs mmapped. 1165 * rcvegrbufs mmapped - as an array for all the processes,
1166 * and also separately for this process.
1149 */ 1167 */
1150 if (subport == 0) { 1168 if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
1151 unsigned num_slaves = pd->port_subport_cnt - 1; 1169 addr = pd->subport_uregbase;
1152 1170 size = PAGE_SIZE * pd->port_subport_cnt;
1153 if (pgaddr == ((u64) pd->subport_uregbase & MMAP64_MASK)) { 1171 } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
1154 addr = pd->subport_uregbase; 1172 addr = pd->subport_rcvhdr_base;
1155 size = PAGE_SIZE * num_slaves; 1173 size = pd->port_rcvhdrq_size * pd->port_subport_cnt;
1156 } else if (pgaddr == ((u64) pd->subport_rcvhdr_base & 1174 } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
1157 MMAP64_MASK)) { 1175 addr = pd->subport_rcvegrbuf;
1158 addr = pd->subport_rcvhdr_base; 1176 size *= pd->port_subport_cnt;
1159 size = pd->port_rcvhdrq_size * num_slaves; 1177 } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
1160 } else if (pgaddr == ((u64) pd->subport_rcvegrbuf & 1178 PAGE_SIZE * subport)) {
1161 MMAP64_MASK)) { 1179 addr = pd->subport_uregbase + PAGE_SIZE * subport;
1162 addr = pd->subport_rcvegrbuf; 1180 size = PAGE_SIZE;
1163 size *= num_slaves; 1181 } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
1164 } else { 1182 pd->port_rcvhdrq_size * subport)) {
1165 ret = -EINVAL; 1183 addr = pd->subport_rcvhdr_base +
1166 goto bail; 1184 pd->port_rcvhdrq_size * subport;
1167 } 1185 size = pd->port_rcvhdrq_size;
1168 } else if (pgaddr == (((u64) pd->subport_uregbase + 1186 } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
1169 PAGE_SIZE * (subport - 1)) & MMAP64_MASK)) { 1187 size * subport)) {
1170 addr = pd->subport_uregbase + PAGE_SIZE * (subport - 1); 1188 addr = pd->subport_rcvegrbuf + size * subport;
1171 size = PAGE_SIZE; 1189 /* rcvegrbufs are read-only on the slave */
1172 } else if (pgaddr == (((u64) pd->subport_rcvhdr_base + 1190 if (vma->vm_flags & VM_WRITE) {
1173 pd->port_rcvhdrq_size * (subport - 1)) & 1191 dev_info(&dd->pcidev->dev,
1174 MMAP64_MASK)) { 1192 "Can't map eager buffers as "
1175 addr = pd->subport_rcvhdr_base + 1193 "writable (flags=%lx)\n", vma->vm_flags);
1176 pd->port_rcvhdrq_size * (subport - 1); 1194 ret = -EPERM;
1177 size = pd->port_rcvhdrq_size; 1195 goto bail;
1178 } else if (pgaddr == (((u64) pd->subport_rcvegrbuf + 1196 }
1179 size * (subport - 1)) & MMAP64_MASK)) { 1197 /*
1180 addr = pd->subport_rcvegrbuf + size * (subport - 1); 1198 * Don't allow permission to later change to writeable
1181 /* rcvegrbufs are read-only on the slave */ 1199 * with mprotect.
1182 if (vma->vm_flags & VM_WRITE) { 1200 */
1183 dev_info(&dd->pcidev->dev, 1201 vma->vm_flags &= ~VM_MAYWRITE;
1184 "Can't map eager buffers as "
1185 "writable (flags=%lx)\n", vma->vm_flags);
1186 ret = -EPERM;
1187 goto bail;
1188 }
1189 /*
1190 * Don't allow permission to later change to writeable
1191 * with mprotect.
1192 */
1193 vma->vm_flags &= ~VM_MAYWRITE;
1194 } else { 1202 } else {
1195 ret = -EINVAL;
1196 goto bail; 1203 goto bail;
1197 } 1204 }
1198 len = vma->vm_end - vma->vm_start; 1205 len = vma->vm_end - vma->vm_start;
@@ -1205,7 +1212,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
1205 vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; 1212 vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
1206 vma->vm_ops = &ipath_file_vm_ops; 1213 vma->vm_ops = &ipath_file_vm_ops;
1207 vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; 1214 vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
1208 ret = 0; 1215 ret = 1;
1209 1216
1210bail: 1217bail:
1211 return ret; 1218 return ret;
@@ -1265,19 +1272,20 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
1265 * Check for kernel virtual addresses first, anything else must 1272 * Check for kernel virtual addresses first, anything else must
1266 * match a HW or memory address. 1273 * match a HW or memory address.
1267 */ 1274 */
1268 if (pgaddr >= (1ULL<<40)) { 1275 ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
1269 ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp)); 1276 if (ret) {
1277 if (ret > 0)
1278 ret = 0;
1270 goto bail; 1279 goto bail;
1271 } 1280 }
1272 1281
1282 ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
1273 if (!pd->port_subport_cnt) { 1283 if (!pd->port_subport_cnt) {
1274 /* port is not shared */ 1284 /* port is not shared */
1275 ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
1276 piocnt = dd->ipath_pbufsport; 1285 piocnt = dd->ipath_pbufsport;
1277 piobufs = pd->port_piobufs; 1286 piobufs = pd->port_piobufs;
1278 } else if (!subport_fp(fp)) { 1287 } else if (!subport_fp(fp)) {
1279 /* caller is the master */ 1288 /* caller is the master */
1280 ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
1281 piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) + 1289 piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) +
1282 (dd->ipath_pbufsport % pd->port_subport_cnt); 1290 (dd->ipath_pbufsport % pd->port_subport_cnt);
1283 piobufs = pd->port_piobufs + 1291 piobufs = pd->port_piobufs +
@@ -1286,7 +1294,6 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
1286 unsigned slave = subport_fp(fp) - 1; 1294 unsigned slave = subport_fp(fp) - 1;
1287 1295
1288 /* caller is a slave */ 1296 /* caller is a slave */
1289 ureg = 0;
1290 piocnt = dd->ipath_pbufsport / pd->port_subport_cnt; 1297 piocnt = dd->ipath_pbufsport / pd->port_subport_cnt;
1291 piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave; 1298 piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
1292 } 1299 }
@@ -1300,9 +1307,6 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
1300 ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, 1307 ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
1301 (void *) dd->ipath_pioavailregs_dma, 1308 (void *) dd->ipath_pioavailregs_dma,
1302 "pioavail registers"); 1309 "pioavail registers");
1303 else if (subport_fp(fp))
1304 /* Subports don't mmap the physical receive buffers */
1305 ret = -EINVAL;
1306 else if (pgaddr == pd->port_rcvegr_phys) 1310 else if (pgaddr == pd->port_rcvegr_phys)
1307 ret = mmap_rcvegrbufs(vma, pd); 1311 ret = mmap_rcvegrbufs(vma, pd);
1308 else if (pgaddr == (u64) pd->port_rcvhdrq_phys) 1312 else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
@@ -1400,32 +1404,41 @@ static int init_subports(struct ipath_devdata *dd,
1400 const struct ipath_user_info *uinfo) 1404 const struct ipath_user_info *uinfo)
1401{ 1405{
1402 int ret = 0; 1406 int ret = 0;
1403 unsigned num_slaves; 1407 unsigned num_subports;
1404 size_t size; 1408 size_t size;
1405 1409
1406 /* Old user binaries don't know about subports */
1407 if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
1408 goto bail;
1409 /* 1410 /*
1410 * If the user is requesting zero or one port, 1411 * If the user is requesting zero or one port,
1411 * skip the subport allocation. 1412 * skip the subport allocation.
1412 */ 1413 */
1413 if (uinfo->spu_subport_cnt <= 1) 1414 if (uinfo->spu_subport_cnt <= 1)
1414 goto bail; 1415 goto bail;
1415 if (uinfo->spu_subport_cnt > 4) { 1416
1417 /* Old user binaries don't know about new subport implementation */
1418 if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) {
1419 dev_info(&dd->pcidev->dev,
1420 "Mismatched user minor version (%d) and driver "
1421 "minor version (%d) while port sharing. Ensure "
1422 "that driver and library are from the same "
1423 "release.\n",
1424 (int) (uinfo->spu_userversion & 0xffff),
1425 IPATH_USER_SWMINOR);
1426 goto bail;
1427 }
1428 if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) {
1416 ret = -EINVAL; 1429 ret = -EINVAL;
1417 goto bail; 1430 goto bail;
1418 } 1431 }
1419 1432
1420 num_slaves = uinfo->spu_subport_cnt - 1; 1433 num_subports = uinfo->spu_subport_cnt;
1421 pd->subport_uregbase = vmalloc(PAGE_SIZE * num_slaves); 1434 pd->subport_uregbase = vmalloc(PAGE_SIZE * num_subports);
1422 if (!pd->subport_uregbase) { 1435 if (!pd->subport_uregbase) {
1423 ret = -ENOMEM; 1436 ret = -ENOMEM;
1424 goto bail; 1437 goto bail;
1425 } 1438 }
1426 /* Note: pd->port_rcvhdrq_size isn't initialized yet. */ 1439 /* Note: pd->port_rcvhdrq_size isn't initialized yet. */
1427 size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * 1440 size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
1428 sizeof(u32), PAGE_SIZE) * num_slaves; 1441 sizeof(u32), PAGE_SIZE) * num_subports;
1429 pd->subport_rcvhdr_base = vmalloc(size); 1442 pd->subport_rcvhdr_base = vmalloc(size);
1430 if (!pd->subport_rcvhdr_base) { 1443 if (!pd->subport_rcvhdr_base) {
1431 ret = -ENOMEM; 1444 ret = -ENOMEM;
@@ -1434,7 +1447,7 @@ static int init_subports(struct ipath_devdata *dd,
1434 1447
1435 pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks * 1448 pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks *
1436 pd->port_rcvegrbuf_size * 1449 pd->port_rcvegrbuf_size *
1437 num_slaves); 1450 num_subports);
1438 if (!pd->subport_rcvegrbuf) { 1451 if (!pd->subport_rcvegrbuf) {
1439 ret = -ENOMEM; 1452 ret = -ENOMEM;
1440 goto bail_rhdr; 1453 goto bail_rhdr;
@@ -1443,6 +1456,12 @@ static int init_subports(struct ipath_devdata *dd,
1443 pd->port_subport_cnt = uinfo->spu_subport_cnt; 1456 pd->port_subport_cnt = uinfo->spu_subport_cnt;
1444 pd->port_subport_id = uinfo->spu_subport_id; 1457 pd->port_subport_id = uinfo->spu_subport_id;
1445 pd->active_slaves = 1; 1458 pd->active_slaves = 1;
1459 set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
1460 memset(pd->subport_uregbase, 0, PAGE_SIZE * num_subports);
1461 memset(pd->subport_rcvhdr_base, 0, size);
1462 memset(pd->subport_rcvegrbuf, 0, pd->port_rcvegrbuf_chunks *
1463 pd->port_rcvegrbuf_size *
1464 num_subports);
1446 goto bail; 1465 goto bail;
1447 1466
1448bail_rhdr: 1467bail_rhdr:
@@ -1573,18 +1592,19 @@ static int find_best_unit(struct file *fp,
1573 */ 1592 */
1574 if (!cpus_empty(current->cpus_allowed) && 1593 if (!cpus_empty(current->cpus_allowed) &&
1575 !cpus_full(current->cpus_allowed)) { 1594 !cpus_full(current->cpus_allowed)) {
1576 int ncpus = num_online_cpus(), curcpu = -1; 1595 int ncpus = num_online_cpus(), curcpu = -1, nset = 0;
1577 for (i = 0; i < ncpus; i++) 1596 for (i = 0; i < ncpus; i++)
1578 if (cpu_isset(i, current->cpus_allowed)) { 1597 if (cpu_isset(i, current->cpus_allowed)) {
1579 ipath_cdbg(PROC, "%s[%u] affinity set for " 1598 ipath_cdbg(PROC, "%s[%u] affinity set for "
1580 "cpu %d\n", current->comm, 1599 "cpu %d/%d\n", current->comm,
1581 current->pid, i); 1600 current->pid, i, ncpus);
1582 curcpu = i; 1601 curcpu = i;
1602 nset++;
1583 } 1603 }
1584 if (curcpu != -1) { 1604 if (curcpu != -1 && nset != ncpus) {
1585 if (npresent) { 1605 if (npresent) {
1586 prefunit = curcpu / (ncpus / npresent); 1606 prefunit = curcpu / (ncpus / npresent);
1587 ipath_dbg("%s[%u] %d chips, %d cpus, " 1607 ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, "
1588 "%d cpus/chip, select unit %d\n", 1608 "%d cpus/chip, select unit %d\n",
1589 current->comm, current->pid, 1609 current->comm, current->pid,
1590 npresent, ncpus, ncpus / npresent, 1610 npresent, ncpus, ncpus / npresent,
@@ -1764,11 +1784,17 @@ static int ipath_do_user_init(struct file *fp,
1764 const struct ipath_user_info *uinfo) 1784 const struct ipath_user_info *uinfo)
1765{ 1785{
1766 int ret; 1786 int ret;
1767 struct ipath_portdata *pd; 1787 struct ipath_portdata *pd = port_fp(fp);
1768 struct ipath_devdata *dd; 1788 struct ipath_devdata *dd;
1769 u32 head32; 1789 u32 head32;
1770 1790
1771 pd = port_fp(fp); 1791 /* Subports don't need to initialize anything since master did it. */
1792 if (subport_fp(fp)) {
1793 ret = wait_event_interruptible(pd->port_wait,
1794 !test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag));
1795 goto done;
1796 }
1797
1772 dd = pd->port_dd; 1798 dd = pd->port_dd;
1773 1799
1774 if (uinfo->spu_rcvhdrsize) { 1800 if (uinfo->spu_rcvhdrsize) {
@@ -1826,6 +1852,11 @@ static int ipath_do_user_init(struct file *fp,
1826 dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD); 1852 dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
1827 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 1853 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1828 dd->ipath_rcvctrl); 1854 dd->ipath_rcvctrl);
1855 /* Notify any waiting slaves */
1856 if (pd->port_subport_cnt) {
1857 clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
1858 wake_up(&pd->port_wait);
1859 }
1829done: 1860done:
1830 return ret; 1861 return ret;
1831} 1862}
@@ -2017,6 +2048,17 @@ static int ipath_get_slave_info(struct ipath_portdata *pd,
2017 return ret; 2048 return ret;
2018} 2049}
2019 2050
2051static int ipath_force_pio_avail_update(struct ipath_devdata *dd)
2052{
2053 u64 reg = dd->ipath_sendctrl;
2054
2055 clear_bit(IPATH_S_PIOBUFAVAILUPD, &reg);
2056 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, reg);
2057 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
2058
2059 return 0;
2060}
2061
2020static ssize_t ipath_write(struct file *fp, const char __user *data, 2062static ssize_t ipath_write(struct file *fp, const char __user *data,
2021 size_t count, loff_t *off) 2063 size_t count, loff_t *off)
2022{ 2064{
@@ -2071,27 +2113,35 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
2071 dest = &cmd.cmd.part_key; 2113 dest = &cmd.cmd.part_key;
2072 src = &ucmd->cmd.part_key; 2114 src = &ucmd->cmd.part_key;
2073 break; 2115 break;
2074 case IPATH_CMD_SLAVE_INFO: 2116 case __IPATH_CMD_SLAVE_INFO:
2075 copy = sizeof(cmd.cmd.slave_mask_addr); 2117 copy = sizeof(cmd.cmd.slave_mask_addr);
2076 dest = &cmd.cmd.slave_mask_addr; 2118 dest = &cmd.cmd.slave_mask_addr;
2077 src = &ucmd->cmd.slave_mask_addr; 2119 src = &ucmd->cmd.slave_mask_addr;
2078 break; 2120 break;
2121 case IPATH_CMD_PIOAVAILUPD: // force an update of PIOAvail reg
2122 copy = 0;
2123 src = NULL;
2124 dest = NULL;
2125 break;
2079 default: 2126 default:
2080 ret = -EINVAL; 2127 ret = -EINVAL;
2081 goto bail; 2128 goto bail;
2082 } 2129 }
2083 2130
2084 if ((count - consumed) < copy) { 2131 if (copy) {
2085 ret = -EINVAL; 2132 if ((count - consumed) < copy) {
2086 goto bail; 2133 ret = -EINVAL;
2087 } 2134 goto bail;
2135 }
2088 2136
2089 if (copy_from_user(dest, src, copy)) { 2137 if (copy_from_user(dest, src, copy)) {
2090 ret = -EFAULT; 2138 ret = -EFAULT;
2091 goto bail; 2139 goto bail;
2140 }
2141
2142 consumed += copy;
2092 } 2143 }
2093 2144
2094 consumed += copy;
2095 pd = port_fp(fp); 2145 pd = port_fp(fp);
2096 if (!pd && cmd.type != __IPATH_CMD_USER_INIT && 2146 if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
2097 cmd.type != IPATH_CMD_ASSIGN_PORT) { 2147 cmd.type != IPATH_CMD_ASSIGN_PORT) {
@@ -2137,11 +2187,14 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
2137 case IPATH_CMD_SET_PART_KEY: 2187 case IPATH_CMD_SET_PART_KEY:
2138 ret = ipath_set_part_key(pd, cmd.cmd.part_key); 2188 ret = ipath_set_part_key(pd, cmd.cmd.part_key);
2139 break; 2189 break;
2140 case IPATH_CMD_SLAVE_INFO: 2190 case __IPATH_CMD_SLAVE_INFO:
2141 ret = ipath_get_slave_info(pd, 2191 ret = ipath_get_slave_info(pd,
2142 (void __user *) (unsigned long) 2192 (void __user *) (unsigned long)
2143 cmd.cmd.slave_mask_addr); 2193 cmd.cmd.slave_mask_addr);
2144 break; 2194 break;
2195 case IPATH_CMD_PIOAVAILUPD:
2196 ret = ipath_force_pio_avail_update(pd->port_dd);
2197 break;
2145 } 2198 }
2146 2199
2147 if (ret >= 0) 2200 if (ret >= 0)
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 993482545021..4171198fc202 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -43,6 +43,9 @@
43#include "ipath_kernel.h" 43#include "ipath_kernel.h"
44#include "ipath_registers.h" 44#include "ipath_registers.h"
45 45
46static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64);
47
48
46/* 49/*
47 * This lists the InfiniPath registers, in the actual chip layout. 50 * This lists the InfiniPath registers, in the actual chip layout.
48 * This structure should never be directly accessed. 51 * This structure should never be directly accessed.
@@ -208,8 +211,8 @@ static const struct ipath_kregs ipath_ht_kregs = {
208 .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus), 211 .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
209 .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig), 212 .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
210 /* 213 /*
211 * These should not be used directly via ipath_read_kreg64(), 214 * These should not be used directly via ipath_write_kreg64(),
212 * use them with ipath_read_kreg64_port(), 215 * use them with ipath_write_kreg64_port(),
213 */ 216 */
214 .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), 217 .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
215 .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0) 218 .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0)
@@ -284,6 +287,14 @@ static const struct ipath_cregs ipath_ht_cregs = {
284#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000 287#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000
285#define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000 288#define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000
286 289
290
291/* TID entries (memory), HT-only */
292#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
293#define INFINIPATH_RT_VALID 0x8000000000000000ULL
294#define INFINIPATH_RT_ADDR_SHIFT 0
295#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL
296#define INFINIPATH_RT_BUFSIZE_SHIFT 48
297
287/* 298/*
288 * masks and bits that are different in different chips, or present only 299 * masks and bits that are different in different chips, or present only
289 * in one 300 * in one
@@ -402,6 +413,14 @@ static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
402 INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), 413 INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
403}; 414};
404 415
416#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
417 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
418 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
419#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \
420 << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
421
422static int ipath_ht_txe_recover(struct ipath_devdata *);
423
405/** 424/**
406 * ipath_ht_handle_hwerrors - display hardware errors. 425 * ipath_ht_handle_hwerrors - display hardware errors.
407 * @dd: the infinipath device 426 * @dd: the infinipath device
@@ -450,13 +469,12 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
450 469
451 /* 470 /*
452 * make sure we get this much out, unless told to be quiet, 471 * make sure we get this much out, unless told to be quiet,
472 * it's a parity error we may recover from,
453 * or it's occurred within the last 5 seconds 473 * or it's occurred within the last 5 seconds
454 */ 474 */
455 if ((hwerrs & ~(dd->ipath_lasthwerror | 475 if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY |
456 ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | 476 RXE_EAGER_PARITY)) ||
457 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) 477 (ipath_debug & __IPATH_VERBDBG))
458 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
459 (ipath_debug & __IPATH_VERBDBG))
460 dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " 478 dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
461 "(cleared)\n", (unsigned long long) hwerrs); 479 "(cleared)\n", (unsigned long long) hwerrs);
462 dd->ipath_lasthwerror |= hwerrs; 480 dd->ipath_lasthwerror |= hwerrs;
@@ -467,7 +485,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
467 (hwerrs & ~dd->ipath_hwe_bitsextant)); 485 (hwerrs & ~dd->ipath_hwe_bitsextant));
468 486
469 ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); 487 ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
470 if (ctrl & INFINIPATH_C_FREEZEMODE) { 488 if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
471 /* 489 /*
472 * parity errors in send memory are recoverable, 490 * parity errors in send memory are recoverable,
473 * just cancel the send (if indicated in * sendbuffererror), 491 * just cancel the send (if indicated in * sendbuffererror),
@@ -476,50 +494,14 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
476 * occur if a processor speculative read is done to the PIO 494 * occur if a processor speculative read is done to the PIO
477 * buffer while we are sending a packet, for example. 495 * buffer while we are sending a packet, for example.
478 */ 496 */
479 if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | 497 if ((hwerrs & TXE_PIO_PARITY) && ipath_ht_txe_recover(dd))
480 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) 498 hwerrs &= ~TXE_PIO_PARITY;
481 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { 499 if (hwerrs & RXE_EAGER_PARITY)
482 ipath_stats.sps_txeparity++; 500 ipath_dev_err(dd, "RXE parity, Eager TID error is not "
483 ipath_dbg("Recovering from TXE parity error (%llu), " 501 "recoverable\n");
484 "hwerrstatus=%llx\n", 502 if (!hwerrs) {
485 (unsigned long long) ipath_stats.sps_txeparity, 503 ipath_dbg("Clearing freezemode on ignored or "
486 (unsigned long long) hwerrs); 504 "recovered hardware error\n");
487 ipath_disarm_senderrbufs(dd);
488 hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
489 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
490 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
491 if (!hwerrs) { /* else leave in freeze mode */
492 ipath_write_kreg(dd,
493 dd->ipath_kregs->kr_control,
494 dd->ipath_control);
495 return;
496 }
497 }
498 if (hwerrs) {
499 /*
500 * if any set that we aren't ignoring; only
501 * make the complaint once, in case it's stuck
502 * or recurring, and we get here multiple
503 * times.
504 */
505 if (dd->ipath_flags & IPATH_INITTED) {
506 ipath_dev_err(dd, "Fatal Hardware Error (freeze "
507 "mode), no longer usable, SN %.16s\n",
508 dd->ipath_serial);
509 isfatal = 1;
510 }
511 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
512 /* mark as having had error */
513 *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
514 /*
515 * mark as not usable, at a minimum until driver
516 * is reloaded, probably until reboot, since no
517 * other reset is possible.
518 */
519 dd->ipath_flags &= ~IPATH_INITTED;
520 } else {
521 ipath_dbg("Clearing freezemode on ignored hardware "
522 "error\n");
523 ctrl &= ~INFINIPATH_C_FREEZEMODE; 505 ctrl &= ~INFINIPATH_C_FREEZEMODE;
524 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 506 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
525 ctrl); 507 ctrl);
@@ -587,7 +569,39 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
587 dd->ipath_hwerrmask); 569 dd->ipath_hwerrmask);
588 } 570 }
589 571
590 ipath_dev_err(dd, "%s hardware error\n", msg); 572 if (hwerrs) {
573 /*
574 * if any set that we aren't ignoring; only
575 * make the complaint once, in case it's stuck
576 * or recurring, and we get here multiple
577 * times.
578 * force link down, so switch knows, and
579 * LEDs are turned off
580 */
581 if (dd->ipath_flags & IPATH_INITTED) {
582 ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
583 ipath_setup_ht_setextled(dd,
584 INFINIPATH_IBCS_L_STATE_DOWN,
585 INFINIPATH_IBCS_LT_STATE_DISABLED);
586 ipath_dev_err(dd, "Fatal Hardware Error (freeze "
587 "mode), no longer usable, SN %.16s\n",
588 dd->ipath_serial);
589 isfatal = 1;
590 }
591 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
592 /* mark as having had error */
593 *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
594 /*
595 * mark as not usable, at a minimum until driver
596 * is reloaded, probably until reboot, since no
597 * other reset is possible.
598 */
599 dd->ipath_flags &= ~IPATH_INITTED;
600 }
601 else
602 *msg = 0; /* recovered from all of them */
603 if (*msg)
604 ipath_dev_err(dd, "%s hardware error\n", msg);
591 if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) 605 if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
592 /* 606 /*
593 * for status file; if no trailing brace is copied, 607 * for status file; if no trailing brace is copied,
@@ -658,7 +672,8 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
658 if (n) 672 if (n)
659 snprintf(name, namelen, "%s", n); 673 snprintf(name, namelen, "%s", n);
660 674
661 if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) { 675 if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
676 dd->ipath_minrev > 3)) {
662 /* 677 /*
663 * This version of the driver only supports Rev 3.2 and 3.3 678 * This version of the driver only supports Rev 3.2 and 3.3
664 */ 679 */
@@ -1163,6 +1178,8 @@ static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
1163 1178
1164 if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) 1179 if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
1165 ipath_dev_err(dd, "MemBIST did not complete!\n"); 1180 ipath_dev_err(dd, "MemBIST did not complete!\n");
1181 if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT)
1182 ipath_dbg("MemBIST corrected\n");
1166 1183
1167 ipath_check_htlink(dd); 1184 ipath_check_htlink(dd);
1168 1185
@@ -1366,6 +1383,9 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
1366 u64 __iomem *tidptr, u32 type, 1383 u64 __iomem *tidptr, u32 type,
1367 unsigned long pa) 1384 unsigned long pa)
1368{ 1385{
1386 if (!dd->ipath_kregbase)
1387 return;
1388
1369 if (pa != dd->ipath_tidinvalid) { 1389 if (pa != dd->ipath_tidinvalid) {
1370 if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) { 1390 if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) {
1371 dev_info(&dd->pcidev->dev, 1391 dev_info(&dd->pcidev->dev,
@@ -1382,10 +1402,10 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
1382 pa |= lenvalid | INFINIPATH_RT_VALID; 1402 pa |= lenvalid | INFINIPATH_RT_VALID;
1383 } 1403 }
1384 } 1404 }
1385 if (dd->ipath_kregbase) 1405 writeq(pa, tidptr);
1386 writeq(pa, tidptr);
1387} 1406}
1388 1407
1408
1389/** 1409/**
1390 * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager 1410 * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager
1391 * @dd: the infinipath device 1411 * @dd: the infinipath device
@@ -1515,7 +1535,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
1515 INFINIPATH_S_ABORT); 1535 INFINIPATH_S_ABORT);
1516 1536
1517 ipath_get_eeprom_info(dd); 1537 ipath_get_eeprom_info(dd);
1518 if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && 1538 if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
1519 dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') { 1539 dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') {
1520 /* 1540 /*
1521 * Later production QHT7040 has same changes as QHT7140, so 1541 * Later production QHT7040 has same changes as QHT7140, so
@@ -1528,6 +1548,24 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
1528 return 0; 1548 return 0;
1529} 1549}
1530 1550
1551
1552static int ipath_ht_txe_recover(struct ipath_devdata *dd)
1553{
1554 int cnt = ++ipath_stats.sps_txeparity;
1555 if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) {
1556 if (cnt == IPATH_MAX_PARITY_ATTEMPTS)
1557 ipath_dev_err(dd,
1558 "Too many attempts to recover from "
1559 "TXE parity, giving up\n");
1560 return 0;
1561 }
1562 dev_info(&dd->pcidev->dev,
1563 "Recovering from TXE PIO parity error\n");
1564 ipath_disarm_senderrbufs(dd, 1);
1565 return 1;
1566}
1567
1568
1531/** 1569/**
1532 * ipath_init_ht_get_base_info - set chip-specific flags for user code 1570 * ipath_init_ht_get_base_info - set chip-specific flags for user code
1533 * @dd: the infinipath device 1571 * @dd: the infinipath device
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 05918e1e7c36..1b9c30857754 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -43,6 +43,8 @@
43#include "ipath_kernel.h" 43#include "ipath_kernel.h"
44#include "ipath_registers.h" 44#include "ipath_registers.h"
45 45
46static void ipath_setup_pe_setextled(struct ipath_devdata *, u64, u64);
47
46/* 48/*
47 * This file contains all the chip-specific register information and 49 * This file contains all the chip-specific register information and
48 * access functions for the QLogic InfiniPath PCI-Express chip. 50 * access functions for the QLogic InfiniPath PCI-Express chip.
@@ -207,8 +209,8 @@ static const struct ipath_kregs ipath_pe_kregs = {
207 .kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg), 209 .kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg),
208 210
209 /* 211 /*
210 * These should not be used directly via ipath_read_kreg64(), 212 * These should not be used directly via ipath_write_kreg64(),
211 * use them with ipath_read_kreg64_port() 213 * use them with ipath_write_kreg64_port(),
212 */ 214 */
213 .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), 215 .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
214 .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0), 216 .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
@@ -321,6 +323,12 @@ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
321 INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), 323 INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
322}; 324};
323 325
326#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
327 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
328 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
329
330static int ipath_pe_txe_recover(struct ipath_devdata *);
331
324/** 332/**
325 * ipath_pe_handle_hwerrors - display hardware errors. 333 * ipath_pe_handle_hwerrors - display hardware errors.
326 * @dd: the infinipath device 334 * @dd: the infinipath device
@@ -394,32 +402,21 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
394 * occur if a processor speculative read is done to the PIO 402 * occur if a processor speculative read is done to the PIO
395 * buffer while we are sending a packet, for example. 403 * buffer while we are sending a packet, for example.
396 */ 404 */
397 if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | 405 if ((hwerrs & TXE_PIO_PARITY) && ipath_pe_txe_recover(dd))
398 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) 406 hwerrs &= ~TXE_PIO_PARITY;
399 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
400 ipath_stats.sps_txeparity++;
401 ipath_dbg("Recovering from TXE parity error (%llu), "
402 "hwerrstatus=%llx\n",
403 (unsigned long long) ipath_stats.sps_txeparity,
404 (unsigned long long) hwerrs);
405 ipath_disarm_senderrbufs(dd);
406 hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
407 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
408 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
409 if (!hwerrs) { /* else leave in freeze mode */
410 ipath_write_kreg(dd,
411 dd->ipath_kregs->kr_control,
412 dd->ipath_control);
413 return;
414 }
415 }
416 if (hwerrs) { 407 if (hwerrs) {
417 /* 408 /*
418 * if any set that we aren't ignoring only make the 409 * if any set that we aren't ignoring only make the
419 * complaint once, in case it's stuck or recurring, 410 * complaint once, in case it's stuck or recurring,
420 * and we get here multiple times 411 * and we get here multiple times
412 * Force link down, so switch knows, and
413 * LEDs are turned off
421 */ 414 */
422 if (dd->ipath_flags & IPATH_INITTED) { 415 if (dd->ipath_flags & IPATH_INITTED) {
416 ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
417 ipath_setup_pe_setextled(dd,
418 INFINIPATH_IBCS_L_STATE_DOWN,
419 INFINIPATH_IBCS_LT_STATE_DISABLED);
423 ipath_dev_err(dd, "Fatal Hardware Error (freeze " 420 ipath_dev_err(dd, "Fatal Hardware Error (freeze "
424 "mode), no longer usable, SN %.16s\n", 421 "mode), no longer usable, SN %.16s\n",
425 dd->ipath_serial); 422 dd->ipath_serial);
@@ -493,7 +490,8 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
493 dd->ipath_hwerrmask); 490 dd->ipath_hwerrmask);
494 } 491 }
495 492
496 ipath_dev_err(dd, "%s hardware error\n", msg); 493 if (*msg)
494 ipath_dev_err(dd, "%s hardware error\n", msg);
497 if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) { 495 if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) {
498 /* 496 /*
499 * for /sys status file ; if no trailing } is copied, we'll 497 * for /sys status file ; if no trailing } is copied, we'll
@@ -581,6 +579,8 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
581 579
582 if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) 580 if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
583 ipath_dev_err(dd, "MemBIST did not complete!\n"); 581 ipath_dev_err(dd, "MemBIST did not complete!\n");
582 if (extsval & INFINIPATH_EXTS_MEMBIST_FOUND)
583 ipath_dbg("MemBIST corrected\n");
584 584
585 val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */ 585 val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */
586 586
@@ -1330,6 +1330,35 @@ static void ipath_pe_free_irq(struct ipath_devdata *dd)
1330 dd->ipath_irq = 0; 1330 dd->ipath_irq = 0;
1331} 1331}
1332 1332
1333/*
1334 * On platforms using this chip, and not having ordered WC stores, we
1335 * can get TXE parity errors due to speculative reads to the PIO buffers,
1336 * and this, due to a chip bug can result in (many) false parity error
1337 * reports. So it's a debug print on those, and an info print on systems
1338 * where the speculative reads don't occur.
1339 * Because we can get lots of false errors, we have no upper limit
1340 * on recovery attempts on those platforms.
1341 */
1342static int ipath_pe_txe_recover(struct ipath_devdata *dd)
1343{
1344 if (ipath_unordered_wc())
1345 ipath_dbg("Recovering from TXE PIO parity error\n");
1346 else {
1347 int cnt = ++ipath_stats.sps_txeparity;
1348 if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) {
1349 if (cnt == IPATH_MAX_PARITY_ATTEMPTS)
1350 ipath_dev_err(dd,
1351 "Too many attempts to recover from "
1352 "TXE parity, giving up\n");
1353 return 0;
1354 }
1355 dev_info(&dd->pcidev->dev,
1356 "Recovering from TXE PIO parity error\n");
1357 }
1358 ipath_disarm_senderrbufs(dd, 1);
1359 return 1;
1360}
1361
1333/** 1362/**
1334 * ipath_init_iba6120_funcs - set up the chip-specific function pointers 1363 * ipath_init_iba6120_funcs - set up the chip-specific function pointers
1335 * @dd: the infinipath device 1364 * @dd: the infinipath device
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index d4f6b5239ef8..7045ba689494 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -216,6 +216,20 @@ static int bringup_link(struct ipath_devdata *dd)
216 return ret; 216 return ret;
217} 217}
218 218
219static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd)
220{
221 struct ipath_portdata *pd = NULL;
222
223 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
224 if (pd) {
225 pd->port_dd = dd;
226 pd->port_cnt = 1;
227 /* The port 0 pkey table is used by the layer interface. */
228 pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
229 }
230 return pd;
231}
232
219static int init_chip_first(struct ipath_devdata *dd, 233static int init_chip_first(struct ipath_devdata *dd,
220 struct ipath_portdata **pdp) 234 struct ipath_portdata **pdp)
221{ 235{
@@ -271,20 +285,16 @@ static int init_chip_first(struct ipath_devdata *dd,
271 goto done; 285 goto done;
272 } 286 }
273 287
274 dd->ipath_pd[0] = kzalloc(sizeof(*pd), GFP_KERNEL); 288 pd = create_portdata0(dd);
275 289
276 if (!dd->ipath_pd[0]) { 290 if (!pd) {
277 ipath_dev_err(dd, "Unable to allocate portdata for port " 291 ipath_dev_err(dd, "Unable to allocate portdata for port "
278 "0, failing\n"); 292 "0, failing\n");
279 ret = -ENOMEM; 293 ret = -ENOMEM;
280 goto done; 294 goto done;
281 } 295 }
282 pd = dd->ipath_pd[0]; 296 dd->ipath_pd[0] = pd;
283 pd->port_dd = dd; 297
284 pd->port_port = 0;
285 pd->port_cnt = 1;
286 /* The port 0 pkey table is used by the layer interface. */
287 pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
288 dd->ipath_rcvtidcnt = 298 dd->ipath_rcvtidcnt =
289 ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt); 299 ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
290 dd->ipath_rcvtidbase = 300 dd->ipath_rcvtidbase =
@@ -590,6 +600,10 @@ static int init_housekeeping(struct ipath_devdata *dd,
590 goto done; 600 goto done;
591 } 601 }
592 602
603
604 /* clear diagctrl register, in case diags were running and crashed */
605 ipath_write_kreg (dd, dd->ipath_kregs->kr_hwdiagctrl, 0);
606
593 /* clear the initial reset flag, in case first driver load */ 607 /* clear the initial reset flag, in case first driver load */
594 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 608 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
595 INFINIPATH_E_RESET); 609 INFINIPATH_E_RESET);
@@ -668,6 +682,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
668{ 682{
669 int ret = 0, i; 683 int ret = 0, i;
670 u32 val32, kpiobufs; 684 u32 val32, kpiobufs;
685 u32 piobufs, uports;
671 u64 val; 686 u64 val;
672 struct ipath_portdata *pd = NULL; /* keep gcc4 happy */ 687 struct ipath_portdata *pd = NULL; /* keep gcc4 happy */
673 gfp_t gfp_flags = GFP_USER | __GFP_COMP; 688 gfp_t gfp_flags = GFP_USER | __GFP_COMP;
@@ -702,16 +717,17 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
702 * the in memory DMA'ed copies of the registers. This has to 717 * the in memory DMA'ed copies of the registers. This has to
703 * be done early, before we calculate lastport, etc. 718 * be done early, before we calculate lastport, etc.
704 */ 719 */
705 val = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; 720 piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
706 /* 721 /*
707 * calc number of pioavail registers, and save it; we have 2 722 * calc number of pioavail registers, and save it; we have 2
708 * bits per buffer. 723 * bits per buffer.
709 */ 724 */
710 dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2) 725 dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
711 / (sizeof(u64) * BITS_PER_BYTE / 2); 726 / (sizeof(u64) * BITS_PER_BYTE / 2);
727 uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
712 if (ipath_kpiobufs == 0) { 728 if (ipath_kpiobufs == 0) {
713 /* not set by user (this is default) */ 729 /* not set by user (this is default) */
714 if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128) 730 if (piobufs >= (uports * IPATH_MIN_USER_PORT_BUFCNT) + 32)
715 kpiobufs = 32; 731 kpiobufs = 32;
716 else 732 else
717 kpiobufs = 16; 733 kpiobufs = 16;
@@ -719,31 +735,25 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
719 else 735 else
720 kpiobufs = ipath_kpiobufs; 736 kpiobufs = ipath_kpiobufs;
721 737
722 if (kpiobufs > 738 if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) {
723 (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - 739 i = (int) piobufs -
724 (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT))) { 740 (int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
725 i = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
726 (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT);
727 if (i < 0) 741 if (i < 0)
728 i = 0; 742 i = 0;
729 dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs for " 743 dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
730 "kernel leaves too few for %d user ports " 744 "%d for kernel leaves too few for %d user ports "
731 "(%d each); using %u\n", kpiobufs, 745 "(%d each); using %u\n", kpiobufs,
732 dd->ipath_cfgports - 1, 746 piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
733 IPATH_MIN_USER_PORT_BUFCNT, i);
734 /* 747 /*
735 * shouldn't change ipath_kpiobufs, because could be 748 * shouldn't change ipath_kpiobufs, because could be
736 * different for different devices... 749 * different for different devices...
737 */ 750 */
738 kpiobufs = i; 751 kpiobufs = i;
739 } 752 }
740 dd->ipath_lastport_piobuf = 753 dd->ipath_lastport_piobuf = piobufs - kpiobufs;
741 dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - kpiobufs; 754 dd->ipath_pbufsport =
742 dd->ipath_pbufsport = dd->ipath_cfgports > 1 755 uports ? dd->ipath_lastport_piobuf / uports : 0;
743 ? dd->ipath_lastport_piobuf / (dd->ipath_cfgports - 1) 756 val32 = dd->ipath_lastport_piobuf - (dd->ipath_pbufsport * uports);
744 : 0;
745 val32 = dd->ipath_lastport_piobuf -
746 (dd->ipath_pbufsport * (dd->ipath_cfgports - 1));
747 if (val32 > 0) { 757 if (val32 > 0) {
748 ipath_dbg("allocating %u pbufs/port leaves %u unused, " 758 ipath_dbg("allocating %u pbufs/port leaves %u unused, "
749 "add to kernel\n", dd->ipath_pbufsport, val32); 759 "add to kernel\n", dd->ipath_pbufsport, val32);
@@ -754,8 +764,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
754 dd->ipath_lastpioindex = dd->ipath_lastport_piobuf; 764 dd->ipath_lastpioindex = dd->ipath_lastport_piobuf;
755 ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u " 765 ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
756 "each for %u user ports\n", kpiobufs, 766 "each for %u user ports\n", kpiobufs,
757 dd->ipath_piobcnt2k + dd->ipath_piobcnt4k, 767 piobufs, dd->ipath_pbufsport, uports);
758 dd->ipath_pbufsport, dd->ipath_cfgports - 1);
759 768
760 dd->ipath_f_early_init(dd); 769 dd->ipath_f_early_init(dd);
761 770
@@ -839,11 +848,24 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
839 * Set up the port 0 (kernel) rcvhdr q and egr TIDs. If doing 848 * Set up the port 0 (kernel) rcvhdr q and egr TIDs. If doing
840 * re-init, the simplest way to handle this is to free 849 * re-init, the simplest way to handle this is to free
841 * existing, and re-allocate. 850 * existing, and re-allocate.
851 * Need to re-create rest of port 0 portdata as well.
842 */ 852 */
843 if (reinit) { 853 if (reinit) {
844 struct ipath_portdata *pd = dd->ipath_pd[0]; 854 /* Alloc and init new ipath_portdata for port0,
845 dd->ipath_pd[0] = NULL; 855 * Then free old pd. Could lead to fragmentation, but also
846 ipath_free_pddata(dd, pd); 856 * makes later support for hot-swap easier.
857 */
858 struct ipath_portdata *npd;
859 npd = create_portdata0(dd);
860 if (npd) {
861 ipath_free_pddata(dd, pd);
862 dd->ipath_pd[0] = pd = npd;
863 } else {
864 ipath_dev_err(dd, "Unable to allocate portdata for"
865 " port 0, failing\n");
866 ret = -ENOMEM;
867 goto done;
868 }
847 } 869 }
848 dd->ipath_f_tidtemplate(dd); 870 dd->ipath_f_tidtemplate(dd);
849 ret = ipath_create_rcvhdrq(dd, pd); 871 ret = ipath_create_rcvhdrq(dd, pd);
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 72b9e279d19d..45d033169c6e 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -38,10 +38,39 @@
38#include "ipath_common.h" 38#include "ipath_common.h"
39 39
40/* 40/*
41 * clear (write) a pio buffer, to clear a parity error. This routine
42 * should only be called when in freeze mode, and the buffer should be
43 * canceled afterwards.
44 */
45static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum)
46{
47 u32 __iomem *pbuf;
48 u32 dwcnt; /* dword count to write */
49 if (pnum < dd->ipath_piobcnt2k) {
50 pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum *
51 dd->ipath_palign);
52 dwcnt = dd->ipath_piosize2k >> 2;
53 }
54 else {
55 pbuf = (u32 __iomem *) (dd->ipath_pio4kbase +
56 (pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
57 dwcnt = dd->ipath_piosize4k >> 2;
58 }
59 dev_info(&dd->pcidev->dev,
60 "Rewrite PIO buffer %u, to recover from parity error\n",
61 pnum);
62 *pbuf = dwcnt+1; /* no flush required, since already in freeze */
63 while(--dwcnt)
64 *pbuf++ = 0;
65}
66
67/*
41 * Called when we might have an error that is specific to a particular 68 * Called when we might have an error that is specific to a particular
42 * PIO buffer, and may need to cancel that buffer, so it can be re-used. 69 * PIO buffer, and may need to cancel that buffer, so it can be re-used.
70 * If rewrite is true, and bits are set in the sendbufferror registers,
71 * we'll write to the buffer, for error recovery on parity errors.
43 */ 72 */
44void ipath_disarm_senderrbufs(struct ipath_devdata *dd) 73void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
45{ 74{
46 u32 piobcnt; 75 u32 piobcnt;
47 unsigned long sbuf[4]; 76 unsigned long sbuf[4];
@@ -74,8 +103,11 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
74 } 103 }
75 104
76 for (i = 0; i < piobcnt; i++) 105 for (i = 0; i < piobcnt; i++)
77 if (test_bit(i, sbuf)) 106 if (test_bit(i, sbuf)) {
107 if (rewrite)
108 ipath_clrpiobuf(dd, i);
78 ipath_disarm_piobufs(dd, i, 1); 109 ipath_disarm_piobufs(dd, i, 1);
110 }
79 dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */ 111 dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */
80 } 112 }
81} 113}
@@ -114,7 +146,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
114{ 146{
115 u64 ignore_this_time = 0; 147 u64 ignore_this_time = 0;
116 148
117 ipath_disarm_senderrbufs(dd); 149 ipath_disarm_senderrbufs(dd, 0);
118 if ((errs & E_SUM_LINK_PKTERRS) && 150 if ((errs & E_SUM_LINK_PKTERRS) &&
119 !(dd->ipath_flags & IPATH_LINKACTIVE)) { 151 !(dd->ipath_flags & IPATH_LINKACTIVE)) {
120 /* 152 /*
@@ -403,10 +435,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd,
403 * happens so often we never want to count it. 435 * happens so often we never want to count it.
404 */ 436 */
405 if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) { 437 if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
406 ipath_decode_err(msg, sizeof msg, dd->ipath_lasterror & 438 int iserr;
407 ~INFINIPATH_E_IBSTATUSCHANGED); 439 iserr = ipath_decode_err(msg, sizeof msg,
440 dd->ipath_lasterror &
441 ~INFINIPATH_E_IBSTATUSCHANGED);
408 if (dd->ipath_lasterror & 442 if (dd->ipath_lasterror &
409 ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) 443 ~(INFINIPATH_E_RRCVEGRFULL |
444 INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
410 ipath_dev_err(dd, "Suppressed %u messages for " 445 ipath_dev_err(dd, "Suppressed %u messages for "
411 "fast-repeating errors (%s) (%llx)\n", 446 "fast-repeating errors (%s) (%llx)\n",
412 supp_msgs, msg, 447 supp_msgs, msg,
@@ -420,8 +455,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd,
420 * them. So only complain about these at debug 455 * them. So only complain about these at debug
421 * level. 456 * level.
422 */ 457 */
423 ipath_dbg("Suppressed %u messages for %s\n", 458 if (iserr)
424 supp_msgs, msg); 459 ipath_dbg("Suppressed %u messages for %s\n",
460 supp_msgs, msg);
461 else
462 ipath_cdbg(ERRPKT,
463 "Suppressed %u messages for %s\n",
464 supp_msgs, msg);
425 } 465 }
426 } 466 }
427} 467}
@@ -462,7 +502,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
462{ 502{
463 char msg[512]; 503 char msg[512];
464 u64 ignore_this_time = 0; 504 u64 ignore_this_time = 0;
465 int i; 505 int i, iserr = 0;
466 int chkerrpkts = 0, noprint = 0; 506 int chkerrpkts = 0, noprint = 0;
467 unsigned supp_msgs; 507 unsigned supp_msgs;
468 508
@@ -502,6 +542,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
502 } 542 }
503 543
504 if (supp_msgs == 250000) { 544 if (supp_msgs == 250000) {
545 int s_iserr;
505 /* 546 /*
506 * It's not entirely reasonable assuming that the errors set 547 * It's not entirely reasonable assuming that the errors set
507 * in the last clear period are all responsible for the 548 * in the last clear period are all responsible for the
@@ -511,17 +552,17 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
511 dd->ipath_maskederrs |= dd->ipath_lasterror | errs; 552 dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
512 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 553 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
513 ~dd->ipath_maskederrs); 554 ~dd->ipath_maskederrs);
514 ipath_decode_err(msg, sizeof msg, 555 s_iserr = ipath_decode_err(msg, sizeof msg,
515 (dd->ipath_maskederrs & ~dd-> 556 (dd->ipath_maskederrs & ~dd->
516 ipath_ignorederrs)); 557 ipath_ignorederrs));
517 558
518 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) & 559 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
519 ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) 560 ~(INFINIPATH_E_RRCVEGRFULL |
520 ipath_dev_err(dd, "Disabling error(s) %llx because " 561 INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
521 "occurring too frequently (%s)\n", 562 ipath_dev_err(dd, "Temporarily disabling "
522 (unsigned long long) 563 "error(s) %llx reporting; too frequent (%s)\n",
523 (dd->ipath_maskederrs & 564 (unsigned long long) (dd->ipath_maskederrs &
524 ~dd->ipath_ignorederrs), msg); 565 ~dd->ipath_ignorederrs), msg);
525 else { 566 else {
526 /* 567 /*
527 * rcvegrfull and rcvhdrqfull are "normal", 568 * rcvegrfull and rcvhdrqfull are "normal",
@@ -530,8 +571,15 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
530 * processing them. So only complain about 571 * processing them. So only complain about
531 * these at debug level. 572 * these at debug level.
532 */ 573 */
533 ipath_dbg("Disabling frequent queue full errors " 574 if (s_iserr)
534 "(%s)\n", msg); 575 ipath_dbg("Temporarily disabling reporting "
576 "too frequent queue full errors (%s)\n",
577 msg);
578 else
579 ipath_cdbg(ERRPKT,
580 "Temporarily disabling reporting too"
581 " frequent packet errors (%s)\n",
582 msg);
535 } 583 }
536 584
537 /* 585 /*
@@ -589,6 +637,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
589 ipath_stats.sps_crcerrs++; 637 ipath_stats.sps_crcerrs++;
590 chkerrpkts = 1; 638 chkerrpkts = 1;
591 } 639 }
640 iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS);
641
592 642
593 /* 643 /*
594 * We don't want to print these two as they happen, or we can make 644 * We don't want to print these two as they happen, or we can make
@@ -677,8 +727,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
677 *dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF; 727 *dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
678 } 728 }
679 729
680 if (!noprint && *msg) 730 if (!noprint && *msg) {
681 ipath_dev_err(dd, "%s error\n", msg); 731 if (iserr)
732 ipath_dev_err(dd, "%s error\n", msg);
733 else
734 dev_info(&dd->pcidev->dev, "%s packet problems\n",
735 msg);
736 }
682 if (dd->ipath_state_wanted & dd->ipath_flags) { 737 if (dd->ipath_state_wanted & dd->ipath_flags) {
683 ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, " 738 ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
684 "waking\n", dd->ipath_state_wanted, 739 "waking\n", dd->ipath_state_wanted,
@@ -819,11 +874,10 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
819 struct ipath_portdata *pd = dd->ipath_pd[i]; 874 struct ipath_portdata *pd = dd->ipath_pd[i];
820 if (portr & (1 << i) && pd && pd->port_cnt && 875 if (portr & (1 << i) && pd && pd->port_cnt &&
821 test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) { 876 test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) {
822 int rcbit;
823 clear_bit(IPATH_PORT_WAITING_RCV, 877 clear_bit(IPATH_PORT_WAITING_RCV,
824 &pd->port_flag); 878 &pd->port_flag);
825 rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT; 879 clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT,
826 clear_bit(1UL << rcbit, &dd->ipath_rcvctrl); 880 &dd->ipath_rcvctrl);
827 wake_up_interruptible(&pd->port_wait); 881 wake_up_interruptible(&pd->port_wait);
828 rcvdint = 1; 882 rcvdint = 1;
829 } 883 }
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 6d8d05fb5999..e900c2593f44 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -590,7 +590,6 @@ int ipath_enable_wc(struct ipath_devdata *dd);
590void ipath_disable_wc(struct ipath_devdata *dd); 590void ipath_disable_wc(struct ipath_devdata *dd);
591int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp); 591int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
592void ipath_shutdown_device(struct ipath_devdata *); 592void ipath_shutdown_device(struct ipath_devdata *);
593void ipath_disarm_senderrbufs(struct ipath_devdata *);
594 593
595struct file_operations; 594struct file_operations;
596int ipath_cdev_init(int minor, char *name, const struct file_operations *fops, 595int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
@@ -611,7 +610,7 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t);
611extern int ipath_diag_inuse; 610extern int ipath_diag_inuse;
612 611
613irqreturn_t ipath_intr(int irq, void *devid); 612irqreturn_t ipath_intr(int irq, void *devid);
614void ipath_decode_err(char *buf, size_t blen, ipath_err_t err); 613int ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
615#if __IPATH_INFO || __IPATH_DBG 614#if __IPATH_INFO || __IPATH_DBG
616extern const char *ipath_ibcstatus_str[]; 615extern const char *ipath_ibcstatus_str[];
617#endif 616#endif
@@ -701,6 +700,8 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
701#define IPATH_PORT_WAITING_RCV 2 700#define IPATH_PORT_WAITING_RCV 2
702 /* waiting for a PIO buffer to be available */ 701 /* waiting for a PIO buffer to be available */
703#define IPATH_PORT_WAITING_PIO 3 702#define IPATH_PORT_WAITING_PIO 3
703 /* master has not finished initializing */
704#define IPATH_PORT_MASTER_UNINIT 4
704 705
705/* free up any allocated data at closes */ 706/* free up any allocated data at closes */
706void ipath_free_data(struct ipath_portdata *dd); 707void ipath_free_data(struct ipath_portdata *dd);
@@ -711,6 +712,7 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *);
711void ipath_init_iba6110_funcs(struct ipath_devdata *); 712void ipath_init_iba6110_funcs(struct ipath_devdata *);
712void ipath_get_eeprom_info(struct ipath_devdata *); 713void ipath_get_eeprom_info(struct ipath_devdata *);
713u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); 714u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
715void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
714 716
715/* 717/*
716 * number of words used for protocol header if not set by ipath_userinit(); 718 * number of words used for protocol header if not set by ipath_userinit();
@@ -754,8 +756,6 @@ int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int);
754/* these are used for the registers that vary with port */ 756/* these are used for the registers that vary with port */
755void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg, 757void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg,
756 unsigned, u64); 758 unsigned, u64);
757u64 ipath_read_kreg64_port(const struct ipath_devdata *, ipath_kreg,
758 unsigned);
759 759
760/* 760/*
761 * We could have a single register get/put routine, that takes a group type, 761 * We could have a single register get/put routine, that takes a group type,
@@ -897,6 +897,8 @@ dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
897 897
898extern unsigned ipath_debug; /* debugging bit mask */ 898extern unsigned ipath_debug; /* debugging bit mask */
899 899
900#define IPATH_MAX_PARITY_ATTEMPTS 10000 /* max times to try recovery */
901
900const char *ipath_get_unit_name(int unit); 902const char *ipath_get_unit_name(int unit);
901 903
902extern struct mutex ipath_mutex; 904extern struct mutex ipath_mutex;
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index 851763d7d2db..dd487c100f5b 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -61,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
61 r = (r + 1) & (rkt->max - 1); 61 r = (r + 1) & (rkt->max - 1);
62 if (r == n) { 62 if (r == n) {
63 spin_unlock_irqrestore(&rkt->lock, flags); 63 spin_unlock_irqrestore(&rkt->lock, flags);
64 ipath_dbg(KERN_INFO "LKEY table full\n"); 64 ipath_dbg("LKEY table full\n");
65 ret = 0; 65 ret = 0;
66 goto bail; 66 goto bail;
67 } 67 }
@@ -133,6 +133,12 @@ int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
133 * being reversible by calling bus_to_virt(). 133 * being reversible by calling bus_to_virt().
134 */ 134 */
135 if (sge->lkey == 0) { 135 if (sge->lkey == 0) {
136 struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
137
138 if (pd->user) {
139 ret = 0;
140 goto bail;
141 }
136 isge->mr = NULL; 142 isge->mr = NULL;
137 isge->vaddr = (void *) sge->addr; 143 isge->vaddr = (void *) sge->addr;
138 isge->length = sge->length; 144 isge->length = sge->length;
@@ -206,6 +212,12 @@ int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
206 * (see ipath_get_dma_mr and ipath_dma.c). 212 * (see ipath_get_dma_mr and ipath_dma.c).
207 */ 213 */
208 if (rkey == 0) { 214 if (rkey == 0) {
215 struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
216
217 if (pd->user) {
218 ret = 0;
219 goto bail;
220 }
209 sge->mr = NULL; 221 sge->mr = NULL;
210 sge->vaddr = (void *) vaddr; 222 sge->vaddr = (void *) vaddr;
211 sge->length = len; 223 sge->length = len;
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 8cc8598d6c69..31e70732e369 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -210,9 +210,15 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
210 m = 0; 210 m = 0;
211 n = 0; 211 n = 0;
212 list_for_each_entry(chunk, &region->chunk_list, list) { 212 list_for_each_entry(chunk, &region->chunk_list, list) {
213 for (i = 0; i < chunk->nmap; i++) { 213 for (i = 0; i < chunk->nents; i++) {
214 mr->mr.map[m]->segs[n].vaddr = 214 void *vaddr;
215 page_address(chunk->page_list[i].page); 215
216 vaddr = page_address(chunk->page_list[i].page);
217 if (!vaddr) {
218 ret = ERR_PTR(-EINVAL);
219 goto bail;
220 }
221 mr->mr.map[m]->segs[n].vaddr = vaddr;
216 mr->mr.map[m]->segs[n].length = region->page_size; 222 mr->mr.map[m]->segs[n].length = region->page_size;
217 n++; 223 n++;
218 if (n == IPATH_SEGSZ) { 224 if (n == IPATH_SEGSZ) {
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 64f07b19349f..16db9ac0b402 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -81,11 +81,51 @@ static u32 credit_table[31] = {
81 32768 /* 1E */ 81 32768 /* 1E */
82}; 82};
83 83
84static u32 alloc_qpn(struct ipath_qp_table *qpt) 84
85static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map)
86{
87 unsigned long page = get_zeroed_page(GFP_KERNEL);
88 unsigned long flags;
89
90 /*
91 * Free the page if someone raced with us installing it.
92 */
93
94 spin_lock_irqsave(&qpt->lock, flags);
95 if (map->page)
96 free_page(page);
97 else
98 map->page = (void *)page;
99 spin_unlock_irqrestore(&qpt->lock, flags);
100}
101
102
103static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type)
85{ 104{
86 u32 i, offset, max_scan, qpn; 105 u32 i, offset, max_scan, qpn;
87 struct qpn_map *map; 106 struct qpn_map *map;
88 u32 ret; 107 u32 ret = -1;
108
109 if (type == IB_QPT_SMI)
110 ret = 0;
111 else if (type == IB_QPT_GSI)
112 ret = 1;
113
114 if (ret != -1) {
115 map = &qpt->map[0];
116 if (unlikely(!map->page)) {
117 get_map_page(qpt, map);
118 if (unlikely(!map->page)) {
119 ret = -ENOMEM;
120 goto bail;
121 }
122 }
123 if (!test_and_set_bit(ret, map->page))
124 atomic_dec(&map->n_free);
125 else
126 ret = -EBUSY;
127 goto bail;
128 }
89 129
90 qpn = qpt->last + 1; 130 qpn = qpt->last + 1;
91 if (qpn >= QPN_MAX) 131 if (qpn >= QPN_MAX)
@@ -95,19 +135,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt)
95 max_scan = qpt->nmaps - !offset; 135 max_scan = qpt->nmaps - !offset;
96 for (i = 0;;) { 136 for (i = 0;;) {
97 if (unlikely(!map->page)) { 137 if (unlikely(!map->page)) {
98 unsigned long page = get_zeroed_page(GFP_KERNEL); 138 get_map_page(qpt, map);
99 unsigned long flags;
100
101 /*
102 * Free the page if someone raced with us
103 * installing it:
104 */
105 spin_lock_irqsave(&qpt->lock, flags);
106 if (map->page)
107 free_page(page);
108 else
109 map->page = (void *)page;
110 spin_unlock_irqrestore(&qpt->lock, flags);
111 if (unlikely(!map->page)) 139 if (unlikely(!map->page))
112 break; 140 break;
113 } 141 }
@@ -151,7 +179,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt)
151 qpn = mk_qpn(qpt, map, offset); 179 qpn = mk_qpn(qpt, map, offset);
152 } 180 }
153 181
154 ret = 0; 182 ret = -ENOMEM;
155 183
156bail: 184bail:
157 return ret; 185 return ret;
@@ -180,29 +208,19 @@ static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp,
180 enum ib_qp_type type) 208 enum ib_qp_type type)
181{ 209{
182 unsigned long flags; 210 unsigned long flags;
183 u32 qpn;
184 int ret; 211 int ret;
185 212
186 if (type == IB_QPT_SMI) 213 ret = alloc_qpn(qpt, type);
187 qpn = 0; 214 if (ret < 0)
188 else if (type == IB_QPT_GSI) 215 goto bail;
189 qpn = 1; 216 qp->ibqp.qp_num = ret;
190 else {
191 /* Allocate the next available QPN */
192 qpn = alloc_qpn(qpt);
193 if (qpn == 0) {
194 ret = -ENOMEM;
195 goto bail;
196 }
197 }
198 qp->ibqp.qp_num = qpn;
199 217
200 /* Add the QP to the hash table. */ 218 /* Add the QP to the hash table. */
201 spin_lock_irqsave(&qpt->lock, flags); 219 spin_lock_irqsave(&qpt->lock, flags);
202 220
203 qpn %= qpt->max; 221 ret %= qpt->max;
204 qp->next = qpt->table[qpn]; 222 qp->next = qpt->table[ret];
205 qpt->table[qpn] = qp; 223 qpt->table[ret] = qp;
206 atomic_inc(&qp->refcount); 224 atomic_inc(&qp->refcount);
207 225
208 spin_unlock_irqrestore(&qpt->lock, flags); 226 spin_unlock_irqrestore(&qpt->lock, flags);
@@ -245,9 +263,7 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
245 if (!fnd) 263 if (!fnd)
246 return; 264 return;
247 265
248 /* If QPN is not reserved, mark QPN free in the bitmap. */ 266 free_qpn(qpt, qp->ibqp.qp_num);
249 if (qp->ibqp.qp_num > 1)
250 free_qpn(qpt, qp->ibqp.qp_num);
251 267
252 wait_event(qp->wait, !atomic_read(&qp->refcount)); 268 wait_event(qp->wait, !atomic_read(&qp->refcount));
253} 269}
@@ -270,11 +286,10 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt)
270 286
271 while (qp) { 287 while (qp) {
272 nqp = qp->next; 288 nqp = qp->next;
273 if (qp->ibqp.qp_num > 1) 289 free_qpn(qpt, qp->ibqp.qp_num);
274 free_qpn(qpt, qp->ibqp.qp_num);
275 if (!atomic_dec_and_test(&qp->refcount) || 290 if (!atomic_dec_and_test(&qp->refcount) ||
276 !ipath_destroy_qp(&qp->ibqp)) 291 !ipath_destroy_qp(&qp->ibqp))
277 ipath_dbg(KERN_INFO "QP memory leak!\n"); 292 ipath_dbg("QP memory leak!\n");
278 qp = nqp; 293 qp = nqp;
279 } 294 }
280 } 295 }
@@ -320,7 +335,8 @@ static void ipath_reset_qp(struct ipath_qp *qp)
320 qp->remote_qpn = 0; 335 qp->remote_qpn = 0;
321 qp->qkey = 0; 336 qp->qkey = 0;
322 qp->qp_access_flags = 0; 337 qp->qp_access_flags = 0;
323 clear_bit(IPATH_S_BUSY, &qp->s_flags); 338 qp->s_busy = 0;
339 qp->s_flags &= ~IPATH_S_SIGNAL_REQ_WR;
324 qp->s_hdrwords = 0; 340 qp->s_hdrwords = 0;
325 qp->s_psn = 0; 341 qp->s_psn = 0;
326 qp->r_psn = 0; 342 qp->r_psn = 0;
@@ -333,7 +349,6 @@ static void ipath_reset_qp(struct ipath_qp *qp)
333 qp->r_state = IB_OPCODE_UC_SEND_LAST; 349 qp->r_state = IB_OPCODE_UC_SEND_LAST;
334 } 350 }
335 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; 351 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
336 qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
337 qp->r_nak_state = 0; 352 qp->r_nak_state = 0;
338 qp->r_wrid_valid = 0; 353 qp->r_wrid_valid = 0;
339 qp->s_rnr_timeout = 0; 354 qp->s_rnr_timeout = 0;
@@ -344,6 +359,10 @@ static void ipath_reset_qp(struct ipath_qp *qp)
344 qp->s_ssn = 1; 359 qp->s_ssn = 1;
345 qp->s_lsn = 0; 360 qp->s_lsn = 0;
346 qp->s_wait_credit = 0; 361 qp->s_wait_credit = 0;
362 memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
363 qp->r_head_ack_queue = 0;
364 qp->s_tail_ack_queue = 0;
365 qp->s_num_rd_atomic = 0;
347 if (qp->r_rq.wq) { 366 if (qp->r_rq.wq) {
348 qp->r_rq.wq->head = 0; 367 qp->r_rq.wq->head = 0;
349 qp->r_rq.wq->tail = 0; 368 qp->r_rq.wq->tail = 0;
@@ -357,7 +376,7 @@ static void ipath_reset_qp(struct ipath_qp *qp)
357 * @err: the receive completion error to signal if a RWQE is active 376 * @err: the receive completion error to signal if a RWQE is active
358 * 377 *
359 * Flushes both send and receive work queues. 378 * Flushes both send and receive work queues.
360 * QP s_lock should be held and interrupts disabled. 379 * The QP s_lock should be held and interrupts disabled.
361 */ 380 */
362 381
363void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) 382void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
@@ -365,7 +384,7 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
365 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 384 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
366 struct ib_wc wc; 385 struct ib_wc wc;
367 386
368 ipath_dbg(KERN_INFO "QP%d/%d in error state\n", 387 ipath_dbg("QP%d/%d in error state\n",
369 qp->ibqp.qp_num, qp->remote_qpn); 388 qp->ibqp.qp_num, qp->remote_qpn);
370 389
371 spin_lock(&dev->pending_lock); 390 spin_lock(&dev->pending_lock);
@@ -389,6 +408,8 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
389 wc.port_num = 0; 408 wc.port_num = 0;
390 if (qp->r_wrid_valid) { 409 if (qp->r_wrid_valid) {
391 qp->r_wrid_valid = 0; 410 qp->r_wrid_valid = 0;
411 wc.wr_id = qp->r_wr_id;
412 wc.opcode = IB_WC_RECV;
392 wc.status = err; 413 wc.status = err;
393 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); 414 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
394 } 415 }
@@ -503,13 +524,17 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
503 attr->path_mig_state != IB_MIG_REARM) 524 attr->path_mig_state != IB_MIG_REARM)
504 goto inval; 525 goto inval;
505 526
527 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
528 if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC)
529 goto inval;
530
506 switch (new_state) { 531 switch (new_state) {
507 case IB_QPS_RESET: 532 case IB_QPS_RESET:
508 ipath_reset_qp(qp); 533 ipath_reset_qp(qp);
509 break; 534 break;
510 535
511 case IB_QPS_ERR: 536 case IB_QPS_ERR:
512 ipath_error_qp(qp, IB_WC_GENERAL_ERR); 537 ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
513 break; 538 break;
514 539
515 default: 540 default:
@@ -559,6 +584,12 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
559 if (attr_mask & IB_QP_QKEY) 584 if (attr_mask & IB_QP_QKEY)
560 qp->qkey = attr->qkey; 585 qp->qkey = attr->qkey;
561 586
587 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
588 qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
589
590 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
591 qp->s_max_rd_atomic = attr->max_rd_atomic;
592
562 qp->state = new_state; 593 qp->state = new_state;
563 spin_unlock_irqrestore(&qp->s_lock, flags); 594 spin_unlock_irqrestore(&qp->s_lock, flags);
564 595
@@ -598,8 +629,8 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
598 attr->alt_pkey_index = 0; 629 attr->alt_pkey_index = 0;
599 attr->en_sqd_async_notify = 0; 630 attr->en_sqd_async_notify = 0;
600 attr->sq_draining = 0; 631 attr->sq_draining = 0;
601 attr->max_rd_atomic = 1; 632 attr->max_rd_atomic = qp->s_max_rd_atomic;
602 attr->max_dest_rd_atomic = 1; 633 attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
603 attr->min_rnr_timer = qp->r_min_rnr_timer; 634 attr->min_rnr_timer = qp->r_min_rnr_timer;
604 attr->port_num = 1; 635 attr->port_num = 1;
605 attr->timeout = qp->timeout; 636 attr->timeout = qp->timeout;
@@ -614,7 +645,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
614 init_attr->recv_cq = qp->ibqp.recv_cq; 645 init_attr->recv_cq = qp->ibqp.recv_cq;
615 init_attr->srq = qp->ibqp.srq; 646 init_attr->srq = qp->ibqp.srq;
616 init_attr->cap = attr->cap; 647 init_attr->cap = attr->cap;
617 if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR)) 648 if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR)
618 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; 649 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
619 else 650 else
620 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; 651 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
@@ -786,7 +817,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
786 qp->s_size = init_attr->cap.max_send_wr + 1; 817 qp->s_size = init_attr->cap.max_send_wr + 1;
787 qp->s_max_sge = init_attr->cap.max_send_sge; 818 qp->s_max_sge = init_attr->cap.max_send_sge;
788 if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) 819 if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
789 qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR; 820 qp->s_flags = IPATH_S_SIGNAL_REQ_WR;
790 else 821 else
791 qp->s_flags = 0; 822 qp->s_flags = 0;
792 dev = to_idev(ibpd->device); 823 dev = to_idev(ibpd->device);
@@ -958,7 +989,7 @@ bail:
958 * @wc: the WC responsible for putting the QP in this state 989 * @wc: the WC responsible for putting the QP in this state
959 * 990 *
960 * Flushes the send work queue. 991 * Flushes the send work queue.
961 * The QP s_lock should be held. 992 * The QP s_lock should be held and interrupts disabled.
962 */ 993 */
963 994
964void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc) 995void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
@@ -966,7 +997,7 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
966 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 997 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
967 struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); 998 struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
968 999
969 ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n", 1000 ipath_dbg("Send queue error on QP%d/%d: err: %d\n",
970 qp->ibqp.qp_num, qp->remote_qpn, wc->status); 1001 qp->ibqp.qp_num, qp->remote_qpn, wc->status);
971 1002
972 spin_lock(&dev->pending_lock); 1003 spin_lock(&dev->pending_lock);
@@ -984,12 +1015,12 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
984 wc->status = IB_WC_WR_FLUSH_ERR; 1015 wc->status = IB_WC_WR_FLUSH_ERR;
985 1016
986 while (qp->s_last != qp->s_head) { 1017 while (qp->s_last != qp->s_head) {
1018 wqe = get_swqe_ptr(qp, qp->s_last);
987 wc->wr_id = wqe->wr.wr_id; 1019 wc->wr_id = wqe->wr.wr_id;
988 wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; 1020 wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
989 ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1); 1021 ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
990 if (++qp->s_last >= qp->s_size) 1022 if (++qp->s_last >= qp->s_size)
991 qp->s_last = 0; 1023 qp->s_last = 0;
992 wqe = get_swqe_ptr(qp, qp->s_last);
993 } 1024 }
994 qp->s_cur = qp->s_tail = qp->s_head; 1025 qp->s_cur = qp->s_tail = qp->s_head;
995 qp->state = IB_QPS_SQE; 1026 qp->state = IB_QPS_SQE;
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 5ff20cb04494..b4b88d0b53f5 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -37,6 +37,19 @@
37/* cut down ridiculously long IB macro names */ 37/* cut down ridiculously long IB macro names */
38#define OP(x) IB_OPCODE_RC_##x 38#define OP(x) IB_OPCODE_RC_##x
39 39
40static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe,
41 u32 psn, u32 pmtu)
42{
43 u32 len;
44
45 len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
46 ss->sge = wqe->sg_list[0];
47 ss->sg_list = wqe->sg_list + 1;
48 ss->num_sge = wqe->wr.num_sge;
49 ipath_skip_sge(ss, len);
50 return wqe->length - len;
51}
52
40/** 53/**
41 * ipath_init_restart- initialize the qp->s_sge after a restart 54 * ipath_init_restart- initialize the qp->s_sge after a restart
42 * @qp: the QP who's SGE we're restarting 55 * @qp: the QP who's SGE we're restarting
@@ -47,15 +60,9 @@
47static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) 60static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
48{ 61{
49 struct ipath_ibdev *dev; 62 struct ipath_ibdev *dev;
50 u32 len;
51 63
52 len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * 64 qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn,
53 ib_mtu_enum_to_int(qp->path_mtu); 65 ib_mtu_enum_to_int(qp->path_mtu));
54 qp->s_sge.sge = wqe->sg_list[0];
55 qp->s_sge.sg_list = wqe->sg_list + 1;
56 qp->s_sge.num_sge = wqe->wr.num_sge;
57 ipath_skip_sge(&qp->s_sge, len);
58 qp->s_len = wqe->length - len;
59 dev = to_idev(qp->ibqp.device); 66 dev = to_idev(qp->ibqp.device);
60 spin_lock(&dev->pending_lock); 67 spin_lock(&dev->pending_lock);
61 if (list_empty(&qp->timerwait)) 68 if (list_empty(&qp->timerwait))
@@ -70,107 +77,123 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
70 * @ohdr: a pointer to the IB header being constructed 77 * @ohdr: a pointer to the IB header being constructed
71 * @pmtu: the path MTU 78 * @pmtu: the path MTU
72 * 79 *
73 * Return bth0 if constructed; otherwise, return 0. 80 * Return 1 if constructed; otherwise, return 0.
81 * Note that we are in the responder's side of the QP context.
74 * Note the QP s_lock must be held. 82 * Note the QP s_lock must be held.
75 */ 83 */
76u32 ipath_make_rc_ack(struct ipath_qp *qp, 84static int ipath_make_rc_ack(struct ipath_qp *qp,
77 struct ipath_other_headers *ohdr, 85 struct ipath_other_headers *ohdr,
78 u32 pmtu) 86 u32 pmtu, u32 *bth0p, u32 *bth2p)
79{ 87{
88 struct ipath_ack_entry *e;
80 u32 hwords; 89 u32 hwords;
81 u32 len; 90 u32 len;
82 u32 bth0; 91 u32 bth0;
92 u32 bth2;
83 93
84 /* header size in 32-bit words LRH+BTH = (8+12)/4. */ 94 /* header size in 32-bit words LRH+BTH = (8+12)/4. */
85 hwords = 5; 95 hwords = 5;
86 96
87 /*
88 * Send a response. Note that we are in the responder's
89 * side of the QP context.
90 */
91 switch (qp->s_ack_state) { 97 switch (qp->s_ack_state) {
92 case OP(RDMA_READ_REQUEST): 98 case OP(RDMA_READ_RESPONSE_LAST):
93 qp->s_cur_sge = &qp->s_rdma_sge; 99 case OP(RDMA_READ_RESPONSE_ONLY):
94 len = qp->s_rdma_len; 100 case OP(ATOMIC_ACKNOWLEDGE):
95 if (len > pmtu) { 101 qp->s_ack_state = OP(ACKNOWLEDGE);
96 len = pmtu; 102 /* FALLTHROUGH */
97 qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); 103 case OP(ACKNOWLEDGE):
98 } else 104 /* Check for no next entry in the queue. */
99 qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); 105 if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
100 qp->s_rdma_len -= len; 106 if (qp->s_flags & IPATH_S_ACK_PENDING)
107 goto normal;
108 goto bail;
109 }
110
111 e = &qp->s_ack_queue[qp->s_tail_ack_queue];
112 if (e->opcode == OP(RDMA_READ_REQUEST)) {
113 /* Copy SGE state in case we need to resend */
114 qp->s_ack_rdma_sge = e->rdma_sge;
115 qp->s_cur_sge = &qp->s_ack_rdma_sge;
116 len = e->rdma_sge.sge.sge_length;
117 if (len > pmtu) {
118 len = pmtu;
119 qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
120 } else {
121 qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
122 if (++qp->s_tail_ack_queue >
123 IPATH_MAX_RDMA_ATOMIC)
124 qp->s_tail_ack_queue = 0;
125 }
126 ohdr->u.aeth = ipath_compute_aeth(qp);
127 hwords++;
128 qp->s_ack_rdma_psn = e->psn;
129 bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
130 } else {
131 /* COMPARE_SWAP or FETCH_ADD */
132 qp->s_cur_sge = NULL;
133 len = 0;
134 qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
135 ohdr->u.at.aeth = ipath_compute_aeth(qp);
136 ohdr->u.at.atomic_ack_eth[0] =
137 cpu_to_be32(e->atomic_data >> 32);
138 ohdr->u.at.atomic_ack_eth[1] =
139 cpu_to_be32(e->atomic_data);
140 hwords += sizeof(ohdr->u.at) / sizeof(u32);
141 bth2 = e->psn;
142 if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
143 qp->s_tail_ack_queue = 0;
144 }
101 bth0 = qp->s_ack_state << 24; 145 bth0 = qp->s_ack_state << 24;
102 ohdr->u.aeth = ipath_compute_aeth(qp);
103 hwords++;
104 break; 146 break;
105 147
106 case OP(RDMA_READ_RESPONSE_FIRST): 148 case OP(RDMA_READ_RESPONSE_FIRST):
107 qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); 149 qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
108 /* FALLTHROUGH */ 150 /* FALLTHROUGH */
109 case OP(RDMA_READ_RESPONSE_MIDDLE): 151 case OP(RDMA_READ_RESPONSE_MIDDLE):
110 qp->s_cur_sge = &qp->s_rdma_sge; 152 len = qp->s_ack_rdma_sge.sge.sge_length;
111 len = qp->s_rdma_len;
112 if (len > pmtu) 153 if (len > pmtu)
113 len = pmtu; 154 len = pmtu;
114 else { 155 else {
115 ohdr->u.aeth = ipath_compute_aeth(qp); 156 ohdr->u.aeth = ipath_compute_aeth(qp);
116 hwords++; 157 hwords++;
117 qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); 158 qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
159 if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
160 qp->s_tail_ack_queue = 0;
118 } 161 }
119 qp->s_rdma_len -= len;
120 bth0 = qp->s_ack_state << 24; 162 bth0 = qp->s_ack_state << 24;
121 break; 163 bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
122
123 case OP(RDMA_READ_RESPONSE_LAST):
124 case OP(RDMA_READ_RESPONSE_ONLY):
125 /*
126 * We have to prevent new requests from changing
127 * the r_sge state while a ipath_verbs_send()
128 * is in progress.
129 */
130 qp->s_ack_state = OP(ACKNOWLEDGE);
131 bth0 = 0;
132 goto bail;
133
134 case OP(COMPARE_SWAP):
135 case OP(FETCH_ADD):
136 qp->s_cur_sge = NULL;
137 len = 0;
138 /*
139 * Set the s_ack_state so the receive interrupt handler
140 * won't try to send an ACK (out of order) until this one
141 * is actually sent.
142 */
143 qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
144 bth0 = OP(ATOMIC_ACKNOWLEDGE) << 24;
145 ohdr->u.at.aeth = ipath_compute_aeth(qp);
146 ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
147 hwords += sizeof(ohdr->u.at) / 4;
148 break; 164 break;
149 165
150 default: 166 default:
151 /* Send a regular ACK. */ 167 normal:
152 qp->s_cur_sge = NULL;
153 len = 0;
154 /* 168 /*
155 * Set the s_ack_state so the receive interrupt handler 169 * Send a regular ACK.
156 * won't try to send an ACK (out of order) until this one 170 * Set the s_ack_state so we wait until after sending
157 * is actually sent. 171 * the ACK before setting s_ack_state to ACKNOWLEDGE
172 * (see above).
158 */ 173 */
159 qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); 174 qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
160 bth0 = OP(ACKNOWLEDGE) << 24; 175 qp->s_flags &= ~IPATH_S_ACK_PENDING;
176 qp->s_cur_sge = NULL;
161 if (qp->s_nak_state) 177 if (qp->s_nak_state)
162 ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | 178 ohdr->u.aeth =
163 (qp->s_nak_state << 179 cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
164 IPATH_AETH_CREDIT_SHIFT)); 180 (qp->s_nak_state <<
181 IPATH_AETH_CREDIT_SHIFT));
165 else 182 else
166 ohdr->u.aeth = ipath_compute_aeth(qp); 183 ohdr->u.aeth = ipath_compute_aeth(qp);
167 hwords++; 184 hwords++;
185 len = 0;
186 bth0 = OP(ACKNOWLEDGE) << 24;
187 bth2 = qp->s_ack_psn & IPATH_PSN_MASK;
168 } 188 }
169 qp->s_hdrwords = hwords; 189 qp->s_hdrwords = hwords;
170 qp->s_cur_size = len; 190 qp->s_cur_size = len;
191 *bth0p = bth0;
192 *bth2p = bth2;
193 return 1;
171 194
172bail: 195bail:
173 return bth0; 196 return 0;
174} 197}
175 198
176/** 199/**
@@ -197,9 +220,16 @@ int ipath_make_rc_req(struct ipath_qp *qp,
197 u32 bth2; 220 u32 bth2;
198 char newreq; 221 char newreq;
199 222
223 /* Sending responses has higher priority over sending requests. */
224 if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
225 (qp->s_flags & IPATH_S_ACK_PENDING) ||
226 qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE) &&
227 ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p))
228 goto done;
229
200 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || 230 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
201 qp->s_rnr_timeout) 231 qp->s_rnr_timeout)
202 goto done; 232 goto bail;
203 233
204 /* Limit the number of packets sent without an ACK. */ 234 /* Limit the number of packets sent without an ACK. */
205 if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { 235 if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
@@ -210,7 +240,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
210 list_add_tail(&qp->timerwait, 240 list_add_tail(&qp->timerwait,
211 &dev->pending[dev->pending_index]); 241 &dev->pending[dev->pending_index]);
212 spin_unlock(&dev->pending_lock); 242 spin_unlock(&dev->pending_lock);
213 goto done; 243 goto bail;
214 } 244 }
215 245
216 /* header size in 32-bit words LRH+BTH = (8+12)/4. */ 246 /* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -232,7 +262,16 @@ int ipath_make_rc_req(struct ipath_qp *qp,
232 if (qp->s_cur == qp->s_tail) { 262 if (qp->s_cur == qp->s_tail) {
233 /* Check if send work queue is empty. */ 263 /* Check if send work queue is empty. */
234 if (qp->s_tail == qp->s_head) 264 if (qp->s_tail == qp->s_head)
235 goto done; 265 goto bail;
266 /*
267 * If a fence is requested, wait for previous
268 * RDMA read and atomic operations to finish.
269 */
270 if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
271 qp->s_num_rd_atomic) {
272 qp->s_flags |= IPATH_S_FENCE_PENDING;
273 goto bail;
274 }
236 wqe->psn = qp->s_next_psn; 275 wqe->psn = qp->s_next_psn;
237 newreq = 1; 276 newreq = 1;
238 } 277 }
@@ -250,7 +289,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
250 /* If no credit, return. */ 289 /* If no credit, return. */
251 if (qp->s_lsn != (u32) -1 && 290 if (qp->s_lsn != (u32) -1 &&
252 ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) 291 ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
253 goto done; 292 goto bail;
254 wqe->lpsn = wqe->psn; 293 wqe->lpsn = wqe->psn;
255 if (len > pmtu) { 294 if (len > pmtu) {
256 wqe->lpsn += (len - 1) / pmtu; 295 wqe->lpsn += (len - 1) / pmtu;
@@ -281,13 +320,13 @@ int ipath_make_rc_req(struct ipath_qp *qp,
281 /* If no credit, return. */ 320 /* If no credit, return. */
282 if (qp->s_lsn != (u32) -1 && 321 if (qp->s_lsn != (u32) -1 &&
283 ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) 322 ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
284 goto done; 323 goto bail;
285 ohdr->u.rc.reth.vaddr = 324 ohdr->u.rc.reth.vaddr =
286 cpu_to_be64(wqe->wr.wr.rdma.remote_addr); 325 cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
287 ohdr->u.rc.reth.rkey = 326 ohdr->u.rc.reth.rkey =
288 cpu_to_be32(wqe->wr.wr.rdma.rkey); 327 cpu_to_be32(wqe->wr.wr.rdma.rkey);
289 ohdr->u.rc.reth.length = cpu_to_be32(len); 328 ohdr->u.rc.reth.length = cpu_to_be32(len);
290 hwords += sizeof(struct ib_reth) / 4; 329 hwords += sizeof(struct ib_reth) / sizeof(u32);
291 wqe->lpsn = wqe->psn; 330 wqe->lpsn = wqe->psn;
292 if (len > pmtu) { 331 if (len > pmtu) {
293 wqe->lpsn += (len - 1) / pmtu; 332 wqe->lpsn += (len - 1) / pmtu;
@@ -312,14 +351,17 @@ int ipath_make_rc_req(struct ipath_qp *qp,
312 break; 351 break;
313 352
314 case IB_WR_RDMA_READ: 353 case IB_WR_RDMA_READ:
315 ohdr->u.rc.reth.vaddr = 354 /*
316 cpu_to_be64(wqe->wr.wr.rdma.remote_addr); 355 * Don't allow more operations to be started
317 ohdr->u.rc.reth.rkey = 356 * than the QP limits allow.
318 cpu_to_be32(wqe->wr.wr.rdma.rkey); 357 */
319 ohdr->u.rc.reth.length = cpu_to_be32(len);
320 qp->s_state = OP(RDMA_READ_REQUEST);
321 hwords += sizeof(ohdr->u.rc.reth) / 4;
322 if (newreq) { 358 if (newreq) {
359 if (qp->s_num_rd_atomic >=
360 qp->s_max_rd_atomic) {
361 qp->s_flags |= IPATH_S_RDMAR_PENDING;
362 goto bail;
363 }
364 qp->s_num_rd_atomic++;
323 if (qp->s_lsn != (u32) -1) 365 if (qp->s_lsn != (u32) -1)
324 qp->s_lsn++; 366 qp->s_lsn++;
325 /* 367 /*
@@ -330,6 +372,13 @@ int ipath_make_rc_req(struct ipath_qp *qp,
330 qp->s_next_psn += (len - 1) / pmtu; 372 qp->s_next_psn += (len - 1) / pmtu;
331 wqe->lpsn = qp->s_next_psn++; 373 wqe->lpsn = qp->s_next_psn++;
332 } 374 }
375 ohdr->u.rc.reth.vaddr =
376 cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
377 ohdr->u.rc.reth.rkey =
378 cpu_to_be32(wqe->wr.wr.rdma.rkey);
379 ohdr->u.rc.reth.length = cpu_to_be32(len);
380 qp->s_state = OP(RDMA_READ_REQUEST);
381 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
333 ss = NULL; 382 ss = NULL;
334 len = 0; 383 len = 0;
335 if (++qp->s_cur == qp->s_size) 384 if (++qp->s_cur == qp->s_size)
@@ -338,32 +387,48 @@ int ipath_make_rc_req(struct ipath_qp *qp,
338 387
339 case IB_WR_ATOMIC_CMP_AND_SWP: 388 case IB_WR_ATOMIC_CMP_AND_SWP:
340 case IB_WR_ATOMIC_FETCH_AND_ADD: 389 case IB_WR_ATOMIC_FETCH_AND_ADD:
341 if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) 390 /*
342 qp->s_state = OP(COMPARE_SWAP); 391 * Don't allow more operations to be started
343 else 392 * than the QP limits allow.
344 qp->s_state = OP(FETCH_ADD); 393 */
345 ohdr->u.atomic_eth.vaddr = cpu_to_be64(
346 wqe->wr.wr.atomic.remote_addr);
347 ohdr->u.atomic_eth.rkey = cpu_to_be32(
348 wqe->wr.wr.atomic.rkey);
349 ohdr->u.atomic_eth.swap_data = cpu_to_be64(
350 wqe->wr.wr.atomic.swap);
351 ohdr->u.atomic_eth.compare_data = cpu_to_be64(
352 wqe->wr.wr.atomic.compare_add);
353 hwords += sizeof(struct ib_atomic_eth) / 4;
354 if (newreq) { 394 if (newreq) {
395 if (qp->s_num_rd_atomic >=
396 qp->s_max_rd_atomic) {
397 qp->s_flags |= IPATH_S_RDMAR_PENDING;
398 goto bail;
399 }
400 qp->s_num_rd_atomic++;
355 if (qp->s_lsn != (u32) -1) 401 if (qp->s_lsn != (u32) -1)
356 qp->s_lsn++; 402 qp->s_lsn++;
357 wqe->lpsn = wqe->psn; 403 wqe->lpsn = wqe->psn;
358 } 404 }
359 if (++qp->s_cur == qp->s_size) 405 if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
360 qp->s_cur = 0; 406 qp->s_state = OP(COMPARE_SWAP);
407 ohdr->u.atomic_eth.swap_data = cpu_to_be64(
408 wqe->wr.wr.atomic.swap);
409 ohdr->u.atomic_eth.compare_data = cpu_to_be64(
410 wqe->wr.wr.atomic.compare_add);
411 } else {
412 qp->s_state = OP(FETCH_ADD);
413 ohdr->u.atomic_eth.swap_data = cpu_to_be64(
414 wqe->wr.wr.atomic.compare_add);
415 ohdr->u.atomic_eth.compare_data = 0;
416 }
417 ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
418 wqe->wr.wr.atomic.remote_addr >> 32);
419 ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
420 wqe->wr.wr.atomic.remote_addr);
421 ohdr->u.atomic_eth.rkey = cpu_to_be32(
422 wqe->wr.wr.atomic.rkey);
423 hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
361 ss = NULL; 424 ss = NULL;
362 len = 0; 425 len = 0;
426 if (++qp->s_cur == qp->s_size)
427 qp->s_cur = 0;
363 break; 428 break;
364 429
365 default: 430 default:
366 goto done; 431 goto bail;
367 } 432 }
368 qp->s_sge.sge = wqe->sg_list[0]; 433 qp->s_sge.sge = wqe->sg_list[0];
369 qp->s_sge.sg_list = wqe->sg_list + 1; 434 qp->s_sge.sg_list = wqe->sg_list + 1;
@@ -379,7 +444,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
379 qp->s_psn = wqe->lpsn + 1; 444 qp->s_psn = wqe->lpsn + 1;
380 else { 445 else {
381 qp->s_psn++; 446 qp->s_psn++;
382 if ((int)(qp->s_psn - qp->s_next_psn) > 0) 447 if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
383 qp->s_next_psn = qp->s_psn; 448 qp->s_next_psn = qp->s_psn;
384 } 449 }
385 /* 450 /*
@@ -406,7 +471,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
406 /* FALLTHROUGH */ 471 /* FALLTHROUGH */
407 case OP(SEND_MIDDLE): 472 case OP(SEND_MIDDLE):
408 bth2 = qp->s_psn++ & IPATH_PSN_MASK; 473 bth2 = qp->s_psn++ & IPATH_PSN_MASK;
409 if ((int)(qp->s_psn - qp->s_next_psn) > 0) 474 if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
410 qp->s_next_psn = qp->s_psn; 475 qp->s_next_psn = qp->s_psn;
411 ss = &qp->s_sge; 476 ss = &qp->s_sge;
412 len = qp->s_len; 477 len = qp->s_len;
@@ -442,7 +507,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
442 /* FALLTHROUGH */ 507 /* FALLTHROUGH */
443 case OP(RDMA_WRITE_MIDDLE): 508 case OP(RDMA_WRITE_MIDDLE):
444 bth2 = qp->s_psn++ & IPATH_PSN_MASK; 509 bth2 = qp->s_psn++ & IPATH_PSN_MASK;
445 if ((int)(qp->s_psn - qp->s_next_psn) > 0) 510 if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
446 qp->s_next_psn = qp->s_psn; 511 qp->s_next_psn = qp->s_psn;
447 ss = &qp->s_sge; 512 ss = &qp->s_sge;
448 len = qp->s_len; 513 len = qp->s_len;
@@ -479,9 +544,9 @@ int ipath_make_rc_req(struct ipath_qp *qp,
479 cpu_to_be32(wqe->wr.wr.rdma.rkey); 544 cpu_to_be32(wqe->wr.wr.rdma.rkey);
480 ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); 545 ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
481 qp->s_state = OP(RDMA_READ_REQUEST); 546 qp->s_state = OP(RDMA_READ_REQUEST);
482 hwords += sizeof(ohdr->u.rc.reth) / 4; 547 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
483 bth2 = qp->s_psn++ & IPATH_PSN_MASK; 548 bth2 = qp->s_psn++ & IPATH_PSN_MASK;
484 if ((int)(qp->s_psn - qp->s_next_psn) > 0) 549 if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
485 qp->s_next_psn = qp->s_psn; 550 qp->s_next_psn = qp->s_psn;
486 ss = NULL; 551 ss = NULL;
487 len = 0; 552 len = 0;
@@ -489,20 +554,6 @@ int ipath_make_rc_req(struct ipath_qp *qp,
489 if (qp->s_cur == qp->s_size) 554 if (qp->s_cur == qp->s_size)
490 qp->s_cur = 0; 555 qp->s_cur = 0;
491 break; 556 break;
492
493 case OP(RDMA_READ_REQUEST):
494 case OP(COMPARE_SWAP):
495 case OP(FETCH_ADD):
496 /*
497 * We shouldn't start anything new until this request is
498 * finished. The ACK will handle rescheduling us. XXX The
499 * number of outstanding ones is negotiated at connection
500 * setup time (see pg. 258,289)? XXX Also, if we support
501 * multiple outstanding requests, we need to check the WQE
502 * IB_SEND_FENCE flag and not send a new request if a RDMA
503 * read or atomic is pending.
504 */
505 goto done;
506 } 557 }
507 if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0) 558 if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
508 bth2 |= 1 << 31; /* Request ACK. */ 559 bth2 |= 1 << 31; /* Request ACK. */
@@ -512,9 +563,10 @@ int ipath_make_rc_req(struct ipath_qp *qp,
512 qp->s_cur_size = len; 563 qp->s_cur_size = len;
513 *bth0p = bth0 | (qp->s_state << 24); 564 *bth0p = bth0 | (qp->s_state << 24);
514 *bth2p = bth2; 565 *bth2p = bth2;
566done:
515 return 1; 567 return 1;
516 568
517done: 569bail:
518 return 0; 570 return 0;
519} 571}
520 572
@@ -524,7 +576,8 @@ done:
524 * 576 *
525 * This is called from ipath_rc_rcv() and only uses the receive 577 * This is called from ipath_rc_rcv() and only uses the receive
526 * side QP state. 578 * side QP state.
527 * Note that RDMA reads are handled in the send side QP state and tasklet. 579 * Note that RDMA reads and atomics are handled in the
580 * send side QP state and tasklet.
528 */ 581 */
529static void send_rc_ack(struct ipath_qp *qp) 582static void send_rc_ack(struct ipath_qp *qp)
530{ 583{
@@ -535,6 +588,10 @@ static void send_rc_ack(struct ipath_qp *qp)
535 struct ipath_ib_header hdr; 588 struct ipath_ib_header hdr;
536 struct ipath_other_headers *ohdr; 589 struct ipath_other_headers *ohdr;
537 590
591 /* Don't send ACK or NAK if a RDMA read or atomic is pending. */
592 if (qp->r_head_ack_queue != qp->s_tail_ack_queue)
593 goto queue_ack;
594
538 /* Construct the header. */ 595 /* Construct the header. */
539 ohdr = &hdr.u.oth; 596 ohdr = &hdr.u.oth;
540 lrh0 = IPATH_LRH_BTH; 597 lrh0 = IPATH_LRH_BTH;
@@ -548,19 +605,14 @@ static void send_rc_ack(struct ipath_qp *qp)
548 lrh0 = IPATH_LRH_GRH; 605 lrh0 = IPATH_LRH_GRH;
549 } 606 }
550 /* read pkey_index w/o lock (its atomic) */ 607 /* read pkey_index w/o lock (its atomic) */
551 bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index); 608 bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) |
609 OP(ACKNOWLEDGE) << 24;
552 if (qp->r_nak_state) 610 if (qp->r_nak_state)
553 ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | 611 ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
554 (qp->r_nak_state << 612 (qp->r_nak_state <<
555 IPATH_AETH_CREDIT_SHIFT)); 613 IPATH_AETH_CREDIT_SHIFT));
556 else 614 else
557 ohdr->u.aeth = ipath_compute_aeth(qp); 615 ohdr->u.aeth = ipath_compute_aeth(qp);
558 if (qp->r_ack_state >= OP(COMPARE_SWAP)) {
559 bth0 |= OP(ATOMIC_ACKNOWLEDGE) << 24;
560 ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
561 hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4;
562 } else
563 bth0 |= OP(ACKNOWLEDGE) << 24;
564 lrh0 |= qp->remote_ah_attr.sl << 4; 616 lrh0 |= qp->remote_ah_attr.sl << 4;
565 hdr.lrh[0] = cpu_to_be16(lrh0); 617 hdr.lrh[0] = cpu_to_be16(lrh0);
566 hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); 618 hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
@@ -574,31 +626,31 @@ static void send_rc_ack(struct ipath_qp *qp)
574 * If we can send the ACK, clear the ACK state. 626 * If we can send the ACK, clear the ACK state.
575 */ 627 */
576 if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) { 628 if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) {
577 qp->r_ack_state = OP(ACKNOWLEDGE);
578 dev->n_unicast_xmit++; 629 dev->n_unicast_xmit++;
579 } else { 630 goto done;
580 /*
581 * We are out of PIO buffers at the moment.
582 * Pass responsibility for sending the ACK to the
583 * send tasklet so that when a PIO buffer becomes
584 * available, the ACK is sent ahead of other outgoing
585 * packets.
586 */
587 dev->n_rc_qacks++;
588 spin_lock_irq(&qp->s_lock);
589 /* Don't coalesce if a RDMA read or atomic is pending. */
590 if (qp->s_ack_state == OP(ACKNOWLEDGE) ||
591 qp->s_ack_state < OP(RDMA_READ_REQUEST)) {
592 qp->s_ack_state = qp->r_ack_state;
593 qp->s_nak_state = qp->r_nak_state;
594 qp->s_ack_psn = qp->r_ack_psn;
595 qp->r_ack_state = OP(ACKNOWLEDGE);
596 }
597 spin_unlock_irq(&qp->s_lock);
598
599 /* Call ipath_do_rc_send() in another thread. */
600 tasklet_hi_schedule(&qp->s_task);
601 } 631 }
632
633 /*
634 * We are out of PIO buffers at the moment.
635 * Pass responsibility for sending the ACK to the
636 * send tasklet so that when a PIO buffer becomes
637 * available, the ACK is sent ahead of other outgoing
638 * packets.
639 */
640 dev->n_rc_qacks++;
641
642queue_ack:
643 spin_lock_irq(&qp->s_lock);
644 qp->s_flags |= IPATH_S_ACK_PENDING;
645 qp->s_nak_state = qp->r_nak_state;
646 qp->s_ack_psn = qp->r_ack_psn;
647 spin_unlock_irq(&qp->s_lock);
648
649 /* Call ipath_do_rc_send() in another thread. */
650 tasklet_hi_schedule(&qp->s_task);
651
652done:
653 return;
602} 654}
603 655
604/** 656/**
@@ -727,7 +779,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
727 if (wqe->wr.opcode == IB_WR_RDMA_READ) 779 if (wqe->wr.opcode == IB_WR_RDMA_READ)
728 dev->n_rc_resends++; 780 dev->n_rc_resends++;
729 else 781 else
730 dev->n_rc_resends += (int)qp->s_psn - (int)psn; 782 dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
731 783
732 reset_psn(qp, psn); 784 reset_psn(qp, psn);
733 tasklet_hi_schedule(&qp->s_task); 785 tasklet_hi_schedule(&qp->s_task);
@@ -775,10 +827,6 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
775 list_del_init(&qp->timerwait); 827 list_del_init(&qp->timerwait);
776 spin_unlock(&dev->pending_lock); 828 spin_unlock(&dev->pending_lock);
777 829
778 /* Nothing is pending to ACK/NAK. */
779 if (unlikely(qp->s_last == qp->s_tail))
780 goto bail;
781
782 /* 830 /*
783 * Note that NAKs implicitly ACK outstanding SEND and RDMA write 831 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
784 * requests and implicitly NAK RDMA read and atomic requests issued 832 * requests and implicitly NAK RDMA read and atomic requests issued
@@ -806,7 +854,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
806 */ 854 */
807 if ((wqe->wr.opcode == IB_WR_RDMA_READ && 855 if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
808 (opcode != OP(RDMA_READ_RESPONSE_LAST) || 856 (opcode != OP(RDMA_READ_RESPONSE_LAST) ||
809 ipath_cmp24(ack_psn, wqe->lpsn) != 0)) || 857 ipath_cmp24(ack_psn, wqe->lpsn) != 0)) ||
810 ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || 858 ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
811 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && 859 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
812 (opcode != OP(ATOMIC_ACKNOWLEDGE) || 860 (opcode != OP(ATOMIC_ACKNOWLEDGE) ||
@@ -824,20 +872,33 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
824 */ 872 */
825 goto bail; 873 goto bail;
826 } 874 }
827 if (wqe->wr.opcode == IB_WR_RDMA_READ || 875 if (qp->s_num_rd_atomic &&
828 wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || 876 (wqe->wr.opcode == IB_WR_RDMA_READ ||
829 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) 877 wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
830 tasklet_hi_schedule(&qp->s_task); 878 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
879 qp->s_num_rd_atomic--;
880 /* Restart sending task if fence is complete */
881 if ((qp->s_flags & IPATH_S_FENCE_PENDING) &&
882 !qp->s_num_rd_atomic) {
883 qp->s_flags &= ~IPATH_S_FENCE_PENDING;
884 tasklet_hi_schedule(&qp->s_task);
885 } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) {
886 qp->s_flags &= ~IPATH_S_RDMAR_PENDING;
887 tasklet_hi_schedule(&qp->s_task);
888 }
889 }
831 /* Post a send completion queue entry if requested. */ 890 /* Post a send completion queue entry if requested. */
832 if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || 891 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
833 (wqe->wr.send_flags & IB_SEND_SIGNALED)) { 892 (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
834 wc.wr_id = wqe->wr.wr_id; 893 wc.wr_id = wqe->wr.wr_id;
835 wc.status = IB_WC_SUCCESS; 894 wc.status = IB_WC_SUCCESS;
836 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; 895 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
837 wc.vendor_err = 0; 896 wc.vendor_err = 0;
838 wc.byte_len = wqe->length; 897 wc.byte_len = wqe->length;
898 wc.imm_data = 0;
839 wc.qp = &qp->ibqp; 899 wc.qp = &qp->ibqp;
840 wc.src_qp = qp->remote_qpn; 900 wc.src_qp = qp->remote_qpn;
901 wc.wc_flags = 0;
841 wc.pkey_index = 0; 902 wc.pkey_index = 0;
842 wc.slid = qp->remote_ah_attr.dlid; 903 wc.slid = qp->remote_ah_attr.dlid;
843 wc.sl = qp->remote_ah_attr.sl; 904 wc.sl = qp->remote_ah_attr.sl;
@@ -854,15 +915,19 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
854 if (qp->s_last == qp->s_cur) { 915 if (qp->s_last == qp->s_cur) {
855 if (++qp->s_cur >= qp->s_size) 916 if (++qp->s_cur >= qp->s_size)
856 qp->s_cur = 0; 917 qp->s_cur = 0;
918 qp->s_last = qp->s_cur;
919 if (qp->s_last == qp->s_tail)
920 break;
857 wqe = get_swqe_ptr(qp, qp->s_cur); 921 wqe = get_swqe_ptr(qp, qp->s_cur);
858 qp->s_state = OP(SEND_LAST); 922 qp->s_state = OP(SEND_LAST);
859 qp->s_psn = wqe->psn; 923 qp->s_psn = wqe->psn;
924 } else {
925 if (++qp->s_last >= qp->s_size)
926 qp->s_last = 0;
927 if (qp->s_last == qp->s_tail)
928 break;
929 wqe = get_swqe_ptr(qp, qp->s_last);
860 } 930 }
861 if (++qp->s_last >= qp->s_size)
862 qp->s_last = 0;
863 wqe = get_swqe_ptr(qp, qp->s_last);
864 if (qp->s_last == qp->s_tail)
865 break;
866 } 931 }
867 932
868 switch (aeth >> 29) { 933 switch (aeth >> 29) {
@@ -874,6 +939,18 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
874 list_add_tail(&qp->timerwait, 939 list_add_tail(&qp->timerwait,
875 &dev->pending[dev->pending_index]); 940 &dev->pending[dev->pending_index]);
876 spin_unlock(&dev->pending_lock); 941 spin_unlock(&dev->pending_lock);
942 /*
943 * If we get a partial ACK for a resent operation,
944 * we can stop resending the earlier packets and
945 * continue with the next packet the receiver wants.
946 */
947 if (ipath_cmp24(qp->s_psn, psn) <= 0) {
948 reset_psn(qp, psn + 1);
949 tasklet_hi_schedule(&qp->s_task);
950 }
951 } else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
952 qp->s_state = OP(SEND_LAST);
953 qp->s_psn = psn + 1;
877 } 954 }
878 ipath_get_credit(qp, aeth); 955 ipath_get_credit(qp, aeth);
879 qp->s_rnr_retry = qp->s_rnr_retry_cnt; 956 qp->s_rnr_retry = qp->s_rnr_retry_cnt;
@@ -884,22 +961,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
884 961
885 case 1: /* RNR NAK */ 962 case 1: /* RNR NAK */
886 dev->n_rnr_naks++; 963 dev->n_rnr_naks++;
964 if (qp->s_last == qp->s_tail)
965 goto bail;
887 if (qp->s_rnr_retry == 0) { 966 if (qp->s_rnr_retry == 0) {
888 if (qp->s_last == qp->s_tail)
889 goto bail;
890
891 wc.status = IB_WC_RNR_RETRY_EXC_ERR; 967 wc.status = IB_WC_RNR_RETRY_EXC_ERR;
892 goto class_b; 968 goto class_b;
893 } 969 }
894 if (qp->s_rnr_retry_cnt < 7) 970 if (qp->s_rnr_retry_cnt < 7)
895 qp->s_rnr_retry--; 971 qp->s_rnr_retry--;
896 if (qp->s_last == qp->s_tail)
897 goto bail;
898 972
899 /* The last valid PSN is the previous PSN. */ 973 /* The last valid PSN is the previous PSN. */
900 update_last_psn(qp, psn - 1); 974 update_last_psn(qp, psn - 1);
901 975
902 dev->n_rc_resends += (int)qp->s_psn - (int)psn; 976 if (wqe->wr.opcode == IB_WR_RDMA_READ)
977 dev->n_rc_resends++;
978 else
979 dev->n_rc_resends +=
980 (qp->s_psn - psn) & IPATH_PSN_MASK;
903 981
904 reset_psn(qp, psn); 982 reset_psn(qp, psn);
905 983
@@ -910,26 +988,20 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
910 goto bail; 988 goto bail;
911 989
912 case 3: /* NAK */ 990 case 3: /* NAK */
913 /* The last valid PSN seen is the previous request's. */ 991 if (qp->s_last == qp->s_tail)
914 if (qp->s_last != qp->s_tail) 992 goto bail;
915 update_last_psn(qp, wqe->psn - 1); 993 /* The last valid PSN is the previous PSN. */
994 update_last_psn(qp, psn - 1);
916 switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) & 995 switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
917 IPATH_AETH_CREDIT_MASK) { 996 IPATH_AETH_CREDIT_MASK) {
918 case 0: /* PSN sequence error */ 997 case 0: /* PSN sequence error */
919 dev->n_seq_naks++; 998 dev->n_seq_naks++;
920 /* 999 /*
921 * Back up to the responder's expected PSN. XXX 1000 * Back up to the responder's expected PSN.
922 * Note that we might get a NAK in the middle of an 1001 * Note that we might get a NAK in the middle of an
923 * RDMA READ response which terminates the RDMA 1002 * RDMA READ response which terminates the RDMA
924 * READ. 1003 * READ.
925 */ 1004 */
926 if (qp->s_last == qp->s_tail)
927 break;
928
929 if (ipath_cmp24(psn, wqe->psn) < 0)
930 break;
931
932 /* Retry the request. */
933 ipath_restart_rc(qp, psn, &wc); 1005 ipath_restart_rc(qp, psn, &wc);
934 break; 1006 break;
935 1007
@@ -1003,6 +1075,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1003 u32 psn, u32 hdrsize, u32 pmtu, 1075 u32 psn, u32 hdrsize, u32 pmtu,
1004 int header_in_data) 1076 int header_in_data)
1005{ 1077{
1078 struct ipath_swqe *wqe;
1006 unsigned long flags; 1079 unsigned long flags;
1007 struct ib_wc wc; 1080 struct ib_wc wc;
1008 int diff; 1081 int diff;
@@ -1032,6 +1105,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1032 goto ack_done; 1105 goto ack_done;
1033 } 1106 }
1034 1107
1108 if (unlikely(qp->s_last == qp->s_tail))
1109 goto ack_done;
1110 wqe = get_swqe_ptr(qp, qp->s_last);
1111
1035 switch (opcode) { 1112 switch (opcode) {
1036 case OP(ACKNOWLEDGE): 1113 case OP(ACKNOWLEDGE):
1037 case OP(ATOMIC_ACKNOWLEDGE): 1114 case OP(ATOMIC_ACKNOWLEDGE):
@@ -1042,38 +1119,49 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1042 aeth = be32_to_cpu(((__be32 *) data)[0]); 1119 aeth = be32_to_cpu(((__be32 *) data)[0]);
1043 data += sizeof(__be32); 1120 data += sizeof(__be32);
1044 } 1121 }
1045 if (opcode == OP(ATOMIC_ACKNOWLEDGE)) 1122 if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
1046 *(u64 *) qp->s_sge.sge.vaddr = *(u64 *) data; 1123 u64 val;
1124
1125 if (!header_in_data) {
1126 __be32 *p = ohdr->u.at.atomic_ack_eth;
1127
1128 val = ((u64) be32_to_cpu(p[0]) << 32) |
1129 be32_to_cpu(p[1]);
1130 } else
1131 val = be64_to_cpu(((__be64 *) data)[0]);
1132 *(u64 *) wqe->sg_list[0].vaddr = val;
1133 }
1047 if (!do_rc_ack(qp, aeth, psn, opcode) || 1134 if (!do_rc_ack(qp, aeth, psn, opcode) ||
1048 opcode != OP(RDMA_READ_RESPONSE_FIRST)) 1135 opcode != OP(RDMA_READ_RESPONSE_FIRST))
1049 goto ack_done; 1136 goto ack_done;
1050 hdrsize += 4; 1137 hdrsize += 4;
1138 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1139 goto ack_op_err;
1051 /* 1140 /*
1052 * do_rc_ack() has already checked the PSN so skip 1141 * If this is a response to a resent RDMA read, we
1053 * the sequence check. 1142 * have to be careful to copy the data to the right
1143 * location.
1054 */ 1144 */
1055 goto rdma_read; 1145 qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1146 wqe, psn, pmtu);
1147 goto read_middle;
1056 1148
1057 case OP(RDMA_READ_RESPONSE_MIDDLE): 1149 case OP(RDMA_READ_RESPONSE_MIDDLE):
1058 /* no AETH, no ACK */ 1150 /* no AETH, no ACK */
1059 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { 1151 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
1060 dev->n_rdma_seq++; 1152 dev->n_rdma_seq++;
1061 if (qp->s_last != qp->s_tail) 1153 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
1062 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
1063 goto ack_done; 1154 goto ack_done;
1064 } 1155 }
1065 rdma_read: 1156 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1066 if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST))) 1157 goto ack_op_err;
1067 goto ack_done; 1158 read_middle:
1068 if (unlikely(tlen != (hdrsize + pmtu + 4))) 1159 if (unlikely(tlen != (hdrsize + pmtu + 4)))
1069 goto ack_done; 1160 goto ack_len_err;
1070 if (unlikely(pmtu >= qp->s_len)) 1161 if (unlikely(pmtu >= qp->s_rdma_read_len))
1071 goto ack_done; 1162 goto ack_len_err;
1163
1072 /* We got a response so update the timeout. */ 1164 /* We got a response so update the timeout. */
1073 if (unlikely(qp->s_last == qp->s_tail ||
1074 get_swqe_ptr(qp, qp->s_last)->wr.opcode !=
1075 IB_WR_RDMA_READ))
1076 goto ack_done;
1077 spin_lock(&dev->pending_lock); 1165 spin_lock(&dev->pending_lock);
1078 if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait)) 1166 if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
1079 list_move_tail(&qp->timerwait, 1167 list_move_tail(&qp->timerwait,
@@ -1082,67 +1170,97 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1082 /* 1170 /*
1083 * Update the RDMA receive state but do the copy w/o 1171 * Update the RDMA receive state but do the copy w/o
1084 * holding the locks and blocking interrupts. 1172 * holding the locks and blocking interrupts.
1085 * XXX Yet another place that affects relaxed RDMA order
1086 * since we don't want s_sge modified.
1087 */ 1173 */
1088 qp->s_len -= pmtu; 1174 qp->s_rdma_read_len -= pmtu;
1089 update_last_psn(qp, psn); 1175 update_last_psn(qp, psn);
1090 spin_unlock_irqrestore(&qp->s_lock, flags); 1176 spin_unlock_irqrestore(&qp->s_lock, flags);
1091 ipath_copy_sge(&qp->s_sge, data, pmtu); 1177 ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu);
1092 goto bail; 1178 goto bail;
1093 1179
1094 case OP(RDMA_READ_RESPONSE_LAST): 1180 case OP(RDMA_READ_RESPONSE_ONLY):
1095 /* ACKs READ req. */
1096 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { 1181 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
1097 dev->n_rdma_seq++; 1182 dev->n_rdma_seq++;
1098 if (qp->s_last != qp->s_tail) 1183 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
1099 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
1100 goto ack_done; 1184 goto ack_done;
1101 } 1185 }
1102 /* FALLTHROUGH */ 1186 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1103 case OP(RDMA_READ_RESPONSE_ONLY): 1187 goto ack_op_err;
1104 if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST))) 1188 /* Get the number of bytes the message was padded by. */
1105 goto ack_done; 1189 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1190 /*
1191 * Check that the data size is >= 0 && <= pmtu.
1192 * Remember to account for the AETH header (4) and
1193 * ICRC (4).
1194 */
1195 if (unlikely(tlen < (hdrsize + pad + 8)))
1196 goto ack_len_err;
1106 /* 1197 /*
1107 * Get the number of bytes the message was padded by. 1198 * If this is a response to a resent RDMA read, we
1199 * have to be careful to copy the data to the right
1200 * location.
1108 */ 1201 */
1202 qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1203 wqe, psn, pmtu);
1204 goto read_last;
1205
1206 case OP(RDMA_READ_RESPONSE_LAST):
1207 /* ACKs READ req. */
1208 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
1209 dev->n_rdma_seq++;
1210 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
1211 goto ack_done;
1212 }
1213 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1214 goto ack_op_err;
1215 /* Get the number of bytes the message was padded by. */
1109 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; 1216 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1110 /* 1217 /*
1111 * Check that the data size is >= 1 && <= pmtu. 1218 * Check that the data size is >= 1 && <= pmtu.
1112 * Remember to account for the AETH header (4) and 1219 * Remember to account for the AETH header (4) and
1113 * ICRC (4). 1220 * ICRC (4).
1114 */ 1221 */
1115 if (unlikely(tlen <= (hdrsize + pad + 8))) { 1222 if (unlikely(tlen <= (hdrsize + pad + 8)))
1116 /* XXX Need to generate an error CQ entry. */ 1223 goto ack_len_err;
1117 goto ack_done; 1224 read_last:
1118 }
1119 tlen -= hdrsize + pad + 8; 1225 tlen -= hdrsize + pad + 8;
1120 if (unlikely(tlen != qp->s_len)) { 1226 if (unlikely(tlen != qp->s_rdma_read_len))
1121 /* XXX Need to generate an error CQ entry. */ 1227 goto ack_len_err;
1122 goto ack_done;
1123 }
1124 if (!header_in_data) 1228 if (!header_in_data)
1125 aeth = be32_to_cpu(ohdr->u.aeth); 1229 aeth = be32_to_cpu(ohdr->u.aeth);
1126 else { 1230 else {
1127 aeth = be32_to_cpu(((__be32 *) data)[0]); 1231 aeth = be32_to_cpu(((__be32 *) data)[0]);
1128 data += sizeof(__be32); 1232 data += sizeof(__be32);
1129 } 1233 }
1130 ipath_copy_sge(&qp->s_sge, data, tlen); 1234 ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
1131 if (do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST))) { 1235 (void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST));
1132 /*
1133 * Change the state so we contimue
1134 * processing new requests and wake up the
1135 * tasklet if there are posted sends.
1136 */
1137 qp->s_state = OP(SEND_LAST);
1138 if (qp->s_tail != qp->s_head)
1139 tasklet_hi_schedule(&qp->s_task);
1140 }
1141 goto ack_done; 1236 goto ack_done;
1142 } 1237 }
1143 1238
1144ack_done: 1239ack_done:
1145 spin_unlock_irqrestore(&qp->s_lock, flags); 1240 spin_unlock_irqrestore(&qp->s_lock, flags);
1241 goto bail;
1242
1243ack_op_err:
1244 wc.status = IB_WC_LOC_QP_OP_ERR;
1245 goto ack_err;
1246
1247ack_len_err:
1248 wc.status = IB_WC_LOC_LEN_ERR;
1249ack_err:
1250 wc.wr_id = wqe->wr.wr_id;
1251 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
1252 wc.vendor_err = 0;
1253 wc.byte_len = 0;
1254 wc.imm_data = 0;
1255 wc.qp = &qp->ibqp;
1256 wc.src_qp = qp->remote_qpn;
1257 wc.wc_flags = 0;
1258 wc.pkey_index = 0;
1259 wc.slid = qp->remote_ah_attr.dlid;
1260 wc.sl = qp->remote_ah_attr.sl;
1261 wc.dlid_path_bits = 0;
1262 wc.port_num = 0;
1263 ipath_sqerror_qp(qp, &wc);
1146bail: 1264bail:
1147 return; 1265 return;
1148} 1266}
@@ -1162,7 +1280,7 @@ bail:
1162 * incoming RC packet for the given QP. 1280 * incoming RC packet for the given QP.
1163 * Called at interrupt level. 1281 * Called at interrupt level.
1164 * Return 1 if no more processing is needed; otherwise return 0 to 1282 * Return 1 if no more processing is needed; otherwise return 0 to
1165 * schedule a response to be sent and the s_lock unlocked. 1283 * schedule a response to be sent.
1166 */ 1284 */
1167static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, 1285static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1168 struct ipath_other_headers *ohdr, 1286 struct ipath_other_headers *ohdr,
@@ -1173,25 +1291,23 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1173 int diff, 1291 int diff,
1174 int header_in_data) 1292 int header_in_data)
1175{ 1293{
1176 struct ib_reth *reth; 1294 struct ipath_ack_entry *e;
1295 u8 i, prev;
1296 int old_req;
1177 1297
1178 if (diff > 0) { 1298 if (diff > 0) {
1179 /* 1299 /*
1180 * Packet sequence error. 1300 * Packet sequence error.
1181 * A NAK will ACK earlier sends and RDMA writes. 1301 * A NAK will ACK earlier sends and RDMA writes.
1182 * Don't queue the NAK if a RDMA read, atomic, or 1302 * Don't queue the NAK if we already sent one.
1183 * NAK is pending though.
1184 */ 1303 */
1185 if (qp->s_ack_state != OP(ACKNOWLEDGE) || 1304 if (!qp->r_nak_state) {
1186 qp->r_nak_state != 0)
1187 goto done;
1188 if (qp->r_ack_state < OP(COMPARE_SWAP)) {
1189 qp->r_ack_state = OP(SEND_ONLY);
1190 qp->r_nak_state = IB_NAK_PSN_ERROR; 1305 qp->r_nak_state = IB_NAK_PSN_ERROR;
1191 /* Use the expected PSN. */ 1306 /* Use the expected PSN. */
1192 qp->r_ack_psn = qp->r_psn; 1307 qp->r_ack_psn = qp->r_psn;
1308 goto send_ack;
1193 } 1309 }
1194 goto send_ack; 1310 goto done;
1195 } 1311 }
1196 1312
1197 /* 1313 /*
@@ -1204,8 +1320,46 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1204 * can coalesce an outstanding duplicate ACK. We have to 1320 * can coalesce an outstanding duplicate ACK. We have to
1205 * send the earliest so that RDMA reads can be restarted at 1321 * send the earliest so that RDMA reads can be restarted at
1206 * the requester's expected PSN. 1322 * the requester's expected PSN.
1323 *
1324 * First, find where this duplicate PSN falls within the
1325 * ACKs previously sent.
1207 */ 1326 */
1208 if (opcode == OP(RDMA_READ_REQUEST)) { 1327 psn &= IPATH_PSN_MASK;
1328 e = NULL;
1329 old_req = 1;
1330 spin_lock_irq(&qp->s_lock);
1331 for (i = qp->r_head_ack_queue; ; i = prev) {
1332 if (i == qp->s_tail_ack_queue)
1333 old_req = 0;
1334 if (i)
1335 prev = i - 1;
1336 else
1337 prev = IPATH_MAX_RDMA_ATOMIC;
1338 if (prev == qp->r_head_ack_queue) {
1339 e = NULL;
1340 break;
1341 }
1342 e = &qp->s_ack_queue[prev];
1343 if (!e->opcode) {
1344 e = NULL;
1345 break;
1346 }
1347 if (ipath_cmp24(psn, e->psn) >= 0)
1348 break;
1349 }
1350 switch (opcode) {
1351 case OP(RDMA_READ_REQUEST): {
1352 struct ib_reth *reth;
1353 u32 offset;
1354 u32 len;
1355
1356 /*
1357 * If we didn't find the RDMA read request in the ack queue,
1358 * or the send tasklet is already backed up to send an
1359 * earlier entry, we can ignore this request.
1360 */
1361 if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req)
1362 goto unlock_done;
1209 /* RETH comes after BTH */ 1363 /* RETH comes after BTH */
1210 if (!header_in_data) 1364 if (!header_in_data)
1211 reth = &ohdr->u.rc.reth; 1365 reth = &ohdr->u.rc.reth;
@@ -1214,88 +1368,87 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1214 data += sizeof(*reth); 1368 data += sizeof(*reth);
1215 } 1369 }
1216 /* 1370 /*
1217 * If we receive a duplicate RDMA request, it means the 1371 * Address range must be a subset of the original
1218 * requester saw a sequence error and needs to restart 1372 * request and start on pmtu boundaries.
1219 * from an earlier point. We can abort the current 1373 * We reuse the old ack_queue slot since the requester
1220 * RDMA read send in that case. 1374 * should not back up and request an earlier PSN for the
1375 * same request.
1221 */ 1376 */
1222 spin_lock_irq(&qp->s_lock); 1377 offset = ((psn - e->psn) & IPATH_PSN_MASK) *
1223 if (qp->s_ack_state != OP(ACKNOWLEDGE) && 1378 ib_mtu_enum_to_int(qp->path_mtu);
1224 (qp->s_hdrwords || ipath_cmp24(psn, qp->s_ack_psn) >= 0)) { 1379 len = be32_to_cpu(reth->length);
1225 /* 1380 if (unlikely(offset + len > e->rdma_sge.sge.sge_length))
1226 * We are already sending earlier requested data. 1381 goto unlock_done;
1227 * Don't abort it to send later out of sequence data. 1382 if (len != 0) {
1228 */
1229 spin_unlock_irq(&qp->s_lock);
1230 goto done;
1231 }
1232 qp->s_rdma_len = be32_to_cpu(reth->length);
1233 if (qp->s_rdma_len != 0) {
1234 u32 rkey = be32_to_cpu(reth->rkey); 1383 u32 rkey = be32_to_cpu(reth->rkey);
1235 u64 vaddr = be64_to_cpu(reth->vaddr); 1384 u64 vaddr = be64_to_cpu(reth->vaddr);
1236 int ok; 1385 int ok;
1237 1386
1238 /* 1387 ok = ipath_rkey_ok(qp, &e->rdma_sge,
1239 * Address range must be a subset of the original 1388 len, vaddr, rkey,
1240 * request and start on pmtu boundaries.
1241 */
1242 ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
1243 qp->s_rdma_len, vaddr, rkey,
1244 IB_ACCESS_REMOTE_READ); 1389 IB_ACCESS_REMOTE_READ);
1245 if (unlikely(!ok)) { 1390 if (unlikely(!ok))
1246 spin_unlock_irq(&qp->s_lock); 1391 goto unlock_done;
1247 goto done;
1248 }
1249 } else { 1392 } else {
1250 qp->s_rdma_sge.sg_list = NULL; 1393 e->rdma_sge.sg_list = NULL;
1251 qp->s_rdma_sge.num_sge = 0; 1394 e->rdma_sge.num_sge = 0;
1252 qp->s_rdma_sge.sge.mr = NULL; 1395 e->rdma_sge.sge.mr = NULL;
1253 qp->s_rdma_sge.sge.vaddr = NULL; 1396 e->rdma_sge.sge.vaddr = NULL;
1254 qp->s_rdma_sge.sge.length = 0; 1397 e->rdma_sge.sge.length = 0;
1255 qp->s_rdma_sge.sge.sge_length = 0; 1398 e->rdma_sge.sge.sge_length = 0;
1256 } 1399 }
1257 qp->s_ack_state = opcode; 1400 e->psn = psn;
1258 qp->s_ack_psn = psn; 1401 qp->s_ack_state = OP(ACKNOWLEDGE);
1259 spin_unlock_irq(&qp->s_lock); 1402 qp->s_tail_ack_queue = prev;
1260 tasklet_hi_schedule(&qp->s_task); 1403 break;
1261 goto send_ack;
1262 } 1404 }
1263 1405
1264 /*
1265 * A pending RDMA read will ACK anything before it so
1266 * ignore earlier duplicate requests.
1267 */
1268 if (qp->s_ack_state != OP(ACKNOWLEDGE))
1269 goto done;
1270
1271 /*
1272 * If an ACK is pending, don't replace the pending ACK
1273 * with an earlier one since the later one will ACK the earlier.
1274 * Also, if we already have a pending atomic, send it.
1275 */
1276 if (qp->r_ack_state != OP(ACKNOWLEDGE) &&
1277 (ipath_cmp24(psn, qp->r_ack_psn) <= 0 ||
1278 qp->r_ack_state >= OP(COMPARE_SWAP)))
1279 goto send_ack;
1280 switch (opcode) {
1281 case OP(COMPARE_SWAP): 1406 case OP(COMPARE_SWAP):
1282 case OP(FETCH_ADD): 1407 case OP(FETCH_ADD): {
1283 /* 1408 /*
1284 * Check for the PSN of the last atomic operation 1409 * If we didn't find the atomic request in the ack queue
1285 * performed and resend the result if found. 1410 * or the send tasklet is already backed up to send an
1411 * earlier entry, we can ignore this request.
1286 */ 1412 */
1287 if ((psn & IPATH_PSN_MASK) != qp->r_atomic_psn) 1413 if (!e || e->opcode != (u8) opcode || old_req)
1288 goto done; 1414 goto unlock_done;
1415 qp->s_ack_state = OP(ACKNOWLEDGE);
1416 qp->s_tail_ack_queue = prev;
1417 break;
1418 }
1419
1420 default:
1421 if (old_req)
1422 goto unlock_done;
1423 /*
1424 * Resend the most recent ACK if this request is
1425 * after all the previous RDMA reads and atomics.
1426 */
1427 if (i == qp->r_head_ack_queue) {
1428 spin_unlock_irq(&qp->s_lock);
1429 qp->r_nak_state = 0;
1430 qp->r_ack_psn = qp->r_psn - 1;
1431 goto send_ack;
1432 }
1433 /*
1434 * Resend the RDMA read or atomic op which
1435 * ACKs this duplicate request.
1436 */
1437 qp->s_ack_state = OP(ACKNOWLEDGE);
1438 qp->s_tail_ack_queue = i;
1289 break; 1439 break;
1290 } 1440 }
1291 qp->r_ack_state = opcode;
1292 qp->r_nak_state = 0; 1441 qp->r_nak_state = 0;
1293 qp->r_ack_psn = psn; 1442 spin_unlock_irq(&qp->s_lock);
1294send_ack: 1443 tasklet_hi_schedule(&qp->s_task);
1295 return 0;
1296 1444
1445unlock_done:
1446 spin_unlock_irq(&qp->s_lock);
1297done: 1447done:
1298 return 1; 1448 return 1;
1449
1450send_ack:
1451 return 0;
1299} 1452}
1300 1453
1301static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) 1454static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
@@ -1391,15 +1544,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1391 opcode == OP(SEND_LAST_WITH_IMMEDIATE)) 1544 opcode == OP(SEND_LAST_WITH_IMMEDIATE))
1392 break; 1545 break;
1393 nack_inv: 1546 nack_inv:
1394 /*
1395 * A NAK will ACK earlier sends and RDMA writes.
1396 * Don't queue the NAK if a RDMA read, atomic, or NAK
1397 * is pending though.
1398 */
1399 if (qp->r_ack_state >= OP(COMPARE_SWAP))
1400 goto send_ack;
1401 ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR); 1547 ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
1402 qp->r_ack_state = OP(SEND_ONLY);
1403 qp->r_nak_state = IB_NAK_INVALID_REQUEST; 1548 qp->r_nak_state = IB_NAK_INVALID_REQUEST;
1404 qp->r_ack_psn = qp->r_psn; 1549 qp->r_ack_psn = qp->r_psn;
1405 goto send_ack; 1550 goto send_ack;
@@ -1441,9 +1586,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1441 * Don't queue the NAK if a RDMA read or atomic 1586 * Don't queue the NAK if a RDMA read or atomic
1442 * is pending though. 1587 * is pending though.
1443 */ 1588 */
1444 if (qp->r_ack_state >= OP(COMPARE_SWAP)) 1589 if (qp->r_nak_state)
1445 goto send_ack; 1590 goto done;
1446 qp->r_ack_state = OP(SEND_ONLY);
1447 qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; 1591 qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
1448 qp->r_ack_psn = qp->r_psn; 1592 qp->r_ack_psn = qp->r_psn;
1449 goto send_ack; 1593 goto send_ack;
@@ -1567,7 +1711,19 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1567 goto rnr_nak; 1711 goto rnr_nak;
1568 goto send_last_imm; 1712 goto send_last_imm;
1569 1713
1570 case OP(RDMA_READ_REQUEST): 1714 case OP(RDMA_READ_REQUEST): {
1715 struct ipath_ack_entry *e;
1716 u32 len;
1717 u8 next;
1718
1719 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
1720 goto nack_acc;
1721 next = qp->r_head_ack_queue + 1;
1722 if (next > IPATH_MAX_RDMA_ATOMIC)
1723 next = 0;
1724 if (unlikely(next == qp->s_tail_ack_queue))
1725 goto nack_inv;
1726 e = &qp->s_ack_queue[qp->r_head_ack_queue];
1571 /* RETH comes after BTH */ 1727 /* RETH comes after BTH */
1572 if (!header_in_data) 1728 if (!header_in_data)
1573 reth = &ohdr->u.rc.reth; 1729 reth = &ohdr->u.rc.reth;
@@ -1575,72 +1731,75 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1575 reth = (struct ib_reth *)data; 1731 reth = (struct ib_reth *)data;
1576 data += sizeof(*reth); 1732 data += sizeof(*reth);
1577 } 1733 }
1578 if (unlikely(!(qp->qp_access_flags & 1734 len = be32_to_cpu(reth->length);
1579 IB_ACCESS_REMOTE_READ))) 1735 if (len) {
1580 goto nack_acc;
1581 spin_lock_irq(&qp->s_lock);
1582 qp->s_rdma_len = be32_to_cpu(reth->length);
1583 if (qp->s_rdma_len != 0) {
1584 u32 rkey = be32_to_cpu(reth->rkey); 1736 u32 rkey = be32_to_cpu(reth->rkey);
1585 u64 vaddr = be64_to_cpu(reth->vaddr); 1737 u64 vaddr = be64_to_cpu(reth->vaddr);
1586 int ok; 1738 int ok;
1587 1739
1588 /* Check rkey & NAK */ 1740 /* Check rkey & NAK */
1589 ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, 1741 ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
1590 qp->s_rdma_len, vaddr, rkey, 1742 rkey, IB_ACCESS_REMOTE_READ);
1591 IB_ACCESS_REMOTE_READ); 1743 if (unlikely(!ok))
1592 if (unlikely(!ok)) {
1593 spin_unlock_irq(&qp->s_lock);
1594 goto nack_acc; 1744 goto nack_acc;
1595 }
1596 /* 1745 /*
1597 * Update the next expected PSN. We add 1 later 1746 * Update the next expected PSN. We add 1 later
1598 * below, so only add the remainder here. 1747 * below, so only add the remainder here.
1599 */ 1748 */
1600 if (qp->s_rdma_len > pmtu) 1749 if (len > pmtu)
1601 qp->r_psn += (qp->s_rdma_len - 1) / pmtu; 1750 qp->r_psn += (len - 1) / pmtu;
1602 } else { 1751 } else {
1603 qp->s_rdma_sge.sg_list = NULL; 1752 e->rdma_sge.sg_list = NULL;
1604 qp->s_rdma_sge.num_sge = 0; 1753 e->rdma_sge.num_sge = 0;
1605 qp->s_rdma_sge.sge.mr = NULL; 1754 e->rdma_sge.sge.mr = NULL;
1606 qp->s_rdma_sge.sge.vaddr = NULL; 1755 e->rdma_sge.sge.vaddr = NULL;
1607 qp->s_rdma_sge.sge.length = 0; 1756 e->rdma_sge.sge.length = 0;
1608 qp->s_rdma_sge.sge.sge_length = 0; 1757 e->rdma_sge.sge.sge_length = 0;
1609 } 1758 }
1759 e->opcode = opcode;
1760 e->psn = psn;
1610 /* 1761 /*
1611 * We need to increment the MSN here instead of when we 1762 * We need to increment the MSN here instead of when we
1612 * finish sending the result since a duplicate request would 1763 * finish sending the result since a duplicate request would
1613 * increment it more than once. 1764 * increment it more than once.
1614 */ 1765 */
1615 qp->r_msn++; 1766 qp->r_msn++;
1616
1617 qp->s_ack_state = opcode;
1618 qp->s_ack_psn = psn;
1619 spin_unlock_irq(&qp->s_lock);
1620
1621 qp->r_psn++; 1767 qp->r_psn++;
1622 qp->r_state = opcode; 1768 qp->r_state = opcode;
1623 qp->r_nak_state = 0; 1769 qp->r_nak_state = 0;
1770 barrier();
1771 qp->r_head_ack_queue = next;
1624 1772
1625 /* Call ipath_do_rc_send() in another thread. */ 1773 /* Call ipath_do_rc_send() in another thread. */
1626 tasklet_hi_schedule(&qp->s_task); 1774 tasklet_hi_schedule(&qp->s_task);
1627 1775
1628 goto done; 1776 goto done;
1777 }
1629 1778
1630 case OP(COMPARE_SWAP): 1779 case OP(COMPARE_SWAP):
1631 case OP(FETCH_ADD): { 1780 case OP(FETCH_ADD): {
1632 struct ib_atomic_eth *ateth; 1781 struct ib_atomic_eth *ateth;
1782 struct ipath_ack_entry *e;
1633 u64 vaddr; 1783 u64 vaddr;
1784 atomic64_t *maddr;
1634 u64 sdata; 1785 u64 sdata;
1635 u32 rkey; 1786 u32 rkey;
1787 u8 next;
1636 1788
1789 if (unlikely(!(qp->qp_access_flags &
1790 IB_ACCESS_REMOTE_ATOMIC)))
1791 goto nack_acc;
1792 next = qp->r_head_ack_queue + 1;
1793 if (next > IPATH_MAX_RDMA_ATOMIC)
1794 next = 0;
1795 if (unlikely(next == qp->s_tail_ack_queue))
1796 goto nack_inv;
1637 if (!header_in_data) 1797 if (!header_in_data)
1638 ateth = &ohdr->u.atomic_eth; 1798 ateth = &ohdr->u.atomic_eth;
1639 else { 1799 else
1640 ateth = (struct ib_atomic_eth *)data; 1800 ateth = (struct ib_atomic_eth *)data;
1641 data += sizeof(*ateth); 1801 vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
1642 } 1802 be32_to_cpu(ateth->vaddr[1]);
1643 vaddr = be64_to_cpu(ateth->vaddr);
1644 if (unlikely(vaddr & (sizeof(u64) - 1))) 1803 if (unlikely(vaddr & (sizeof(u64) - 1)))
1645 goto nack_inv; 1804 goto nack_inv;
1646 rkey = be32_to_cpu(ateth->rkey); 1805 rkey = be32_to_cpu(ateth->rkey);
@@ -1649,63 +1808,50 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1649 sizeof(u64), vaddr, rkey, 1808 sizeof(u64), vaddr, rkey,
1650 IB_ACCESS_REMOTE_ATOMIC))) 1809 IB_ACCESS_REMOTE_ATOMIC)))
1651 goto nack_acc; 1810 goto nack_acc;
1652 if (unlikely(!(qp->qp_access_flags &
1653 IB_ACCESS_REMOTE_ATOMIC)))
1654 goto nack_acc;
1655 /* Perform atomic OP and save result. */ 1811 /* Perform atomic OP and save result. */
1812 maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
1656 sdata = be64_to_cpu(ateth->swap_data); 1813 sdata = be64_to_cpu(ateth->swap_data);
1657 spin_lock_irq(&dev->pending_lock); 1814 e = &qp->s_ack_queue[qp->r_head_ack_queue];
1658 qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; 1815 e->atomic_data = (opcode == OP(FETCH_ADD)) ?
1659 if (opcode == OP(FETCH_ADD)) 1816 (u64) atomic64_add_return(sdata, maddr) - sdata :
1660 *(u64 *) qp->r_sge.sge.vaddr = 1817 (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
1661 qp->r_atomic_data + sdata; 1818 be64_to_cpu(ateth->compare_data),
1662 else if (qp->r_atomic_data == 1819 sdata);
1663 be64_to_cpu(ateth->compare_data)) 1820 e->opcode = opcode;
1664 *(u64 *) qp->r_sge.sge.vaddr = sdata; 1821 e->psn = psn & IPATH_PSN_MASK;
1665 spin_unlock_irq(&dev->pending_lock);
1666 qp->r_msn++; 1822 qp->r_msn++;
1667 qp->r_atomic_psn = psn & IPATH_PSN_MASK; 1823 qp->r_psn++;
1668 psn |= 1 << 31; 1824 qp->r_state = opcode;
1669 break; 1825 qp->r_nak_state = 0;
1826 barrier();
1827 qp->r_head_ack_queue = next;
1828
1829 /* Call ipath_do_rc_send() in another thread. */
1830 tasklet_hi_schedule(&qp->s_task);
1831
1832 goto done;
1670 } 1833 }
1671 1834
1672 default: 1835 default:
1673 /* Drop packet for unknown opcodes. */ 1836 /* NAK unknown opcodes. */
1674 goto done; 1837 goto nack_inv;
1675 } 1838 }
1676 qp->r_psn++; 1839 qp->r_psn++;
1677 qp->r_state = opcode; 1840 qp->r_state = opcode;
1841 qp->r_ack_psn = psn;
1678 qp->r_nak_state = 0; 1842 qp->r_nak_state = 0;
1679 /* Send an ACK if requested or required. */ 1843 /* Send an ACK if requested or required. */
1680 if (psn & (1 << 31)) { 1844 if (psn & (1 << 31))
1681 /*
1682 * Coalesce ACKs unless there is a RDMA READ or
1683 * ATOMIC pending.
1684 */
1685 if (qp->r_ack_state < OP(COMPARE_SWAP)) {
1686 qp->r_ack_state = opcode;
1687 qp->r_ack_psn = psn;
1688 }
1689 goto send_ack; 1845 goto send_ack;
1690 }
1691 goto done; 1846 goto done;
1692 1847
1693nack_acc: 1848nack_acc:
1694 /* 1849 ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
1695 * A NAK will ACK earlier sends and RDMA writes. 1850 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
1696 * Don't queue the NAK if a RDMA read, atomic, or NAK 1851 qp->r_ack_psn = qp->r_psn;
1697 * is pending though. 1852
1698 */
1699 if (qp->r_ack_state < OP(COMPARE_SWAP)) {
1700 ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
1701 qp->r_ack_state = OP(RDMA_WRITE_ONLY);
1702 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
1703 qp->r_ack_psn = qp->r_psn;
1704 }
1705send_ack: 1853send_ack:
1706 /* Send ACK right away unless the send tasklet has a pending ACK. */ 1854 send_rc_ack(qp);
1707 if (qp->s_ack_state == OP(ACKNOWLEDGE))
1708 send_rc_ack(qp);
1709 1855
1710done: 1856done:
1711 return; 1857 return;
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index dffc76016d3c..c182bcd62098 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -126,9 +126,18 @@
126#define INFINIPATH_E_RESET 0x0004000000000000ULL 126#define INFINIPATH_E_RESET 0x0004000000000000ULL
127#define INFINIPATH_E_HARDWARE 0x0008000000000000ULL 127#define INFINIPATH_E_HARDWARE 0x0008000000000000ULL
128 128
129/*
130 * this is used to print "common" packet errors only when the
131 * __IPATH_ERRPKTDBG bit is set in ipath_debug.
132 */
133#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \
134 | INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \
135 | INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \
136 | INFINIPATH_E_REBP )
137
129/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ 138/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
130/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo 139/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
131 * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: eagerTID, 3: expTID 140 * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: expTID, 3: eagerTID
132 * bit 4: flag buffer, 5: datainfo, 6: header info */ 141 * bit 4: flag buffer, 5: datainfo, 6: header info */
133#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL 142#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL
134#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40 143#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
@@ -143,8 +152,8 @@
143/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */ 152/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
144#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL 153#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL
145#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL 154#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL
146#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x04ULL 155#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x04ULL
147#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x08ULL 156#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL
148#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL 157#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL
149#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL 158#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
150#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL 159#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL
@@ -299,13 +308,6 @@
299#define INFINIPATH_XGXS_RX_POL_SHIFT 19 308#define INFINIPATH_XGXS_RX_POL_SHIFT 19
300#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL 309#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
301 310
302#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
303
304/* TID entries (memory), HT-only */
305#define INFINIPATH_RT_VALID 0x8000000000000000ULL
306#define INFINIPATH_RT_ADDR_SHIFT 0
307#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF
308#define INFINIPATH_RT_BUFSIZE_SHIFT 48
309 311
310/* 312/*
311 * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our 313 * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index e86cb171872e..d9c2a9b15d86 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -202,6 +202,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
202 wq->tail = tail; 202 wq->tail = tail;
203 203
204 ret = 1; 204 ret = 1;
205 qp->r_wrid_valid = 1;
205 if (handler) { 206 if (handler) {
206 u32 n; 207 u32 n;
207 208
@@ -229,7 +230,6 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
229 } 230 }
230 } 231 }
231 spin_unlock_irqrestore(&rq->lock, flags); 232 spin_unlock_irqrestore(&rq->lock, flags);
232 qp->r_wrid_valid = 1;
233 233
234bail: 234bail:
235 return ret; 235 return ret;
@@ -255,6 +255,7 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
255 unsigned long flags; 255 unsigned long flags;
256 struct ib_wc wc; 256 struct ib_wc wc;
257 u64 sdata; 257 u64 sdata;
258 atomic64_t *maddr;
258 259
259 qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); 260 qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
260 if (!qp) { 261 if (!qp) {
@@ -265,7 +266,8 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
265again: 266again:
266 spin_lock_irqsave(&sqp->s_lock, flags); 267 spin_lock_irqsave(&sqp->s_lock, flags);
267 268
268 if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK)) { 269 if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) ||
270 qp->s_rnr_timeout) {
269 spin_unlock_irqrestore(&sqp->s_lock, flags); 271 spin_unlock_irqrestore(&sqp->s_lock, flags);
270 goto done; 272 goto done;
271 } 273 }
@@ -310,7 +312,7 @@ again:
310 sqp->s_rnr_retry--; 312 sqp->s_rnr_retry--;
311 dev->n_rnr_naks++; 313 dev->n_rnr_naks++;
312 sqp->s_rnr_timeout = 314 sqp->s_rnr_timeout =
313 ib_ipath_rnr_table[sqp->r_min_rnr_timer]; 315 ib_ipath_rnr_table[qp->r_min_rnr_timer];
314 ipath_insert_rnr_queue(sqp); 316 ipath_insert_rnr_queue(sqp);
315 goto done; 317 goto done;
316 } 318 }
@@ -343,20 +345,22 @@ again:
343 wc.sl = sqp->remote_ah_attr.sl; 345 wc.sl = sqp->remote_ah_attr.sl;
344 wc.dlid_path_bits = 0; 346 wc.dlid_path_bits = 0;
345 wc.port_num = 0; 347 wc.port_num = 0;
348 spin_lock_irqsave(&sqp->s_lock, flags);
346 ipath_sqerror_qp(sqp, &wc); 349 ipath_sqerror_qp(sqp, &wc);
350 spin_unlock_irqrestore(&sqp->s_lock, flags);
347 goto done; 351 goto done;
348 } 352 }
349 break; 353 break;
350 354
351 case IB_WR_RDMA_READ: 355 case IB_WR_RDMA_READ:
356 if (unlikely(!(qp->qp_access_flags &
357 IB_ACCESS_REMOTE_READ)))
358 goto acc_err;
352 if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, 359 if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
353 wqe->wr.wr.rdma.remote_addr, 360 wqe->wr.wr.rdma.remote_addr,
354 wqe->wr.wr.rdma.rkey, 361 wqe->wr.wr.rdma.rkey,
355 IB_ACCESS_REMOTE_READ))) 362 IB_ACCESS_REMOTE_READ)))
356 goto acc_err; 363 goto acc_err;
357 if (unlikely(!(qp->qp_access_flags &
358 IB_ACCESS_REMOTE_READ)))
359 goto acc_err;
360 qp->r_sge.sge = wqe->sg_list[0]; 364 qp->r_sge.sge = wqe->sg_list[0];
361 qp->r_sge.sg_list = wqe->sg_list + 1; 365 qp->r_sge.sg_list = wqe->sg_list + 1;
362 qp->r_sge.num_sge = wqe->wr.num_sge; 366 qp->r_sge.num_sge = wqe->wr.num_sge;
@@ -364,22 +368,22 @@ again:
364 368
365 case IB_WR_ATOMIC_CMP_AND_SWP: 369 case IB_WR_ATOMIC_CMP_AND_SWP:
366 case IB_WR_ATOMIC_FETCH_AND_ADD: 370 case IB_WR_ATOMIC_FETCH_AND_ADD:
371 if (unlikely(!(qp->qp_access_flags &
372 IB_ACCESS_REMOTE_ATOMIC)))
373 goto acc_err;
367 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), 374 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
368 wqe->wr.wr.rdma.remote_addr, 375 wqe->wr.wr.atomic.remote_addr,
369 wqe->wr.wr.rdma.rkey, 376 wqe->wr.wr.atomic.rkey,
370 IB_ACCESS_REMOTE_ATOMIC))) 377 IB_ACCESS_REMOTE_ATOMIC)))
371 goto acc_err; 378 goto acc_err;
372 /* Perform atomic OP and save result. */ 379 /* Perform atomic OP and save result. */
373 sdata = wqe->wr.wr.atomic.swap; 380 maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
374 spin_lock_irqsave(&dev->pending_lock, flags); 381 sdata = wqe->wr.wr.atomic.compare_add;
375 qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; 382 *(u64 *) sqp->s_sge.sge.vaddr =
376 if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) 383 (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
377 *(u64 *) qp->r_sge.sge.vaddr = 384 (u64) atomic64_add_return(sdata, maddr) - sdata :
378 qp->r_atomic_data + sdata; 385 (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
379 else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add) 386 sdata, wqe->wr.wr.atomic.swap);
380 *(u64 *) qp->r_sge.sge.vaddr = sdata;
381 spin_unlock_irqrestore(&dev->pending_lock, flags);
382 *(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data;
383 goto send_comp; 387 goto send_comp;
384 388
385 default: 389 default:
@@ -440,7 +444,7 @@ again:
440send_comp: 444send_comp:
441 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; 445 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
442 446
443 if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) || 447 if (!(sqp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
444 (wqe->wr.send_flags & IB_SEND_SIGNALED)) { 448 (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
445 wc.wr_id = wqe->wr.wr_id; 449 wc.wr_id = wqe->wr.wr_id;
446 wc.status = IB_WC_SUCCESS; 450 wc.status = IB_WC_SUCCESS;
@@ -502,7 +506,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
502 * We clear the tasklet flag now since we are committing to return 506 * We clear the tasklet flag now since we are committing to return
503 * from the tasklet function. 507 * from the tasklet function.
504 */ 508 */
505 clear_bit(IPATH_S_BUSY, &qp->s_flags); 509 clear_bit(IPATH_S_BUSY, &qp->s_busy);
506 tasklet_unlock(&qp->s_task); 510 tasklet_unlock(&qp->s_task);
507 want_buffer(dev->dd); 511 want_buffer(dev->dd);
508 dev->n_piowait++; 512 dev->n_piowait++;
@@ -541,6 +545,9 @@ int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
541 wr->sg_list[0].addr & (sizeof(u64) - 1))) { 545 wr->sg_list[0].addr & (sizeof(u64) - 1))) {
542 ret = -EINVAL; 546 ret = -EINVAL;
543 goto bail; 547 goto bail;
548 } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) {
549 ret = -EINVAL;
550 goto bail;
544 } 551 }
545 /* IB spec says that num_sge == 0 is OK. */ 552 /* IB spec says that num_sge == 0 is OK. */
546 if (wr->num_sge > qp->s_max_sge) { 553 if (wr->num_sge > qp->s_max_sge) {
@@ -647,7 +654,7 @@ void ipath_do_ruc_send(unsigned long data)
647 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); 654 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
648 struct ipath_other_headers *ohdr; 655 struct ipath_other_headers *ohdr;
649 656
650 if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags)) 657 if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
651 goto bail; 658 goto bail;
652 659
653 if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) { 660 if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) {
@@ -683,19 +690,15 @@ again:
683 */ 690 */
684 spin_lock_irqsave(&qp->s_lock, flags); 691 spin_lock_irqsave(&qp->s_lock, flags);
685 692
686 /* Sending responses has higher priority over sending requests. */ 693 if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
687 if (qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE && 694 ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
688 (bth0 = ipath_make_rc_ack(qp, ohdr, pmtu)) != 0) 695 ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
689 bth2 = qp->s_ack_psn++ & IPATH_PSN_MASK;
690 else if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
691 ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
692 ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
693 /* 696 /*
694 * Clear the busy bit before unlocking to avoid races with 697 * Clear the busy bit before unlocking to avoid races with
695 * adding new work queue items and then failing to process 698 * adding new work queue items and then failing to process
696 * them. 699 * them.
697 */ 700 */
698 clear_bit(IPATH_S_BUSY, &qp->s_flags); 701 clear_bit(IPATH_S_BUSY, &qp->s_busy);
699 spin_unlock_irqrestore(&qp->s_lock, flags); 702 spin_unlock_irqrestore(&qp->s_lock, flags);
700 goto bail; 703 goto bail;
701 } 704 }
@@ -728,7 +731,7 @@ again:
728 goto again; 731 goto again;
729 732
730clear: 733clear:
731 clear_bit(IPATH_S_BUSY, &qp->s_flags); 734 clear_bit(IPATH_S_BUSY, &qp->s_busy);
732bail: 735bail:
733 return; 736 return;
734} 737}
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index 30a825928fcf..9307f7187ca5 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -207,7 +207,7 @@ void ipath_get_faststats(unsigned long opaque)
207 * don't access the chip while running diags, or memory diags can 207 * don't access the chip while running diags, or memory diags can
208 * fail 208 * fail
209 */ 209 */
210 if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT) || 210 if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) ||
211 ipath_diag_inuse) 211 ipath_diag_inuse)
212 /* but re-arm the timer, for diags case; won't hurt other */ 212 /* but re-arm the timer, for diags case; won't hurt other */
213 goto done; 213 goto done;
@@ -237,11 +237,13 @@ void ipath_get_faststats(unsigned long opaque)
237 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) 237 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs)
238 && time_after(jiffies, dd->ipath_unmasktime)) { 238 && time_after(jiffies, dd->ipath_unmasktime)) {
239 char ebuf[256]; 239 char ebuf[256];
240 ipath_decode_err(ebuf, sizeof ebuf, 240 int iserr;
241 iserr = ipath_decode_err(ebuf, sizeof ebuf,
241 (dd->ipath_maskederrs & ~dd-> 242 (dd->ipath_maskederrs & ~dd->
242 ipath_ignorederrs)); 243 ipath_ignorederrs));
243 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) & 244 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
244 ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) 245 ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
246 INFINIPATH_E_PKTERRS ))
245 ipath_dev_err(dd, "Re-enabling masked errors " 247 ipath_dev_err(dd, "Re-enabling masked errors "
246 "(%s)\n", ebuf); 248 "(%s)\n", ebuf);
247 else { 249 else {
@@ -252,8 +254,12 @@ void ipath_get_faststats(unsigned long opaque)
252 * them. So only complain about these at debug 254 * them. So only complain about these at debug
253 * level. 255 * level.
254 */ 256 */
255 ipath_dbg("Disabling frequent queue full errors " 257 if (iserr)
256 "(%s)\n", ebuf); 258 ipath_dbg("Re-enabling queue full errors (%s)\n",
259 ebuf);
260 else
261 ipath_cdbg(ERRPKT, "Re-enabling packet"
262 " problem interrupt (%s)\n", ebuf);
257 } 263 }
258 dd->ipath_maskederrs = dd->ipath_ignorederrs; 264 dd->ipath_maskederrs = dd->ipath_ignorederrs;
259 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 265 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index 325d6634ff53..1c2b03c2ef5e 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -42,7 +42,7 @@ static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
42{ 42{
43 if (++qp->s_last == qp->s_size) 43 if (++qp->s_last == qp->s_size)
44 qp->s_last = 0; 44 qp->s_last = 0;
45 if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || 45 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
46 (wqe->wr.send_flags & IB_SEND_SIGNALED)) { 46 (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
47 wc->wr_id = wqe->wr.wr_id; 47 wc->wr_id = wqe->wr.wr_id;
48 wc->status = IB_WC_SUCCESS; 48 wc->status = IB_WC_SUCCESS;
@@ -344,13 +344,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
344 send_first: 344 send_first:
345 if (qp->r_reuse_sge) { 345 if (qp->r_reuse_sge) {
346 qp->r_reuse_sge = 0; 346 qp->r_reuse_sge = 0;
347 qp->r_sge = qp->s_rdma_sge; 347 qp->r_sge = qp->s_rdma_read_sge;
348 } else if (!ipath_get_rwqe(qp, 0)) { 348 } else if (!ipath_get_rwqe(qp, 0)) {
349 dev->n_pkt_drops++; 349 dev->n_pkt_drops++;
350 goto done; 350 goto done;
351 } 351 }
352 /* Save the WQE so we can reuse it in case of an error. */ 352 /* Save the WQE so we can reuse it in case of an error. */
353 qp->s_rdma_sge = qp->r_sge; 353 qp->s_rdma_read_sge = qp->r_sge;
354 qp->r_rcv_len = 0; 354 qp->r_rcv_len = 0;
355 if (opcode == OP(SEND_ONLY)) 355 if (opcode == OP(SEND_ONLY))
356 goto send_last; 356 goto send_last;
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 9a3e54664ee4..a518f7c8fa83 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -308,6 +308,11 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
308 goto bail; 308 goto bail;
309 } 309 }
310 310
311 if (wr->wr.ud.ah->pd != qp->ibqp.pd) {
312 ret = -EPERM;
313 goto bail;
314 }
315
311 /* IB spec says that num_sge == 0 is OK. */ 316 /* IB spec says that num_sge == 0 is OK. */
312 if (wr->num_sge > qp->s_max_sge) { 317 if (wr->num_sge > qp->s_max_sge) {
313 ret = -EINVAL; 318 ret = -EINVAL;
@@ -467,7 +472,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
467 472
468done: 473done:
469 /* Queue the completion status entry. */ 474 /* Queue the completion status entry. */
470 if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || 475 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
471 (wr->send_flags & IB_SEND_SIGNALED)) { 476 (wr->send_flags & IB_SEND_SIGNALED)) {
472 wc.wr_id = wr->wr_id; 477 wc.wr_id = wr->wr_id;
473 wc.status = IB_WC_SUCCESS; 478 wc.status = IB_WC_SUCCESS;
@@ -647,6 +652,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
647 ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); 652 ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
648 ipath_copy_sge(&qp->r_sge, data, 653 ipath_copy_sge(&qp->r_sge, data,
649 wc.byte_len - sizeof(struct ib_grh)); 654 wc.byte_len - sizeof(struct ib_grh));
655 qp->r_wrid_valid = 0;
650 wc.wr_id = qp->r_wr_id; 656 wc.wr_id = qp->r_wr_id;
651 wc.status = IB_WC_SUCCESS; 657 wc.status = IB_WC_SUCCESS;
652 wc.opcode = IB_WC_RECV; 658 wc.opcode = IB_WC_RECV;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 2aaacdb7e52a..18c6df2052c2 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -438,6 +438,10 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
438 struct ipath_mcast *mcast; 438 struct ipath_mcast *mcast;
439 struct ipath_mcast_qp *p; 439 struct ipath_mcast_qp *p;
440 440
441 if (lnh != IPATH_LRH_GRH) {
442 dev->n_pkt_drops++;
443 goto bail;
444 }
441 mcast = ipath_mcast_find(&hdr->u.l.grh.dgid); 445 mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
442 if (mcast == NULL) { 446 if (mcast == NULL) {
443 dev->n_pkt_drops++; 447 dev->n_pkt_drops++;
@@ -445,8 +449,7 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
445 } 449 }
446 dev->n_multicast_rcv++; 450 dev->n_multicast_rcv++;
447 list_for_each_entry_rcu(p, &mcast->qp_list, list) 451 list_for_each_entry_rcu(p, &mcast->qp_list, list)
448 ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data, 452 ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
449 tlen, p->qp);
450 /* 453 /*
451 * Notify ipath_multicast_detach() if it is waiting for us 454 * Notify ipath_multicast_detach() if it is waiting for us
452 * to finish. 455 * to finish.
@@ -773,7 +776,6 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
773 /* +1 is for the qword padding of pbc */ 776 /* +1 is for the qword padding of pbc */
774 plen = hdrwords + ((len + 3) >> 2) + 1; 777 plen = hdrwords + ((len + 3) >> 2) + 1;
775 if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) { 778 if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
776 ipath_dbg("packet len 0x%x too long, failing\n", plen);
777 ret = -EINVAL; 779 ret = -EINVAL;
778 goto bail; 780 goto bail;
779 } 781 }
@@ -980,14 +982,14 @@ static int ipath_query_device(struct ib_device *ibdev,
980 props->max_cqe = ib_ipath_max_cqes; 982 props->max_cqe = ib_ipath_max_cqes;
981 props->max_mr = dev->lk_table.max; 983 props->max_mr = dev->lk_table.max;
982 props->max_pd = ib_ipath_max_pds; 984 props->max_pd = ib_ipath_max_pds;
983 props->max_qp_rd_atom = 1; 985 props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
984 props->max_qp_init_rd_atom = 1; 986 props->max_qp_init_rd_atom = 255;
985 /* props->max_res_rd_atom */ 987 /* props->max_res_rd_atom */
986 props->max_srq = ib_ipath_max_srqs; 988 props->max_srq = ib_ipath_max_srqs;
987 props->max_srq_wr = ib_ipath_max_srq_wrs; 989 props->max_srq_wr = ib_ipath_max_srq_wrs;
988 props->max_srq_sge = ib_ipath_max_srq_sges; 990 props->max_srq_sge = ib_ipath_max_srq_sges;
989 /* props->local_ca_ack_delay */ 991 /* props->local_ca_ack_delay */
990 props->atomic_cap = IB_ATOMIC_HCA; 992 props->atomic_cap = IB_ATOMIC_GLOB;
991 props->max_pkeys = ipath_get_npkeys(dev->dd); 993 props->max_pkeys = ipath_get_npkeys(dev->dd);
992 props->max_mcast_grp = ib_ipath_max_mcast_grps; 994 props->max_mcast_grp = ib_ipath_max_mcast_grps;
993 props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached; 995 props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
@@ -1557,7 +1559,6 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
1557 dev->node_type = RDMA_NODE_IB_CA; 1559 dev->node_type = RDMA_NODE_IB_CA;
1558 dev->phys_port_cnt = 1; 1560 dev->phys_port_cnt = 1;
1559 dev->dma_device = &dd->pcidev->dev; 1561 dev->dma_device = &dd->pcidev->dev;
1560 dev->class_dev.dev = dev->dma_device;
1561 dev->query_device = ipath_query_device; 1562 dev->query_device = ipath_query_device;
1562 dev->modify_device = ipath_modify_device; 1563 dev->modify_device = ipath_modify_device;
1563 dev->query_port = ipath_query_port; 1564 dev->query_port = ipath_query_port;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index c0c8d5b24a7d..7c4929f1cb5b 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -40,9 +40,12 @@
40#include <linux/interrupt.h> 40#include <linux/interrupt.h>
41#include <linux/kref.h> 41#include <linux/kref.h>
42#include <rdma/ib_pack.h> 42#include <rdma/ib_pack.h>
43#include <rdma/ib_user_verbs.h>
43 44
44#include "ipath_layer.h" 45#include "ipath_layer.h"
45 46
47#define IPATH_MAX_RDMA_ATOMIC 4
48
46#define QPN_MAX (1 << 24) 49#define QPN_MAX (1 << 24)
47#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) 50#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
48 51
@@ -89,7 +92,7 @@ struct ib_reth {
89} __attribute__ ((packed)); 92} __attribute__ ((packed));
90 93
91struct ib_atomic_eth { 94struct ib_atomic_eth {
92 __be64 vaddr; 95 __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
93 __be32 rkey; 96 __be32 rkey;
94 __be64 swap_data; 97 __be64 swap_data;
95 __be64 compare_data; 98 __be64 compare_data;
@@ -108,7 +111,7 @@ struct ipath_other_headers {
108 } rc; 111 } rc;
109 struct { 112 struct {
110 __be32 aeth; 113 __be32 aeth;
111 __be64 atomic_ack_eth; 114 __be32 atomic_ack_eth[2];
112 } at; 115 } at;
113 __be32 imm_data; 116 __be32 imm_data;
114 __be32 aeth; 117 __be32 aeth;
@@ -186,7 +189,7 @@ struct ipath_mmap_info {
186struct ipath_cq_wc { 189struct ipath_cq_wc {
187 u32 head; /* index of next entry to fill */ 190 u32 head; /* index of next entry to fill */
188 u32 tail; /* index of next ib_poll_cq() entry */ 191 u32 tail; /* index of next ib_poll_cq() entry */
189 struct ib_wc queue[1]; /* this is actually size ibcq.cqe + 1 */ 192 struct ib_uverbs_wc queue[1]; /* this is actually size ibcq.cqe + 1 */
190}; 193};
191 194
192/* 195/*
@@ -312,6 +315,19 @@ struct ipath_sge_state {
312}; 315};
313 316
314/* 317/*
318 * This structure holds the information that the send tasklet needs
319 * to send a RDMA read response or atomic operation.
320 */
321struct ipath_ack_entry {
322 u8 opcode;
323 u32 psn;
324 union {
325 struct ipath_sge_state rdma_sge;
326 u64 atomic_data;
327 };
328};
329
330/*
315 * Variables prefixed with s_ are for the requester (sender). 331 * Variables prefixed with s_ are for the requester (sender).
316 * Variables prefixed with r_ are for the responder (receiver). 332 * Variables prefixed with r_ are for the responder (receiver).
317 * Variables prefixed with ack_ are for responder replies. 333 * Variables prefixed with ack_ are for responder replies.
@@ -333,24 +349,24 @@ struct ipath_qp {
333 struct ipath_mmap_info *ip; 349 struct ipath_mmap_info *ip;
334 struct ipath_sge_state *s_cur_sge; 350 struct ipath_sge_state *s_cur_sge;
335 struct ipath_sge_state s_sge; /* current send request data */ 351 struct ipath_sge_state s_sge; /* current send request data */
336 /* current RDMA read send data */ 352 struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1];
337 struct ipath_sge_state s_rdma_sge; 353 struct ipath_sge_state s_ack_rdma_sge;
354 struct ipath_sge_state s_rdma_read_sge;
338 struct ipath_sge_state r_sge; /* current receive data */ 355 struct ipath_sge_state r_sge; /* current receive data */
339 spinlock_t s_lock; 356 spinlock_t s_lock;
340 unsigned long s_flags; 357 unsigned long s_busy;
341 u32 s_hdrwords; /* size of s_hdr in 32 bit words */ 358 u32 s_hdrwords; /* size of s_hdr in 32 bit words */
342 u32 s_cur_size; /* size of send packet in bytes */ 359 u32 s_cur_size; /* size of send packet in bytes */
343 u32 s_len; /* total length of s_sge */ 360 u32 s_len; /* total length of s_sge */
344 u32 s_rdma_len; /* total length of s_rdma_sge */ 361 u32 s_rdma_read_len; /* total length of s_rdma_read_sge */
345 u32 s_next_psn; /* PSN for next request */ 362 u32 s_next_psn; /* PSN for next request */
346 u32 s_last_psn; /* last response PSN processed */ 363 u32 s_last_psn; /* last response PSN processed */
347 u32 s_psn; /* current packet sequence number */ 364 u32 s_psn; /* current packet sequence number */
348 u32 s_ack_psn; /* PSN for RDMA_READ */ 365 u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */
366 u32 s_ack_psn; /* PSN for acking sends and RDMA writes */
349 u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ 367 u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
350 u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ 368 u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
351 u64 r_wr_id; /* ID for current receive WQE */ 369 u64 r_wr_id; /* ID for current receive WQE */
352 u64 r_atomic_data; /* data for last atomic op */
353 u32 r_atomic_psn; /* PSN of last atomic op */
354 u32 r_len; /* total length of r_sge */ 370 u32 r_len; /* total length of r_sge */
355 u32 r_rcv_len; /* receive data len processed */ 371 u32 r_rcv_len; /* receive data len processed */
356 u32 r_psn; /* expected rcv packet sequence number */ 372 u32 r_psn; /* expected rcv packet sequence number */
@@ -360,12 +376,13 @@ struct ipath_qp {
360 u8 s_ack_state; /* opcode of packet to ACK */ 376 u8 s_ack_state; /* opcode of packet to ACK */
361 u8 s_nak_state; /* non-zero if NAK is pending */ 377 u8 s_nak_state; /* non-zero if NAK is pending */
362 u8 r_state; /* opcode of last packet received */ 378 u8 r_state; /* opcode of last packet received */
363 u8 r_ack_state; /* opcode of packet to ACK */
364 u8 r_nak_state; /* non-zero if NAK is pending */ 379 u8 r_nak_state; /* non-zero if NAK is pending */
365 u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ 380 u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
366 u8 r_reuse_sge; /* for UC receive errors */ 381 u8 r_reuse_sge; /* for UC receive errors */
367 u8 r_sge_inx; /* current index into sg_list */ 382 u8 r_sge_inx; /* current index into sg_list */
368 u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */ 383 u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */
384 u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
385 u8 r_head_ack_queue; /* index into s_ack_queue[] */
369 u8 qp_access_flags; 386 u8 qp_access_flags;
370 u8 s_max_sge; /* size of s_wq->sg_list */ 387 u8 s_max_sge; /* size of s_wq->sg_list */
371 u8 s_retry_cnt; /* number of times to retry */ 388 u8 s_retry_cnt; /* number of times to retry */
@@ -374,6 +391,10 @@ struct ipath_qp {
374 u8 s_rnr_retry; /* requester RNR retry counter */ 391 u8 s_rnr_retry; /* requester RNR retry counter */
375 u8 s_wait_credit; /* limit number of unacked packets sent */ 392 u8 s_wait_credit; /* limit number of unacked packets sent */
376 u8 s_pkey_index; /* PKEY index to use */ 393 u8 s_pkey_index; /* PKEY index to use */
394 u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
395 u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
396 u8 s_tail_ack_queue; /* index into s_ack_queue[] */
397 u8 s_flags;
377 u8 timeout; /* Timeout for this QP */ 398 u8 timeout; /* Timeout for this QP */
378 enum ib_mtu path_mtu; 399 enum ib_mtu path_mtu;
379 u32 remote_qpn; 400 u32 remote_qpn;
@@ -390,11 +411,16 @@ struct ipath_qp {
390 struct ipath_sge r_sg_list[0]; /* verified SGEs */ 411 struct ipath_sge r_sg_list[0]; /* verified SGEs */
391}; 412};
392 413
414/* Bit definition for s_busy. */
415#define IPATH_S_BUSY 0
416
393/* 417/*
394 * Bit definitions for s_flags. 418 * Bit definitions for s_flags.
395 */ 419 */
396#define IPATH_S_BUSY 0 420#define IPATH_S_SIGNAL_REQ_WR 0x01
397#define IPATH_S_SIGNAL_REQ_WR 1 421#define IPATH_S_FENCE_PENDING 0x02
422#define IPATH_S_RDMAR_PENDING 0x04
423#define IPATH_S_ACK_PENDING 0x08
398 424
399#define IPATH_PSN_CREDIT 2048 425#define IPATH_PSN_CREDIT 2048
400 426
@@ -706,8 +732,6 @@ int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
706 732
707int ipath_destroy_srq(struct ib_srq *ibsrq); 733int ipath_destroy_srq(struct ib_srq *ibsrq);
708 734
709void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
710
711int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); 735int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
712 736
713struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, 737struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
@@ -757,9 +781,6 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
757 781
758void ipath_do_ruc_send(unsigned long data); 782void ipath_do_ruc_send(unsigned long data);
759 783
760u32 ipath_make_rc_ack(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
761 u32 pmtu);
762
763int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr, 784int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
764 u32 pmtu, u32 *bth0p, u32 *bth2p); 785 u32 pmtu, u32 *bth0p, u32 *bth2p);
765 786
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 0d9b7d06bbc2..773145e29947 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -1013,14 +1013,14 @@ static struct {
1013 u64 latest_fw; 1013 u64 latest_fw;
1014 u32 flags; 1014 u32 flags;
1015} mthca_hca_table[] = { 1015} mthca_hca_table[] = {
1016 [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 4, 0), 1016 [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 5, 0),
1017 .flags = 0 }, 1017 .flags = 0 },
1018 [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 600), 1018 [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 8, 200),
1019 .flags = MTHCA_FLAG_PCIE }, 1019 .flags = MTHCA_FLAG_PCIE },
1020 [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 400), 1020 [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 2, 0),
1021 .flags = MTHCA_FLAG_MEMFREE | 1021 .flags = MTHCA_FLAG_MEMFREE |
1022 MTHCA_FLAG_PCIE }, 1022 MTHCA_FLAG_PCIE },
1023 [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 1, 0), 1023 [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 2, 0),
1024 .flags = MTHCA_FLAG_MEMFREE | 1024 .flags = MTHCA_FLAG_MEMFREE |
1025 MTHCA_FLAG_PCIE | 1025 MTHCA_FLAG_PCIE |
1026 MTHCA_FLAG_SINAI_OPT } 1026 MTHCA_FLAG_SINAI_OPT }
@@ -1135,7 +1135,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
1135 goto err_cmd; 1135 goto err_cmd;
1136 1136
1137 if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) { 1137 if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) {
1138 mthca_warn(mdev, "HCA FW version %d.%d.%d is old (%d.%d.%d is current).\n", 1138 mthca_warn(mdev, "HCA FW version %d.%d.%3d is old (%d.%d.%3d is current).\n",
1139 (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, 1139 (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff,
1140 (int) (mdev->fw_ver & 0xffff), 1140 (int) (mdev->fw_ver & 0xffff),
1141 (int) (mthca_hca_table[hca_type].latest_fw >> 32), 1141 (int) (mthca_hca_table[hca_type].latest_fw >> 32),
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index ee561c569d5f..aa6c70a6a36f 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -297,7 +297,8 @@ out:
297 297
298int mthca_write_mtt_size(struct mthca_dev *dev) 298int mthca_write_mtt_size(struct mthca_dev *dev)
299{ 299{
300 if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy) 300 if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
301 !(dev->mthca_flags & MTHCA_FLAG_FMR))
301 /* 302 /*
302 * Be friendly to WRITE_MTT command 303 * Be friendly to WRITE_MTT command
303 * and leave two empty slots for the 304 * and leave two empty slots for the
@@ -355,7 +356,8 @@ int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
355 int size = mthca_write_mtt_size(dev); 356 int size = mthca_write_mtt_size(dev);
356 int chunk; 357 int chunk;
357 358
358 if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy) 359 if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
360 !(dev->mthca_flags & MTHCA_FLAG_FMR))
359 return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len); 361 return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len);
360 362
361 while (list_len > 0) { 363 while (list_len > 0) {
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 0725ad7ad9bf..47e6fd46d9c2 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1293,7 +1293,6 @@ int mthca_register_device(struct mthca_dev *dev)
1293 dev->ib_dev.node_type = RDMA_NODE_IB_CA; 1293 dev->ib_dev.node_type = RDMA_NODE_IB_CA;
1294 dev->ib_dev.phys_port_cnt = dev->limits.num_ports; 1294 dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
1295 dev->ib_dev.dma_device = &dev->pdev->dev; 1295 dev->ib_dev.dma_device = &dev->pdev->dev;
1296 dev->ib_dev.class_dev.dev = &dev->pdev->dev;
1297 dev->ib_dev.query_device = mthca_query_device; 1296 dev->ib_dev.query_device = mthca_query_device;
1298 dev->ib_dev.query_port = mthca_query_port; 1297 dev->ib_dev.query_port = mthca_query_port;
1299 dev->ib_dev.modify_device = mthca_modify_device; 1298 dev->ib_dev.modify_device = mthca_modify_device;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 1c6b63aca268..8fe6fee7a97a 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1419,11 +1419,10 @@ void mthca_free_qp(struct mthca_dev *dev,
1419 * unref the mem-free tables and free the QPN in our table. 1419 * unref the mem-free tables and free the QPN in our table.
1420 */ 1420 */
1421 if (!qp->ibqp.uobject) { 1421 if (!qp->ibqp.uobject) {
1422 mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, 1422 mthca_cq_clean(dev, recv_cq, qp->qpn,
1423 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); 1423 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
1424 if (qp->ibqp.send_cq != qp->ibqp.recv_cq) 1424 if (send_cq != recv_cq)
1425 mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn, 1425 mthca_cq_clean(dev, send_cq, qp->qpn, NULL);
1426 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
1427 1426
1428 mthca_free_memfree(dev, qp); 1427 mthca_free_memfree(dev, qp);
1429 mthca_free_wqe_buf(dev, qp); 1428 mthca_free_wqe_buf(dev, qp);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index c722e5c141b3..0c4e59b906cd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -228,7 +228,6 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
228 struct net_device *dev = cm_id->context; 228 struct net_device *dev = cm_id->context;
229 struct ipoib_dev_priv *priv = netdev_priv(dev); 229 struct ipoib_dev_priv *priv = netdev_priv(dev);
230 struct ipoib_cm_rx *p; 230 struct ipoib_cm_rx *p;
231 unsigned long flags;
232 unsigned psn; 231 unsigned psn;
233 int ret; 232 int ret;
234 233
@@ -257,9 +256,9 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
257 256
258 cm_id->context = p; 257 cm_id->context = p;
259 p->jiffies = jiffies; 258 p->jiffies = jiffies;
260 spin_lock_irqsave(&priv->lock, flags); 259 spin_lock_irq(&priv->lock);
261 list_add(&p->list, &priv->cm.passive_ids); 260 list_add(&p->list, &priv->cm.passive_ids);
262 spin_unlock_irqrestore(&priv->lock, flags); 261 spin_unlock_irq(&priv->lock);
263 queue_delayed_work(ipoib_workqueue, 262 queue_delayed_work(ipoib_workqueue,
264 &priv->cm.stale_task, IPOIB_CM_RX_DELAY); 263 &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
265 return 0; 264 return 0;
@@ -277,7 +276,6 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
277{ 276{
278 struct ipoib_cm_rx *p; 277 struct ipoib_cm_rx *p;
279 struct ipoib_dev_priv *priv; 278 struct ipoib_dev_priv *priv;
280 unsigned long flags;
281 int ret; 279 int ret;
282 280
283 switch (event->event) { 281 switch (event->event) {
@@ -290,14 +288,14 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
290 case IB_CM_REJ_RECEIVED: 288 case IB_CM_REJ_RECEIVED:
291 p = cm_id->context; 289 p = cm_id->context;
292 priv = netdev_priv(p->dev); 290 priv = netdev_priv(p->dev);
293 spin_lock_irqsave(&priv->lock, flags); 291 spin_lock_irq(&priv->lock);
294 if (list_empty(&p->list)) 292 if (list_empty(&p->list))
295 ret = 0; /* Connection is going away already. */ 293 ret = 0; /* Connection is going away already. */
296 else { 294 else {
297 list_del_init(&p->list); 295 list_del_init(&p->list);
298 ret = -ECONNRESET; 296 ret = -ECONNRESET;
299 } 297 }
300 spin_unlock_irqrestore(&priv->lock, flags); 298 spin_unlock_irq(&priv->lock);
301 if (ret) { 299 if (ret) {
302 ib_destroy_qp(p->qp); 300 ib_destroy_qp(p->qp);
303 kfree(p); 301 kfree(p);
@@ -351,8 +349,8 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
351 u64 mapping[IPOIB_CM_RX_SG]; 349 u64 mapping[IPOIB_CM_RX_SG];
352 int frags; 350 int frags;
353 351
354 ipoib_dbg_data(priv, "cm recv completion: id %d, op %d, status: %d\n", 352 ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
355 wr_id, wc->opcode, wc->status); 353 wr_id, wc->status);
356 354
357 if (unlikely(wr_id >= ipoib_recvq_size)) { 355 if (unlikely(wr_id >= ipoib_recvq_size)) {
358 ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n", 356 ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
@@ -504,8 +502,8 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx
504 struct ipoib_tx_buf *tx_req; 502 struct ipoib_tx_buf *tx_req;
505 unsigned long flags; 503 unsigned long flags;
506 504
507 ipoib_dbg_data(priv, "cm send completion: id %d, op %d, status: %d\n", 505 ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n",
508 wr_id, wc->opcode, wc->status); 506 wr_id, wc->status);
509 507
510 if (unlikely(wr_id >= ipoib_sendq_size)) { 508 if (unlikely(wr_id >= ipoib_sendq_size)) {
511 ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n", 509 ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n",
@@ -612,23 +610,22 @@ void ipoib_cm_dev_stop(struct net_device *dev)
612{ 610{
613 struct ipoib_dev_priv *priv = netdev_priv(dev); 611 struct ipoib_dev_priv *priv = netdev_priv(dev);
614 struct ipoib_cm_rx *p; 612 struct ipoib_cm_rx *p;
615 unsigned long flags;
616 613
617 if (!IPOIB_CM_SUPPORTED(dev->dev_addr)) 614 if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
618 return; 615 return;
619 616
620 ib_destroy_cm_id(priv->cm.id); 617 ib_destroy_cm_id(priv->cm.id);
621 spin_lock_irqsave(&priv->lock, flags); 618 spin_lock_irq(&priv->lock);
622 while (!list_empty(&priv->cm.passive_ids)) { 619 while (!list_empty(&priv->cm.passive_ids)) {
623 p = list_entry(priv->cm.passive_ids.next, typeof(*p), list); 620 p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
624 list_del_init(&p->list); 621 list_del_init(&p->list);
625 spin_unlock_irqrestore(&priv->lock, flags); 622 spin_unlock_irq(&priv->lock);
626 ib_destroy_cm_id(p->id); 623 ib_destroy_cm_id(p->id);
627 ib_destroy_qp(p->qp); 624 ib_destroy_qp(p->qp);
628 kfree(p); 625 kfree(p);
629 spin_lock_irqsave(&priv->lock, flags); 626 spin_lock_irq(&priv->lock);
630 } 627 }
631 spin_unlock_irqrestore(&priv->lock, flags); 628 spin_unlock_irq(&priv->lock);
632 629
633 cancel_delayed_work(&priv->cm.stale_task); 630 cancel_delayed_work(&priv->cm.stale_task);
634} 631}
@@ -642,7 +639,6 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
642 struct ib_qp_attr qp_attr; 639 struct ib_qp_attr qp_attr;
643 int qp_attr_mask, ret; 640 int qp_attr_mask, ret;
644 struct sk_buff *skb; 641 struct sk_buff *skb;
645 unsigned long flags;
646 642
647 p->mtu = be32_to_cpu(data->mtu); 643 p->mtu = be32_to_cpu(data->mtu);
648 644
@@ -680,12 +676,12 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
680 676
681 skb_queue_head_init(&skqueue); 677 skb_queue_head_init(&skqueue);
682 678
683 spin_lock_irqsave(&priv->lock, flags); 679 spin_lock_irq(&priv->lock);
684 set_bit(IPOIB_FLAG_OPER_UP, &p->flags); 680 set_bit(IPOIB_FLAG_OPER_UP, &p->flags);
685 if (p->neigh) 681 if (p->neigh)
686 while ((skb = __skb_dequeue(&p->neigh->queue))) 682 while ((skb = __skb_dequeue(&p->neigh->queue)))
687 __skb_queue_tail(&skqueue, skb); 683 __skb_queue_tail(&skqueue, skb);
688 spin_unlock_irqrestore(&priv->lock, flags); 684 spin_unlock_irq(&priv->lock);
689 685
690 while ((skb = __skb_dequeue(&skqueue))) { 686 while ((skb = __skb_dequeue(&skqueue))) {
691 skb->dev = p->dev; 687 skb->dev = p->dev;
@@ -895,7 +891,6 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
895 struct ipoib_dev_priv *priv = netdev_priv(tx->dev); 891 struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
896 struct net_device *dev = priv->dev; 892 struct net_device *dev = priv->dev;
897 struct ipoib_neigh *neigh; 893 struct ipoib_neigh *neigh;
898 unsigned long flags;
899 int ret; 894 int ret;
900 895
901 switch (event->event) { 896 switch (event->event) {
@@ -914,7 +909,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
914 case IB_CM_REJ_RECEIVED: 909 case IB_CM_REJ_RECEIVED:
915 case IB_CM_TIMEWAIT_EXIT: 910 case IB_CM_TIMEWAIT_EXIT:
916 ipoib_dbg(priv, "CM error %d.\n", event->event); 911 ipoib_dbg(priv, "CM error %d.\n", event->event);
917 spin_lock_irqsave(&priv->tx_lock, flags); 912 spin_lock_irq(&priv->tx_lock);
918 spin_lock(&priv->lock); 913 spin_lock(&priv->lock);
919 neigh = tx->neigh; 914 neigh = tx->neigh;
920 915
@@ -934,7 +929,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
934 } 929 }
935 930
936 spin_unlock(&priv->lock); 931 spin_unlock(&priv->lock);
937 spin_unlock_irqrestore(&priv->tx_lock, flags); 932 spin_unlock_irq(&priv->tx_lock);
938 break; 933 break;
939 default: 934 default:
940 break; 935 break;
@@ -1023,21 +1018,20 @@ static void ipoib_cm_tx_reap(struct work_struct *work)
1023 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 1018 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
1024 cm.reap_task); 1019 cm.reap_task);
1025 struct ipoib_cm_tx *p; 1020 struct ipoib_cm_tx *p;
1026 unsigned long flags;
1027 1021
1028 spin_lock_irqsave(&priv->tx_lock, flags); 1022 spin_lock_irq(&priv->tx_lock);
1029 spin_lock(&priv->lock); 1023 spin_lock(&priv->lock);
1030 while (!list_empty(&priv->cm.reap_list)) { 1024 while (!list_empty(&priv->cm.reap_list)) {
1031 p = list_entry(priv->cm.reap_list.next, typeof(*p), list); 1025 p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
1032 list_del(&p->list); 1026 list_del(&p->list);
1033 spin_unlock(&priv->lock); 1027 spin_unlock(&priv->lock);
1034 spin_unlock_irqrestore(&priv->tx_lock, flags); 1028 spin_unlock_irq(&priv->tx_lock);
1035 ipoib_cm_tx_destroy(p); 1029 ipoib_cm_tx_destroy(p);
1036 spin_lock_irqsave(&priv->tx_lock, flags); 1030 spin_lock_irq(&priv->tx_lock);
1037 spin_lock(&priv->lock); 1031 spin_lock(&priv->lock);
1038 } 1032 }
1039 spin_unlock(&priv->lock); 1033 spin_unlock(&priv->lock);
1040 spin_unlock_irqrestore(&priv->tx_lock, flags); 1034 spin_unlock_irq(&priv->tx_lock);
1041} 1035}
1042 1036
1043static void ipoib_cm_skb_reap(struct work_struct *work) 1037static void ipoib_cm_skb_reap(struct work_struct *work)
@@ -1046,15 +1040,14 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
1046 cm.skb_task); 1040 cm.skb_task);
1047 struct net_device *dev = priv->dev; 1041 struct net_device *dev = priv->dev;
1048 struct sk_buff *skb; 1042 struct sk_buff *skb;
1049 unsigned long flags;
1050 1043
1051 unsigned mtu = priv->mcast_mtu; 1044 unsigned mtu = priv->mcast_mtu;
1052 1045
1053 spin_lock_irqsave(&priv->tx_lock, flags); 1046 spin_lock_irq(&priv->tx_lock);
1054 spin_lock(&priv->lock); 1047 spin_lock(&priv->lock);
1055 while ((skb = skb_dequeue(&priv->cm.skb_queue))) { 1048 while ((skb = skb_dequeue(&priv->cm.skb_queue))) {
1056 spin_unlock(&priv->lock); 1049 spin_unlock(&priv->lock);
1057 spin_unlock_irqrestore(&priv->tx_lock, flags); 1050 spin_unlock_irq(&priv->tx_lock);
1058 if (skb->protocol == htons(ETH_P_IP)) 1051 if (skb->protocol == htons(ETH_P_IP))
1059 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 1052 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
1060#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 1053#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -1062,11 +1055,11 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
1062 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); 1055 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
1063#endif 1056#endif
1064 dev_kfree_skb_any(skb); 1057 dev_kfree_skb_any(skb);
1065 spin_lock_irqsave(&priv->tx_lock, flags); 1058 spin_lock_irq(&priv->tx_lock);
1066 spin_lock(&priv->lock); 1059 spin_lock(&priv->lock);
1067 } 1060 }
1068 spin_unlock(&priv->lock); 1061 spin_unlock(&priv->lock);
1069 spin_unlock_irqrestore(&priv->tx_lock, flags); 1062 spin_unlock_irq(&priv->tx_lock);
1070} 1063}
1071 1064
1072void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, 1065void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
@@ -1088,9 +1081,8 @@ static void ipoib_cm_stale_task(struct work_struct *work)
1088 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 1081 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
1089 cm.stale_task.work); 1082 cm.stale_task.work);
1090 struct ipoib_cm_rx *p; 1083 struct ipoib_cm_rx *p;
1091 unsigned long flags;
1092 1084
1093 spin_lock_irqsave(&priv->lock, flags); 1085 spin_lock_irq(&priv->lock);
1094 while (!list_empty(&priv->cm.passive_ids)) { 1086 while (!list_empty(&priv->cm.passive_ids)) {
1095 /* List is sorted by LRU, start from tail, 1087 /* List is sorted by LRU, start from tail,
1096 * stop when we see a recently used entry */ 1088 * stop when we see a recently used entry */
@@ -1098,13 +1090,13 @@ static void ipoib_cm_stale_task(struct work_struct *work)
1098 if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT)) 1090 if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
1099 break; 1091 break;
1100 list_del_init(&p->list); 1092 list_del_init(&p->list);
1101 spin_unlock_irqrestore(&priv->lock, flags); 1093 spin_unlock_irq(&priv->lock);
1102 ib_destroy_cm_id(p->id); 1094 ib_destroy_cm_id(p->id);
1103 ib_destroy_qp(p->qp); 1095 ib_destroy_qp(p->qp);
1104 kfree(p); 1096 kfree(p);
1105 spin_lock_irqsave(&priv->lock, flags); 1097 spin_lock_irq(&priv->lock);
1106 } 1098 }
1107 spin_unlock_irqrestore(&priv->lock, flags); 1099 spin_unlock_irq(&priv->lock);
1108} 1100}
1109 1101
1110 1102
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 93f74567897e..1bdb9101911a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -172,8 +172,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
172 struct sk_buff *skb; 172 struct sk_buff *skb;
173 u64 addr; 173 u64 addr;
174 174
175 ipoib_dbg_data(priv, "recv completion: id %d, op %d, status: %d\n", 175 ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n",
176 wr_id, wc->opcode, wc->status); 176 wr_id, wc->status);
177 177
178 if (unlikely(wr_id >= ipoib_recvq_size)) { 178 if (unlikely(wr_id >= ipoib_recvq_size)) {
179 ipoib_warn(priv, "recv completion event with wrid %d (> %d)\n", 179 ipoib_warn(priv, "recv completion event with wrid %d (> %d)\n",
@@ -245,8 +245,8 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
245 struct ipoib_tx_buf *tx_req; 245 struct ipoib_tx_buf *tx_req;
246 unsigned long flags; 246 unsigned long flags;
247 247
248 ipoib_dbg_data(priv, "send completion: id %d, op %d, status: %d\n", 248 ipoib_dbg_data(priv, "send completion: id %d, status: %d\n",
249 wr_id, wc->opcode, wc->status); 249 wr_id, wc->status);
250 250
251 if (unlikely(wr_id >= ipoib_sendq_size)) { 251 if (unlikely(wr_id >= ipoib_sendq_size)) {
252 ipoib_warn(priv, "send completion event with wrid %d (> %d)\n", 252 ipoib_warn(priv, "send completion event with wrid %d (> %d)\n",
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index f2a40ae8e7d0..b4c380c5a3ba 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -395,14 +395,10 @@ static void path_rec_completion(int status,
395 skb_queue_head_init(&skqueue); 395 skb_queue_head_init(&skqueue);
396 396
397 if (!status) { 397 if (!status) {
398 struct ib_ah_attr av = { 398 struct ib_ah_attr av;
399 .dlid = be16_to_cpu(pathrec->dlid), 399
400 .sl = pathrec->sl, 400 if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
401 .port_num = priv->port, 401 ah = ipoib_create_ah(dev, priv->pd, &av);
402 .static_rate = pathrec->rate
403 };
404
405 ah = ipoib_create_ah(dev, priv->pd, &av);
406 } 402 }
407 403
408 spin_lock_irqsave(&priv->lock, flags); 404 spin_lock_irqsave(&priv->lock, flags);