author     Linus Torvalds <torvalds@linux-foundation.org>  2008-01-25 17:41:24 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2008-01-25 17:41:24 -0500
commit     99f1c97dbdb30e958edfd1ced0ae43df62504e07 (patch)
tree       cc61393c912b3c1b095a9c74322f8e1364ab9b3a
parent     b31fde6db2b76a9f7f59bf016652b46cff43f8da (diff)
parent     8176d297c73a06e6076c9c31f6404047567f6324 (diff)

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (81 commits)
  RDMA/cxgb3: Fix the T3A workaround checks
  IB/ipath: Remove unnecessary cast
  IPoIB: Constify seq_operations function pointer tables
  RDMA/cxgb3: Mark QP as privileged based on user capabilities
  RDMA/cxgb3: Fix page shift calculation in build_phys_page_list()
  RDMA/cxgb3: Flush the receive queue when closing
  IB/ipath: Trivial simplification of ipath_make_ud_req()
  IB/mthca: Update latest "native Arbel" firmware revision
  IPoIB: Remove redundant check of netif_queue_stopped() in xmit handler
  IB/ipath: Add mappings from HW register to PortInfo port physical state
  IB/ipath: Changes to support PIO bandwidth check on IBA7220
  IB/ipath: Minor cleanup of unused fields and chip-specific errors
  IB/ipath: New sysfs entries to control 7220 features
  IB/ipath: Add new chip-specific functions to older chips, consistent init
  IB/ipath: Remove unused MDIO interface code
  IB/ehca: Prevent RDMA-related connection failures on some eHCA2 hardware
  IB/ehca: Add "port connection autodetect mode"
  IB/ehca: Define array to store SMI/GSI QPs
  IB/ehca: Remove CQ-QP-link before destroying QP in error path of create_qp()
  IB/iser: Add change_queue_depth method
  ...
-rw-r--r--  Documentation/feature-removal-schedule.txt        |  10
-rw-r--r--  drivers/infiniband/core/cm.c                      | 306
-rw-r--r--  drivers/infiniband/core/cma.c                     |  60
-rw-r--r--  drivers/infiniband/core/fmr_pool.c                |  33
-rw-r--r--  drivers/infiniband/core/mad.c                     |  26
-rw-r--r--  drivers/infiniband/core/mad_priv.h                |   3
-rw-r--r--  drivers/infiniband/core/mad_rmpp.c                |   2
-rw-r--r--  drivers/infiniband/core/multicast.c               |  55
-rw-r--r--  drivers/infiniband/core/smi.h                     |  18
-rw-r--r--  drivers/infiniband/core/ucm.c                     |  37
-rw-r--r--  drivers/infiniband/core/ucma.c                    |  92
-rw-r--r--  drivers/infiniband/core/user_mad.c                | 115
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_hal.c            |   4
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_wr.h             |   5
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cm.c             |   4
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_mem.c            |   7
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.c       |   7
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_qp.c             |  29
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_av.c              |   2
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_classes.h         |  23
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_cq.c              |   2
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_irq.c             |  38
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_iverbs.h          |   2
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_main.c            |  15
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_qp.c              | 180
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_reqs.c            | 112
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_sqp.c             |   6
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_common.h        |  35
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_cq.c            |   2
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_debug.h         |   4
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_driver.c        | 180
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_eeprom.c        |  23
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_file_ops.c      |  94
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_fs.c            |  14
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_iba6110.c       | 395
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_iba6120.c       | 439
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_init_chip.c     |  67
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_intr.c          |  81
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_kernel.h        | 201
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_keys.c          |   5
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_mad.c           | 123
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_qp.c            |   6
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_rc.c            |  18
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_registers.h     |  33
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_ruc.c           |  13
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_srq.c           |   4
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_stats.c         |  24
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_sysfs.c         | 364
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_ud.c            |   3
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs.c         |  55
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs.h         |  12
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c                   |   9
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_dev.h           |  13
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_eq.c            |   6
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_main.c          |  40
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h              | 184
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c           | 376
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_fs.c           |   4
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c           |   8
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c         |  60
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c    |   8
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_verbs.c        |  18
-rw-r--r--  drivers/infiniband/ulp/iser/Kconfig               |   4
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.c          |   1
-rw-r--r--  drivers/infiniband/ulp/iser/iser_initiator.c      |   2
-rw-r--r--  drivers/infiniband/ulp/iser/iser_verbs.c          |   8
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c               | 131
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.h               |   5
-rw-r--r--  drivers/net/mlx4/fw.c                             |   2
-rw-r--r--  include/net/if_inet6.h                            |  11
-rw-r--r--  include/net/ip.h                                  |  10
-rw-r--r--  include/rdma/ib_mad.h                             |   4
-rw-r--r--  include/rdma/rdma_user_cm.h                       |  13
-rw-r--r--  net/ipv4/arp.c                                    |   2
-rw-r--r--  net/ipv6/ndisc.c                                  |   2
75 files changed, 3124 insertions(+), 1185 deletions(-)
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 20c4c8bac9d7..9b8291f4c211 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -295,16 +295,6 @@ Who: linuxppc-dev@ozlabs.org
295 295
296--------------------------- 296---------------------------
297 297
298What: mthca driver's MSI support
299When: January 2008
300Files: drivers/infiniband/hw/mthca/*.[ch]
301Why: All mthca hardware also supports MSI-X, which provides
302 strictly more functionality than MSI. So there is no point in
303 having both MSI-X and MSI support in the driver.
304Who: Roland Dreier <rolandd@cisco.com>
305
306---------------------------
307
308What: sk98lin network driver 298What: sk98lin network driver
309When: Feburary 2008 299When: Feburary 2008
310Why: In kernel tree version of driver is unmaintained. Sk98lin driver 300Why: In kernel tree version of driver is unmaintained. Sk98lin driver
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 2e39236d189f..c0150147d347 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2004-2006 Intel Corporation. All rights reserved. 2 * Copyright (c) 2004-2007 Intel Corporation. All rights reserved.
3 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 3 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
4 * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. 4 * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved.
5 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 5 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
@@ -37,12 +37,14 @@
37 37
38#include <linux/completion.h> 38#include <linux/completion.h>
39#include <linux/dma-mapping.h> 39#include <linux/dma-mapping.h>
40#include <linux/device.h>
40#include <linux/err.h> 41#include <linux/err.h>
41#include <linux/idr.h> 42#include <linux/idr.h>
42#include <linux/interrupt.h> 43#include <linux/interrupt.h>
43#include <linux/random.h> 44#include <linux/random.h>
44#include <linux/rbtree.h> 45#include <linux/rbtree.h>
45#include <linux/spinlock.h> 46#include <linux/spinlock.h>
47#include <linux/sysfs.h>
46#include <linux/workqueue.h> 48#include <linux/workqueue.h>
47 49
48#include <rdma/ib_cache.h> 50#include <rdma/ib_cache.h>
@@ -78,17 +80,94 @@ static struct ib_cm {
78 struct workqueue_struct *wq; 80 struct workqueue_struct *wq;
79} cm; 81} cm;
80 82
83/* Counter indexes ordered by attribute ID */
84enum {
85 CM_REQ_COUNTER,
86 CM_MRA_COUNTER,
87 CM_REJ_COUNTER,
88 CM_REP_COUNTER,
89 CM_RTU_COUNTER,
90 CM_DREQ_COUNTER,
91 CM_DREP_COUNTER,
92 CM_SIDR_REQ_COUNTER,
93 CM_SIDR_REP_COUNTER,
94 CM_LAP_COUNTER,
95 CM_APR_COUNTER,
96 CM_ATTR_COUNT,
97 CM_ATTR_ID_OFFSET = 0x0010,
98};
99
100enum {
101 CM_XMIT,
102 CM_XMIT_RETRIES,
103 CM_RECV,
104 CM_RECV_DUPLICATES,
105 CM_COUNTER_GROUPS
106};
107
108static char const counter_group_names[CM_COUNTER_GROUPS]
109 [sizeof("cm_rx_duplicates")] = {
110 "cm_tx_msgs", "cm_tx_retries",
111 "cm_rx_msgs", "cm_rx_duplicates"
112};
113
114struct cm_counter_group {
115 struct kobject obj;
116 atomic_long_t counter[CM_ATTR_COUNT];
117};
118
119struct cm_counter_attribute {
120 struct attribute attr;
121 int index;
122};
123
124#define CM_COUNTER_ATTR(_name, _index) \
125struct cm_counter_attribute cm_##_name##_counter_attr = { \
126 .attr = { .name = __stringify(_name), .mode = 0444, .owner = THIS_MODULE }, \
127 .index = _index \
128}
129
130static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
131static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
132static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
133static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
134static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
135static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
136static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
137static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
138static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
139static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
140static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);
141
142static struct attribute *cm_counter_default_attrs[] = {
143 &cm_req_counter_attr.attr,
144 &cm_mra_counter_attr.attr,
145 &cm_rej_counter_attr.attr,
146 &cm_rep_counter_attr.attr,
147 &cm_rtu_counter_attr.attr,
148 &cm_dreq_counter_attr.attr,
149 &cm_drep_counter_attr.attr,
150 &cm_sidr_req_counter_attr.attr,
151 &cm_sidr_rep_counter_attr.attr,
152 &cm_lap_counter_attr.attr,
153 &cm_apr_counter_attr.attr,
154 NULL
155};
156
81struct cm_port { 157struct cm_port {
82 struct cm_device *cm_dev; 158 struct cm_device *cm_dev;
83 struct ib_mad_agent *mad_agent; 159 struct ib_mad_agent *mad_agent;
160 struct kobject port_obj;
84 u8 port_num; 161 u8 port_num;
162 struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
85}; 163};
86 164
87struct cm_device { 165struct cm_device {
88 struct list_head list; 166 struct list_head list;
89 struct ib_device *device; 167 struct ib_device *device;
168 struct kobject dev_obj;
90 u8 ack_delay; 169 u8 ack_delay;
91 struct cm_port port[0]; 170 struct cm_port *port[0];
92}; 171};
93 172
94struct cm_av { 173struct cm_av {
@@ -278,7 +357,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
278 list_for_each_entry(cm_dev, &cm.device_list, list) { 357 list_for_each_entry(cm_dev, &cm.device_list, list) {
279 if (!ib_find_cached_gid(cm_dev->device, &path->sgid, 358 if (!ib_find_cached_gid(cm_dev->device, &path->sgid,
280 &p, NULL)) { 359 &p, NULL)) {
281 port = &cm_dev->port[p-1]; 360 port = cm_dev->port[p-1];
282 break; 361 break;
283 } 362 }
284 } 363 }
@@ -1270,6 +1349,9 @@ static void cm_dup_req_handler(struct cm_work *work,
1270 struct ib_mad_send_buf *msg = NULL; 1349 struct ib_mad_send_buf *msg = NULL;
1271 int ret; 1350 int ret;
1272 1351
1352 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1353 counter[CM_REQ_COUNTER]);
1354
1273 /* Quick state check to discard duplicate REQs. */ 1355 /* Quick state check to discard duplicate REQs. */
1274 if (cm_id_priv->id.state == IB_CM_REQ_RCVD) 1356 if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
1275 return; 1357 return;
@@ -1616,6 +1698,8 @@ static void cm_dup_rep_handler(struct cm_work *work)
1616 if (!cm_id_priv) 1698 if (!cm_id_priv)
1617 return; 1699 return;
1618 1700
1701 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1702 counter[CM_REP_COUNTER]);
1619 ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); 1703 ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1620 if (ret) 1704 if (ret)
1621 goto deref; 1705 goto deref;
@@ -1781,6 +1865,8 @@ static int cm_rtu_handler(struct cm_work *work)
1781 if (cm_id_priv->id.state != IB_CM_REP_SENT && 1865 if (cm_id_priv->id.state != IB_CM_REP_SENT &&
1782 cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) { 1866 cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
1783 spin_unlock_irq(&cm_id_priv->lock); 1867 spin_unlock_irq(&cm_id_priv->lock);
1868 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1869 counter[CM_RTU_COUNTER]);
1784 goto out; 1870 goto out;
1785 } 1871 }
1786 cm_id_priv->id.state = IB_CM_ESTABLISHED; 1872 cm_id_priv->id.state = IB_CM_ESTABLISHED;
@@ -1958,6 +2044,8 @@ static int cm_dreq_handler(struct cm_work *work)
1958 cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id, 2044 cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
1959 dreq_msg->local_comm_id); 2045 dreq_msg->local_comm_id);
1960 if (!cm_id_priv) { 2046 if (!cm_id_priv) {
2047 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2048 counter[CM_DREQ_COUNTER]);
1961 cm_issue_drep(work->port, work->mad_recv_wc); 2049 cm_issue_drep(work->port, work->mad_recv_wc);
1962 return -EINVAL; 2050 return -EINVAL;
1963 } 2051 }
@@ -1977,6 +2065,8 @@ static int cm_dreq_handler(struct cm_work *work)
1977 case IB_CM_MRA_REP_RCVD: 2065 case IB_CM_MRA_REP_RCVD:
1978 break; 2066 break;
1979 case IB_CM_TIMEWAIT: 2067 case IB_CM_TIMEWAIT:
2068 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2069 counter[CM_DREQ_COUNTER]);
1980 if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) 2070 if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
1981 goto unlock; 2071 goto unlock;
1982 2072
@@ -1988,6 +2078,10 @@ static int cm_dreq_handler(struct cm_work *work)
1988 if (ib_post_send_mad(msg, NULL)) 2078 if (ib_post_send_mad(msg, NULL))
1989 cm_free_msg(msg); 2079 cm_free_msg(msg);
1990 goto deref; 2080 goto deref;
2081 case IB_CM_DREQ_RCVD:
2082 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2083 counter[CM_DREQ_COUNTER]);
2084 goto unlock;
1991 default: 2085 default:
1992 goto unlock; 2086 goto unlock;
1993 } 2087 }
@@ -2339,10 +2433,20 @@ static int cm_mra_handler(struct cm_work *work)
2339 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER || 2433 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
2340 cm_id_priv->id.lap_state != IB_CM_LAP_SENT || 2434 cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
2341 ib_modify_mad(cm_id_priv->av.port->mad_agent, 2435 ib_modify_mad(cm_id_priv->av.port->mad_agent,
2342 cm_id_priv->msg, timeout)) 2436 cm_id_priv->msg, timeout)) {
2437 if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2438 atomic_long_inc(&work->port->
2439 counter_group[CM_RECV_DUPLICATES].
2440 counter[CM_MRA_COUNTER]);
2343 goto out; 2441 goto out;
2442 }
2344 cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD; 2443 cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
2345 break; 2444 break;
2445 case IB_CM_MRA_REQ_RCVD:
2446 case IB_CM_MRA_REP_RCVD:
2447 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2448 counter[CM_MRA_COUNTER]);
2449 /* fall through */
2346 default: 2450 default:
2347 goto out; 2451 goto out;
2348 } 2452 }
@@ -2502,6 +2606,8 @@ static int cm_lap_handler(struct cm_work *work)
2502 case IB_CM_LAP_IDLE: 2606 case IB_CM_LAP_IDLE:
2503 break; 2607 break;
2504 case IB_CM_MRA_LAP_SENT: 2608 case IB_CM_MRA_LAP_SENT:
2609 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2610 counter[CM_LAP_COUNTER]);
2505 if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) 2611 if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
2506 goto unlock; 2612 goto unlock;
2507 2613
@@ -2515,6 +2621,10 @@ static int cm_lap_handler(struct cm_work *work)
2515 if (ib_post_send_mad(msg, NULL)) 2621 if (ib_post_send_mad(msg, NULL))
2516 cm_free_msg(msg); 2622 cm_free_msg(msg);
2517 goto deref; 2623 goto deref;
2624 case IB_CM_LAP_RCVD:
2625 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2626 counter[CM_LAP_COUNTER]);
2627 goto unlock;
2518 default: 2628 default:
2519 goto unlock; 2629 goto unlock;
2520 } 2630 }
@@ -2796,6 +2906,8 @@ static int cm_sidr_req_handler(struct cm_work *work)
2796 cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv); 2906 cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
2797 if (cur_cm_id_priv) { 2907 if (cur_cm_id_priv) {
2798 spin_unlock_irq(&cm.lock); 2908 spin_unlock_irq(&cm.lock);
2909 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2910 counter[CM_SIDR_REQ_COUNTER]);
2799 goto out; /* Duplicate message. */ 2911 goto out; /* Duplicate message. */
2800 } 2912 }
2801 cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; 2913 cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
@@ -2990,6 +3102,27 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent,
2990 struct ib_mad_send_wc *mad_send_wc) 3102 struct ib_mad_send_wc *mad_send_wc)
2991{ 3103{
2992 struct ib_mad_send_buf *msg = mad_send_wc->send_buf; 3104 struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
3105 struct cm_port *port;
3106 u16 attr_index;
3107
3108 port = mad_agent->context;
3109 attr_index = be16_to_cpu(((struct ib_mad_hdr *)
3110 msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
3111
3112 /*
3113 * If the send was in response to a received message (context[0] is not
3114 * set to a cm_id), and is not a REJ, then it is a send that was
3115 * manually retried.
3116 */
3117 if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
3118 msg->retries = 1;
3119
3120 atomic_long_add(1 + msg->retries,
3121 &port->counter_group[CM_XMIT].counter[attr_index]);
3122 if (msg->retries)
3123 atomic_long_add(msg->retries,
3124 &port->counter_group[CM_XMIT_RETRIES].
3125 counter[attr_index]);
2993 3126
2994 switch (mad_send_wc->status) { 3127 switch (mad_send_wc->status) {
2995 case IB_WC_SUCCESS: 3128 case IB_WC_SUCCESS:
@@ -3148,8 +3281,10 @@ EXPORT_SYMBOL(ib_cm_notify);
3148static void cm_recv_handler(struct ib_mad_agent *mad_agent, 3281static void cm_recv_handler(struct ib_mad_agent *mad_agent,
3149 struct ib_mad_recv_wc *mad_recv_wc) 3282 struct ib_mad_recv_wc *mad_recv_wc)
3150{ 3283{
3284 struct cm_port *port = mad_agent->context;
3151 struct cm_work *work; 3285 struct cm_work *work;
3152 enum ib_cm_event_type event; 3286 enum ib_cm_event_type event;
3287 u16 attr_id;
3153 int paths = 0; 3288 int paths = 0;
3154 3289
3155 switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) { 3290 switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
@@ -3194,6 +3329,10 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
3194 return; 3329 return;
3195 } 3330 }
3196 3331
3332 attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
3333 atomic_long_inc(&port->counter_group[CM_RECV].
3334 counter[attr_id - CM_ATTR_ID_OFFSET]);
3335
3197 work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths, 3336 work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
3198 GFP_KERNEL); 3337 GFP_KERNEL);
3199 if (!work) { 3338 if (!work) {
@@ -3204,7 +3343,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
3204 INIT_DELAYED_WORK(&work->work, cm_work_handler); 3343 INIT_DELAYED_WORK(&work->work, cm_work_handler);
3205 work->cm_event.event = event; 3344 work->cm_event.event = event;
3206 work->mad_recv_wc = mad_recv_wc; 3345 work->mad_recv_wc = mad_recv_wc;
3207 work->port = (struct cm_port *)mad_agent->context; 3346 work->port = port;
3208 queue_delayed_work(cm.wq, &work->work, 0); 3347 queue_delayed_work(cm.wq, &work->work, 0);
3209} 3348}
3210 3349
@@ -3379,6 +3518,108 @@ static void cm_get_ack_delay(struct cm_device *cm_dev)
3379 cm_dev->ack_delay = attr.local_ca_ack_delay; 3518 cm_dev->ack_delay = attr.local_ca_ack_delay;
3380} 3519}
3381 3520
3521static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
3522 char *buf)
3523{
3524 struct cm_counter_group *group;
3525 struct cm_counter_attribute *cm_attr;
3526
3527 group = container_of(obj, struct cm_counter_group, obj);
3528 cm_attr = container_of(attr, struct cm_counter_attribute, attr);
3529
3530 return sprintf(buf, "%ld\n",
3531 atomic_long_read(&group->counter[cm_attr->index]));
3532}
3533
3534static struct sysfs_ops cm_counter_ops = {
3535 .show = cm_show_counter
3536};
3537
3538static struct kobj_type cm_counter_obj_type = {
3539 .sysfs_ops = &cm_counter_ops,
3540 .default_attrs = cm_counter_default_attrs
3541};
3542
3543static void cm_release_port_obj(struct kobject *obj)
3544{
3545 struct cm_port *cm_port;
3546
3547 printk(KERN_ERR "free cm port\n");
3548
3549 cm_port = container_of(obj, struct cm_port, port_obj);
3550 kfree(cm_port);
3551}
3552
3553static struct kobj_type cm_port_obj_type = {
3554 .release = cm_release_port_obj
3555};
3556
3557static void cm_release_dev_obj(struct kobject *obj)
3558{
3559 struct cm_device *cm_dev;
3560
3561 printk(KERN_ERR "free cm dev\n");
3562
3563 cm_dev = container_of(obj, struct cm_device, dev_obj);
3564 kfree(cm_dev);
3565}
3566
3567static struct kobj_type cm_dev_obj_type = {
3568 .release = cm_release_dev_obj
3569};
3570
3571struct class cm_class = {
3572 .name = "infiniband_cm",
3573};
3574EXPORT_SYMBOL(cm_class);
3575
3576static void cm_remove_fs_obj(struct kobject *obj)
3577{
3578 kobject_put(obj->parent);
3579 kobject_put(obj);
3580}
3581
3582static int cm_create_port_fs(struct cm_port *port)
3583{
3584 int i, ret;
3585
3586 ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type,
3587 kobject_get(&port->cm_dev->dev_obj),
3588 "%d", port->port_num);
3589 if (ret) {
3590 kfree(port);
3591 return ret;
3592 }
3593
3594 for (i = 0; i < CM_COUNTER_GROUPS; i++) {
3595 ret = kobject_init_and_add(&port->counter_group[i].obj,
3596 &cm_counter_obj_type,
3597 kobject_get(&port->port_obj),
3598 "%s", counter_group_names[i]);
3599 if (ret)
3600 goto error;
3601 }
3602
3603 return 0;
3604
3605error:
3606 while (i--)
3607 cm_remove_fs_obj(&port->counter_group[i].obj);
3608 cm_remove_fs_obj(&port->port_obj);
3609 return ret;
3610
3611}
3612
3613static void cm_remove_port_fs(struct cm_port *port)
3614{
3615 int i;
3616
3617 for (i = 0; i < CM_COUNTER_GROUPS; i++)
3618 cm_remove_fs_obj(&port->counter_group[i].obj);
3619
3620 cm_remove_fs_obj(&port->port_obj);
3621}
3622
3382static void cm_add_one(struct ib_device *device) 3623static void cm_add_one(struct ib_device *device)
3383{ 3624{
3384 struct cm_device *cm_dev; 3625 struct cm_device *cm_dev;
@@ -3397,7 +3638,7 @@ static void cm_add_one(struct ib_device *device)
3397 if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) 3638 if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
3398 return; 3639 return;
3399 3640
3400 cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) * 3641 cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) *
3401 device->phys_port_cnt, GFP_KERNEL); 3642 device->phys_port_cnt, GFP_KERNEL);
3402 if (!cm_dev) 3643 if (!cm_dev)
3403 return; 3644 return;
@@ -3405,11 +3646,27 @@ static void cm_add_one(struct ib_device *device)
3405 cm_dev->device = device; 3646 cm_dev->device = device;
3406 cm_get_ack_delay(cm_dev); 3647 cm_get_ack_delay(cm_dev);
3407 3648
3649 ret = kobject_init_and_add(&cm_dev->dev_obj, &cm_dev_obj_type,
3650 &cm_class.subsys.kobj, "%s", device->name);
3651 if (ret) {
3652 kfree(cm_dev);
3653 return;
3654 }
3655
3408 set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); 3656 set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
3409 for (i = 1; i <= device->phys_port_cnt; i++) { 3657 for (i = 1; i <= device->phys_port_cnt; i++) {
3410 port = &cm_dev->port[i-1]; 3658 port = kzalloc(sizeof *port, GFP_KERNEL);
3659 if (!port)
3660 goto error1;
3661
3662 cm_dev->port[i-1] = port;
3411 port->cm_dev = cm_dev; 3663 port->cm_dev = cm_dev;
3412 port->port_num = i; 3664 port->port_num = i;
3665
3666 ret = cm_create_port_fs(port);
3667 if (ret)
3668 goto error1;
3669
3413 port->mad_agent = ib_register_mad_agent(device, i, 3670 port->mad_agent = ib_register_mad_agent(device, i,
3414 IB_QPT_GSI, 3671 IB_QPT_GSI,
3415 &reg_req, 3672 &reg_req,
@@ -3418,11 +3675,11 @@ static void cm_add_one(struct ib_device *device)
3418 cm_recv_handler, 3675 cm_recv_handler,
3419 port); 3676 port);
3420 if (IS_ERR(port->mad_agent)) 3677 if (IS_ERR(port->mad_agent))
3421 goto error1; 3678 goto error2;
3422 3679
3423 ret = ib_modify_port(device, i, 0, &port_modify); 3680 ret = ib_modify_port(device, i, 0, &port_modify);
3424 if (ret) 3681 if (ret)
3425 goto error2; 3682 goto error3;
3426 } 3683 }
3427 ib_set_client_data(device, &cm_client, cm_dev); 3684 ib_set_client_data(device, &cm_client, cm_dev);
3428 3685
@@ -3431,17 +3688,20 @@ static void cm_add_one(struct ib_device *device)
3431 write_unlock_irqrestore(&cm.device_lock, flags); 3688 write_unlock_irqrestore(&cm.device_lock, flags);
3432 return; 3689 return;
3433 3690
3434error2: 3691error3:
3435 ib_unregister_mad_agent(port->mad_agent); 3692 ib_unregister_mad_agent(port->mad_agent);
3693error2:
3694 cm_remove_port_fs(port);
3436error1: 3695error1:
3437 port_modify.set_port_cap_mask = 0; 3696 port_modify.set_port_cap_mask = 0;
3438 port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; 3697 port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
3439 while (--i) { 3698 while (--i) {
3440 port = &cm_dev->port[i-1]; 3699 port = cm_dev->port[i-1];
3441 ib_modify_port(device, port->port_num, 0, &port_modify); 3700 ib_modify_port(device, port->port_num, 0, &port_modify);
3442 ib_unregister_mad_agent(port->mad_agent); 3701 ib_unregister_mad_agent(port->mad_agent);
3702 cm_remove_port_fs(port);
3443 } 3703 }
3444 kfree(cm_dev); 3704 cm_remove_fs_obj(&cm_dev->dev_obj);
3445} 3705}
3446 3706
3447static void cm_remove_one(struct ib_device *device) 3707static void cm_remove_one(struct ib_device *device)
@@ -3463,11 +3723,12 @@ static void cm_remove_one(struct ib_device *device)
3463 write_unlock_irqrestore(&cm.device_lock, flags); 3723 write_unlock_irqrestore(&cm.device_lock, flags);
3464 3724
3465 for (i = 1; i <= device->phys_port_cnt; i++) { 3725 for (i = 1; i <= device->phys_port_cnt; i++) {
3466 port = &cm_dev->port[i-1]; 3726 port = cm_dev->port[i-1];
3467 ib_modify_port(device, port->port_num, 0, &port_modify); 3727 ib_modify_port(device, port->port_num, 0, &port_modify);
3468 ib_unregister_mad_agent(port->mad_agent); 3728 ib_unregister_mad_agent(port->mad_agent);
3729 cm_remove_port_fs(port);
3469 } 3730 }
3470 kfree(cm_dev); 3731 cm_remove_fs_obj(&cm_dev->dev_obj);
3471} 3732}
3472 3733
3473static int __init ib_cm_init(void) 3734static int __init ib_cm_init(void)
@@ -3488,17 +3749,25 @@ static int __init ib_cm_init(void)
3488 idr_pre_get(&cm.local_id_table, GFP_KERNEL); 3749 idr_pre_get(&cm.local_id_table, GFP_KERNEL);
3489 INIT_LIST_HEAD(&cm.timewait_list); 3750 INIT_LIST_HEAD(&cm.timewait_list);
3490 3751
3491 cm.wq = create_workqueue("ib_cm"); 3752 ret = class_register(&cm_class);
3492 if (!cm.wq) 3753 if (ret)
3493 return -ENOMEM; 3754 return -ENOMEM;
3494 3755
3756 cm.wq = create_workqueue("ib_cm");
3757 if (!cm.wq) {
3758 ret = -ENOMEM;
3759 goto error1;
3760 }
3761
3495 ret = ib_register_client(&cm_client); 3762 ret = ib_register_client(&cm_client);
3496 if (ret) 3763 if (ret)
3497 goto error; 3764 goto error2;
3498 3765
3499 return 0; 3766 return 0;
3500error: 3767error2:
3501 destroy_workqueue(cm.wq); 3768 destroy_workqueue(cm.wq);
3769error1:
3770 class_unregister(&cm_class);
3502 return ret; 3771 return ret;
3503} 3772}
3504 3773
@@ -3519,6 +3788,7 @@ static void __exit ib_cm_cleanup(void)
3519 } 3788 }
3520 3789
3521 ib_unregister_client(&cm_client); 3790 ib_unregister_client(&cm_client);
3791 class_unregister(&cm_class);
3522 idr_destroy(&cm.local_id_table); 3792 idr_destroy(&cm.local_id_table);
3523} 3793}
3524 3794
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 0751697ef984..637efead97a0 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -488,7 +488,8 @@ void rdma_destroy_qp(struct rdma_cm_id *id)
488} 488}
489EXPORT_SYMBOL(rdma_destroy_qp); 489EXPORT_SYMBOL(rdma_destroy_qp);
490 490
491static int cma_modify_qp_rtr(struct rdma_id_private *id_priv) 491static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
492 struct rdma_conn_param *conn_param)
492{ 493{
493 struct ib_qp_attr qp_attr; 494 struct ib_qp_attr qp_attr;
494 int qp_attr_mask, ret; 495 int qp_attr_mask, ret;
@@ -514,13 +515,16 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv)
514 if (ret) 515 if (ret)
515 goto out; 516 goto out;
516 517
518 if (conn_param)
519 qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
517 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 520 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
518out: 521out:
519 mutex_unlock(&id_priv->qp_mutex); 522 mutex_unlock(&id_priv->qp_mutex);
520 return ret; 523 return ret;
521} 524}
522 525
523static int cma_modify_qp_rts(struct rdma_id_private *id_priv) 526static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
527 struct rdma_conn_param *conn_param)
524{ 528{
525 struct ib_qp_attr qp_attr; 529 struct ib_qp_attr qp_attr;
526 int qp_attr_mask, ret; 530 int qp_attr_mask, ret;
@@ -536,6 +540,8 @@ static int cma_modify_qp_rts(struct rdma_id_private *id_priv)
536 if (ret) 540 if (ret)
537 goto out; 541 goto out;
538 542
543 if (conn_param)
544 qp_attr.max_rd_atomic = conn_param->initiator_depth;
539 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 545 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
540out: 546out:
541 mutex_unlock(&id_priv->qp_mutex); 547 mutex_unlock(&id_priv->qp_mutex);
@@ -866,11 +872,11 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
866{ 872{
867 int ret; 873 int ret;
868 874
869 ret = cma_modify_qp_rtr(id_priv); 875 ret = cma_modify_qp_rtr(id_priv, NULL);
870 if (ret) 876 if (ret)
871 goto reject; 877 goto reject;
872 878
873 ret = cma_modify_qp_rts(id_priv); 879 ret = cma_modify_qp_rts(id_priv, NULL);
874 if (ret) 880 if (ret)
875 goto reject; 881 goto reject;
876 882
@@ -1122,8 +1128,10 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1122 cm_id->cm_handler = cma_ib_handler; 1128 cm_id->cm_handler = cma_ib_handler;
1123 1129
1124 ret = conn_id->id.event_handler(&conn_id->id, &event); 1130 ret = conn_id->id.event_handler(&conn_id->id, &event);
1125 if (!ret) 1131 if (!ret) {
1132 cma_enable_remove(conn_id);
1126 goto out; 1133 goto out;
1134 }
1127 1135
1128 /* Destroy the CM ID by returning a non-zero value. */ 1136 /* Destroy the CM ID by returning a non-zero value. */
1129 conn_id->cm_id.ib = NULL; 1137 conn_id->cm_id.ib = NULL;
@@ -1262,6 +1270,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1262 struct net_device *dev = NULL; 1270 struct net_device *dev = NULL;
1263 struct rdma_cm_event event; 1271 struct rdma_cm_event event;
1264 int ret; 1272 int ret;
1273 struct ib_device_attr attr;
1265 1274
1266 listen_id = cm_id->context; 1275 listen_id = cm_id->context;
1267 if (cma_disable_remove(listen_id, CMA_LISTEN)) 1276 if (cma_disable_remove(listen_id, CMA_LISTEN))
@@ -1311,10 +1320,19 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1311 sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr; 1320 sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
1312 *sin = iw_event->remote_addr; 1321 *sin = iw_event->remote_addr;
1313 1322
1323 ret = ib_query_device(conn_id->id.device, &attr);
1324 if (ret) {
1325 cma_enable_remove(conn_id);
1326 rdma_destroy_id(new_cm_id);
1327 goto out;
1328 }
1329
1314 memset(&event, 0, sizeof event); 1330 memset(&event, 0, sizeof event);
1315 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 1331 event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1316 event.param.conn.private_data = iw_event->private_data; 1332 event.param.conn.private_data = iw_event->private_data;
1317 event.param.conn.private_data_len = iw_event->private_data_len; 1333 event.param.conn.private_data_len = iw_event->private_data_len;
1334 event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
1335 event.param.conn.responder_resources = attr.max_qp_rd_atom;
1318 ret = conn_id->id.event_handler(&conn_id->id, &event); 1336 ret = conn_id->id.event_handler(&conn_id->id, &event);
1319 if (ret) { 1337 if (ret) {
1320 /* User wants to destroy the CM ID */ 1338 /* User wants to destroy the CM ID */
@@ -2272,7 +2290,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
2272 sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr; 2290 sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
2273 cm_id->remote_addr = *sin; 2291 cm_id->remote_addr = *sin;
2274 2292
2275 ret = cma_modify_qp_rtr(id_priv); 2293 ret = cma_modify_qp_rtr(id_priv, conn_param);
2276 if (ret) 2294 if (ret)
2277 goto out; 2295 goto out;
2278 2296
@@ -2335,25 +2353,15 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
2335 struct rdma_conn_param *conn_param) 2353 struct rdma_conn_param *conn_param)
2336{ 2354{
2337 struct ib_cm_rep_param rep; 2355 struct ib_cm_rep_param rep;
2338 struct ib_qp_attr qp_attr; 2356 int ret;
2339 int qp_attr_mask, ret;
2340
2341 if (id_priv->id.qp) {
2342 ret = cma_modify_qp_rtr(id_priv);
2343 if (ret)
2344 goto out;
2345 2357
2346 qp_attr.qp_state = IB_QPS_RTS; 2358 ret = cma_modify_qp_rtr(id_priv, conn_param);
2347 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, &qp_attr, 2359 if (ret)
2348 &qp_attr_mask); 2360 goto out;
2349 if (ret)
2350 goto out;
2351 2361
2352 qp_attr.max_rd_atomic = conn_param->initiator_depth; 2362 ret = cma_modify_qp_rts(id_priv, conn_param);
2353 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 2363 if (ret)
2354 if (ret) 2364 goto out;
2355 goto out;
2356 }
2357 2365
2358 memset(&rep, 0, sizeof rep); 2366 memset(&rep, 0, sizeof rep);
2359 rep.qp_num = id_priv->qp_num; 2367 rep.qp_num = id_priv->qp_num;
@@ -2378,7 +2386,7 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
2378 struct iw_cm_conn_param iw_param; 2386 struct iw_cm_conn_param iw_param;
2379 int ret; 2387 int ret;
2380 2388
2381 ret = cma_modify_qp_rtr(id_priv); 2389 ret = cma_modify_qp_rtr(id_priv, conn_param);
2382 if (ret) 2390 if (ret)
2383 return ret; 2391 return ret;
2384 2392
@@ -2598,11 +2606,9 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
2598 /* IPv6 address is an SA assigned MGID. */ 2606 /* IPv6 address is an SA assigned MGID. */
2599 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); 2607 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
2600 } else { 2608 } else {
2601 ip_ib_mc_map(sin->sin_addr.s_addr, mc_map); 2609 ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
2602 if (id_priv->id.ps == RDMA_PS_UDP) 2610 if (id_priv->id.ps == RDMA_PS_UDP)
2603 mc_map[7] = 0x01; /* Use RDMA CM signature */ 2611 mc_map[7] = 0x01; /* Use RDMA CM signature */
2604 mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8;
2605 mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr);
2606 *mgid = *(union ib_gid *) (mc_map + 4); 2612 *mgid = *(union ib_gid *) (mc_map + 4);
2607 } 2613 }
2608} 2614}
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index e8d5f6b64998..6c7aa59794d4 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -139,7 +139,7 @@ static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
139static void ib_fmr_batch_release(struct ib_fmr_pool *pool) 139static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
140{ 140{
141 int ret; 141 int ret;
142 struct ib_pool_fmr *fmr; 142 struct ib_pool_fmr *fmr, *next;
143 LIST_HEAD(unmap_list); 143 LIST_HEAD(unmap_list);
144 LIST_HEAD(fmr_list); 144 LIST_HEAD(fmr_list);
145 145
@@ -158,6 +158,20 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
158#endif 158#endif
159 } 159 }
160 160
161 /*
162 * The free_list may hold FMRs that have been put there
163 * because they haven't reached the max_remap count.
164 * Invalidate their mapping as well.
165 */
166 list_for_each_entry_safe(fmr, next, &pool->free_list, list) {
167 if (fmr->remap_count == 0)
168 continue;
169 hlist_del_init(&fmr->cache_node);
170 fmr->remap_count = 0;
171 list_add_tail(&fmr->fmr->list, &fmr_list);
172 list_move(&fmr->list, &unmap_list);
173 }
174
161 list_splice(&pool->dirty_list, &unmap_list); 175 list_splice(&pool->dirty_list, &unmap_list);
162 INIT_LIST_HEAD(&pool->dirty_list); 176 INIT_LIST_HEAD(&pool->dirty_list);
163 pool->dirty_len = 0; 177 pool->dirty_len = 0;
@@ -182,8 +196,7 @@ static int ib_fmr_cleanup_thread(void *pool_ptr)
182 struct ib_fmr_pool *pool = pool_ptr; 196 struct ib_fmr_pool *pool = pool_ptr;
183 197
184 do { 198 do {
185 if (pool->dirty_len >= pool->dirty_watermark || 199 if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
186 atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
187 ib_fmr_batch_release(pool); 200 ib_fmr_batch_release(pool);
188 201
189 atomic_inc(&pool->flush_ser); 202 atomic_inc(&pool->flush_ser);
@@ -194,8 +207,7 @@ static int ib_fmr_cleanup_thread(void *pool_ptr)
194 } 207 }
195 208
196 set_current_state(TASK_INTERRUPTIBLE); 209 set_current_state(TASK_INTERRUPTIBLE);
197 if (pool->dirty_len < pool->dirty_watermark && 210 if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
198 atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
199 !kthread_should_stop()) 211 !kthread_should_stop())
200 schedule(); 212 schedule();
201 __set_current_state(TASK_RUNNING); 213 __set_current_state(TASK_RUNNING);
@@ -369,11 +381,6 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
369 381
370 i = 0; 382 i = 0;
371 list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) { 383 list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) {
372 if (fmr->remap_count) {
373 INIT_LIST_HEAD(&fmr_list);
374 list_add_tail(&fmr->fmr->list, &fmr_list);
375 ib_unmap_fmr(&fmr_list);
376 }
377 ib_dealloc_fmr(fmr->fmr); 384 ib_dealloc_fmr(fmr->fmr);
378 list_del(&fmr->list); 385 list_del(&fmr->list);
379 kfree(fmr); 386 kfree(fmr);
@@ -511,8 +518,10 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
511 list_add_tail(&fmr->list, &pool->free_list); 518 list_add_tail(&fmr->list, &pool->free_list);
512 } else { 519 } else {
513 list_add_tail(&fmr->list, &pool->dirty_list); 520 list_add_tail(&fmr->list, &pool->dirty_list);
514 ++pool->dirty_len; 521 if (++pool->dirty_len >= pool->dirty_watermark) {
515 wake_up_process(pool->thread); 522 atomic_inc(&pool->req_ser);
523 wake_up_process(pool->thread);
524 }
516 } 525 }
517 } 526 }
518 527
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 6f4287716ab1..fbe16d5250a4 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -701,7 +701,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
701 } 701 }
702 702
703 /* Check to post send on QP or process locally */ 703 /* Check to post send on QP or process locally */
704 if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD) 704 if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD &&
705 smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD)
705 goto out; 706 goto out;
706 707
707 local = kmalloc(sizeof *local, GFP_ATOMIC); 708 local = kmalloc(sizeof *local, GFP_ATOMIC);
@@ -752,8 +753,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
752 port_priv = ib_get_mad_port(mad_agent_priv->agent.device, 753 port_priv = ib_get_mad_port(mad_agent_priv->agent.device,
753 mad_agent_priv->agent.port_num); 754 mad_agent_priv->agent.port_num);
754 if (port_priv) { 755 if (port_priv) {
755 mad_priv->mad.mad.mad_hdr.tid = 756 memcpy(&mad_priv->mad.mad, smp, sizeof(struct ib_mad));
756 ((struct ib_mad *)smp)->mad_hdr.tid;
757 recv_mad_agent = find_mad_agent(port_priv, 757 recv_mad_agent = find_mad_agent(port_priv,
758 &mad_priv->mad.mad); 758 &mad_priv->mad.mad);
759 } 759 }
@@ -1100,7 +1100,9 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
1100 mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid; 1100 mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
1101 /* Timeout will be updated after send completes */ 1101 /* Timeout will be updated after send completes */
1102 mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms); 1102 mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
1103 mad_send_wr->retries = send_buf->retries; 1103 mad_send_wr->max_retries = send_buf->retries;
1104 mad_send_wr->retries_left = send_buf->retries;
1105 send_buf->retries = 0;
1104 /* Reference for work request to QP + response */ 1106 /* Reference for work request to QP + response */
1105 mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); 1107 mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
1106 mad_send_wr->status = IB_WC_SUCCESS; 1108 mad_send_wr->status = IB_WC_SUCCESS;
@@ -1931,15 +1933,6 @@ local:
1931 if (port_priv->device->process_mad) { 1933 if (port_priv->device->process_mad) {
1932 int ret; 1934 int ret;
1933 1935
1934 if (!response) {
1935 printk(KERN_ERR PFX "No memory for response MAD\n");
1936 /*
1937 * Is it better to assume that
1938 * it wouldn't be processed ?
1939 */
1940 goto out;
1941 }
1942
1943 ret = port_priv->device->process_mad(port_priv->device, 0, 1936 ret = port_priv->device->process_mad(port_priv->device, 0,
1944 port_priv->port_num, 1937 port_priv->port_num,
1945 wc, &recv->grh, 1938 wc, &recv->grh,
@@ -2282,8 +2275,6 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
2282 2275
2283 /* Empty wait list to prevent receives from finding a request */ 2276 /* Empty wait list to prevent receives from finding a request */
2284 list_splice_init(&mad_agent_priv->wait_list, &cancel_list); 2277 list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
2285 /* Empty local completion list as well */
2286 list_splice_init(&mad_agent_priv->local_list, &cancel_list);
2287 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 2278 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2288 2279
2289 /* Report all cancelled requests */ 2280 /* Report all cancelled requests */
@@ -2445,9 +2436,12 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
2445{ 2436{
2446 int ret; 2437 int ret;
2447 2438
2448 if (!mad_send_wr->retries--) 2439 if (!mad_send_wr->retries_left)
2449 return -ETIMEDOUT; 2440 return -ETIMEDOUT;
2450 2441
2442 mad_send_wr->retries_left--;
2443 mad_send_wr->send_buf.retries++;
2444
2451 mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); 2445 mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
2452 2446
2453 if (mad_send_wr->mad_agent_priv->agent.rmpp_version) { 2447 if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 9be5cc00a3a9..8b75010016ec 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -131,7 +131,8 @@ struct ib_mad_send_wr_private {
131 struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; 131 struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
132 __be64 tid; 132 __be64 tid;
133 unsigned long timeout; 133 unsigned long timeout;
134 int retries; 134 int max_retries;
135 int retries_left;
135 int retry; 136 int retry;
136 int refcount; 137 int refcount;
137 enum ib_wc_status status; 138 enum ib_wc_status status;
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index d43bc62005b3..a5e2a310f312 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -684,7 +684,7 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
684 684
685 if (seg_num > mad_send_wr->last_ack) { 685 if (seg_num > mad_send_wr->last_ack) {
686 adjust_last_ack(mad_send_wr, seg_num); 686 adjust_last_ack(mad_send_wr, seg_num);
687 mad_send_wr->retries = mad_send_wr->send_buf.retries; 687 mad_send_wr->retries_left = mad_send_wr->max_retries;
688 } 688 }
689 mad_send_wr->newwin = newwin; 689 mad_send_wr->newwin = newwin;
690 if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) { 690 if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) {
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 1bc1fe605282..107f170c57cd 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -73,11 +73,20 @@ struct mcast_device {
73}; 73};
74 74
75enum mcast_state { 75enum mcast_state {
76 MCAST_IDLE,
77 MCAST_JOINING, 76 MCAST_JOINING,
78 MCAST_MEMBER, 77 MCAST_MEMBER,
78 MCAST_ERROR,
79};
80
81enum mcast_group_state {
82 MCAST_IDLE,
79 MCAST_BUSY, 83 MCAST_BUSY,
80 MCAST_ERROR 84 MCAST_GROUP_ERROR,
85 MCAST_PKEY_EVENT
86};
87
88enum {
89 MCAST_INVALID_PKEY_INDEX = 0xFFFF
81}; 90};
82 91
83struct mcast_member; 92struct mcast_member;
@@ -93,9 +102,10 @@ struct mcast_group {
93 struct mcast_member *last_join; 102 struct mcast_member *last_join;
94 int members[3]; 103 int members[3];
95 atomic_t refcount; 104 atomic_t refcount;
96 enum mcast_state state; 105 enum mcast_group_state state;
97 struct ib_sa_query *query; 106 struct ib_sa_query *query;
98 int query_id; 107 int query_id;
108 u16 pkey_index;
99}; 109};
100 110
101struct mcast_member { 111struct mcast_member {
@@ -378,9 +388,19 @@ static int fail_join(struct mcast_group *group, struct mcast_member *member,
378static void process_group_error(struct mcast_group *group) 388static void process_group_error(struct mcast_group *group)
379{ 389{
380 struct mcast_member *member; 390 struct mcast_member *member;
381 int ret; 391 int ret = 0;
392 u16 pkey_index;
393
394 if (group->state == MCAST_PKEY_EVENT)
395 ret = ib_find_pkey(group->port->dev->device,
396 group->port->port_num,
397 be16_to_cpu(group->rec.pkey), &pkey_index);
382 398
383 spin_lock_irq(&group->lock); 399 spin_lock_irq(&group->lock);
400 if (group->state == MCAST_PKEY_EVENT && !ret &&
401 group->pkey_index == pkey_index)
402 goto out;
403
384 while (!list_empty(&group->active_list)) { 404 while (!list_empty(&group->active_list)) {
385 member = list_entry(group->active_list.next, 405 member = list_entry(group->active_list.next,
386 struct mcast_member, list); 406 struct mcast_member, list);
@@ -399,6 +419,7 @@ static void process_group_error(struct mcast_group *group)
399 } 419 }
400 420
401 group->rec.join_state = 0; 421 group->rec.join_state = 0;
422out:
402 group->state = MCAST_BUSY; 423 group->state = MCAST_BUSY;
403 spin_unlock_irq(&group->lock); 424 spin_unlock_irq(&group->lock);
404} 425}
@@ -415,9 +436,9 @@ static void mcast_work_handler(struct work_struct *work)
415retest: 436retest:
416 spin_lock_irq(&group->lock); 437 spin_lock_irq(&group->lock);
417 while (!list_empty(&group->pending_list) || 438 while (!list_empty(&group->pending_list) ||
418 (group->state == MCAST_ERROR)) { 439 (group->state != MCAST_BUSY)) {
419 440
420 if (group->state == MCAST_ERROR) { 441 if (group->state != MCAST_BUSY) {
421 spin_unlock_irq(&group->lock); 442 spin_unlock_irq(&group->lock);
422 process_group_error(group); 443 process_group_error(group);
423 goto retest; 444 goto retest;
@@ -494,12 +515,19 @@ static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
494 void *context) 515 void *context)
495{ 516{
496 struct mcast_group *group = context; 517 struct mcast_group *group = context;
518 u16 pkey_index = MCAST_INVALID_PKEY_INDEX;
497 519
498 if (status) 520 if (status)
499 process_join_error(group, status); 521 process_join_error(group, status);
500 else { 522 else {
523 ib_find_pkey(group->port->dev->device, group->port->port_num,
524 be16_to_cpu(rec->pkey), &pkey_index);
525
501 spin_lock_irq(&group->port->lock); 526 spin_lock_irq(&group->port->lock);
502 group->rec = *rec; 527 group->rec = *rec;
528 if (group->state == MCAST_BUSY &&
529 group->pkey_index == MCAST_INVALID_PKEY_INDEX)
530 group->pkey_index = pkey_index;
503 if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) { 531 if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) {
504 rb_erase(&group->node, &group->port->table); 532 rb_erase(&group->node, &group->port->table);
505 mcast_insert(group->port, group, 1); 533 mcast_insert(group->port, group, 1);
@@ -539,6 +567,7 @@ static struct mcast_group *acquire_group(struct mcast_port *port,
539 567
540 group->port = port; 568 group->port = port;
541 group->rec.mgid = *mgid; 569 group->rec.mgid = *mgid;
570 group->pkey_index = MCAST_INVALID_PKEY_INDEX;
542 INIT_LIST_HEAD(&group->pending_list); 571 INIT_LIST_HEAD(&group->pending_list);
543 INIT_LIST_HEAD(&group->active_list); 572 INIT_LIST_HEAD(&group->active_list);
544 INIT_WORK(&group->work, mcast_work_handler); 573 INIT_WORK(&group->work, mcast_work_handler);
@@ -707,7 +736,8 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
707} 736}
708EXPORT_SYMBOL(ib_init_ah_from_mcmember); 737EXPORT_SYMBOL(ib_init_ah_from_mcmember);
709 738
710static void mcast_groups_lost(struct mcast_port *port) 739static void mcast_groups_event(struct mcast_port *port,
740 enum mcast_group_state state)
711{ 741{
712 struct mcast_group *group; 742 struct mcast_group *group;
713 struct rb_node *node; 743 struct rb_node *node;
@@ -721,7 +751,8 @@ static void mcast_groups_lost(struct mcast_port *port)
721 atomic_inc(&group->refcount); 751 atomic_inc(&group->refcount);
722 queue_work(mcast_wq, &group->work); 752 queue_work(mcast_wq, &group->work);
723 } 753 }
724 group->state = MCAST_ERROR; 754 if (group->state != MCAST_GROUP_ERROR)
755 group->state = state;
725 spin_unlock(&group->lock); 756 spin_unlock(&group->lock);
726 } 757 }
727 spin_unlock_irqrestore(&port->lock, flags); 758 spin_unlock_irqrestore(&port->lock, flags);
@@ -731,16 +762,20 @@ static void mcast_event_handler(struct ib_event_handler *handler,
731 struct ib_event *event) 762 struct ib_event *event)
732{ 763{
733 struct mcast_device *dev; 764 struct mcast_device *dev;
765 int index;
734 766
735 dev = container_of(handler, struct mcast_device, event_handler); 767 dev = container_of(handler, struct mcast_device, event_handler);
768 index = event->element.port_num - dev->start_port;
736 769
737 switch (event->event) { 770 switch (event->event) {
738 case IB_EVENT_PORT_ERR: 771 case IB_EVENT_PORT_ERR:
739 case IB_EVENT_LID_CHANGE: 772 case IB_EVENT_LID_CHANGE:
740 case IB_EVENT_SM_CHANGE: 773 case IB_EVENT_SM_CHANGE:
741 case IB_EVENT_CLIENT_REREGISTER: 774 case IB_EVENT_CLIENT_REREGISTER:
742 mcast_groups_lost(&dev->port[event->element.port_num - 775 mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR);
743 dev->start_port]); 776 break;
777 case IB_EVENT_PKEY_CHANGE:
778 mcast_groups_event(&dev->port[index], MCAST_PKEY_EVENT);
744 break; 779 break;
745 default: 780 default:
746 break; 781 break;
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
index 1cfc2984434f..aff96bac49b4 100644
--- a/drivers/infiniband/core/smi.h
+++ b/drivers/infiniband/core/smi.h
@@ -59,7 +59,8 @@ extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
59 u8 node_type, int port_num); 59 u8 node_type, int port_num);
60 60
61/* 61/*
62 * Return 1 if the SMP should be handled by the local SMA/SM via process_mad 62 * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM
63 * via process_mad
63 */ 64 */
64static inline enum smi_action smi_check_local_smp(struct ib_smp *smp, 65static inline enum smi_action smi_check_local_smp(struct ib_smp *smp,
65 struct ib_device *device) 66 struct ib_device *device)
@@ -71,4 +72,19 @@ static inline enum smi_action smi_check_local_smp(struct ib_smp *smp,
71 (smp->hop_ptr == smp->hop_cnt + 1)) ? 72 (smp->hop_ptr == smp->hop_cnt + 1)) ?
72 IB_SMI_HANDLE : IB_SMI_DISCARD); 73 IB_SMI_HANDLE : IB_SMI_DISCARD);
73} 74}
75
76/*
77 * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM
78 * via process_mad
79 */
80static inline enum smi_action smi_check_local_returning_smp(struct ib_smp *smp,
81 struct ib_device *device)
82{
83 /* C14-13:3 -- We're at the end of the DR segment of path */
84 /* C14-13:4 -- Hop Pointer == 0 -> give to SM */
85 return ((device->process_mad &&
86 ib_get_smp_direction(smp) &&
87 !smp->hop_ptr) ? IB_SMI_HANDLE : IB_SMI_DISCARD);
88}
89
74#endif /* __SMI_H_ */ 90#endif /* __SMI_H_ */
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 424983f5b1ee..4291ab42a5b9 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -106,6 +106,9 @@ enum {
106 IB_UCM_MAX_DEVICES = 32 106 IB_UCM_MAX_DEVICES = 32
107}; 107};
108 108
109/* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */
110extern struct class cm_class;
111
109#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) 112#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR)
110 113
111static void ib_ucm_add_one(struct ib_device *device); 114static void ib_ucm_add_one(struct ib_device *device);
@@ -1199,7 +1202,7 @@ static int ib_ucm_close(struct inode *inode, struct file *filp)
1199 return 0; 1202 return 0;
1200} 1203}
1201 1204
1202static void ib_ucm_release_class_dev(struct class_device *class_dev) 1205static void ucm_release_class_dev(struct class_device *class_dev)
1203{ 1206{
1204 struct ib_ucm_device *dev; 1207 struct ib_ucm_device *dev;
1205 1208
@@ -1217,11 +1220,6 @@ static const struct file_operations ucm_fops = {
1217 .poll = ib_ucm_poll, 1220 .poll = ib_ucm_poll,
1218}; 1221};
1219 1222
1220static struct class ucm_class = {
1221 .name = "infiniband_cm",
1222 .release = ib_ucm_release_class_dev
1223};
1224
1225static ssize_t show_ibdev(struct class_device *class_dev, char *buf) 1223static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
1226{ 1224{
1227 struct ib_ucm_device *dev; 1225 struct ib_ucm_device *dev;
@@ -1257,9 +1255,10 @@ static void ib_ucm_add_one(struct ib_device *device)
1257 if (cdev_add(&ucm_dev->dev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1)) 1255 if (cdev_add(&ucm_dev->dev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1))
1258 goto err; 1256 goto err;
1259 1257
1260 ucm_dev->class_dev.class = &ucm_class; 1258 ucm_dev->class_dev.class = &cm_class;
1261 ucm_dev->class_dev.dev = device->dma_device; 1259 ucm_dev->class_dev.dev = device->dma_device;
1262 ucm_dev->class_dev.devt = ucm_dev->dev.dev; 1260 ucm_dev->class_dev.devt = ucm_dev->dev.dev;
1261 ucm_dev->class_dev.release = ucm_release_class_dev;
1263 snprintf(ucm_dev->class_dev.class_id, BUS_ID_SIZE, "ucm%d", 1262 snprintf(ucm_dev->class_dev.class_id, BUS_ID_SIZE, "ucm%d",
1264 ucm_dev->devnum); 1263 ucm_dev->devnum);
1265 if (class_device_register(&ucm_dev->class_dev)) 1264 if (class_device_register(&ucm_dev->class_dev))
@@ -1306,40 +1305,34 @@ static int __init ib_ucm_init(void)
1306 "infiniband_cm"); 1305 "infiniband_cm");
1307 if (ret) { 1306 if (ret) {
1308 printk(KERN_ERR "ucm: couldn't register device number\n"); 1307 printk(KERN_ERR "ucm: couldn't register device number\n");
1309 goto err; 1308 goto error1;
1310 } 1309 }
1311 1310
1312 ret = class_register(&ucm_class); 1311 ret = class_create_file(&cm_class, &class_attr_abi_version);
1313 if (ret) {
1314 printk(KERN_ERR "ucm: couldn't create class infiniband_cm\n");
1315 goto err_chrdev;
1316 }
1317
1318 ret = class_create_file(&ucm_class, &class_attr_abi_version);
1319 if (ret) { 1312 if (ret) {
1320 printk(KERN_ERR "ucm: couldn't create abi_version attribute\n"); 1313 printk(KERN_ERR "ucm: couldn't create abi_version attribute\n");
1321 goto err_class; 1314 goto error2;
1322 } 1315 }
1323 1316
1324 ret = ib_register_client(&ucm_client); 1317 ret = ib_register_client(&ucm_client);
1325 if (ret) { 1318 if (ret) {
1326 printk(KERN_ERR "ucm: couldn't register client\n"); 1319 printk(KERN_ERR "ucm: couldn't register client\n");
1327 goto err_class; 1320 goto error3;
1328 } 1321 }
1329 return 0; 1322 return 0;
1330 1323
1331err_class: 1324error3:
1332 class_unregister(&ucm_class); 1325 class_remove_file(&cm_class, &class_attr_abi_version);
1333err_chrdev: 1326error2:
1334 unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); 1327 unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
1335err: 1328error1:
1336 return ret; 1329 return ret;
1337} 1330}
1338 1331
1339static void __exit ib_ucm_cleanup(void) 1332static void __exit ib_ucm_cleanup(void)
1340{ 1333{
1341 ib_unregister_client(&ucm_client); 1334 ib_unregister_client(&ucm_client);
1342 class_unregister(&ucm_class); 1335 class_remove_file(&cm_class, &class_attr_abi_version);
1343 unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); 1336 unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
1344 idr_destroy(&ctx_id_table); 1337 idr_destroy(&ctx_id_table);
1345} 1338}
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 90d675ad9ec8..15937eb38aae 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -31,6 +31,7 @@
31 */ 31 */
32 32
33#include <linux/completion.h> 33#include <linux/completion.h>
34#include <linux/file.h>
34#include <linux/mutex.h> 35#include <linux/mutex.h>
35#include <linux/poll.h> 36#include <linux/poll.h>
36#include <linux/idr.h> 37#include <linux/idr.h>
@@ -991,6 +992,96 @@ out:
991 return ret; 992 return ret;
992} 993}
993 994
995static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
996{
997 /* Acquire mutex's based on pointer comparison to prevent deadlock. */
998 if (file1 < file2) {
999 mutex_lock(&file1->mut);
1000 mutex_lock(&file2->mut);
1001 } else {
1002 mutex_lock(&file2->mut);
1003 mutex_lock(&file1->mut);
1004 }
1005}
1006
1007static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
1008{
1009 if (file1 < file2) {
1010 mutex_unlock(&file2->mut);
1011 mutex_unlock(&file1->mut);
1012 } else {
1013 mutex_unlock(&file1->mut);
1014 mutex_unlock(&file2->mut);
1015 }
1016}
1017
1018static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
1019{
1020 struct ucma_event *uevent, *tmp;
1021
1022 list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
1023 if (uevent->ctx == ctx)
1024 list_move_tail(&uevent->list, &file->event_list);
1025}
1026
1027static ssize_t ucma_migrate_id(struct ucma_file *new_file,
1028 const char __user *inbuf,
1029 int in_len, int out_len)
1030{
1031 struct rdma_ucm_migrate_id cmd;
1032 struct rdma_ucm_migrate_resp resp;
1033 struct ucma_context *ctx;
1034 struct file *filp;
1035 struct ucma_file *cur_file;
1036 int ret = 0;
1037
1038 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1039 return -EFAULT;
1040
1041 /* Get current fd to protect against it being closed */
1042 filp = fget(cmd.fd);
1043 if (!filp)
1044 return -ENOENT;
1045
1046 /* Validate current fd and prevent destruction of id. */
1047 ctx = ucma_get_ctx(filp->private_data, cmd.id);
1048 if (IS_ERR(ctx)) {
1049 ret = PTR_ERR(ctx);
1050 goto file_put;
1051 }
1052
1053 cur_file = ctx->file;
1054 if (cur_file == new_file) {
1055 resp.events_reported = ctx->events_reported;
1056 goto response;
1057 }
1058
1059 /*
1060 * Migrate events between fd's, maintaining order, and avoiding new
1061 * events being added before existing events.
1062 */
1063 ucma_lock_files(cur_file, new_file);
1064 mutex_lock(&mut);
1065
1066 list_move_tail(&ctx->list, &new_file->ctx_list);
1067 ucma_move_events(ctx, new_file);
1068 ctx->file = new_file;
1069 resp.events_reported = ctx->events_reported;
1070
1071 mutex_unlock(&mut);
1072 ucma_unlock_files(cur_file, new_file);
1073
1074response:
1075 if (copy_to_user((void __user *)(unsigned long)cmd.response,
1076 &resp, sizeof(resp)))
1077 ret = -EFAULT;
1078
1079 ucma_put_ctx(ctx);
1080file_put:
1081 fput(filp);
1082 return ret;
1083}
1084
994static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, 1085static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
995 const char __user *inbuf, 1086 const char __user *inbuf,
996 int in_len, int out_len) = { 1087 int in_len, int out_len) = {
@@ -1012,6 +1103,7 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
1012 [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, 1103 [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify,
1013 [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast, 1104 [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast,
1014 [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, 1105 [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast,
1106 [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id
1015}; 1107};
1016 1108
1017static ssize_t ucma_write(struct file *filp, const char __user *buf, 1109static ssize_t ucma_write(struct file *filp, const char __user *buf,
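
The new ucma_migrate_id() has to hold both the source and the destination file's mutex while it moves a context and its pending events, so ucma_lock_files() always takes the mutex at the lower address first to rule out an ABBA deadlock between two concurrent migrations. A small userspace analogue of that ordering rule, using pthread mutexes (illustrative only, not the driver code):

    #include <pthread.h>

    struct file_ctx {
        pthread_mutex_t mut;
    };

    /* Always lock the lower address first so two concurrent migrations can
     * never hold the pair of locks in opposite order. */
    static void lock_pair(struct file_ctx *a, struct file_ctx *b)
    {
        if (a < b) {
            pthread_mutex_lock(&a->mut);
            pthread_mutex_lock(&b->mut);
        } else {
            pthread_mutex_lock(&b->mut);
            pthread_mutex_lock(&a->mut);
        }
    }

    static void unlock_pair(struct file_ctx *a, struct file_ctx *b)
    {
        /* unlock order does not matter for deadlock avoidance */
        pthread_mutex_unlock(&a->mut);
        pthread_mutex_unlock(&b->mut);
    }

    int main(void)
    {
        struct file_ctx f1 = { PTHREAD_MUTEX_INITIALIZER };
        struct file_ctx f2 = { PTHREAD_MUTEX_INITIALIZER };

        lock_pair(&f1, &f2);
        /* ... move contexts and events between the files here ... */
        unlock_pair(&f1, &f2);
        return 0;
    }

With both locks held, ucma_move_events() walks the old file's event list and moves only the entries that belong to the migrated context with list_move_tail(), which keeps the original event ordering intact.
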
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index b53eac4611de..4e915104ac4c 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -2,6 +2,7 @@
2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 3 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
4 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 4 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
5 * Copyright (c) 2008 Cisco. All rights reserved.
5 * 6 *
6 * This software is available to you under a choice of one of two 7 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU 8 * licenses. You may choose to be licensed under the terms of the GNU
@@ -42,7 +43,7 @@
42#include <linux/cdev.h> 43#include <linux/cdev.h>
43#include <linux/dma-mapping.h> 44#include <linux/dma-mapping.h>
44#include <linux/poll.h> 45#include <linux/poll.h>
45#include <linux/rwsem.h> 46#include <linux/mutex.h>
46#include <linux/kref.h> 47#include <linux/kref.h>
47#include <linux/compat.h> 48#include <linux/compat.h>
48 49
@@ -94,7 +95,7 @@ struct ib_umad_port {
94 struct class_device *sm_class_dev; 95 struct class_device *sm_class_dev;
95 struct semaphore sm_sem; 96 struct semaphore sm_sem;
96 97
97 struct rw_semaphore mutex; 98 struct mutex file_mutex;
98 struct list_head file_list; 99 struct list_head file_list;
99 100
100 struct ib_device *ib_dev; 101 struct ib_device *ib_dev;
@@ -110,11 +111,11 @@ struct ib_umad_device {
110}; 111};
111 112
112struct ib_umad_file { 113struct ib_umad_file {
114 struct mutex mutex;
113 struct ib_umad_port *port; 115 struct ib_umad_port *port;
114 struct list_head recv_list; 116 struct list_head recv_list;
115 struct list_head send_list; 117 struct list_head send_list;
116 struct list_head port_list; 118 struct list_head port_list;
117 spinlock_t recv_lock;
118 spinlock_t send_lock; 119 spinlock_t send_lock;
119 wait_queue_head_t recv_wait; 120 wait_queue_head_t recv_wait;
120 struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; 121 struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS];
@@ -156,7 +157,7 @@ static int hdr_size(struct ib_umad_file *file)
156 sizeof (struct ib_user_mad_hdr_old); 157 sizeof (struct ib_user_mad_hdr_old);
157} 158}
158 159
159/* caller must hold port->mutex at least for reading */ 160/* caller must hold file->mutex */
160static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id) 161static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id)
161{ 162{
162 return file->agents_dead ? NULL : file->agent[id]; 163 return file->agents_dead ? NULL : file->agent[id];
@@ -168,32 +169,30 @@ static int queue_packet(struct ib_umad_file *file,
168{ 169{
169 int ret = 1; 170 int ret = 1;
170 171
171 down_read(&file->port->mutex); 172 mutex_lock(&file->mutex);
172 173
173 for (packet->mad.hdr.id = 0; 174 for (packet->mad.hdr.id = 0;
174 packet->mad.hdr.id < IB_UMAD_MAX_AGENTS; 175 packet->mad.hdr.id < IB_UMAD_MAX_AGENTS;
175 packet->mad.hdr.id++) 176 packet->mad.hdr.id++)
176 if (agent == __get_agent(file, packet->mad.hdr.id)) { 177 if (agent == __get_agent(file, packet->mad.hdr.id)) {
177 spin_lock_irq(&file->recv_lock);
178 list_add_tail(&packet->list, &file->recv_list); 178 list_add_tail(&packet->list, &file->recv_list);
179 spin_unlock_irq(&file->recv_lock);
180 wake_up_interruptible(&file->recv_wait); 179 wake_up_interruptible(&file->recv_wait);
181 ret = 0; 180 ret = 0;
182 break; 181 break;
183 } 182 }
184 183
185 up_read(&file->port->mutex); 184 mutex_unlock(&file->mutex);
186 185
187 return ret; 186 return ret;
188} 187}
189 188
190static void dequeue_send(struct ib_umad_file *file, 189static void dequeue_send(struct ib_umad_file *file,
191 struct ib_umad_packet *packet) 190 struct ib_umad_packet *packet)
192 { 191{
193 spin_lock_irq(&file->send_lock); 192 spin_lock_irq(&file->send_lock);
194 list_del(&packet->list); 193 list_del(&packet->list);
195 spin_unlock_irq(&file->send_lock); 194 spin_unlock_irq(&file->send_lock);
196 } 195}
197 196
198static void send_handler(struct ib_mad_agent *agent, 197static void send_handler(struct ib_mad_agent *agent,
199 struct ib_mad_send_wc *send_wc) 198 struct ib_mad_send_wc *send_wc)
@@ -341,10 +340,10 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
341 if (count < hdr_size(file)) 340 if (count < hdr_size(file))
342 return -EINVAL; 341 return -EINVAL;
343 342
344 spin_lock_irq(&file->recv_lock); 343 mutex_lock(&file->mutex);
345 344
346 while (list_empty(&file->recv_list)) { 345 while (list_empty(&file->recv_list)) {
347 spin_unlock_irq(&file->recv_lock); 346 mutex_unlock(&file->mutex);
348 347
349 if (filp->f_flags & O_NONBLOCK) 348 if (filp->f_flags & O_NONBLOCK)
350 return -EAGAIN; 349 return -EAGAIN;
@@ -353,13 +352,13 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
353 !list_empty(&file->recv_list))) 352 !list_empty(&file->recv_list)))
354 return -ERESTARTSYS; 353 return -ERESTARTSYS;
355 354
356 spin_lock_irq(&file->recv_lock); 355 mutex_lock(&file->mutex);
357 } 356 }
358 357
359 packet = list_entry(file->recv_list.next, struct ib_umad_packet, list); 358 packet = list_entry(file->recv_list.next, struct ib_umad_packet, list);
360 list_del(&packet->list); 359 list_del(&packet->list);
361 360
362 spin_unlock_irq(&file->recv_lock); 361 mutex_unlock(&file->mutex);
363 362
364 if (packet->recv_wc) 363 if (packet->recv_wc)
365 ret = copy_recv_mad(file, buf, packet, count); 364 ret = copy_recv_mad(file, buf, packet, count);
@@ -368,9 +367,9 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
368 367
369 if (ret < 0) { 368 if (ret < 0) {
370 /* Requeue packet */ 369 /* Requeue packet */
371 spin_lock_irq(&file->recv_lock); 370 mutex_lock(&file->mutex);
372 list_add(&packet->list, &file->recv_list); 371 list_add(&packet->list, &file->recv_list);
373 spin_unlock_irq(&file->recv_lock); 372 mutex_unlock(&file->mutex);
374 } else { 373 } else {
375 if (packet->recv_wc) 374 if (packet->recv_wc)
376 ib_free_recv_mad(packet->recv_wc); 375 ib_free_recv_mad(packet->recv_wc);
@@ -481,7 +480,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
481 goto err; 480 goto err;
482 } 481 }
483 482
484 down_read(&file->port->mutex); 483 mutex_lock(&file->mutex);
485 484
486 agent = __get_agent(file, packet->mad.hdr.id); 485 agent = __get_agent(file, packet->mad.hdr.id);
487 if (!agent) { 486 if (!agent) {
@@ -577,7 +576,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
577 if (ret) 576 if (ret)
578 goto err_send; 577 goto err_send;
579 578
580 up_read(&file->port->mutex); 579 mutex_unlock(&file->mutex);
581 return count; 580 return count;
582 581
583err_send: 582err_send:
@@ -587,7 +586,7 @@ err_msg:
587err_ah: 586err_ah:
588 ib_destroy_ah(ah); 587 ib_destroy_ah(ah);
589err_up: 588err_up:
590 up_read(&file->port->mutex); 589 mutex_unlock(&file->mutex);
591err: 590err:
592 kfree(packet); 591 kfree(packet);
593 return ret; 592 return ret;
@@ -613,11 +612,12 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
613{ 612{
614 struct ib_user_mad_reg_req ureq; 613 struct ib_user_mad_reg_req ureq;
615 struct ib_mad_reg_req req; 614 struct ib_mad_reg_req req;
616 struct ib_mad_agent *agent; 615 struct ib_mad_agent *agent = NULL;
617 int agent_id; 616 int agent_id;
618 int ret; 617 int ret;
619 618
620 down_write(&file->port->mutex); 619 mutex_lock(&file->port->file_mutex);
620 mutex_lock(&file->mutex);
621 621
622 if (!file->port->ib_dev) { 622 if (!file->port->ib_dev) {
623 ret = -EPIPE; 623 ret = -EPIPE;
@@ -666,13 +666,13 @@ found:
666 send_handler, recv_handler, file); 666 send_handler, recv_handler, file);
667 if (IS_ERR(agent)) { 667 if (IS_ERR(agent)) {
668 ret = PTR_ERR(agent); 668 ret = PTR_ERR(agent);
669 agent = NULL;
669 goto out; 670 goto out;
670 } 671 }
671 672
672 if (put_user(agent_id, 673 if (put_user(agent_id,
673 (u32 __user *) (arg + offsetof(struct ib_user_mad_reg_req, id)))) { 674 (u32 __user *) (arg + offsetof(struct ib_user_mad_reg_req, id)))) {
674 ret = -EFAULT; 675 ret = -EFAULT;
675 ib_unregister_mad_agent(agent);
676 goto out; 676 goto out;
677 } 677 }
678 678
@@ -690,7 +690,13 @@ found:
690 ret = 0; 690 ret = 0;
691 691
692out: 692out:
693 up_write(&file->port->mutex); 693 mutex_unlock(&file->mutex);
694
695 if (ret && agent)
696 ib_unregister_mad_agent(agent);
697
698 mutex_unlock(&file->port->file_mutex);
699
694 return ret; 700 return ret;
695} 701}
696 702
@@ -703,7 +709,8 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
703 if (get_user(id, arg)) 709 if (get_user(id, arg))
704 return -EFAULT; 710 return -EFAULT;
705 711
706 down_write(&file->port->mutex); 712 mutex_lock(&file->port->file_mutex);
713 mutex_lock(&file->mutex);
707 714
708 if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { 715 if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
709 ret = -EINVAL; 716 ret = -EINVAL;
@@ -714,11 +721,13 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
714 file->agent[id] = NULL; 721 file->agent[id] = NULL;
715 722
716out: 723out:
717 up_write(&file->port->mutex); 724 mutex_unlock(&file->mutex);
718 725
719 if (agent) 726 if (agent)
720 ib_unregister_mad_agent(agent); 727 ib_unregister_mad_agent(agent);
721 728
729 mutex_unlock(&file->port->file_mutex);
730
722 return ret; 731 return ret;
723} 732}
724 733
@@ -726,12 +735,12 @@ static long ib_umad_enable_pkey(struct ib_umad_file *file)
726{ 735{
727 int ret = 0; 736 int ret = 0;
728 737
729 down_write(&file->port->mutex); 738 mutex_lock(&file->mutex);
730 if (file->already_used) 739 if (file->already_used)
731 ret = -EINVAL; 740 ret = -EINVAL;
732 else 741 else
733 file->use_pkey_index = 1; 742 file->use_pkey_index = 1;
734 up_write(&file->port->mutex); 743 mutex_unlock(&file->mutex);
735 744
736 return ret; 745 return ret;
737} 746}
@@ -783,7 +792,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
783 if (!port) 792 if (!port)
784 return -ENXIO; 793 return -ENXIO;
785 794
786 down_write(&port->mutex); 795 mutex_lock(&port->file_mutex);
787 796
788 if (!port->ib_dev) { 797 if (!port->ib_dev) {
789 ret = -ENXIO; 798 ret = -ENXIO;
@@ -797,7 +806,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
797 goto out; 806 goto out;
798 } 807 }
799 808
800 spin_lock_init(&file->recv_lock); 809 mutex_init(&file->mutex);
801 spin_lock_init(&file->send_lock); 810 spin_lock_init(&file->send_lock);
802 INIT_LIST_HEAD(&file->recv_list); 811 INIT_LIST_HEAD(&file->recv_list);
803 INIT_LIST_HEAD(&file->send_list); 812 INIT_LIST_HEAD(&file->send_list);
@@ -809,7 +818,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
809 list_add_tail(&file->port_list, &port->file_list); 818 list_add_tail(&file->port_list, &port->file_list);
810 819
811out: 820out:
812 up_write(&port->mutex); 821 mutex_unlock(&port->file_mutex);
813 return ret; 822 return ret;
814} 823}
815 824
@@ -821,7 +830,8 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
821 int already_dead; 830 int already_dead;
822 int i; 831 int i;
823 832
824 down_write(&file->port->mutex); 833 mutex_lock(&file->port->file_mutex);
834 mutex_lock(&file->mutex);
825 835
826 already_dead = file->agents_dead; 836 already_dead = file->agents_dead;
827 file->agents_dead = 1; 837 file->agents_dead = 1;
@@ -834,14 +844,14 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
834 844
835 list_del(&file->port_list); 845 list_del(&file->port_list);
836 846
837 downgrade_write(&file->port->mutex); 847 mutex_unlock(&file->mutex);
838 848
839 if (!already_dead) 849 if (!already_dead)
840 for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i) 850 for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i)
841 if (file->agent[i]) 851 if (file->agent[i])
842 ib_unregister_mad_agent(file->agent[i]); 852 ib_unregister_mad_agent(file->agent[i]);
843 853
844 up_read(&file->port->mutex); 854 mutex_unlock(&file->port->file_mutex);
845 855
846 kfree(file); 856 kfree(file);
847 kref_put(&dev->ref, ib_umad_release_dev); 857 kref_put(&dev->ref, ib_umad_release_dev);
@@ -914,10 +924,10 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
914 }; 924 };
915 int ret = 0; 925 int ret = 0;
916 926
917 down_write(&port->mutex); 927 mutex_lock(&port->file_mutex);
918 if (port->ib_dev) 928 if (port->ib_dev)
919 ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); 929 ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
920 up_write(&port->mutex); 930 mutex_unlock(&port->file_mutex);
921 931
922 up(&port->sm_sem); 932 up(&port->sm_sem);
923 933
@@ -981,7 +991,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
981 port->ib_dev = device; 991 port->ib_dev = device;
982 port->port_num = port_num; 992 port->port_num = port_num;
983 init_MUTEX(&port->sm_sem); 993 init_MUTEX(&port->sm_sem);
984 init_rwsem(&port->mutex); 994 mutex_init(&port->file_mutex);
985 INIT_LIST_HEAD(&port->file_list); 995 INIT_LIST_HEAD(&port->file_list);
986 996
987 port->dev = cdev_alloc(); 997 port->dev = cdev_alloc();
@@ -1052,6 +1062,7 @@ err_cdev:
1052static void ib_umad_kill_port(struct ib_umad_port *port) 1062static void ib_umad_kill_port(struct ib_umad_port *port)
1053{ 1063{
1054 struct ib_umad_file *file; 1064 struct ib_umad_file *file;
1065 int already_dead;
1055 int id; 1066 int id;
1056 1067
1057 class_set_devdata(port->class_dev, NULL); 1068 class_set_devdata(port->class_dev, NULL);
@@ -1067,42 +1078,22 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
1067 umad_port[port->dev_num] = NULL; 1078 umad_port[port->dev_num] = NULL;
1068 spin_unlock(&port_lock); 1079 spin_unlock(&port_lock);
1069 1080
1070 down_write(&port->mutex); 1081 mutex_lock(&port->file_mutex);
1071 1082
1072 port->ib_dev = NULL; 1083 port->ib_dev = NULL;
1073 1084
1074 /* 1085 list_for_each_entry(file, &port->file_list, port_list) {
1075 * Now go through the list of files attached to this port and 1086 mutex_lock(&file->mutex);
1076 * unregister all of their MAD agents. We need to hold 1087 already_dead = file->agents_dead;
1077 * port->mutex while doing this to avoid racing with
1078 * ib_umad_close(), but we can't hold the mutex for writing
1079 * while calling ib_unregister_mad_agent(), since that might
1080 * deadlock by calling back into queue_packet(). So we
1081 * downgrade our lock to a read lock, and then drop and
1082 * reacquire the write lock for the next iteration.
1083 *
1084 * We do list_del_init() on the file's list_head so that the
1085 * list_del in ib_umad_close() is still OK, even after the
1086 * file is removed from the list.
1087 */
1088 while (!list_empty(&port->file_list)) {
1089 file = list_entry(port->file_list.next, struct ib_umad_file,
1090 port_list);
1091
1092 file->agents_dead = 1; 1088 file->agents_dead = 1;
1093 list_del_init(&file->port_list); 1089 mutex_unlock(&file->mutex);
1094
1095 downgrade_write(&port->mutex);
1096 1090
1097 for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id) 1091 for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id)
1098 if (file->agent[id]) 1092 if (file->agent[id])
1099 ib_unregister_mad_agent(file->agent[id]); 1093 ib_unregister_mad_agent(file->agent[id]);
1100
1101 up_read(&port->mutex);
1102 down_write(&port->mutex);
1103 } 1094 }
1104 1095
1105 up_write(&port->mutex); 1096 mutex_unlock(&port->file_mutex);
1106 1097
1107 clear_bit(port->dev_num, dev_map); 1098 clear_bit(port->dev_num, dev_map);
1108} 1099}
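
The user_mad.c conversion above replaces the per-port rw_semaphore with two plain mutexes: file->mutex now guards a file's receive list and agent table (so the old recv_lock spinlock goes away), while port->file_mutex serializes open/close and port removal against agent (un)registration. ib_unregister_mad_agent() is only ever called with the outer file_mutex held, never with file->mutex, because the MAD layer can call back into queue_packet(), which takes file->mutex. A hedged userspace sketch of that nesting; the structure and function names are illustrative, not the kernel's:

    #include <pthread.h>

    struct umad_port {
        pthread_mutex_t file_mutex;     /* outer: open/close vs. port removal */
    };

    struct umad_file {
        pthread_mutex_t mutex;          /* inner: recv list and agent table   */
        struct umad_port *port;
        int agents_dead;
    };

    static void close_file(struct umad_file *f)
    {
        pthread_mutex_lock(&f->port->file_mutex);
        pthread_mutex_lock(&f->mutex);
        f->agents_dead = 1;             /* queue_packet() will now drop MADs  */
        pthread_mutex_unlock(&f->mutex);

        /*
         * Agent teardown would go here.  It may call back into code that
         * takes f->mutex, so only the outer lock is held at this point.
         */

        pthread_mutex_unlock(&f->port->file_mutex);
    }

    int main(void)
    {
        struct umad_port p = { PTHREAD_MUTEX_INITIALIZER };
        struct umad_file f = { PTHREAD_MUTEX_INITIALIZER, &p, 0 };

        close_file(&f);
        return 0;
    }

ib_umad_kill_port() follows the same rule in the hunk above: it marks each file's agents dead under file->mutex, drops that lock, and only then unregisters the agents while still holding file_mutex, which removes the old downgrade_write() dance.
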
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index eec6a30840ca..03c5ff62889a 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -179,7 +179,7 @@ int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
179 setup.size = 1UL << cq->size_log2; 179 setup.size = 1UL << cq->size_log2;
180 setup.credits = 65535; 180 setup.credits = 65535;
181 setup.credit_thres = 1; 181 setup.credit_thres = 1;
182 if (rdev_p->t3cdev_p->type == T3B) 182 if (rdev_p->t3cdev_p->type != T3A)
183 setup.ovfl_mode = 0; 183 setup.ovfl_mode = 0;
184 else 184 else
185 setup.ovfl_mode = 1; 185 setup.ovfl_mode = 1;
@@ -584,7 +584,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
584{ 584{
585 u32 i, nr_wqe, copy_len; 585 u32 i, nr_wqe, copy_len;
586 u8 *copy_data; 586 u8 *copy_data;
587 u8 wr_len, utx_len; /* lenght in 8 byte flit */ 587 u8 wr_len, utx_len; /* length in 8 byte flit */
588 enum t3_wr_flags flag; 588 enum t3_wr_flags flag;
589 __be64 *wqe; 589 __be64 *wqe;
590 u64 utx_cmd; 590 u64 utx_cmd;
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index c84d4ac49355..969d4d928455 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -315,7 +315,7 @@ struct t3_rdma_init_wr {
315 __be32 ird; 315 __be32 ird;
316 __be64 qp_dma_addr; /* 7 */ 316 __be64 qp_dma_addr; /* 7 */
317 __be32 qp_dma_size; /* 8 */ 317 __be32 qp_dma_size; /* 8 */
318 u32 irs; 318 __be32 irs;
319}; 319};
320 320
321struct t3_genbit { 321struct t3_genbit {
@@ -324,7 +324,8 @@ struct t3_genbit {
324}; 324};
325 325
326enum rdma_init_wr_flags { 326enum rdma_init_wr_flags {
327 RECVS_POSTED = 1, 327 RECVS_POSTED = (1<<0),
328 PRIV_QP = (1<<1),
328}; 329};
329 330
330union t3_wr { 331union t3_wr {
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 20ba372dd182..f8cb0fe748c3 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -1118,7 +1118,7 @@ static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1118 status2errno(rpl->status)); 1118 status2errno(rpl->status));
1119 connect_reply_upcall(ep, status2errno(rpl->status)); 1119 connect_reply_upcall(ep, status2errno(rpl->status));
1120 state_set(&ep->com, DEAD); 1120 state_set(&ep->com, DEAD);
1121 if (ep->com.tdev->type == T3B && act_open_has_tid(rpl->status)) 1121 if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status))
1122 release_tid(ep->com.tdev, GET_TID(rpl), NULL); 1122 release_tid(ep->com.tdev, GET_TID(rpl), NULL);
1123 cxgb3_free_atid(ep->com.tdev, ep->atid); 1123 cxgb3_free_atid(ep->com.tdev, ep->atid);
1124 dst_release(ep->dst); 1124 dst_release(ep->dst);
@@ -1249,7 +1249,7 @@ static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip,
1249 skb_trim(skb, sizeof(struct cpl_tid_release)); 1249 skb_trim(skb, sizeof(struct cpl_tid_release));
1250 skb_get(skb); 1250 skb_get(skb);
1251 1251
1252 if (tdev->type == T3B) 1252 if (tdev->type != T3A)
1253 release_tid(tdev, hwtid, skb); 1253 release_tid(tdev, hwtid, skb);
1254 else { 1254 else {
1255 struct cpl_pass_accept_rpl *rpl; 1255 struct cpl_pass_accept_rpl *rpl;
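
This is the same fix as in cxio_hal.c above: the old workaround checks tested for T3B explicitly, so any part newer than T3B would also have fallen into the T3A workaround path; testing for "not T3A" keeps the workaround scoped to the one chip that needs it. A tiny standalone illustration (the chip enum here is a stand-in for the t3cdev type field, not the driver's definition):

    #include <stdio.h>

    enum chip_type { T3A, T3B, T3C };   /* stand-in for the t3cdev type field */

    static int needs_t3a_workaround(enum chip_type t)
    {
        return t == T3A;    /* old code tested "== T3B" for the normal path,
                               so anything newer than T3B was treated like T3A */
    }

    int main(void)
    {
        printf("T3C needs workaround: %d\n", needs_t3a_workaround(T3C)); /* 0 */
        return 0;
    }
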
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
index a6c2c4ba29e6..73bfd1656f86 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -122,6 +122,13 @@ int build_phys_page_list(struct ib_phys_buf *buffer_list,
122 *total_size += buffer_list[i].size; 122 *total_size += buffer_list[i].size;
123 if (i > 0) 123 if (i > 0)
124 mask |= buffer_list[i].addr; 124 mask |= buffer_list[i].addr;
125 else
126 mask |= buffer_list[i].addr & PAGE_MASK;
127 if (i != num_phys_buf - 1)
128 mask |= buffer_list[i].addr + buffer_list[i].size;
129 else
130 mask |= (buffer_list[i].addr + buffer_list[i].size +
131 PAGE_SIZE - 1) & PAGE_MASK;
125 } 132 }
126 133
127 if (*total_size > 0xFFFFFFFFULL) 134 if (*total_size > 0xFFFFFFFFULL)
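
The build_phys_page_list() change above folds the page-aligned start of the first buffer and the page-aligned (rounded-up) end of the last buffer into the same mask as the interior boundaries, so the subsequent page-shift derivation sees every edge of the region. A rough standalone illustration of why the lowest set bit of such a mask gives the largest usable page size; the addresses are made up and this is not the driver's exact shift computation:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* page-aligned start, two interior boundaries, rounded-up end */
        uint64_t bounds[] = { 0x100000, 0x140000, 0x180000, 0x200000 };
        uint64_t mask = 0;

        for (unsigned i = 0; i < sizeof(bounds) / sizeof(bounds[0]); i++)
            mask |= bounds[i];

        /* lowest set bit == largest power-of-two size dividing every edge */
        unsigned shift = (unsigned)__builtin_ctzll(mask);
        printf("largest common page size: %llu bytes (shift %u)\n",
               1ULL << shift, shift);       /* 262144 bytes, shift 18 */
        return 0;
    }
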
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index b5436ca92e68..df1838f8f94d 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -39,6 +39,7 @@
39#include <linux/list.h> 39#include <linux/list.h>
40#include <linux/spinlock.h> 40#include <linux/spinlock.h>
41#include <linux/ethtool.h> 41#include <linux/ethtool.h>
42#include <linux/rtnetlink.h>
42 43
43#include <asm/io.h> 44#include <asm/io.h>
44#include <asm/irq.h> 45#include <asm/irq.h>
@@ -645,7 +646,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
645 if (err) 646 if (err)
646 goto err; 647 goto err;
647 648
648 if (udata && t3b_device(rhp)) { 649 if (udata && !t3a_device(rhp)) {
649 uresp.pbl_addr = (mhp->attr.pbl_addr - 650 uresp.pbl_addr = (mhp->attr.pbl_addr -
650 rhp->rdev.rnic_info.pbl_base) >> 3; 651 rhp->rdev.rnic_info.pbl_base) >> 3;
651 PDBG("%s user resp pbl_addr 0x%x\n", __FUNCTION__, 652 PDBG("%s user resp pbl_addr 0x%x\n", __FUNCTION__,
@@ -1053,7 +1054,9 @@ static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
1053 struct net_device *lldev = dev->rdev.t3cdev_p->lldev; 1054 struct net_device *lldev = dev->rdev.t3cdev_p->lldev;
1054 1055
1055 PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev); 1056 PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev);
1057 rtnl_lock();
1056 lldev->ethtool_ops->get_drvinfo(lldev, &info); 1058 lldev->ethtool_ops->get_drvinfo(lldev, &info);
1059 rtnl_unlock();
1057 return sprintf(buf, "%s\n", info.fw_version); 1060 return sprintf(buf, "%s\n", info.fw_version);
1058} 1061}
1059 1062
@@ -1065,7 +1068,9 @@ static ssize_t show_hca(struct class_device *cdev, char *buf)
1065 struct net_device *lldev = dev->rdev.t3cdev_p->lldev; 1068 struct net_device *lldev = dev->rdev.t3cdev_p->lldev;
1066 1069
1067 PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev); 1070 PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev);
1071 rtnl_lock();
1068 lldev->ethtool_ops->get_drvinfo(lldev, &info); 1072 lldev->ethtool_ops->get_drvinfo(lldev, &info);
1073 rtnl_unlock();
1069 return sprintf(buf, "%s\n", info.driver); 1074 return sprintf(buf, "%s\n", info.driver);
1070} 1075}
1071 1076
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index dd89b6b91f9c..ea2cdd73dd85 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -208,36 +208,19 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
208static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe, 208static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe,
209 struct ib_recv_wr *wr) 209 struct ib_recv_wr *wr)
210{ 210{
211 int i, err = 0; 211 int i;
212 u32 pbl_addr[4];
213 u8 page_size[4];
214 if (wr->num_sge > T3_MAX_SGE) 212 if (wr->num_sge > T3_MAX_SGE)
215 return -EINVAL; 213 return -EINVAL;
216 err = iwch_sgl2pbl_map(rhp, wr->sg_list, wr->num_sge, pbl_addr,
217 page_size);
218 if (err)
219 return err;
220 wqe->recv.pagesz[0] = page_size[0];
221 wqe->recv.pagesz[1] = page_size[1];
222 wqe->recv.pagesz[2] = page_size[2];
223 wqe->recv.pagesz[3] = page_size[3];
224 wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); 214 wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
225 for (i = 0; i < wr->num_sge; i++) { 215 for (i = 0; i < wr->num_sge; i++) {
226 wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); 216 wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
227 wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); 217 wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
228 218 wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
229 /* to in the WQE == the offset into the page */
230 wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) %
231 (1UL << (12 + page_size[i])));
232
233 /* pbl_addr is the adapters address in the PBL */
234 wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]);
235 } 219 }
236 for (; i < T3_MAX_SGE; i++) { 220 for (; i < T3_MAX_SGE; i++) {
237 wqe->recv.sgl[i].stag = 0; 221 wqe->recv.sgl[i].stag = 0;
238 wqe->recv.sgl[i].len = 0; 222 wqe->recv.sgl[i].len = 0;
239 wqe->recv.sgl[i].to = 0; 223 wqe->recv.sgl[i].to = 0;
240 wqe->recv.pbl_addr[i] = 0;
241 } 224 }
242 return 0; 225 return 0;
243} 226}
@@ -659,6 +642,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
659 cxio_flush_rq(&qhp->wq, &rchp->cq, count); 642 cxio_flush_rq(&qhp->wq, &rchp->cq, count);
660 spin_unlock(&qhp->lock); 643 spin_unlock(&qhp->lock);
661 spin_unlock_irqrestore(&rchp->lock, *flag); 644 spin_unlock_irqrestore(&rchp->lock, *flag);
645 (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
662 646
663 /* locking heirarchy: cq lock first, then qp lock. */ 647 /* locking heirarchy: cq lock first, then qp lock. */
664 spin_lock_irqsave(&schp->lock, *flag); 648 spin_lock_irqsave(&schp->lock, *flag);
@@ -668,6 +652,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
668 cxio_flush_sq(&qhp->wq, &schp->cq, count); 652 cxio_flush_sq(&qhp->wq, &schp->cq, count);
669 spin_unlock(&qhp->lock); 653 spin_unlock(&qhp->lock);
670 spin_unlock_irqrestore(&schp->lock, *flag); 654 spin_unlock_irqrestore(&schp->lock, *flag);
655 (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
671 656
672 /* deref */ 657 /* deref */
673 if (atomic_dec_and_test(&qhp->refcnt)) 658 if (atomic_dec_and_test(&qhp->refcnt))
@@ -678,7 +663,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
678 663
679static void flush_qp(struct iwch_qp *qhp, unsigned long *flag) 664static void flush_qp(struct iwch_qp *qhp, unsigned long *flag)
680{ 665{
681 if (t3b_device(qhp->rhp)) 666 if (qhp->ibqp.uobject)
682 cxio_set_wq_in_error(&qhp->wq); 667 cxio_set_wq_in_error(&qhp->wq);
683 else 668 else
684 __flush_qp(qhp, flag); 669 __flush_qp(qhp, flag);
@@ -732,6 +717,7 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
732 init_attr.qp_dma_addr = qhp->wq.dma_addr; 717 init_attr.qp_dma_addr = qhp->wq.dma_addr;
733 init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); 718 init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
734 init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0; 719 init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
720 init_attr.flags |= capable(CAP_NET_BIND_SERVICE) ? PRIV_QP : 0;
735 init_attr.irs = qhp->ep->rcv_seq; 721 init_attr.irs = qhp->ep->rcv_seq;
736 PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d " 722 PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
737 "flags 0x%x qpcaps 0x%x\n", __FUNCTION__, 723 "flags 0x%x qpcaps 0x%x\n", __FUNCTION__,
@@ -847,10 +833,11 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
847 disconnect = 1; 833 disconnect = 1;
848 ep = qhp->ep; 834 ep = qhp->ep;
849 } 835 }
836 flush_qp(qhp, &flag);
850 break; 837 break;
851 case IWCH_QP_STATE_TERMINATE: 838 case IWCH_QP_STATE_TERMINATE:
852 qhp->attr.state = IWCH_QP_STATE_TERMINATE; 839 qhp->attr.state = IWCH_QP_STATE_TERMINATE;
853 if (t3b_device(qhp->rhp)) 840 if (qhp->ibqp.uobject)
854 cxio_set_wq_in_error(&qhp->wq); 841 cxio_set_wq_in_error(&qhp->wq);
855 if (!internal) 842 if (!internal)
856 terminate = 1; 843 terminate = 1;
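
Two details above tie back to the cxio_wr.h hunk: the rdma_init_wr_flags values are now written as explicit bit shifts so they can be OR'd together, and rdma_init() sets the new PRIV_QP bit when the caller has CAP_NET_BIND_SERVICE. A trivial standalone sketch of that flag composition; rqes_posted and privileged are stand-ins for the driver's checks:

    #include <stdio.h>

    enum rdma_init_wr_flags {
        RECVS_POSTED = (1 << 0),
        PRIV_QP      = (1 << 1),
    };

    int main(void)
    {
        int rqes_posted = 1;    /* receive WRs already posted?                */
        int privileged  = 0;    /* stand-in for capable(CAP_NET_BIND_SERVICE) */
        unsigned int flags = 0;

        flags |= rqes_posted ? RECVS_POSTED : 0;
        flags |= privileged  ? PRIV_QP      : 0;

        printf("init_attr.flags = 0x%x\n", flags);  /* 0x1 */
        return 0;
    }
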
diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c
index f7782c882ab4..194c1c30cf63 100644
--- a/drivers/infiniband/hw/ehca/ehca_av.c
+++ b/drivers/infiniband/hw/ehca/ehca_av.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER 2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 * 3 *
4 * adress vector functions 4 * address vector functions
5 * 5 *
6 * Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com> 6 * Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
7 * Khadija Souissi <souissik@de.ibm.com> 7 * Khadija Souissi <souissik@de.ibm.com>
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 74d2b72a11d8..f281d16040f5 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -94,7 +94,11 @@ struct ehca_sma_attr {
94 94
95struct ehca_sport { 95struct ehca_sport {
96 struct ib_cq *ibcq_aqp1; 96 struct ib_cq *ibcq_aqp1;
97 struct ib_qp *ibqp_aqp1; 97 struct ib_qp *ibqp_sqp[2];
98 /* lock to serialze modify_qp() calls for sqp in normal
99 * and irq path (when event PORT_ACTIVE is received first time)
100 */
101 spinlock_t mod_sqp_lock;
98 enum ib_port_state port_state; 102 enum ib_port_state port_state;
99 struct ehca_sma_attr saved_attr; 103 struct ehca_sma_attr saved_attr;
100}; 104};
@@ -141,6 +145,14 @@ enum ehca_ext_qp_type {
141 EQPT_SRQ = 3, 145 EQPT_SRQ = 3,
142}; 146};
143 147
148/* struct to cache modify_qp()'s parms for GSI/SMI qp */
149struct ehca_mod_qp_parm {
150 int mask;
151 struct ib_qp_attr attr;
152};
153
154#define EHCA_MOD_QP_PARM_MAX 4
155
144struct ehca_qp { 156struct ehca_qp {
145 union { 157 union {
146 struct ib_qp ib_qp; 158 struct ib_qp ib_qp;
@@ -164,10 +176,18 @@ struct ehca_qp {
164 struct ehca_cq *recv_cq; 176 struct ehca_cq *recv_cq;
165 unsigned int sqerr_purgeflag; 177 unsigned int sqerr_purgeflag;
166 struct hlist_node list_entries; 178 struct hlist_node list_entries;
179 /* array to cache modify_qp()'s parms for GSI/SMI qp */
180 struct ehca_mod_qp_parm *mod_qp_parm;
181 int mod_qp_parm_idx;
167 /* mmap counter for resources mapped into user space */ 182 /* mmap counter for resources mapped into user space */
168 u32 mm_count_squeue; 183 u32 mm_count_squeue;
169 u32 mm_count_rqueue; 184 u32 mm_count_rqueue;
170 u32 mm_count_galpa; 185 u32 mm_count_galpa;
186 /* unsolicited ack circumvention */
187 int unsol_ack_circ;
188 int mtu_shift;
189 u32 message_count;
190 u32 packet_count;
171}; 191};
172 192
173#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) 193#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
@@ -323,6 +343,7 @@ extern int ehca_port_act_time;
323extern int ehca_use_hp_mr; 343extern int ehca_use_hp_mr;
324extern int ehca_scaling_code; 344extern int ehca_scaling_code;
325extern int ehca_lock_hcalls; 345extern int ehca_lock_hcalls;
346extern int ehca_nr_ports;
326 347
327struct ipzu_queue_resp { 348struct ipzu_queue_resp {
328 u32 qe_size; /* queue entry size */ 349 u32 qe_size; /* queue entry size */
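
ehca_classes.h above adds a small, bounded cache: while a GSI/SMI port is still coming up, modify_qp() parameters (mask plus attributes) are stored in a per-QP array of at most EHCA_MOD_QP_PARM_MAX entries, guarded by the new per-port mod_sqp_lock. A standalone sketch of that bounded append; the attribute is reduced to an int here, the real entry holds a struct ib_qp_attr:

    #include <stdio.h>
    #include <stdlib.h>

    #define MOD_QP_PARM_MAX 4           /* mirrors EHCA_MOD_QP_PARM_MAX */

    struct mod_qp_parm {
        int mask;
        int attr;                       /* placeholder for struct ib_qp_attr */
    };

    struct sqp_cache {
        struct mod_qp_parm *parm;       /* NULL once the port is active */
        int idx;
    };

    /* Returns 0 when cached, 1 when the caller should apply it directly,
     * -1 on overflow (the driver returns -EINVAL there). */
    static int cache_modify(struct sqp_cache *c, int mask, int attr)
    {
        if (!c->parm)
            return 1;
        if (c->idx >= MOD_QP_PARM_MAX)
            return -1;
        c->parm[c->idx].mask = mask;
        c->parm[c->idx].attr = attr;
        c->idx++;
        return 0;
    }

    int main(void)
    {
        struct sqp_cache c = { 0 };

        c.parm = calloc(MOD_QP_PARM_MAX, sizeof(*c.parm));
        if (!c.parm)
            return 1;

        int r1 = cache_modify(&c, 1, 10);
        int r2 = cache_modify(&c, 2, 20);

        printf("%d %d idx=%d\n", r1, r2, c.idx);    /* 0 0 idx=2 */
        free(c.parm);
        return 0;
    }
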
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 79c25f51c21e..0467c158d4a9 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -246,7 +246,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
246 } else { 246 } else {
247 if (h_ret != H_PAGE_REGISTERED) { 247 if (h_ret != H_PAGE_REGISTERED) {
248 ehca_err(device, "Registration of page failed " 248 ehca_err(device, "Registration of page failed "
249 "ehca_cq=%p cq_num=%x h_ret=%li" 249 "ehca_cq=%p cq_num=%x h_ret=%li "
250 "counter=%i act_pages=%i", 250 "counter=%i act_pages=%i",
251 my_cq, my_cq->cq_number, 251 my_cq, my_cq->cq_number,
252 h_ret, counter, param.act_pages); 252 h_ret, counter, param.act_pages);
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 3f617b27b954..863b34fa9ff9 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -62,6 +62,7 @@
62#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15) 62#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15)
63#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16) 63#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)
64#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16) 64#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16)
65#define NEQE_SPECIFIC_EVENT EHCA_BMASK_IBM(16, 23)
65 66
66#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63) 67#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63)
67#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7) 68#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7)
@@ -354,17 +355,34 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
354{ 355{
355 u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); 356 u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
356 u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); 357 u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
358 u8 spec_event;
359 struct ehca_sport *sport = &shca->sport[port - 1];
360 unsigned long flags;
357 361
358 switch (ec) { 362 switch (ec) {
359 case 0x30: /* port availability change */ 363 case 0x30: /* port availability change */
360 if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { 364 if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
361 shca->sport[port - 1].port_state = IB_PORT_ACTIVE; 365 int suppress_event;
366 /* replay modify_qp for sqps */
367 spin_lock_irqsave(&sport->mod_sqp_lock, flags);
368 suppress_event = !sport->ibqp_sqp[IB_QPT_GSI];
369 if (sport->ibqp_sqp[IB_QPT_SMI])
370 ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]);
371 if (!suppress_event)
372 ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]);
373 spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
374
375 /* AQP1 was destroyed, ignore this event */
376 if (suppress_event)
377 break;
378
379 sport->port_state = IB_PORT_ACTIVE;
362 dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, 380 dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
363 "is active"); 381 "is active");
364 ehca_query_sma_attr(shca, port, 382 ehca_query_sma_attr(shca, port,
365 &shca->sport[port - 1].saved_attr); 383 &sport->saved_attr);
366 } else { 384 } else {
367 shca->sport[port - 1].port_state = IB_PORT_DOWN; 385 sport->port_state = IB_PORT_DOWN;
368 dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, 386 dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
369 "is inactive"); 387 "is inactive");
370 } 388 }
@@ -378,11 +396,11 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
378 ehca_warn(&shca->ib_device, "disruptive port " 396 ehca_warn(&shca->ib_device, "disruptive port "
379 "%d configuration change", port); 397 "%d configuration change", port);
380 398
381 shca->sport[port - 1].port_state = IB_PORT_DOWN; 399 sport->port_state = IB_PORT_DOWN;
382 dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, 400 dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
383 "is inactive"); 401 "is inactive");
384 402
385 shca->sport[port - 1].port_state = IB_PORT_ACTIVE; 403 sport->port_state = IB_PORT_ACTIVE;
386 dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, 404 dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
387 "is active"); 405 "is active");
388 } else 406 } else
@@ -394,6 +412,16 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
394 case 0x33: /* trace stopped */ 412 case 0x33: /* trace stopped */
395 ehca_err(&shca->ib_device, "Traced stopped."); 413 ehca_err(&shca->ib_device, "Traced stopped.");
396 break; 414 break;
415 case 0x34: /* util async event */
416 spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe);
417 if (spec_event == 0x80) /* client reregister required */
418 dispatch_port_event(shca, port,
419 IB_EVENT_CLIENT_REREGISTER,
420 "client reregister req.");
421 else
422 ehca_warn(&shca->ib_device, "Unknown util async "
423 "event %x on port %x", spec_event, port);
424 break;
397 default: 425 default:
398 ehca_err(&shca->ib_device, "Unknown event code: %x on %s.", 426 ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
399 ec, shca->ib_device.name); 427 ec, shca->ib_device.name);
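
The reworked 0x30 handler above replays the cached modify_qp() calls for the SMI and GSI QPs under mod_sqp_lock before reporting PORT_ACTIVE, and the new 0x34 case decodes NEQE_SPECIFIC_EVENT (bits 16..23 of the EQE) to dispatch IB_EVENT_CLIENT_REREGISTER. A hedged helper showing how such fields can be extracted, assuming the usual IBM bit numbering where bit 0 is the most significant bit of the 64-bit entry; this is an illustration, not the real EHCA_BMASK_GET() macro:

    #include <stdio.h>
    #include <stdint.h>

    /* Extract IBM-numbered bits [from..to] of a 64-bit word (bit 0 = MSB). */
    static uint64_t get_bits_ibm(uint64_t x, int from, int to)
    {
        int width = to - from + 1;

        return (x >> (63 - to)) & ((1ULL << width) - 1);
    }

    int main(void)
    {
        uint64_t eqe = 0x0001800000000000ULL;   /* made-up event queue entry */

        printf("port number    : %llu\n",
               (unsigned long long)get_bits_ibm(eqe, 8, 15));   /* 1    */
        printf("specific event : 0x%llx\n",
               (unsigned long long)get_bits_ibm(eqe, 16, 23));  /* 0x80 */
        return 0;
    }
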
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index 5485799cdc8d..c469bfde2708 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -200,4 +200,6 @@ void ehca_free_fw_ctrlblock(void *ptr);
200#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) 200#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr))
201#endif 201#endif
202 202
203void ehca_recover_sqp(struct ib_qp *sqp);
204
203#endif 205#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index c9e32b46387f..84c9b7b8669b 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -90,7 +90,8 @@ MODULE_PARM_DESC(hw_level,
90 "hardware level" 90 "hardware level"
91 " (0: autosensing (default), 1: v. 0.20, 2: v. 0.21)"); 91 " (0: autosensing (default), 1: v. 0.20, 2: v. 0.21)");
92MODULE_PARM_DESC(nr_ports, 92MODULE_PARM_DESC(nr_ports,
93 "number of connected ports (default: 2)"); 93 "number of connected ports (-1: autodetect, 1: port one only, "
94 "2: two ports (default)");
94MODULE_PARM_DESC(use_hp_mr, 95MODULE_PARM_DESC(use_hp_mr,
95 "high performance MRs (0: no (default), 1: yes)"); 96 "high performance MRs (0: no (default), 1: yes)");
96MODULE_PARM_DESC(port_act_time, 97MODULE_PARM_DESC(port_act_time,
@@ -511,7 +512,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
511 } 512 }
512 sport->ibcq_aqp1 = ibcq; 513 sport->ibcq_aqp1 = ibcq;
513 514
514 if (sport->ibqp_aqp1) { 515 if (sport->ibqp_sqp[IB_QPT_GSI]) {
515 ehca_err(&shca->ib_device, "AQP1 QP is already created."); 516 ehca_err(&shca->ib_device, "AQP1 QP is already created.");
516 ret = -EPERM; 517 ret = -EPERM;
517 goto create_aqp1; 518 goto create_aqp1;
@@ -537,7 +538,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
537 ret = PTR_ERR(ibqp); 538 ret = PTR_ERR(ibqp);
538 goto create_aqp1; 539 goto create_aqp1;
539 } 540 }
540 sport->ibqp_aqp1 = ibqp; 541 sport->ibqp_sqp[IB_QPT_GSI] = ibqp;
541 542
542 return 0; 543 return 0;
543 544
@@ -550,7 +551,7 @@ static int ehca_destroy_aqp1(struct ehca_sport *sport)
550{ 551{
551 int ret; 552 int ret;
552 553
553 ret = ib_destroy_qp(sport->ibqp_aqp1); 554 ret = ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]);
554 if (ret) { 555 if (ret) {
555 ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret); 556 ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret);
556 return ret; 557 return ret;
@@ -693,7 +694,7 @@ static int __devinit ehca_probe(struct of_device *dev,
693 struct ehca_shca *shca; 694 struct ehca_shca *shca;
694 const u64 *handle; 695 const u64 *handle;
695 struct ib_pd *ibpd; 696 struct ib_pd *ibpd;
696 int ret; 697 int ret, i;
697 698
698 handle = of_get_property(dev->node, "ibm,hca-handle", NULL); 699 handle = of_get_property(dev->node, "ibm,hca-handle", NULL);
699 if (!handle) { 700 if (!handle) {
@@ -714,6 +715,8 @@ static int __devinit ehca_probe(struct of_device *dev,
714 return -ENOMEM; 715 return -ENOMEM;
715 } 716 }
716 mutex_init(&shca->modify_mutex); 717 mutex_init(&shca->modify_mutex);
718 for (i = 0; i < ARRAY_SIZE(shca->sport); i++)
719 spin_lock_init(&shca->sport[i].mod_sqp_lock);
717 720
718 shca->ofdev = dev; 721 shca->ofdev = dev;
719 shca->ipz_hca_handle.handle = *handle; 722 shca->ipz_hca_handle.handle = *handle;
@@ -934,7 +937,7 @@ void ehca_poll_eqs(unsigned long data)
934 ehca_process_eq(shca, 0); 937 ehca_process_eq(shca, 0);
935 } 938 }
936 } 939 }
937 mod_timer(&poll_eqs_timer, jiffies + HZ); 940 mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ));
938 spin_unlock(&shca_list_lock); 941 spin_unlock(&shca_list_lock);
939} 942}
940 943
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index eff5fb55604b..1012f15a7140 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -592,10 +592,8 @@ static struct ehca_qp *internal_create_qp(
592 goto create_qp_exit1; 592 goto create_qp_exit1;
593 } 593 }
594 594
595 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) 595 /* Always signal by WQE so we can hide circ. WQEs */
596 parms.sigtype = HCALL_SIGT_EVERY; 596 parms.sigtype = HCALL_SIGT_BY_WQE;
597 else
598 parms.sigtype = HCALL_SIGT_BY_WQE;
599 597
600 /* UD_AV CIRCUMVENTION */ 598 /* UD_AV CIRCUMVENTION */
601 max_send_sge = init_attr->cap.max_send_sge; 599 max_send_sge = init_attr->cap.max_send_sge;
@@ -618,6 +616,10 @@ static struct ehca_qp *internal_create_qp(
618 parms.squeue.max_sge = max_send_sge; 616 parms.squeue.max_sge = max_send_sge;
619 parms.rqueue.max_sge = max_recv_sge; 617 parms.rqueue.max_sge = max_recv_sge;
620 618
619 /* RC QPs need one more SWQE for unsolicited ack circumvention */
620 if (qp_type == IB_QPT_RC)
621 parms.squeue.max_wr++;
622
621 if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) { 623 if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) {
622 if (HAS_SQ(my_qp)) 624 if (HAS_SQ(my_qp))
623 ehca_determine_small_queue( 625 ehca_determine_small_queue(
@@ -650,6 +652,8 @@ static struct ehca_qp *internal_create_qp(
650 parms.squeue.act_nr_sges = 1; 652 parms.squeue.act_nr_sges = 1;
651 parms.rqueue.act_nr_sges = 1; 653 parms.rqueue.act_nr_sges = 1;
652 } 654 }
655 /* hide the extra WQE */
656 parms.squeue.act_nr_wqes--;
653 break; 657 break;
654 case IB_QPT_UD: 658 case IB_QPT_UD:
655 case IB_QPT_GSI: 659 case IB_QPT_GSI:
@@ -729,12 +733,31 @@ static struct ehca_qp *internal_create_qp(
729 init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes; 733 init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes;
730 my_qp->init_attr = *init_attr; 734 my_qp->init_attr = *init_attr;
731 735
736 if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
737 shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
738 &my_qp->ib_qp;
739 if (ehca_nr_ports < 0) {
740 /* alloc array to cache subsequent modify qp parms
741 * for autodetect mode
742 */
743 my_qp->mod_qp_parm =
744 kzalloc(EHCA_MOD_QP_PARM_MAX *
745 sizeof(*my_qp->mod_qp_parm),
746 GFP_KERNEL);
747 if (!my_qp->mod_qp_parm) {
748 ehca_err(pd->device,
749 "Could not alloc mod_qp_parm");
750 goto create_qp_exit4;
751 }
752 }
753 }
754
732 /* NOTE: define_apq0() not supported yet */ 755 /* NOTE: define_apq0() not supported yet */
733 if (qp_type == IB_QPT_GSI) { 756 if (qp_type == IB_QPT_GSI) {
734 h_ret = ehca_define_sqp(shca, my_qp, init_attr); 757 h_ret = ehca_define_sqp(shca, my_qp, init_attr);
735 if (h_ret != H_SUCCESS) { 758 if (h_ret != H_SUCCESS) {
736 ret = ehca2ib_return_code(h_ret); 759 ret = ehca2ib_return_code(h_ret);
737 goto create_qp_exit4; 760 goto create_qp_exit5;
738 } 761 }
739 } 762 }
740 763
@@ -743,7 +766,7 @@ static struct ehca_qp *internal_create_qp(
743 if (ret) { 766 if (ret) {
744 ehca_err(pd->device, 767 ehca_err(pd->device,
745 "Couldn't assign qp to send_cq ret=%i", ret); 768 "Couldn't assign qp to send_cq ret=%i", ret);
746 goto create_qp_exit4; 769 goto create_qp_exit5;
747 } 770 }
748 } 771 }
749 772
@@ -769,12 +792,18 @@ static struct ehca_qp *internal_create_qp(
769 if (ib_copy_to_udata(udata, &resp, sizeof resp)) { 792 if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
770 ehca_err(pd->device, "Copy to udata failed"); 793 ehca_err(pd->device, "Copy to udata failed");
771 ret = -EINVAL; 794 ret = -EINVAL;
772 goto create_qp_exit4; 795 goto create_qp_exit6;
773 } 796 }
774 } 797 }
775 798
776 return my_qp; 799 return my_qp;
777 800
801create_qp_exit6:
802 ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
803
804create_qp_exit5:
805 kfree(my_qp->mod_qp_parm);
806
778create_qp_exit4: 807create_qp_exit4:
779 if (HAS_RQ(my_qp)) 808 if (HAS_RQ(my_qp))
780 ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); 809 ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
@@ -858,7 +887,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
858 update_mask, 887 update_mask,
859 mqpcb, my_qp->galpas.kernel); 888 mqpcb, my_qp->galpas.kernel);
860 if (hret != H_SUCCESS) { 889 if (hret != H_SUCCESS) {
861 ehca_err(pd->device, "Could not modify SRQ to INIT" 890 ehca_err(pd->device, "Could not modify SRQ to INIT "
862 "ehca_qp=%p qp_num=%x h_ret=%li", 891 "ehca_qp=%p qp_num=%x h_ret=%li",
863 my_qp, my_qp->real_qp_num, hret); 892 my_qp, my_qp->real_qp_num, hret);
864 goto create_srq2; 893 goto create_srq2;
@@ -872,7 +901,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
872 update_mask, 901 update_mask,
873 mqpcb, my_qp->galpas.kernel); 902 mqpcb, my_qp->galpas.kernel);
874 if (hret != H_SUCCESS) { 903 if (hret != H_SUCCESS) {
875 ehca_err(pd->device, "Could not enable SRQ" 904 ehca_err(pd->device, "Could not enable SRQ "
876 "ehca_qp=%p qp_num=%x h_ret=%li", 905 "ehca_qp=%p qp_num=%x h_ret=%li",
877 my_qp, my_qp->real_qp_num, hret); 906 my_qp, my_qp->real_qp_num, hret);
878 goto create_srq2; 907 goto create_srq2;
@@ -886,7 +915,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
886 update_mask, 915 update_mask,
887 mqpcb, my_qp->galpas.kernel); 916 mqpcb, my_qp->galpas.kernel);
888 if (hret != H_SUCCESS) { 917 if (hret != H_SUCCESS) {
889 ehca_err(pd->device, "Could not modify SRQ to RTR" 918 ehca_err(pd->device, "Could not modify SRQ to RTR "
890 "ehca_qp=%p qp_num=%x h_ret=%li", 919 "ehca_qp=%p qp_num=%x h_ret=%li",
891 my_qp, my_qp->real_qp_num, hret); 920 my_qp, my_qp->real_qp_num, hret);
892 goto create_srq2; 921 goto create_srq2;
@@ -992,7 +1021,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
992 unsigned long flags = 0; 1021 unsigned long flags = 0;
993 1022
994 /* do query_qp to obtain current attr values */ 1023 /* do query_qp to obtain current attr values */
995 mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); 1024 mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
996 if (!mqpcb) { 1025 if (!mqpcb) {
997 ehca_err(ibqp->device, "Could not get zeroed page for mqpcb " 1026 ehca_err(ibqp->device, "Could not get zeroed page for mqpcb "
998 "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num); 1027 "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num);
@@ -1180,6 +1209,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
1180 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1); 1209 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
1181 } 1210 }
1182 if (attr_mask & IB_QP_PORT) { 1211 if (attr_mask & IB_QP_PORT) {
1212 struct ehca_sport *sport;
1213 struct ehca_qp *aqp1;
1183 if (attr->port_num < 1 || attr->port_num > shca->num_ports) { 1214 if (attr->port_num < 1 || attr->port_num > shca->num_ports) {
1184 ret = -EINVAL; 1215 ret = -EINVAL;
1185 ehca_err(ibqp->device, "Invalid port=%x. " 1216 ehca_err(ibqp->device, "Invalid port=%x. "
@@ -1188,6 +1219,29 @@ static int internal_modify_qp(struct ib_qp *ibqp,
1188 shca->num_ports); 1219 shca->num_ports);
1189 goto modify_qp_exit2; 1220 goto modify_qp_exit2;
1190 } 1221 }
1222 sport = &shca->sport[attr->port_num - 1];
1223 if (!sport->ibqp_sqp[IB_QPT_GSI]) {
1224 /* should not occur */
1225 ret = -EFAULT;
1226 ehca_err(ibqp->device, "AQP1 was not created for "
1227 "port=%x", attr->port_num);
1228 goto modify_qp_exit2;
1229 }
1230 aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI],
1231 struct ehca_qp, ib_qp);
1232 if (ibqp->qp_type != IB_QPT_GSI &&
1233 ibqp->qp_type != IB_QPT_SMI &&
1234 aqp1->mod_qp_parm) {
1235 /*
1236 * firmware will reject this modify_qp() because
1237 * port is not activated/initialized fully
1238 */
1239 ret = -EFAULT;
1240 ehca_warn(ibqp->device, "Couldn't modify qp port=%x: "
1241 "either port is being activated (try again) "
1242 "or cabling issue", attr->port_num);
1243 goto modify_qp_exit2;
1244 }
1191 mqpcb->prim_phys_port = attr->port_num; 1245 mqpcb->prim_phys_port = attr->port_num;
1192 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1); 1246 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1);
1193 } 1247 }
@@ -1244,6 +1298,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
1244 } 1298 }
1245 1299
1246 if (attr_mask & IB_QP_PATH_MTU) { 1300 if (attr_mask & IB_QP_PATH_MTU) {
1301 /* store ld(MTU) */
1302 my_qp->mtu_shift = attr->path_mtu + 7;
1247 mqpcb->path_mtu = attr->path_mtu; 1303 mqpcb->path_mtu = attr->path_mtu;
1248 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1); 1304 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1);
1249 } 1305 }
@@ -1467,6 +1523,8 @@ modify_qp_exit1:
1467int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, 1523int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
1468 struct ib_udata *udata) 1524 struct ib_udata *udata)
1469{ 1525{
1526 struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
1527 ib_device);
1470 struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); 1528 struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
1471 struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, 1529 struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
1472 ib_pd); 1530 ib_pd);
@@ -1479,9 +1537,100 @@ int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
1479 return -EINVAL; 1537 return -EINVAL;
1480 } 1538 }
1481 1539
1540 /* The if-block below caches qp_attr to be modified for GSI and SMI
1541 * qps during the initialization by ib_mad. When the respective port
1542 * is activated, ie we got an event PORT_ACTIVE, we'll replay the
1543 * cached modify calls sequence, see ehca_recover_sqs() below.
1544 * Why that is required:
1545 * 1) If one port is connected, older code requires that port one
1546 * to be connected and module option nr_ports=1 to be given by
1547 * user, which is very inconvenient for end user.
1548 * 2) Firmware accepts modify_qp() only if respective port has become
1549 * active. Older code had a wait loop of 30sec create_qp()/
1550 * define_aqp1(), which is not appropriate in practice. This
1551 * code now removes that wait loop, see define_aqp1(), and always
1552 * reports all ports to ib_mad resp. users. Only activated ports
1553 * will then usable for the users.
1554 */
1555 if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
1556 int port = my_qp->init_attr.port_num;
1557 struct ehca_sport *sport = &shca->sport[port - 1];
1558 unsigned long flags;
1559 spin_lock_irqsave(&sport->mod_sqp_lock, flags);
1560 /* cache qp_attr only during init */
1561 if (my_qp->mod_qp_parm) {
1562 struct ehca_mod_qp_parm *p;
1563 if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) {
1564 ehca_err(&shca->ib_device,
1565 "mod_qp_parm overflow state=%x port=%x"
1566 " type=%x", attr->qp_state,
1567 my_qp->init_attr.port_num,
1568 ibqp->qp_type);
1569 spin_unlock_irqrestore(&sport->mod_sqp_lock,
1570 flags);
1571 return -EINVAL;
1572 }
1573 p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx];
1574 p->mask = attr_mask;
1575 p->attr = *attr;
1576 my_qp->mod_qp_parm_idx++;
1577 ehca_dbg(&shca->ib_device,
1578 "Saved qp_attr for state=%x port=%x type=%x",
1579 attr->qp_state, my_qp->init_attr.port_num,
1580 ibqp->qp_type);
1581 spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
1582 return 0;
1583 }
1584 spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
1585 }
1586
1482 return internal_modify_qp(ibqp, attr, attr_mask, 0); 1587 return internal_modify_qp(ibqp, attr, attr_mask, 0);
1483} 1588}
1484 1589
1590void ehca_recover_sqp(struct ib_qp *sqp)
1591{
1592 struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp);
1593 int port = my_sqp->init_attr.port_num;
1594 struct ib_qp_attr attr;
1595 struct ehca_mod_qp_parm *qp_parm;
1596 int i, qp_parm_idx, ret;
1597 unsigned long flags, wr_cnt;
1598
1599 if (!my_sqp->mod_qp_parm)
1600 return;
1601 ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num);
1602
1603 qp_parm = my_sqp->mod_qp_parm;
1604 qp_parm_idx = my_sqp->mod_qp_parm_idx;
1605 for (i = 0; i < qp_parm_idx; i++) {
1606 attr = qp_parm[i].attr;
1607 ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0);
1608 if (ret) {
1609 ehca_err(sqp->device, "Could not modify SQP port=%x "
1610 "qp_num=%x ret=%x", port, sqp->qp_num, ret);
1611 goto free_qp_parm;
1612 }
1613 ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x",
1614 port, sqp->qp_num, attr.qp_state);
1615 }
1616
1617 /* re-trigger posted recv wrs */
1618 wr_cnt = my_sqp->ipz_rqueue.current_q_offset /
1619 my_sqp->ipz_rqueue.qe_size;
1620 if (wr_cnt) {
1621 spin_lock_irqsave(&my_sqp->spinlock_r, flags);
1622 hipz_update_rqa(my_sqp, wr_cnt);
1623 spin_unlock_irqrestore(&my_sqp->spinlock_r, flags);
1624 ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx",
1625 port, sqp->qp_num, wr_cnt);
1626 }
1627
1628free_qp_parm:
1629 kfree(qp_parm);
1630 /* this prevents subsequent calls to modify_qp() to cache qp_attr */
1631 my_sqp->mod_qp_parm = NULL;
1632}
1633
1485int ehca_query_qp(struct ib_qp *qp, 1634int ehca_query_qp(struct ib_qp *qp,
1486 struct ib_qp_attr *qp_attr, 1635 struct ib_qp_attr *qp_attr,
1487 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) 1636 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
@@ -1769,6 +1918,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
1769 struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device); 1918 struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device);
1770 struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, 1919 struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
1771 ib_pd); 1920 ib_pd);
1921 struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1];
1772 u32 cur_pid = current->tgid; 1922 u32 cur_pid = current->tgid;
1773 u32 qp_num = my_qp->real_qp_num; 1923 u32 qp_num = my_qp->real_qp_num;
1774 int ret; 1924 int ret;
@@ -1815,6 +1965,14 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
1815 port_num = my_qp->init_attr.port_num; 1965 port_num = my_qp->init_attr.port_num;
1816 qp_type = my_qp->init_attr.qp_type; 1966 qp_type = my_qp->init_attr.qp_type;
1817 1967
1968 if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
1969 spin_lock_irqsave(&sport->mod_sqp_lock, flags);
1970 kfree(my_qp->mod_qp_parm);
1971 my_qp->mod_qp_parm = NULL;
1972 shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL;
1973 spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
1974 }
1975
1818 /* no support for IB_QPT_SMI yet */ 1976 /* no support for IB_QPT_SMI yet */
1819 if (qp_type == IB_QPT_GSI) { 1977 if (qp_type == IB_QPT_GSI) {
1820 struct ib_event event; 1978 struct ib_event event;
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index ea91360835d3..3aacc8cf1e44 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -50,6 +50,9 @@
50#include "hcp_if.h" 50#include "hcp_if.h"
51#include "hipz_fns.h" 51#include "hipz_fns.h"
52 52
53/* in RC traffic, insert an empty RDMA READ once every this many packets */
54#define ACK_CIRC_THRESHOLD 2000000
55
53static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, 56static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
54 struct ehca_wqe *wqe_p, 57 struct ehca_wqe *wqe_p,
55 struct ib_recv_wr *recv_wr) 58 struct ib_recv_wr *recv_wr)
@@ -81,7 +84,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
81 if (ehca_debug_level) { 84 if (ehca_debug_level) {
82 ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", 85 ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p",
83 ipz_rqueue); 86 ipz_rqueue);
84 ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); 87 ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
85 } 88 }
86 89
87 return 0; 90 return 0;
@@ -135,7 +138,8 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
135 138
136static inline int ehca_write_swqe(struct ehca_qp *qp, 139static inline int ehca_write_swqe(struct ehca_qp *qp,
137 struct ehca_wqe *wqe_p, 140 struct ehca_wqe *wqe_p,
138 const struct ib_send_wr *send_wr) 141 const struct ib_send_wr *send_wr,
142 int hidden)
139{ 143{
140 u32 idx; 144 u32 idx;
141 u64 dma_length; 145 u64 dma_length;
@@ -176,7 +180,9 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
176 180
177 wqe_p->wr_flag = 0; 181 wqe_p->wr_flag = 0;
178 182
179 if (send_wr->send_flags & IB_SEND_SIGNALED) 183 if ((send_wr->send_flags & IB_SEND_SIGNALED ||
184 qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
185 && !hidden)
180 wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM; 186 wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
181 187
182 if (send_wr->opcode == IB_WR_SEND_WITH_IMM || 188 if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
@@ -199,7 +205,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
199 205
200 wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8; 206 wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8;
201 wqe_p->local_ee_context_qkey = remote_qkey; 207 wqe_p->local_ee_context_qkey = remote_qkey;
202 if (!send_wr->wr.ud.ah) { 208 if (unlikely(!send_wr->wr.ud.ah)) {
203 ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp); 209 ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp);
204 return -EINVAL; 210 return -EINVAL;
205 } 211 }
@@ -255,6 +261,15 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
255 } /* eof idx */ 261 } /* eof idx */
256 wqe_p->u.nud.atomic_1st_op_dma_len = dma_length; 262 wqe_p->u.nud.atomic_1st_op_dma_len = dma_length;
257 263
264 /* unsolicited ack circumvention */
265 if (send_wr->opcode == IB_WR_RDMA_READ) {
266 /* on RDMA read, switch on and reset counters */
267 qp->message_count = qp->packet_count = 0;
268 qp->unsol_ack_circ = 1;
269 } else
270 /* else estimate #packets */
271 qp->packet_count += (dma_length >> qp->mtu_shift) + 1;
272
258 break; 273 break;
259 274
260 default: 275 default:
@@ -355,13 +370,49 @@ static inline void map_ib_wc_status(u32 cqe_status,
355 *wc_status = IB_WC_SUCCESS; 370 *wc_status = IB_WC_SUCCESS;
356} 371}
357 372
373static inline int post_one_send(struct ehca_qp *my_qp,
374 struct ib_send_wr *cur_send_wr,
375 struct ib_send_wr **bad_send_wr,
376 int hidden)
377{
378 struct ehca_wqe *wqe_p;
379 int ret;
380 u64 start_offset = my_qp->ipz_squeue.current_q_offset;
381
382 /* get pointer next to free WQE */
383 wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
384 if (unlikely(!wqe_p)) {
385 /* too many posted work requests: queue overflow */
386 if (bad_send_wr)
387 *bad_send_wr = cur_send_wr;
388 ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
389 "qp_num=%x", my_qp->ib_qp.qp_num);
390 return -ENOMEM;
391 }
392 /* write a SEND WQE into the QUEUE */
393 ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, hidden);
394 /*
395 * if something failed,
396 * reset the free entry pointer to the start value
397 */
398 if (unlikely(ret)) {
399 my_qp->ipz_squeue.current_q_offset = start_offset;
400 if (bad_send_wr)
401 *bad_send_wr = cur_send_wr;
402 ehca_err(my_qp->ib_qp.device, "Could not write WQE "
403 "qp_num=%x", my_qp->ib_qp.qp_num);
404 return -EINVAL;
405 }
406
407 return 0;
408}
409
358int ehca_post_send(struct ib_qp *qp, 410int ehca_post_send(struct ib_qp *qp,
359 struct ib_send_wr *send_wr, 411 struct ib_send_wr *send_wr,
360 struct ib_send_wr **bad_send_wr) 412 struct ib_send_wr **bad_send_wr)
361{ 413{
362 struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); 414 struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
363 struct ib_send_wr *cur_send_wr; 415 struct ib_send_wr *cur_send_wr;
364 struct ehca_wqe *wqe_p;
365 int wqe_cnt = 0; 416 int wqe_cnt = 0;
366 int ret = 0; 417 int ret = 0;
367 unsigned long flags; 418 unsigned long flags;
@@ -369,37 +420,33 @@ int ehca_post_send(struct ib_qp *qp,
369 /* LOCK the QUEUE */ 420 /* LOCK the QUEUE */
370 spin_lock_irqsave(&my_qp->spinlock_s, flags); 421 spin_lock_irqsave(&my_qp->spinlock_s, flags);
371 422
423 /* Send an empty extra RDMA read if:
424 * 1) there has been an RDMA read on this connection before
425 * 2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets
426 * 3) we can be sure that any previous extra RDMA read has been
427 * processed so we don't overflow the SQ
428 */
429 if (unlikely(my_qp->unsol_ack_circ &&
430 my_qp->packet_count > ACK_CIRC_THRESHOLD &&
431 my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) {
432 /* insert an empty RDMA READ to fix up the remote QP state */
433 struct ib_send_wr circ_wr;
434 memset(&circ_wr, 0, sizeof(circ_wr));
435 circ_wr.opcode = IB_WR_RDMA_READ;
436 post_one_send(my_qp, &circ_wr, NULL, 1); /* ignore retcode */
437 wqe_cnt++;
438 ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num);
439 my_qp->message_count = my_qp->packet_count = 0;
440 }
441
372 /* loop processes list of send reqs */ 442 /* loop processes list of send reqs */
373 for (cur_send_wr = send_wr; cur_send_wr != NULL; 443 for (cur_send_wr = send_wr; cur_send_wr != NULL;
374 cur_send_wr = cur_send_wr->next) { 444 cur_send_wr = cur_send_wr->next) {
375 u64 start_offset = my_qp->ipz_squeue.current_q_offset; 445 ret = post_one_send(my_qp, cur_send_wr, bad_send_wr, 0);
376 /* get pointer next to free WQE */
377 wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
378 if (unlikely(!wqe_p)) {
379 /* too many posted work requests: queue overflow */
380 if (bad_send_wr)
381 *bad_send_wr = cur_send_wr;
382 if (wqe_cnt == 0) {
383 ret = -ENOMEM;
384 ehca_err(qp->device, "Too many posted WQEs "
385 "qp_num=%x", qp->qp_num);
386 }
387 goto post_send_exit0;
388 }
389 /* write a SEND WQE into the QUEUE */
390 ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr);
391 /*
392 * if something failed,
393 * reset the free entry pointer to the start value
394 */
395 if (unlikely(ret)) { 446 if (unlikely(ret)) {
396 my_qp->ipz_squeue.current_q_offset = start_offset; 447 /* if one or more WQEs were successful, don't fail */
397 *bad_send_wr = cur_send_wr; 448 if (wqe_cnt)
398 if (wqe_cnt == 0) { 449 ret = 0;
399 ret = -EINVAL;
400 ehca_err(qp->device, "Could not write WQE "
401 "qp_num=%x", qp->qp_num);
402 }
403 goto post_send_exit0; 450 goto post_send_exit0;
404 } 451 }
405 wqe_cnt++; 452 wqe_cnt++;
@@ -410,6 +457,7 @@ int ehca_post_send(struct ib_qp *qp,
410post_send_exit0: 457post_send_exit0:
411 iosync(); /* serialize GAL register access */ 458 iosync(); /* serialize GAL register access */
412 hipz_update_sqa(my_qp, wqe_cnt); 459 hipz_update_sqa(my_qp, wqe_cnt);
460 my_qp->message_count += wqe_cnt;
413 spin_unlock_irqrestore(&my_qp->spinlock_s, flags); 461 spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
414 return ret; 462 return ret;
415} 463}
diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c
index f0792e5fbd02..79e72b25b252 100644
--- a/drivers/infiniband/hw/ehca/ehca_sqp.c
+++ b/drivers/infiniband/hw/ehca/ehca_sqp.c
@@ -40,11 +40,8 @@
40 */ 40 */
41 41
42 42
43#include <linux/module.h>
44#include <linux/err.h>
45#include "ehca_classes.h" 43#include "ehca_classes.h"
46#include "ehca_tools.h" 44#include "ehca_tools.h"
47#include "ehca_qes.h"
48#include "ehca_iverbs.h" 45#include "ehca_iverbs.h"
49#include "hcp_if.h" 46#include "hcp_if.h"
50 47
@@ -93,6 +90,9 @@ u64 ehca_define_sqp(struct ehca_shca *shca,
93 return H_PARAMETER; 90 return H_PARAMETER;
94 } 91 }
95 92
93 if (ehca_nr_ports < 0) /* autodetect mode */
94 return H_SUCCESS;
95
96 for (counter = 0; 96 for (counter = 0;
97 shca->sport[port - 1].port_state != IB_PORT_ACTIVE && 97 shca->sport[port - 1].port_state != IB_PORT_ACTIVE &&
98 counter < ehca_port_act_time; 98 counter < ehca_port_act_time;
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 851df8a75e79..414621095540 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -82,6 +82,16 @@
82#define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */ 82#define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */
83 83
84/* 84/*
85 * These 3 values (SDR and DDR may be ORed for auto-speed
86 * negotiation) are used for the 3rd argument to path_f_set_ib_cfg
87 * with cmd IPATH_IB_CFG_SPD_ENB, by direct calls or via sysfs. They
88 * are also the possible values for ipath_link_speed_enabled and active.
89 * The values were chosen to match values used within the IB spec.
90 */
91#define IPATH_IB_SDR 1
92#define IPATH_IB_DDR 2
93
94/*
85 * stats maintained by the driver. For now, at least, this is global 95 * stats maintained by the driver. For now, at least, this is global
86 * to all minor devices. 96 * to all minor devices.
87 */ 97 */
@@ -433,8 +443,9 @@ struct ipath_user_info {
433#define IPATH_CMD_UNUSED_2 26 443#define IPATH_CMD_UNUSED_2 26
434#define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */ 444#define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */
435#define IPATH_CMD_POLL_TYPE 28 /* set the kind of polling we want */ 445#define IPATH_CMD_POLL_TYPE 28 /* set the kind of polling we want */
446#define IPATH_CMD_ARMLAUNCH_CTRL 29 /* armlaunch detection control */
436 447
437#define IPATH_CMD_MAX 28 448#define IPATH_CMD_MAX 29
438 449
439/* 450/*
440 * Poll types 451 * Poll types
@@ -477,6 +488,8 @@ struct ipath_cmd {
477 __u64 port_info; 488 __u64 port_info;
478 /* enable/disable receipt of packets */ 489 /* enable/disable receipt of packets */
479 __u32 recv_ctrl; 490 __u32 recv_ctrl;
491 /* enable/disable armlaunch errors (non-zero to enable) */
492 __u32 armlaunch_ctrl;
480 /* partition key to set */ 493 /* partition key to set */
481 __u16 part_key; 494 __u16 part_key;
482 /* user address of __u32 bitmask of active slaves */ 495 /* user address of __u32 bitmask of active slaves */
@@ -579,7 +592,7 @@ struct ipath_flash {
579struct infinipath_counters { 592struct infinipath_counters {
580 __u64 LBIntCnt; 593 __u64 LBIntCnt;
581 __u64 LBFlowStallCnt; 594 __u64 LBFlowStallCnt;
582 __u64 Reserved1; 595 __u64 TxSDmaDescCnt; /* was Reserved1 */
583 __u64 TxUnsupVLErrCnt; 596 __u64 TxUnsupVLErrCnt;
584 __u64 TxDataPktCnt; 597 __u64 TxDataPktCnt;
585 __u64 TxFlowPktCnt; 598 __u64 TxFlowPktCnt;
@@ -615,12 +628,26 @@ struct infinipath_counters {
615 __u64 RxP6HdrEgrOvflCnt; 628 __u64 RxP6HdrEgrOvflCnt;
616 __u64 RxP7HdrEgrOvflCnt; 629 __u64 RxP7HdrEgrOvflCnt;
617 __u64 RxP8HdrEgrOvflCnt; 630 __u64 RxP8HdrEgrOvflCnt;
618 __u64 Reserved6; 631 __u64 RxP9HdrEgrOvflCnt; /* was Reserved6 */
619 __u64 Reserved7; 632 __u64 RxP10HdrEgrOvflCnt; /* was Reserved7 */
633 __u64 RxP11HdrEgrOvflCnt; /* new for IBA7220 */
634 __u64 RxP12HdrEgrOvflCnt; /* new for IBA7220 */
635 __u64 RxP13HdrEgrOvflCnt; /* new for IBA7220 */
636 __u64 RxP14HdrEgrOvflCnt; /* new for IBA7220 */
637 __u64 RxP15HdrEgrOvflCnt; /* new for IBA7220 */
638 __u64 RxP16HdrEgrOvflCnt; /* new for IBA7220 */
620 __u64 IBStatusChangeCnt; 639 __u64 IBStatusChangeCnt;
621 __u64 IBLinkErrRecoveryCnt; 640 __u64 IBLinkErrRecoveryCnt;
622 __u64 IBLinkDownedCnt; 641 __u64 IBLinkDownedCnt;
623 __u64 IBSymbolErrCnt; 642 __u64 IBSymbolErrCnt;
643 /* The following are new for IBA7220 */
644 __u64 RxVL15DroppedPktCnt;
645 __u64 RxOtherLocalPhyErrCnt;
646 __u64 PcieRetryBufDiagQwordCnt;
647 __u64 ExcessBufferOvflCnt;
648 __u64 LocalLinkIntegrityErrCnt;
649 __u64 RxVlErrCnt;
650 __u64 RxDlidFltrCnt;
624}; 651};
625 652
626/* 653/*
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index d1380c7a1703..a03bd28d9b48 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -421,7 +421,7 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
421 else 421 else
422 n = head - tail; 422 n = head - tail;
423 if (unlikely((u32)cqe < n)) { 423 if (unlikely((u32)cqe < n)) {
424 ret = -EOVERFLOW; 424 ret = -EINVAL;
425 goto bail_unlock; 425 goto bail_unlock;
426 } 426 }
427 for (n = 0; tail != head; n++) { 427 for (n = 0; tail != head; n++) {
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index 19c56e6491eb..d6f69532d83f 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -55,7 +55,7 @@
55#define __IPATH_PKTDBG 0x80 /* print packet data */ 55#define __IPATH_PKTDBG 0x80 /* print packet data */
56/* print process startup (init)/exit messages */ 56/* print process startup (init)/exit messages */
57#define __IPATH_PROCDBG 0x100 57#define __IPATH_PROCDBG 0x100
58/* print mmap/nopage stuff, not using VDBG any more */ 58/* print mmap/fault stuff, not using VDBG any more */
59#define __IPATH_MMDBG 0x200 59#define __IPATH_MMDBG 0x200
60#define __IPATH_ERRPKTDBG 0x400 60#define __IPATH_ERRPKTDBG 0x400
61#define __IPATH_USER_SEND 0x1000 /* use user mode send */ 61#define __IPATH_USER_SEND 0x1000 /* use user mode send */
@@ -81,7 +81,7 @@
81#define __IPATH_VERBDBG 0x0 /* very verbose debug */ 81#define __IPATH_VERBDBG 0x0 /* very verbose debug */
82#define __IPATH_PKTDBG 0x0 /* print packet data */ 82#define __IPATH_PKTDBG 0x0 /* print packet data */
83#define __IPATH_PROCDBG 0x0 /* process startup (init)/exit messages */ 83#define __IPATH_PROCDBG 0x0 /* process startup (init)/exit messages */
84/* print mmap/nopage stuff, not using VDBG any more */ 84/* print mmap/fault stuff, not using VDBG any more */
85#define __IPATH_MMDBG 0x0 85#define __IPATH_MMDBG 0x0
86#define __IPATH_EPKTDBG 0x0 /* print ethernet packet data */ 86#define __IPATH_EPKTDBG 0x0 /* print ethernet packet data */
87#define __IPATH_IPATHDBG 0x0 /* Ethernet (IPATH) table dump on */ 87#define __IPATH_IPATHDBG 0x0 /* Ethernet (IPATH) table dump on */
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index fc355981bbab..d5ff6ca2db30 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -334,6 +334,8 @@ static void ipath_verify_pioperf(struct ipath_devdata *dd)
334 udelay(1); 334 udelay(1);
335 } 335 }
336 336
337 ipath_disable_armlaunch(dd);
338
337 writeq(0, piobuf); /* length 0, no dwords actually sent */ 339 writeq(0, piobuf); /* length 0, no dwords actually sent */
338 ipath_flush_wc(); 340 ipath_flush_wc();
339 341
@@ -365,6 +367,7 @@ static void ipath_verify_pioperf(struct ipath_devdata *dd)
365done: 367done:
366 /* disarm piobuf, so it's available again */ 368 /* disarm piobuf, so it's available again */
367 ipath_disarm_piobufs(dd, pbnum, 1); 369 ipath_disarm_piobufs(dd, pbnum, 1);
370 ipath_enable_armlaunch(dd);
368} 371}
369 372
370static int __devinit ipath_init_one(struct pci_dev *pdev, 373static int __devinit ipath_init_one(struct pci_dev *pdev,
@@ -803,31 +806,37 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
803 unsigned cnt) 806 unsigned cnt)
804{ 807{
805 unsigned i, last = first + cnt; 808 unsigned i, last = first + cnt;
806 u64 sendctrl, sendorig; 809 unsigned long flags;
807 810
808 ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first); 811 ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
809 sendorig = dd->ipath_sendctrl;
810 for (i = first; i < last; i++) { 812 for (i = first; i < last; i++) {
811 sendctrl = sendorig | INFINIPATH_S_DISARM | 813 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
812 (i << INFINIPATH_S_DISARMPIOBUF_SHIFT); 814 /*
815 * The disarm-related bits are write-only, so it
816 * is ok to OR them in with our copy of sendctrl
817 * while we hold the lock.
818 */
813 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 819 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
814 sendctrl); 820 dd->ipath_sendctrl | INFINIPATH_S_DISARM |
821 (i << INFINIPATH_S_DISARMPIOBUF_SHIFT));
822 /* can't disarm bufs back-to-back per iba7220 spec */
823 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
824 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
815 } 825 }
816 826
817 /* 827 /*
818 * Write it again with current value, in case ipath_sendctrl changed 828 * Disable PIOAVAILUPD, then re-enable, reading scratch in
819 * while we were looping; no critical bits that would require
820 * locking.
821 *
822 * disable PIOAVAILUPD, then re-enable, reading scratch in
823 * between. This seems to avoid a chip timing race that causes 829 * between. This seems to avoid a chip timing race that causes
824 * pioavail updates to memory to stop. 830 * pioavail updates to memory to stop. We xor as we don't
831 * know the state of the bit when we're called.
825 */ 832 */
833 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
826 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 834 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
827 sendorig & ~INFINIPATH_S_PIOBUFAVAILUPD); 835 dd->ipath_sendctrl ^ INFINIPATH_S_PIOBUFAVAILUPD);
828 sendorig = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); 836 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
829 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 837 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
830 dd->ipath_sendctrl); 838 dd->ipath_sendctrl);
839 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
831} 840}
832 841
833/** 842/**
@@ -1003,12 +1012,10 @@ static void get_rhf_errstring(u32 err, char *msg, size_t len)
1003 * ipath_get_egrbuf - get an eager buffer 1012 * ipath_get_egrbuf - get an eager buffer
1004 * @dd: the infinipath device 1013 * @dd: the infinipath device
1005 * @bufnum: the eager buffer to get 1014 * @bufnum: the eager buffer to get
1006 * @err: unused
1007 * 1015 *
1008 * must only be called if ipath_pd[port] is known to be allocated 1016 * must only be called if ipath_pd[port] is known to be allocated
1009 */ 1017 */
1010static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum, 1018static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum)
1011 int err)
1012{ 1019{
1013 return dd->ipath_port0_skbinfo ? 1020 return dd->ipath_port0_skbinfo ?
1014 (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL; 1021 (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
@@ -1100,13 +1107,14 @@ static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
1100 1107
1101/* 1108/*
1102 * ipath_kreceive - receive a packet 1109 * ipath_kreceive - receive a packet
1103 * @dd: the infinipath device 1110 * @pd: the infinipath port
1104 * 1111 *
1105 * called from interrupt handler for errors or receive interrupt 1112 * called from interrupt handler for errors or receive interrupt
1106 */ 1113 */
1107void ipath_kreceive(struct ipath_devdata *dd) 1114void ipath_kreceive(struct ipath_portdata *pd)
1108{ 1115{
1109 u64 *rc; 1116 u64 *rc;
1117 struct ipath_devdata *dd = pd->port_dd;
1110 void *ebuf; 1118 void *ebuf;
1111 const u32 rsize = dd->ipath_rcvhdrentsize; /* words */ 1119 const u32 rsize = dd->ipath_rcvhdrentsize; /* words */
1112 const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */ 1120 const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */
@@ -1121,8 +1129,8 @@ void ipath_kreceive(struct ipath_devdata *dd)
1121 goto bail; 1129 goto bail;
1122 } 1130 }
1123 1131
1124 l = dd->ipath_port0head; 1132 l = pd->port_head;
1125 hdrqtail = (u32) le64_to_cpu(*dd->ipath_hdrqtailptr); 1133 hdrqtail = ipath_get_rcvhdrtail(pd);
1126 if (l == hdrqtail) 1134 if (l == hdrqtail)
1127 goto bail; 1135 goto bail;
1128 1136
@@ -1131,7 +1139,7 @@ reloop:
1131 u32 qp; 1139 u32 qp;
1132 u8 *bthbytes; 1140 u8 *bthbytes;
1133 1141
1134 rc = (u64 *) (dd->ipath_pd[0]->port_rcvhdrq + (l << 2)); 1142 rc = (u64 *) (pd->port_rcvhdrq + (l << 2));
1135 hdr = (struct ipath_message_header *)&rc[1]; 1143 hdr = (struct ipath_message_header *)&rc[1];
1136 /* 1144 /*
1137 * could make a network order version of IPATH_KD_QP, and 1145 * could make a network order version of IPATH_KD_QP, and
@@ -1156,7 +1164,7 @@ reloop:
1156 etail = ipath_hdrget_index((__le32 *) rc); 1164 etail = ipath_hdrget_index((__le32 *) rc);
1157 if (tlen > sizeof(*hdr) || 1165 if (tlen > sizeof(*hdr) ||
1158 etype == RCVHQ_RCV_TYPE_NON_KD) 1166 etype == RCVHQ_RCV_TYPE_NON_KD)
1159 ebuf = ipath_get_egrbuf(dd, etail, 0); 1167 ebuf = ipath_get_egrbuf(dd, etail);
1160 } 1168 }
1161 1169
1162 /* 1170 /*
@@ -1191,7 +1199,7 @@ reloop:
1191 be32_to_cpu(hdr->bth[0]) & 0xff); 1199 be32_to_cpu(hdr->bth[0]) & 0xff);
1192 else { 1200 else {
1193 /* 1201 /*
1194 * error packet, type of error unknown. 1202 * error packet, type of error unknown.
1195 * Probably type 3, but we don't know, so don't 1203 * Probably type 3, but we don't know, so don't
1196 * even try to print the opcode, etc. 1204 * even try to print the opcode, etc.
1197 */ 1205 */
@@ -1241,7 +1249,7 @@ reloop:
1241 * earlier packets, we "almost" guarantee we have covered 1249 * earlier packets, we "almost" guarantee we have covered
1242 * that case. 1250 * that case.
1243 */ 1251 */
1244 u32 hqtail = (u32)le64_to_cpu(*dd->ipath_hdrqtailptr); 1252 u32 hqtail = ipath_get_rcvhdrtail(pd);
1245 if (hqtail != hdrqtail) { 1253 if (hqtail != hdrqtail) {
1246 hdrqtail = hqtail; 1254 hdrqtail = hqtail;
1247 reloop = 1; /* loop 1 extra time at most */ 1255 reloop = 1; /* loop 1 extra time at most */
@@ -1251,7 +1259,7 @@ reloop:
1251 1259
1252 pkttot += i; 1260 pkttot += i;
1253 1261
1254 dd->ipath_port0head = l; 1262 pd->port_head = l;
1255 1263
1256 if (pkttot > ipath_stats.sps_maxpkts_call) 1264 if (pkttot > ipath_stats.sps_maxpkts_call)
1257 ipath_stats.sps_maxpkts_call = pkttot; 1265 ipath_stats.sps_maxpkts_call = pkttot;
@@ -1335,14 +1343,9 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd)
1335 /* 1343 /*
1336 * Chip Errata: bug 6641; even and odd qwords>3 are swapped 1344 * Chip Errata: bug 6641; even and odd qwords>3 are swapped
1337 */ 1345 */
1338 if (i > 3) { 1346 if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
1339 if (i & 1) 1347 piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]);
1340 piov = le64_to_cpu( 1348 else
1341 dd->ipath_pioavailregs_dma[i - 1]);
1342 else
1343 piov = le64_to_cpu(
1344 dd->ipath_pioavailregs_dma[i + 1]);
1345 } else
1346 piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]); 1349 piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]);
1347 pchg = _IPATH_ALL_CHECKBITS & 1350 pchg = _IPATH_ALL_CHECKBITS &
1348 ~(dd->ipath_pioavailshadow[i] ^ piov); 1351 ~(dd->ipath_pioavailshadow[i] ^ piov);
@@ -1601,7 +1604,8 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd,
1601 1604
1602 /* clear for security and sanity on each use */ 1605 /* clear for security and sanity on each use */
1603 memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size); 1606 memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
1604 memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE); 1607 if (pd->port_rcvhdrtail_kvaddr)
1608 memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
1605 1609
1606 /* 1610 /*
1607 * tell chip each time we init it, even if we are re-using previous 1611 * tell chip each time we init it, even if we are re-using previous
@@ -1617,77 +1621,6 @@ bail:
1617 return ret; 1621 return ret;
1618} 1622}
1619 1623
1620int ipath_waitfor_complete(struct ipath_devdata *dd, ipath_kreg reg_id,
1621 u64 bits_to_wait_for, u64 * valp)
1622{
1623 unsigned long timeout;
1624 u64 lastval, val;
1625 int ret;
1626
1627 lastval = ipath_read_kreg64(dd, reg_id);
1628 /* wait a ridiculously long time */
1629 timeout = jiffies + msecs_to_jiffies(5);
1630 do {
1631 val = ipath_read_kreg64(dd, reg_id);
1632 /* set so they have something, even on failures. */
1633 *valp = val;
1634 if ((val & bits_to_wait_for) == bits_to_wait_for) {
1635 ret = 0;
1636 break;
1637 }
1638 if (val != lastval)
1639 ipath_cdbg(VERBOSE, "Changed from %llx to %llx, "
1640 "waiting for %llx bits\n",
1641 (unsigned long long) lastval,
1642 (unsigned long long) val,
1643 (unsigned long long) bits_to_wait_for);
1644 cond_resched();
1645 if (time_after(jiffies, timeout)) {
1646 ipath_dbg("Didn't get bits %llx in register 0x%x, "
1647 "got %llx\n",
1648 (unsigned long long) bits_to_wait_for,
1649 reg_id, (unsigned long long) *valp);
1650 ret = -ENODEV;
1651 break;
1652 }
1653 } while (1);
1654
1655 return ret;
1656}
1657
1658/**
1659 * ipath_waitfor_mdio_cmdready - wait for last command to complete
1660 * @dd: the infinipath device
1661 *
1662 * Like ipath_waitfor_complete(), but we wait for the CMDVALID bit to go
1663 * away indicating the last command has completed. It doesn't return data
1664 */
1665int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd)
1666{
1667 unsigned long timeout;
1668 u64 val;
1669 int ret;
1670
1671 /* wait a ridiculously long time */
1672 timeout = jiffies + msecs_to_jiffies(5);
1673 do {
1674 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_mdio);
1675 if (!(val & IPATH_MDIO_CMDVALID)) {
1676 ret = 0;
1677 break;
1678 }
1679 cond_resched();
1680 if (time_after(jiffies, timeout)) {
1681 ipath_dbg("CMDVALID stuck in mdio reg? (%llx)\n",
1682 (unsigned long long) val);
1683 ret = -ENODEV;
1684 break;
1685 }
1686 } while (1);
1687
1688 return ret;
1689}
1690
1691 1624
1692/* 1625/*
1693 * Flush all sends that might be in the ready to send state, as well as any 1626 * Flush all sends that might be in the ready to send state, as well as any
@@ -2056,6 +1989,8 @@ void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val)
2056 */ 1989 */
2057void ipath_shutdown_device(struct ipath_devdata *dd) 1990void ipath_shutdown_device(struct ipath_devdata *dd)
2058{ 1991{
1992 unsigned long flags;
1993
2059 ipath_dbg("Shutting down the device\n"); 1994 ipath_dbg("Shutting down the device\n");
2060 1995
2061 dd->ipath_flags |= IPATH_LINKUNK; 1996 dd->ipath_flags |= IPATH_LINKUNK;
@@ -2076,9 +2011,13 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
2076 * gracefully stop all sends allowing any in progress to trickle out 2011 * gracefully stop all sends allowing any in progress to trickle out
2077 * first. 2012 * first.
2078 */ 2013 */
2079 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 0ULL); 2014 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
2015 dd->ipath_sendctrl = 0;
2016 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
2080 /* flush it */ 2017 /* flush it */
2081 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); 2018 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
2019 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
2020
2082 /* 2021 /*
2083 * enough for anything that's going to trickle out to have actually 2022 * enough for anything that's going to trickle out to have actually
2084 * done so. 2023 * done so.
@@ -2335,5 +2274,34 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
2335 } 2274 }
2336 return 0; 2275 return 0;
2337} 2276}
2277
2278/*
2279 * Disable and enable the armlaunch error. Used for PIO bandwidth testing on
2280 * the 7220, which is count-based, rather than trigger-based. Safe for the
2281 * driver check, since it's at init. Not completely safe when used for
2282 * user-mode checking, since some error checking can be lost, but not
2283 * particularly risky, and only has problematic side-effects in the face of
2284 * very buggy user code. There is no reference counting, but that's also
2285 * fine, given the intended use.
2286 */
2287void ipath_enable_armlaunch(struct ipath_devdata *dd)
2288{
2289 dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH;
2290 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
2291 INFINIPATH_E_SPIOARMLAUNCH);
2292 dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH;
2293 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
2294 dd->ipath_errormask);
2295}
2296
2297void ipath_disable_armlaunch(struct ipath_devdata *dd)
2298{
2299 /* so don't re-enable if already set */
2300 dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH;
2301 dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH;
2302 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
2303 dd->ipath_errormask);
2304}
2305
2338module_init(infinipath_init); 2306module_init(infinipath_init);
2339module_exit(infinipath_cleanup); 2307module_exit(infinipath_cleanup);
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
index e7c25dbbcdc9..e28a42f53769 100644
--- a/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -510,10 +510,10 @@ int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
510{ 510{
511 int ret; 511 int ret;
512 512
513 ret = down_interruptible(&dd->ipath_eep_sem); 513 ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
514 if (!ret) { 514 if (!ret) {
515 ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len); 515 ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len);
516 up(&dd->ipath_eep_sem); 516 mutex_unlock(&dd->ipath_eep_lock);
517 } 517 }
518 518
519 return ret; 519 return ret;
@@ -524,10 +524,10 @@ int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
524{ 524{
525 int ret; 525 int ret;
526 526
527 ret = down_interruptible(&dd->ipath_eep_sem); 527 ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
528 if (!ret) { 528 if (!ret) {
529 ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len); 529 ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len);
530 up(&dd->ipath_eep_sem); 530 mutex_unlock(&dd->ipath_eep_lock);
531 } 531 }
532 532
533 return ret; 533 return ret;
@@ -574,7 +574,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
574 struct ipath_devdata *dd0 = ipath_lookup(0); 574 struct ipath_devdata *dd0 = ipath_lookup(0);
575 575
576 if (t && dd0->ipath_nguid > 1 && t <= dd0->ipath_nguid) { 576 if (t && dd0->ipath_nguid > 1 && t <= dd0->ipath_nguid) {
577 u8 *bguid, oguid; 577 u8 oguid;
578 dd->ipath_guid = dd0->ipath_guid; 578 dd->ipath_guid = dd0->ipath_guid;
579 bguid = (u8 *) & dd->ipath_guid; 579 bguid = (u8 *) & dd->ipath_guid;
580 580
@@ -616,9 +616,9 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
616 goto bail; 616 goto bail;
617 } 617 }
618 618
619 down(&dd->ipath_eep_sem); 619 mutex_lock(&dd->ipath_eep_lock);
620 eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len); 620 eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len);
621 up(&dd->ipath_eep_sem); 621 mutex_unlock(&dd->ipath_eep_lock);
622 622
623 if (eep_stat) { 623 if (eep_stat) {
624 ipath_dev_err(dd, "Failed reading GUID from eeprom\n"); 624 ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
@@ -674,7 +674,6 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
674 * elsewhere for backward-compatibility. 674 * elsewhere for backward-compatibility.
675 */ 675 */
676 char *snp = dd->ipath_serial; 676 char *snp = dd->ipath_serial;
677 int len;
678 memcpy(snp, ifp->if_sprefix, sizeof ifp->if_sprefix); 677 memcpy(snp, ifp->if_sprefix, sizeof ifp->if_sprefix);
679 snp[sizeof ifp->if_sprefix] = '\0'; 678 snp[sizeof ifp->if_sprefix] = '\0';
680 len = strlen(snp); 679 len = strlen(snp);
@@ -764,14 +763,14 @@ int ipath_update_eeprom_log(struct ipath_devdata *dd)
764 /* Grab semaphore and read current EEPROM. If we get an 763 /* Grab semaphore and read current EEPROM. If we get an
765 * error, let go, but if not, keep it until we finish write. 764 * error, let go, but if not, keep it until we finish write.
766 */ 765 */
767 ret = down_interruptible(&dd->ipath_eep_sem); 766 ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
768 if (ret) { 767 if (ret) {
769 ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n"); 768 ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n");
770 goto free_bail; 769 goto free_bail;
771 } 770 }
772 ret = ipath_eeprom_internal_read(dd, 0, buf, len); 771 ret = ipath_eeprom_internal_read(dd, 0, buf, len);
773 if (ret) { 772 if (ret) {
774 up(&dd->ipath_eep_sem); 773 mutex_unlock(&dd->ipath_eep_lock);
775 ipath_dev_err(dd, "Unable read EEPROM for logging\n"); 774 ipath_dev_err(dd, "Unable read EEPROM for logging\n");
776 goto free_bail; 775 goto free_bail;
777 } 776 }
@@ -779,7 +778,7 @@ int ipath_update_eeprom_log(struct ipath_devdata *dd)
779 778
780 csum = flash_csum(ifp, 0); 779 csum = flash_csum(ifp, 0);
781 if (csum != ifp->if_csum) { 780 if (csum != ifp->if_csum) {
782 up(&dd->ipath_eep_sem); 781 mutex_unlock(&dd->ipath_eep_lock);
783 ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n", 782 ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n",
784 csum, ifp->if_csum); 783 csum, ifp->if_csum);
785 ret = 1; 784 ret = 1;
@@ -849,7 +848,7 @@ int ipath_update_eeprom_log(struct ipath_devdata *dd)
849 csum = flash_csum(ifp, 1); 848 csum = flash_csum(ifp, 1);
850 ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1); 849 ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1);
851 } 850 }
852 up(&dd->ipath_eep_sem); 851 mutex_unlock(&dd->ipath_eep_lock);
853 if (ret) 852 if (ret)
854 ipath_dev_err(dd, "Failed updating EEPROM\n"); 853 ipath_dev_err(dd, "Failed updating EEPROM\n");
855 854
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 5de3243a47c3..7e025c8e01b6 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -169,7 +169,7 @@ static int ipath_get_base_info(struct file *fp,
169 kinfo->spi_piocnt = dd->ipath_pbufsport; 169 kinfo->spi_piocnt = dd->ipath_pbufsport;
170 kinfo->spi_piobufbase = (u64) pd->port_piobufs; 170 kinfo->spi_piobufbase = (u64) pd->port_piobufs;
171 kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + 171 kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
172 dd->ipath_palign * pd->port_port; 172 dd->ipath_ureg_align * pd->port_port;
173 } else if (master) { 173 } else if (master) {
174 kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) + 174 kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) +
175 (dd->ipath_pbufsport % subport_cnt); 175 (dd->ipath_pbufsport % subport_cnt);
@@ -186,7 +186,7 @@ static int ipath_get_base_info(struct file *fp,
186 } 186 }
187 if (shared) { 187 if (shared) {
188 kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase + 188 kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
189 dd->ipath_palign * pd->port_port; 189 dd->ipath_ureg_align * pd->port_port;
190 kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs; 190 kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs;
191 kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base; 191 kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base;
192 kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr; 192 kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr;
@@ -742,11 +742,12 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport,
742 * updated and correct itself, even in the face of software 742 * updated and correct itself, even in the face of software
743 * bugs. 743 * bugs.
744 */ 744 */
745 *(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0; 745 if (pd->port_rcvhdrtail_kvaddr)
746 set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, 746 ipath_clear_rcvhdrtail(pd);
747 set_bit(dd->ipath_r_portenable_shift + pd->port_port,
747 &dd->ipath_rcvctrl); 748 &dd->ipath_rcvctrl);
748 } else 749 } else
749 clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, 750 clear_bit(dd->ipath_r_portenable_shift + pd->port_port,
750 &dd->ipath_rcvctrl); 751 &dd->ipath_rcvctrl);
751 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 752 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
752 dd->ipath_rcvctrl); 753 dd->ipath_rcvctrl);
@@ -881,7 +882,7 @@ static int ipath_create_user_egr(struct ipath_portdata *pd)
881 882
882 egrcnt = dd->ipath_rcvegrcnt; 883 egrcnt = dd->ipath_rcvegrcnt;
883 /* TID number offset for this port */ 884 /* TID number offset for this port */
884 egroff = pd->port_port * egrcnt; 885 egroff = (pd->port_port - 1) * egrcnt + dd->ipath_p0_rcvegrcnt;
885 egrsize = dd->ipath_rcvegrbufsize; 886 egrsize = dd->ipath_rcvegrbufsize;
886 ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid " 887 ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
887 "offset %x, egrsize %u\n", egrcnt, egroff, egrsize); 888 "offset %x, egrsize %u\n", egrcnt, egroff, egrsize);
@@ -1049,11 +1050,6 @@ static int mmap_piobufs(struct vm_area_struct *vma,
1049 1050
1050 phys = dd->ipath_physaddr + piobufs; 1051 phys = dd->ipath_physaddr + piobufs;
1051 1052
1052 /*
1053 * Don't mark this as non-cached, or we don't get the
1054 * write combining behavior we want on the PIO buffers!
1055 */
1056
1057#if defined(__powerpc__) 1053#if defined(__powerpc__)
1058 /* There isn't a generic way to specify writethrough mappings */ 1054 /* There isn't a generic way to specify writethrough mappings */
1059 pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; 1055 pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
@@ -1120,33 +1116,24 @@ bail:
1120} 1116}
1121 1117
1122/* 1118/*
1123 * ipath_file_vma_nopage - handle a VMA page fault. 1119 * ipath_file_vma_fault - handle a VMA page fault.
1124 */ 1120 */
1125static struct page *ipath_file_vma_nopage(struct vm_area_struct *vma, 1121static int ipath_file_vma_fault(struct vm_area_struct *vma,
1126 unsigned long address, int *type) 1122 struct vm_fault *vmf)
1127{ 1123{
1128 unsigned long offset = address - vma->vm_start; 1124 struct page *page;
1129 struct page *page = NOPAGE_SIGBUS;
1130 void *pageptr;
1131 1125
1132 /* 1126 page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
1133 * Convert the vmalloc address into a struct page.
1134 */
1135 pageptr = (void *)(offset + (vma->vm_pgoff << PAGE_SHIFT));
1136 page = vmalloc_to_page(pageptr);
1137 if (!page) 1127 if (!page)
1138 goto out; 1128 return VM_FAULT_SIGBUS;
1139
1140 /* Increment the reference count. */
1141 get_page(page); 1129 get_page(page);
1142 if (type) 1130 vmf->page = page;
1143 *type = VM_FAULT_MINOR; 1131
1144out: 1132 return 0;
1145 return page;
1146} 1133}
1147 1134
1148static struct vm_operations_struct ipath_file_vm_ops = { 1135static struct vm_operations_struct ipath_file_vm_ops = {
1149 .nopage = ipath_file_vma_nopage, 1136 .fault = ipath_file_vma_fault,
1150}; 1137};
1151 1138
1152static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, 1139static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
@@ -1284,7 +1271,7 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
1284 goto bail; 1271 goto bail;
1285 } 1272 }
1286 1273
1287 ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; 1274 ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
1288 if (!pd->port_subport_cnt) { 1275 if (!pd->port_subport_cnt) {
1289 /* port is not shared */ 1276 /* port is not shared */
1290 piocnt = dd->ipath_pbufsport; 1277 piocnt = dd->ipath_pbufsport;
@@ -1400,7 +1387,10 @@ static unsigned int ipath_poll_next(struct ipath_portdata *pd,
1400 pollflag = ipath_poll_hdrqfull(pd); 1387 pollflag = ipath_poll_hdrqfull(pd);
1401 1388
1402 head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port); 1389 head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
1403 tail = *(volatile u64 *)pd->port_rcvhdrtail_kvaddr; 1390 if (pd->port_rcvhdrtail_kvaddr)
1391 tail = ipath_get_rcvhdrtail(pd);
1392 else
1393 tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
1404 1394
1405 if (head != tail) 1395 if (head != tail)
1406 pollflag |= POLLIN | POLLRDNORM; 1396 pollflag |= POLLIN | POLLRDNORM;
@@ -1410,7 +1400,7 @@ static unsigned int ipath_poll_next(struct ipath_portdata *pd,
1410 /* flush waiting flag so we don't miss an event */ 1400 /* flush waiting flag so we don't miss an event */
1411 wmb(); 1401 wmb();
1412 1402
1413 set_bit(pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT, 1403 set_bit(pd->port_port + dd->ipath_r_intravail_shift,
1414 &dd->ipath_rcvctrl); 1404 &dd->ipath_rcvctrl);
1415 1405
1416 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 1406 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
@@ -1790,6 +1780,7 @@ static int find_shared_port(struct file *fp,
1790 } 1780 }
1791 port_fp(fp) = pd; 1781 port_fp(fp) = pd;
1792 subport_fp(fp) = pd->port_cnt++; 1782 subport_fp(fp) = pd->port_cnt++;
1783 pd->port_subpid[subport_fp(fp)] = current->pid;
1793 tidcursor_fp(fp) = 0; 1784 tidcursor_fp(fp) = 0;
1794 pd->active_slaves |= 1 << subport_fp(fp); 1785 pd->active_slaves |= 1 << subport_fp(fp);
1795 ipath_cdbg(PROC, 1786 ipath_cdbg(PROC,
@@ -1920,8 +1911,7 @@ static int ipath_do_user_init(struct file *fp,
1920 */ 1911 */
1921 head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port); 1912 head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
1922 ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port); 1913 ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
1923 dd->ipath_lastegrheads[pd->port_port] = -1; 1914 pd->port_lastrcvhdrqtail = -1;
1924 dd->ipath_lastrcvhdrqtails[pd->port_port] = -1;
1925 ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n", 1915 ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
1926 pd->port_port, head32); 1916 pd->port_port, head32);
1927 pd->port_tidcursor = 0; /* start at beginning after open */ 1917 pd->port_tidcursor = 0; /* start at beginning after open */
@@ -1941,11 +1931,13 @@ static int ipath_do_user_init(struct file *fp,
1941 * We explictly set the in-memory copy to 0 beforehand, so we don't 1931 * We explictly set the in-memory copy to 0 beforehand, so we don't
1942 * have to wait to be sure the DMA update has happened. 1932 * have to wait to be sure the DMA update has happened.
1943 */ 1933 */
1944 *(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0ULL; 1934 if (pd->port_rcvhdrtail_kvaddr)
1945 set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, 1935 ipath_clear_rcvhdrtail(pd);
1936 set_bit(dd->ipath_r_portenable_shift + pd->port_port,
1946 &dd->ipath_rcvctrl); 1937 &dd->ipath_rcvctrl);
1947 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 1938 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1948 dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD); 1939 dd->ipath_rcvctrl &
1940 ~(1ULL << dd->ipath_r_tailupd_shift));
1949 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 1941 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1950 dd->ipath_rcvctrl); 1942 dd->ipath_rcvctrl);
1951 /* Notify any waiting slaves */ 1943 /* Notify any waiting slaves */
@@ -2022,6 +2014,7 @@ static int ipath_close(struct inode *in, struct file *fp)
2022 * the slave(s) don't wait for receive data forever. 2014 * the slave(s) don't wait for receive data forever.
2023 */ 2015 */
2024 pd->active_slaves &= ~(1 << fd->subport); 2016 pd->active_slaves &= ~(1 << fd->subport);
2017 pd->port_subpid[fd->subport] = 0;
2025 mutex_unlock(&ipath_mutex); 2018 mutex_unlock(&ipath_mutex);
2026 goto bail; 2019 goto bail;
2027 } 2020 }
@@ -2054,9 +2047,9 @@ static int ipath_close(struct inode *in, struct file *fp)
2054 if (dd->ipath_kregbase) { 2047 if (dd->ipath_kregbase) {
2055 int i; 2048 int i;
2056 /* atomically clear receive enable port and intr avail. */ 2049 /* atomically clear receive enable port and intr avail. */
2057 clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + port, 2050 clear_bit(dd->ipath_r_portenable_shift + port,
2058 &dd->ipath_rcvctrl); 2051 &dd->ipath_rcvctrl);
2059 clear_bit(pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT, 2052 clear_bit(pd->port_port + dd->ipath_r_intravail_shift,
2060 &dd->ipath_rcvctrl); 2053 &dd->ipath_rcvctrl);
2061 ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl, 2054 ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl,
2062 dd->ipath_rcvctrl); 2055 dd->ipath_rcvctrl);
@@ -2149,11 +2142,15 @@ static int ipath_get_slave_info(struct ipath_portdata *pd,
2149 2142
2150static int ipath_force_pio_avail_update(struct ipath_devdata *dd) 2143static int ipath_force_pio_avail_update(struct ipath_devdata *dd)
2151{ 2144{
2152 u64 reg = dd->ipath_sendctrl; 2145 unsigned long flags;
2153 2146
2154 clear_bit(IPATH_S_PIOBUFAVAILUPD, &reg); 2147 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
2155 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, reg); 2148 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
2149 dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
2150 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
2156 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); 2151 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
2152 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
2153 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
2157 2154
2158 return 0; 2155 return 0;
2159} 2156}
@@ -2227,6 +2224,11 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
2227 dest = &cmd.cmd.poll_type; 2224 dest = &cmd.cmd.poll_type;
2228 src = &ucmd->cmd.poll_type; 2225 src = &ucmd->cmd.poll_type;
2229 break; 2226 break;
2227 case IPATH_CMD_ARMLAUNCH_CTRL:
2228 copy = sizeof(cmd.cmd.armlaunch_ctrl);
2229 dest = &cmd.cmd.armlaunch_ctrl;
2230 src = &ucmd->cmd.armlaunch_ctrl;
2231 break;
2230 default: 2232 default:
2231 ret = -EINVAL; 2233 ret = -EINVAL;
2232 goto bail; 2234 goto bail;
@@ -2302,6 +2304,12 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
2302 case IPATH_CMD_POLL_TYPE: 2304 case IPATH_CMD_POLL_TYPE:
2303 pd->poll_type = cmd.cmd.poll_type; 2305 pd->poll_type = cmd.cmd.poll_type;
2304 break; 2306 break;
2307 case IPATH_CMD_ARMLAUNCH_CTRL:
2308 if (cmd.cmd.armlaunch_ctrl)
2309 ipath_enable_armlaunch(pd->port_dd);
2310 else
2311 ipath_disable_armlaunch(pd->port_dd);
2312 break;
2305 } 2313 }
2306 2314
2307 if (ret >= 0) 2315 if (ret >= 0)
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 262c25db05cd..23faba9d21eb 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -108,21 +108,16 @@ static const struct file_operations atomic_stats_ops = {
108 .read = atomic_stats_read, 108 .read = atomic_stats_read,
109}; 109};
110 110
111#define NUM_COUNTERS sizeof(struct infinipath_counters) / sizeof(u64)
112
113static ssize_t atomic_counters_read(struct file *file, char __user *buf, 111static ssize_t atomic_counters_read(struct file *file, char __user *buf,
114 size_t count, loff_t *ppos) 112 size_t count, loff_t *ppos)
115{ 113{
116 u64 counters[NUM_COUNTERS]; 114 struct infinipath_counters counters;
117 u16 i;
118 struct ipath_devdata *dd; 115 struct ipath_devdata *dd;
119 116
120 dd = file->f_path.dentry->d_inode->i_private; 117 dd = file->f_path.dentry->d_inode->i_private;
118 dd->ipath_f_read_counters(dd, &counters);
121 119
122 for (i = 0; i < NUM_COUNTERS; i++) 120 return simple_read_from_buffer(buf, count, ppos, &counters,
123 counters[i] = ipath_snap_cntr(dd, i);
124
125 return simple_read_from_buffer(buf, count, ppos, counters,
126 sizeof counters); 121 sizeof counters);
127} 122}
128 123
@@ -243,8 +238,7 @@ static int create_device_files(struct super_block *sb,
243 238
244 snprintf(unit, sizeof unit, "%02d", dd->ipath_unit); 239 snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
245 ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir, 240 ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir,
246 (struct file_operations *) &simple_dir_operations, 241 &simple_dir_operations, dd);
247 dd);
248 if (ret) { 242 if (ret) {
249 printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret); 243 printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret);
250 goto bail; 244 goto bail;
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index ddbebe4bdb27..9e2ced3cdc5e 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -148,10 +148,57 @@ struct _infinipath_do_not_use_kernel_regs {
148 unsigned long long ReservedSW2[4]; 148 unsigned long long ReservedSW2[4];
149}; 149};
150 150
151#define IPATH_KREG_OFFSET(field) (offsetof(struct \ 151struct _infinipath_do_not_use_counters {
152 _infinipath_do_not_use_kernel_regs, field) / sizeof(u64)) 152 __u64 LBIntCnt;
153 __u64 LBFlowStallCnt;
154 __u64 Reserved1;
155 __u64 TxUnsupVLErrCnt;
156 __u64 TxDataPktCnt;
157 __u64 TxFlowPktCnt;
158 __u64 TxDwordCnt;
159 __u64 TxLenErrCnt;
160 __u64 TxMaxMinLenErrCnt;
161 __u64 TxUnderrunCnt;
162 __u64 TxFlowStallCnt;
163 __u64 TxDroppedPktCnt;
164 __u64 RxDroppedPktCnt;
165 __u64 RxDataPktCnt;
166 __u64 RxFlowPktCnt;
167 __u64 RxDwordCnt;
168 __u64 RxLenErrCnt;
169 __u64 RxMaxMinLenErrCnt;
170 __u64 RxICRCErrCnt;
171 __u64 RxVCRCErrCnt;
172 __u64 RxFlowCtrlErrCnt;
173 __u64 RxBadFormatCnt;
174 __u64 RxLinkProblemCnt;
175 __u64 RxEBPCnt;
176 __u64 RxLPCRCErrCnt;
177 __u64 RxBufOvflCnt;
178 __u64 RxTIDFullErrCnt;
179 __u64 RxTIDValidErrCnt;
180 __u64 RxPKeyMismatchCnt;
181 __u64 RxP0HdrEgrOvflCnt;
182 __u64 RxP1HdrEgrOvflCnt;
183 __u64 RxP2HdrEgrOvflCnt;
184 __u64 RxP3HdrEgrOvflCnt;
185 __u64 RxP4HdrEgrOvflCnt;
186 __u64 RxP5HdrEgrOvflCnt;
187 __u64 RxP6HdrEgrOvflCnt;
188 __u64 RxP7HdrEgrOvflCnt;
189 __u64 RxP8HdrEgrOvflCnt;
190 __u64 Reserved6;
191 __u64 Reserved7;
192 __u64 IBStatusChangeCnt;
193 __u64 IBLinkErrRecoveryCnt;
194 __u64 IBLinkDownedCnt;
195 __u64 IBSymbolErrCnt;
196};
197
198#define IPATH_KREG_OFFSET(field) (offsetof( \
199 struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
153#define IPATH_CREG_OFFSET(field) (offsetof( \ 200#define IPATH_CREG_OFFSET(field) (offsetof( \
154 struct infinipath_counters, field) / sizeof(u64)) 201 struct _infinipath_do_not_use_counters, field) / sizeof(u64))
155 202
156static const struct ipath_kregs ipath_ht_kregs = { 203static const struct ipath_kregs ipath_ht_kregs = {
157 .kr_control = IPATH_KREG_OFFSET(Control), 204 .kr_control = IPATH_KREG_OFFSET(Control),
@@ -282,6 +329,9 @@ static const struct ipath_cregs ipath_ht_cregs = {
282#define INFINIPATH_HWE_HTAPLL_RFSLIP 0x1000000000000000ULL 329#define INFINIPATH_HWE_HTAPLL_RFSLIP 0x1000000000000000ULL
283#define INFINIPATH_HWE_SERDESPLLFAILED 0x2000000000000000ULL 330#define INFINIPATH_HWE_SERDESPLLFAILED 0x2000000000000000ULL
284 331
332#define IBA6110_IBCS_LINKTRAININGSTATE_MASK 0xf
333#define IBA6110_IBCS_LINKSTATE_SHIFT 4
334
285/* kr_extstatus bits */ 335/* kr_extstatus bits */
286#define INFINIPATH_EXTS_FREQSEL 0x2 336#define INFINIPATH_EXTS_FREQSEL 0x2
287#define INFINIPATH_EXTS_SERDESSEL 0x4 337#define INFINIPATH_EXTS_SERDESSEL 0x4
@@ -296,6 +346,12 @@ static const struct ipath_cregs ipath_ht_cregs = {
296#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL 346#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL
297#define INFINIPATH_RT_BUFSIZE_SHIFT 48 347#define INFINIPATH_RT_BUFSIZE_SHIFT 48
298 348
349#define INFINIPATH_R_INTRAVAIL_SHIFT 16
350#define INFINIPATH_R_TAILUPD_SHIFT 31
351
352/* kr_xgxsconfig bits */
353#define INFINIPATH_XGXS_RESET 0x7ULL
354
299/* 355/*
300 * masks and bits that are different in different chips, or present only 356 * masks and bits that are different in different chips, or present only
301 * in one 357 * in one
@@ -652,7 +708,6 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
652 "with ID %u\n", boardrev); 708 "with ID %u\n", boardrev);
653 snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u", 709 snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u",
654 boardrev); 710 boardrev);
655 ret = 1;
656 break; 711 break;
657 } 712 }
658 if (n) 713 if (n)
@@ -686,6 +741,13 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
686 dd->ipath_htspeed); 741 dd->ipath_htspeed);
687 ret = 0; 742 ret = 0;
688 743
744 /*
745 * set here, not in ipath_init_*_funcs because we have to do
746 * it after we can read chip registers.
747 */
748 dd->ipath_ureg_align =
749 ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
750
689bail: 751bail:
690 return ret; 752 return ret;
691} 753}
@@ -969,7 +1031,8 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd,
969 do { 1031 do {
970 u8 cap_type; 1032 u8 cap_type;
971 1033
972 /* the HT capability type byte is 3 bytes after the 1034 /*
1035 * The HT capability type byte is 3 bytes after the
973 * capability byte. 1036 * capability byte.
974 */ 1037 */
975 if (pci_read_config_byte(pdev, pos + 3, &cap_type)) { 1038 if (pci_read_config_byte(pdev, pos + 3, &cap_type)) {
@@ -982,6 +1045,8 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd,
982 } while ((pos = pci_find_next_capability(pdev, pos, 1045 } while ((pos = pci_find_next_capability(pdev, pos,
983 PCI_CAP_ID_HT))); 1046 PCI_CAP_ID_HT)));
984 1047
1048 dd->ipath_flags |= IPATH_SWAP_PIOBUFS;
1049
985bail: 1050bail:
986 return ret; 1051 return ret;
987} 1052}
@@ -1074,11 +1139,55 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
1074 1139
1075static void ipath_init_ht_variables(struct ipath_devdata *dd) 1140static void ipath_init_ht_variables(struct ipath_devdata *dd)
1076{ 1141{
1142 /*
1143 * setup the register offsets, since they are different for each
1144 * chip
1145 */
1146 dd->ipath_kregs = &ipath_ht_kregs;
1147 dd->ipath_cregs = &ipath_ht_cregs;
1148
1077 dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; 1149 dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
1078 dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; 1150 dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
1079 dd->ipath_gpio_sda = IPATH_GPIO_SDA; 1151 dd->ipath_gpio_sda = IPATH_GPIO_SDA;
1080 dd->ipath_gpio_scl = IPATH_GPIO_SCL; 1152 dd->ipath_gpio_scl = IPATH_GPIO_SCL;
1081 1153
1154 /*
1155 * Fill in data for field-values that change in newer chips.
1156 * We dynamically specify only the mask for LINKTRAININGSTATE
1157 * and only the shift for LINKSTATE, as they are the only ones
1158 * that change. Also precalculate the 3 link states of interest
1159 * and the combined mask.
1160 */
1161 dd->ibcs_ls_shift = IBA6110_IBCS_LINKSTATE_SHIFT;
1162 dd->ibcs_lts_mask = IBA6110_IBCS_LINKTRAININGSTATE_MASK;
1163 dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK <<
1164 dd->ibcs_ls_shift) | dd->ibcs_lts_mask;
1165 dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
1166 INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
1167 (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift);
1168 dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
1169 INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
1170 (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift);
1171 dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
1172 INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
1173 (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift);
1174
1175 /*
1176 * Fill in data for ibcc field-values that change in newer chips.
1177 * We dynamically specify only the mask for LINKINITCMD
1178 * and only the shift for LINKCMD and MAXPKTLEN, as they are
1179 * the only ones that change.
1180 */
1181 dd->ibcc_lic_mask = INFINIPATH_IBCC_LINKINITCMD_MASK;
1182 dd->ibcc_lc_shift = INFINIPATH_IBCC_LINKCMD_SHIFT;
1183 dd->ibcc_mpl_shift = INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
1184
1185 /* Fill in shifts for RcvCtrl. */
1186 dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT;
1187 dd->ipath_r_intravail_shift = INFINIPATH_R_INTRAVAIL_SHIFT;
1188 dd->ipath_r_tailupd_shift = INFINIPATH_R_TAILUPD_SHIFT;
1189 dd->ipath_r_portcfg_shift = 0; /* Not on IBA6110 */
1190
1082 dd->ipath_i_bitsextant = 1191 dd->ipath_i_bitsextant =
1083 (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) | 1192 (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
1084 (INFINIPATH_I_RCVAVAIL_MASK << 1193 (INFINIPATH_I_RCVAVAIL_MASK <<
@@ -1135,6 +1244,8 @@ static void ipath_init_ht_variables(struct ipath_devdata *dd)
1135 1244
1136 dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; 1245 dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
1137 dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; 1246 dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
1247 dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT;
1248 dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT;
1138 1249
1139 /* 1250 /*
1140 * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. 1251 * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
@@ -1148,9 +1259,17 @@ static void ipath_init_ht_variables(struct ipath_devdata *dd)
1148 INFINIPATH_HWE_RXEMEMPARITYERR_MASK << 1259 INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
1149 INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT; 1260 INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
1150 1261
1151 dd->ipath_eep_st_masks[2].errs_to_log = 1262 dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET;
1152 INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;
1153 1263
1264 dd->delay_mult = 2; /* SDR, 4X, can't change */
1265
1266 dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
1267 dd->ipath_link_speed_supported = IPATH_IB_SDR;
1268 dd->ipath_link_width_enabled = IB_WIDTH_4X;
1269 dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported;
1270 /* these can't change for this chip, so set once */
1271 dd->ipath_link_width_active = dd->ipath_link_width_enabled;
1272 dd->ipath_link_speed_active = dd->ipath_link_speed_enabled;
1154} 1273}
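Editorial note: the block of ibcs values added above exists so that chip-independent code can test the IB link state with a single masked compare; dd->ibcs_mask covers both the LINKSTATE and LINKTRAININGSTATE fields, and dd->ib_init/ib_arm/ib_active are the three precomputed states of interest. A minimal sketch of that usage, with a hypothetical helper name that is not part of this patch:

	/* Hypothetical helper illustrating how the precomputed values are used. */
	static inline int ipath_ib_state_is_active(struct ipath_devdata *dd, u64 ibcs)
	{
		/* one masked compare instead of chip-specific shifting */
		return (ibcs & dd->ibcs_mask) == dd->ib_active;
	}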
1155 1274
1156/** 1275/**
@@ -1205,14 +1324,16 @@ static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
1205 val &= ~INFINIPATH_HWE_HTCMISCERR4; 1324 val &= ~INFINIPATH_HWE_HTCMISCERR4;
1206 1325
1207 /* 1326 /*
1208 * PLL ignored because MDIO interface has a logic problem 1327 * PLL ignored because unused MDIO interface has a logic problem
1209 * for reads, on Comstock and Ponderosa. BRINGUP
1210 */ 1328 */
1211 if (dd->ipath_boardrev == 4 || dd->ipath_boardrev == 9) 1329 if (dd->ipath_boardrev == 4 || dd->ipath_boardrev == 9)
1212 val &= ~INFINIPATH_HWE_SERDESPLLFAILED; 1330 val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
1213 dd->ipath_hwerrmask = val; 1331 dd->ipath_hwerrmask = val;
1214} 1332}
1215 1333
1334
1335
1336
1216/** 1337/**
1217 * ipath_ht_bringup_serdes - bring up the serdes 1338 * ipath_ht_bringup_serdes - bring up the serdes
1218 * @dd: the infinipath device 1339 * @dd: the infinipath device
@@ -1284,16 +1405,6 @@ static int ipath_ht_bringup_serdes(struct ipath_devdata *dd)
1284 } 1405 }
1285 1406
1286 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); 1407 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
1287 if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) &
1288 INFINIPATH_XGXS_MDIOADDR_MASK) != 3) {
1289 val &= ~(INFINIPATH_XGXS_MDIOADDR_MASK <<
1290 INFINIPATH_XGXS_MDIOADDR_SHIFT);
1291 /*
1292 * we use address 3
1293 */
1294 val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT;
1295 change = 1;
1296 }
1297 if (val & INFINIPATH_XGXS_RESET) { 1408 if (val & INFINIPATH_XGXS_RESET) {
1298 /* normally true after boot */ 1409 /* normally true after boot */
1299 val &= ~INFINIPATH_XGXS_RESET; 1410 val &= ~INFINIPATH_XGXS_RESET;
@@ -1329,21 +1440,6 @@ static int ipath_ht_bringup_serdes(struct ipath_devdata *dd)
1329 (unsigned long long) 1440 (unsigned long long)
1330 ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig)); 1441 ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
1331 1442
1332 if (!ipath_waitfor_mdio_cmdready(dd)) {
1333 ipath_write_kreg(dd, dd->ipath_kregs->kr_mdio,
1334 ipath_mdio_req(IPATH_MDIO_CMD_READ, 31,
1335 IPATH_MDIO_CTRL_XGXS_REG_8,
1336 0));
1337 if (ipath_waitfor_complete(dd, dd->ipath_kregs->kr_mdio,
1338 IPATH_MDIO_DATAVALID, &val))
1339 ipath_dbg("Never got MDIO data for XGXS status "
1340 "read\n");
1341 else
1342 ipath_cdbg(VERBOSE, "MDIO Read reg8, "
1343 "'bank' 31 %x\n", (u32) val);
1344 } else
1345 ipath_dbg("Never got MDIO cmdready for XGXS status read\n");
1346
1347 return ret; /* for now, say we always succeeded */ 1443 return ret; /* for now, say we always succeeded */
1348} 1444}
1349 1445
@@ -1396,6 +1492,7 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
1396 pa |= lenvalid | INFINIPATH_RT_VALID; 1492 pa |= lenvalid | INFINIPATH_RT_VALID;
1397 } 1493 }
1398 } 1494 }
1495
1399 writeq(pa, tidptr); 1496 writeq(pa, tidptr);
1400} 1497}
1401 1498
@@ -1526,8 +1623,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
1526 } 1623 }
1527 1624
1528 ipath_get_eeprom_info(dd); 1625 ipath_get_eeprom_info(dd);
1529 if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && 1626 if (dd->ipath_boardrev == 5) {
1530 dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') {
1531 /* 1627 /*
1532 * Later production QHT7040 has same changes as QHT7140, so 1628 * Later production QHT7040 has same changes as QHT7140, so
1533 * can use GPIO interrupts. They have serial #'s starting 1629 * can use GPIO interrupts. They have serial #'s starting
@@ -1602,6 +1698,210 @@ static void ipath_ht_free_irq(struct ipath_devdata *dd)
1602 dd->ipath_intconfig = 0; 1698 dd->ipath_intconfig = 0;
1603} 1699}
1604 1700
1701static struct ipath_message_header *
1702ipath_ht_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr)
1703{
1704 return (struct ipath_message_header *)
1705 &rhf_addr[sizeof(u64) / sizeof(u32)];
1706}
1707
1708static void ipath_ht_config_ports(struct ipath_devdata *dd, ushort cfgports)
1709{
1710 dd->ipath_portcnt =
1711 ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
1712 dd->ipath_p0_rcvegrcnt =
1713 ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
1714}
1715
1716static void ipath_ht_read_counters(struct ipath_devdata *dd,
1717 struct infinipath_counters *cntrs)
1718{
1719 cntrs->LBIntCnt =
1720 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBIntCnt));
1721 cntrs->LBFlowStallCnt =
1722 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBFlowStallCnt));
1723 cntrs->TxSDmaDescCnt = 0;
1724 cntrs->TxUnsupVLErrCnt =
1725 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnsupVLErrCnt));
1726 cntrs->TxDataPktCnt =
1727 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDataPktCnt));
1728 cntrs->TxFlowPktCnt =
1729 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowPktCnt));
1730 cntrs->TxDwordCnt =
1731 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDwordCnt));
1732 cntrs->TxLenErrCnt =
1733 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxLenErrCnt));
1734 cntrs->TxMaxMinLenErrCnt =
1735 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxMaxMinLenErrCnt));
1736 cntrs->TxUnderrunCnt =
1737 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnderrunCnt));
1738 cntrs->TxFlowStallCnt =
1739 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowStallCnt));
1740 cntrs->TxDroppedPktCnt =
1741 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDroppedPktCnt));
1742 cntrs->RxDroppedPktCnt =
1743 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDroppedPktCnt));
1744 cntrs->RxDataPktCnt =
1745 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDataPktCnt));
1746 cntrs->RxFlowPktCnt =
1747 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowPktCnt));
1748 cntrs->RxDwordCnt =
1749 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDwordCnt));
1750 cntrs->RxLenErrCnt =
1751 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLenErrCnt));
1752 cntrs->RxMaxMinLenErrCnt =
1753 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxMaxMinLenErrCnt));
1754 cntrs->RxICRCErrCnt =
1755 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxICRCErrCnt));
1756 cntrs->RxVCRCErrCnt =
1757 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxVCRCErrCnt));
1758 cntrs->RxFlowCtrlErrCnt =
1759 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowCtrlErrCnt));
1760 cntrs->RxBadFormatCnt =
1761 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBadFormatCnt));
1762 cntrs->RxLinkProblemCnt =
1763 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLinkProblemCnt));
1764 cntrs->RxEBPCnt =
1765 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxEBPCnt));
1766 cntrs->RxLPCRCErrCnt =
1767 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLPCRCErrCnt));
1768 cntrs->RxBufOvflCnt =
1769 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBufOvflCnt));
1770 cntrs->RxTIDFullErrCnt =
1771 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDFullErrCnt));
1772 cntrs->RxTIDValidErrCnt =
1773 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDValidErrCnt));
1774 cntrs->RxPKeyMismatchCnt =
1775 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxPKeyMismatchCnt));
1776 cntrs->RxP0HdrEgrOvflCnt =
1777 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt));
1778 cntrs->RxP1HdrEgrOvflCnt =
1779 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP1HdrEgrOvflCnt));
1780 cntrs->RxP2HdrEgrOvflCnt =
1781 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP2HdrEgrOvflCnt));
1782 cntrs->RxP3HdrEgrOvflCnt =
1783 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP3HdrEgrOvflCnt));
1784 cntrs->RxP4HdrEgrOvflCnt =
1785 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP4HdrEgrOvflCnt));
1786 cntrs->RxP5HdrEgrOvflCnt =
1787 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP5HdrEgrOvflCnt));
1788 cntrs->RxP6HdrEgrOvflCnt =
1789 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP6HdrEgrOvflCnt));
1790 cntrs->RxP7HdrEgrOvflCnt =
1791 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP7HdrEgrOvflCnt));
1792 cntrs->RxP8HdrEgrOvflCnt =
1793 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP8HdrEgrOvflCnt));
1794 cntrs->RxP9HdrEgrOvflCnt = 0;
1795 cntrs->RxP10HdrEgrOvflCnt = 0;
1796 cntrs->RxP11HdrEgrOvflCnt = 0;
1797 cntrs->RxP12HdrEgrOvflCnt = 0;
1798 cntrs->RxP13HdrEgrOvflCnt = 0;
1799 cntrs->RxP14HdrEgrOvflCnt = 0;
1800 cntrs->RxP15HdrEgrOvflCnt = 0;
1801 cntrs->RxP16HdrEgrOvflCnt = 0;
1802 cntrs->IBStatusChangeCnt =
1803 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBStatusChangeCnt));
1804 cntrs->IBLinkErrRecoveryCnt =
1805 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt));
1806 cntrs->IBLinkDownedCnt =
1807 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkDownedCnt));
1808 cntrs->IBSymbolErrCnt =
1809 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBSymbolErrCnt));
1810 cntrs->RxVL15DroppedPktCnt = 0;
1811 cntrs->RxOtherLocalPhyErrCnt = 0;
1812 cntrs->PcieRetryBufDiagQwordCnt = 0;
1813 cntrs->ExcessBufferOvflCnt = dd->ipath_overrun_thresh_errs;
1814 cntrs->LocalLinkIntegrityErrCnt =
1815 (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
1816 dd->ipath_lli_errs : dd->ipath_lli_errors;
1817 cntrs->RxVlErrCnt = 0;
1818 cntrs->RxDlidFltrCnt = 0;
1819}
1820
1821
1822/* no interrupt fallback for these chips */
1823static int ipath_ht_nointr_fallback(struct ipath_devdata *dd)
1824{
1825 return 0;
1826}
1827
1828
1829/*
1830 * reset the XGXS (between serdes and IBC). Slightly less intrusive
1831 * than resetting the IBC or external link state, and useful in some
1832 * cases to cause some retraining. To do this right, we reset IBC
1833 * as well.
1834 */
1835static void ipath_ht_xgxs_reset(struct ipath_devdata *dd)
1836{
1837 u64 val, prev_val;
1838
1839 prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
1840 val = prev_val | INFINIPATH_XGXS_RESET;
1841 prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */
1842 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
1843 dd->ipath_control & ~INFINIPATH_C_LINKENABLE);
1844 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
1845 ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
1846 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val);
1847 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
1848 dd->ipath_control);
1849}
1850
1851
1852static int ipath_ht_get_ib_cfg(struct ipath_devdata *dd, int which)
1853{
1854 int ret;
1855
1856 switch (which) {
1857 case IPATH_IB_CFG_LWID:
1858 ret = dd->ipath_link_width_active;
1859 break;
1860 case IPATH_IB_CFG_SPD:
1861 ret = dd->ipath_link_speed_active;
1862 break;
1863 case IPATH_IB_CFG_LWID_ENB:
1864 ret = dd->ipath_link_width_enabled;
1865 break;
1866 case IPATH_IB_CFG_SPD_ENB:
1867 ret = dd->ipath_link_speed_enabled;
1868 break;
1869 default:
1870 ret = -ENOTSUPP;
1871 break;
1872 }
1873 return ret;
1874}
1875
1876
1877/* we assume range checking is already done, if needed */
1878static int ipath_ht_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val)
1879{
1880 int ret = 0;
1881
1882 if (which == IPATH_IB_CFG_LWID_ENB)
1883 dd->ipath_link_width_enabled = val;
1884 else if (which == IPATH_IB_CFG_SPD_ENB)
1885 dd->ipath_link_speed_enabled = val;
1886 else
1887 ret = -ENOTSUPP;
1888 return ret;
1889}
1890
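Editorial note: these static accessors are reached through the per-chip hooks installed in ipath_init_iba6110_funcs() below, not called directly. A hedged usage sketch (the debug message is illustrative, not from this patch):

	/* query the active link width through the chip-specific hook */
	if (dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID) == IB_WIDTH_4X)
		ipath_cdbg(VERBOSE, "IB link is running 4X wide\n");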
1891
1892static void ipath_ht_config_jint(struct ipath_devdata *dd, u16 a, u16 b)
1893{
1894}
1895
1896
1897static int ipath_ht_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
1898{
1899 ipath_setup_ht_setextled(dd, ipath_ib_linkstate(dd, ibcs),
1900 ipath_ib_linktrstate(dd, ibcs));
1901 return 0;
1902}
1903
1904
1605/** 1905/**
1606 * ipath_init_iba6110_funcs - set up the chip-specific function pointers 1906 * ipath_init_iba6110_funcs - set up the chip-specific function pointers
1607 * @dd: the infinipath device 1907 * @dd: the infinipath device
@@ -1626,22 +1926,19 @@ void ipath_init_iba6110_funcs(struct ipath_devdata *dd)
1626 dd->ipath_f_setextled = ipath_setup_ht_setextled; 1926 dd->ipath_f_setextled = ipath_setup_ht_setextled;
1627 dd->ipath_f_get_base_info = ipath_ht_get_base_info; 1927 dd->ipath_f_get_base_info = ipath_ht_get_base_info;
1628 dd->ipath_f_free_irq = ipath_ht_free_irq; 1928 dd->ipath_f_free_irq = ipath_ht_free_irq;
1629
1630 /*
1631 * initialize chip-specific variables
1632 */
1633 dd->ipath_f_tidtemplate = ipath_ht_tidtemplate; 1929 dd->ipath_f_tidtemplate = ipath_ht_tidtemplate;
1930 dd->ipath_f_intr_fallback = ipath_ht_nointr_fallback;
1931 dd->ipath_f_get_msgheader = ipath_ht_get_msgheader;
1932 dd->ipath_f_config_ports = ipath_ht_config_ports;
1933 dd->ipath_f_read_counters = ipath_ht_read_counters;
1934 dd->ipath_f_xgxs_reset = ipath_ht_xgxs_reset;
1935 dd->ipath_f_get_ib_cfg = ipath_ht_get_ib_cfg;
1936 dd->ipath_f_set_ib_cfg = ipath_ht_set_ib_cfg;
1937 dd->ipath_f_config_jint = ipath_ht_config_jint;
1938 dd->ipath_f_ib_updown = ipath_ht_ib_updown;
1634 1939
1635 /* 1940 /*
1636 * setup the register offsets, since they are different for each 1941 * initialize chip-specific variables
1637 * chip
1638 */
1639 dd->ipath_kregs = &ipath_ht_kregs;
1640 dd->ipath_cregs = &ipath_ht_cregs;
1641
1642 /*
1643 * do very early init that is needed before ipath_f_bus is
1644 * called
1645 */ 1942 */
1646 ipath_init_ht_variables(dd); 1943 ipath_init_ht_variables(dd);
1647} 1944}
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 0103d6f4847b..c7a2f50824c0 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -145,10 +145,57 @@ struct _infinipath_do_not_use_kernel_regs {
145 unsigned long long Reserved12; 145 unsigned long long Reserved12;
146}; 146};
147 147
148#define IPATH_KREG_OFFSET(field) (offsetof(struct \ 148struct _infinipath_do_not_use_counters {
149 _infinipath_do_not_use_kernel_regs, field) / sizeof(u64)) 149 __u64 LBIntCnt;
150 __u64 LBFlowStallCnt;
151 __u64 Reserved1;
152 __u64 TxUnsupVLErrCnt;
153 __u64 TxDataPktCnt;
154 __u64 TxFlowPktCnt;
155 __u64 TxDwordCnt;
156 __u64 TxLenErrCnt;
157 __u64 TxMaxMinLenErrCnt;
158 __u64 TxUnderrunCnt;
159 __u64 TxFlowStallCnt;
160 __u64 TxDroppedPktCnt;
161 __u64 RxDroppedPktCnt;
162 __u64 RxDataPktCnt;
163 __u64 RxFlowPktCnt;
164 __u64 RxDwordCnt;
165 __u64 RxLenErrCnt;
166 __u64 RxMaxMinLenErrCnt;
167 __u64 RxICRCErrCnt;
168 __u64 RxVCRCErrCnt;
169 __u64 RxFlowCtrlErrCnt;
170 __u64 RxBadFormatCnt;
171 __u64 RxLinkProblemCnt;
172 __u64 RxEBPCnt;
173 __u64 RxLPCRCErrCnt;
174 __u64 RxBufOvflCnt;
175 __u64 RxTIDFullErrCnt;
176 __u64 RxTIDValidErrCnt;
177 __u64 RxPKeyMismatchCnt;
178 __u64 RxP0HdrEgrOvflCnt;
179 __u64 RxP1HdrEgrOvflCnt;
180 __u64 RxP2HdrEgrOvflCnt;
181 __u64 RxP3HdrEgrOvflCnt;
182 __u64 RxP4HdrEgrOvflCnt;
183 __u64 RxP5HdrEgrOvflCnt;
184 __u64 RxP6HdrEgrOvflCnt;
185 __u64 RxP7HdrEgrOvflCnt;
186 __u64 RxP8HdrEgrOvflCnt;
187 __u64 Reserved6;
188 __u64 Reserved7;
189 __u64 IBStatusChangeCnt;
190 __u64 IBLinkErrRecoveryCnt;
191 __u64 IBLinkDownedCnt;
192 __u64 IBSymbolErrCnt;
193};
194
195#define IPATH_KREG_OFFSET(field) (offsetof( \
196 struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
150#define IPATH_CREG_OFFSET(field) (offsetof( \ 197#define IPATH_CREG_OFFSET(field) (offsetof( \
151 struct infinipath_counters, field) / sizeof(u64)) 198 struct _infinipath_do_not_use_counters, field) / sizeof(u64))
152 199
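Editorial note on the offsetof arithmetic above: the do-not-use layout structs are sequences of consecutive 64-bit words, so dividing a field's byte offset by sizeof(u64) yields the word index that the kregs/cregs tables and ipath_snap_cntr() operate on. For example (illustrative only):

	/* LBFlowStallCnt is the second __u64 in the layout struct, so its
	 * counter index is 8 / sizeof(u64) == 1. */
	size_t idx = offsetof(struct _infinipath_do_not_use_counters,
			      LBFlowStallCnt) / sizeof(u64);	/* == 1 */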
153static const struct ipath_kregs ipath_pe_kregs = { 200static const struct ipath_kregs ipath_pe_kregs = {
154 .kr_control = IPATH_KREG_OFFSET(Control), 201 .kr_control = IPATH_KREG_OFFSET(Control),
@@ -282,6 +329,9 @@ static const struct ipath_cregs ipath_pe_cregs = {
282#define INFINIPATH_HWE_PCIE0PLLFAILED 0x0800000000000000ULL 329#define INFINIPATH_HWE_PCIE0PLLFAILED 0x0800000000000000ULL
283#define INFINIPATH_HWE_SERDESPLLFAILED 0x1000000000000000ULL 330#define INFINIPATH_HWE_SERDESPLLFAILED 0x1000000000000000ULL
284 331
332#define IBA6120_IBCS_LINKTRAININGSTATE_MASK 0xf
333#define IBA6120_IBCS_LINKSTATE_SHIFT 4
334
285/* kr_extstatus bits */ 335/* kr_extstatus bits */
286#define INFINIPATH_EXTS_FREQSEL 0x2 336#define INFINIPATH_EXTS_FREQSEL 0x2
287#define INFINIPATH_EXTS_SERDESSEL 0x4 337#define INFINIPATH_EXTS_SERDESSEL 0x4
@@ -296,6 +346,9 @@ static const struct ipath_cregs ipath_pe_cregs = {
296#define IPATH_GPIO_SCL (1ULL << \ 346#define IPATH_GPIO_SCL (1ULL << \
297 (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) 347 (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
298 348
349#define INFINIPATH_R_INTRAVAIL_SHIFT 16
350#define INFINIPATH_R_TAILUPD_SHIFT 31
351
299/* 6120 specific hardware errors... */ 352/* 6120 specific hardware errors... */
300static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = { 353static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
301 INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"), 354 INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"),
@@ -320,10 +373,28 @@ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
320 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \ 373 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
321 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) 374 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
322 375
323static int ipath_pe_txe_recover(struct ipath_devdata *);
324static void ipath_pe_put_tid_2(struct ipath_devdata *, u64 __iomem *, 376static void ipath_pe_put_tid_2(struct ipath_devdata *, u64 __iomem *,
325 u32, unsigned long); 377 u32, unsigned long);
326 378
379/*
380 * On platforms using this chip, and not having ordered WC stores, we
381 * can get TXE parity errors due to speculative reads to the PIO buffers,
 382 * and this, due to a chip bug, can result in (many) false parity error
383 * reports. So it's a debug print on those, and an info print on systems
384 * where the speculative reads don't occur.
385 */
386static void ipath_pe_txe_recover(struct ipath_devdata *dd)
387{
388 if (ipath_unordered_wc())
389 ipath_dbg("Recovering from TXE PIO parity error\n");
390 else {
391 ++ipath_stats.sps_txeparity;
392 dev_info(&dd->pcidev->dev,
393 "Recovering from TXE PIO parity error\n");
394 }
395}
396
397
327/** 398/**
328 * ipath_pe_handle_hwerrors - display hardware errors. 399 * ipath_pe_handle_hwerrors - display hardware errors.
329 * @dd: the infinipath device 400 * @dd: the infinipath device
@@ -403,35 +474,11 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
403 * occur if a processor speculative read is done to the PIO 474 * occur if a processor speculative read is done to the PIO
404 * buffer while we are sending a packet, for example. 475 * buffer while we are sending a packet, for example.
405 */ 476 */
406 if ((hwerrs & TXE_PIO_PARITY) && ipath_pe_txe_recover(dd)) 477 if (hwerrs & TXE_PIO_PARITY) {
478 ipath_pe_txe_recover(dd);
407 hwerrs &= ~TXE_PIO_PARITY; 479 hwerrs &= ~TXE_PIO_PARITY;
408 if (hwerrs) { 480 }
409 /* 481 if (!hwerrs) {
410 * if any set that we aren't ignoring only make the
411 * complaint once, in case it's stuck or recurring,
412 * and we get here multiple times
413 * Force link down, so switch knows, and
414 * LEDs are turned off
415 */
416 if (dd->ipath_flags & IPATH_INITTED) {
417 ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
418 ipath_setup_pe_setextled(dd,
419 INFINIPATH_IBCS_L_STATE_DOWN,
420 INFINIPATH_IBCS_LT_STATE_DISABLED);
421 ipath_dev_err(dd, "Fatal Hardware Error (freeze "
422 "mode), no longer usable, SN %.16s\n",
423 dd->ipath_serial);
424 isfatal = 1;
425 }
426 /*
427 * Mark as having had an error for driver, and also
428 * for /sys and status word mapped to user programs.
429 * This marks unit as not usable, until reset
430 */
431 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
432 *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
433 dd->ipath_flags &= ~IPATH_INITTED;
434 } else {
435 static u32 freeze_cnt; 482 static u32 freeze_cnt;
436 483
437 freeze_cnt++; 484 freeze_cnt++;
@@ -485,7 +532,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
485 532
486 if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) { 533 if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) {
487 /* 534 /*
488 * If it occurs, it is left masked since the eternal 535 * If it occurs, it is left masked since the external
489 * interface is unused 536 * interface is unused
490 */ 537 */
491 dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED; 538 dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED;
@@ -563,6 +610,14 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
563 dd->ipath_f_put_tid = ipath_pe_put_tid_2; 610 dd->ipath_f_put_tid = ipath_pe_put_tid_2;
564 } 611 }
565 612
613
614 /*
615 * set here, not in ipath_init_*_funcs because we have to do
616 * it after we can read chip registers.
617 */
618 dd->ipath_ureg_align =
619 ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
620
566 return ret; 621 return ret;
567} 622}
568 623
@@ -667,17 +722,8 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
667 722
668 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); 723 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
669 prev_val = val; 724 prev_val = val;
670 if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) & 725 if (val & INFINIPATH_XGXS_RESET)
671 INFINIPATH_XGXS_MDIOADDR_MASK) != 3) {
672 val &=
673 ~(INFINIPATH_XGXS_MDIOADDR_MASK <<
674 INFINIPATH_XGXS_MDIOADDR_SHIFT);
675 /* MDIO address 3 */
676 val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT;
677 }
678 if (val & INFINIPATH_XGXS_RESET) {
679 val &= ~INFINIPATH_XGXS_RESET; 726 val &= ~INFINIPATH_XGXS_RESET;
680 }
681 if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) & 727 if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
682 INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) { 728 INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
683 /* need to compensate for Tx inversion in partner */ 729 /* need to compensate for Tx inversion in partner */
@@ -707,21 +753,6 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
707 (unsigned long long) 753 (unsigned long long)
708 ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig)); 754 ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
709 755
710 if (!ipath_waitfor_mdio_cmdready(dd)) {
711 ipath_write_kreg(
712 dd, dd->ipath_kregs->kr_mdio,
713 ipath_mdio_req(IPATH_MDIO_CMD_READ, 31,
714 IPATH_MDIO_CTRL_XGXS_REG_8, 0));
715 if (ipath_waitfor_complete(dd, dd->ipath_kregs->kr_mdio,
716 IPATH_MDIO_DATAVALID, &val))
717 ipath_dbg("Never got MDIO data for XGXS "
718 "status read\n");
719 else
720 ipath_cdbg(VERBOSE, "MDIO Read reg8, "
721 "'bank' 31 %x\n", (u32) val);
722 } else
723 ipath_dbg("Never got MDIO cmdready for XGXS status read\n");
724
725 return ret; 756 return ret;
726} 757}
727 758
@@ -902,12 +933,27 @@ static int ipath_setup_pe_config(struct ipath_devdata *dd,
902 else 933 else
903 ipath_dev_err(dd, "Can't find PCI Express " 934 ipath_dev_err(dd, "Can't find PCI Express "
904 "capability!\n"); 935 "capability!\n");
936
937 dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
938 dd->ipath_link_speed_supported = IPATH_IB_SDR;
939 dd->ipath_link_width_enabled = IB_WIDTH_4X;
940 dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported;
941 /* these can't change for this chip, so set once */
942 dd->ipath_link_width_active = dd->ipath_link_width_enabled;
943 dd->ipath_link_speed_active = dd->ipath_link_speed_enabled;
905 return 0; 944 return 0;
906} 945}
907 946
908static void ipath_init_pe_variables(struct ipath_devdata *dd) 947static void ipath_init_pe_variables(struct ipath_devdata *dd)
909{ 948{
910 /* 949 /*
950 * setup the register offsets, since they are different for each
951 * chip
952 */
953 dd->ipath_kregs = &ipath_pe_kregs;
954 dd->ipath_cregs = &ipath_pe_cregs;
955
956 /*
911 * bits for selecting i2c direction and values, 957 * bits for selecting i2c direction and values,
912 * used for I2C serial flash 958 * used for I2C serial flash
913 */ 959 */
@@ -916,6 +962,43 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd)
916 dd->ipath_gpio_sda = IPATH_GPIO_SDA; 962 dd->ipath_gpio_sda = IPATH_GPIO_SDA;
917 dd->ipath_gpio_scl = IPATH_GPIO_SCL; 963 dd->ipath_gpio_scl = IPATH_GPIO_SCL;
918 964
965 /*
966 * Fill in data for field-values that change in newer chips.
967 * We dynamically specify only the mask for LINKTRAININGSTATE
968 * and only the shift for LINKSTATE, as they are the only ones
969 * that change. Also precalculate the 3 link states of interest
970 * and the combined mask.
971 */
972 dd->ibcs_ls_shift = IBA6120_IBCS_LINKSTATE_SHIFT;
973 dd->ibcs_lts_mask = IBA6120_IBCS_LINKTRAININGSTATE_MASK;
974 dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK <<
975 dd->ibcs_ls_shift) | dd->ibcs_lts_mask;
976 dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
977 INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
978 (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift);
979 dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
980 INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
981 (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift);
982 dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
983 INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
984 (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift);
985
986 /*
987 * Fill in data for ibcc field-values that change in newer chips.
988 * We dynamically specify only the mask for LINKINITCMD
989 * and only the shift for LINKCMD and MAXPKTLEN, as they are
990 * the only ones that change.
991 */
992 dd->ibcc_lic_mask = INFINIPATH_IBCC_LINKINITCMD_MASK;
993 dd->ibcc_lc_shift = INFINIPATH_IBCC_LINKCMD_SHIFT;
994 dd->ibcc_mpl_shift = INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
995
996 /* Fill in shifts for RcvCtrl. */
997 dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT;
998 dd->ipath_r_intravail_shift = INFINIPATH_R_INTRAVAIL_SHIFT;
999 dd->ipath_r_tailupd_shift = INFINIPATH_R_TAILUPD_SHIFT;
1000 dd->ipath_r_portcfg_shift = 0; /* Not on IBA6120 */
1001
919 /* variables for sanity checking interrupt and errors */ 1002 /* variables for sanity checking interrupt and errors */
920 dd->ipath_hwe_bitsextant = 1003 dd->ipath_hwe_bitsextant =
921 (INFINIPATH_HWE_RXEMEMPARITYERR_MASK << 1004 (INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
@@ -963,6 +1046,8 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd)
963 1046
964 dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; 1047 dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
965 dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; 1048 dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
1049 dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT;
1050 dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT;
966 1051
967 /* 1052 /*
968 * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. 1053 * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
@@ -984,6 +1069,7 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd)
984 INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET; 1069 INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;
985 1070
986 1071
1072 dd->delay_mult = 2; /* SDR, 4X, can't change */
987} 1073}
988 1074
989/* setup the MSI stuff again after a reset. I'd like to just call 1075/* setup the MSI stuff again after a reset. I'd like to just call
@@ -1289,6 +1375,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd)
1289 */ 1375 */
1290 dd->ipath_rcvhdrentsize = 24; 1376 dd->ipath_rcvhdrentsize = 24;
1291 dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE; 1377 dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
1378 dd->ipath_rhf_offset = 0;
1379 dd->ipath_egrtidbase = (u64 __iomem *)
1380 ((char __iomem *) dd->ipath_kregbase + dd->ipath_rcvegrbase);
1292 1381
1293 /* 1382 /*
1294 * To truly support a 4KB MTU (for usermode), we need to 1383 * To truly support a 4KB MTU (for usermode), we need to
@@ -1359,34 +1448,204 @@ static void ipath_pe_free_irq(struct ipath_devdata *dd)
1359 dd->ipath_irq = 0; 1448 dd->ipath_irq = 0;
1360} 1449}
1361 1450
1451
1452static struct ipath_message_header *
1453ipath_pe_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr)
1454{
1455 return (struct ipath_message_header *)
1456 &rhf_addr[sizeof(u64) / sizeof(u32)];
1457}
1458
1459static void ipath_pe_config_ports(struct ipath_devdata *dd, ushort cfgports)
1460{
1461 dd->ipath_portcnt =
1462 ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
1463 dd->ipath_p0_rcvegrcnt =
1464 ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
1465}
1466
1467static void ipath_pe_read_counters(struct ipath_devdata *dd,
1468 struct infinipath_counters *cntrs)
1469{
1470 cntrs->LBIntCnt =
1471 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBIntCnt));
1472 cntrs->LBFlowStallCnt =
1473 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBFlowStallCnt));
1474 cntrs->TxSDmaDescCnt = 0;
1475 cntrs->TxUnsupVLErrCnt =
1476 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnsupVLErrCnt));
1477 cntrs->TxDataPktCnt =
1478 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDataPktCnt));
1479 cntrs->TxFlowPktCnt =
1480 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowPktCnt));
1481 cntrs->TxDwordCnt =
1482 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDwordCnt));
1483 cntrs->TxLenErrCnt =
1484 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxLenErrCnt));
1485 cntrs->TxMaxMinLenErrCnt =
1486 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxMaxMinLenErrCnt));
1487 cntrs->TxUnderrunCnt =
1488 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnderrunCnt));
1489 cntrs->TxFlowStallCnt =
1490 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowStallCnt));
1491 cntrs->TxDroppedPktCnt =
1492 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDroppedPktCnt));
1493 cntrs->RxDroppedPktCnt =
1494 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDroppedPktCnt));
1495 cntrs->RxDataPktCnt =
1496 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDataPktCnt));
1497 cntrs->RxFlowPktCnt =
1498 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowPktCnt));
1499 cntrs->RxDwordCnt =
1500 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDwordCnt));
1501 cntrs->RxLenErrCnt =
1502 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLenErrCnt));
1503 cntrs->RxMaxMinLenErrCnt =
1504 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxMaxMinLenErrCnt));
1505 cntrs->RxICRCErrCnt =
1506 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxICRCErrCnt));
1507 cntrs->RxVCRCErrCnt =
1508 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxVCRCErrCnt));
1509 cntrs->RxFlowCtrlErrCnt =
1510 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowCtrlErrCnt));
1511 cntrs->RxBadFormatCnt =
1512 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBadFormatCnt));
1513 cntrs->RxLinkProblemCnt =
1514 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLinkProblemCnt));
1515 cntrs->RxEBPCnt =
1516 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxEBPCnt));
1517 cntrs->RxLPCRCErrCnt =
1518 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLPCRCErrCnt));
1519 cntrs->RxBufOvflCnt =
1520 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBufOvflCnt));
1521 cntrs->RxTIDFullErrCnt =
1522 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDFullErrCnt));
1523 cntrs->RxTIDValidErrCnt =
1524 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDValidErrCnt));
1525 cntrs->RxPKeyMismatchCnt =
1526 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxPKeyMismatchCnt));
1527 cntrs->RxP0HdrEgrOvflCnt =
1528 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt));
1529 cntrs->RxP1HdrEgrOvflCnt =
1530 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP1HdrEgrOvflCnt));
1531 cntrs->RxP2HdrEgrOvflCnt =
1532 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP2HdrEgrOvflCnt));
1533 cntrs->RxP3HdrEgrOvflCnt =
1534 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP3HdrEgrOvflCnt));
1535 cntrs->RxP4HdrEgrOvflCnt =
1536 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP4HdrEgrOvflCnt));
1537 cntrs->RxP5HdrEgrOvflCnt = 0;
1538 cntrs->RxP6HdrEgrOvflCnt = 0;
1539 cntrs->RxP7HdrEgrOvflCnt = 0;
1540 cntrs->RxP8HdrEgrOvflCnt = 0;
1541 cntrs->RxP9HdrEgrOvflCnt = 0;
1542 cntrs->RxP10HdrEgrOvflCnt = 0;
1543 cntrs->RxP11HdrEgrOvflCnt = 0;
1544 cntrs->RxP12HdrEgrOvflCnt = 0;
1545 cntrs->RxP13HdrEgrOvflCnt = 0;
1546 cntrs->RxP14HdrEgrOvflCnt = 0;
1547 cntrs->RxP15HdrEgrOvflCnt = 0;
1548 cntrs->RxP16HdrEgrOvflCnt = 0;
1549 cntrs->IBStatusChangeCnt =
1550 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBStatusChangeCnt));
1551 cntrs->IBLinkErrRecoveryCnt =
1552 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt));
1553 cntrs->IBLinkDownedCnt =
1554 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkDownedCnt));
1555 cntrs->IBSymbolErrCnt =
1556 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBSymbolErrCnt));
1557 cntrs->RxVL15DroppedPktCnt = 0;
1558 cntrs->RxOtherLocalPhyErrCnt = 0;
1559 cntrs->PcieRetryBufDiagQwordCnt = 0;
1560 cntrs->ExcessBufferOvflCnt = dd->ipath_overrun_thresh_errs;
1561 cntrs->LocalLinkIntegrityErrCnt = dd->ipath_lli_errs;
1562 cntrs->RxVlErrCnt = 0;
1563 cntrs->RxDlidFltrCnt = 0;
1564}
1565
1566
1567/* no interrupt fallback for these chips */
1568static int ipath_pe_nointr_fallback(struct ipath_devdata *dd)
1569{
1570 return 0;
1571}
1572
1573
1362/* 1574/*
1363 * On platforms using this chip, and not having ordered WC stores, we 1575 * reset the XGXS (between serdes and IBC). Slightly less intrusive
1364 * can get TXE parity errors due to speculative reads to the PIO buffers, 1576 * than resetting the IBC or external link state, and useful in some
1365 * and this, due to a chip bug can result in (many) false parity error 1577 * cases to cause some retraining. To do this right, we reset IBC
1366 * reports. So it's a debug print on those, and an info print on systems 1578 * as well.
1367 * where the speculative reads don't occur.
1368 * Because we can get lots of false errors, we have no upper limit
1369 * on recovery attempts on those platforms.
1370 */ 1579 */
1371static int ipath_pe_txe_recover(struct ipath_devdata *dd) 1580static void ipath_pe_xgxs_reset(struct ipath_devdata *dd)
1372{ 1581{
1373 if (ipath_unordered_wc()) 1582 u64 val, prev_val;
1374 ipath_dbg("Recovering from TXE PIO parity error\n"); 1583
1375 else { 1584 prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
1376 int cnt = ++ipath_stats.sps_txeparity; 1585 val = prev_val | INFINIPATH_XGXS_RESET;
1377 if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) { 1586 prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */
1378 if (cnt == IPATH_MAX_PARITY_ATTEMPTS) 1587 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
1379 ipath_dev_err(dd, 1588 dd->ipath_control & ~INFINIPATH_C_LINKENABLE);
1380 "Too many attempts to recover from " 1589 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
1381 "TXE parity, giving up\n"); 1590 ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
1382 return 0; 1591 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val);
1383 } 1592 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
1384 dev_info(&dd->pcidev->dev, 1593 dd->ipath_control);
1385 "Recovering from TXE PIO parity error\n"); 1594}
1595
1596
1597static int ipath_pe_get_ib_cfg(struct ipath_devdata *dd, int which)
1598{
1599 int ret;
1600
1601 switch (which) {
1602 case IPATH_IB_CFG_LWID:
1603 ret = dd->ipath_link_width_active;
1604 break;
1605 case IPATH_IB_CFG_SPD:
1606 ret = dd->ipath_link_speed_active;
1607 break;
1608 case IPATH_IB_CFG_LWID_ENB:
1609 ret = dd->ipath_link_width_enabled;
1610 break;
1611 case IPATH_IB_CFG_SPD_ENB:
1612 ret = dd->ipath_link_speed_enabled;
1613 break;
1614 default:
1615 ret = -ENOTSUPP;
1616 break;
1386 } 1617 }
1387 return 1; 1618 return ret;
1619}
1620
1621
1622/* we assume range checking is already done, if needed */
1623static int ipath_pe_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val)
1624{
1625 int ret = 0;
1626
1627 if (which == IPATH_IB_CFG_LWID_ENB)
1628 dd->ipath_link_width_enabled = val;
1629 else if (which == IPATH_IB_CFG_SPD_ENB)
1630 dd->ipath_link_speed_enabled = val;
1631 else
1632 ret = -ENOTSUPP;
1633 return ret;
1388} 1634}
1389 1635
1636static void ipath_pe_config_jint(struct ipath_devdata *dd, u16 a, u16 b)
1637{
1638}
1639
1640
1641static int ipath_pe_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
1642{
1643 ipath_setup_pe_setextled(dd, ipath_ib_linkstate(dd, ibcs),
1644 ipath_ib_linktrstate(dd, ibcs));
1645 return 0;
1646}
1647
1648
1390/** 1649/**
1391 * ipath_init_iba6120_funcs - set up the chip-specific function pointers 1650 * ipath_init_iba6120_funcs - set up the chip-specific function pointers
1392 * @dd: the infinipath device 1651 * @dd: the infinipath device
@@ -1407,7 +1666,7 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *dd)
1407 dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes; 1666 dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes;
1408 dd->ipath_f_clear_tids = ipath_pe_clear_tids; 1667 dd->ipath_f_clear_tids = ipath_pe_clear_tids;
1409 /* 1668 /*
1410 * this may get changed after we read the chip revision, 1669 * _f_put_tid may get changed after we read the chip revision,
1411 * but we start with the safe version for all revs 1670 * but we start with the safe version for all revs
1412 */ 1671 */
1413 dd->ipath_f_put_tid = ipath_pe_put_tid; 1672 dd->ipath_f_put_tid = ipath_pe_put_tid;
@@ -1415,17 +1674,19 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *dd)
1415 dd->ipath_f_setextled = ipath_setup_pe_setextled; 1674 dd->ipath_f_setextled = ipath_setup_pe_setextled;
1416 dd->ipath_f_get_base_info = ipath_pe_get_base_info; 1675 dd->ipath_f_get_base_info = ipath_pe_get_base_info;
1417 dd->ipath_f_free_irq = ipath_pe_free_irq; 1676 dd->ipath_f_free_irq = ipath_pe_free_irq;
1418
1419 /* initialize chip-specific variables */
1420 dd->ipath_f_tidtemplate = ipath_pe_tidtemplate; 1677 dd->ipath_f_tidtemplate = ipath_pe_tidtemplate;
1678 dd->ipath_f_intr_fallback = ipath_pe_nointr_fallback;
1679 dd->ipath_f_xgxs_reset = ipath_pe_xgxs_reset;
1680 dd->ipath_f_get_msgheader = ipath_pe_get_msgheader;
1681 dd->ipath_f_config_ports = ipath_pe_config_ports;
1682 dd->ipath_f_read_counters = ipath_pe_read_counters;
1683 dd->ipath_f_get_ib_cfg = ipath_pe_get_ib_cfg;
1684 dd->ipath_f_set_ib_cfg = ipath_pe_set_ib_cfg;
1685 dd->ipath_f_config_jint = ipath_pe_config_jint;
1686 dd->ipath_f_ib_updown = ipath_pe_ib_updown;
1421 1687
1422 /*
1423 * setup the register offsets, since they are different for each
1424 * chip
1425 */
1426 dd->ipath_kregs = &ipath_pe_kregs;
1427 dd->ipath_cregs = &ipath_pe_cregs;
1428 1688
1689 /* initialize chip-specific variables */
1429 ipath_init_pe_variables(dd); 1690 ipath_init_pe_variables(dd);
1430} 1691}
1431 1692
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 9dd0bacf8461..4471674975cd 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -91,7 +91,7 @@ static int create_port0_egr(struct ipath_devdata *dd)
91 struct ipath_skbinfo *skbinfo; 91 struct ipath_skbinfo *skbinfo;
92 int ret; 92 int ret;
93 93
94 egrcnt = dd->ipath_rcvegrcnt; 94 egrcnt = dd->ipath_p0_rcvegrcnt;
95 95
96 skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt); 96 skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt);
97 if (skbinfo == NULL) { 97 if (skbinfo == NULL) {
@@ -244,8 +244,7 @@ static int init_chip_first(struct ipath_devdata *dd,
244 * cfgports. We do still check and report a difference, if 244 * cfgports. We do still check and report a difference, if
245 * not same (should be impossible). 245 * not same (should be impossible).
246 */ 246 */
247 dd->ipath_portcnt = 247 dd->ipath_f_config_ports(dd, ipath_cfgports);
248 ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
249 if (!ipath_cfgports) 248 if (!ipath_cfgports)
250 dd->ipath_cfgports = dd->ipath_portcnt; 249 dd->ipath_cfgports = dd->ipath_portcnt;
251 else if (ipath_cfgports <= dd->ipath_portcnt) { 250 else if (ipath_cfgports <= dd->ipath_portcnt) {
@@ -272,22 +271,7 @@ static int init_chip_first(struct ipath_devdata *dd,
272 goto done; 271 goto done;
273 } 272 }
274 273
275 dd->ipath_lastegrheads = kzalloc(sizeof(*dd->ipath_lastegrheads)
276 * dd->ipath_cfgports,
277 GFP_KERNEL);
278 dd->ipath_lastrcvhdrqtails =
279 kzalloc(sizeof(*dd->ipath_lastrcvhdrqtails)
280 * dd->ipath_cfgports, GFP_KERNEL);
281
282 if (!dd->ipath_lastegrheads || !dd->ipath_lastrcvhdrqtails) {
283 ipath_dev_err(dd, "Unable to allocate head arrays, "
284 "failing\n");
285 ret = -ENOMEM;
286 goto done;
287 }
288
289 pd = create_portdata0(dd); 274 pd = create_portdata0(dd);
290
291 if (!pd) { 275 if (!pd) {
292 ipath_dev_err(dd, "Unable to allocate portdata for port " 276 ipath_dev_err(dd, "Unable to allocate portdata for port "
293 "0, failing\n"); 277 "0, failing\n");
@@ -345,10 +329,10 @@ static int init_chip_first(struct ipath_devdata *dd,
345 dd->ipath_piobcnt2k, dd->ipath_pio2kbase); 329 dd->ipath_piobcnt2k, dd->ipath_pio2kbase);
346 330
347 spin_lock_init(&dd->ipath_tid_lock); 331 spin_lock_init(&dd->ipath_tid_lock);
348 332 spin_lock_init(&dd->ipath_sendctrl_lock);
349 spin_lock_init(&dd->ipath_gpio_lock); 333 spin_lock_init(&dd->ipath_gpio_lock);
350 spin_lock_init(&dd->ipath_eep_st_lock); 334 spin_lock_init(&dd->ipath_eep_st_lock);
351 sema_init(&dd->ipath_eep_sem, 1); 335 mutex_init(&dd->ipath_eep_lock);
352 336
353done: 337done:
354 *pdp = pd; 338 *pdp = pd;
@@ -372,9 +356,9 @@ static int init_chip_reset(struct ipath_devdata *dd,
372 *pdp = dd->ipath_pd[0]; 356 *pdp = dd->ipath_pd[0];
373 /* ensure chip does no sends or receives while we re-initialize */ 357 /* ensure chip does no sends or receives while we re-initialize */
374 dd->ipath_control = dd->ipath_sendctrl = dd->ipath_rcvctrl = 0U; 358 dd->ipath_control = dd->ipath_sendctrl = dd->ipath_rcvctrl = 0U;
375 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 0); 359 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl);
376 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 0); 360 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
377 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0); 361 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, dd->ipath_control);
378 362
379 rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt); 363 rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
380 if (dd->ipath_portcnt != rtmp) 364 if (dd->ipath_portcnt != rtmp)
@@ -487,6 +471,7 @@ static void enable_chip(struct ipath_devdata *dd,
487 struct ipath_portdata *pd, int reinit) 471 struct ipath_portdata *pd, int reinit)
488{ 472{
489 u32 val; 473 u32 val;
474 unsigned long flags;
490 int i; 475 int i;
491 476
492 if (!reinit) 477 if (!reinit)
@@ -495,19 +480,21 @@ static void enable_chip(struct ipath_devdata *dd,
495 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 480 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
496 dd->ipath_rcvctrl); 481 dd->ipath_rcvctrl);
497 482
483 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
498 /* Enable PIO send, and update of PIOavail regs to memory. */ 484 /* Enable PIO send, and update of PIOavail regs to memory. */
499 dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE | 485 dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE |
500 INFINIPATH_S_PIOBUFAVAILUPD; 486 INFINIPATH_S_PIOBUFAVAILUPD;
501 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 487 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
502 dd->ipath_sendctrl); 488 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
489 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
503 490
504 /* 491 /*
505 * enable port 0 receive, and receive interrupt. other ports 492 * enable port 0 receive, and receive interrupt. other ports
506 * done as user opens and inits them. 493 * done as user opens and inits them.
507 */ 494 */
508 dd->ipath_rcvctrl = INFINIPATH_R_TAILUPD | 495 dd->ipath_rcvctrl = (1ULL << dd->ipath_r_tailupd_shift) |
509 (1ULL << INFINIPATH_R_PORTENABLE_SHIFT) | 496 (1ULL << dd->ipath_r_portenable_shift) |
510 (1ULL << INFINIPATH_R_INTRAVAIL_SHIFT); 497 (1ULL << dd->ipath_r_intravail_shift);
511 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 498 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
512 dd->ipath_rcvctrl); 499 dd->ipath_rcvctrl);
513 500
@@ -523,12 +510,11 @@ static void enable_chip(struct ipath_devdata *dd,
523 */ 510 */
524 val = ipath_read_ureg32(dd, ur_rcvegrindextail, 0); 511 val = ipath_read_ureg32(dd, ur_rcvegrindextail, 0);
525 (void)ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0); 512 (void)ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0);
526 dd->ipath_port0head = ipath_read_ureg32(dd, ur_rcvhdrtail, 0);
527 513
528 /* Initialize so we interrupt on next packet received */ 514 /* Initialize so we interrupt on next packet received */
529 (void)ipath_write_ureg(dd, ur_rcvhdrhead, 515 (void)ipath_write_ureg(dd, ur_rcvhdrhead,
530 dd->ipath_rhdrhead_intr_off | 516 dd->ipath_rhdrhead_intr_off |
531 dd->ipath_port0head, 0); 517 dd->ipath_pd[0]->port_head, 0);
532 518
533 /* 519 /*
534 * by now pioavail updates to memory should have occurred, so 520 * by now pioavail updates to memory should have occurred, so
@@ -542,12 +528,8 @@ static void enable_chip(struct ipath_devdata *dd,
542 /* 528 /*
543 * Chip Errata bug 6641; even and odd qwords>3 are swapped. 529 * Chip Errata bug 6641; even and odd qwords>3 are swapped.
544 */ 530 */
545 if (i > 3) { 531 if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
546 if (i & 1) 532 val = dd->ipath_pioavailregs_dma[i ^ 1];
547 val = dd->ipath_pioavailregs_dma[i - 1];
548 else
549 val = dd->ipath_pioavailregs_dma[i + 1];
550 }
551 else 533 else
552 val = dd->ipath_pioavailregs_dma[i]; 534 val = dd->ipath_pioavailregs_dma[i];
553 dd->ipath_pioavailshadow[i] = le64_to_cpu(val); 535 dd->ipath_pioavailshadow[i] = le64_to_cpu(val);
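Editorial note: the errata handling above replaces the old even/odd if/else with a single XOR. For indices above 3, i ^ 1 exchanges each even/odd pair, and the swap is now gated on the IPATH_SWAP_PIOBUFS flag that ipath_setup_ht_config() sets for the 6110. A tiny illustration, not part of the patch:

	/* i ^ 1 exchanges adjacent even/odd indices, which is the qword
	 * swap called for by chip errata bug 6641 on affected chips */
	int i;
	for (i = 4; i < 8; i++)
		printk(KERN_DEBUG "pioavail index %d reads dma[%d]\n", i, i ^ 1);
	/* prints pairs 4->5, 5->4, 6->7, 7->6 */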
@@ -690,12 +672,13 @@ done:
690 */ 672 */
691int ipath_init_chip(struct ipath_devdata *dd, int reinit) 673int ipath_init_chip(struct ipath_devdata *dd, int reinit)
692{ 674{
693 int ret = 0, i; 675 int ret = 0;
694 u32 val32, kpiobufs; 676 u32 val32, kpiobufs;
695 u32 piobufs, uports; 677 u32 piobufs, uports;
696 u64 val; 678 u64 val;
697 struct ipath_portdata *pd = NULL; /* keep gcc4 happy */ 679 struct ipath_portdata *pd = NULL; /* keep gcc4 happy */
698 gfp_t gfp_flags = GFP_USER | __GFP_COMP; 680 gfp_t gfp_flags = GFP_USER | __GFP_COMP;
681 unsigned long flags;
699 682
700 ret = init_housekeeping(dd, &pd, reinit); 683 ret = init_housekeeping(dd, &pd, reinit);
701 if (ret) 684 if (ret)
@@ -746,7 +729,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
746 kpiobufs = ipath_kpiobufs; 729 kpiobufs = ipath_kpiobufs;
747 730
748 if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) { 731 if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) {
749 i = (int) piobufs - 732 int i = (int) piobufs -
750 (int) (uports * IPATH_MIN_USER_PORT_BUFCNT); 733 (int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
751 if (i < 0) 734 if (i < 0)
752 i = 0; 735 i = 0;
@@ -827,8 +810,12 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
827 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 810 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
828 ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED); 811 ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
829 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL); 812 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
830 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 813
831 INFINIPATH_S_PIOENABLE); 814 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
815 dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE;
816 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
817 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
818 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
832 819
833 /* 820 /*
834 * before error clears, since we expect serdes pll errors during 821 * before error clears, since we expect serdes pll errors during
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index c61f9da2964a..92e58c921522 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -683,7 +683,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
683 for (i = 0; i < dd->ipath_cfgports; i++) { 683 for (i = 0; i < dd->ipath_cfgports; i++) {
684 struct ipath_portdata *pd = dd->ipath_pd[i]; 684 struct ipath_portdata *pd = dd->ipath_pd[i];
685 if (i == 0) { 685 if (i == 0) {
686 hd = dd->ipath_port0head; 686 hd = pd->port_head;
687 tl = (u32) le64_to_cpu( 687 tl = (u32) le64_to_cpu(
688 *dd->ipath_hdrqtailptr); 688 *dd->ipath_hdrqtailptr);
689 } else if (pd && pd->port_cnt && 689 } else if (pd && pd->port_cnt &&
@@ -693,7 +693,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
693 * except kernel 693 * except kernel
694 */ 694 */
695 tl = *(u64 *) pd->port_rcvhdrtail_kvaddr; 695 tl = *(u64 *) pd->port_rcvhdrtail_kvaddr;
696 if (tl == dd->ipath_lastrcvhdrqtails[i]) 696 if (tl == pd->port_lastrcvhdrqtail)
697 continue; 697 continue;
698 hd = ipath_read_ureg32(dd, ur_rcvhdrhead, 698 hd = ipath_read_ureg32(dd, ur_rcvhdrhead,
699 i); 699 i);
@@ -703,7 +703,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
703 (!hd && tl == dd->ipath_hdrqlast)) { 703 (!hd && tl == dd->ipath_hdrqlast)) {
704 if (i == 0) 704 if (i == 0)
705 chkerrpkts = 1; 705 chkerrpkts = 1;
706 dd->ipath_lastrcvhdrqtails[i] = tl; 706 pd->port_lastrcvhdrqtail = tl;
707 pd->port_hdrqfull++; 707 pd->port_hdrqfull++;
708 /* flush hdrqfull so that poll() sees it */ 708 /* flush hdrqfull so that poll() sees it */
709 wmb(); 709 wmb();
@@ -712,6 +712,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
712 } 712 }
713 } 713 }
714 if (errs & INFINIPATH_E_RRCVEGRFULL) { 714 if (errs & INFINIPATH_E_RRCVEGRFULL) {
715 struct ipath_portdata *pd = dd->ipath_pd[0];
716
715 /* 717 /*
716 * since this is of less importance and not likely to 718 * since this is of less importance and not likely to
717 * happen without also getting hdrfull, only count 719 * happen without also getting hdrfull, only count
@@ -719,7 +721,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
719 * vs user) 721 * vs user)
720 */ 722 */
721 ipath_stats.sps_etidfull++; 723 ipath_stats.sps_etidfull++;
722 if (dd->ipath_port0head != 724 if (pd->port_head !=
723 (u32) le64_to_cpu(*dd->ipath_hdrqtailptr)) 725 (u32) le64_to_cpu(*dd->ipath_hdrqtailptr))
724 chkerrpkts = 1; 726 chkerrpkts = 1;
725 } 727 }
@@ -795,6 +797,7 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
795{ 797{
796 int i, im; 798 int i, im;
797 __le64 val; 799 __le64 val;
800 unsigned long flags;
798 801
799 /* disable error interrupts, to avoid confusion */ 802 /* disable error interrupts, to avoid confusion */
800 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL); 803 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
@@ -813,11 +816,14 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
813 dd->ipath_control); 816 dd->ipath_control);
814 817
815 /* ensure pio avail updates continue */ 818 /* ensure pio avail updates continue */
819 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
816 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 820 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
817 dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD); 821 dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
818 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); 822 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
819 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 823 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
820 dd->ipath_sendctrl); 824 dd->ipath_sendctrl);
825 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
826 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
821 827
822 /* 828 /*
823 * We just enabled pioavailupdate, so dma copy is almost certainly 829 * We just enabled pioavailupdate, so dma copy is almost certainly
@@ -825,8 +831,8 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
825 */ 831 */
826 for (i = 0; i < dd->ipath_pioavregs; i++) { 832 for (i = 0; i < dd->ipath_pioavregs; i++) {
827 /* deal with 6110 chip bug */ 833 /* deal with 6110 chip bug */
828 im = i > 3 ? ((i&1) ? i-1 : i+1) : i; 834 im = i > 3 ? i ^ 1 : i;
829 val = ipath_read_kreg64(dd, (0x1000/sizeof(u64))+im); 835 val = ipath_read_kreg64(dd, (0x1000 / sizeof(u64)) + im);
830 dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i] 836 dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i]
831 = le64_to_cpu(val); 837 = le64_to_cpu(val);
832 } 838 }
@@ -849,7 +855,7 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
849 855
850/* this is separate to allow for better optimization of ipath_intr() */ 856/* this is separate to allow for better optimization of ipath_intr() */
851 857
852static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp) 858static noinline void ipath_bad_intr(struct ipath_devdata *dd, u32 *unexpectp)
853{ 859{
854 /* 860 /*
855 * sometimes happen during driver init and unload, don't want 861 * sometimes happen during driver init and unload, don't want
@@ -877,7 +883,7 @@ static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp)
877 dd->ipath_f_free_irq(dd); 883 dd->ipath_f_free_irq(dd);
878 } 884 }
879 } 885 }
880 if (ipath_read_kreg32(dd, dd->ipath_kregs->kr_intmask)) { 886 if (ipath_read_ireg(dd, dd->ipath_kregs->kr_intmask)) {
881 ipath_dev_err(dd, "%u unexpected interrupts, " 887 ipath_dev_err(dd, "%u unexpected interrupts, "
882 "disabling interrupts completely\n", 888 "disabling interrupts completely\n",
883 *unexpectp); 889 *unexpectp);
@@ -892,7 +898,7 @@ static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp)
892 "ignoring\n"); 898 "ignoring\n");
893} 899}
894 900
895static void ipath_bad_regread(struct ipath_devdata *dd) 901static noinline void ipath_bad_regread(struct ipath_devdata *dd)
896{ 902{
897 static int allbits; 903 static int allbits;
898 904
@@ -920,31 +926,9 @@ static void ipath_bad_regread(struct ipath_devdata *dd)
920 } 926 }
921} 927}
922 928
923static void handle_port_pioavail(struct ipath_devdata *dd)
924{
925 u32 i;
926 /*
927 * start from port 1, since for now port 0 is never using
928 * wait_event for PIO
929 */
930 for (i = 1; dd->ipath_portpiowait && i < dd->ipath_cfgports; i++) {
931 struct ipath_portdata *pd = dd->ipath_pd[i];
932
933 if (pd && pd->port_cnt &&
934 dd->ipath_portpiowait & (1U << i)) {
935 clear_bit(i, &dd->ipath_portpiowait);
936 if (test_bit(IPATH_PORT_WAITING_PIO,
937 &pd->port_flag)) {
938 clear_bit(IPATH_PORT_WAITING_PIO,
939 &pd->port_flag);
940 wake_up_interruptible(&pd->port_wait);
941 }
942 }
943 }
944}
945
946static void handle_layer_pioavail(struct ipath_devdata *dd) 929static void handle_layer_pioavail(struct ipath_devdata *dd)
947{ 930{
931 unsigned long flags;
948 int ret; 932 int ret;
949 933
950 ret = ipath_ib_piobufavail(dd->verbs_dev); 934 ret = ipath_ib_piobufavail(dd->verbs_dev);
@@ -953,9 +937,12 @@ static void handle_layer_pioavail(struct ipath_devdata *dd)
953 937
954 return; 938 return;
955set: 939set:
956 set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); 940 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
941 dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
957 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 942 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
958 dd->ipath_sendctrl); 943 dd->ipath_sendctrl);
944 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
945 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
959} 946}
960 947
961/* 948/*
@@ -969,7 +956,15 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
969 int i; 956 int i;
970 int rcvdint = 0; 957 int rcvdint = 0;
971 958
972 /* test_bit below needs this... */ 959 /*
960 * test_and_clear_bit(IPATH_PORT_WAITING_RCV) and
961 * test_and_clear_bit(IPATH_PORT_WAITING_URG) below
962 * would both like timely updates of the bits so that
 963 * we don't pass them by unnecessarily. The rmb()
964 * here ensures that we see them promptly -- the
965 * corresponding wmb()'s are in ipath_poll_urgent()
966 * and ipath_poll_next()...
967 */
973 rmb(); 968 rmb();
974 portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) & 969 portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) &
975 dd->ipath_i_rcvavail_mask) 970 dd->ipath_i_rcvavail_mask)
@@ -980,7 +975,7 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
980 if (portr & (1 << i) && pd && pd->port_cnt) { 975 if (portr & (1 << i) && pd && pd->port_cnt) {
981 if (test_and_clear_bit(IPATH_PORT_WAITING_RCV, 976 if (test_and_clear_bit(IPATH_PORT_WAITING_RCV,
982 &pd->port_flag)) { 977 &pd->port_flag)) {
983 clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT, 978 clear_bit(i + dd->ipath_r_intravail_shift,
984 &dd->ipath_rcvctrl); 979 &dd->ipath_rcvctrl);
985 wake_up_interruptible(&pd->port_wait); 980 wake_up_interruptible(&pd->port_wait);
986 rcvdint = 1; 981 rcvdint = 1;
@@ -1039,7 +1034,7 @@ irqreturn_t ipath_intr(int irq, void *data)
1039 goto bail; 1034 goto bail;
1040 } 1035 }
1041 1036
1042 istat = ipath_read_kreg32(dd, dd->ipath_kregs->kr_intstatus); 1037 istat = ipath_read_ireg(dd, dd->ipath_kregs->kr_intstatus);
1043 1038
1044 if (unlikely(!istat)) { 1039 if (unlikely(!istat)) {
1045 ipath_stats.sps_nullintr++; 1040 ipath_stats.sps_nullintr++;
@@ -1180,7 +1175,7 @@ irqreturn_t ipath_intr(int irq, void *data)
1180 * for receive are at the bottom. 1175 * for receive are at the bottom.
1181 */ 1176 */
1182 if (chk0rcv) { 1177 if (chk0rcv) {
1183 ipath_kreceive(dd); 1178 ipath_kreceive(dd->ipath_pd[0]);
1184 istat &= ~port0rbits; 1179 istat &= ~port0rbits;
1185 } 1180 }
1186 1181
@@ -1191,12 +1186,14 @@ irqreturn_t ipath_intr(int irq, void *data)
1191 handle_urcv(dd, istat); 1186 handle_urcv(dd, istat);
1192 1187
1193 if (istat & INFINIPATH_I_SPIOBUFAVAIL) { 1188 if (istat & INFINIPATH_I_SPIOBUFAVAIL) {
1194 clear_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); 1189 unsigned long flags;
1190
1191 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
1192 dd->ipath_sendctrl &= ~INFINIPATH_S_PIOINTBUFAVAIL;
1195 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 1193 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1196 dd->ipath_sendctrl); 1194 dd->ipath_sendctrl);
1197 1195 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1198 if (dd->ipath_portpiowait) 1196 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1199 handle_port_pioavail(dd);
1200 1197
1201 handle_layer_pioavail(dd); 1198 handle_layer_pioavail(dd);
1202 } 1199 }
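The ipath_intr() and handle_layer_pioavail() hunks above converge on a single locking discipline for the kr_sendctrl shadow: take ipath_sendctrl_lock, update dd->ipath_sendctrl, write the register, then force the write out to the chip with a kr_scratch read before unlocking. A minimal sketch of that pattern, using only names visible in this diff; the helper name itself is hypothetical and not part of the patch:

/* hypothetical helper -- illustrates the locked sendctrl update pattern */
static void ipath_sendctrl_set_bits(struct ipath_devdata *dd, unsigned long bits)
{
        unsigned long flags;

        spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
        dd->ipath_sendctrl |= bits;     /* keep the shadow coherent with the chip */
        ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
                         dd->ipath_sendctrl);
        /* the kr_scratch read flushes the write before the lock is dropped */
        ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
        spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
}

The want_buffer() hunk in ipath_ruc.c further down adds the same scratch-read flush, so all writers of the shadow now follow this sequence.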
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index bb1dc075f1d1..4cc0f95ea877 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -41,6 +41,7 @@
41#include <linux/interrupt.h> 41#include <linux/interrupt.h>
42#include <linux/pci.h> 42#include <linux/pci.h>
43#include <linux/dma-mapping.h> 43#include <linux/dma-mapping.h>
44#include <linux/mutex.h>
44#include <asm/io.h> 45#include <asm/io.h>
45#include <rdma/ib_verbs.h> 46#include <rdma/ib_verbs.h>
46 47
@@ -140,6 +141,11 @@ struct ipath_portdata {
140 u32 port_pionowait; 141 u32 port_pionowait;
141 /* total number of rcvhdrqfull errors */ 142 /* total number of rcvhdrqfull errors */
142 u32 port_hdrqfull; 143 u32 port_hdrqfull;
144 /*
145 * Used to suppress multiple instances of same
146 * port staying stuck at same point.
147 */
148 u32 port_lastrcvhdrqtail;
143 /* saved total number of rcvhdrqfull errors for poll edge trigger */ 149 /* saved total number of rcvhdrqfull errors for poll edge trigger */
144 u32 port_hdrqfull_poll; 150 u32 port_hdrqfull_poll;
145 /* total number of polled urgent packets */ 151 /* total number of polled urgent packets */
@@ -148,6 +154,7 @@ struct ipath_portdata {
148 u32 port_urgent_poll; 154 u32 port_urgent_poll;
149 /* pid of process using this port */ 155 /* pid of process using this port */
150 pid_t port_pid; 156 pid_t port_pid;
157 pid_t port_subpid[INFINIPATH_MAX_SUBPORT];
151 /* same size as task_struct .comm[] */ 158 /* same size as task_struct .comm[] */
152 char port_comm[16]; 159 char port_comm[16];
153 /* pkeys set by this use of this port */ 160 /* pkeys set by this use of this port */
@@ -166,6 +173,8 @@ struct ipath_portdata {
166 u32 active_slaves; 173 u32 active_slaves;
167 /* Type of packets or conditions we want to poll for */ 174 /* Type of packets or conditions we want to poll for */
168 u16 poll_type; 175 u16 poll_type;
176 /* port rcvhdrq head offset */
177 u32 port_head;
169}; 178};
170 179
171struct sk_buff; 180struct sk_buff;
@@ -182,6 +191,22 @@ struct ipath_skbinfo {
182 dma_addr_t phys; 191 dma_addr_t phys;
183}; 192};
184 193
194/*
195 * Possible IB config parameters for ipath_f_get/set_ib_cfg()
196 */
197#define IPATH_IB_CFG_LIDLMC 0 /* Get/set LID (LS16b) and Mask (MS16b) */
198#define IPATH_IB_CFG_HRTBT 1 /* Get/set Heartbeat off/enable/auto */
199#define IPATH_IB_HRTBT_ON 3 /* Heartbeat enabled, sent every 100msec */
200#define IPATH_IB_HRTBT_OFF 0 /* Heartbeat off */
201#define IPATH_IB_CFG_LWID_ENB 2 /* Get/set allowed Link-width */
202#define IPATH_IB_CFG_LWID 3 /* Get currently active Link-width */
203#define IPATH_IB_CFG_SPD_ENB 4 /* Get/set allowed Link speeds */
204#define IPATH_IB_CFG_SPD 5 /* Get current Link spd */
205#define IPATH_IB_CFG_RXPOL_ENB 6 /* Get/set Auto-RX-polarity enable */
206#define IPATH_IB_CFG_LREV_ENB 7 /* Get/set Auto-Lane-reversal enable */
207#define IPATH_IB_CFG_LINKLATENCY 8 /* Get Link latency */
208
209
185struct ipath_devdata { 210struct ipath_devdata {
186 struct list_head ipath_list; 211 struct list_head ipath_list;
187 212
@@ -222,6 +247,8 @@ struct ipath_devdata {
222 struct _ipath_layer ipath_layer; 247 struct _ipath_layer ipath_layer;
223 /* setup intr */ 248 /* setup intr */
224 int (*ipath_f_intrsetup)(struct ipath_devdata *); 249 int (*ipath_f_intrsetup)(struct ipath_devdata *);
250 /* fallback to alternate interrupt type if possible */
251 int (*ipath_f_intr_fallback)(struct ipath_devdata *);
225 /* setup on-chip bus config */ 252 /* setup on-chip bus config */
226 int (*ipath_f_bus)(struct ipath_devdata *, struct pci_dev *); 253 int (*ipath_f_bus)(struct ipath_devdata *, struct pci_dev *);
227 /* hard reset chip */ 254 /* hard reset chip */
@@ -244,6 +271,18 @@ struct ipath_devdata {
244 int (*ipath_f_get_base_info)(struct ipath_portdata *, void *); 271 int (*ipath_f_get_base_info)(struct ipath_portdata *, void *);
245 /* free irq */ 272 /* free irq */
246 void (*ipath_f_free_irq)(struct ipath_devdata *); 273 void (*ipath_f_free_irq)(struct ipath_devdata *);
274 struct ipath_message_header *(*ipath_f_get_msgheader)
275 (struct ipath_devdata *, __le32 *);
276 void (*ipath_f_config_ports)(struct ipath_devdata *, ushort);
277 int (*ipath_f_get_ib_cfg)(struct ipath_devdata *, int);
278 int (*ipath_f_set_ib_cfg)(struct ipath_devdata *, int, u32);
279 void (*ipath_f_config_jint)(struct ipath_devdata *, u16 , u16);
280 void (*ipath_f_read_counters)(struct ipath_devdata *,
281 struct infinipath_counters *);
282 void (*ipath_f_xgxs_reset)(struct ipath_devdata *);
283 /* per chip actions needed for IB Link up/down changes */
284 int (*ipath_f_ib_updown)(struct ipath_devdata *, int, u64);
285
247 struct ipath_ibdev *verbs_dev; 286 struct ipath_ibdev *verbs_dev;
248 struct timer_list verbs_timer; 287 struct timer_list verbs_timer;
249 /* total dwords sent (summed from counter) */ 288 /* total dwords sent (summed from counter) */
@@ -313,22 +352,12 @@ struct ipath_devdata {
313 * supports, less gives more pio bufs/port, etc. 352 * supports, less gives more pio bufs/port, etc.
314 */ 353 */
315 u32 ipath_cfgports; 354 u32 ipath_cfgports;
316 /* port0 rcvhdrq head offset */
317 u32 ipath_port0head;
318 /* count of port 0 hdrqfull errors */ 355 /* count of port 0 hdrqfull errors */
319 u32 ipath_p0_hdrqfull; 356 u32 ipath_p0_hdrqfull;
357 /* port 0 number of receive eager buffers */
358 u32 ipath_p0_rcvegrcnt;
320 359
321 /* 360 /*
322 * (*cfgports) used to suppress multiple instances of same
323 * port staying stuck at same point
324 */
325 u32 *ipath_lastrcvhdrqtails;
326 /*
327 * (*cfgports) used to suppress multiple instances of same
328 * port staying stuck at same point
329 */
330 u32 *ipath_lastegrheads;
331 /*
332 * index of last piobuffer we used. Speeds up searching, by 361 * index of last piobuffer we used. Speeds up searching, by
333 * starting at this point. Doesn't matter if multiple cpu's use and 362 * starting at this point. Doesn't matter if multiple cpu's use and
334 * update, last updater is only write that matters. Whenever it 363 * update, last updater is only write that matters. Whenever it
@@ -367,14 +396,15 @@ struct ipath_devdata {
367 unsigned long ipath_wc_len; 396 unsigned long ipath_wc_len;
368 /* ref count for each pkey */ 397 /* ref count for each pkey */
369 atomic_t ipath_pkeyrefs[4]; 398 atomic_t ipath_pkeyrefs[4];
370 /* shadow copy of all exptids physaddr; used only by funcsim */
371 u64 *ipath_tidsimshadow;
372 /* shadow copy of struct page *'s for exp tid pages */ 399 /* shadow copy of struct page *'s for exp tid pages */
373 struct page **ipath_pageshadow; 400 struct page **ipath_pageshadow;
374 /* shadow copy of dma handles for exp tid pages */ 401 /* shadow copy of dma handles for exp tid pages */
375 dma_addr_t *ipath_physshadow; 402 dma_addr_t *ipath_physshadow;
376 /* lock to workaround chip bug 9437 */ 403 u64 __iomem *ipath_egrtidbase;
404 /* lock to workaround chip bug 9437 and others */
405 spinlock_t ipath_kernel_tid_lock;
377 spinlock_t ipath_tid_lock; 406 spinlock_t ipath_tid_lock;
407 spinlock_t ipath_sendctrl_lock;
378 408
379 /* 409 /*
380 * IPATH_STATUS_*, 410 * IPATH_STATUS_*,
@@ -395,6 +425,8 @@ struct ipath_devdata {
395 void *ipath_dummy_hdrq; /* used after port close */ 425 void *ipath_dummy_hdrq; /* used after port close */
396 dma_addr_t ipath_dummy_hdrq_phys; 426 dma_addr_t ipath_dummy_hdrq_phys;
397 427
428 unsigned long ipath_ureg_align; /* user register alignment */
429
398 /* 430 /*
399 * Shadow copies of registers; size indicates read access size. 431 * Shadow copies of registers; size indicates read access size.
400 * Most of them are readonly, but some are write-only register, 432 * Most of them are readonly, but some are write-only register,
@@ -456,8 +488,6 @@ struct ipath_devdata {
456 unsigned long ipath_rcvctrl; 488 unsigned long ipath_rcvctrl;
457 /* shadow kr_sendctrl */ 489 /* shadow kr_sendctrl */
458 unsigned long ipath_sendctrl; 490 unsigned long ipath_sendctrl;
459 /* ports waiting for PIOavail intr */
460 unsigned long ipath_portpiowait;
461 unsigned long ipath_lastcancel; /* to not count armlaunch after cancel */ 491 unsigned long ipath_lastcancel; /* to not count armlaunch after cancel */
462 492
463 /* value we put in kr_rcvhdrcnt */ 493 /* value we put in kr_rcvhdrcnt */
@@ -550,12 +580,26 @@ struct ipath_devdata {
550 u8 ipath_minrev; 580 u8 ipath_minrev;
551 /* board rev, from ipath_revision */ 581 /* board rev, from ipath_revision */
552 u8 ipath_boardrev; 582 u8 ipath_boardrev;
583
584 u8 ipath_r_portenable_shift;
585 u8 ipath_r_intravail_shift;
586 u8 ipath_r_tailupd_shift;
587 u8 ipath_r_portcfg_shift;
588
553 /* unit # of this chip, if present */ 589 /* unit # of this chip, if present */
554 int ipath_unit; 590 int ipath_unit;
555 /* saved for restore after reset */ 591 /* saved for restore after reset */
556 u8 ipath_pci_cacheline; 592 u8 ipath_pci_cacheline;
557 /* LID mask control */ 593 /* LID mask control */
558 u8 ipath_lmc; 594 u8 ipath_lmc;
595 /* link width supported */
596 u8 ipath_link_width_supported;
597 /* link speed supported */
598 u8 ipath_link_speed_supported;
599 u8 ipath_link_width_enabled;
600 u8 ipath_link_speed_enabled;
601 u8 ipath_link_width_active;
602 u8 ipath_link_speed_active;
559 /* Rx Polarity inversion (compensate for ~tx on partner) */ 603 /* Rx Polarity inversion (compensate for ~tx on partner) */
560 u8 ipath_rx_pol_inv; 604 u8 ipath_rx_pol_inv;
561 605
@@ -590,6 +634,8 @@ struct ipath_devdata {
590 */ 634 */
591 u32 ipath_i_rcvavail_mask; 635 u32 ipath_i_rcvavail_mask;
592 u32 ipath_i_rcvurg_mask; 636 u32 ipath_i_rcvurg_mask;
637 u16 ipath_i_rcvurg_shift;
638 u16 ipath_i_rcvavail_shift;
593 639
594 /* 640 /*
595 * Register bits for selecting i2c direction and values, used for 641 * Register bits for selecting i2c direction and values, used for
@@ -603,6 +649,29 @@ struct ipath_devdata {
603 /* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */ 649 /* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */
604 spinlock_t ipath_gpio_lock; 650 spinlock_t ipath_gpio_lock;
605 651
652 /*
653 * IB link and linktraining states and masks that vary per chip in
654 * some way. Set at init, to avoid each IB status change interrupt
655 */
656 u8 ibcs_ls_shift;
657 u8 ibcs_lts_mask;
658 u32 ibcs_mask;
659 u32 ib_init;
660 u32 ib_arm;
661 u32 ib_active;
662
663 u16 ipath_rhf_offset; /* offset of RHF within receive header entry */
664
665 /*
666 * shift/mask for linkcmd, linkinitcmd, maxpktlen in ibccontrol
667 * reg. Changes for IBA7220
668 */
669 u8 ibcc_lic_mask; /* LinkInitCmd */
670 u8 ibcc_lc_shift; /* LinkCmd */
671 u8 ibcc_mpl_shift; /* Maxpktlen */
672
673 u8 delay_mult;
674
606 /* used to override LED behavior */ 675 /* used to override LED behavior */
607 u8 ipath_led_override; /* Substituted for normal value, if non-zero */ 676 u8 ipath_led_override; /* Substituted for normal value, if non-zero */
608 u16 ipath_led_override_timeoff; /* delta to next timer event */ 677 u16 ipath_led_override_timeoff; /* delta to next timer event */
@@ -616,7 +685,7 @@ struct ipath_devdata {
616 /* control access to actual counters, timer */ 685 /* control access to actual counters, timer */
617 spinlock_t ipath_eep_st_lock; 686 spinlock_t ipath_eep_st_lock;
618 /* control high-level access to EEPROM */ 687 /* control high-level access to EEPROM */
619 struct semaphore ipath_eep_sem; 688 struct mutex ipath_eep_lock;
620 /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */ 689 /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */
621 uint64_t ipath_traffic_wds; 690 uint64_t ipath_traffic_wds;
622 /* active time is kept in seconds, but logged in hours */ 691 /* active time is kept in seconds, but logged in hours */
@@ -630,6 +699,10 @@ struct ipath_devdata {
630 * each of the counters to increment. 699 * each of the counters to increment.
631 */ 700 */
632 struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT]; 701 struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT];
702
703 /* interrupt mitigation reload register info */
704 u16 ipath_jint_idle_ticks; /* idle clock ticks */
705 u16 ipath_jint_max_packets; /* max packets across all ports */
633}; 706};
634 707
635/* Private data for file operations */ 708/* Private data for file operations */
@@ -690,7 +763,7 @@ void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
690 763
691int ipath_parse_ushort(const char *str, unsigned short *valp); 764int ipath_parse_ushort(const char *str, unsigned short *valp);
692 765
693void ipath_kreceive(struct ipath_devdata *); 766void ipath_kreceive(struct ipath_portdata *);
694int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned); 767int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned);
695int ipath_reset_device(int); 768int ipath_reset_device(int);
696void ipath_get_faststats(unsigned long); 769void ipath_get_faststats(unsigned long);
@@ -698,6 +771,8 @@ int ipath_set_linkstate(struct ipath_devdata *, u8);
698int ipath_set_mtu(struct ipath_devdata *, u16); 771int ipath_set_mtu(struct ipath_devdata *, u16);
699int ipath_set_lid(struct ipath_devdata *, u32, u8); 772int ipath_set_lid(struct ipath_devdata *, u32, u8);
700int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); 773int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
774void ipath_enable_armlaunch(struct ipath_devdata *);
775void ipath_disable_armlaunch(struct ipath_devdata *);
701 776
702/* for use in system calls, where we want to know device type, etc. */ 777/* for use in system calls, where we want to know device type, etc. */
703#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd 778#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd
@@ -744,9 +819,15 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
744 * are 64bit */ 819 * are 64bit */
745#define IPATH_32BITCOUNTERS 0x20000 820#define IPATH_32BITCOUNTERS 0x20000
746 /* can miss port0 rx interrupts */ 821 /* can miss port0 rx interrupts */
822 /* Interrupt register is 64 bits */
823#define IPATH_INTREG_64 0x40000
747#define IPATH_DISABLED 0x80000 /* administratively disabled */ 824#define IPATH_DISABLED 0x80000 /* administratively disabled */
748 /* Use GPIO interrupts for new counters */ 825 /* Use GPIO interrupts for new counters */
749#define IPATH_GPIO_ERRINTRS 0x100000 826#define IPATH_GPIO_ERRINTRS 0x100000
827#define IPATH_SWAP_PIOBUFS 0x200000
828 /* Suppress heartbeat, even if turning off loopback */
829#define IPATH_NO_HRTBT 0x1000000
830#define IPATH_HAS_MULT_IB_SPEED 0x8000000
750 831
751/* Bits in GPIO for the added interrupts */ 832/* Bits in GPIO for the added interrupts */
752#define IPATH_GPIO_PORT0_BIT 2 833#define IPATH_GPIO_PORT0_BIT 2
@@ -758,8 +839,6 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
758/* portdata flag bit offsets */ 839/* portdata flag bit offsets */
759 /* waiting for a packet to arrive */ 840 /* waiting for a packet to arrive */
760#define IPATH_PORT_WAITING_RCV 2 841#define IPATH_PORT_WAITING_RCV 2
761 /* waiting for a PIO buffer to be available */
762#define IPATH_PORT_WAITING_PIO 3
763 /* master has not finished initializing */ 842 /* master has not finished initializing */
764#define IPATH_PORT_MASTER_UNINIT 4 843#define IPATH_PORT_MASTER_UNINIT 4
765 /* waiting for an urgent packet to arrive */ 844 /* waiting for an urgent packet to arrive */
@@ -767,8 +846,6 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
767 846
768/* free up any allocated data at closes */ 847/* free up any allocated data at closes */
769void ipath_free_data(struct ipath_portdata *dd); 848void ipath_free_data(struct ipath_portdata *dd);
770int ipath_waitfor_mdio_cmdready(struct ipath_devdata *);
771int ipath_waitfor_complete(struct ipath_devdata *, ipath_kreg, u64, u64 *);
772u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *); 849u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *);
773void ipath_init_iba6120_funcs(struct ipath_devdata *); 850void ipath_init_iba6120_funcs(struct ipath_devdata *);
774void ipath_init_iba6110_funcs(struct ipath_devdata *); 851void ipath_init_iba6110_funcs(struct ipath_devdata *);
@@ -792,33 +869,6 @@ void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val);
792 */ 869 */
793#define IPATH_DFLT_RCVHDRSIZE 9 870#define IPATH_DFLT_RCVHDRSIZE 9
794 871
795#define IPATH_MDIO_CMD_WRITE 1
796#define IPATH_MDIO_CMD_READ 2
797#define IPATH_MDIO_CLD_DIV 25 /* to get 2.5 Mhz mdio clock */
798#define IPATH_MDIO_CMDVALID 0x40000000 /* bit 30 */
799#define IPATH_MDIO_DATAVALID 0x80000000 /* bit 31 */
800#define IPATH_MDIO_CTRL_STD 0x0
801
802static inline u64 ipath_mdio_req(int cmd, int dev, int reg, int data)
803{
804 return (((u64) IPATH_MDIO_CLD_DIV) << 32) |
805 (cmd << 26) |
806 (dev << 21) |
807 (reg << 16) |
808 (data & 0xFFFF);
809}
810
811 /* signal and fifo status, in bank 31 */
812#define IPATH_MDIO_CTRL_XGXS_REG_8 0x8
813 /* controls loopback, redundancy */
814#define IPATH_MDIO_CTRL_8355_REG_1 0x10
815 /* premph, encdec, etc. */
816#define IPATH_MDIO_CTRL_8355_REG_2 0x11
817 /* Kchars, etc. */
818#define IPATH_MDIO_CTRL_8355_REG_6 0x15
819#define IPATH_MDIO_CTRL_8355_REG_9 0x18
820#define IPATH_MDIO_CTRL_8355_REG_10 0x1D
821
822int ipath_get_user_pages(unsigned long, size_t, struct page **); 872int ipath_get_user_pages(unsigned long, size_t, struct page **);
823void ipath_release_user_pages(struct page **, size_t); 873void ipath_release_user_pages(struct page **, size_t);
824void ipath_release_user_pages_on_close(struct page **, size_t); 874void ipath_release_user_pages_on_close(struct page **, size_t);
@@ -863,7 +913,7 @@ static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd,
863 return readl(regno + (u64 __iomem *) 913 return readl(regno + (u64 __iomem *)
864 (dd->ipath_uregbase + 914 (dd->ipath_uregbase +
865 (char __iomem *)dd->ipath_kregbase + 915 (char __iomem *)dd->ipath_kregbase +
866 dd->ipath_palign * port)); 916 dd->ipath_ureg_align * port));
867} 917}
868 918
869/** 919/**
@@ -880,7 +930,7 @@ static inline void ipath_write_ureg(const struct ipath_devdata *dd,
880{ 930{
881 u64 __iomem *ubase = (u64 __iomem *) 931 u64 __iomem *ubase = (u64 __iomem *)
882 (dd->ipath_uregbase + (char __iomem *) dd->ipath_kregbase + 932 (dd->ipath_uregbase + (char __iomem *) dd->ipath_kregbase +
883 dd->ipath_palign * port); 933 dd->ipath_ureg_align * port);
884 if (dd->ipath_kregbase) 934 if (dd->ipath_kregbase)
885 writeq(value, &ubase[regno]); 935 writeq(value, &ubase[regno]);
886} 936}
@@ -930,6 +980,53 @@ static inline u32 ipath_read_creg32(const struct ipath_devdata *dd,
930 (char __iomem *)dd->ipath_kregbase)); 980 (char __iomem *)dd->ipath_kregbase));
931} 981}
932 982
983static inline void ipath_write_creg(const struct ipath_devdata *dd,
984 ipath_creg regno, u64 value)
985{
986 if (dd->ipath_kregbase)
987 writeq(value, regno + (u64 __iomem *)
988 (dd->ipath_cregbase +
989 (char __iomem *)dd->ipath_kregbase));
990}
991
992static inline void ipath_clear_rcvhdrtail(const struct ipath_portdata *pd)
993{
994 *((u64 *) pd->port_rcvhdrtail_kvaddr) = 0ULL;
995}
996
997static inline u32 ipath_get_rcvhdrtail(const struct ipath_portdata *pd)
998{
999 return (u32) le64_to_cpu(*((volatile __le64 *)
1000 pd->port_rcvhdrtail_kvaddr));
1001}
1002
1003static inline u64 ipath_read_ireg(const struct ipath_devdata *dd, ipath_kreg r)
1004{
1005 return (dd->ipath_flags & IPATH_INTREG_64) ?
1006 ipath_read_kreg64(dd, r) : ipath_read_kreg32(dd, r);
1007}
1008
1009/*
1010 * from contents of IBCStatus (or a saved copy), return linkstate
1011 * Report ACTIVE_DEFER as ACTIVE, because we treat them the same
1012 * everywhere, anyway (and should be, for almost all purposes).
1013 */
1014static inline u32 ipath_ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
1015{
1016 u32 state = (u32)(ibcs >> dd->ibcs_ls_shift) &
1017 INFINIPATH_IBCS_LINKSTATE_MASK;
1018 if (state == INFINIPATH_IBCS_L_STATE_ACT_DEFER)
1019 state = INFINIPATH_IBCS_L_STATE_ACTIVE;
1020 return state;
1021}
1022
1023/* from contents of IBCStatus (or a saved copy), return linktrainingstate */
1024static inline u32 ipath_ib_linktrstate(struct ipath_devdata *dd, u64 ibcs)
1025{
1026 return (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
1027 dd->ibcs_lts_mask;
1028}
1029
933/* 1030/*
934 * sysfs interface. 1031 * sysfs interface.
935 */ 1032 */
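The new per-chip fields above (ipath_r_intravail_shift and friends, the IPATH_INTREG_64 flag, and the ipath_read_ireg() inline) let generic code stop hard-coding one chip's register layout. A small illustrative sketch, assuming only the fields declared in this header; the function name is made up and the snippet is not part of the patch:

/* hypothetical example of using the per-chip shift and ireg helpers */
static void example_quiet_port_rcvintr(struct ipath_devdata *dd, unsigned port)
{
        u64 istat;

        /* per-chip bit position for this port's rcvavail interrupt,
         * replacing the removed INFINIPATH_R_INTRAVAIL_SHIFT constant */
        clear_bit(port + dd->ipath_r_intravail_shift, &dd->ipath_rcvctrl);
        ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl);

        /* 32- or 64-bit read, depending on whether IPATH_INTREG_64 is set */
        istat = ipath_read_ireg(dd, dd->ipath_kregs->kr_intstatus);
        (void) istat;
}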
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index 85a4aefc6c03..8f32b17a5eed 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -128,9 +128,8 @@ int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
128 int ret; 128 int ret;
129 129
130 /* 130 /*
131 * We use LKEY == zero to mean a physical kmalloc() address. 131 * We use LKEY == zero for kernel virtual addresses
132 * This is a bit of a hack since we rely on dma_map_single() 132 * (see ipath_get_dma_mr and ipath_dma.c).
133 * being reversible by calling bus_to_virt().
134 */ 133 */
135 if (sge->lkey == 0) { 134 if (sge->lkey == 0) {
136 struct ipath_pd *pd = to_ipd(qp->ibqp.pd); 135 struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index 3d1432d1e3f4..d98d5f103700 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -934,6 +934,7 @@ static int recv_pma_get_portsamplescontrol(struct ib_perf *pmp,
934 struct ib_pma_portsamplescontrol *p = 934 struct ib_pma_portsamplescontrol *p =
935 (struct ib_pma_portsamplescontrol *)pmp->data; 935 (struct ib_pma_portsamplescontrol *)pmp->data;
936 struct ipath_ibdev *dev = to_idev(ibdev); 936 struct ipath_ibdev *dev = to_idev(ibdev);
937 struct ipath_cregs const *crp = dev->dd->ipath_cregs;
937 unsigned long flags; 938 unsigned long flags;
938 u8 port_select = p->port_select; 939 u8 port_select = p->port_select;
939 940
@@ -955,7 +956,10 @@ static int recv_pma_get_portsamplescontrol(struct ib_perf *pmp,
955 p->counter_width = 4; /* 32 bit counters */ 956 p->counter_width = 4; /* 32 bit counters */
956 p->counter_mask0_9 = COUNTER_MASK0_9; 957 p->counter_mask0_9 = COUNTER_MASK0_9;
957 spin_lock_irqsave(&dev->pending_lock, flags); 958 spin_lock_irqsave(&dev->pending_lock, flags);
958 p->sample_status = dev->pma_sample_status; 959 if (crp->cr_psstat)
960 p->sample_status = ipath_read_creg32(dev->dd, crp->cr_psstat);
961 else
962 p->sample_status = dev->pma_sample_status;
959 p->sample_start = cpu_to_be32(dev->pma_sample_start); 963 p->sample_start = cpu_to_be32(dev->pma_sample_start);
960 p->sample_interval = cpu_to_be32(dev->pma_sample_interval); 964 p->sample_interval = cpu_to_be32(dev->pma_sample_interval);
961 p->tag = cpu_to_be16(dev->pma_tag); 965 p->tag = cpu_to_be16(dev->pma_tag);
@@ -975,8 +979,9 @@ static int recv_pma_set_portsamplescontrol(struct ib_perf *pmp,
975 struct ib_pma_portsamplescontrol *p = 979 struct ib_pma_portsamplescontrol *p =
976 (struct ib_pma_portsamplescontrol *)pmp->data; 980 (struct ib_pma_portsamplescontrol *)pmp->data;
977 struct ipath_ibdev *dev = to_idev(ibdev); 981 struct ipath_ibdev *dev = to_idev(ibdev);
982 struct ipath_cregs const *crp = dev->dd->ipath_cregs;
978 unsigned long flags; 983 unsigned long flags;
979 u32 start; 984 u8 status;
980 int ret; 985 int ret;
981 986
982 if (pmp->attr_mod != 0 || 987 if (pmp->attr_mod != 0 ||
@@ -986,59 +991,67 @@ static int recv_pma_set_portsamplescontrol(struct ib_perf *pmp,
986 goto bail; 991 goto bail;
987 } 992 }
988 993
989 start = be32_to_cpu(p->sample_start); 994 spin_lock_irqsave(&dev->pending_lock, flags);
990 if (start != 0) { 995 if (crp->cr_psstat)
991 spin_lock_irqsave(&dev->pending_lock, flags); 996 status = ipath_read_creg32(dev->dd, crp->cr_psstat);
992 if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_DONE) { 997 else
993 dev->pma_sample_status = 998 status = dev->pma_sample_status;
994 IB_PMA_SAMPLE_STATUS_STARTED; 999 if (status == IB_PMA_SAMPLE_STATUS_DONE) {
995 dev->pma_sample_start = start; 1000 dev->pma_sample_start = be32_to_cpu(p->sample_start);
996 dev->pma_sample_interval = 1001 dev->pma_sample_interval = be32_to_cpu(p->sample_interval);
997 be32_to_cpu(p->sample_interval); 1002 dev->pma_tag = be16_to_cpu(p->tag);
998 dev->pma_tag = be16_to_cpu(p->tag); 1003 dev->pma_counter_select[0] = p->counter_select[0];
999 if (p->counter_select[0]) 1004 dev->pma_counter_select[1] = p->counter_select[1];
1000 dev->pma_counter_select[0] = 1005 dev->pma_counter_select[2] = p->counter_select[2];
1001 p->counter_select[0]; 1006 dev->pma_counter_select[3] = p->counter_select[3];
1002 if (p->counter_select[1]) 1007 dev->pma_counter_select[4] = p->counter_select[4];
1003 dev->pma_counter_select[1] = 1008 if (crp->cr_psstat) {
1004 p->counter_select[1]; 1009 ipath_write_creg(dev->dd, crp->cr_psinterval,
1005 if (p->counter_select[2]) 1010 dev->pma_sample_interval);
1006 dev->pma_counter_select[2] = 1011 ipath_write_creg(dev->dd, crp->cr_psstart,
1007 p->counter_select[2]; 1012 dev->pma_sample_start);
1008 if (p->counter_select[3]) 1013 } else
1009 dev->pma_counter_select[3] = 1014 dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_STARTED;
1010 p->counter_select[3];
1011 if (p->counter_select[4])
1012 dev->pma_counter_select[4] =
1013 p->counter_select[4];
1014 }
1015 spin_unlock_irqrestore(&dev->pending_lock, flags);
1016 } 1015 }
1016 spin_unlock_irqrestore(&dev->pending_lock, flags);
1017
1017 ret = recv_pma_get_portsamplescontrol(pmp, ibdev, port); 1018 ret = recv_pma_get_portsamplescontrol(pmp, ibdev, port);
1018 1019
1019bail: 1020bail:
1020 return ret; 1021 return ret;
1021} 1022}
1022 1023
1023static u64 get_counter(struct ipath_ibdev *dev, __be16 sel) 1024static u64 get_counter(struct ipath_ibdev *dev,
1025 struct ipath_cregs const *crp,
1026 __be16 sel)
1024{ 1027{
1025 u64 ret; 1028 u64 ret;
1026 1029
1027 switch (sel) { 1030 switch (sel) {
1028 case IB_PMA_PORT_XMIT_DATA: 1031 case IB_PMA_PORT_XMIT_DATA:
1029 ret = dev->ipath_sword; 1032 ret = (crp->cr_psxmitdatacount) ?
1033 ipath_read_creg32(dev->dd, crp->cr_psxmitdatacount) :
1034 dev->ipath_sword;
1030 break; 1035 break;
1031 case IB_PMA_PORT_RCV_DATA: 1036 case IB_PMA_PORT_RCV_DATA:
1032 ret = dev->ipath_rword; 1037 ret = (crp->cr_psrcvdatacount) ?
1038 ipath_read_creg32(dev->dd, crp->cr_psrcvdatacount) :
1039 dev->ipath_rword;
1033 break; 1040 break;
1034 case IB_PMA_PORT_XMIT_PKTS: 1041 case IB_PMA_PORT_XMIT_PKTS:
1035 ret = dev->ipath_spkts; 1042 ret = (crp->cr_psxmitpktscount) ?
1043 ipath_read_creg32(dev->dd, crp->cr_psxmitpktscount) :
1044 dev->ipath_spkts;
1036 break; 1045 break;
1037 case IB_PMA_PORT_RCV_PKTS: 1046 case IB_PMA_PORT_RCV_PKTS:
1038 ret = dev->ipath_rpkts; 1047 ret = (crp->cr_psrcvpktscount) ?
1048 ipath_read_creg32(dev->dd, crp->cr_psrcvpktscount) :
1049 dev->ipath_rpkts;
1039 break; 1050 break;
1040 case IB_PMA_PORT_XMIT_WAIT: 1051 case IB_PMA_PORT_XMIT_WAIT:
1041 ret = dev->ipath_xmit_wait; 1052 ret = (crp->cr_psxmitwaitcount) ?
1053 ipath_read_creg32(dev->dd, crp->cr_psxmitwaitcount) :
1054 dev->ipath_xmit_wait;
1042 break; 1055 break;
1043 default: 1056 default:
1044 ret = 0; 1057 ret = 0;
@@ -1053,14 +1066,21 @@ static int recv_pma_get_portsamplesresult(struct ib_perf *pmp,
1053 struct ib_pma_portsamplesresult *p = 1066 struct ib_pma_portsamplesresult *p =
1054 (struct ib_pma_portsamplesresult *)pmp->data; 1067 (struct ib_pma_portsamplesresult *)pmp->data;
1055 struct ipath_ibdev *dev = to_idev(ibdev); 1068 struct ipath_ibdev *dev = to_idev(ibdev);
1069 struct ipath_cregs const *crp = dev->dd->ipath_cregs;
1070 u8 status;
1056 int i; 1071 int i;
1057 1072
1058 memset(pmp->data, 0, sizeof(pmp->data)); 1073 memset(pmp->data, 0, sizeof(pmp->data));
1059 p->tag = cpu_to_be16(dev->pma_tag); 1074 p->tag = cpu_to_be16(dev->pma_tag);
1060 p->sample_status = cpu_to_be16(dev->pma_sample_status); 1075 if (crp->cr_psstat)
1076 status = ipath_read_creg32(dev->dd, crp->cr_psstat);
1077 else
1078 status = dev->pma_sample_status;
1079 p->sample_status = cpu_to_be16(status);
1061 for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++) 1080 for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
1062 p->counter[i] = cpu_to_be32( 1081 p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 :
1063 get_counter(dev, dev->pma_counter_select[i])); 1082 cpu_to_be32(
1083 get_counter(dev, crp, dev->pma_counter_select[i]));
1064 1084
1065 return reply((struct ib_smp *) pmp); 1085 return reply((struct ib_smp *) pmp);
1066} 1086}
@@ -1071,16 +1091,23 @@ static int recv_pma_get_portsamplesresult_ext(struct ib_perf *pmp,
1071 struct ib_pma_portsamplesresult_ext *p = 1091 struct ib_pma_portsamplesresult_ext *p =
1072 (struct ib_pma_portsamplesresult_ext *)pmp->data; 1092 (struct ib_pma_portsamplesresult_ext *)pmp->data;
1073 struct ipath_ibdev *dev = to_idev(ibdev); 1093 struct ipath_ibdev *dev = to_idev(ibdev);
1094 struct ipath_cregs const *crp = dev->dd->ipath_cregs;
1095 u8 status;
1074 int i; 1096 int i;
1075 1097
1076 memset(pmp->data, 0, sizeof(pmp->data)); 1098 memset(pmp->data, 0, sizeof(pmp->data));
1077 p->tag = cpu_to_be16(dev->pma_tag); 1099 p->tag = cpu_to_be16(dev->pma_tag);
1078 p->sample_status = cpu_to_be16(dev->pma_sample_status); 1100 if (crp->cr_psstat)
1101 status = ipath_read_creg32(dev->dd, crp->cr_psstat);
1102 else
1103 status = dev->pma_sample_status;
1104 p->sample_status = cpu_to_be16(status);
1079 /* 64 bits */ 1105 /* 64 bits */
1080 p->extended_width = __constant_cpu_to_be32(0x80000000); 1106 p->extended_width = __constant_cpu_to_be32(0x80000000);
1081 for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++) 1107 for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
1082 p->counter[i] = cpu_to_be64( 1108 p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 :
1083 get_counter(dev, dev->pma_counter_select[i])); 1109 cpu_to_be64(
1110 get_counter(dev, crp, dev->pma_counter_select[i]));
1084 1111
1085 return reply((struct ib_smp *) pmp); 1112 return reply((struct ib_smp *) pmp);
1086} 1113}
@@ -1113,6 +1140,8 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp,
1113 dev->z_local_link_integrity_errors; 1140 dev->z_local_link_integrity_errors;
1114 cntrs.excessive_buffer_overrun_errors -= 1141 cntrs.excessive_buffer_overrun_errors -=
1115 dev->z_excessive_buffer_overrun_errors; 1142 dev->z_excessive_buffer_overrun_errors;
1143 cntrs.vl15_dropped -= dev->z_vl15_dropped;
1144 cntrs.vl15_dropped += dev->n_vl15_dropped;
1116 1145
1117 memset(pmp->data, 0, sizeof(pmp->data)); 1146 memset(pmp->data, 0, sizeof(pmp->data));
1118 1147
@@ -1156,10 +1185,10 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp,
1156 cntrs.excessive_buffer_overrun_errors = 0xFUL; 1185 cntrs.excessive_buffer_overrun_errors = 0xFUL;
1157 p->lli_ebor_errors = (cntrs.local_link_integrity_errors << 4) | 1186 p->lli_ebor_errors = (cntrs.local_link_integrity_errors << 4) |
1158 cntrs.excessive_buffer_overrun_errors; 1187 cntrs.excessive_buffer_overrun_errors;
1159 if (dev->n_vl15_dropped > 0xFFFFUL) 1188 if (cntrs.vl15_dropped > 0xFFFFUL)
1160 p->vl15_dropped = __constant_cpu_to_be16(0xFFFF); 1189 p->vl15_dropped = __constant_cpu_to_be16(0xFFFF);
1161 else 1190 else
1162 p->vl15_dropped = cpu_to_be16((u16)dev->n_vl15_dropped); 1191 p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped);
1163 if (cntrs.port_xmit_data > 0xFFFFFFFFUL) 1192 if (cntrs.port_xmit_data > 0xFFFFFFFFUL)
1164 p->port_xmit_data = __constant_cpu_to_be32(0xFFFFFFFF); 1193 p->port_xmit_data = __constant_cpu_to_be32(0xFFFFFFFF);
1165 else 1194 else
@@ -1262,8 +1291,10 @@ static int recv_pma_set_portcounters(struct ib_perf *pmp,
1262 dev->z_excessive_buffer_overrun_errors = 1291 dev->z_excessive_buffer_overrun_errors =
1263 cntrs.excessive_buffer_overrun_errors; 1292 cntrs.excessive_buffer_overrun_errors;
1264 1293
1265 if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) 1294 if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) {
1266 dev->n_vl15_dropped = 0; 1295 dev->n_vl15_dropped = 0;
1296 dev->z_vl15_dropped = cntrs.vl15_dropped;
1297 }
1267 1298
1268 if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA) 1299 if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA)
1269 dev->z_port_xmit_data = cntrs.port_xmit_data; 1300 dev->z_port_xmit_data = cntrs.port_xmit_data;
@@ -1434,7 +1465,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
1434 * before checking for other consumers. 1465 * before checking for other consumers.
1435 * Just tell the caller to process it normally. 1466 * Just tell the caller to process it normally.
1436 */ 1467 */
1437 ret = IB_MAD_RESULT_FAILURE; 1468 ret = IB_MAD_RESULT_SUCCESS;
1438 goto bail; 1469 goto bail;
1439 default: 1470 default:
1440 smp->status |= IB_SMP_UNSUP_METHOD; 1471 smp->status |= IB_SMP_UNSUP_METHOD;
@@ -1516,7 +1547,7 @@ static int process_perf(struct ib_device *ibdev, u8 port_num,
1516 * before checking for other consumers. 1547 * before checking for other consumers.
1517 * Just tell the caller to process it normally. 1548 * Just tell the caller to process it normally.
1518 */ 1549 */
1519 ret = IB_MAD_RESULT_FAILURE; 1550 ret = IB_MAD_RESULT_SUCCESS;
1520 goto bail; 1551 goto bail;
1521 default: 1552 default:
1522 pmp->status |= IB_SMP_UNSUP_METHOD; 1553 pmp->status |= IB_SMP_UNSUP_METHOD;
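The ipath_mad.c hunks repeat one pattern: if the chip exposes a hardware port-sample status counter (crp->cr_psstat is a valid creg), read it; otherwise fall back to the software shadow kept in the ibdev. A condensed, hypothetical helper showing the same fallback (not part of the patch; only names already used in these hunks are assumed):

/* hypothetical helper -- hardware sample status with software fallback */
static u8 pma_sample_status(struct ipath_ibdev *dev)
{
        struct ipath_cregs const *crp = dev->dd->ipath_cregs;

        /* newer chips report sample status in a counter register */
        return crp->cr_psstat ?
                (u8) ipath_read_creg32(dev->dd, crp->cr_psstat) :
                dev->pma_sample_status;
}

The get_counter() changes apply the same idea per counter: prefer the cr_ps* hardware register when the chip defines one, else use the software-maintained value.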
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index b997ff88401b..80dc623cee40 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -387,8 +387,8 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
387 struct ib_wc wc; 387 struct ib_wc wc;
388 int ret = 0; 388 int ret = 0;
389 389
390 ipath_dbg("QP%d/%d in error state\n", 390 ipath_dbg("QP%d/%d in error state (%d)\n",
391 qp->ibqp.qp_num, qp->remote_qpn); 391 qp->ibqp.qp_num, qp->remote_qpn, err);
392 392
393 spin_lock(&dev->pending_lock); 393 spin_lock(&dev->pending_lock);
394 /* XXX What if its already removed by the timeout code? */ 394 /* XXX What if its already removed by the timeout code? */
@@ -855,8 +855,6 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
855 * See ipath_mmap() for details. 855 * See ipath_mmap() for details.
856 */ 856 */
857 if (udata && udata->outlen >= sizeof(__u64)) { 857 if (udata && udata->outlen >= sizeof(__u64)) {
858 int err;
859
860 if (!qp->r_rq.wq) { 858 if (!qp->r_rq.wq) {
861 __u64 offset = 0; 859 __u64 offset = 0;
862 860
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 120a61b03bc4..459e46e2c016 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -647,6 +647,7 @@ static void send_rc_ack(struct ipath_qp *qp)
647 647
648queue_ack: 648queue_ack:
649 spin_lock_irqsave(&qp->s_lock, flags); 649 spin_lock_irqsave(&qp->s_lock, flags);
650 dev->n_rc_qacks++;
650 qp->s_flags |= IPATH_S_ACK_PENDING; 651 qp->s_flags |= IPATH_S_ACK_PENDING;
651 qp->s_nak_state = qp->r_nak_state; 652 qp->s_nak_state = qp->r_nak_state;
652 qp->s_ack_psn = qp->r_ack_psn; 653 qp->s_ack_psn = qp->r_ack_psn;
@@ -798,11 +799,13 @@ bail:
798 799
799static inline void update_last_psn(struct ipath_qp *qp, u32 psn) 800static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
800{ 801{
801 if (qp->s_wait_credit) { 802 if (qp->s_last_psn != psn) {
802 qp->s_wait_credit = 0; 803 qp->s_last_psn = psn;
803 tasklet_hi_schedule(&qp->s_task); 804 if (qp->s_wait_credit) {
805 qp->s_wait_credit = 0;
806 tasklet_hi_schedule(&qp->s_task);
807 }
804 } 808 }
805 qp->s_last_psn = psn;
806} 809}
807 810
808/** 811/**
@@ -1653,13 +1656,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1653 case OP(SEND_FIRST): 1656 case OP(SEND_FIRST):
1654 if (!ipath_get_rwqe(qp, 0)) { 1657 if (!ipath_get_rwqe(qp, 0)) {
1655 rnr_nak: 1658 rnr_nak:
1656 /*
1657 * A RNR NAK will ACK earlier sends and RDMA writes.
1658 * Don't queue the NAK if a RDMA read or atomic
1659 * is pending though.
1660 */
1661 if (qp->r_nak_state)
1662 goto done;
1663 qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; 1659 qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
1664 qp->r_ack_psn = qp->r_psn; 1660 qp->r_ack_psn = qp->r_psn;
1665 goto send_ack; 1661 goto send_ack;
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index 708eba3165d7..6d2a17f9c1da 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -82,8 +82,7 @@
82 82
83/* kr_rcvctrl bits */ 83/* kr_rcvctrl bits */
84#define INFINIPATH_R_PORTENABLE_SHIFT 0 84#define INFINIPATH_R_PORTENABLE_SHIFT 0
85#define INFINIPATH_R_INTRAVAIL_SHIFT 16 85#define INFINIPATH_R_QPMAP_ENABLE (1ULL << 38)
86#define INFINIPATH_R_TAILUPD 0x80000000
87 86
88/* kr_intstatus, kr_intclear, kr_intmask bits */ 87/* kr_intstatus, kr_intclear, kr_intmask bits */
89#define INFINIPATH_I_RCVURG_SHIFT 0 88#define INFINIPATH_I_RCVURG_SHIFT 0
@@ -272,20 +271,6 @@
272#define INFINIPATH_EXTC_LEDGBLOK_ON 0x00000002ULL 271#define INFINIPATH_EXTC_LEDGBLOK_ON 0x00000002ULL
273#define INFINIPATH_EXTC_LEDGBLERR_OFF 0x00000001ULL 272#define INFINIPATH_EXTC_LEDGBLERR_OFF 0x00000001ULL
274 273
275/* kr_mdio bits */
276#define INFINIPATH_MDIO_CLKDIV_MASK 0x7FULL
277#define INFINIPATH_MDIO_CLKDIV_SHIFT 32
278#define INFINIPATH_MDIO_COMMAND_MASK 0x7ULL
279#define INFINIPATH_MDIO_COMMAND_SHIFT 26
280#define INFINIPATH_MDIO_DEVADDR_MASK 0x1FULL
281#define INFINIPATH_MDIO_DEVADDR_SHIFT 21
282#define INFINIPATH_MDIO_REGADDR_MASK 0x1FULL
283#define INFINIPATH_MDIO_REGADDR_SHIFT 16
284#define INFINIPATH_MDIO_DATA_MASK 0xFFFFULL
285#define INFINIPATH_MDIO_DATA_SHIFT 0
286#define INFINIPATH_MDIO_CMDVALID 0x0000000040000000ULL
287#define INFINIPATH_MDIO_RDDATAVALID 0x0000000080000000ULL
288
289/* kr_partitionkey bits */ 274/* kr_partitionkey bits */
290#define INFINIPATH_PKEY_SIZE 16 275#define INFINIPATH_PKEY_SIZE 16
291#define INFINIPATH_PKEY_MASK 0xFFFF 276#define INFINIPATH_PKEY_MASK 0xFFFF
@@ -303,8 +288,6 @@
303 288
304/* kr_xgxsconfig bits */ 289/* kr_xgxsconfig bits */
305#define INFINIPATH_XGXS_RESET 0x7ULL 290#define INFINIPATH_XGXS_RESET 0x7ULL
306#define INFINIPATH_XGXS_MDIOADDR_MASK 0xfULL
307#define INFINIPATH_XGXS_MDIOADDR_SHIFT 4
308#define INFINIPATH_XGXS_RX_POL_SHIFT 19 291#define INFINIPATH_XGXS_RX_POL_SHIFT 19
309#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL 292#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
310 293
@@ -470,6 +453,20 @@ struct ipath_cregs {
470 ipath_creg cr_unsupvlcnt; 453 ipath_creg cr_unsupvlcnt;
471 ipath_creg cr_wordrcvcnt; 454 ipath_creg cr_wordrcvcnt;
472 ipath_creg cr_wordsendcnt; 455 ipath_creg cr_wordsendcnt;
456 ipath_creg cr_vl15droppedpktcnt;
457 ipath_creg cr_rxotherlocalphyerrcnt;
458 ipath_creg cr_excessbufferovflcnt;
459 ipath_creg cr_locallinkintegrityerrcnt;
460 ipath_creg cr_rxvlerrcnt;
461 ipath_creg cr_rxdlidfltrcnt;
462 ipath_creg cr_psstat;
463 ipath_creg cr_psstart;
464 ipath_creg cr_psinterval;
465 ipath_creg cr_psrcvdatacount;
466 ipath_creg cr_psrcvpktscount;
467 ipath_creg cr_psxmitdatacount;
468 ipath_creg cr_psxmitpktscount;
469 ipath_creg cr_psxmitwaitcount;
473}; 470};
474 471
475#endif /* _IPATH_REGISTERS_H */ 472#endif /* _IPATH_REGISTERS_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index 54c61a972de2..a59bdbd0ed87 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -98,11 +98,15 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
98 while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) { 98 while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) {
99 qp->s_rnr_timeout -= nqp->s_rnr_timeout; 99 qp->s_rnr_timeout -= nqp->s_rnr_timeout;
100 l = l->next; 100 l = l->next;
101 if (l->next == &dev->rnrwait) 101 if (l->next == &dev->rnrwait) {
102 nqp = NULL;
102 break; 103 break;
104 }
103 nqp = list_entry(l->next, struct ipath_qp, 105 nqp = list_entry(l->next, struct ipath_qp,
104 timerwait); 106 timerwait);
105 } 107 }
108 if (nqp)
109 nqp->s_rnr_timeout -= qp->s_rnr_timeout;
106 list_add(&qp->timerwait, l); 110 list_add(&qp->timerwait, l);
107 } 111 }
108 spin_unlock_irqrestore(&dev->pending_lock, flags); 112 spin_unlock_irqrestore(&dev->pending_lock, flags);
@@ -479,9 +483,14 @@ done:
479 483
480static void want_buffer(struct ipath_devdata *dd) 484static void want_buffer(struct ipath_devdata *dd)
481{ 485{
482 set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); 486 unsigned long flags;
487
488 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
489 dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
483 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 490 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
484 dd->ipath_sendctrl); 491 dd->ipath_sendctrl);
492 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
493 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
485} 494}
486 495
487/** 496/**
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c
index 2fef36f4b675..f772102e4713 100644
--- a/drivers/infiniband/hw/ipath/ipath_srq.c
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -94,8 +94,8 @@ bail:
94/** 94/**
95 * ipath_create_srq - create a shared receive queue 95 * ipath_create_srq - create a shared receive queue
96 * @ibpd: the protection domain of the SRQ to create 96 * @ibpd: the protection domain of the SRQ to create
97 * @attr: the attributes of the SRQ 97 * @srq_init_attr: the attributes of the SRQ
98 * @udata: not used by the InfiniPath verbs driver 98 * @udata: data from libipathverbs when creating a user SRQ
99 */ 99 */
100struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, 100struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
101 struct ib_srq_init_attr *srq_init_attr, 101 struct ib_srq_init_attr *srq_init_attr,
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index f0271415cd5b..d2725cd11bdc 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -133,15 +133,16 @@ bail:
133static void ipath_qcheck(struct ipath_devdata *dd) 133static void ipath_qcheck(struct ipath_devdata *dd)
134{ 134{
135 static u64 last_tot_hdrqfull; 135 static u64 last_tot_hdrqfull;
136 struct ipath_portdata *pd = dd->ipath_pd[0];
136 size_t blen = 0; 137 size_t blen = 0;
137 char buf[128]; 138 char buf[128];
138 139
139 *buf = 0; 140 *buf = 0;
140 if (dd->ipath_pd[0]->port_hdrqfull != dd->ipath_p0_hdrqfull) { 141 if (pd->port_hdrqfull != dd->ipath_p0_hdrqfull) {
141 blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u", 142 blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u",
142 dd->ipath_pd[0]->port_hdrqfull - 143 pd->port_hdrqfull -
143 dd->ipath_p0_hdrqfull); 144 dd->ipath_p0_hdrqfull);
144 dd->ipath_p0_hdrqfull = dd->ipath_pd[0]->port_hdrqfull; 145 dd->ipath_p0_hdrqfull = pd->port_hdrqfull;
145 } 146 }
146 if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) { 147 if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) {
147 blen += snprintf(buf + blen, sizeof buf - blen, 148 blen += snprintf(buf + blen, sizeof buf - blen,
@@ -173,7 +174,7 @@ static void ipath_qcheck(struct ipath_devdata *dd)
173 if (blen) 174 if (blen)
174 ipath_dbg("%s\n", buf); 175 ipath_dbg("%s\n", buf);
175 176
176 if (dd->ipath_port0head != (u32) 177 if (pd->port_head != (u32)
177 le64_to_cpu(*dd->ipath_hdrqtailptr)) { 178 le64_to_cpu(*dd->ipath_hdrqtailptr)) {
178 if (dd->ipath_lastport0rcv_cnt == 179 if (dd->ipath_lastport0rcv_cnt ==
179 ipath_stats.sps_port0pkts) { 180 ipath_stats.sps_port0pkts) {
@@ -181,7 +182,7 @@ static void ipath_qcheck(struct ipath_devdata *dd)
181 "port0 hd=%llx tl=%x; port0pkts %llx\n", 182 "port0 hd=%llx tl=%x; port0pkts %llx\n",
182 (unsigned long long) 183 (unsigned long long)
183 le64_to_cpu(*dd->ipath_hdrqtailptr), 184 le64_to_cpu(*dd->ipath_hdrqtailptr),
184 dd->ipath_port0head, 185 pd->port_head,
185 (unsigned long long) 186 (unsigned long long)
186 ipath_stats.sps_port0pkts); 187 ipath_stats.sps_port0pkts);
187 } 188 }
@@ -237,7 +238,7 @@ static void ipath_chk_errormask(struct ipath_devdata *dd)
237void ipath_get_faststats(unsigned long opaque) 238void ipath_get_faststats(unsigned long opaque)
238{ 239{
239 struct ipath_devdata *dd = (struct ipath_devdata *) opaque; 240 struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
240 u32 val; 241 int i;
241 static unsigned cnt; 242 static unsigned cnt;
242 unsigned long flags; 243 unsigned long flags;
243 u64 traffic_wds; 244 u64 traffic_wds;
@@ -321,12 +322,11 @@ void ipath_get_faststats(unsigned long opaque)
321 322
322 /* limit qfull messages to ~one per minute per port */ 323 /* limit qfull messages to ~one per minute per port */
323 if ((++cnt & 0x10)) { 324 if ((++cnt & 0x10)) {
324 for (val = dd->ipath_cfgports - 1; ((int)val) >= 0; 325 for (i = (int) dd->ipath_cfgports; --i >= 0; ) {
325 val--) { 326 struct ipath_portdata *pd = dd->ipath_pd[i];
326 if (dd->ipath_lastegrheads[val] != -1) 327
327 dd->ipath_lastegrheads[val] = -1; 328 if (pd && pd->port_lastrcvhdrqtail != -1)
328 if (dd->ipath_lastrcvhdrqtails[val] != -1) 329 pd->port_lastrcvhdrqtail = -1;
329 dd->ipath_lastrcvhdrqtails[val] = -1;
330 } 330 }
331 } 331 }
332 332
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index aa27ca9f03b1..56dfc8a2344c 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -363,6 +363,60 @@ static ssize_t show_unit(struct device *dev,
363 return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_unit); 363 return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_unit);
364} 364}
365 365
366static ssize_t show_jint_max_packets(struct device *dev,
367 struct device_attribute *attr,
368 char *buf)
369{
370 struct ipath_devdata *dd = dev_get_drvdata(dev);
371
372 return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_max_packets);
373}
374
375static ssize_t store_jint_max_packets(struct device *dev,
376 struct device_attribute *attr,
377 const char *buf,
378 size_t count)
379{
380 struct ipath_devdata *dd = dev_get_drvdata(dev);
381 u16 v = 0;
382 int ret;
383
384 ret = ipath_parse_ushort(buf, &v);
385 if (ret < 0)
386 ipath_dev_err(dd, "invalid jint_max_packets.\n");
387 else
388 dd->ipath_f_config_jint(dd, dd->ipath_jint_idle_ticks, v);
389
390 return ret;
391}
392
393static ssize_t show_jint_idle_ticks(struct device *dev,
394 struct device_attribute *attr,
395 char *buf)
396{
397 struct ipath_devdata *dd = dev_get_drvdata(dev);
398
399 return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_idle_ticks);
400}
401
402static ssize_t store_jint_idle_ticks(struct device *dev,
403 struct device_attribute *attr,
404 const char *buf,
405 size_t count)
406{
407 struct ipath_devdata *dd = dev_get_drvdata(dev);
408 u16 v = 0;
409 int ret;
410
411 ret = ipath_parse_ushort(buf, &v);
412 if (ret < 0)
413 ipath_dev_err(dd, "invalid jint_idle_ticks.\n");
414 else
415 dd->ipath_f_config_jint(dd, v, dd->ipath_jint_max_packets);
416
417 return ret;
418}
419
366#define DEVICE_COUNTER(name, attr) \ 420#define DEVICE_COUNTER(name, attr) \
367 static ssize_t show_counter_##name(struct device *dev, \ 421 static ssize_t show_counter_##name(struct device *dev, \
368 struct device_attribute *attr, \ 422 struct device_attribute *attr, \
@@ -670,6 +724,257 @@ static ssize_t show_logged_errs(struct device *dev,
670 return count; 724 return count;
671} 725}
672 726
727/*
728 * New sysfs entries to control various IB config. These all turn into
729 * accesses via ipath_f_get/set_ib_cfg.
730 *
731 * Get/Set heartbeat enable. OR of 1=enabled, 2=auto
732 */
733static ssize_t show_hrtbt_enb(struct device *dev,
734 struct device_attribute *attr,
735 char *buf)
736{
737 struct ipath_devdata *dd = dev_get_drvdata(dev);
738 int ret;
739
740 ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_HRTBT);
741 if (ret >= 0)
742 ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
743 return ret;
744}
745
746static ssize_t store_hrtbt_enb(struct device *dev,
747 struct device_attribute *attr,
748 const char *buf,
749 size_t count)
750{
751 struct ipath_devdata *dd = dev_get_drvdata(dev);
752 int ret, r;
753 u16 val;
754
755 ret = ipath_parse_ushort(buf, &val);
756 if (ret >= 0 && val > 3)
757 ret = -EINVAL;
758 if (ret < 0) {
759 ipath_dev_err(dd, "attempt to set invalid Heartbeat enable\n");
760 goto bail;
761 }
762
763 /*
764 * Set the "intentional" heartbeat enable per either of
765 * "Enable" and "Auto", as these are normally set together.
766 * This bit is consulted when leaving loopback mode,
767 * because entering loopback mode overrides it and automatically
768 * disables heartbeat.
769 */
770 r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, val);
771 if (r < 0)
772 ret = r;
773 else if (val == IPATH_IB_HRTBT_OFF)
774 dd->ipath_flags |= IPATH_NO_HRTBT;
775 else
776 dd->ipath_flags &= ~IPATH_NO_HRTBT;
777
778bail:
779 return ret;
780}
781
782/*
783 * Get/Set Link-widths enabled. OR of 1=1x, 2=4x (this is human/IB centric,
784 * _not_ the particular encoding of any given chip)
785 */
786static ssize_t show_lwid_enb(struct device *dev,
787 struct device_attribute *attr,
788 char *buf)
789{
790 struct ipath_devdata *dd = dev_get_drvdata(dev);
791 int ret;
792
793 ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB);
794 if (ret >= 0)
795 ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
796 return ret;
797}
798
799static ssize_t store_lwid_enb(struct device *dev,
800 struct device_attribute *attr,
801 const char *buf,
802 size_t count)
803{
804 struct ipath_devdata *dd = dev_get_drvdata(dev);
805 int ret, r;
806 u16 val;
807
808 ret = ipath_parse_ushort(buf, &val);
809 if (ret >= 0 && (val == 0 || val > 3))
810 ret = -EINVAL;
811 if (ret < 0) {
812 ipath_dev_err(dd,
813 "attempt to set invalid Link Width (enable)\n");
814 goto bail;
815 }
816
817 r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, val);
818 if (r < 0)
819 ret = r;
820
821bail:
822 return ret;
823}
824
825/* Get current link width */
826static ssize_t show_lwid(struct device *dev,
827 struct device_attribute *attr,
828 char *buf)
829
830{
831 struct ipath_devdata *dd = dev_get_drvdata(dev);
832 int ret;
833
834 ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID);
835 if (ret >= 0)
836 ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
837 return ret;
838}
839
840/*
841 * Get/Set Link-speeds enabled. OR of 1=SDR, 2=DDR.
842 */
843static ssize_t show_spd_enb(struct device *dev,
844 struct device_attribute *attr,
845 char *buf)
846{
847 struct ipath_devdata *dd = dev_get_drvdata(dev);
848 int ret;
849
850 ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB);
851 if (ret >= 0)
852 ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
853 return ret;
854}
855
856static ssize_t store_spd_enb(struct device *dev,
857 struct device_attribute *attr,
858 const char *buf,
859 size_t count)
860{
861 struct ipath_devdata *dd = dev_get_drvdata(dev);
862 int ret, r;
863 u16 val;
864
865 ret = ipath_parse_ushort(buf, &val);
866 if (ret >= 0 && (val == 0 || val > (IPATH_IB_SDR | IPATH_IB_DDR)))
867 ret = -EINVAL;
868 if (ret < 0) {
869 ipath_dev_err(dd,
870 "attempt to set invalid Link Speed (enable)\n");
871 goto bail;
872 }
873
874 r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, val);
875 if (r < 0)
876 ret = r;
877
878bail:
879 return ret;
880}
881
882/* Get current link speed */
883static ssize_t show_spd(struct device *dev,
884 struct device_attribute *attr,
885 char *buf)
886{
887 struct ipath_devdata *dd = dev_get_drvdata(dev);
888 int ret;
889
890 ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD);
891 if (ret >= 0)
892 ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
893 return ret;
894}
895
896/*
897 * Get/Set RX polarity-invert enable. 0=no, 1=yes.
898 */
899static ssize_t show_rx_polinv_enb(struct device *dev,
900 struct device_attribute *attr,
901 char *buf)
902{
903 struct ipath_devdata *dd = dev_get_drvdata(dev);
904 int ret;
905
906 ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB);
907 if (ret >= 0)
908 ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
909 return ret;
910}
911
912static ssize_t store_rx_polinv_enb(struct device *dev,
913 struct device_attribute *attr,
914 const char *buf,
915 size_t count)
916{
917 struct ipath_devdata *dd = dev_get_drvdata(dev);
918 int ret, r;
919 u16 val;
920
921 ret = ipath_parse_ushort(buf, &val);
922 if (ret < 0 || val > 1)
923 goto invalid;
924
925 r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB, val);
926 if (r < 0) {
927 ret = r;
928 goto bail;
929 }
930
931 goto bail;
932invalid:
933 ipath_dev_err(dd, "attempt to set invalid Rx Polarity (enable)\n");
934bail:
935 return ret;
936}
937/*
938 * Get/Set RX lane-reversal enable. 0=no, 1=yes.
939 */
940static ssize_t show_lanerev_enb(struct device *dev,
941 struct device_attribute *attr,
942 char *buf)
943{
944 struct ipath_devdata *dd = dev_get_drvdata(dev);
945 int ret;
946
947 ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB);
948 if (ret >= 0)
949 ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
950 return ret;
951}
952
953static ssize_t store_lanerev_enb(struct device *dev,
954 struct device_attribute *attr,
955 const char *buf,
956 size_t count)
957{
958 struct ipath_devdata *dd = dev_get_drvdata(dev);
959 int ret, r;
960 u16 val;
961
962 ret = ipath_parse_ushort(buf, &val);
963 if (ret >= 0 && val > 1) {
964 ret = -EINVAL;
965 ipath_dev_err(dd,
966 "attempt to set invalid Lane reversal (enable)\n");
967 goto bail;
968 }
969
970 r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB, val);
971 if (r < 0)
972 ret = r;
973
974bail:
975 return ret;
976}
977
673static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL); 978static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
674static DRIVER_ATTR(version, S_IRUGO, show_version, NULL); 979static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
675 980
@@ -706,6 +1011,10 @@ static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
706static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv); 1011static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
707static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override); 1012static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override);
708static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL); 1013static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
1014static DEVICE_ATTR(jint_max_packets, S_IWUSR | S_IRUGO,
1015 show_jint_max_packets, store_jint_max_packets);
1016static DEVICE_ATTR(jint_idle_ticks, S_IWUSR | S_IRUGO,
1017 show_jint_idle_ticks, store_jint_idle_ticks);
709 1018
710static struct attribute *dev_attributes[] = { 1019static struct attribute *dev_attributes[] = {
711 &dev_attr_guid.attr, 1020 &dev_attr_guid.attr,
@@ -732,6 +1041,34 @@ static struct attribute_group dev_attr_group = {
732 .attrs = dev_attributes 1041 .attrs = dev_attributes
733}; 1042};
734 1043
1044static DEVICE_ATTR(hrtbt_enable, S_IWUSR | S_IRUGO, show_hrtbt_enb,
1045 store_hrtbt_enb);
1046static DEVICE_ATTR(link_width_enable, S_IWUSR | S_IRUGO, show_lwid_enb,
1047 store_lwid_enb);
1048static DEVICE_ATTR(link_width, S_IRUGO, show_lwid, NULL);
1049static DEVICE_ATTR(link_speed_enable, S_IWUSR | S_IRUGO, show_spd_enb,
1050 store_spd_enb);
1051static DEVICE_ATTR(link_speed, S_IRUGO, show_spd, NULL);
1052static DEVICE_ATTR(rx_pol_inv_enable, S_IWUSR | S_IRUGO, show_rx_polinv_enb,
1053 store_rx_polinv_enb);
1054static DEVICE_ATTR(rx_lane_rev_enable, S_IWUSR | S_IRUGO, show_lanerev_enb,
1055 store_lanerev_enb);
1056
1057static struct attribute *dev_ibcfg_attributes[] = {
1058 &dev_attr_hrtbt_enable.attr,
1059 &dev_attr_link_width_enable.attr,
1060 &dev_attr_link_width.attr,
1061 &dev_attr_link_speed_enable.attr,
1062 &dev_attr_link_speed.attr,
1063 &dev_attr_rx_pol_inv_enable.attr,
1064 &dev_attr_rx_lane_rev_enable.attr,
1065 NULL
1066};
1067
1068static struct attribute_group dev_ibcfg_attr_group = {
1069 .attrs = dev_ibcfg_attributes
1070};
1071
735/** 1072/**
736 * ipath_expose_reset - create a device reset file 1073 * ipath_expose_reset - create a device reset file
737 * @dev: the device structure 1074 * @dev: the device structure
@@ -770,6 +1107,26 @@ int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd)
770 if (ret) 1107 if (ret)
771 goto bail_attrs; 1108 goto bail_attrs;
772 1109
1110 if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) {
1111 ret = device_create_file(dev, &dev_attr_jint_idle_ticks);
1112 if (ret)
1113 goto bail_counter;
1114 ret = device_create_file(dev, &dev_attr_jint_max_packets);
1115 if (ret)
1116 goto bail_idle;
1117
1118 ret = sysfs_create_group(&dev->kobj, &dev_ibcfg_attr_group);
1119 if (ret)
1120 goto bail_max;
1121 }
1122
1123 return 0;
1124
1125bail_max:
1126 device_remove_file(dev, &dev_attr_jint_max_packets);
1127bail_idle:
1128 device_remove_file(dev, &dev_attr_jint_idle_ticks);
1129bail_counter:
773 sysfs_remove_group(&dev->kobj, &dev_counter_attr_group); 1130 sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
774bail_attrs: 1131bail_attrs:
775 sysfs_remove_group(&dev->kobj, &dev_attr_group); 1132 sysfs_remove_group(&dev->kobj, &dev_attr_group);
@@ -780,6 +1137,13 @@ bail:
780void ipath_device_remove_group(struct device *dev, struct ipath_devdata *dd) 1137void ipath_device_remove_group(struct device *dev, struct ipath_devdata *dd)
781{ 1138{
782 sysfs_remove_group(&dev->kobj, &dev_counter_attr_group); 1139 sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
1140
1141 if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) {
1142 sysfs_remove_group(&dev->kobj, &dev_ibcfg_attr_group);
1143 device_remove_file(dev, &dev_attr_jint_idle_ticks);
1144 device_remove_file(dev, &dev_attr_jint_max_packets);
1145 }
1146
783 sysfs_remove_group(&dev->kobj, &dev_attr_group); 1147 sysfs_remove_group(&dev->kobj, &dev_attr_group);
784 1148
785 device_remove_file(dev, &dev_attr_reset); 1149 device_remove_file(dev, &dev_attr_reset);
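In ipath_device_create_group() above, the jint_* files and dev_ibcfg_attr_group are created only for chips flagged IPATH_HAS_MULT_IB_SPEED, and the new bail_max/bail_idle/bail_counter labels unwind in reverse creation order so a failure at any step removes exactly what was already set up; ipath_device_remove_group() mirrors the same order. A hedged sketch of that goto-ladder idiom with hypothetical attribute names (a pattern illustration, not the driver's code):

#include <linux/device.h>
#include <linux/sysfs.h>

/* hypothetical attributes/group standing in for the driver's real ones */
extern struct attribute_group base_group;
extern struct device_attribute dev_attr_extra_a, dev_attr_extra_b;

static int example_create_files(struct device *dev, int has_feature)
{
	int ret;

	ret = sysfs_create_group(&dev->kobj, &base_group);
	if (ret)
		goto bail;

	if (has_feature) {
		ret = device_create_file(dev, &dev_attr_extra_a);
		if (ret)
			goto bail_group;
		ret = device_create_file(dev, &dev_attr_extra_b);
		if (ret)
			goto bail_a;
	}
	return 0;

bail_a:
	device_remove_file(dev, &dev_attr_extra_a);
bail_group:
	sysfs_remove_group(&dev->kobj, &base_group);
bail:
	return ret;
}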
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index b3df6f3c705e..de67eed08ed0 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -301,8 +301,6 @@ int ipath_make_ud_req(struct ipath_qp *qp)
301 301
302 /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */ 302 /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
303 qp->s_hdrwords = 7; 303 qp->s_hdrwords = 7;
304 if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
305 qp->s_hdrwords++;
306 qp->s_cur_size = wqe->length; 304 qp->s_cur_size = wqe->length;
307 qp->s_cur_sge = &qp->s_sge; 305 qp->s_cur_sge = &qp->s_sge;
308 qp->s_wqe = wqe; 306 qp->s_wqe = wqe;
@@ -327,6 +325,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
327 ohdr = &qp->s_hdr.u.oth; 325 ohdr = &qp->s_hdr.u.oth;
328 } 326 }
329 if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { 327 if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
328 qp->s_hdrwords++;
330 ohdr->u.ud.imm_data = wqe->wr.imm_data; 329 ohdr->u.ud.imm_data = wqe->wr.imm_data;
331 bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24; 330 bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
332 } else 331 } else
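The UD change above is a small clarity fix: the base header is LRH + BTH + DETH = (8 + 12 + 8) / 4 = 7 32-bit words, and the extra word for a 4-byte immediate is now counted in the same branch that writes imm_data instead of earlier in the function. A tiny illustrative sketch of the resulting count (not driver code):

/* LRH (8 bytes) + BTH (12) + DETH (8) = 28 bytes = 7 32-bit words,
 * plus one word when SEND_WITH_IMMEDIATE carries 4 bytes of imm_data. */
static unsigned int ud_header_words(int has_immediate)
{
	unsigned int words = (8 + 12 + 8) / 4;	/* 7 */

	if (has_immediate)
		words++;			/* 8 */
	return words;
}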
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index c4c998446c7b..32d8f882e56c 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -943,7 +943,7 @@ bail:
943 * ipath_verbs_send - send a packet 943 * ipath_verbs_send - send a packet
944 * @qp: the QP to send on 944 * @qp: the QP to send on
945 * @hdr: the packet header 945 * @hdr: the packet header
946 * @hdrwords: the number of words in the header 946 * @hdrwords: the number of 32-bit words in the header
947 * @ss: the SGE to send 947 * @ss: the SGE to send
948 * @len: the length of the packet in bytes 948 * @len: the length of the packet in bytes
949 */ 949 */
@@ -955,7 +955,10 @@ int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
955 int ret; 955 int ret;
956 u32 dwords = (len + 3) >> 2; 956 u32 dwords = (len + 3) >> 2;
957 957
958 /* +1 is for the qword padding of pbc */ 958 /*
959 * Calculate the send buffer trigger address.
960 * The +1 counts for the pbc control dword following the pbc length.
961 */
959 plen = hdrwords + dwords + 1; 962 plen = hdrwords + dwords + 1;
960 963
961 /* Drop non-VL15 packets if we are not in the active state */ 964 /* Drop non-VL15 packets if we are not in the active state */
@@ -1130,20 +1133,34 @@ static int ipath_query_device(struct ib_device *ibdev,
1130 return 0; 1133 return 0;
1131} 1134}
1132 1135
1133const u8 ipath_cvt_physportstate[16] = { 1136const u8 ipath_cvt_physportstate[32] = {
1134 [INFINIPATH_IBCS_LT_STATE_DISABLED] = 3, 1137 [INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
1135 [INFINIPATH_IBCS_LT_STATE_LINKUP] = 5, 1138 [INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
1136 [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = 2, 1139 [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
1137 [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = 2, 1140 [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
1138 [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = 1, 1141 [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
1139 [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = 1, 1142 [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
1140 [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] = 4, 1143 [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] =
1141 [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] = 4, 1144 IB_PHYSPORTSTATE_CFG_TRAIN,
1142 [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] = 4, 1145 [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] =
1143 [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = 4, 1146 IB_PHYSPORTSTATE_CFG_TRAIN,
1144 [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] = 6, 1147 [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] =
1145 [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] = 6, 1148 IB_PHYSPORTSTATE_CFG_TRAIN,
1146 [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6, 1149 [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
1150 [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] =
1151 IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1152 [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] =
1153 IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1154 [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] =
1155 IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1156 [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
1157 [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
1158 [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
1159 [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
1160 [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
1161 [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
1162 [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
1163 [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
1147}; 1164};
1148 1165
1149u32 ipath_get_cr_errpkey(struct ipath_devdata *dd) 1166u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
@@ -1168,8 +1185,9 @@ static int ipath_query_port(struct ib_device *ibdev,
1168 ibcstat = dd->ipath_lastibcstat; 1185 ibcstat = dd->ipath_lastibcstat;
1169 props->state = ((ibcstat >> 4) & 0x3) + 1; 1186 props->state = ((ibcstat >> 4) & 0x3) + 1;
1170 /* See phys_state_show() */ 1187 /* See phys_state_show() */
1171 props->phys_state = ipath_cvt_physportstate[ 1188 props->phys_state = /* MEA: assumes shift == 0 */
1172 dd->ipath_lastibcstat & 0xf]; 1189 ipath_cvt_physportstate[dd->ipath_lastibcstat &
1190 dd->ibcs_lts_mask];
1173 props->port_cap_flags = dev->port_cap_flags; 1191 props->port_cap_flags = dev->port_cap_flags;
1174 props->gid_tbl_len = 1; 1192 props->gid_tbl_len = 1;
1175 props->max_msg_sz = 0x80000000; 1193 props->max_msg_sz = 0x80000000;
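ipath_cvt_physportstate[] above grows from 16 to 32 entries and is now indexed with dd->ibcs_lts_mask instead of a hard-coded 0xf, because newer chips (the IBA7220 work in this series) report a wider link-training-state field; the added 0x10-0x17 codes are all treated as configuration/training. A minimal sketch of the masked, table-driven translation, with hypothetical names standing in for the driver's table and mask:

#include <stdint.h>

#define IB_PHYSPORTSTATE_CFG_TRAIN	4
#define IB_PHYSPORTSTATE_LINKUP		5

/* hw_to_ib[] and lts_mask are illustrative stand-ins for
 * ipath_cvt_physportstate[] and dd->ibcs_lts_mask; the index values
 * below are made up except for the 0x10-0x17 block taken from the patch. */
static const uint8_t hw_to_ib[32] = {
	[0x0f]          = IB_PHYSPORTSTATE_LINKUP,
	[0x10 ... 0x17] = IB_PHYSPORTSTATE_CFG_TRAIN,
};

static uint8_t ib_phys_state(uint64_t ibcstat, uint64_t lts_mask)
{
	/* the mask (0xf on older chips, wider on the 7220) bounds the index */
	return hw_to_ib[ibcstat & lts_mask];
}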
@@ -1641,6 +1659,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
1641 cntrs.local_link_integrity_errors; 1659 cntrs.local_link_integrity_errors;
1642 idev->z_excessive_buffer_overrun_errors = 1660 idev->z_excessive_buffer_overrun_errors =
1643 cntrs.excessive_buffer_overrun_errors; 1661 cntrs.excessive_buffer_overrun_errors;
1662 idev->z_vl15_dropped = cntrs.vl15_dropped;
1644 1663
1645 /* 1664 /*
1646 * The system image GUID is supposed to be the same for all 1665 * The system image GUID is supposed to be the same for all
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 6ccb54f104a3..3d59736b49b2 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -554,6 +554,7 @@ struct ipath_ibdev {
554 u32 z_pkey_violations; /* starting count for PMA */ 554 u32 z_pkey_violations; /* starting count for PMA */
555 u32 z_local_link_integrity_errors; /* starting count for PMA */ 555 u32 z_local_link_integrity_errors; /* starting count for PMA */
556 u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */ 556 u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */
557 u32 z_vl15_dropped; /* starting count for PMA */
557 u32 n_rc_resends; 558 u32 n_rc_resends;
558 u32 n_rc_acks; 559 u32 n_rc_acks;
559 u32 n_rc_qacks; 560 u32 n_rc_qacks;
@@ -598,6 +599,7 @@ struct ipath_verbs_counters {
598 u64 port_rcv_packets; 599 u64 port_rcv_packets;
599 u32 local_link_integrity_errors; 600 u32 local_link_integrity_errors;
600 u32 excessive_buffer_overrun_errors; 601 u32 excessive_buffer_overrun_errors;
602 u32 vl15_dropped;
601}; 603};
602 604
603static inline struct ipath_mr *to_imr(struct ib_mr *ibmr) 605static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
@@ -830,7 +832,17 @@ unsigned ipath_get_pkey(struct ipath_devdata *, unsigned);
830 832
831extern const enum ib_wc_opcode ib_ipath_wc_opcode[]; 833extern const enum ib_wc_opcode ib_ipath_wc_opcode[];
832 834
835/*
836 * Below converts HCA-specific LinkTrainingState to IB PhysPortState
837 * values.
838 */
833extern const u8 ipath_cvt_physportstate[]; 839extern const u8 ipath_cvt_physportstate[];
840#define IB_PHYSPORTSTATE_SLEEP 1
841#define IB_PHYSPORTSTATE_POLL 2
842#define IB_PHYSPORTSTATE_DISABLED 3
843#define IB_PHYSPORTSTATE_CFG_TRAIN 4
844#define IB_PHYSPORTSTATE_LINKUP 5
845#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6
834 846
835extern const int ib_ipath_state_ops[]; 847extern const int ib_ipath_state_ops[];
836 848
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 9d32c49cc651..7950aa6e8184 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -313,6 +313,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
313 struct mlx4_ib_srq *srq; 313 struct mlx4_ib_srq *srq;
314 int is_send; 314 int is_send;
315 int is_error; 315 int is_error;
316 u32 g_mlpath_rqpn;
316 u16 wqe_ctr; 317 u16 wqe_ctr;
317 318
318 cqe = next_cqe_sw(cq); 319 cqe = next_cqe_sw(cq);
@@ -426,10 +427,10 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
426 427
427 wc->slid = be16_to_cpu(cqe->rlid); 428 wc->slid = be16_to_cpu(cqe->rlid);
428 wc->sl = cqe->sl >> 4; 429 wc->sl = cqe->sl >> 4;
429 wc->src_qp = be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff; 430 g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
430 wc->dlid_path_bits = (be32_to_cpu(cqe->g_mlpath_rqpn) >> 24) & 0x7f; 431 wc->src_qp = g_mlpath_rqpn & 0xffffff;
431 wc->wc_flags |= be32_to_cpu(cqe->g_mlpath_rqpn) & 0x80000000 ? 432 wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
432 IB_WC_GRH : 0; 433 wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
433 wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; 434 wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
434 } 435 }
435 436
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 15aa32eb78b6..7bbdd1f4e6c7 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -60,13 +60,12 @@
60enum { 60enum {
61 MTHCA_FLAG_DDR_HIDDEN = 1 << 1, 61 MTHCA_FLAG_DDR_HIDDEN = 1 << 1,
62 MTHCA_FLAG_SRQ = 1 << 2, 62 MTHCA_FLAG_SRQ = 1 << 2,
63 MTHCA_FLAG_MSI = 1 << 3, 63 MTHCA_FLAG_MSI_X = 1 << 3,
64 MTHCA_FLAG_MSI_X = 1 << 4, 64 MTHCA_FLAG_NO_LAM = 1 << 4,
65 MTHCA_FLAG_NO_LAM = 1 << 5, 65 MTHCA_FLAG_FMR = 1 << 5,
66 MTHCA_FLAG_FMR = 1 << 6, 66 MTHCA_FLAG_MEMFREE = 1 << 6,
67 MTHCA_FLAG_MEMFREE = 1 << 7, 67 MTHCA_FLAG_PCIE = 1 << 7,
68 MTHCA_FLAG_PCIE = 1 << 8, 68 MTHCA_FLAG_SINAI_OPT = 1 << 8
69 MTHCA_FLAG_SINAI_OPT = 1 << 9
70}; 69};
71 70
72enum { 71enum {
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index b29de51b7f35..b60eb5df96e8 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -827,8 +827,7 @@ int mthca_init_eq_table(struct mthca_dev *dev)
827 if (err) 827 if (err)
828 goto err_out_free; 828 goto err_out_free;
829 829
830 if (dev->mthca_flags & MTHCA_FLAG_MSI || 830 if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
831 dev->mthca_flags & MTHCA_FLAG_MSI_X) {
832 dev->eq_table.clr_mask = 0; 831 dev->eq_table.clr_mask = 0;
833 } else { 832 } else {
834 dev->eq_table.clr_mask = 833 dev->eq_table.clr_mask =
@@ -839,8 +838,7 @@ int mthca_init_eq_table(struct mthca_dev *dev)
839 838
840 dev->eq_table.arm_mask = 0; 839 dev->eq_table.arm_mask = 0;
841 840
842 intr = (dev->mthca_flags & MTHCA_FLAG_MSI) ? 841 intr = dev->eq_table.inta_pin;
843 128 : dev->eq_table.inta_pin;
844 842
845 err = mthca_create_eq(dev, dev->limits.num_cqs + MTHCA_NUM_SPARE_EQE, 843 err = mthca_create_eq(dev, dev->limits.num_cqs + MTHCA_NUM_SPARE_EQE,
846 (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr, 844 (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr,
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 60de6f93869e..5cf8250d4e16 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -65,14 +65,9 @@ static int msi_x = 1;
65module_param(msi_x, int, 0444); 65module_param(msi_x, int, 0444);
66MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero"); 66MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
67 67
68static int msi = 0;
69module_param(msi, int, 0444);
70MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero (deprecated, use MSI-X instead)");
71
72#else /* CONFIG_PCI_MSI */ 68#else /* CONFIG_PCI_MSI */
73 69
74#define msi_x (0) 70#define msi_x (0)
75#define msi (0)
76 71
77#endif /* CONFIG_PCI_MSI */ 72#endif /* CONFIG_PCI_MSI */
78 73
@@ -816,13 +811,11 @@ static int mthca_setup_hca(struct mthca_dev *dev)
816 811
817 err = mthca_NOP(dev, &status); 812 err = mthca_NOP(dev, &status);
818 if (err || status) { 813 if (err || status) {
819 if (dev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X)) { 814 if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
820 mthca_warn(dev, "NOP command failed to generate interrupt " 815 mthca_warn(dev, "NOP command failed to generate interrupt "
821 "(IRQ %d).\n", 816 "(IRQ %d).\n",
822 dev->mthca_flags & MTHCA_FLAG_MSI_X ? 817 dev->eq_table.eq[MTHCA_EQ_CMD].msi_x_vector);
823 dev->eq_table.eq[MTHCA_EQ_CMD].msi_x_vector : 818 mthca_warn(dev, "Trying again with MSI-X disabled.\n");
824 dev->pdev->irq);
825 mthca_warn(dev, "Trying again with MSI/MSI-X disabled.\n");
826 } else { 819 } else {
827 mthca_err(dev, "NOP command failed to generate interrupt " 820 mthca_err(dev, "NOP command failed to generate interrupt "
828 "(IRQ %d), aborting.\n", 821 "(IRQ %d), aborting.\n",
@@ -1005,7 +998,7 @@ static struct {
1005 .flags = 0 }, 998 .flags = 0 },
1006 [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 8, 200), 999 [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 8, 200),
1007 .flags = MTHCA_FLAG_PCIE }, 1000 .flags = MTHCA_FLAG_PCIE },
1008 [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 2, 0), 1001 [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 3, 0),
1009 .flags = MTHCA_FLAG_MEMFREE | 1002 .flags = MTHCA_FLAG_MEMFREE |
1010 MTHCA_FLAG_PCIE }, 1003 MTHCA_FLAG_PCIE },
1011 [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 2, 0), 1004 [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 2, 0),
@@ -1128,29 +1121,12 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
1128 1121
1129 if (msi_x && !mthca_enable_msi_x(mdev)) 1122 if (msi_x && !mthca_enable_msi_x(mdev))
1130 mdev->mthca_flags |= MTHCA_FLAG_MSI_X; 1123 mdev->mthca_flags |= MTHCA_FLAG_MSI_X;
1131 else if (msi) {
1132 static int warned;
1133
1134 if (!warned) {
1135 printk(KERN_WARNING PFX "WARNING: MSI support will be "
1136 "removed from the ib_mthca driver in January 2008.\n");
1137 printk(KERN_WARNING " If you are using MSI and cannot "
1138 "switch to MSI-X, please tell "
1139 "<general@lists.openfabrics.org>.\n");
1140 ++warned;
1141 }
1142
1143 if (!pci_enable_msi(pdev))
1144 mdev->mthca_flags |= MTHCA_FLAG_MSI;
1145 }
1146 1124
1147 err = mthca_setup_hca(mdev); 1125 err = mthca_setup_hca(mdev);
1148 if (err == -EBUSY && (mdev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X))) { 1126 if (err == -EBUSY && (mdev->mthca_flags & MTHCA_FLAG_MSI_X)) {
1149 if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) 1127 if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
1150 pci_disable_msix(pdev); 1128 pci_disable_msix(pdev);
1151 if (mdev->mthca_flags & MTHCA_FLAG_MSI) 1129 mdev->mthca_flags &= ~MTHCA_FLAG_MSI_X;
1152 pci_disable_msi(pdev);
1153 mdev->mthca_flags &= ~(MTHCA_FLAG_MSI_X | MTHCA_FLAG_MSI);
1154 1130
1155 err = mthca_setup_hca(mdev); 1131 err = mthca_setup_hca(mdev);
1156 } 1132 }
@@ -1192,8 +1168,6 @@ err_cleanup:
1192err_close: 1168err_close:
1193 if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) 1169 if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
1194 pci_disable_msix(pdev); 1170 pci_disable_msix(pdev);
1195 if (mdev->mthca_flags & MTHCA_FLAG_MSI)
1196 pci_disable_msi(pdev);
1197 1171
1198 mthca_close_hca(mdev); 1172 mthca_close_hca(mdev);
1199 1173
@@ -1246,8 +1220,6 @@ static void __mthca_remove_one(struct pci_dev *pdev)
1246 1220
1247 if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) 1221 if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
1248 pci_disable_msix(pdev); 1222 pci_disable_msix(pdev);
1249 if (mdev->mthca_flags & MTHCA_FLAG_MSI)
1250 pci_disable_msi(pdev);
1251 1223
1252 ib_dealloc_device(&mdev->ib_dev); 1224 ib_dealloc_device(&mdev->ib_dev);
1253 mthca_release_regions(pdev, mdev->mthca_flags & 1225 mthca_release_regions(pdev, mdev->mthca_flags &
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index eb7edab0e836..fe250c60607d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -56,42 +56,43 @@
56/* constants */ 56/* constants */
57 57
58enum { 58enum {
59 IPOIB_PACKET_SIZE = 2048, 59 IPOIB_PACKET_SIZE = 2048,
60 IPOIB_BUF_SIZE = IPOIB_PACKET_SIZE + IB_GRH_BYTES, 60 IPOIB_BUF_SIZE = IPOIB_PACKET_SIZE + IB_GRH_BYTES,
61 61
62 IPOIB_ENCAP_LEN = 4, 62 IPOIB_ENCAP_LEN = 4,
63 63
64 IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */ 64 IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */
65 IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, 65 IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN,
66 IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, 66 IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE,
67 IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE, 67 IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE,
68 IPOIB_RX_RING_SIZE = 128, 68 IPOIB_RX_RING_SIZE = 128,
69 IPOIB_TX_RING_SIZE = 64, 69 IPOIB_TX_RING_SIZE = 64,
70 IPOIB_MAX_QUEUE_SIZE = 8192, 70 IPOIB_MAX_QUEUE_SIZE = 8192,
71 IPOIB_MIN_QUEUE_SIZE = 2, 71 IPOIB_MIN_QUEUE_SIZE = 2,
72 IPOIB_CM_MAX_CONN_QP = 4096,
72 73
73 IPOIB_NUM_WC = 4, 74 IPOIB_NUM_WC = 4,
74 75
75 IPOIB_MAX_PATH_REC_QUEUE = 3, 76 IPOIB_MAX_PATH_REC_QUEUE = 3,
76 IPOIB_MAX_MCAST_QUEUE = 3, 77 IPOIB_MAX_MCAST_QUEUE = 3,
77 78
78 IPOIB_FLAG_OPER_UP = 0, 79 IPOIB_FLAG_OPER_UP = 0,
79 IPOIB_FLAG_INITIALIZED = 1, 80 IPOIB_FLAG_INITIALIZED = 1,
80 IPOIB_FLAG_ADMIN_UP = 2, 81 IPOIB_FLAG_ADMIN_UP = 2,
81 IPOIB_PKEY_ASSIGNED = 3, 82 IPOIB_PKEY_ASSIGNED = 3,
82 IPOIB_PKEY_STOP = 4, 83 IPOIB_PKEY_STOP = 4,
83 IPOIB_FLAG_SUBINTERFACE = 5, 84 IPOIB_FLAG_SUBINTERFACE = 5,
84 IPOIB_MCAST_RUN = 6, 85 IPOIB_MCAST_RUN = 6,
85 IPOIB_STOP_REAPER = 7, 86 IPOIB_STOP_REAPER = 7,
86 IPOIB_MCAST_STARTED = 8, 87 IPOIB_MCAST_STARTED = 8,
87 IPOIB_FLAG_ADMIN_CM = 9, 88 IPOIB_FLAG_ADMIN_CM = 9,
88 IPOIB_FLAG_UMCAST = 10, 89 IPOIB_FLAG_UMCAST = 10,
89 90
90 IPOIB_MAX_BACKOFF_SECONDS = 16, 91 IPOIB_MAX_BACKOFF_SECONDS = 16,
91 92
92 IPOIB_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */ 93 IPOIB_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */
93 IPOIB_MCAST_FLAG_SENDONLY = 1, 94 IPOIB_MCAST_FLAG_SENDONLY = 1,
94 IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ 95 IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
95 IPOIB_MCAST_FLAG_ATTACHED = 3, 96 IPOIB_MCAST_FLAG_ATTACHED = 3,
96}; 97};
97 98
@@ -117,7 +118,7 @@ struct ipoib_pseudoheader {
117struct ipoib_mcast { 118struct ipoib_mcast {
118 struct ib_sa_mcmember_rec mcmember; 119 struct ib_sa_mcmember_rec mcmember;
119 struct ib_sa_multicast *mc; 120 struct ib_sa_multicast *mc;
120 struct ipoib_ah *ah; 121 struct ipoib_ah *ah;
121 122
122 struct rb_node rb_node; 123 struct rb_node rb_node;
123 struct list_head list; 124 struct list_head list;
@@ -186,27 +187,29 @@ enum ipoib_cm_state {
186}; 187};
187 188
188struct ipoib_cm_rx { 189struct ipoib_cm_rx {
189 struct ib_cm_id *id; 190 struct ib_cm_id *id;
190 struct ib_qp *qp; 191 struct ib_qp *qp;
191 struct list_head list; 192 struct ipoib_cm_rx_buf *rx_ring;
192 struct net_device *dev; 193 struct list_head list;
193 unsigned long jiffies; 194 struct net_device *dev;
194 enum ipoib_cm_state state; 195 unsigned long jiffies;
196 enum ipoib_cm_state state;
197 int recv_count;
195}; 198};
196 199
197struct ipoib_cm_tx { 200struct ipoib_cm_tx {
198 struct ib_cm_id *id; 201 struct ib_cm_id *id;
199 struct ib_qp *qp; 202 struct ib_qp *qp;
200 struct list_head list; 203 struct list_head list;
201 struct net_device *dev; 204 struct net_device *dev;
202 struct ipoib_neigh *neigh; 205 struct ipoib_neigh *neigh;
203 struct ipoib_path *path; 206 struct ipoib_path *path;
204 struct ipoib_tx_buf *tx_ring; 207 struct ipoib_tx_buf *tx_ring;
205 unsigned tx_head; 208 unsigned tx_head;
206 unsigned tx_tail; 209 unsigned tx_tail;
207 unsigned long flags; 210 unsigned long flags;
208 u32 mtu; 211 u32 mtu;
209 struct ib_wc ibwc[IPOIB_NUM_WC]; 212 struct ib_wc ibwc[IPOIB_NUM_WC];
210}; 213};
211 214
212struct ipoib_cm_rx_buf { 215struct ipoib_cm_rx_buf {
@@ -215,25 +218,28 @@ struct ipoib_cm_rx_buf {
215}; 218};
216 219
217struct ipoib_cm_dev_priv { 220struct ipoib_cm_dev_priv {
218 struct ib_srq *srq; 221 struct ib_srq *srq;
219 struct ipoib_cm_rx_buf *srq_ring; 222 struct ipoib_cm_rx_buf *srq_ring;
220 struct ib_cm_id *id; 223 struct ib_cm_id *id;
221 struct list_head passive_ids; /* state: LIVE */ 224 struct list_head passive_ids; /* state: LIVE */
222 struct list_head rx_error_list; /* state: ERROR */ 225 struct list_head rx_error_list; /* state: ERROR */
223 struct list_head rx_flush_list; /* state: FLUSH, drain not started */ 226 struct list_head rx_flush_list; /* state: FLUSH, drain not started */
224 struct list_head rx_drain_list; /* state: FLUSH, drain started */ 227 struct list_head rx_drain_list; /* state: FLUSH, drain started */
225 struct list_head rx_reap_list; /* state: FLUSH, drain done */ 228 struct list_head rx_reap_list; /* state: FLUSH, drain done */
226 struct work_struct start_task; 229 struct work_struct start_task;
227 struct work_struct reap_task; 230 struct work_struct reap_task;
228 struct work_struct skb_task; 231 struct work_struct skb_task;
229 struct work_struct rx_reap_task; 232 struct work_struct rx_reap_task;
230 struct delayed_work stale_task; 233 struct delayed_work stale_task;
231 struct sk_buff_head skb_queue; 234 struct sk_buff_head skb_queue;
232 struct list_head start_list; 235 struct list_head start_list;
233 struct list_head reap_list; 236 struct list_head reap_list;
234 struct ib_wc ibwc[IPOIB_NUM_WC]; 237 struct ib_wc ibwc[IPOIB_NUM_WC];
235 struct ib_sge rx_sge[IPOIB_CM_RX_SG]; 238 struct ib_sge rx_sge[IPOIB_CM_RX_SG];
236 struct ib_recv_wr rx_wr; 239 struct ib_recv_wr rx_wr;
240 int nonsrq_conn_qp;
241 int max_cm_mtu;
242 int num_frags;
237}; 243};
238 244
239/* 245/*
@@ -269,30 +275,30 @@ struct ipoib_dev_priv {
269 struct work_struct pkey_event_task; 275 struct work_struct pkey_event_task;
270 276
271 struct ib_device *ca; 277 struct ib_device *ca;
272 u8 port; 278 u8 port;
273 u16 pkey; 279 u16 pkey;
274 u16 pkey_index; 280 u16 pkey_index;
275 struct ib_pd *pd; 281 struct ib_pd *pd;
276 struct ib_mr *mr; 282 struct ib_mr *mr;
277 struct ib_cq *cq; 283 struct ib_cq *cq;
278 struct ib_qp *qp; 284 struct ib_qp *qp;
279 u32 qkey; 285 u32 qkey;
280 286
281 union ib_gid local_gid; 287 union ib_gid local_gid;
282 u16 local_lid; 288 u16 local_lid;
283 289
284 unsigned int admin_mtu; 290 unsigned int admin_mtu;
285 unsigned int mcast_mtu; 291 unsigned int mcast_mtu;
286 292
287 struct ipoib_rx_buf *rx_ring; 293 struct ipoib_rx_buf *rx_ring;
288 294
289 spinlock_t tx_lock; 295 spinlock_t tx_lock;
290 struct ipoib_tx_buf *tx_ring; 296 struct ipoib_tx_buf *tx_ring;
291 unsigned tx_head; 297 unsigned tx_head;
292 unsigned tx_tail; 298 unsigned tx_tail;
293 struct ib_sge tx_sge; 299 struct ib_sge tx_sge;
294 struct ib_send_wr tx_wr; 300 struct ib_send_wr tx_wr;
295 unsigned tx_outstanding; 301 unsigned tx_outstanding;
296 302
297 struct ib_wc ibwc[IPOIB_NUM_WC]; 303 struct ib_wc ibwc[IPOIB_NUM_WC];
298 304
@@ -317,10 +323,10 @@ struct ipoib_dev_priv {
317 323
318struct ipoib_ah { 324struct ipoib_ah {
319 struct net_device *dev; 325 struct net_device *dev;
320 struct ib_ah *ah; 326 struct ib_ah *ah;
321 struct list_head list; 327 struct list_head list;
322 struct kref ref; 328 struct kref ref;
323 unsigned last_send; 329 unsigned last_send;
324}; 330};
325 331
326struct ipoib_path { 332struct ipoib_path {
@@ -331,11 +337,11 @@ struct ipoib_path {
331 337
332 struct list_head neigh_list; 338 struct list_head neigh_list;
333 339
334 int query_id; 340 int query_id;
335 struct ib_sa_query *query; 341 struct ib_sa_query *query;
336 struct completion done; 342 struct completion done;
337 343
338 struct rb_node rb_node; 344 struct rb_node rb_node;
339 struct list_head list; 345 struct list_head list;
340}; 346};
341 347
@@ -344,7 +350,7 @@ struct ipoib_neigh {
344#ifdef CONFIG_INFINIBAND_IPOIB_CM 350#ifdef CONFIG_INFINIBAND_IPOIB_CM
345 struct ipoib_cm_tx *cm; 351 struct ipoib_cm_tx *cm;
346#endif 352#endif
347 union ib_gid dgid; 353 union ib_gid dgid;
348 struct sk_buff_head queue; 354 struct sk_buff_head queue;
349 355
350 struct neighbour *neighbour; 356 struct neighbour *neighbour;
@@ -455,12 +461,14 @@ void ipoib_drain_cq(struct net_device *dev);
455 461
456#ifdef CONFIG_INFINIBAND_IPOIB_CM 462#ifdef CONFIG_INFINIBAND_IPOIB_CM
457 463
458#define IPOIB_FLAGS_RC 0x80 464#define IPOIB_FLAGS_RC 0x80
459#define IPOIB_FLAGS_UC 0x40 465#define IPOIB_FLAGS_UC 0x40
460 466
461/* We don't support UC connections at the moment */ 467/* We don't support UC connections at the moment */
462#define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC)) 468#define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC))
463 469
470extern int ipoib_max_conn_qp;
471
464static inline int ipoib_cm_admin_enabled(struct net_device *dev) 472static inline int ipoib_cm_admin_enabled(struct net_device *dev)
465{ 473{
466 struct ipoib_dev_priv *priv = netdev_priv(dev); 474 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -491,6 +499,18 @@ static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *t
491 neigh->cm = tx; 499 neigh->cm = tx;
492} 500}
493 501
502static inline int ipoib_cm_has_srq(struct net_device *dev)
503{
504 struct ipoib_dev_priv *priv = netdev_priv(dev);
505 return !!priv->cm.srq;
506}
507
508static inline unsigned int ipoib_cm_max_mtu(struct net_device *dev)
509{
510 struct ipoib_dev_priv *priv = netdev_priv(dev);
511 return priv->cm.max_cm_mtu;
512}
513
494void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx); 514void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx);
495int ipoib_cm_dev_open(struct net_device *dev); 515int ipoib_cm_dev_open(struct net_device *dev);
496void ipoib_cm_dev_stop(struct net_device *dev); 516void ipoib_cm_dev_stop(struct net_device *dev);
@@ -500,7 +520,7 @@ void ipoib_cm_dev_cleanup(struct net_device *dev);
500struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path, 520struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
501 struct ipoib_neigh *neigh); 521 struct ipoib_neigh *neigh);
502void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx); 522void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx);
503void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, 523void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
504 unsigned int mtu); 524 unsigned int mtu);
505void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc); 525void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc);
506void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc); 526void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc);
@@ -508,6 +528,8 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc);
508 528
509struct ipoib_cm_tx; 529struct ipoib_cm_tx;
510 530
531#define ipoib_max_conn_qp 0
532
511static inline int ipoib_cm_admin_enabled(struct net_device *dev) 533static inline int ipoib_cm_admin_enabled(struct net_device *dev)
512{ 534{
513 return 0; 535 return 0;
@@ -533,6 +555,16 @@ static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *t
533{ 555{
534} 556}
535 557
558static inline int ipoib_cm_has_srq(struct net_device *dev)
559{
560 return 0;
561}
562
563static inline unsigned int ipoib_cm_max_mtu(struct net_device *dev)
564{
565 return 0;
566}
567
536static inline 568static inline
537void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) 569void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
538{ 570{
@@ -582,7 +614,7 @@ int ipoib_cm_add_mode_attr(struct net_device *dev)
582 return 0; 614 return 0;
583} 615}
584 616
585static inline void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, 617static inline void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
586 unsigned int mtu) 618 unsigned int mtu)
587{ 619{
588 dev_kfree_skb_any(skb); 620 dev_kfree_skb_any(skb);
@@ -624,12 +656,12 @@ extern struct ib_sa_client ipoib_sa_client;
624extern int ipoib_debug_level; 656extern int ipoib_debug_level;
625 657
626#define ipoib_dbg(priv, format, arg...) \ 658#define ipoib_dbg(priv, format, arg...) \
627 do { \ 659 do { \
628 if (ipoib_debug_level > 0) \ 660 if (ipoib_debug_level > 0) \
629 ipoib_printk(KERN_DEBUG, priv, format , ## arg); \ 661 ipoib_printk(KERN_DEBUG, priv, format , ## arg); \
630 } while (0) 662 } while (0)
631#define ipoib_dbg_mcast(priv, format, arg...) \ 663#define ipoib_dbg_mcast(priv, format, arg...) \
632 do { \ 664 do { \
633 if (mcast_debug_level > 0) \ 665 if (mcast_debug_level > 0) \
634 ipoib_printk(KERN_DEBUG, priv, format , ## arg); \ 666 ipoib_printk(KERN_DEBUG, priv, format , ## arg); \
635 } while (0) 667 } while (0)
@@ -642,7 +674,7 @@ extern int ipoib_debug_level;
642 674
643#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA 675#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
644#define ipoib_dbg_data(priv, format, arg...) \ 676#define ipoib_dbg_data(priv, format, arg...) \
645 do { \ 677 do { \
646 if (data_debug_level > 0) \ 678 if (data_debug_level > 0) \
647 ipoib_printk(KERN_DEBUG, priv, format , ## arg); \ 679 ipoib_printk(KERN_DEBUG, priv, format , ## arg); \
648 } while (0) 680 } while (0)
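The new ipoib_cm_has_srq()/ipoib_cm_max_mtu() helpers (and their stubs when connected mode is compiled out) let callers pick the connected-mode MTU without knowing how receives are set up: with an SRQ the limit comes from the HCA's max_srq_sge, while without one the per-QP rings always use the full IPOIB_CM_RX_SG and the nominal IPOIB_CM_MTU applies. A userspace-style sketch of that selection, assuming 4 KiB pages (the constants mirror the ipoib.h definitions above; the helper name is made up):

#include <stdio.h>

#define PAGE_SIZE	  4096				/* assumed for the example */
#define IPOIB_CM_MTU	  (0x10000 - 0x10)		/* 65520 */
#define IPOIB_CM_BUF_SIZE (IPOIB_CM_MTU + 4)		/* + 4-byte encap header */
#define IPOIB_CM_RX_SG	  ((IPOIB_CM_BUF_SIZE + PAGE_SIZE - 1) / PAGE_SIZE) /* 16 */

static int example_max_cm_mtu(int has_srq, int max_srq_sge)
{
	if (!has_srq)
		return IPOIB_CM_MTU;
	if (max_srq_sge > IPOIB_CM_RX_SG)
		max_srq_sge = IPOIB_CM_RX_SG;
	return max_srq_sge * PAGE_SIZE - 0x10;	/* as in ipoib_cm_dev_init() */
}

int main(void)
{
	/* an HCA limited to 8 SRQ SGEs: 8 * 4096 - 16 = 32752 bytes;
	 * with the full 16 SGEs the formula gives back 65520. */
	printf("%d %d\n", example_max_cm_mtu(1, 8), example_max_cm_mtu(0, 0));
	return 0;
}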
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 059cf92b60a5..1818f958c250 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -39,6 +39,15 @@
39#include <linux/icmpv6.h> 39#include <linux/icmpv6.h>
40#include <linux/delay.h> 40#include <linux/delay.h>
41 41
42#include "ipoib.h"
43
44int ipoib_max_conn_qp = 128;
45
46module_param_named(max_nonsrq_conn_qp, ipoib_max_conn_qp, int, 0444);
47MODULE_PARM_DESC(max_nonsrq_conn_qp,
48 "Max number of connected-mode QPs per interface "
49 "(applied only if shared receive queue is not available)");
50
42#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA 51#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
43static int data_debug_level; 52static int data_debug_level;
44 53
@@ -47,8 +56,6 @@ MODULE_PARM_DESC(cm_data_debug_level,
47 "Enable data path debug tracing for connected mode if > 0"); 56 "Enable data path debug tracing for connected mode if > 0");
48#endif 57#endif
49 58
50#include "ipoib.h"
51
52#define IPOIB_CM_IETF_ID 0x1000000000000000ULL 59#define IPOIB_CM_IETF_ID 0x1000000000000000ULL
53 60
54#define IPOIB_CM_RX_UPDATE_TIME (256 * HZ) 61#define IPOIB_CM_RX_UPDATE_TIME (256 * HZ)
@@ -81,7 +88,7 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
81 ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); 88 ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
82} 89}
83 90
84static int ipoib_cm_post_receive(struct net_device *dev, int id) 91static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
85{ 92{
86 struct ipoib_dev_priv *priv = netdev_priv(dev); 93 struct ipoib_dev_priv *priv = netdev_priv(dev);
87 struct ib_recv_wr *bad_wr; 94 struct ib_recv_wr *bad_wr;
@@ -89,13 +96,13 @@ static int ipoib_cm_post_receive(struct net_device *dev, int id)
89 96
90 priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; 97 priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
91 98
92 for (i = 0; i < IPOIB_CM_RX_SG; ++i) 99 for (i = 0; i < priv->cm.num_frags; ++i)
93 priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; 100 priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
94 101
95 ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr); 102 ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
96 if (unlikely(ret)) { 103 if (unlikely(ret)) {
97 ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret); 104 ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
98 ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, 105 ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
99 priv->cm.srq_ring[id].mapping); 106 priv->cm.srq_ring[id].mapping);
100 dev_kfree_skb_any(priv->cm.srq_ring[id].skb); 107 dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
101 priv->cm.srq_ring[id].skb = NULL; 108 priv->cm.srq_ring[id].skb = NULL;
@@ -104,7 +111,33 @@ static int ipoib_cm_post_receive(struct net_device *dev, int id)
104 return ret; 111 return ret;
105} 112}
106 113
107static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, int id, int frags, 114static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
115 struct ipoib_cm_rx *rx, int id)
116{
117 struct ipoib_dev_priv *priv = netdev_priv(dev);
118 struct ib_recv_wr *bad_wr;
119 int i, ret;
120
121 priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
122
123 for (i = 0; i < IPOIB_CM_RX_SG; ++i)
124 priv->cm.rx_sge[i].addr = rx->rx_ring[id].mapping[i];
125
126 ret = ib_post_recv(rx->qp, &priv->cm.rx_wr, &bad_wr);
127 if (unlikely(ret)) {
128 ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret);
129 ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
130 rx->rx_ring[id].mapping);
131 dev_kfree_skb_any(rx->rx_ring[id].skb);
132 rx->rx_ring[id].skb = NULL;
133 }
134
135 return ret;
136}
137
138static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
139 struct ipoib_cm_rx_buf *rx_ring,
140 int id, int frags,
108 u64 mapping[IPOIB_CM_RX_SG]) 141 u64 mapping[IPOIB_CM_RX_SG])
109{ 142{
110 struct ipoib_dev_priv *priv = netdev_priv(dev); 143 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -141,7 +174,7 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, int id, int
141 goto partial_error; 174 goto partial_error;
142 } 175 }
143 176
144 priv->cm.srq_ring[id].skb = skb; 177 rx_ring[id].skb = skb;
145 return skb; 178 return skb;
146 179
147partial_error: 180partial_error:
@@ -155,7 +188,23 @@ partial_error:
155 return NULL; 188 return NULL;
156} 189}
157 190
158static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv* priv) 191static void ipoib_cm_free_rx_ring(struct net_device *dev,
192 struct ipoib_cm_rx_buf *rx_ring)
193{
194 struct ipoib_dev_priv *priv = netdev_priv(dev);
195 int i;
196
197 for (i = 0; i < ipoib_recvq_size; ++i)
198 if (rx_ring[i].skb) {
199 ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
200 rx_ring[i].mapping);
201 dev_kfree_skb_any(rx_ring[i].skb);
202 }
203
204 kfree(rx_ring);
205}
206
207static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
159{ 208{
160 struct ib_send_wr *bad_wr; 209 struct ib_send_wr *bad_wr;
161 struct ipoib_cm_rx *p; 210 struct ipoib_cm_rx *p;
@@ -208,12 +257,18 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
208 .qp_type = IB_QPT_RC, 257 .qp_type = IB_QPT_RC,
209 .qp_context = p, 258 .qp_context = p,
210 }; 259 };
260
261 if (!ipoib_cm_has_srq(dev)) {
262 attr.cap.max_recv_wr = ipoib_recvq_size;
263 attr.cap.max_recv_sge = IPOIB_CM_RX_SG;
264 }
265
211 return ib_create_qp(priv->pd, &attr); 266 return ib_create_qp(priv->pd, &attr);
212} 267}
213 268
214static int ipoib_cm_modify_rx_qp(struct net_device *dev, 269static int ipoib_cm_modify_rx_qp(struct net_device *dev,
215 struct ib_cm_id *cm_id, struct ib_qp *qp, 270 struct ib_cm_id *cm_id, struct ib_qp *qp,
216 unsigned psn) 271 unsigned psn)
217{ 272{
218 struct ipoib_dev_priv *priv = netdev_priv(dev); 273 struct ipoib_dev_priv *priv = netdev_priv(dev);
219 struct ib_qp_attr qp_attr; 274 struct ib_qp_attr qp_attr;
@@ -266,6 +321,60 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev,
266 return 0; 321 return 0;
267} 322}
268 323
324static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
325 struct ipoib_cm_rx *rx)
326{
327 struct ipoib_dev_priv *priv = netdev_priv(dev);
328 int ret;
329 int i;
330
331 rx->rx_ring = kcalloc(ipoib_recvq_size, sizeof *rx->rx_ring, GFP_KERNEL);
332 if (!rx->rx_ring)
333 return -ENOMEM;
334
335 spin_lock_irq(&priv->lock);
336
337 if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) {
338 spin_unlock_irq(&priv->lock);
339 ib_send_cm_rej(cm_id, IB_CM_REJ_NO_QP, NULL, 0, NULL, 0);
340 ret = -EINVAL;
341 goto err_free;
342 } else
343 ++priv->cm.nonsrq_conn_qp;
344
345 spin_unlock_irq(&priv->lock);
346
347 for (i = 0; i < ipoib_recvq_size; ++i) {
348 if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1,
349 rx->rx_ring[i].mapping)) {
350 ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
351 ret = -ENOMEM;
352 goto err_count;
353 }
354 ret = ipoib_cm_post_receive_nonsrq(dev, rx, i);
355 if (ret) {
356 ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq "
357 "failed for buf %d\n", i);
358 ret = -EIO;
359 goto err_count;
360 }
361 }
362
363 rx->recv_count = ipoib_recvq_size;
364
365 return 0;
366
367err_count:
368 spin_lock_irq(&priv->lock);
369 --priv->cm.nonsrq_conn_qp;
370 spin_unlock_irq(&priv->lock);
371
372err_free:
373 ipoib_cm_free_rx_ring(dev, rx->rx_ring);
374
375 return ret;
376}
377
269static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, 378static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
270 struct ib_qp *qp, struct ib_cm_req_event_param *req, 379 struct ib_qp *qp, struct ib_cm_req_event_param *req,
271 unsigned psn) 380 unsigned psn)
@@ -281,7 +390,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
281 rep.private_data_len = sizeof data; 390 rep.private_data_len = sizeof data;
282 rep.flow_control = 0; 391 rep.flow_control = 0;
283 rep.rnr_retry_count = req->rnr_retry_count; 392 rep.rnr_retry_count = req->rnr_retry_count;
284 rep.srq = 1; 393 rep.srq = ipoib_cm_has_srq(dev);
285 rep.qp_num = qp->qp_num; 394 rep.qp_num = qp->qp_num;
286 rep.starting_psn = psn; 395 rep.starting_psn = psn;
287 return ib_send_cm_rep(cm_id, &rep); 396 return ib_send_cm_rep(cm_id, &rep);
@@ -317,6 +426,12 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
317 if (ret) 426 if (ret)
318 goto err_modify; 427 goto err_modify;
319 428
429 if (!ipoib_cm_has_srq(dev)) {
430 ret = ipoib_cm_nonsrq_init_rx(dev, cm_id, p);
431 if (ret)
432 goto err_modify;
433 }
434
320 spin_lock_irq(&priv->lock); 435 spin_lock_irq(&priv->lock);
321 queue_delayed_work(ipoib_workqueue, 436 queue_delayed_work(ipoib_workqueue,
322 &priv->cm.stale_task, IPOIB_CM_RX_DELAY); 437 &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
@@ -401,12 +516,14 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
401void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) 516void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
402{ 517{
403 struct ipoib_dev_priv *priv = netdev_priv(dev); 518 struct ipoib_dev_priv *priv = netdev_priv(dev);
519 struct ipoib_cm_rx_buf *rx_ring;
404 unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV); 520 unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
405 struct sk_buff *skb, *newskb; 521 struct sk_buff *skb, *newskb;
406 struct ipoib_cm_rx *p; 522 struct ipoib_cm_rx *p;
407 unsigned long flags; 523 unsigned long flags;
408 u64 mapping[IPOIB_CM_RX_SG]; 524 u64 mapping[IPOIB_CM_RX_SG];
409 int frags; 525 int frags;
526 int has_srq;
410 527
411 ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n", 528 ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
412 wr_id, wc->status); 529 wr_id, wc->status);
@@ -424,18 +541,32 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
424 return; 541 return;
425 } 542 }
426 543
427 skb = priv->cm.srq_ring[wr_id].skb; 544 p = wc->qp->qp_context;
545
546 has_srq = ipoib_cm_has_srq(dev);
547 rx_ring = has_srq ? priv->cm.srq_ring : p->rx_ring;
548
549 skb = rx_ring[wr_id].skb;
428 550
429 if (unlikely(wc->status != IB_WC_SUCCESS)) { 551 if (unlikely(wc->status != IB_WC_SUCCESS)) {
430 ipoib_dbg(priv, "cm recv error " 552 ipoib_dbg(priv, "cm recv error "
431 "(status=%d, wrid=%d vend_err %x)\n", 553 "(status=%d, wrid=%d vend_err %x)\n",
432 wc->status, wr_id, wc->vendor_err); 554 wc->status, wr_id, wc->vendor_err);
433 ++dev->stats.rx_dropped; 555 ++dev->stats.rx_dropped;
434 goto repost; 556 if (has_srq)
557 goto repost;
558 else {
559 if (!--p->recv_count) {
560 spin_lock_irqsave(&priv->lock, flags);
561 list_move(&p->list, &priv->cm.rx_reap_list);
562 spin_unlock_irqrestore(&priv->lock, flags);
563 queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
564 }
565 return;
566 }
435 } 567 }
436 568
437 if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) { 569 if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) {
438 p = wc->qp->qp_context;
439 if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) { 570 if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
440 spin_lock_irqsave(&priv->lock, flags); 571 spin_lock_irqsave(&priv->lock, flags);
441 p->jiffies = jiffies; 572 p->jiffies = jiffies;
@@ -450,7 +581,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
450 frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, 581 frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
451 (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; 582 (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
452 583
453 newskb = ipoib_cm_alloc_rx_skb(dev, wr_id, frags, mapping); 584 newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, mapping);
454 if (unlikely(!newskb)) { 585 if (unlikely(!newskb)) {
455 /* 586 /*
456 * If we can't allocate a new RX buffer, dump 587 * If we can't allocate a new RX buffer, dump
@@ -461,8 +592,8 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
461 goto repost; 592 goto repost;
462 } 593 }
463 594
464 ipoib_cm_dma_unmap_rx(priv, frags, priv->cm.srq_ring[wr_id].mapping); 595 ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping);
465 memcpy(priv->cm.srq_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping); 596 memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping);
466 597
467 ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", 598 ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
468 wc->byte_len, wc->slid); 599 wc->byte_len, wc->slid);
@@ -483,9 +614,17 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
483 netif_receive_skb(skb); 614 netif_receive_skb(skb);
484 615
485repost: 616repost:
486 if (unlikely(ipoib_cm_post_receive(dev, wr_id))) 617 if (has_srq) {
487 ipoib_warn(priv, "ipoib_cm_post_receive failed " 618 if (unlikely(ipoib_cm_post_receive_srq(dev, wr_id)))
488 "for buf %d\n", wr_id); 619 ipoib_warn(priv, "ipoib_cm_post_receive_srq failed "
620 "for buf %d\n", wr_id);
621 } else {
622 if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, wr_id))) {
623 --p->recv_count;
624 ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed "
625 "for buf %d\n", wr_id);
626 }
627 }
489} 628}
490 629
491static inline int post_send(struct ipoib_dev_priv *priv, 630static inline int post_send(struct ipoib_dev_priv *priv,
@@ -495,10 +634,10 @@ static inline int post_send(struct ipoib_dev_priv *priv,
495{ 634{
496 struct ib_send_wr *bad_wr; 635 struct ib_send_wr *bad_wr;
497 636
498 priv->tx_sge.addr = addr; 637 priv->tx_sge.addr = addr;
499 priv->tx_sge.length = len; 638 priv->tx_sge.length = len;
500 639
501 priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM; 640 priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM;
502 641
503 return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr); 642 return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
504} 643}
@@ -540,7 +679,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
540 tx_req->mapping = addr; 679 tx_req->mapping = addr;
541 680
542 if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), 681 if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
543 addr, skb->len))) { 682 addr, skb->len))) {
544 ipoib_warn(priv, "post_send failed\n"); 683 ipoib_warn(priv, "post_send failed\n");
545 ++dev->stats.tx_errors; 684 ++dev->stats.tx_errors;
546 ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE); 685 ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
@@ -657,10 +796,33 @@ err_cm:
657 return ret; 796 return ret;
658} 797}
659 798
799static void ipoib_cm_free_rx_reap_list(struct net_device *dev)
800{
801 struct ipoib_dev_priv *priv = netdev_priv(dev);
802 struct ipoib_cm_rx *rx, *n;
803 LIST_HEAD(list);
804
805 spin_lock_irq(&priv->lock);
806 list_splice_init(&priv->cm.rx_reap_list, &list);
807 spin_unlock_irq(&priv->lock);
808
809 list_for_each_entry_safe(rx, n, &list, list) {
810 ib_destroy_cm_id(rx->id);
811 ib_destroy_qp(rx->qp);
812 if (!ipoib_cm_has_srq(dev)) {
813 ipoib_cm_free_rx_ring(priv->dev, rx->rx_ring);
814 spin_lock_irq(&priv->lock);
815 --priv->cm.nonsrq_conn_qp;
816 spin_unlock_irq(&priv->lock);
817 }
818 kfree(rx);
819 }
820}
821
660void ipoib_cm_dev_stop(struct net_device *dev) 822void ipoib_cm_dev_stop(struct net_device *dev)
661{ 823{
662 struct ipoib_dev_priv *priv = netdev_priv(dev); 824 struct ipoib_dev_priv *priv = netdev_priv(dev);
663 struct ipoib_cm_rx *p, *n; 825 struct ipoib_cm_rx *p;
664 unsigned long begin; 826 unsigned long begin;
665 LIST_HEAD(list); 827 LIST_HEAD(list);
666 int ret; 828 int ret;
@@ -706,15 +868,9 @@ void ipoib_cm_dev_stop(struct net_device *dev)
706 spin_lock_irq(&priv->lock); 868 spin_lock_irq(&priv->lock);
707 } 869 }
708 870
709 list_splice_init(&priv->cm.rx_reap_list, &list);
710
711 spin_unlock_irq(&priv->lock); 871 spin_unlock_irq(&priv->lock);
712 872
713 list_for_each_entry_safe(p, n, &list, list) { 873 ipoib_cm_free_rx_reap_list(dev);
714 ib_destroy_cm_id(p->id);
715 ib_destroy_qp(p->qp);
716 kfree(p);
717 }
718 874
719 cancel_delayed_work(&priv->cm.stale_task); 875 cancel_delayed_work(&priv->cm.stale_task);
720} 876}
@@ -799,7 +955,7 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
799 .sq_sig_type = IB_SIGNAL_ALL_WR, 955 .sq_sig_type = IB_SIGNAL_ALL_WR,
800 .qp_type = IB_QPT_RC, 956 .qp_type = IB_QPT_RC,
801 .qp_context = tx 957 .qp_context = tx
802 }; 958 };
803 959
804 return ib_create_qp(priv->pd, &attr); 960 return ib_create_qp(priv->pd, &attr);
805} 961}
@@ -816,28 +972,28 @@ static int ipoib_cm_send_req(struct net_device *dev,
816 data.qpn = cpu_to_be32(priv->qp->qp_num); 972 data.qpn = cpu_to_be32(priv->qp->qp_num);
817 data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE); 973 data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
818 974
819 req.primary_path = pathrec; 975 req.primary_path = pathrec;
820 req.alternate_path = NULL; 976 req.alternate_path = NULL;
821 req.service_id = cpu_to_be64(IPOIB_CM_IETF_ID | qpn); 977 req.service_id = cpu_to_be64(IPOIB_CM_IETF_ID | qpn);
822 req.qp_num = qp->qp_num; 978 req.qp_num = qp->qp_num;
823 req.qp_type = qp->qp_type; 979 req.qp_type = qp->qp_type;
824 req.private_data = &data; 980 req.private_data = &data;
825 req.private_data_len = sizeof data; 981 req.private_data_len = sizeof data;
826 req.flow_control = 0; 982 req.flow_control = 0;
827 983
828 req.starting_psn = 0; /* FIXME */ 984 req.starting_psn = 0; /* FIXME */
829 985
830 /* 986 /*
831 * Pick some arbitrary defaults here; we could make these 987 * Pick some arbitrary defaults here; we could make these
832 * module parameters if anyone cared about setting them. 988 * module parameters if anyone cared about setting them.
833 */ 989 */
834 req.responder_resources = 4; 990 req.responder_resources = 4;
835 req.remote_cm_response_timeout = 20; 991 req.remote_cm_response_timeout = 20;
836 req.local_cm_response_timeout = 20; 992 req.local_cm_response_timeout = 20;
837 req.retry_count = 0; /* RFC draft warns against retries */ 993 req.retry_count = 0; /* RFC draft warns against retries */
838 req.rnr_retry_count = 0; /* RFC draft warns against retries */ 994 req.rnr_retry_count = 0; /* RFC draft warns against retries */
839 req.max_cm_retries = 15; 995 req.max_cm_retries = 15;
840 req.srq = 1; 996 req.srq = ipoib_cm_has_srq(dev);
841 return ib_send_cm_req(id, &req); 997 return ib_send_cm_req(id, &req);
842} 998}
843 999
@@ -1150,7 +1306,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
1150 spin_unlock_irq(&priv->tx_lock); 1306 spin_unlock_irq(&priv->tx_lock);
1151} 1307}
1152 1308
1153void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, 1309void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
1154 unsigned int mtu) 1310 unsigned int mtu)
1155{ 1311{
1156 struct ipoib_dev_priv *priv = netdev_priv(dev); 1312 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -1166,20 +1322,8 @@ void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
1166 1322
1167static void ipoib_cm_rx_reap(struct work_struct *work) 1323static void ipoib_cm_rx_reap(struct work_struct *work)
1168{ 1324{
1169 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 1325 ipoib_cm_free_rx_reap_list(container_of(work, struct ipoib_dev_priv,
1170 cm.rx_reap_task); 1326 cm.rx_reap_task)->dev);
1171 struct ipoib_cm_rx *p, *n;
1172 LIST_HEAD(list);
1173
1174 spin_lock_irq(&priv->lock);
1175 list_splice_init(&priv->cm.rx_reap_list, &list);
1176 spin_unlock_irq(&priv->lock);
1177
1178 list_for_each_entry_safe(p, n, &list, list) {
1179 ib_destroy_cm_id(p->id);
1180 ib_destroy_qp(p->qp);
1181 kfree(p);
1182 }
1183} 1327}
1184 1328
1185static void ipoib_cm_stale_task(struct work_struct *work) 1329static void ipoib_cm_stale_task(struct work_struct *work)
@@ -1212,7 +1356,7 @@ static void ipoib_cm_stale_task(struct work_struct *work)
1212} 1356}
1213 1357
1214 1358
1215static ssize_t show_mode(struct device *d, struct device_attribute *attr, 1359static ssize_t show_mode(struct device *d, struct device_attribute *attr,
1216 char *buf) 1360 char *buf)
1217{ 1361{
1218 struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(d)); 1362 struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(d));
@@ -1255,16 +1399,40 @@ int ipoib_cm_add_mode_attr(struct net_device *dev)
1255 return device_create_file(&dev->dev, &dev_attr_mode); 1399 return device_create_file(&dev->dev, &dev_attr_mode);
1256} 1400}
1257 1401
1258int ipoib_cm_dev_init(struct net_device *dev) 1402static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
1259{ 1403{
1260 struct ipoib_dev_priv *priv = netdev_priv(dev); 1404 struct ipoib_dev_priv *priv = netdev_priv(dev);
1261 struct ib_srq_init_attr srq_init_attr = { 1405 struct ib_srq_init_attr srq_init_attr = {
1262 .attr = { 1406 .attr = {
1263 .max_wr = ipoib_recvq_size, 1407 .max_wr = ipoib_recvq_size,
1264 .max_sge = IPOIB_CM_RX_SG 1408 .max_sge = max_sge
1265 } 1409 }
1266 }; 1410 };
1267 int ret, i; 1411
1412 priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
1413 if (IS_ERR(priv->cm.srq)) {
1414 if (PTR_ERR(priv->cm.srq) != -ENOSYS)
1415 printk(KERN_WARNING "%s: failed to allocate SRQ, error %ld\n",
1416 priv->ca->name, PTR_ERR(priv->cm.srq));
1417 priv->cm.srq = NULL;
1418 return;
1419 }
1420
1421 priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring,
1422 GFP_KERNEL);
1423 if (!priv->cm.srq_ring) {
1424 printk(KERN_WARNING "%s: failed to allocate CM SRQ ring (%d entries)\n",
1425 priv->ca->name, ipoib_recvq_size);
1426 ib_destroy_srq(priv->cm.srq);
1427 priv->cm.srq = NULL;
1428 }
1429}
1430
1431int ipoib_cm_dev_init(struct net_device *dev)
1432{
1433 struct ipoib_dev_priv *priv = netdev_priv(dev);
1434 int i, ret;
1435 struct ib_device_attr attr;
1268 1436
1269 INIT_LIST_HEAD(&priv->cm.passive_ids); 1437 INIT_LIST_HEAD(&priv->cm.passive_ids);
1270 INIT_LIST_HEAD(&priv->cm.reap_list); 1438 INIT_LIST_HEAD(&priv->cm.reap_list);
@@ -1281,43 +1449,53 @@ int ipoib_cm_dev_init(struct net_device *dev)
1281 1449
1282 skb_queue_head_init(&priv->cm.skb_queue); 1450 skb_queue_head_init(&priv->cm.skb_queue);
1283 1451
1284 priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr); 1452 ret = ib_query_device(priv->ca, &attr);
1285 if (IS_ERR(priv->cm.srq)) { 1453 if (ret) {
1286 ret = PTR_ERR(priv->cm.srq); 1454 printk(KERN_WARNING "ib_query_device() failed with %d\n", ret);
1287 priv->cm.srq = NULL;
1288 return ret; 1455 return ret;
1289 } 1456 }
1290 1457
1291 priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring, 1458 ipoib_dbg(priv, "max_srq_sge=%d\n", attr.max_srq_sge);
1292 GFP_KERNEL); 1459
1293 if (!priv->cm.srq_ring) { 1460 attr.max_srq_sge = min_t(int, IPOIB_CM_RX_SG, attr.max_srq_sge);
1294 printk(KERN_WARNING "%s: failed to allocate CM ring (%d entries)\n", 1461 ipoib_cm_create_srq(dev, attr.max_srq_sge);
1295 priv->ca->name, ipoib_recvq_size); 1462 if (ipoib_cm_has_srq(dev)) {
1296 ipoib_cm_dev_cleanup(dev); 1463 priv->cm.max_cm_mtu = attr.max_srq_sge * PAGE_SIZE - 0x10;
1297 return -ENOMEM; 1464 priv->cm.num_frags = attr.max_srq_sge;
1465 ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n",
1466 priv->cm.max_cm_mtu, priv->cm.num_frags);
1467 } else {
1468 priv->cm.max_cm_mtu = IPOIB_CM_MTU;
1469 priv->cm.num_frags = IPOIB_CM_RX_SG;
1298 } 1470 }
1299 1471
1300 for (i = 0; i < IPOIB_CM_RX_SG; ++i) 1472 for (i = 0; i < priv->cm.num_frags; ++i)
1301 priv->cm.rx_sge[i].lkey = priv->mr->lkey; 1473 priv->cm.rx_sge[i].lkey = priv->mr->lkey;
1302 1474
1303 priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE; 1475 priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
1304 for (i = 1; i < IPOIB_CM_RX_SG; ++i) 1476 for (i = 1; i < priv->cm.num_frags; ++i)
1305 priv->cm.rx_sge[i].length = PAGE_SIZE; 1477 priv->cm.rx_sge[i].length = PAGE_SIZE;
1306 priv->cm.rx_wr.next = NULL; 1478 priv->cm.rx_wr.next = NULL;
1307 priv->cm.rx_wr.sg_list = priv->cm.rx_sge; 1479 priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
1308 priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG; 1480 priv->cm.rx_wr.num_sge = priv->cm.num_frags;
1481
1482 if (ipoib_cm_has_srq(dev)) {
1483 for (i = 0; i < ipoib_recvq_size; ++i) {
1484 if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
1485 priv->cm.num_frags - 1,
1486 priv->cm.srq_ring[i].mapping)) {
1487 ipoib_warn(priv, "failed to allocate "
1488 "receive buffer %d\n", i);
1489 ipoib_cm_dev_cleanup(dev);
1490 return -ENOMEM;
1491 }
1309 1492
1310 for (i = 0; i < ipoib_recvq_size; ++i) { 1493 if (ipoib_cm_post_receive_srq(dev, i)) {
1311 if (!ipoib_cm_alloc_rx_skb(dev, i, IPOIB_CM_RX_SG - 1, 1494 ipoib_warn(priv, "ipoib_cm_post_receive_srq "
1312 priv->cm.srq_ring[i].mapping)) { 1495 "failed for buf %d\n", i);
1313 ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); 1496 ipoib_cm_dev_cleanup(dev);
1314 ipoib_cm_dev_cleanup(dev); 1497 return -EIO;
1315 return -ENOMEM; 1498 }
1316 }
1317 if (ipoib_cm_post_receive(dev, i)) {
1318 ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
1319 ipoib_cm_dev_cleanup(dev);
1320 return -EIO;
1321 } 1499 }
1322 } 1500 }
1323 1501
@@ -1328,7 +1506,7 @@ int ipoib_cm_dev_init(struct net_device *dev)
1328void ipoib_cm_dev_cleanup(struct net_device *dev) 1506void ipoib_cm_dev_cleanup(struct net_device *dev)
1329{ 1507{
1330 struct ipoib_dev_priv *priv = netdev_priv(dev); 1508 struct ipoib_dev_priv *priv = netdev_priv(dev);
1331 int i, ret; 1509 int ret;
1332 1510
1333 if (!priv->cm.srq) 1511 if (!priv->cm.srq)
1334 return; 1512 return;
@@ -1342,13 +1520,7 @@ void ipoib_cm_dev_cleanup(struct net_device *dev)
1342 priv->cm.srq = NULL; 1520 priv->cm.srq = NULL;
1343 if (!priv->cm.srq_ring) 1521 if (!priv->cm.srq_ring)
1344 return; 1522 return;
1345 for (i = 0; i < ipoib_recvq_size; ++i) 1523
1346 if (priv->cm.srq_ring[i].skb) { 1524 ipoib_cm_free_rx_ring(dev, priv->cm.srq_ring);
1347 ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
1348 priv->cm.srq_ring[i].mapping);
1349 dev_kfree_skb_any(priv->cm.srq_ring[i].skb);
1350 priv->cm.srq_ring[i].skb = NULL;
1351 }
1352 kfree(priv->cm.srq_ring);
1353 priv->cm.srq_ring = NULL; 1525 priv->cm.srq_ring = NULL;
1354} 1526}
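
The reworked ipoib_cm_dev_init() above no longer assumes IPOIB_CM_RX_SG fragments per connected-mode receive: it queries the device, clips max_srq_sge to IPOIB_CM_RX_SG, and only then derives the connected-mode MTU and fragment count, falling back to the fixed IPOIB_CM_MTU when ib_create_srq() is not supported. A minimal standalone sketch of that arithmetic follows; the constant values and the example max_srq_sge are assumptions for illustration, not the driver's definitions.

/* Sketch of the SRQ-based CM MTU derivation shown in ipoib_cm_dev_init()
 * above.  PAGE_SIZE, IPOIB_CM_RX_SG and IPOIB_CM_MTU are assumed values. */
#include <stdio.h>

#define PAGE_SIZE       4096
#define IPOIB_CM_RX_SG  17
#define IPOIB_CM_MTU    (0x10000 - 0x10)

static int min_int(int a, int b) { return a < b ? a : b; }

int main(void)
{
	int max_srq_sge = 30;   /* example value reported by ib_query_device() */
	int has_srq = 1;        /* pretend ib_create_srq() succeeded */
	int num_frags, max_cm_mtu;

	max_srq_sge = min_int(IPOIB_CM_RX_SG, max_srq_sge);
	if (has_srq) {
		/* one page per fragment, minus 0x10 bytes of header room */
		max_cm_mtu = max_srq_sge * PAGE_SIZE - 0x10;
		num_frags  = max_srq_sge;
	} else {
		max_cm_mtu = IPOIB_CM_MTU;
		num_frags  = IPOIB_CM_RX_SG;
	}
	printf("num_frags=%d max_cm_mtu=0x%x\n", num_frags, max_cm_mtu);
	return 0;
}

With max_srq_sge clipped to IPOIB_CM_RX_SG, a device that supports fewer scatter/gather entries than the default gets a proportionally smaller connected-mode MTU instead of failing to initialize.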
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 44c174182a82..8b882bbd1d05 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -124,7 +124,7 @@ static int ipoib_mcg_seq_show(struct seq_file *file, void *iter_ptr)
124 return 0; 124 return 0;
125} 125}
126 126
127static struct seq_operations ipoib_mcg_seq_ops = { 127static const struct seq_operations ipoib_mcg_seq_ops = {
128 .start = ipoib_mcg_seq_start, 128 .start = ipoib_mcg_seq_start,
129 .next = ipoib_mcg_seq_next, 129 .next = ipoib_mcg_seq_next,
130 .stop = ipoib_mcg_seq_stop, 130 .stop = ipoib_mcg_seq_stop,
@@ -230,7 +230,7 @@ static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr)
230 return 0; 230 return 0;
231} 231}
232 232
233static struct seq_operations ipoib_path_seq_ops = { 233static const struct seq_operations ipoib_path_seq_ops = {
234 .start = ipoib_path_seq_start, 234 .start = ipoib_path_seq_start,
235 .next = ipoib_path_seq_next, 235 .next = ipoib_path_seq_next,
236 .stop = ipoib_path_seq_stop, 236 .stop = ipoib_path_seq_stop,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 5063dd509ad2..52bc2bd5799a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -345,12 +345,12 @@ static inline int post_send(struct ipoib_dev_priv *priv,
345{ 345{
346 struct ib_send_wr *bad_wr; 346 struct ib_send_wr *bad_wr;
347 347
348 priv->tx_sge.addr = addr; 348 priv->tx_sge.addr = addr;
349 priv->tx_sge.length = len; 349 priv->tx_sge.length = len;
350 350
351 priv->tx_wr.wr_id = wr_id; 351 priv->tx_wr.wr_id = wr_id;
352 priv->tx_wr.wr.ud.remote_qpn = qpn; 352 priv->tx_wr.wr.ud.remote_qpn = qpn;
353 priv->tx_wr.wr.ud.ah = address; 353 priv->tx_wr.wr.ud.ah = address;
354 354
355 return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr); 355 return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
356} 356}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index c9f6077b615e..a082466f4a83 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -182,17 +182,20 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
182 struct ipoib_dev_priv *priv = netdev_priv(dev); 182 struct ipoib_dev_priv *priv = netdev_priv(dev);
183 183
184 /* dev->mtu > 2K ==> connected mode */ 184 /* dev->mtu > 2K ==> connected mode */
185 if (ipoib_cm_admin_enabled(dev) && new_mtu <= IPOIB_CM_MTU) { 185 if (ipoib_cm_admin_enabled(dev)) {
186 if (new_mtu > ipoib_cm_max_mtu(dev))
187 return -EINVAL;
188
186 if (new_mtu > priv->mcast_mtu) 189 if (new_mtu > priv->mcast_mtu)
187 ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n", 190 ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
188 priv->mcast_mtu); 191 priv->mcast_mtu);
192
189 dev->mtu = new_mtu; 193 dev->mtu = new_mtu;
190 return 0; 194 return 0;
191 } 195 }
192 196
193 if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) { 197 if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN)
194 return -EINVAL; 198 return -EINVAL;
195 }
196 199
197 priv->admin_mtu = new_mtu; 200 priv->admin_mtu = new_mtu;
198 201
@@ -474,8 +477,8 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
474 INIT_LIST_HEAD(&path->neigh_list); 477 INIT_LIST_HEAD(&path->neigh_list);
475 478
476 memcpy(path->pathrec.dgid.raw, gid, sizeof (union ib_gid)); 479 memcpy(path->pathrec.dgid.raw, gid, sizeof (union ib_gid));
477 path->pathrec.sgid = priv->local_gid; 480 path->pathrec.sgid = priv->local_gid;
478 path->pathrec.pkey = cpu_to_be16(priv->pkey); 481 path->pathrec.pkey = cpu_to_be16(priv->pkey);
479 path->pathrec.numb_path = 1; 482 path->pathrec.numb_path = 1;
480 path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class; 483 path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class;
481 484
@@ -669,16 +672,6 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
669 if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags))) 672 if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags)))
670 return NETDEV_TX_LOCKED; 673 return NETDEV_TX_LOCKED;
671 674
672 /*
673 * Check if our queue is stopped. Since we have the LLTX bit
674 * set, we can't rely on netif_stop_queue() preventing our
675 * xmit function from being called with a full queue.
676 */
677 if (unlikely(netif_queue_stopped(dev))) {
678 spin_unlock_irqrestore(&priv->tx_lock, flags);
679 return NETDEV_TX_BUSY;
680 }
681
682 if (likely(skb->dst && skb->dst->neighbour)) { 675 if (likely(skb->dst && skb->dst->neighbour)) {
683 if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) { 676 if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
684 ipoib_path_lookup(skb, dev); 677 ipoib_path_lookup(skb, dev);
@@ -950,34 +943,34 @@ static void ipoib_setup(struct net_device *dev)
950{ 943{
951 struct ipoib_dev_priv *priv = netdev_priv(dev); 944 struct ipoib_dev_priv *priv = netdev_priv(dev);
952 945
953 dev->open = ipoib_open; 946 dev->open = ipoib_open;
954 dev->stop = ipoib_stop; 947 dev->stop = ipoib_stop;
955 dev->change_mtu = ipoib_change_mtu; 948 dev->change_mtu = ipoib_change_mtu;
956 dev->hard_start_xmit = ipoib_start_xmit; 949 dev->hard_start_xmit = ipoib_start_xmit;
957 dev->tx_timeout = ipoib_timeout; 950 dev->tx_timeout = ipoib_timeout;
958 dev->header_ops = &ipoib_header_ops; 951 dev->header_ops = &ipoib_header_ops;
959 dev->set_multicast_list = ipoib_set_mcast_list; 952 dev->set_multicast_list = ipoib_set_mcast_list;
960 dev->neigh_setup = ipoib_neigh_setup_dev; 953 dev->neigh_setup = ipoib_neigh_setup_dev;
961 954
962 netif_napi_add(dev, &priv->napi, ipoib_poll, 100); 955 netif_napi_add(dev, &priv->napi, ipoib_poll, 100);
963 956
964 dev->watchdog_timeo = HZ; 957 dev->watchdog_timeo = HZ;
965 958
966 dev->flags |= IFF_BROADCAST | IFF_MULTICAST; 959 dev->flags |= IFF_BROADCAST | IFF_MULTICAST;
967 960
968 /* 961 /*
969 * We add in INFINIBAND_ALEN to allow for the destination 962 * We add in INFINIBAND_ALEN to allow for the destination
970 * address "pseudoheader" for skbs without neighbour struct. 963 * address "pseudoheader" for skbs without neighbour struct.
971 */ 964 */
972 dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN; 965 dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN;
973 dev->addr_len = INFINIBAND_ALEN; 966 dev->addr_len = INFINIBAND_ALEN;
974 dev->type = ARPHRD_INFINIBAND; 967 dev->type = ARPHRD_INFINIBAND;
975 dev->tx_queue_len = ipoib_sendq_size * 2; 968 dev->tx_queue_len = ipoib_sendq_size * 2;
976 dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX; 969 dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;
977 970
978 /* MTU will be reset when mcast join happens */ 971 /* MTU will be reset when mcast join happens */
979 dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN; 972 dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN;
980 priv->mcast_mtu = priv->admin_mtu = dev->mtu; 973 priv->mcast_mtu = priv->admin_mtu = dev->mtu;
981 974
982 memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); 975 memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
983 976
@@ -1268,6 +1261,9 @@ static int __init ipoib_init_module(void)
1268 ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size); 1261 ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
1269 ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE); 1262 ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
1270 ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE); 1263 ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE);
1264#ifdef CONFIG_INFINIBAND_IPOIB_CM
1265 ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
1266#endif
1271 1267
1272 ret = ipoib_register_debugfs(); 1268 ret = ipoib_register_debugfs();
1273 if (ret) 1269 if (ret)
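
In the ipoib_change_mtu() hunk above, a connected-mode MTU request is now validated against the per-device limit reported by ipoib_cm_max_mtu() rather than the fixed IPOIB_CM_MTU, while datagram mode keeps its 2K packet limit; the module-init hunk additionally clamps ipoib_max_conn_qp. A small sketch of the resulting admission logic, with illustrative constants standing in for the driver's values:

/* Sketch of the MTU validation flow in ipoib_change_mtu() above.
 * IPOIB_PACKET_SIZE / IPOIB_ENCAP_LEN and the sample arguments are
 * illustrative, not taken from the driver headers. */
#include <stdio.h>

#define IPOIB_PACKET_SIZE 2048
#define IPOIB_ENCAP_LEN   4

static int change_mtu(int cm_enabled, int cm_max_mtu, int mcast_mtu, int new_mtu)
{
	if (cm_enabled) {
		if (new_mtu > cm_max_mtu)
			return -1;               /* -EINVAL in the driver */
		if (new_mtu > mcast_mtu)
			printf("warning: mtu > %d will drop multicast\n", mcast_mtu);
		return 0;                        /* dev->mtu = new_mtu */
	}
	if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN)
		return -1;
	return 0;                                /* priv->admin_mtu = new_mtu */
}

int main(void)
{
	printf("%d\n", change_mtu(1, 65520, 2044, 32768)); /* accepted, with warning */
	printf("%d\n", change_mtu(0, 0, 2044, 9000));      /* rejected in datagram mode */
	return 0;
}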
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 9bcfc7ad6aa6..2628339e3a99 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -702,7 +702,7 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
702 702
703out: 703out:
704 if (mcast && mcast->ah) { 704 if (mcast && mcast->ah) {
705 if (skb->dst && 705 if (skb->dst &&
706 skb->dst->neighbour && 706 skb->dst->neighbour &&
707 !*to_ipoib_neigh(skb->dst->neighbour)) { 707 !*to_ipoib_neigh(skb->dst->neighbour)) {
708 struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour, 708 struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour,
@@ -710,7 +710,7 @@ out:
710 710
711 if (neigh) { 711 if (neigh) {
712 kref_get(&mcast->ah->ref); 712 kref_get(&mcast->ah->ref);
713 neigh->ah = mcast->ah; 713 neigh->ah = mcast->ah;
714 list_add_tail(&neigh->list, &mcast->neigh_list); 714 list_add_tail(&neigh->list, &mcast->neigh_list);
715 } 715 }
716 } 716 }
@@ -788,10 +788,6 @@ void ipoib_mcast_restart_task(struct work_struct *work)
788 788
789 memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid); 789 memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid);
790 790
791 /* Add in the P_Key */
792 mgid.raw[4] = (priv->pkey >> 8) & 0xff;
793 mgid.raw[5] = priv->pkey & 0xff;
794
795 mcast = __ipoib_mcast_find(dev, &mgid); 791 mcast = __ipoib_mcast_find(dev, &mgid);
796 if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 792 if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
797 struct ipoib_mcast *nmcast; 793 struct ipoib_mcast *nmcast;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 3c6e45db0ab5..433e99ac227b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -172,8 +172,12 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
172 172
173 size = ipoib_sendq_size + ipoib_recvq_size + 1; 173 size = ipoib_sendq_size + ipoib_recvq_size + 1;
174 ret = ipoib_cm_dev_init(dev); 174 ret = ipoib_cm_dev_init(dev);
175 if (!ret) 175 if (!ret) {
176 size += ipoib_recvq_size + 1 /* 1 extra for rx_drain_qp */; 176 if (ipoib_cm_has_srq(dev))
177 size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */
178 else
179 size += ipoib_recvq_size * ipoib_max_conn_qp;
180 }
177 181
178 priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); 182 priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
179 if (IS_ERR(priv->cq)) { 183 if (IS_ERR(priv->cq)) {
@@ -197,12 +201,12 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
197 priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff; 201 priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff;
198 priv->dev->dev_addr[3] = (priv->qp->qp_num ) & 0xff; 202 priv->dev->dev_addr[3] = (priv->qp->qp_num ) & 0xff;
199 203
200 priv->tx_sge.lkey = priv->mr->lkey; 204 priv->tx_sge.lkey = priv->mr->lkey;
201 205
202 priv->tx_wr.opcode = IB_WR_SEND; 206 priv->tx_wr.opcode = IB_WR_SEND;
203 priv->tx_wr.sg_list = &priv->tx_sge; 207 priv->tx_wr.sg_list = &priv->tx_sge;
204 priv->tx_wr.num_sge = 1; 208 priv->tx_wr.num_sge = 1;
205 priv->tx_wr.send_flags = IB_SEND_SIGNALED; 209 priv->tx_wr.send_flags = IB_SEND_SIGNALED;
206 210
207 return 0; 211 return 0;
208 212
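
The ipoib_transport_dev_init() change above sizes the CQ differently depending on whether connected mode has a shared SRQ: with an SRQ only one extra receive ring (plus the rx_drain_qp slot) is needed, while without one each of the up to ipoib_max_conn_qp connected QPs brings its own receive ring. A sketch of that arithmetic with illustrative queue sizes:

/* Sketch of the CQ sizing arithmetic from ipoib_transport_dev_init() above;
 * the send/receive queue sizes and ipoib_max_conn_qp are assumed values. */
#include <stdio.h>

int main(void)
{
	int sendq = 64, recvq = 256;     /* ipoib_sendq_size / ipoib_recvq_size (assumed) */
	int max_conn_qp = 128;           /* ipoib_max_conn_qp (assumed) */
	int has_srq = 0;                 /* device without SRQ support */
	int size = sendq + recvq + 1;

	/* connected mode initialized successfully */
	if (has_srq)
		size += recvq + 1;               /* shared SRQ ring + rx_drain_qp */
	else
		size += recvq * max_conn_qp;     /* one receive ring per connected QP */

	printf("CQ entries needed: %d\n", size);
	return 0;
}

Without an SRQ the CQ must be large enough for every connected QP's private receive ring, which is why ipoib_max_conn_qp is clamped at module load in the ipoib_main.c hunk above.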
diff --git a/drivers/infiniband/ulp/iser/Kconfig b/drivers/infiniband/ulp/iser/Kconfig
index fe604c8d2996..77dedba829e6 100644
--- a/drivers/infiniband/ulp/iser/Kconfig
+++ b/drivers/infiniband/ulp/iser/Kconfig
@@ -8,5 +8,5 @@ config INFINIBAND_ISER
8 that speak iSCSI over iSER over InfiniBand. 8 that speak iSCSI over iSER over InfiniBand.
9 9
10 The iSER protocol is defined by IETF. 10 The iSER protocol is defined by IETF.
11 See <http://www.ietf.org/internet-drafts/draft-ietf-ips-iser-05.txt> 11 See <http://www.ietf.org/rfc/rfc5046.txt>
12 and <http://www.infinibandta.org/members/spec/iser_annex_060418.pdf> 12 and <http://www.infinibandta.org/members/spec/Annex_iSER.PDF>
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index bad8dacafd10..dfa5a4544187 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -551,6 +551,7 @@ static struct scsi_host_template iscsi_iser_sht = {
551 .module = THIS_MODULE, 551 .module = THIS_MODULE,
552 .name = "iSCSI Initiator over iSER, v." DRV_VER, 552 .name = "iSCSI Initiator over iSER, v." DRV_VER,
553 .queuecommand = iscsi_queuecommand, 553 .queuecommand = iscsi_queuecommand,
554 .change_queue_depth = iscsi_change_queue_depth,
554 .can_queue = ISCSI_DEF_XMIT_CMDS_MAX - 1, 555 .can_queue = ISCSI_DEF_XMIT_CMDS_MAX - 1,
555 .sg_tablesize = ISCSI_ISER_SG_TABLESIZE, 556 .sg_tablesize = ISCSI_ISER_SG_TABLESIZE,
556 .max_sectors = 1024, 557 .max_sectors = 1024,
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index a6f2303ed14a..ba1b455949c0 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -561,7 +561,7 @@ void iser_rcv_completion(struct iser_desc *rx_desc,
561 if (opcode == ISCSI_OP_SCSI_CMD_RSP) { 561 if (opcode == ISCSI_OP_SCSI_CMD_RSP) {
562 itt = get_itt(hdr->itt); /* mask out cid and age bits */ 562 itt = get_itt(hdr->itt); /* mask out cid and age bits */
563 if (!(itt < session->cmds_max)) 563 if (!(itt < session->cmds_max))
564 iser_err("itt can't be matched to task!!!" 564 iser_err("itt can't be matched to task!!! "
565 "conn %p opcode %d cmds_max %d itt %d\n", 565 "conn %p opcode %d cmds_max %d itt %d\n",
566 conn->iscsi_conn,opcode,session->cmds_max,itt); 566 conn->iscsi_conn,opcode,session->cmds_max,itt);
567 /* use the mapping given with the cmds array indexed by itt */ 567 /* use the mapping given with the cmds array indexed by itt */
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 654a4dce0236..714b8db02b29 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -105,7 +105,7 @@ pd_err:
105} 105}
106 106
107/** 107/**
108 * iser_free_device_ib_res - destory/dealloc/dereg the DMA MR, 108 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR,
109 * CQ and PD created with the device associated with the adapator. 109 * CQ and PD created with the device associated with the adapator.
110 */ 110 */
111static void iser_free_device_ib_res(struct iser_device *device) 111static void iser_free_device_ib_res(struct iser_device *device)
@@ -475,13 +475,11 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
475 iser_disconnected_handler(cma_id); 475 iser_disconnected_handler(cma_id);
476 break; 476 break;
477 case RDMA_CM_EVENT_DEVICE_REMOVAL: 477 case RDMA_CM_EVENT_DEVICE_REMOVAL:
478 iser_err("Device removal is currently unsupported\n");
478 BUG(); 479 BUG();
479 break; 480 break;
480 case RDMA_CM_EVENT_CONNECT_RESPONSE:
481 BUG();
482 break;
483 case RDMA_CM_EVENT_CONNECT_REQUEST:
484 default: 481 default:
482 iser_err("Unexpected RDMA CM event (%d)\n", event->event);
485 break; 483 break;
486 } 484 }
487 return ret; 485 return ret;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index bdb6f8517401..f2d2c7e2c76b 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -272,7 +272,8 @@ static void srp_path_rec_completion(int status,
272 272
273 target->status = status; 273 target->status = status;
274 if (status) 274 if (status)
275 printk(KERN_ERR PFX "Got failed path rec status %d\n", status); 275 shost_printk(KERN_ERR, target->scsi_host,
276 PFX "Got failed path rec status %d\n", status);
276 else 277 else
277 target->path = *pathrec; 278 target->path = *pathrec;
278 complete(&target->done); 279 complete(&target->done);
@@ -303,7 +304,8 @@ static int srp_lookup_path(struct srp_target_port *target)
303 wait_for_completion(&target->done); 304 wait_for_completion(&target->done);
304 305
305 if (target->status < 0) 306 if (target->status < 0)
306 printk(KERN_WARNING PFX "Path record query failed\n"); 307 shost_printk(KERN_WARNING, target->scsi_host,
308 PFX "Path record query failed\n");
307 309
308 return target->status; 310 return target->status;
309} 311}
@@ -379,9 +381,10 @@ static int srp_send_req(struct srp_target_port *target)
379 * the second 8 bytes to the local node GUID. 381 * the second 8 bytes to the local node GUID.
380 */ 382 */
381 if (srp_target_is_topspin(target)) { 383 if (srp_target_is_topspin(target)) {
382 printk(KERN_DEBUG PFX "Topspin/Cisco initiator port ID workaround " 384 shost_printk(KERN_DEBUG, target->scsi_host,
383 "activated for target GUID %016llx\n", 385 PFX "Topspin/Cisco initiator port ID workaround "
384 (unsigned long long) be64_to_cpu(target->ioc_guid)); 386 "activated for target GUID %016llx\n",
387 (unsigned long long) be64_to_cpu(target->ioc_guid));
385 memset(req->priv.initiator_port_id, 0, 8); 388 memset(req->priv.initiator_port_id, 0, 8);
386 memcpy(req->priv.initiator_port_id + 8, 389 memcpy(req->priv.initiator_port_id + 8,
387 &target->srp_host->dev->dev->node_guid, 8); 390 &target->srp_host->dev->dev->node_guid, 8);
@@ -400,7 +403,8 @@ static void srp_disconnect_target(struct srp_target_port *target)
400 403
401 init_completion(&target->done); 404 init_completion(&target->done);
402 if (ib_send_cm_dreq(target->cm_id, NULL, 0)) { 405 if (ib_send_cm_dreq(target->cm_id, NULL, 0)) {
403 printk(KERN_DEBUG PFX "Sending CM DREQ failed\n"); 406 shost_printk(KERN_DEBUG, target->scsi_host,
407 PFX "Sending CM DREQ failed\n");
404 return; 408 return;
405 } 409 }
406 wait_for_completion(&target->done); 410 wait_for_completion(&target->done);
@@ -568,7 +572,8 @@ static int srp_reconnect_target(struct srp_target_port *target)
568 return ret; 572 return ret;
569 573
570err: 574err:
571 printk(KERN_ERR PFX "reconnect failed (%d), removing target port.\n", ret); 575 shost_printk(KERN_ERR, target->scsi_host,
576 PFX "reconnect failed (%d), removing target port.\n", ret);
572 577
573 /* 578 /*
574 * We couldn't reconnect, so kill our target port off. 579 * We couldn't reconnect, so kill our target port off.
@@ -683,8 +688,9 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
683 688
684 if (scmnd->sc_data_direction != DMA_FROM_DEVICE && 689 if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
685 scmnd->sc_data_direction != DMA_TO_DEVICE) { 690 scmnd->sc_data_direction != DMA_TO_DEVICE) {
686 printk(KERN_WARNING PFX "Unhandled data direction %d\n", 691 shost_printk(KERN_WARNING, target->scsi_host,
687 scmnd->sc_data_direction); 692 PFX "Unhandled data direction %d\n",
693 scmnd->sc_data_direction);
688 return -EINVAL; 694 return -EINVAL;
689 } 695 }
690 696
@@ -786,8 +792,9 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
786 } else { 792 } else {
787 scmnd = req->scmnd; 793 scmnd = req->scmnd;
788 if (!scmnd) 794 if (!scmnd)
789 printk(KERN_ERR "Null scmnd for RSP w/tag %016llx\n", 795 shost_printk(KERN_ERR, target->scsi_host,
790 (unsigned long long) rsp->tag); 796 "Null scmnd for RSP w/tag %016llx\n",
797 (unsigned long long) rsp->tag);
791 scmnd->result = rsp->status; 798 scmnd->result = rsp->status;
792 799
793 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) { 800 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
@@ -831,7 +838,8 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
831 if (0) { 838 if (0) {
832 int i; 839 int i;
833 840
834 printk(KERN_ERR PFX "recv completion, opcode 0x%02x\n", opcode); 841 shost_printk(KERN_ERR, target->scsi_host,
842 PFX "recv completion, opcode 0x%02x\n", opcode);
835 843
836 for (i = 0; i < wc->byte_len; ++i) { 844 for (i = 0; i < wc->byte_len; ++i) {
837 if (i % 8 == 0) 845 if (i % 8 == 0)
@@ -852,11 +860,13 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
852 860
853 case SRP_T_LOGOUT: 861 case SRP_T_LOGOUT:
854 /* XXX Handle target logout */ 862 /* XXX Handle target logout */
855 printk(KERN_WARNING PFX "Got target logout request\n"); 863 shost_printk(KERN_WARNING, target->scsi_host,
864 PFX "Got target logout request\n");
856 break; 865 break;
857 866
858 default: 867 default:
859 printk(KERN_WARNING PFX "Unhandled SRP opcode 0x%02x\n", opcode); 868 shost_printk(KERN_WARNING, target->scsi_host,
869 PFX "Unhandled SRP opcode 0x%02x\n", opcode);
860 break; 870 break;
861 } 871 }
862 872
@@ -872,9 +882,10 @@ static void srp_completion(struct ib_cq *cq, void *target_ptr)
872 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 882 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
873 while (ib_poll_cq(cq, 1, &wc) > 0) { 883 while (ib_poll_cq(cq, 1, &wc) > 0) {
874 if (wc.status) { 884 if (wc.status) {
875 printk(KERN_ERR PFX "failed %s status %d\n", 885 shost_printk(KERN_ERR, target->scsi_host,
876 wc.wr_id & SRP_OP_RECV ? "receive" : "send", 886 PFX "failed %s status %d\n",
877 wc.status); 887 wc.wr_id & SRP_OP_RECV ? "receive" : "send",
888 wc.status);
878 target->qp_in_error = 1; 889 target->qp_in_error = 1;
879 break; 890 break;
880 } 891 }
@@ -930,13 +941,18 @@ static int srp_post_recv(struct srp_target_port *target)
930 * req_lim and tx_head. Lock cannot be dropped between call here and 941 * req_lim and tx_head. Lock cannot be dropped between call here and
931 * call to __srp_post_send(). 942 * call to __srp_post_send().
932 */ 943 */
933static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target) 944static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target,
945 enum srp_request_type req_type)
934{ 946{
947 s32 min = (req_type == SRP_REQ_TASK_MGMT) ? 1 : 2;
948
935 if (target->tx_head - target->tx_tail >= SRP_SQ_SIZE) 949 if (target->tx_head - target->tx_tail >= SRP_SQ_SIZE)
936 return NULL; 950 return NULL;
937 951
938 if (unlikely(target->req_lim < 1)) 952 if (target->req_lim < min) {
939 ++target->zero_req_lim; 953 ++target->zero_req_lim;
954 return NULL;
955 }
940 956
941 return target->tx_ring[target->tx_head & SRP_SQ_SIZE]; 957 return target->tx_ring[target->tx_head & SRP_SQ_SIZE];
942} 958}
@@ -993,7 +1009,7 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd,
993 return 0; 1009 return 0;
994 } 1010 }
995 1011
996 iu = __srp_get_tx_iu(target); 1012 iu = __srp_get_tx_iu(target, SRP_REQ_NORMAL);
997 if (!iu) 1013 if (!iu)
998 goto err; 1014 goto err;
999 1015
@@ -1022,12 +1038,13 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd,
1022 1038
1023 len = srp_map_data(scmnd, target, req); 1039 len = srp_map_data(scmnd, target, req);
1024 if (len < 0) { 1040 if (len < 0) {
1025 printk(KERN_ERR PFX "Failed to map data\n"); 1041 shost_printk(KERN_ERR, target->scsi_host,
1042 PFX "Failed to map data\n");
1026 goto err; 1043 goto err;
1027 } 1044 }
1028 1045
1029 if (__srp_post_recv(target)) { 1046 if (__srp_post_recv(target)) {
1030 printk(KERN_ERR PFX "Recv failed\n"); 1047 shost_printk(KERN_ERR, target->scsi_host, PFX "Recv failed\n");
1031 goto err_unmap; 1048 goto err_unmap;
1032 } 1049 }
1033 1050
@@ -1035,7 +1052,7 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd,
1035 DMA_TO_DEVICE); 1052 DMA_TO_DEVICE);
1036 1053
1037 if (__srp_post_send(target, iu, len)) { 1054 if (__srp_post_send(target, iu, len)) {
1038 printk(KERN_ERR PFX "Send failed\n"); 1055 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
1039 goto err_unmap; 1056 goto err_unmap;
1040 } 1057 }
1041 1058
@@ -1090,6 +1107,7 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
1090 struct ib_cm_event *event, 1107 struct ib_cm_event *event,
1091 struct srp_target_port *target) 1108 struct srp_target_port *target)
1092{ 1109{
1110 struct Scsi_Host *shost = target->scsi_host;
1093 struct ib_class_port_info *cpi; 1111 struct ib_class_port_info *cpi;
1094 int opcode; 1112 int opcode;
1095 1113
@@ -1115,19 +1133,22 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
1115 memcpy(target->path.dgid.raw, 1133 memcpy(target->path.dgid.raw,
1116 event->param.rej_rcvd.ari, 16); 1134 event->param.rej_rcvd.ari, 16);
1117 1135
1118 printk(KERN_DEBUG PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n", 1136 shost_printk(KERN_DEBUG, shost,
1119 (unsigned long long) be64_to_cpu(target->path.dgid.global.subnet_prefix), 1137 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
1120 (unsigned long long) be64_to_cpu(target->path.dgid.global.interface_id)); 1138 (unsigned long long) be64_to_cpu(target->path.dgid.global.subnet_prefix),
1139 (unsigned long long) be64_to_cpu(target->path.dgid.global.interface_id));
1121 1140
1122 target->status = SRP_PORT_REDIRECT; 1141 target->status = SRP_PORT_REDIRECT;
1123 } else { 1142 } else {
1124 printk(KERN_WARNING " REJ reason: IB_CM_REJ_PORT_REDIRECT\n"); 1143 shost_printk(KERN_WARNING, shost,
1144 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
1125 target->status = -ECONNRESET; 1145 target->status = -ECONNRESET;
1126 } 1146 }
1127 break; 1147 break;
1128 1148
1129 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID: 1149 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
1130 printk(KERN_WARNING " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); 1150 shost_printk(KERN_WARNING, shost,
1151 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
1131 target->status = -ECONNRESET; 1152 target->status = -ECONNRESET;
1132 break; 1153 break;
1133 1154
@@ -1138,20 +1159,21 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
1138 u32 reason = be32_to_cpu(rej->reason); 1159 u32 reason = be32_to_cpu(rej->reason);
1139 1160
1140 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE) 1161 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
1141 printk(KERN_WARNING PFX 1162 shost_printk(KERN_WARNING, shost,
1142 "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); 1163 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
1143 else 1164 else
1144 printk(KERN_WARNING PFX 1165 shost_printk(KERN_WARNING, shost,
1145 "SRP LOGIN REJECTED, reason 0x%08x\n", reason); 1166 PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason);
1146 } else 1167 } else
1147 printk(KERN_WARNING " REJ reason: IB_CM_REJ_CONSUMER_DEFINED," 1168 shost_printk(KERN_WARNING, shost,
1148 " opcode 0x%02x\n", opcode); 1169 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
1170 " opcode 0x%02x\n", opcode);
1149 target->status = -ECONNRESET; 1171 target->status = -ECONNRESET;
1150 break; 1172 break;
1151 1173
1152 default: 1174 default:
1153 printk(KERN_WARNING " REJ reason 0x%x\n", 1175 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
1154 event->param.rej_rcvd.reason); 1176 event->param.rej_rcvd.reason);
1155 target->status = -ECONNRESET; 1177 target->status = -ECONNRESET;
1156 } 1178 }
1157} 1179}
@@ -1166,7 +1188,8 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1166 1188
1167 switch (event->event) { 1189 switch (event->event) {
1168 case IB_CM_REQ_ERROR: 1190 case IB_CM_REQ_ERROR:
1169 printk(KERN_DEBUG PFX "Sending CM REQ failed\n"); 1191 shost_printk(KERN_DEBUG, target->scsi_host,
1192 PFX "Sending CM REQ failed\n");
1170 comp = 1; 1193 comp = 1;
1171 target->status = -ECONNRESET; 1194 target->status = -ECONNRESET;
1172 break; 1195 break;
@@ -1184,7 +1207,8 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1184 target->scsi_host->can_queue = min(target->req_lim, 1207 target->scsi_host->can_queue = min(target->req_lim,
1185 target->scsi_host->can_queue); 1208 target->scsi_host->can_queue);
1186 } else { 1209 } else {
1187 printk(KERN_WARNING PFX "Unhandled RSP opcode %#x\n", opcode); 1210 shost_printk(KERN_WARNING, target->scsi_host,
1211 PFX "Unhandled RSP opcode %#x\n", opcode);
1188 target->status = -ECONNRESET; 1212 target->status = -ECONNRESET;
1189 break; 1213 break;
1190 } 1214 }
@@ -1230,20 +1254,23 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1230 break; 1254 break;
1231 1255
1232 case IB_CM_REJ_RECEIVED: 1256 case IB_CM_REJ_RECEIVED:
1233 printk(KERN_DEBUG PFX "REJ received\n"); 1257 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
1234 comp = 1; 1258 comp = 1;
1235 1259
1236 srp_cm_rej_handler(cm_id, event, target); 1260 srp_cm_rej_handler(cm_id, event, target);
1237 break; 1261 break;
1238 1262
1239 case IB_CM_DREQ_RECEIVED: 1263 case IB_CM_DREQ_RECEIVED:
1240 printk(KERN_WARNING PFX "DREQ received - connection closed\n"); 1264 shost_printk(KERN_WARNING, target->scsi_host,
1265 PFX "DREQ received - connection closed\n");
1241 if (ib_send_cm_drep(cm_id, NULL, 0)) 1266 if (ib_send_cm_drep(cm_id, NULL, 0))
1242 printk(KERN_ERR PFX "Sending CM DREP failed\n"); 1267 shost_printk(KERN_ERR, target->scsi_host,
1268 PFX "Sending CM DREP failed\n");
1243 break; 1269 break;
1244 1270
1245 case IB_CM_TIMEWAIT_EXIT: 1271 case IB_CM_TIMEWAIT_EXIT:
1246 printk(KERN_ERR PFX "connection closed\n"); 1272 shost_printk(KERN_ERR, target->scsi_host,
1273 PFX "connection closed\n");
1247 1274
1248 comp = 1; 1275 comp = 1;
1249 target->status = 0; 1276 target->status = 0;
@@ -1255,7 +1282,8 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1255 break; 1282 break;
1256 1283
1257 default: 1284 default:
1258 printk(KERN_WARNING PFX "Unhandled CM event %d\n", event->event); 1285 shost_printk(KERN_WARNING, target->scsi_host,
1286 PFX "Unhandled CM event %d\n", event->event);
1259 break; 1287 break;
1260 } 1288 }
1261 1289
@@ -1283,7 +1311,7 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
1283 1311
1284 init_completion(&req->done); 1312 init_completion(&req->done);
1285 1313
1286 iu = __srp_get_tx_iu(target); 1314 iu = __srp_get_tx_iu(target, SRP_REQ_TASK_MGMT);
1287 if (!iu) 1315 if (!iu)
1288 goto out; 1316 goto out;
1289 1317
@@ -1332,7 +1360,7 @@ static int srp_abort(struct scsi_cmnd *scmnd)
1332 struct srp_request *req; 1360 struct srp_request *req;
1333 int ret = SUCCESS; 1361 int ret = SUCCESS;
1334 1362
1335 printk(KERN_ERR "SRP abort called\n"); 1363 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
1336 1364
1337 if (target->qp_in_error) 1365 if (target->qp_in_error)
1338 return FAILED; 1366 return FAILED;
@@ -1362,7 +1390,7 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
1362 struct srp_target_port *target = host_to_target(scmnd->device->host); 1390 struct srp_target_port *target = host_to_target(scmnd->device->host);
1363 struct srp_request *req, *tmp; 1391 struct srp_request *req, *tmp;
1364 1392
1365 printk(KERN_ERR "SRP reset_device called\n"); 1393 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
1366 1394
1367 if (target->qp_in_error) 1395 if (target->qp_in_error)
1368 return FAILED; 1396 return FAILED;
@@ -1389,7 +1417,7 @@ static int srp_reset_host(struct scsi_cmnd *scmnd)
1389 struct srp_target_port *target = host_to_target(scmnd->device->host); 1417 struct srp_target_port *target = host_to_target(scmnd->device->host);
1390 int ret = FAILED; 1418 int ret = FAILED;
1391 1419
1392 printk(KERN_ERR PFX "SRP reset_host called\n"); 1420 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
1393 1421
1394 if (!srp_reconnect_target(target)) 1422 if (!srp_reconnect_target(target))
1395 ret = SUCCESS; 1423 ret = SUCCESS;
@@ -1543,6 +1571,7 @@ static struct scsi_host_template srp_template = {
1543 .this_id = -1, 1571 .this_id = -1,
1544 .cmd_per_lun = SRP_SQ_SIZE, 1572 .cmd_per_lun = SRP_SQ_SIZE,
1545 .use_clustering = ENABLE_CLUSTERING, 1573 .use_clustering = ENABLE_CLUSTERING,
1574 .use_sg_chaining = ENABLE_SG_CHAINING,
1546 .shost_attrs = srp_host_attrs 1575 .shost_attrs = srp_host_attrs
1547}; 1576};
1548 1577
@@ -1814,8 +1843,9 @@ static ssize_t srp_create_target(struct class_device *class_dev,
1814 1843
1815 ib_get_cached_gid(host->dev->dev, host->port, 0, &target->path.sgid); 1844 ib_get_cached_gid(host->dev->dev, host->port, 0, &target->path.sgid);
1816 1845
1817 printk(KERN_DEBUG PFX "new target: id_ext %016llx ioc_guid %016llx pkey %04x " 1846 shost_printk(KERN_DEBUG, target->scsi_host, PFX
1818 "service_id %016llx dgid %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", 1847 "new target: id_ext %016llx ioc_guid %016llx pkey %04x "
1848 "service_id %016llx dgid %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
1819 (unsigned long long) be64_to_cpu(target->id_ext), 1849 (unsigned long long) be64_to_cpu(target->id_ext),
1820 (unsigned long long) be64_to_cpu(target->ioc_guid), 1850 (unsigned long long) be64_to_cpu(target->ioc_guid),
1821 be16_to_cpu(target->path.pkey), 1851 be16_to_cpu(target->path.pkey),
@@ -1842,7 +1872,8 @@ static ssize_t srp_create_target(struct class_device *class_dev,
1842 target->qp_in_error = 0; 1872 target->qp_in_error = 0;
1843 ret = srp_connect_target(target); 1873 ret = srp_connect_target(target);
1844 if (ret) { 1874 if (ret) {
1845 printk(KERN_ERR PFX "Connection failed\n"); 1875 shost_printk(KERN_ERR, target->scsi_host,
1876 PFX "Connection failed\n");
1846 goto err_cm_id; 1877 goto err_cm_id;
1847 } 1878 }
1848 1879
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index e3573e7038c4..4a3c1f37e4c2 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -79,6 +79,11 @@ enum srp_target_state {
79 SRP_TARGET_REMOVED 79 SRP_TARGET_REMOVED
80}; 80};
81 81
82enum srp_request_type {
83 SRP_REQ_NORMAL,
84 SRP_REQ_TASK_MGMT,
85};
86
82struct srp_device { 87struct srp_device {
83 struct list_head dev_list; 88 struct list_head dev_list;
84 struct ib_device *dev; 89 struct ib_device *dev;
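
The srp_request_type enum above pairs with the __srp_get_tx_iu() change earlier in this patch: normal SCSI commands now need at least two request-limit credits while task management needs only one, so one credit is always held in reserve for aborts and resets. A standalone sketch of that check (SRP_SQ_SIZE and the example values are assumptions):

/* Sketch of the credit check added to __srp_get_tx_iu(): one request-limit
 * credit is held back so a task-management IU can still be sent when normal
 * commands have consumed the rest. */
#include <stdio.h>

enum srp_request_type { SRP_REQ_NORMAL, SRP_REQ_TASK_MGMT };

#define SRP_SQ_SIZE 64   /* assumed */

static int tx_iu_available(int tx_head, int tx_tail, int req_lim,
			   enum srp_request_type type)
{
	int min = (type == SRP_REQ_TASK_MGMT) ? 1 : 2;

	if (tx_head - tx_tail >= SRP_SQ_SIZE)
		return 0;                /* send queue full */
	return req_lim >= min;           /* otherwise counted as zero_req_lim */
}

int main(void)
{
	/* with a single credit left, only task management may proceed */
	printf("normal:   %d\n", tx_iu_available(3, 0, 1, SRP_REQ_NORMAL));
	printf("tsk_mgmt: %d\n", tx_iu_available(3, 0, 1, SRP_REQ_TASK_MGMT));
	return 0;
}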
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 50648738d679..535a4461d88c 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -202,7 +202,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
202 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET); 202 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET);
203 dev_cap->reserved_eqs = 1 << (field & 0xf); 203 dev_cap->reserved_eqs = 1 << (field & 0xf);
204 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET); 204 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET);
205 dev_cap->max_eqs = 1 << (field & 0x7); 205 dev_cap->max_eqs = 1 << (field & 0xf);
206 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET); 206 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET);
207 dev_cap->reserved_mtts = 1 << (field >> 4); 207 dev_cap->reserved_mtts = 1 << (field >> 4);
208 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET); 208 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET);
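
The one-character mlx4 fix above widens the max_eqs mask from three bits to four. The QUERY_DEV_CAP field holds log2 of the EQ count, so masking with 0x7 dropped the high bit of the exponent and badly understated the capability on firmware reporting more than 128 EQs. A tiny sketch of the effect, with an illustrative field value:

/* Effect of widening the max_eqs mask in mlx4_QUERY_DEV_CAP(): the field is
 * log2(max EQs), so a 3-bit mask loses the top bit of the exponent. */
#include <stdio.h>

int main(void)
{
	unsigned char field = 0x09;                   /* device reports 2^9 = 512 EQs */

	printf("old: %d EQs\n", 1 << (field & 0x7));  /* 9 & 0x7 = 1 -> 2, wrong */
	printf("new: %d EQs\n", 1 << (field & 0xf));  /* 1 << 9 = 512 */
	return 0;
}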
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 448eccb20638..b24508abb850 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -269,18 +269,21 @@ static inline void ipv6_arcnet_mc_map(const struct in6_addr *addr, char *buf)
269 buf[0] = 0x00; 269 buf[0] = 0x00;
270} 270}
271 271
272static inline void ipv6_ib_mc_map(struct in6_addr *addr, char *buf) 272static inline void ipv6_ib_mc_map(const struct in6_addr *addr,
273 const unsigned char *broadcast, char *buf)
273{ 274{
275 unsigned char scope = broadcast[5] & 0xF;
276
274 buf[0] = 0; /* Reserved */ 277 buf[0] = 0; /* Reserved */
275 buf[1] = 0xff; /* Multicast QPN */ 278 buf[1] = 0xff; /* Multicast QPN */
276 buf[2] = 0xff; 279 buf[2] = 0xff;
277 buf[3] = 0xff; 280 buf[3] = 0xff;
278 buf[4] = 0xff; 281 buf[4] = 0xff;
279 buf[5] = 0x12; /* link local scope */ 282 buf[5] = 0x10 | scope; /* scope from broadcast address */
280 buf[6] = 0x60; /* IPv6 signature */ 283 buf[6] = 0x60; /* IPv6 signature */
281 buf[7] = 0x1b; 284 buf[7] = 0x1b;
282 buf[8] = 0; /* P_Key */ 285 buf[8] = broadcast[8]; /* P_Key */
283 buf[9] = 0; 286 buf[9] = broadcast[9];
284 memcpy(buf + 10, addr->s6_addr + 6, 10); 287 memcpy(buf + 10, addr->s6_addr + 6, 10);
285} 288}
286#endif 289#endif
diff --git a/include/net/ip.h b/include/net/ip.h
index 840dd91b513b..50c8889b1b8d 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -266,20 +266,22 @@ static inline void ip_eth_mc_map(__be32 naddr, char *buf)
266 * Leave P_Key as 0 to be filled in by driver. 266 * Leave P_Key as 0 to be filled in by driver.
267 */ 267 */
268 268
269static inline void ip_ib_mc_map(__be32 naddr, char *buf) 269static inline void ip_ib_mc_map(__be32 naddr, const unsigned char *broadcast, char *buf)
270{ 270{
271 __u32 addr; 271 __u32 addr;
272 unsigned char scope = broadcast[5] & 0xF;
273
272 buf[0] = 0; /* Reserved */ 274 buf[0] = 0; /* Reserved */
273 buf[1] = 0xff; /* Multicast QPN */ 275 buf[1] = 0xff; /* Multicast QPN */
274 buf[2] = 0xff; 276 buf[2] = 0xff;
275 buf[3] = 0xff; 277 buf[3] = 0xff;
276 addr = ntohl(naddr); 278 addr = ntohl(naddr);
277 buf[4] = 0xff; 279 buf[4] = 0xff;
278 buf[5] = 0x12; /* link local scope */ 280 buf[5] = 0x10 | scope; /* scope from broadcast address */
279 buf[6] = 0x40; /* IPv4 signature */ 281 buf[6] = 0x40; /* IPv4 signature */
280 buf[7] = 0x1b; 282 buf[7] = 0x1b;
281 buf[8] = 0; /* P_Key */ 283 buf[8] = broadcast[8]; /* P_Key */
282 buf[9] = 0; 284 buf[9] = broadcast[9];
283 buf[10] = 0; 285 buf[10] = 0;
284 buf[11] = 0; 286 buf[11] = 0;
285 buf[12] = 0; 287 buf[12] = 0;
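
The ip_ib_mc_map() and ipv6_ib_mc_map() changes above stop hard-coding link-local scope and a zero P_Key: both are now copied from the interface's broadcast address, so mapped multicast GIDs follow the partition and scope the port actually joined. A standalone sketch of the IPv4 mapping; the helper name, the trailing bytes that pack the group address, and the sample broadcast address are illustrative rather than the kernel code:

/* Standalone sketch of the updated IPv4-to-IPoIB multicast mapping: the GID
 * scope nibble and the P_Key bytes are taken from the interface broadcast
 * address instead of being hard-coded. */
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

static void map_ipv4_mc(unsigned int naddr, const unsigned char *broadcast,
			unsigned char *buf)
{
	unsigned int addr = ntohl(naddr);
	unsigned char scope = broadcast[5] & 0xF;

	memset(buf, 0, 20);
	buf[1] = 0xff;            /* multicast QPN */
	buf[2] = 0xff;
	buf[3] = 0xff;
	buf[4] = 0xff;
	buf[5] = 0x10 | scope;    /* scope copied from the broadcast address */
	buf[6] = 0x40;            /* IPv4 signature */
	buf[7] = 0x1b;
	buf[8] = broadcast[8];    /* P_Key copied from the broadcast address */
	buf[9] = broadcast[9];
	buf[16] = (addr >> 24) & 0x0f;   /* low 28 bits of the group address */
	buf[17] = (addr >> 16) & 0xff;
	buf[18] = (addr >>  8) & 0xff;
	buf[19] = addr & 0xff;
}

int main(void)
{
	/* sample IPoIB broadcast address: link-local scope, P_Key 0xffff */
	unsigned char broadcast[20] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0x12,
					0x40, 0x1b, 0xff, 0xff };
	unsigned char mgid[20];
	int i;

	map_ipv4_mc(htonl(0xe0000001), broadcast, mgid);   /* 224.0.0.1 */
	for (i = 0; i < 20; i++)
		printf("%02x%s", mgid[i], i == 19 ? "\n" : ":");
	return 0;
}

Run as-is this prints the hardware address whose GID portion is ff12:401b:ffff::1 for the all-hosts group; with a non-default P_Key or scope in dev->broadcast, the same code now produces the matching partition-specific MGID.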
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 8ec3799e42e1..7228c056b9e9 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -230,7 +230,9 @@ struct ib_class_port_info
230 * @seg_count: The number of RMPP segments allocated for this send. 230 * @seg_count: The number of RMPP segments allocated for this send.
231 * @seg_size: Size of each RMPP segment. 231 * @seg_size: Size of each RMPP segment.
232 * @timeout_ms: Time to wait for a response. 232 * @timeout_ms: Time to wait for a response.
233 * @retries: Number of times to retry a request for a response. 233 * @retries: Number of times to retry a request for a response. For MADs
234 * using RMPP, this applies per window. On completion, returns the number
235 * of retries needed to complete the transfer.
234 * 236 *
235 * Users are responsible for initializing the MAD buffer itself, with the 237 * Users are responsible for initializing the MAD buffer itself, with the
236 * exception of any RMPP header. Additional segment buffer space allocated 238 * exception of any RMPP header. Additional segment buffer space allocated
diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h
index 9749c1b34d00..c55705460b87 100644
--- a/include/rdma/rdma_user_cm.h
+++ b/include/rdma/rdma_user_cm.h
@@ -60,7 +60,8 @@ enum {
60 RDMA_USER_CM_CMD_SET_OPTION, 60 RDMA_USER_CM_CMD_SET_OPTION,
61 RDMA_USER_CM_CMD_NOTIFY, 61 RDMA_USER_CM_CMD_NOTIFY,
62 RDMA_USER_CM_CMD_JOIN_MCAST, 62 RDMA_USER_CM_CMD_JOIN_MCAST,
63 RDMA_USER_CM_CMD_LEAVE_MCAST 63 RDMA_USER_CM_CMD_LEAVE_MCAST,
64 RDMA_USER_CM_CMD_MIGRATE_ID
64}; 65};
65 66
66/* 67/*
@@ -230,4 +231,14 @@ struct rdma_ucm_set_option {
230 __u32 optlen; 231 __u32 optlen;
231}; 232};
232 233
234struct rdma_ucm_migrate_id {
235 __u64 response;
236 __u32 id;
237 __u32 fd;
238};
239
240struct rdma_ucm_migrate_resp {
241 __u32 events_reported;
242};
243
233#endif /* RDMA_USER_CM_H */ 244#endif /* RDMA_USER_CM_H */
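
The new MIGRATE_ID command and its request/response structures let a process move an rdma_cm_id to a different open instance of the userspace CM device (userspace libraries can use this to switch an id between event channels). A rough sketch of how such a request might be framed from userspace, assuming the usual ucma convention of a small command header followed by the payload, written to the destination file descriptor; the header layout, the command value, and the fd direction are inferences for illustration, not part of this patch:

/* Rough userspace sketch of issuing RDMA_USER_CM_CMD_MIGRATE_ID.  The
 * cmd-header framing, the numeric command value and the meaning of the fd
 * field are assumptions based on the existing ucma commands. */
#include <stdint.h>
#include <unistd.h>

struct rdma_ucm_cmd_hdr {
	uint32_t cmd;
	uint16_t in;
	uint16_t out;
};

struct rdma_ucm_migrate_id {
	uint64_t response;        /* user pointer to struct rdma_ucm_migrate_resp */
	uint32_t id;
	uint32_t fd;
};

struct rdma_ucm_migrate_resp {
	uint32_t events_reported;
};

#define RDMA_USER_CM_CMD_MIGRATE_ID 18   /* position in the enum above; value assumed */

static int migrate_id(int new_fd, int old_fd, uint32_t id,
		      struct rdma_ucm_migrate_resp *resp)
{
	struct {
		struct rdma_ucm_cmd_hdr    hdr;
		struct rdma_ucm_migrate_id cmd;
	} req;

	req.hdr.cmd = RDMA_USER_CM_CMD_MIGRATE_ID;
	req.hdr.in  = sizeof(req.cmd);
	req.hdr.out = sizeof(*resp);
	req.cmd.response = (uintptr_t) resp;
	req.cmd.id = id;
	req.cmd.fd = old_fd;     /* event channel the id is migrating away from */

	/* written to the destination /dev/infiniband/rdma_cm descriptor */
	return write(new_fd, &req, sizeof(req)) == (ssize_t) sizeof(req) ? 0 : -1;
}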
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 08174a2aa878..54a76b8b803a 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -211,7 +211,7 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
211 ip_tr_mc_map(addr, haddr); 211 ip_tr_mc_map(addr, haddr);
212 return 0; 212 return 0;
213 case ARPHRD_INFINIBAND: 213 case ARPHRD_INFINIBAND:
214 ip_ib_mc_map(addr, haddr); 214 ip_ib_mc_map(addr, dev->broadcast, haddr);
215 return 0; 215 return 0;
216 default: 216 default:
217 if (dir) { 217 if (dir) {
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 777ed733b2d7..85947eae5bf7 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -337,7 +337,7 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d
337 ipv6_arcnet_mc_map(addr, buf); 337 ipv6_arcnet_mc_map(addr, buf);
338 return 0; 338 return 0;
339 case ARPHRD_INFINIBAND: 339 case ARPHRD_INFINIBAND:
340 ipv6_ib_mc_map(addr, buf); 340 ipv6_ib_mc_map(addr, dev->broadcast, buf);
341 return 0; 341 return 0;
342 default: 342 default:
343 if (dir) { 343 if (dir) {