about summary refs log tree commit diff stats
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-04-27 12:39:27 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-04-27 12:39:27 -0400
commit afc2e82c0851317931a9bfdb98271253371825c6 (patch)
tree 3f1c119559bd94402d0574f786851bd34bbc048f /drivers/infiniband/hw
parent 0278ef8b484a71917bd4f03a763285cdaac10954 (diff)
parent 1912ffbb88efe872eb8fa8113dfb3cb0b7238764 (diff)
Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband
* 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband: (49 commits)
  IB: Set class_dev->dev in core for nice device symlink
  IB/ehca: Implement modify_port
  IB/umad: Clarify documentation of transaction ID
  IPoIB/cm: spin_lock_irqsave() -> spin_lock_irq() replacements
  IB/mad: Change SMI to use enums rather than magic return codes
  IB/umad: Implement GRH handling for sent/received MADs
  IB/ipoib: Use ib_init_ah_from_path to initialize ah_attr
  IB/sa: Set src_path_bits correctly in ib_init_ah_from_path()
  IB/ucm: Simplify ib_ucm_event()
  RDMA/ucma: Simplify ucma_get_event()
  IB/mthca: Simplify CQ cleaning in mthca_free_qp()
  IB/mthca: Fix mthca_write_mtt() on HCAs with hidden memory
  IB/mthca: Update HCA firmware revisions
  IB/ipath: Fix WC format drift between user and kernel space
  IB/ipath: Check that a UD work request's address handle is valid
  IB/ipath: Remove duplicate stuff from ipath_verbs.h
  IB/ipath: Check reserved memory keys
  IB/ipath: Fix unit selection when all CPU affinity bits set
  IB/ipath: Don't allow QPs 0 and 1 to be opened multiple times
  IB/ipath: Disable IB link earlier in shutdown sequence
  ...
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_provider.c 1
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_provider.c 1
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_classes.h 1
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_hca.c 55
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_main.c 1
-rw-r--r-- drivers/infiniband/hw/ehca/hcp_if.c 24
-rw-r--r-- drivers/infiniband/hw/ehca/hcp_if.h 4
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_common.h 23
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_cq.c 38
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_debug.h 1
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_diag.c 11
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_driver.c 123
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_eeprom.c 4
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_file_ops.c 287
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_iba6110.c 152
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_iba6120.c 73
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_init_chip.c 86
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_intr.c 100
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_kernel.h 10
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_keys.c 14
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_mr.c 12
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_qp.c 133
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_rc.c 920
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_registers.h 22
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_ruc.c 63
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_stats.c 16
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_uc.c 6
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_ud.c 8
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_verbs.c 15
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_verbs.h 57
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_main.c 10
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_mr.c 6
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_provider.c 1
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_qp.c 7
34 files changed, 1444 insertions, 841 deletions
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index fef972752912..607c09bf764c 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -796,7 +796,6 @@ int c2_register_device(struct c2_dev *dev)
796 memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6); 796 memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6);
797 dev->ibdev.phys_port_cnt = 1; 797 dev->ibdev.phys_port_cnt = 1;
798 dev->ibdev.dma_device = &dev->pcidev->dev; 798 dev->ibdev.dma_device = &dev->pcidev->dev;
799 dev->ibdev.class_dev.dev = &dev->pcidev->dev;
800 dev->ibdev.query_device = c2_query_device; 799 dev->ibdev.query_device = c2_query_device;
801 dev->ibdev.query_port = c2_query_port; 800 dev->ibdev.query_port = c2_query_port;
802 dev->ibdev.modify_port = c2_modify_port; 801 dev->ibdev.modify_port = c2_modify_port;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 24e0df04f7db..af28a317016d 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1108,7 +1108,6 @@ int iwch_register_device(struct iwch_dev *dev)
1108 memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC)); 1108 memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
1109 dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports; 1109 dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
1110 dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev); 1110 dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev);
1111 dev->ibdev.class_dev.dev = &(dev->rdev.rnic_info.pdev->dev);
1112 dev->ibdev.query_device = iwch_query_device; 1111 dev->ibdev.query_device = iwch_query_device;
1113 dev->ibdev.query_port = iwch_query_port; 1112 dev->ibdev.query_port = iwch_query_port;
1114 dev->ibdev.modify_port = iwch_modify_port; 1113 dev->ibdev.modify_port = iwch_modify_port;
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 82ded44c6cee..10fb8fbafa0c 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -106,6 +106,7 @@ struct ehca_shca {
106 struct ehca_mr *maxmr; 106 struct ehca_mr *maxmr;
107 struct ehca_pd *pd; 107 struct ehca_pd *pd;
108 struct h_galpas galpas; 108 struct h_galpas galpas;
109 struct mutex modify_mutex;
109}; 110};
110 111
111struct ehca_pd { 112struct ehca_pd {
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index 30eb45df9f0b..32b55a4f0e5b 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -147,6 +147,7 @@ int ehca_query_port(struct ib_device *ibdev,
147 break; 147 break;
148 } 148 }
149 149
150 props->port_cap_flags = rblock->capability_mask;
150 props->gid_tbl_len = rblock->gid_tbl_len; 151 props->gid_tbl_len = rblock->gid_tbl_len;
151 props->max_msg_sz = rblock->max_msg_sz; 152 props->max_msg_sz = rblock->max_msg_sz;
152 props->bad_pkey_cntr = rblock->bad_pkey_cntr; 153 props->bad_pkey_cntr = rblock->bad_pkey_cntr;
@@ -236,10 +237,60 @@ query_gid1:
236 return ret; 237 return ret;
237} 238}
238 239
240const u32 allowed_port_caps = (
241 IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP |
242 IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP |
243 IB_PORT_VENDOR_CLASS_SUP);
244
239int ehca_modify_port(struct ib_device *ibdev, 245int ehca_modify_port(struct ib_device *ibdev,
240 u8 port, int port_modify_mask, 246 u8 port, int port_modify_mask,
241 struct ib_port_modify *props) 247 struct ib_port_modify *props)
242{ 248{
243 /* Not implemented yet */ 249 int ret = 0;
244 return -EFAULT; 250 struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device);
251 struct hipz_query_port *rblock;
252 u32 cap;
253 u64 hret;
254
255 if ((props->set_port_cap_mask | props->clr_port_cap_mask)
256 & ~allowed_port_caps) {
257 ehca_err(&shca->ib_device, "Non-changeable bits set in masks "
258 "set=%x clr=%x allowed=%x", props->set_port_cap_mask,
259 props->clr_port_cap_mask, allowed_port_caps);
260 return -EINVAL;
261 }
262
263 if (mutex_lock_interruptible(&shca->modify_mutex))
264 return -ERESTARTSYS;
265
266 rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
267 if (!rblock) {
268 ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
269 ret = -ENOMEM;
270 goto modify_port1;
271 }
272
273 if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
274 ehca_err(&shca->ib_device, "Can't query port properties");
275 ret = -EINVAL;
276 goto modify_port2;
277 }
278
279 cap = (rblock->capability_mask | props->set_port_cap_mask)
280 & ~props->clr_port_cap_mask;
281
282 hret = hipz_h_modify_port(shca->ipz_hca_handle, port,
283 cap, props->init_type, port_modify_mask);
284 if (hret != H_SUCCESS) {
285 ehca_err(&shca->ib_device, "Modify port failed hret=%lx", hret);
286 ret = -EINVAL;
287 }
288
289modify_port2:
290 ehca_free_fw_ctrlblock(rblock);
291
292modify_port1:
293 mutex_unlock(&shca->modify_mutex);
294
295 return ret;
245} 296}
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 059da9628bb5..3b23d677cb86 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -587,6 +587,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
587 ehca_gen_err("Cannot allocate shca memory."); 587 ehca_gen_err("Cannot allocate shca memory.");
588 return -ENOMEM; 588 return -ENOMEM;
589 } 589 }
590 mutex_init(&shca->modify_mutex);
590 591
591 shca->ibmebus_dev = dev; 592 shca->ibmebus_dev = dev;
592 shca->ipz_hca_handle.handle = *handle; 593 shca->ipz_hca_handle.handle = *handle;
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index 3fb46e67df87..b564fcd3b282 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -70,6 +70,10 @@
70#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31) 70#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31)
71#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63) 71#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63)
72 72
73#define H_MP_INIT_TYPE EHCA_BMASK_IBM(44, 47)
74#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48)
75#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49)
76
73/* direct access qp controls */ 77/* direct access qp controls */
74#define DAQP_CTRL_ENABLE 0x01 78#define DAQP_CTRL_ENABLE 0x01
75#define DAQP_CTRL_SEND_COMP 0x20 79#define DAQP_CTRL_SEND_COMP 0x20
@@ -364,6 +368,26 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
364 return ret; 368 return ret;
365} 369}
366 370
371u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
372 const u8 port_id, const u32 port_cap,
373 const u8 init_type, const int modify_mask)
374{
375 u64 port_attributes = port_cap;
376
377 if (modify_mask & IB_PORT_SHUTDOWN)
378 port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1);
379 if (modify_mask & IB_PORT_INIT_TYPE)
380 port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type);
381 if (modify_mask & IB_PORT_RESET_QKEY_CNTR)
382 port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1);
383
384 return ehca_plpar_hcall_norets(H_MODIFY_PORT,
385 adapter_handle.handle, /* r4 */
386 port_id, /* r5 */
387 port_attributes, /* r6 */
388 0, 0, 0, 0);
389}
390
367u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, 391u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
368 struct hipz_query_hca *query_hca_rblock) 392 struct hipz_query_hca *query_hca_rblock)
369{ 393{
diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h
index 587ebd470959..2869f7dd6196 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.h
+++ b/drivers/infiniband/hw/ehca/hcp_if.h
@@ -85,6 +85,10 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
85 const u8 port_id, 85 const u8 port_id,
86 struct hipz_query_port *query_port_response_block); 86 struct hipz_query_port *query_port_response_block);
87 87
88u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
89 const u8 port_id, const u32 port_cap,
90 const u8 init_type, const int modify_mask);
91
88u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, 92u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
89 struct hipz_query_hca *query_hca_rblock); 93 struct hipz_query_hca *query_hca_rblock);
90 94
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 54139d398181..10c008f22ba6 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -78,6 +78,8 @@
78#define IPATH_IB_LINKINIT 3 78#define IPATH_IB_LINKINIT 3
79#define IPATH_IB_LINKDOWN_SLEEP 4 79#define IPATH_IB_LINKDOWN_SLEEP 4
80#define IPATH_IB_LINKDOWN_DISABLE 5 80#define IPATH_IB_LINKDOWN_DISABLE 5
81#define IPATH_IB_LINK_LOOPBACK 6 /* enable local loopback */
82#define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */
81 83
82/* 84/*
83 * stats maintained by the driver. For now, at least, this is global 85 * stats maintained by the driver. For now, at least, this is global
@@ -316,11 +318,17 @@ struct ipath_base_info {
316 /* address of readonly memory copy of the rcvhdrq tail register. */ 318 /* address of readonly memory copy of the rcvhdrq tail register. */
317 __u64 spi_rcvhdr_tailaddr; 319 __u64 spi_rcvhdr_tailaddr;
318 320
319 /* shared memory pages for subports if IPATH_RUNTIME_MASTER is set */ 321 /* shared memory pages for subports if port is shared */
320 __u64 spi_subport_uregbase; 322 __u64 spi_subport_uregbase;
321 __u64 spi_subport_rcvegrbuf; 323 __u64 spi_subport_rcvegrbuf;
322 __u64 spi_subport_rcvhdr_base; 324 __u64 spi_subport_rcvhdr_base;
323 325
326 /* shared memory page for hardware port if it is shared */
327 __u64 spi_port_uregbase;
328 __u64 spi_port_rcvegrbuf;
329 __u64 spi_port_rcvhdr_base;
330 __u64 spi_port_rcvhdr_tailaddr;
331
324} __attribute__ ((aligned(8))); 332} __attribute__ ((aligned(8)));
325 333
326 334
@@ -344,7 +352,7 @@ struct ipath_base_info {
344 * may not be implemented; the user code must deal with this if it 352 * may not be implemented; the user code must deal with this if it
345 * cares, or it must abort after initialization reports the difference. 353 * cares, or it must abort after initialization reports the difference.
346 */ 354 */
347#define IPATH_USER_SWMINOR 3 355#define IPATH_USER_SWMINOR 5
348 356
349#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR) 357#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
350 358
@@ -418,11 +426,14 @@ struct ipath_user_info {
418#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */ 426#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */
419#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */ 427#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */
420#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */ 428#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */
421#define IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes */ 429#define __IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes (for old user code) */
422#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */ 430#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */
423#define IPATH_CMD_USER_INIT 24 /* set up userspace */ 431#define IPATH_CMD_USER_INIT 24 /* set up userspace */
432#define IPATH_CMD_UNUSED_1 25
433#define IPATH_CMD_UNUSED_2 26
434#define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */
424 435
425#define IPATH_CMD_MAX 24 436#define IPATH_CMD_MAX 27
426 437
427struct ipath_port_info { 438struct ipath_port_info {
428 __u32 num_active; /* number of active units */ 439 __u32 num_active; /* number of active units */
@@ -430,7 +441,7 @@ struct ipath_port_info {
430 __u16 port; /* port on unit assigned to caller */ 441 __u16 port; /* port on unit assigned to caller */
431 __u16 subport; /* subport on unit assigned to caller */ 442 __u16 subport; /* subport on unit assigned to caller */
432 __u16 num_ports; /* number of ports available on unit */ 443 __u16 num_ports; /* number of ports available on unit */
433 __u16 num_subports; /* number of subport slaves opened on port */ 444 __u16 num_subports; /* number of subports opened on port */
434}; 445};
435 446
436struct ipath_tid_info { 447struct ipath_tid_info {
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index 87462e0cb4d2..ea78e6dddc90 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -76,7 +76,20 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
76 } 76 }
77 return; 77 return;
78 } 78 }
79 wc->queue[head] = *entry; 79 wc->queue[head].wr_id = entry->wr_id;
80 wc->queue[head].status = entry->status;
81 wc->queue[head].opcode = entry->opcode;
82 wc->queue[head].vendor_err = entry->vendor_err;
83 wc->queue[head].byte_len = entry->byte_len;
84 wc->queue[head].imm_data = (__u32 __force)entry->imm_data;
85 wc->queue[head].qp_num = entry->qp->qp_num;
86 wc->queue[head].src_qp = entry->src_qp;
87 wc->queue[head].wc_flags = entry->wc_flags;
88 wc->queue[head].pkey_index = entry->pkey_index;
89 wc->queue[head].slid = entry->slid;
90 wc->queue[head].sl = entry->sl;
91 wc->queue[head].dlid_path_bits = entry->dlid_path_bits;
92 wc->queue[head].port_num = entry->port_num;
80 wc->head = next; 93 wc->head = next;
81 94
82 if (cq->notify == IB_CQ_NEXT_COMP || 95 if (cq->notify == IB_CQ_NEXT_COMP ||
@@ -122,9 +135,30 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
122 if (tail > (u32) cq->ibcq.cqe) 135 if (tail > (u32) cq->ibcq.cqe)
123 tail = (u32) cq->ibcq.cqe; 136 tail = (u32) cq->ibcq.cqe;
124 for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { 137 for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
138 struct ipath_qp *qp;
139
125 if (tail == wc->head) 140 if (tail == wc->head)
126 break; 141 break;
127 *entry = wc->queue[tail]; 142
143 qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table,
144 wc->queue[tail].qp_num);
145 entry->qp = &qp->ibqp;
146 if (atomic_dec_and_test(&qp->refcount))
147 wake_up(&qp->wait);
148
149 entry->wr_id = wc->queue[tail].wr_id;
150 entry->status = wc->queue[tail].status;
151 entry->opcode = wc->queue[tail].opcode;
152 entry->vendor_err = wc->queue[tail].vendor_err;
153 entry->byte_len = wc->queue[tail].byte_len;
154 entry->imm_data = wc->queue[tail].imm_data;
155 entry->src_qp = wc->queue[tail].src_qp;
156 entry->wc_flags = wc->queue[tail].wc_flags;
157 entry->pkey_index = wc->queue[tail].pkey_index;
158 entry->slid = wc->queue[tail].slid;
159 entry->sl = wc->queue[tail].sl;
160 entry->dlid_path_bits = wc->queue[tail].dlid_path_bits;
161 entry->port_num = wc->queue[tail].port_num;
128 if (tail >= cq->ibcq.cqe) 162 if (tail >= cq->ibcq.cqe)
129 tail = 0; 163 tail = 0;
130 else 164 else
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index df69f0d80b8b..42bfbdb0d3e6 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -57,6 +57,7 @@
57#define __IPATH_PROCDBG 0x100 57#define __IPATH_PROCDBG 0x100
58/* print mmap/nopage stuff, not using VDBG any more */ 58/* print mmap/nopage stuff, not using VDBG any more */
59#define __IPATH_MMDBG 0x200 59#define __IPATH_MMDBG 0x200
60#define __IPATH_ERRPKTDBG 0x400
60#define __IPATH_USER_SEND 0x1000 /* use user mode send */ 61#define __IPATH_USER_SEND 0x1000 /* use user mode send */
61#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */ 62#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */
62#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */ 63#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 0f13a2182cc7..63e8368b0e95 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -296,7 +296,7 @@ static int ipath_diag_open(struct inode *in, struct file *fp)
296 } 296 }
297 297
298 fp->private_data = dd; 298 fp->private_data = dd;
299 ipath_diag_inuse = 1; 299 ipath_diag_inuse = -2;
300 diag_set_link = 0; 300 diag_set_link = 0;
301 ret = 0; 301 ret = 0;
302 302
@@ -461,6 +461,8 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data,
461 else if ((count % 4) || (*off % 4)) 461 else if ((count % 4) || (*off % 4))
462 /* address or length is not 32-bit aligned, hence invalid */ 462 /* address or length is not 32-bit aligned, hence invalid */
463 ret = -EINVAL; 463 ret = -EINVAL;
464 else if (ipath_diag_inuse < 1 && (*off || count != 8))
465 ret = -EINVAL; /* prevent cat /dev/ipath_diag* */
464 else if ((count % 8) || (*off % 8)) 466 else if ((count % 8) || (*off % 8))
465 /* address or length not 64-bit aligned; do 32-bit reads */ 467 /* address or length not 64-bit aligned; do 32-bit reads */
466 ret = ipath_read_umem32(dd, data, kreg_base + *off, count); 468 ret = ipath_read_umem32(dd, data, kreg_base + *off, count);
@@ -470,6 +472,8 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data,
470 if (ret >= 0) { 472 if (ret >= 0) {
471 *off += count; 473 *off += count;
472 ret = count; 474 ret = count;
475 if (ipath_diag_inuse == -2)
476 ipath_diag_inuse++;
473 } 477 }
474 478
475 return ret; 479 return ret;
@@ -489,6 +493,9 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
489 else if ((count % 4) || (*off % 4)) 493 else if ((count % 4) || (*off % 4))
490 /* address or length is not 32-bit aligned, hence invalid */ 494 /* address or length is not 32-bit aligned, hence invalid */
491 ret = -EINVAL; 495 ret = -EINVAL;
496 else if ((ipath_diag_inuse == -1 && (*off || count != 8)) ||
497 ipath_diag_inuse == -2) /* read qw off 0, write qw off 0 */
498 ret = -EINVAL; /* before any other write allowed */
492 else if ((count % 8) || (*off % 8)) 499 else if ((count % 8) || (*off % 8))
493 /* address or length not 64-bit aligned; do 32-bit writes */ 500 /* address or length not 64-bit aligned; do 32-bit writes */
494 ret = ipath_write_umem32(dd, kreg_base + *off, data, count); 501 ret = ipath_write_umem32(dd, kreg_base + *off, data, count);
@@ -498,6 +505,8 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
498 if (ret >= 0) { 505 if (ret >= 0) {
499 *off += count; 506 *off += count;
500 ret = count; 507 ret = count;
508 if (ipath_diag_inuse == -1)
509 ipath_diag_inuse = 1; /* all read/write OK now */
501 } 510 }
502 511
503 return ret; 512 return ret;
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index ae7f21a0cdc0..e3a223209710 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -390,15 +390,23 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
390 390
391 /* setup the chip-specific functions, as early as possible. */ 391 /* setup the chip-specific functions, as early as possible. */
392 switch (ent->device) { 392 switch (ent->device) {
393#ifdef CONFIG_HT_IRQ
394 case PCI_DEVICE_ID_INFINIPATH_HT: 393 case PCI_DEVICE_ID_INFINIPATH_HT:
394#ifdef CONFIG_HT_IRQ
395 ipath_init_iba6110_funcs(dd); 395 ipath_init_iba6110_funcs(dd);
396 break; 396 break;
397#else
398 ipath_dev_err(dd, "QLogic HT device 0x%x cannot work if "
399 "CONFIG_HT_IRQ is not enabled\n", ent->device);
400 return -ENODEV;
397#endif 401#endif
398#ifdef CONFIG_PCI_MSI
399 case PCI_DEVICE_ID_INFINIPATH_PE800: 402 case PCI_DEVICE_ID_INFINIPATH_PE800:
403#ifdef CONFIG_PCI_MSI
400 ipath_init_iba6120_funcs(dd); 404 ipath_init_iba6120_funcs(dd);
401 break; 405 break;
406#else
407 ipath_dev_err(dd, "QLogic PCIE device 0x%x cannot work if "
408 "CONFIG_PCI_MSI is not enabled\n", ent->device);
409 return -ENODEV;
402#endif 410#endif
403 default: 411 default:
404 ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, " 412 ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
@@ -486,7 +494,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
486 494
487 ret = ipath_init_chip(dd, 0); /* do the chip-specific init */ 495 ret = ipath_init_chip(dd, 0); /* do the chip-specific init */
488 if (ret) 496 if (ret)
489 goto bail_iounmap; 497 goto bail_irqsetup;
490 498
491 ret = ipath_enable_wc(dd); 499 ret = ipath_enable_wc(dd);
492 500
@@ -505,6 +513,9 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
505 513
506 goto bail; 514 goto bail;
507 515
516bail_irqsetup:
517 if (pdev->irq) free_irq(pdev->irq, dd);
518
508bail_iounmap: 519bail_iounmap:
509 iounmap((volatile void __iomem *) dd->ipath_kregbase); 520 iounmap((volatile void __iomem *) dd->ipath_kregbase);
510 521
@@ -525,8 +536,6 @@ static void __devexit cleanup_device(struct ipath_devdata *dd)
525{ 536{
526 int port; 537 int port;
527 538
528 ipath_shutdown_device(dd);
529
530 if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { 539 if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
531 /* can't do anything more with chip; needs re-init */ 540 /* can't do anything more with chip; needs re-init */
532 *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT; 541 *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
@@ -594,8 +603,9 @@ static void __devexit cleanup_device(struct ipath_devdata *dd)
594 603
595 ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", 604 ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
596 dd->ipath_pageshadow); 605 dd->ipath_pageshadow);
597 vfree(dd->ipath_pageshadow); 606 tmpp = dd->ipath_pageshadow;
598 dd->ipath_pageshadow = NULL; 607 dd->ipath_pageshadow = NULL;
608 vfree(tmpp);
599 } 609 }
600 610
601 /* 611 /*
@@ -622,6 +632,12 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev)
622 632
623 ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd); 633 ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
624 634
635 /*
636 * disable the IB link early, to be sure no new packets arrive, which
637 * complicates the shutdown process
638 */
639 ipath_shutdown_device(dd);
640
625 if (dd->verbs_dev) 641 if (dd->verbs_dev)
626 ipath_unregister_ib_device(dd->verbs_dev); 642 ipath_unregister_ib_device(dd->verbs_dev);
627 643
@@ -754,9 +770,42 @@ static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
754 return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT; 770 return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
755} 771}
756 772
757void ipath_decode_err(char *buf, size_t blen, ipath_err_t err) 773/*
774 * Decode the error status into strings, deciding whether to always
775 * print it or not depending on "normal packet errors" vs everything
776 * else. Return 1 if "real" errors, otherwise 0 if only packet
777 * errors, so caller can decide what to print with the string.
778 */
779int ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
758{ 780{
781 int iserr = 1;
759 *buf = '\0'; 782 *buf = '\0';
783 if (err & INFINIPATH_E_PKTERRS) {
784 if (!(err & ~INFINIPATH_E_PKTERRS))
785 iserr = 0; // if only packet errors.
786 if (ipath_debug & __IPATH_ERRPKTDBG) {
787 if (err & INFINIPATH_E_REBP)
788 strlcat(buf, "EBP ", blen);
789 if (err & INFINIPATH_E_RVCRC)
790 strlcat(buf, "VCRC ", blen);
791 if (err & INFINIPATH_E_RICRC) {
792 strlcat(buf, "CRC ", blen);
793 // clear for check below, so only once
794 err &= INFINIPATH_E_RICRC;
795 }
796 if (err & INFINIPATH_E_RSHORTPKTLEN)
797 strlcat(buf, "rshortpktlen ", blen);
798 if (err & INFINIPATH_E_SDROPPEDDATAPKT)
799 strlcat(buf, "sdroppeddatapkt ", blen);
800 if (err & INFINIPATH_E_SPKTLEN)
801 strlcat(buf, "spktlen ", blen);
802 }
803 if ((err & INFINIPATH_E_RICRC) &&
804 !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
805 strlcat(buf, "CRC ", blen);
806 if (!iserr)
807 goto done;
808 }
760 if (err & INFINIPATH_E_RHDRLEN) 809 if (err & INFINIPATH_E_RHDRLEN)
761 strlcat(buf, "rhdrlen ", blen); 810 strlcat(buf, "rhdrlen ", blen);
762 if (err & INFINIPATH_E_RBADTID) 811 if (err & INFINIPATH_E_RBADTID)
@@ -767,12 +816,12 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
767 strlcat(buf, "rhdr ", blen); 816 strlcat(buf, "rhdr ", blen);
768 if (err & INFINIPATH_E_RLONGPKTLEN) 817 if (err & INFINIPATH_E_RLONGPKTLEN)
769 strlcat(buf, "rlongpktlen ", blen); 818 strlcat(buf, "rlongpktlen ", blen);
770 if (err & INFINIPATH_E_RSHORTPKTLEN)
771 strlcat(buf, "rshortpktlen ", blen);
772 if (err & INFINIPATH_E_RMAXPKTLEN) 819 if (err & INFINIPATH_E_RMAXPKTLEN)
773 strlcat(buf, "rmaxpktlen ", blen); 820 strlcat(buf, "rmaxpktlen ", blen);
774 if (err & INFINIPATH_E_RMINPKTLEN) 821 if (err & INFINIPATH_E_RMINPKTLEN)
775 strlcat(buf, "rminpktlen ", blen); 822 strlcat(buf, "rminpktlen ", blen);
823 if (err & INFINIPATH_E_SMINPKTLEN)
824 strlcat(buf, "sminpktlen ", blen);
776 if (err & INFINIPATH_E_RFORMATERR) 825 if (err & INFINIPATH_E_RFORMATERR)
777 strlcat(buf, "rformaterr ", blen); 826 strlcat(buf, "rformaterr ", blen);
778 if (err & INFINIPATH_E_RUNSUPVL) 827 if (err & INFINIPATH_E_RUNSUPVL)
@@ -781,32 +830,20 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
781 strlcat(buf, "runexpchar ", blen); 830 strlcat(buf, "runexpchar ", blen);
782 if (err & INFINIPATH_E_RIBFLOW) 831 if (err & INFINIPATH_E_RIBFLOW)
783 strlcat(buf, "ribflow ", blen); 832 strlcat(buf, "ribflow ", blen);
784 if (err & INFINIPATH_E_REBP)
785 strlcat(buf, "EBP ", blen);
786 if (err & INFINIPATH_E_SUNDERRUN) 833 if (err & INFINIPATH_E_SUNDERRUN)
787 strlcat(buf, "sunderrun ", blen); 834 strlcat(buf, "sunderrun ", blen);
788 if (err & INFINIPATH_E_SPIOARMLAUNCH) 835 if (err & INFINIPATH_E_SPIOARMLAUNCH)
789 strlcat(buf, "spioarmlaunch ", blen); 836 strlcat(buf, "spioarmlaunch ", blen);
790 if (err & INFINIPATH_E_SUNEXPERRPKTNUM) 837 if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
791 strlcat(buf, "sunexperrpktnum ", blen); 838 strlcat(buf, "sunexperrpktnum ", blen);
792 if (err & INFINIPATH_E_SDROPPEDDATAPKT)
793 strlcat(buf, "sdroppeddatapkt ", blen);
794 if (err & INFINIPATH_E_SDROPPEDSMPPKT) 839 if (err & INFINIPATH_E_SDROPPEDSMPPKT)
795 strlcat(buf, "sdroppedsmppkt ", blen); 840 strlcat(buf, "sdroppedsmppkt ", blen);
796 if (err & INFINIPATH_E_SMAXPKTLEN) 841 if (err & INFINIPATH_E_SMAXPKTLEN)
797 strlcat(buf, "smaxpktlen ", blen); 842 strlcat(buf, "smaxpktlen ", blen);
798 if (err & INFINIPATH_E_SMINPKTLEN)
799 strlcat(buf, "sminpktlen ", blen);
800 if (err & INFINIPATH_E_SUNSUPVL) 843 if (err & INFINIPATH_E_SUNSUPVL)
801 strlcat(buf, "sunsupVL ", blen); 844 strlcat(buf, "sunsupVL ", blen);
802 if (err & INFINIPATH_E_SPKTLEN)
803 strlcat(buf, "spktlen ", blen);
804 if (err & INFINIPATH_E_INVALIDADDR) 845 if (err & INFINIPATH_E_INVALIDADDR)
805 strlcat(buf, "invalidaddr ", blen); 846 strlcat(buf, "invalidaddr ", blen);
806 if (err & INFINIPATH_E_RICRC)
807 strlcat(buf, "CRC ", blen);
808 if (err & INFINIPATH_E_RVCRC)
809 strlcat(buf, "VCRC ", blen);
810 if (err & INFINIPATH_E_RRCVEGRFULL) 847 if (err & INFINIPATH_E_RRCVEGRFULL)
811 strlcat(buf, "rcvegrfull ", blen); 848 strlcat(buf, "rcvegrfull ", blen);
812 if (err & INFINIPATH_E_RRCVHDRFULL) 849 if (err & INFINIPATH_E_RRCVHDRFULL)
@@ -819,6 +856,8 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
819 strlcat(buf, "hardware ", blen); 856 strlcat(buf, "hardware ", blen);
820 if (err & INFINIPATH_E_RESET) 857 if (err & INFINIPATH_E_RESET)
821 strlcat(buf, "reset ", blen); 858 strlcat(buf, "reset ", blen);
859done:
860 return iserr;
822} 861}
823 862
824/** 863/**
@@ -1662,6 +1701,22 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
1662 lstate = IPATH_LINKACTIVE; 1701 lstate = IPATH_LINKACTIVE;
1663 break; 1702 break;
1664 1703
1704 case IPATH_IB_LINK_LOOPBACK:
1705 dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");
1706 dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
1707 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1708 dd->ipath_ibcctrl);
1709 ret = 0;
1710 goto bail; // no state change to wait for
1711
1712 case IPATH_IB_LINK_EXTERNAL:
1713 dev_info(&dd->pcidev->dev, "Disabling IB local loopback (normal)\n");
1714 dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
1715 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1716 dd->ipath_ibcctrl);
1717 ret = 0;
1718 goto bail; // no state change to wait for
1719
1665 default: 1720 default:
1666 ipath_dbg("Invalid linkstate 0x%x requested\n", newstate); 1721 ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
1667 ret = -EINVAL; 1722 ret = -EINVAL;
@@ -1765,29 +1820,6 @@ int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
1765 return 0; 1820 return 0;
1766} 1821}
1767 1822
1768/**
1769 * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register
1770 * @dd: the infinipath device
1771 * @regno: the register number to read
1772 * @port: the port containing the register
1773 *
1774 * Registers that vary with the chip implementation constants (port)
1775 * use this routine.
1776 */
1777u64 ipath_read_kreg64_port(const struct ipath_devdata *dd, ipath_kreg regno,
1778 unsigned port)
1779{
1780 u16 where;
1781
1782 if (port < dd->ipath_portcnt &&
1783 (regno == dd->ipath_kregs->kr_rcvhdraddr ||
1784 regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
1785 where = regno + port;
1786 else
1787 where = -1;
1788
1789 return ipath_read_kreg64(dd, where);
1790}
1791 1823
1792/** 1824/**
1793 * ipath_write_kreg_port - write a device's per-port 64-bit kernel register 1825 * ipath_write_kreg_port - write a device's per-port 64-bit kernel register
@@ -1973,7 +2005,8 @@ static int __init infinipath_init(void)
1973{ 2005{
1974 int ret; 2006 int ret;
1975 2007
1976 ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version); 2008 if (ipath_debug & __IPATH_DBG)
2009 printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
1977 2010
1978 /* 2011 /*
1979 * These must be called before the driver is registered with 2012 * These must be called before the driver is registered with
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
index a4019a6b7560..030185f90ee2 100644
--- a/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -626,6 +626,10 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
626 } else 626 } else
627 memcpy(dd->ipath_serial, ifp->if_serial, 627 memcpy(dd->ipath_serial, ifp->if_serial,
628 sizeof ifp->if_serial); 628 sizeof ifp->if_serial);
629 if (!strstr(ifp->if_comment, "Tested successfully"))
630 ipath_dev_err(dd, "Board SN %s did not pass functional "
631 "test: %s\n", dd->ipath_serial,
632 ifp->if_comment);
629 633
630 ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n", 634 ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
631 (unsigned long long) be64_to_cpu(dd->ipath_guid)); 635 (unsigned long long) be64_to_cpu(dd->ipath_guid));
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 5d64ff875297..1272aaf2a785 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -41,12 +41,6 @@
41#include "ipath_kernel.h" 41#include "ipath_kernel.h"
42#include "ipath_common.h" 42#include "ipath_common.h"
43 43
44/*
45 * mmap64 doesn't allow all 64 bits for 32-bit applications
46 * so only use the low 43 bits.
47 */
48#define MMAP64_MASK 0x7FFFFFFFFFFUL
49
50static int ipath_open(struct inode *, struct file *); 44static int ipath_open(struct inode *, struct file *);
51static int ipath_close(struct inode *, struct file *); 45static int ipath_close(struct inode *, struct file *);
52static ssize_t ipath_write(struct file *, const char __user *, size_t, 46static ssize_t ipath_write(struct file *, const char __user *, size_t,
@@ -63,6 +57,24 @@ static const struct file_operations ipath_file_ops = {
63 .mmap = ipath_mmap 57 .mmap = ipath_mmap
64}; 58};
65 59
60/*
61 * Convert kernel virtual addresses to physical addresses so they don't
62 * potentially conflict with the chip addresses used as mmap offsets.
63 * It doesn't really matter what mmap offset we use as long as we can
64 * interpret it correctly.
65 */
66static u64 cvt_kvaddr(void *p)
67{
68 struct page *page;
69 u64 paddr = 0;
70
71 page = vmalloc_to_page(p);
72 if (page)
73 paddr = page_to_pfn(page) << PAGE_SHIFT;
74
75 return paddr;
76}
77
66static int ipath_get_base_info(struct file *fp, 78static int ipath_get_base_info(struct file *fp,
67 void __user *ubase, size_t ubase_size) 79 void __user *ubase, size_t ubase_size)
68{ 80{
@@ -87,7 +99,7 @@ static int ipath_get_base_info(struct file *fp,
87 sz = sizeof(*kinfo); 99 sz = sizeof(*kinfo);
88 /* If port sharing is not requested, allow the old size structure */ 100 /* If port sharing is not requested, allow the old size structure */
89 if (!shared) 101 if (!shared)
90 sz -= 3 * sizeof(u64); 102 sz -= 7 * sizeof(u64);
91 if (ubase_size < sz) { 103 if (ubase_size < sz) {
92 ipath_cdbg(PROC, 104 ipath_cdbg(PROC,
93 "Base size %zu, need %zu (version mismatch?)\n", 105 "Base size %zu, need %zu (version mismatch?)\n",
@@ -165,24 +177,41 @@ static int ipath_get_base_info(struct file *fp,
165 kinfo->spi_piobufbase = (u64) pd->port_piobufs + 177 kinfo->spi_piobufbase = (u64) pd->port_piobufs +
166 dd->ipath_palign * 178 dd->ipath_palign *
167 (dd->ipath_pbufsport - kinfo->spi_piocnt); 179 (dd->ipath_pbufsport - kinfo->spi_piocnt);
168 kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
169 dd->ipath_palign * pd->port_port;
170 } else { 180 } else {
171 unsigned slave = subport_fp(fp) - 1; 181 unsigned slave = subport_fp(fp) - 1;
172 182
173 kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt; 183 kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt;
174 kinfo->spi_piobufbase = (u64) pd->port_piobufs + 184 kinfo->spi_piobufbase = (u64) pd->port_piobufs +
175 dd->ipath_palign * kinfo->spi_piocnt * slave; 185 dd->ipath_palign * kinfo->spi_piocnt * slave;
176 kinfo->__spi_uregbase = ((u64) pd->subport_uregbase + 186 }
177 PAGE_SIZE * slave) & MMAP64_MASK; 187 if (shared) {
188 kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
189 dd->ipath_palign * pd->port_port;
190 kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs;
191 kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base;
192 kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr;
178 193
179 kinfo->spi_rcvhdr_base = ((u64) pd->subport_rcvhdr_base + 194 kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase +
180 pd->port_rcvhdrq_size * slave) & MMAP64_MASK; 195 PAGE_SIZE * subport_fp(fp));
181 kinfo->spi_rcvhdr_tailaddr = 196
182 (u64) pd->port_rcvhdrqtailaddr_phys & MMAP64_MASK; 197 kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base +
183 kinfo->spi_rcv_egrbufs = ((u64) pd->subport_rcvegrbuf + 198 pd->port_rcvhdrq_size * subport_fp(fp));
184 dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave) & 199 kinfo->spi_rcvhdr_tailaddr = 0;
185 MMAP64_MASK; 200 kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf +
201 pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size *
202 subport_fp(fp));
203
204 kinfo->spi_subport_uregbase =
205 cvt_kvaddr(pd->subport_uregbase);
206 kinfo->spi_subport_rcvegrbuf =
207 cvt_kvaddr(pd->subport_rcvegrbuf);
208 kinfo->spi_subport_rcvhdr_base =
209 cvt_kvaddr(pd->subport_rcvhdr_base);
210 ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
211 kinfo->spi_port, kinfo->spi_runtime_flags,
212 (unsigned long long) kinfo->spi_subport_uregbase,
213 (unsigned long long) kinfo->spi_subport_rcvegrbuf,
214 (unsigned long long) kinfo->spi_subport_rcvhdr_base);
186 } 215 }
187 216
188 kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) / 217 kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) /
@@ -199,20 +228,10 @@ static int ipath_get_base_info(struct file *fp,
199 228
200 if (master) { 229 if (master) {
201 kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER; 230 kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
202 kinfo->spi_subport_uregbase =
203 (u64) pd->subport_uregbase & MMAP64_MASK;
204 kinfo->spi_subport_rcvegrbuf =
205 (u64) pd->subport_rcvegrbuf & MMAP64_MASK;
206 kinfo->spi_subport_rcvhdr_base =
207 (u64) pd->subport_rcvhdr_base & MMAP64_MASK;
208 ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
209 kinfo->spi_port, kinfo->spi_runtime_flags,
210 (unsigned long long) kinfo->spi_subport_uregbase,
211 (unsigned long long) kinfo->spi_subport_rcvegrbuf,
212 (unsigned long long) kinfo->spi_subport_rcvhdr_base);
213 } 231 }
214 232
215 if (copy_to_user(ubase, kinfo, sizeof(*kinfo))) 233 sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
234 if (copy_to_user(ubase, kinfo, sz))
216 ret = -EFAULT; 235 ret = -EFAULT;
217 236
218bail: 237bail:
@@ -1132,67 +1151,55 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
1132 struct ipath_devdata *dd; 1151 struct ipath_devdata *dd;
1133 void *addr; 1152 void *addr;
1134 size_t size; 1153 size_t size;
1135 int ret; 1154 int ret = 0;
1136 1155
1137 /* If the port is not shared, all addresses should be physical */ 1156 /* If the port is not shared, all addresses should be physical */
1138 if (!pd->port_subport_cnt) { 1157 if (!pd->port_subport_cnt)
1139 ret = -EINVAL;
1140 goto bail; 1158 goto bail;
1141 }
1142 1159
1143 dd = pd->port_dd; 1160 dd = pd->port_dd;
1144 size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size; 1161 size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
1145 1162
1146 /* 1163 /*
1147 * Master has all the slave uregbase, rcvhdrq, and 1164 * Each process has all the subport uregbase, rcvhdrq, and
1148 * rcvegrbufs mmapped. 1165 * rcvegrbufs mmapped - as an array for all the processes,
1166 * and also separately for this process.
1149 */ 1167 */
1150 if (subport == 0) { 1168 if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
1151 unsigned num_slaves = pd->port_subport_cnt - 1; 1169 addr = pd->subport_uregbase;
1152 1170 size = PAGE_SIZE * pd->port_subport_cnt;
1153 if (pgaddr == ((u64) pd->subport_uregbase & MMAP64_MASK)) { 1171 } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
1154 addr = pd->subport_uregbase; 1172 addr = pd->subport_rcvhdr_base;
1155 size = PAGE_SIZE * num_slaves; 1173 size = pd->port_rcvhdrq_size * pd->port_subport_cnt;
1156 } else if (pgaddr == ((u64) pd->subport_rcvhdr_base & 1174 } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
1157 MMAP64_MASK)) { 1175 addr = pd->subport_rcvegrbuf;
1158 addr = pd->subport_rcvhdr_base; 1176 size *= pd->port_subport_cnt;
1159 size = pd->port_rcvhdrq_size * num_slaves; 1177 } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
1160 } else if (pgaddr == ((u64) pd->subport_rcvegrbuf & 1178 PAGE_SIZE * subport)) {
1161 MMAP64_MASK)) { 1179 addr = pd->subport_uregbase + PAGE_SIZE * subport;
1162 addr = pd->subport_rcvegrbuf; 1180 size = PAGE_SIZE;
1163 size *= num_slaves; 1181 } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
1164 } else { 1182 pd->port_rcvhdrq_size * subport)) {
1165 ret = -EINVAL; 1183 addr = pd->subport_rcvhdr_base +
1166 goto bail; 1184 pd->port_rcvhdrq_size * subport;
1167 } 1185 size = pd->port_rcvhdrq_size;
1168 } else if (pgaddr == (((u64) pd->subport_uregbase + 1186 } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
1169 PAGE_SIZE * (subport - 1)) & MMAP64_MASK)) { 1187 size * subport)) {
1170 addr = pd->subport_uregbase + PAGE_SIZE * (subport - 1); 1188 addr = pd->subport_rcvegrbuf + size * subport;
1171 size = PAGE_SIZE; 1189 /* rcvegrbufs are read-only on the slave */
1172 } else if (pgaddr == (((u64) pd->subport_rcvhdr_base + 1190 if (vma->vm_flags & VM_WRITE) {
1173 pd->port_rcvhdrq_size * (subport - 1)) & 1191 dev_info(&dd->pcidev->dev,
1174 MMAP64_MASK)) { 1192 "Can't map eager buffers as "
1175 addr = pd->subport_rcvhdr_base + 1193 "writable (flags=%lx)\n", vma->vm_flags);
1176 pd->port_rcvhdrq_size * (subport - 1); 1194 ret = -EPERM;
1177 size = pd->port_rcvhdrq_size; 1195 goto bail;
1178 } else if (pgaddr == (((u64) pd->subport_rcvegrbuf + 1196 }
1179 size * (subport - 1)) & MMAP64_MASK)) { 1197 /*
1180 addr = pd->subport_rcvegrbuf + size * (subport - 1); 1198 * Don't allow permission to later change to writeable
1181 /* rcvegrbufs are read-only on the slave */ 1199 * with mprotect.
1182 if (vma->vm_flags & VM_WRITE) { 1200 */
1183 dev_info(&dd->pcidev->dev, 1201 vma->vm_flags &= ~VM_MAYWRITE;
1184 "Can't map eager buffers as "
1185 "writable (flags=%lx)\n", vma->vm_flags);
1186 ret = -EPERM;
1187 goto bail;
1188 }
1189 /*
1190 * Don't allow permission to later change to writeable
1191 * with mprotect.
1192 */
1193 vma->vm_flags &= ~VM_MAYWRITE;
1194 } else { 1202 } else {
1195 ret = -EINVAL;
1196 goto bail; 1203 goto bail;
1197 } 1204 }
1198 len = vma->vm_end - vma->vm_start; 1205 len = vma->vm_end - vma->vm_start;
@@ -1205,7 +1212,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
1205 vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; 1212 vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
1206 vma->vm_ops = &ipath_file_vm_ops; 1213 vma->vm_ops = &ipath_file_vm_ops;
1207 vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; 1214 vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
1208 ret = 0; 1215 ret = 1;
1209 1216
1210bail: 1217bail:
1211 return ret; 1218 return ret;
@@ -1265,19 +1272,20 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
1265 * Check for kernel virtual addresses first, anything else must 1272 * Check for kernel virtual addresses first, anything else must
1266 * match a HW or memory address. 1273 * match a HW or memory address.
1267 */ 1274 */
1268 if (pgaddr >= (1ULL<<40)) { 1275 ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
1269 ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp)); 1276 if (ret) {
1277 if (ret > 0)
1278 ret = 0;
1270 goto bail; 1279 goto bail;
1271 } 1280 }
1272 1281
1282 ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
1273 if (!pd->port_subport_cnt) { 1283 if (!pd->port_subport_cnt) {
1274 /* port is not shared */ 1284 /* port is not shared */
1275 ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
1276 piocnt = dd->ipath_pbufsport; 1285 piocnt = dd->ipath_pbufsport;
1277 piobufs = pd->port_piobufs; 1286 piobufs = pd->port_piobufs;
1278 } else if (!subport_fp(fp)) { 1287 } else if (!subport_fp(fp)) {
1279 /* caller is the master */ 1288 /* caller is the master */
1280 ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
1281 piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) + 1289 piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) +
1282 (dd->ipath_pbufsport % pd->port_subport_cnt); 1290 (dd->ipath_pbufsport % pd->port_subport_cnt);
1283 piobufs = pd->port_piobufs + 1291 piobufs = pd->port_piobufs +
@@ -1286,7 +1294,6 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
1286 unsigned slave = subport_fp(fp) - 1; 1294 unsigned slave = subport_fp(fp) - 1;
1287 1295
1288 /* caller is a slave */ 1296 /* caller is a slave */
1289 ureg = 0;
1290 piocnt = dd->ipath_pbufsport / pd->port_subport_cnt; 1297 piocnt = dd->ipath_pbufsport / pd->port_subport_cnt;
1291 piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave; 1298 piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
1292 } 1299 }
@@ -1300,9 +1307,6 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
1300 ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, 1307 ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
1301 (void *) dd->ipath_pioavailregs_dma, 1308 (void *) dd->ipath_pioavailregs_dma,
1302 "pioavail registers"); 1309 "pioavail registers");
1303 else if (subport_fp(fp))
1304 /* Subports don't mmap the physical receive buffers */
1305 ret = -EINVAL;
1306 else if (pgaddr == pd->port_rcvegr_phys) 1310 else if (pgaddr == pd->port_rcvegr_phys)
1307 ret = mmap_rcvegrbufs(vma, pd); 1311 ret = mmap_rcvegrbufs(vma, pd);
1308 else if (pgaddr == (u64) pd->port_rcvhdrq_phys) 1312 else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
@@ -1400,32 +1404,41 @@ static int init_subports(struct ipath_devdata *dd,
1400 const struct ipath_user_info *uinfo) 1404 const struct ipath_user_info *uinfo)
1401{ 1405{
1402 int ret = 0; 1406 int ret = 0;
1403 unsigned num_slaves; 1407 unsigned num_subports;
1404 size_t size; 1408 size_t size;
1405 1409
1406 /* Old user binaries don't know about subports */
1407 if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
1408 goto bail;
1409 /* 1410 /*
1410 * If the user is requesting zero or one port, 1411 * If the user is requesting zero or one port,
1411 * skip the subport allocation. 1412 * skip the subport allocation.
1412 */ 1413 */
1413 if (uinfo->spu_subport_cnt <= 1) 1414 if (uinfo->spu_subport_cnt <= 1)
1414 goto bail; 1415 goto bail;
1415 if (uinfo->spu_subport_cnt > 4) { 1416
1417 /* Old user binaries don't know about new subport implementation */
1418 if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) {
1419 dev_info(&dd->pcidev->dev,
1420 "Mismatched user minor version (%d) and driver "
1421 "minor version (%d) while port sharing. Ensure "
1422 "that driver and library are from the same "
1423 "release.\n",
1424 (int) (uinfo->spu_userversion & 0xffff),
1425 IPATH_USER_SWMINOR);
1426 goto bail;
1427 }
1428 if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) {
1416 ret = -EINVAL; 1429 ret = -EINVAL;
1417 goto bail; 1430 goto bail;
1418 } 1431 }
1419 1432
1420 num_slaves = uinfo->spu_subport_cnt - 1; 1433 num_subports = uinfo->spu_subport_cnt;
1421 pd->subport_uregbase = vmalloc(PAGE_SIZE * num_slaves); 1434 pd->subport_uregbase = vmalloc(PAGE_SIZE * num_subports);
1422 if (!pd->subport_uregbase) { 1435 if (!pd->subport_uregbase) {
1423 ret = -ENOMEM; 1436 ret = -ENOMEM;
1424 goto bail; 1437 goto bail;
1425 } 1438 }
1426 /* Note: pd->port_rcvhdrq_size isn't initialized yet. */ 1439 /* Note: pd->port_rcvhdrq_size isn't initialized yet. */
1427 size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * 1440 size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
1428 sizeof(u32), PAGE_SIZE) * num_slaves; 1441 sizeof(u32), PAGE_SIZE) * num_subports;
1429 pd->subport_rcvhdr_base = vmalloc(size); 1442 pd->subport_rcvhdr_base = vmalloc(size);
1430 if (!pd->subport_rcvhdr_base) { 1443 if (!pd->subport_rcvhdr_base) {
1431 ret = -ENOMEM; 1444 ret = -ENOMEM;
@@ -1434,7 +1447,7 @@ static int init_subports(struct ipath_devdata *dd,
1434 1447
1435 pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks * 1448 pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks *
1436 pd->port_rcvegrbuf_size * 1449 pd->port_rcvegrbuf_size *
1437 num_slaves); 1450 num_subports);
1438 if (!pd->subport_rcvegrbuf) { 1451 if (!pd->subport_rcvegrbuf) {
1439 ret = -ENOMEM; 1452 ret = -ENOMEM;
1440 goto bail_rhdr; 1453 goto bail_rhdr;
@@ -1443,6 +1456,12 @@ static int init_subports(struct ipath_devdata *dd,
1443 pd->port_subport_cnt = uinfo->spu_subport_cnt; 1456 pd->port_subport_cnt = uinfo->spu_subport_cnt;
1444 pd->port_subport_id = uinfo->spu_subport_id; 1457 pd->port_subport_id = uinfo->spu_subport_id;
1445 pd->active_slaves = 1; 1458 pd->active_slaves = 1;
1459 set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
1460 memset(pd->subport_uregbase, 0, PAGE_SIZE * num_subports);
1461 memset(pd->subport_rcvhdr_base, 0, size);
1462 memset(pd->subport_rcvegrbuf, 0, pd->port_rcvegrbuf_chunks *
1463 pd->port_rcvegrbuf_size *
1464 num_subports);
1446 goto bail; 1465 goto bail;
1447 1466
1448bail_rhdr: 1467bail_rhdr:
@@ -1573,18 +1592,19 @@ static int find_best_unit(struct file *fp,
1573 */ 1592 */
1574 if (!cpus_empty(current->cpus_allowed) && 1593 if (!cpus_empty(current->cpus_allowed) &&
1575 !cpus_full(current->cpus_allowed)) { 1594 !cpus_full(current->cpus_allowed)) {
1576 int ncpus = num_online_cpus(), curcpu = -1; 1595 int ncpus = num_online_cpus(), curcpu = -1, nset = 0;
1577 for (i = 0; i < ncpus; i++) 1596 for (i = 0; i < ncpus; i++)
1578 if (cpu_isset(i, current->cpus_allowed)) { 1597 if (cpu_isset(i, current->cpus_allowed)) {
1579 ipath_cdbg(PROC, "%s[%u] affinity set for " 1598 ipath_cdbg(PROC, "%s[%u] affinity set for "
1580 "cpu %d\n", current->comm, 1599 "cpu %d/%d\n", current->comm,
1581 current->pid, i); 1600 current->pid, i, ncpus);
1582 curcpu = i; 1601 curcpu = i;
1602 nset++;
1583 } 1603 }
1584 if (curcpu != -1) { 1604 if (curcpu != -1 && nset != ncpus) {
1585 if (npresent) { 1605 if (npresent) {
1586 prefunit = curcpu / (ncpus / npresent); 1606 prefunit = curcpu / (ncpus / npresent);
1587 ipath_dbg("%s[%u] %d chips, %d cpus, " 1607 ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, "
1588 "%d cpus/chip, select unit %d\n", 1608 "%d cpus/chip, select unit %d\n",
1589 current->comm, current->pid, 1609 current->comm, current->pid,
1590 npresent, ncpus, ncpus / npresent, 1610 npresent, ncpus, ncpus / npresent,
@@ -1764,11 +1784,17 @@ static int ipath_do_user_init(struct file *fp,
1764 const struct ipath_user_info *uinfo) 1784 const struct ipath_user_info *uinfo)
1765{ 1785{
1766 int ret; 1786 int ret;
1767 struct ipath_portdata *pd; 1787 struct ipath_portdata *pd = port_fp(fp);
1768 struct ipath_devdata *dd; 1788 struct ipath_devdata *dd;
1769 u32 head32; 1789 u32 head32;
1770 1790
1771 pd = port_fp(fp); 1791 /* Subports don't need to initialize anything since master did it. */
1792 if (subport_fp(fp)) {
1793 ret = wait_event_interruptible(pd->port_wait,
1794 !test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag));
1795 goto done;
1796 }
1797
1772 dd = pd->port_dd; 1798 dd = pd->port_dd;
1773 1799
1774 if (uinfo->spu_rcvhdrsize) { 1800 if (uinfo->spu_rcvhdrsize) {
@@ -1826,6 +1852,11 @@ static int ipath_do_user_init(struct file *fp,
1826 dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD); 1852 dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
1827 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 1853 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1828 dd->ipath_rcvctrl); 1854 dd->ipath_rcvctrl);
1855 /* Notify any waiting slaves */
1856 if (pd->port_subport_cnt) {
1857 clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
1858 wake_up(&pd->port_wait);
1859 }
1829done: 1860done:
1830 return ret; 1861 return ret;
1831} 1862}
@@ -2017,6 +2048,17 @@ static int ipath_get_slave_info(struct ipath_portdata *pd,
2017 return ret; 2048 return ret;
2018} 2049}
2019 2050
2051static int ipath_force_pio_avail_update(struct ipath_devdata *dd)
2052{
2053 u64 reg = dd->ipath_sendctrl;
2054
2055 clear_bit(IPATH_S_PIOBUFAVAILUPD, &reg);
2056 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, reg);
2057 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
2058
2059 return 0;
2060}
2061
2020static ssize_t ipath_write(struct file *fp, const char __user *data, 2062static ssize_t ipath_write(struct file *fp, const char __user *data,
2021 size_t count, loff_t *off) 2063 size_t count, loff_t *off)
2022{ 2064{
@@ -2071,27 +2113,35 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
2071 dest = &cmd.cmd.part_key; 2113 dest = &cmd.cmd.part_key;
2072 src = &ucmd->cmd.part_key; 2114 src = &ucmd->cmd.part_key;
2073 break; 2115 break;
2074 case IPATH_CMD_SLAVE_INFO: 2116 case __IPATH_CMD_SLAVE_INFO:
2075 copy = sizeof(cmd.cmd.slave_mask_addr); 2117 copy = sizeof(cmd.cmd.slave_mask_addr);
2076 dest = &cmd.cmd.slave_mask_addr; 2118 dest = &cmd.cmd.slave_mask_addr;
2077 src = &ucmd->cmd.slave_mask_addr; 2119 src = &ucmd->cmd.slave_mask_addr;
2078 break; 2120 break;
2121 case IPATH_CMD_PIOAVAILUPD: // force an update of PIOAvail reg
2122 copy = 0;
2123 src = NULL;
2124 dest = NULL;
2125 break;
2079 default: 2126 default:
2080 ret = -EINVAL; 2127 ret = -EINVAL;
2081 goto bail; 2128 goto bail;
2082 } 2129 }
2083 2130
2084 if ((count - consumed) < copy) { 2131 if (copy) {
2085 ret = -EINVAL; 2132 if ((count - consumed) < copy) {
2086 goto bail; 2133 ret = -EINVAL;
2087 } 2134 goto bail;
2135 }
2088 2136
2089 if (copy_from_user(dest, src, copy)) { 2137 if (copy_from_user(dest, src, copy)) {
2090 ret = -EFAULT; 2138 ret = -EFAULT;
2091 goto bail; 2139 goto bail;
2140 }
2141
2142 consumed += copy;
2092 } 2143 }
2093 2144
2094 consumed += copy;
2095 pd = port_fp(fp); 2145 pd = port_fp(fp);
2096 if (!pd && cmd.type != __IPATH_CMD_USER_INIT && 2146 if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
2097 cmd.type != IPATH_CMD_ASSIGN_PORT) { 2147 cmd.type != IPATH_CMD_ASSIGN_PORT) {
@@ -2137,11 +2187,14 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
2137 case IPATH_CMD_SET_PART_KEY: 2187 case IPATH_CMD_SET_PART_KEY:
2138 ret = ipath_set_part_key(pd, cmd.cmd.part_key); 2188 ret = ipath_set_part_key(pd, cmd.cmd.part_key);
2139 break; 2189 break;
2140 case IPATH_CMD_SLAVE_INFO: 2190 case __IPATH_CMD_SLAVE_INFO:
2141 ret = ipath_get_slave_info(pd, 2191 ret = ipath_get_slave_info(pd,
2142 (void __user *) (unsigned long) 2192 (void __user *) (unsigned long)
2143 cmd.cmd.slave_mask_addr); 2193 cmd.cmd.slave_mask_addr);
2144 break; 2194 break;
2195 case IPATH_CMD_PIOAVAILUPD:
2196 ret = ipath_force_pio_avail_update(pd->port_dd);
2197 break;
2145 } 2198 }
2146 2199
2147 if (ret >= 0) 2200 if (ret >= 0)
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 993482545021..4171198fc202 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -43,6 +43,9 @@
43#include "ipath_kernel.h" 43#include "ipath_kernel.h"
44#include "ipath_registers.h" 44#include "ipath_registers.h"
45 45
46static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64);
47
48
46/* 49/*
47 * This lists the InfiniPath registers, in the actual chip layout. 50 * This lists the InfiniPath registers, in the actual chip layout.
48 * This structure should never be directly accessed. 51 * This structure should never be directly accessed.
@@ -208,8 +211,8 @@ static const struct ipath_kregs ipath_ht_kregs = {
208 .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus), 211 .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
209 .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig), 212 .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
210 /* 213 /*
211 * These should not be used directly via ipath_read_kreg64(), 214 * These should not be used directly via ipath_write_kreg64(),
212 * use them with ipath_read_kreg64_port(), 215 * use them with ipath_write_kreg64_port(),
213 */ 216 */
214 .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), 217 .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
215 .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0) 218 .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0)
@@ -284,6 +287,14 @@ static const struct ipath_cregs ipath_ht_cregs = {
284#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000 287#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000
285#define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000 288#define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000
286 289
290
291/* TID entries (memory), HT-only */
292#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
293#define INFINIPATH_RT_VALID 0x8000000000000000ULL
294#define INFINIPATH_RT_ADDR_SHIFT 0
295#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL
296#define INFINIPATH_RT_BUFSIZE_SHIFT 48
297
287/* 298/*
288 * masks and bits that are different in different chips, or present only 299 * masks and bits that are different in different chips, or present only
289 * in one 300 * in one
@@ -402,6 +413,14 @@ static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
402 INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), 413 INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
403}; 414};
404 415
416#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
417 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
418 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
419#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \
420 << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
421
422static int ipath_ht_txe_recover(struct ipath_devdata *);
423
405/** 424/**
406 * ipath_ht_handle_hwerrors - display hardware errors. 425 * ipath_ht_handle_hwerrors - display hardware errors.
407 * @dd: the infinipath device 426 * @dd: the infinipath device
@@ -450,13 +469,12 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
450 469
451 /* 470 /*
452 * make sure we get this much out, unless told to be quiet, 471 * make sure we get this much out, unless told to be quiet,
472 * it's a parity error we may recover from,
453 * or it's occurred within the last 5 seconds 473 * or it's occurred within the last 5 seconds
454 */ 474 */
455 if ((hwerrs & ~(dd->ipath_lasthwerror | 475 if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY |
456 ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | 476 RXE_EAGER_PARITY)) ||
457 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) 477 (ipath_debug & __IPATH_VERBDBG))
458 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
459 (ipath_debug & __IPATH_VERBDBG))
460 dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " 478 dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
461 "(cleared)\n", (unsigned long long) hwerrs); 479 "(cleared)\n", (unsigned long long) hwerrs);
462 dd->ipath_lasthwerror |= hwerrs; 480 dd->ipath_lasthwerror |= hwerrs;
@@ -467,7 +485,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
467 (hwerrs & ~dd->ipath_hwe_bitsextant)); 485 (hwerrs & ~dd->ipath_hwe_bitsextant));
468 486
469 ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); 487 ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
470 if (ctrl & INFINIPATH_C_FREEZEMODE) { 488 if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
471 /* 489 /*
472 * parity errors in send memory are recoverable, 490 * parity errors in send memory are recoverable,
473 * just cancel the send (if indicated in * sendbuffererror), 491 * just cancel the send (if indicated in * sendbuffererror),
@@ -476,50 +494,14 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
476 * occur if a processor speculative read is done to the PIO 494 * occur if a processor speculative read is done to the PIO
477 * buffer while we are sending a packet, for example. 495 * buffer while we are sending a packet, for example.
478 */ 496 */
479 if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | 497 if ((hwerrs & TXE_PIO_PARITY) && ipath_ht_txe_recover(dd))
480 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) 498 hwerrs &= ~TXE_PIO_PARITY;
481 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { 499 if (hwerrs & RXE_EAGER_PARITY)
482 ipath_stats.sps_txeparity++; 500 ipath_dev_err(dd, "RXE parity, Eager TID error is not "
483 ipath_dbg("Recovering from TXE parity error (%llu), " 501 "recoverable\n");
484 "hwerrstatus=%llx\n", 502 if (!hwerrs) {
485 (unsigned long long) ipath_stats.sps_txeparity, 503 ipath_dbg("Clearing freezemode on ignored or "
486 (unsigned long long) hwerrs); 504 "recovered hardware error\n");
487 ipath_disarm_senderrbufs(dd);
488 hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
489 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
490 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
491 if (!hwerrs) { /* else leave in freeze mode */
492 ipath_write_kreg(dd,
493 dd->ipath_kregs->kr_control,
494 dd->ipath_control);
495 return;
496 }
497 }
498 if (hwerrs) {
499 /*
500 * if any set that we aren't ignoring; only
501 * make the complaint once, in case it's stuck
502 * or recurring, and we get here multiple
503 * times.
504 */
505 if (dd->ipath_flags & IPATH_INITTED) {
506 ipath_dev_err(dd, "Fatal Hardware Error (freeze "
507 "mode), no longer usable, SN %.16s\n",
508 dd->ipath_serial);
509 isfatal = 1;
510 }
511 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
512 /* mark as having had error */
513 *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
514 /*
515 * mark as not usable, at a minimum until driver
516 * is reloaded, probably until reboot, since no
517 * other reset is possible.
518 */
519 dd->ipath_flags &= ~IPATH_INITTED;
520 } else {
521 ipath_dbg("Clearing freezemode on ignored hardware "
522 "error\n");
523 ctrl &= ~INFINIPATH_C_FREEZEMODE; 505 ctrl &= ~INFINIPATH_C_FREEZEMODE;
524 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 506 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
525 ctrl); 507 ctrl);
@@ -587,7 +569,39 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
587 dd->ipath_hwerrmask); 569 dd->ipath_hwerrmask);
588 } 570 }
589 571
590 ipath_dev_err(dd, "%s hardware error\n", msg); 572 if (hwerrs) {
573 /*
574 * if any set that we aren't ignoring; only
575 * make the complaint once, in case it's stuck
576 * or recurring, and we get here multiple
577 * times.
578 * force link down, so switch knows, and
579 * LEDs are turned off
580 */
581 if (dd->ipath_flags & IPATH_INITTED) {
582 ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
583 ipath_setup_ht_setextled(dd,
584 INFINIPATH_IBCS_L_STATE_DOWN,
585 INFINIPATH_IBCS_LT_STATE_DISABLED);
586 ipath_dev_err(dd, "Fatal Hardware Error (freeze "
587 "mode), no longer usable, SN %.16s\n",
588 dd->ipath_serial);
589 isfatal = 1;
590 }
591 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
592 /* mark as having had error */
593 *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
594 /*
595 * mark as not usable, at a minimum until driver
596 * is reloaded, probably until reboot, since no
597 * other reset is possible.
598 */
599 dd->ipath_flags &= ~IPATH_INITTED;
600 }
601 else
602 *msg = 0; /* recovered from all of them */
603 if (*msg)
604 ipath_dev_err(dd, "%s hardware error\n", msg);
591 if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) 605 if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
592 /* 606 /*
593 * for status file; if no trailing brace is copied, 607 * for status file; if no trailing brace is copied,
@@ -658,7 +672,8 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
658 if (n) 672 if (n)
659 snprintf(name, namelen, "%s", n); 673 snprintf(name, namelen, "%s", n);
660 674
661 if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) { 675 if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
676 dd->ipath_minrev > 3)) {
662 /* 677 /*
663 * This version of the driver only supports Rev 3.2 and 3.3 678 * This version of the driver only supports Rev 3.2 and 3.3
664 */ 679 */
@@ -1163,6 +1178,8 @@ static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
1163 1178
1164 if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) 1179 if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
1165 ipath_dev_err(dd, "MemBIST did not complete!\n"); 1180 ipath_dev_err(dd, "MemBIST did not complete!\n");
1181 if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT)
1182 ipath_dbg("MemBIST corrected\n");
1166 1183
1167 ipath_check_htlink(dd); 1184 ipath_check_htlink(dd);
1168 1185
@@ -1366,6 +1383,9 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
1366 u64 __iomem *tidptr, u32 type, 1383 u64 __iomem *tidptr, u32 type,
1367 unsigned long pa) 1384 unsigned long pa)
1368{ 1385{
1386 if (!dd->ipath_kregbase)
1387 return;
1388
1369 if (pa != dd->ipath_tidinvalid) { 1389 if (pa != dd->ipath_tidinvalid) {
1370 if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) { 1390 if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) {
1371 dev_info(&dd->pcidev->dev, 1391 dev_info(&dd->pcidev->dev,
@@ -1382,10 +1402,10 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
1382 pa |= lenvalid | INFINIPATH_RT_VALID; 1402 pa |= lenvalid | INFINIPATH_RT_VALID;
1383 } 1403 }
1384 } 1404 }
1385 if (dd->ipath_kregbase) 1405 writeq(pa, tidptr);
1386 writeq(pa, tidptr);
1387} 1406}
1388 1407
1408
1389/** 1409/**
1390 * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager 1410 * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager
1391 * @dd: the infinipath device 1411 * @dd: the infinipath device
@@ -1515,7 +1535,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
1515 INFINIPATH_S_ABORT); 1535 INFINIPATH_S_ABORT);
1516 1536
1517 ipath_get_eeprom_info(dd); 1537 ipath_get_eeprom_info(dd);
1518 if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && 1538 if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
1519 dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') { 1539 dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') {
1520 /* 1540 /*
1521 * Later production QHT7040 has same changes as QHT7140, so 1541 * Later production QHT7040 has same changes as QHT7140, so
@@ -1528,6 +1548,24 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
1528 return 0; 1548 return 0;
1529} 1549}
1530 1550
1551
1552static int ipath_ht_txe_recover(struct ipath_devdata *dd)
1553{
1554 int cnt = ++ipath_stats.sps_txeparity;
1555 if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) {
1556 if (cnt == IPATH_MAX_PARITY_ATTEMPTS)
1557 ipath_dev_err(dd,
1558 "Too many attempts to recover from "
1559 "TXE parity, giving up\n");
1560 return 0;
1561 }
1562 dev_info(&dd->pcidev->dev,
1563 "Recovering from TXE PIO parity error\n");
1564 ipath_disarm_senderrbufs(dd, 1);
1565 return 1;
1566}
1567
1568
1531/** 1569/**
1532 * ipath_init_ht_get_base_info - set chip-specific flags for user code 1570 * ipath_init_ht_get_base_info - set chip-specific flags for user code
1533 * @dd: the infinipath device 1571 * @dd: the infinipath device
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 05918e1e7c36..1b9c30857754 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -43,6 +43,8 @@
43#include "ipath_kernel.h" 43#include "ipath_kernel.h"
44#include "ipath_registers.h" 44#include "ipath_registers.h"
45 45
46static void ipath_setup_pe_setextled(struct ipath_devdata *, u64, u64);
47
46/* 48/*
47 * This file contains all the chip-specific register information and 49 * This file contains all the chip-specific register information and
48 * access functions for the QLogic InfiniPath PCI-Express chip. 50 * access functions for the QLogic InfiniPath PCI-Express chip.
@@ -207,8 +209,8 @@ static const struct ipath_kregs ipath_pe_kregs = {
207 .kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg), 209 .kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg),
208 210
209 /* 211 /*
210 * These should not be used directly via ipath_read_kreg64(), 212 * These should not be used directly via ipath_write_kreg64(),
211 * use them with ipath_read_kreg64_port() 213 * use them with ipath_write_kreg64_port(),
212 */ 214 */
213 .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), 215 .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
214 .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0), 216 .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
@@ -321,6 +323,12 @@ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
321 INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), 323 INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
322}; 324};
323 325
326#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
327 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
328 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
329
330static int ipath_pe_txe_recover(struct ipath_devdata *);
331
324/** 332/**
325 * ipath_pe_handle_hwerrors - display hardware errors. 333 * ipath_pe_handle_hwerrors - display hardware errors.
326 * @dd: the infinipath device 334 * @dd: the infinipath device
@@ -394,32 +402,21 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
394 * occur if a processor speculative read is done to the PIO 402 * occur if a processor speculative read is done to the PIO
395 * buffer while we are sending a packet, for example. 403 * buffer while we are sending a packet, for example.
396 */ 404 */
397 if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | 405 if ((hwerrs & TXE_PIO_PARITY) && ipath_pe_txe_recover(dd))
398 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) 406 hwerrs &= ~TXE_PIO_PARITY;
399 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
400 ipath_stats.sps_txeparity++;
401 ipath_dbg("Recovering from TXE parity error (%llu), "
402 "hwerrstatus=%llx\n",
403 (unsigned long long) ipath_stats.sps_txeparity,
404 (unsigned long long) hwerrs);
405 ipath_disarm_senderrbufs(dd);
406 hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
407 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
408 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
409 if (!hwerrs) { /* else leave in freeze mode */
410 ipath_write_kreg(dd,
411 dd->ipath_kregs->kr_control,
412 dd->ipath_control);
413 return;
414 }
415 }
416 if (hwerrs) { 407 if (hwerrs) {
417 /* 408 /*
418 * if any set that we aren't ignoring only make the 409 * if any set that we aren't ignoring only make the
419 * complaint once, in case it's stuck or recurring, 410 * complaint once, in case it's stuck or recurring,
420 * and we get here multiple times 411 * and we get here multiple times
412 * Force link down, so switch knows, and
413 * LEDs are turned off
421 */ 414 */
422 if (dd->ipath_flags & IPATH_INITTED) { 415 if (dd->ipath_flags & IPATH_INITTED) {
416 ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
417 ipath_setup_pe_setextled(dd,
418 INFINIPATH_IBCS_L_STATE_DOWN,
419 INFINIPATH_IBCS_LT_STATE_DISABLED);
423 ipath_dev_err(dd, "Fatal Hardware Error (freeze " 420 ipath_dev_err(dd, "Fatal Hardware Error (freeze "
424 "mode), no longer usable, SN %.16s\n", 421 "mode), no longer usable, SN %.16s\n",
425 dd->ipath_serial); 422 dd->ipath_serial);
@@ -493,7 +490,8 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
493 dd->ipath_hwerrmask); 490 dd->ipath_hwerrmask);
494 } 491 }
495 492
496 ipath_dev_err(dd, "%s hardware error\n", msg); 493 if (*msg)
494 ipath_dev_err(dd, "%s hardware error\n", msg);
497 if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) { 495 if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) {
498 /* 496 /*
499 * for /sys status file ; if no trailing } is copied, we'll 497 * for /sys status file ; if no trailing } is copied, we'll
@@ -581,6 +579,8 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
581 579
582 if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) 580 if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
583 ipath_dev_err(dd, "MemBIST did not complete!\n"); 581 ipath_dev_err(dd, "MemBIST did not complete!\n");
582 if (extsval & INFINIPATH_EXTS_MEMBIST_FOUND)
583 ipath_dbg("MemBIST corrected\n");
584 584
585 val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */ 585 val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */
586 586
@@ -1330,6 +1330,35 @@ static void ipath_pe_free_irq(struct ipath_devdata *dd)
1330 dd->ipath_irq = 0; 1330 dd->ipath_irq = 0;
1331} 1331}
1332 1332
1333/*
1334 * On platforms using this chip, and not having ordered WC stores, we
1335 * can get TXE parity errors due to speculative reads to the PIO buffers,
1336 * and this, due to a chip bug can result in (many) false parity error
1337 * reports. So it's a debug print on those, and an info print on systems
1338 * where the speculative reads don't occur.
1339 * Because we can get lots of false errors, we have no upper limit
1340 * on recovery attempts on those platforms.
1341 */
1342static int ipath_pe_txe_recover(struct ipath_devdata *dd)
1343{
1344 if (ipath_unordered_wc())
1345 ipath_dbg("Recovering from TXE PIO parity error\n");
1346 else {
1347 int cnt = ++ipath_stats.sps_txeparity;
1348 if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) {
1349 if (cnt == IPATH_MAX_PARITY_ATTEMPTS)
1350 ipath_dev_err(dd,
1351 "Too many attempts to recover from "
1352 "TXE parity, giving up\n");
1353 return 0;
1354 }
1355 dev_info(&dd->pcidev->dev,
1356 "Recovering from TXE PIO parity error\n");
1357 }
1358 ipath_disarm_senderrbufs(dd, 1);
1359 return 1;
1360}
1361
1333/** 1362/**
1334 * ipath_init_iba6120_funcs - set up the chip-specific function pointers 1363 * ipath_init_iba6120_funcs - set up the chip-specific function pointers
1335 * @dd: the infinipath device 1364 * @dd: the infinipath device
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index d4f6b5239ef8..7045ba689494 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -216,6 +216,20 @@ static int bringup_link(struct ipath_devdata *dd)
216 return ret; 216 return ret;
217} 217}
218 218
219static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd)
220{
221 struct ipath_portdata *pd = NULL;
222
223 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
224 if (pd) {
225 pd->port_dd = dd;
226 pd->port_cnt = 1;
227 /* The port 0 pkey table is used by the layer interface. */
228 pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
229 }
230 return pd;
231}
232
219static int init_chip_first(struct ipath_devdata *dd, 233static int init_chip_first(struct ipath_devdata *dd,
220 struct ipath_portdata **pdp) 234 struct ipath_portdata **pdp)
221{ 235{
@@ -271,20 +285,16 @@ static int init_chip_first(struct ipath_devdata *dd,
271 goto done; 285 goto done;
272 } 286 }
273 287
274 dd->ipath_pd[0] = kzalloc(sizeof(*pd), GFP_KERNEL); 288 pd = create_portdata0(dd);
275 289
276 if (!dd->ipath_pd[0]) { 290 if (!pd) {
277 ipath_dev_err(dd, "Unable to allocate portdata for port " 291 ipath_dev_err(dd, "Unable to allocate portdata for port "
278 "0, failing\n"); 292 "0, failing\n");
279 ret = -ENOMEM; 293 ret = -ENOMEM;
280 goto done; 294 goto done;
281 } 295 }
282 pd = dd->ipath_pd[0]; 296 dd->ipath_pd[0] = pd;
283 pd->port_dd = dd; 297
284 pd->port_port = 0;
285 pd->port_cnt = 1;
286 /* The port 0 pkey table is used by the layer interface. */
287 pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
288 dd->ipath_rcvtidcnt = 298 dd->ipath_rcvtidcnt =
289 ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt); 299 ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
290 dd->ipath_rcvtidbase = 300 dd->ipath_rcvtidbase =
@@ -590,6 +600,10 @@ static int init_housekeeping(struct ipath_devdata *dd,
590 goto done; 600 goto done;
591 } 601 }
592 602
603
604 /* clear diagctrl register, in case diags were running and crashed */
605 ipath_write_kreg (dd, dd->ipath_kregs->kr_hwdiagctrl, 0);
606
593 /* clear the initial reset flag, in case first driver load */ 607 /* clear the initial reset flag, in case first driver load */
594 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 608 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
595 INFINIPATH_E_RESET); 609 INFINIPATH_E_RESET);
@@ -668,6 +682,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
668{ 682{
669 int ret = 0, i; 683 int ret = 0, i;
670 u32 val32, kpiobufs; 684 u32 val32, kpiobufs;
685 u32 piobufs, uports;
671 u64 val; 686 u64 val;
672 struct ipath_portdata *pd = NULL; /* keep gcc4 happy */ 687 struct ipath_portdata *pd = NULL; /* keep gcc4 happy */
673 gfp_t gfp_flags = GFP_USER | __GFP_COMP; 688 gfp_t gfp_flags = GFP_USER | __GFP_COMP;
@@ -702,16 +717,17 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
702 * the in memory DMA'ed copies of the registers. This has to 717 * the in memory DMA'ed copies of the registers. This has to
703 * be done early, before we calculate lastport, etc. 718 * be done early, before we calculate lastport, etc.
704 */ 719 */
705 val = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; 720 piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
706 /* 721 /*
707 * calc number of pioavail registers, and save it; we have 2 722 * calc number of pioavail registers, and save it; we have 2
708 * bits per buffer. 723 * bits per buffer.
709 */ 724 */
710 dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2) 725 dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
711 / (sizeof(u64) * BITS_PER_BYTE / 2); 726 / (sizeof(u64) * BITS_PER_BYTE / 2);
727 uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
712 if (ipath_kpiobufs == 0) { 728 if (ipath_kpiobufs == 0) {
713 /* not set by user (this is default) */ 729 /* not set by user (this is default) */
714 if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128) 730 if (piobufs >= (uports * IPATH_MIN_USER_PORT_BUFCNT) + 32)
715 kpiobufs = 32; 731 kpiobufs = 32;
716 else 732 else
717 kpiobufs = 16; 733 kpiobufs = 16;
@@ -719,31 +735,25 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
719 else 735 else
720 kpiobufs = ipath_kpiobufs; 736 kpiobufs = ipath_kpiobufs;
721 737
722 if (kpiobufs > 738 if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) {
723 (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - 739 i = (int) piobufs -
724 (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT))) { 740 (int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
725 i = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
726 (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT);
727 if (i < 0) 741 if (i < 0)
728 i = 0; 742 i = 0;
729 dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs for " 743 dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
730 "kernel leaves too few for %d user ports " 744 "%d for kernel leaves too few for %d user ports "
731 "(%d each); using %u\n", kpiobufs, 745 "(%d each); using %u\n", kpiobufs,
732 dd->ipath_cfgports - 1, 746 piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
733 IPATH_MIN_USER_PORT_BUFCNT, i);
734 /* 747 /*
735 * shouldn't change ipath_kpiobufs, because could be 748 * shouldn't change ipath_kpiobufs, because could be
736 * different for different devices... 749 * different for different devices...
737 */ 750 */
738 kpiobufs = i; 751 kpiobufs = i;
739 } 752 }
740 dd->ipath_lastport_piobuf = 753 dd->ipath_lastport_piobuf = piobufs - kpiobufs;
741 dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - kpiobufs; 754 dd->ipath_pbufsport =
742 dd->ipath_pbufsport = dd->ipath_cfgports > 1 755 uports ? dd->ipath_lastport_piobuf / uports : 0;
743 ? dd->ipath_lastport_piobuf / (dd->ipath_cfgports - 1) 756 val32 = dd->ipath_lastport_piobuf - (dd->ipath_pbufsport * uports);
744 : 0;
745 val32 = dd->ipath_lastport_piobuf -
746 (dd->ipath_pbufsport * (dd->ipath_cfgports - 1));
747 if (val32 > 0) { 757 if (val32 > 0) {
748 ipath_dbg("allocating %u pbufs/port leaves %u unused, " 758 ipath_dbg("allocating %u pbufs/port leaves %u unused, "
749 "add to kernel\n", dd->ipath_pbufsport, val32); 759 "add to kernel\n", dd->ipath_pbufsport, val32);
@@ -754,8 +764,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
754 dd->ipath_lastpioindex = dd->ipath_lastport_piobuf; 764 dd->ipath_lastpioindex = dd->ipath_lastport_piobuf;
755 ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u " 765 ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
756 "each for %u user ports\n", kpiobufs, 766 "each for %u user ports\n", kpiobufs,
757 dd->ipath_piobcnt2k + dd->ipath_piobcnt4k, 767 piobufs, dd->ipath_pbufsport, uports);
758 dd->ipath_pbufsport, dd->ipath_cfgports - 1);
759 768
760 dd->ipath_f_early_init(dd); 769 dd->ipath_f_early_init(dd);
761 770
@@ -839,11 +848,24 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
839 * Set up the port 0 (kernel) rcvhdr q and egr TIDs. If doing 848 * Set up the port 0 (kernel) rcvhdr q and egr TIDs. If doing
840 * re-init, the simplest way to handle this is to free 849 * re-init, the simplest way to handle this is to free
841 * existing, and re-allocate. 850 * existing, and re-allocate.
851 * Need to re-create rest of port 0 portdata as well.
842 */ 852 */
843 if (reinit) { 853 if (reinit) {
844 struct ipath_portdata *pd = dd->ipath_pd[0]; 854 /* Alloc and init new ipath_portdata for port0,
845 dd->ipath_pd[0] = NULL; 855 * Then free old pd. Could lead to fragmentation, but also
846 ipath_free_pddata(dd, pd); 856 * makes later support for hot-swap easier.
857 */
858 struct ipath_portdata *npd;
859 npd = create_portdata0(dd);
860 if (npd) {
861 ipath_free_pddata(dd, pd);
862 dd->ipath_pd[0] = pd = npd;
863 } else {
864 ipath_dev_err(dd, "Unable to allocate portdata for"
865 " port 0, failing\n");
866 ret = -ENOMEM;
867 goto done;
868 }
847 } 869 }
848 dd->ipath_f_tidtemplate(dd); 870 dd->ipath_f_tidtemplate(dd);
849 ret = ipath_create_rcvhdrq(dd, pd); 871 ret = ipath_create_rcvhdrq(dd, pd);
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 72b9e279d19d..45d033169c6e 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -38,10 +38,39 @@
38#include "ipath_common.h" 38#include "ipath_common.h"
39 39
40/* 40/*
41 * clear (write) a pio buffer, to clear a parity error. This routine
42 * should only be called when in freeze mode, and the buffer should be
43 * canceled afterwards.
44 */
45static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum)
46{
47 u32 __iomem *pbuf;
48 u32 dwcnt; /* dword count to write */
49 if (pnum < dd->ipath_piobcnt2k) {
50 pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum *
51 dd->ipath_palign);
52 dwcnt = dd->ipath_piosize2k >> 2;
53 }
54 else {
55 pbuf = (u32 __iomem *) (dd->ipath_pio4kbase +
56 (pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
57 dwcnt = dd->ipath_piosize4k >> 2;
58 }
59 dev_info(&dd->pcidev->dev,
60 "Rewrite PIO buffer %u, to recover from parity error\n",
61 pnum);
62 *pbuf = dwcnt+1; /* no flush required, since already in freeze */
63 while(--dwcnt)
64 *pbuf++ = 0;
65}
66
67/*
41 * Called when we might have an error that is specific to a particular 68 * Called when we might have an error that is specific to a particular
42 * PIO buffer, and may need to cancel that buffer, so it can be re-used. 69 * PIO buffer, and may need to cancel that buffer, so it can be re-used.
70 * If rewrite is true, and bits are set in the sendbufferror registers,
71 * we'll write to the buffer, for error recovery on parity errors.
43 */ 72 */
44void ipath_disarm_senderrbufs(struct ipath_devdata *dd) 73void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
45{ 74{
46 u32 piobcnt; 75 u32 piobcnt;
47 unsigned long sbuf[4]; 76 unsigned long sbuf[4];
@@ -74,8 +103,11 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
74 } 103 }
75 104
76 for (i = 0; i < piobcnt; i++) 105 for (i = 0; i < piobcnt; i++)
77 if (test_bit(i, sbuf)) 106 if (test_bit(i, sbuf)) {
107 if (rewrite)
108 ipath_clrpiobuf(dd, i);
78 ipath_disarm_piobufs(dd, i, 1); 109 ipath_disarm_piobufs(dd, i, 1);
110 }
79 dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */ 111 dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */
80 } 112 }
81} 113}
@@ -114,7 +146,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
114{ 146{
115 u64 ignore_this_time = 0; 147 u64 ignore_this_time = 0;
116 148
117 ipath_disarm_senderrbufs(dd); 149 ipath_disarm_senderrbufs(dd, 0);
118 if ((errs & E_SUM_LINK_PKTERRS) && 150 if ((errs & E_SUM_LINK_PKTERRS) &&
119 !(dd->ipath_flags & IPATH_LINKACTIVE)) { 151 !(dd->ipath_flags & IPATH_LINKACTIVE)) {
120 /* 152 /*
@@ -403,10 +435,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd,
403 * happens so often we never want to count it. 435 * happens so often we never want to count it.
404 */ 436 */
405 if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) { 437 if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
406 ipath_decode_err(msg, sizeof msg, dd->ipath_lasterror & 438 int iserr;
407 ~INFINIPATH_E_IBSTATUSCHANGED); 439 iserr = ipath_decode_err(msg, sizeof msg,
440 dd->ipath_lasterror &
441 ~INFINIPATH_E_IBSTATUSCHANGED);
408 if (dd->ipath_lasterror & 442 if (dd->ipath_lasterror &
409 ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) 443 ~(INFINIPATH_E_RRCVEGRFULL |
444 INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
410 ipath_dev_err(dd, "Suppressed %u messages for " 445 ipath_dev_err(dd, "Suppressed %u messages for "
411 "fast-repeating errors (%s) (%llx)\n", 446 "fast-repeating errors (%s) (%llx)\n",
412 supp_msgs, msg, 447 supp_msgs, msg,
@@ -420,8 +455,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd,
420 * them. So only complain about these at debug 455 * them. So only complain about these at debug
421 * level. 456 * level.
422 */ 457 */
423 ipath_dbg("Suppressed %u messages for %s\n", 458 if (iserr)
424 supp_msgs, msg); 459 ipath_dbg("Suppressed %u messages for %s\n",
460 supp_msgs, msg);
461 else
462 ipath_cdbg(ERRPKT,
463 "Suppressed %u messages for %s\n",
464 supp_msgs, msg);
425 } 465 }
426 } 466 }
427} 467}
@@ -462,7 +502,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
462{ 502{
463 char msg[512]; 503 char msg[512];
464 u64 ignore_this_time = 0; 504 u64 ignore_this_time = 0;
465 int i; 505 int i, iserr = 0;
466 int chkerrpkts = 0, noprint = 0; 506 int chkerrpkts = 0, noprint = 0;
467 unsigned supp_msgs; 507 unsigned supp_msgs;
468 508
@@ -502,6 +542,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
502 } 542 }
503 543
504 if (supp_msgs == 250000) { 544 if (supp_msgs == 250000) {
545 int s_iserr;
505 /* 546 /*
506 * It's not entirely reasonable assuming that the errors set 547 * It's not entirely reasonable assuming that the errors set
507 * in the last clear period are all responsible for the 548 * in the last clear period are all responsible for the
@@ -511,17 +552,17 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
511 dd->ipath_maskederrs |= dd->ipath_lasterror | errs; 552 dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
512 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 553 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
513 ~dd->ipath_maskederrs); 554 ~dd->ipath_maskederrs);
514 ipath_decode_err(msg, sizeof msg, 555 s_iserr = ipath_decode_err(msg, sizeof msg,
515 (dd->ipath_maskederrs & ~dd-> 556 (dd->ipath_maskederrs & ~dd->
516 ipath_ignorederrs)); 557 ipath_ignorederrs));
517 558
518 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) & 559 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
519 ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) 560 ~(INFINIPATH_E_RRCVEGRFULL |
520 ipath_dev_err(dd, "Disabling error(s) %llx because " 561 INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
521 "occurring too frequently (%s)\n", 562 ipath_dev_err(dd, "Temporarily disabling "
522 (unsigned long long) 563 "error(s) %llx reporting; too frequent (%s)\n",
523 (dd->ipath_maskederrs & 564 (unsigned long long) (dd->ipath_maskederrs &
524 ~dd->ipath_ignorederrs), msg); 565 ~dd->ipath_ignorederrs), msg);
525 else { 566 else {
526 /* 567 /*
527 * rcvegrfull and rcvhdrqfull are "normal", 568 * rcvegrfull and rcvhdrqfull are "normal",
@@ -530,8 +571,15 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
530 * processing them. So only complain about 571 * processing them. So only complain about
531 * these at debug level. 572 * these at debug level.
532 */ 573 */
533 ipath_dbg("Disabling frequent queue full errors " 574 if (s_iserr)
534 "(%s)\n", msg); 575 ipath_dbg("Temporarily disabling reporting "
576 "too frequent queue full errors (%s)\n",
577 msg);
578 else
579 ipath_cdbg(ERRPKT,
580 "Temporarily disabling reporting too"
581 " frequent packet errors (%s)\n",
582 msg);
535 } 583 }
536 584
537 /* 585 /*
@@ -589,6 +637,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
589 ipath_stats.sps_crcerrs++; 637 ipath_stats.sps_crcerrs++;
590 chkerrpkts = 1; 638 chkerrpkts = 1;
591 } 639 }
640 iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS);
641
592 642
593 /* 643 /*
594 * We don't want to print these two as they happen, or we can make 644 * We don't want to print these two as they happen, or we can make
@@ -677,8 +727,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
677 *dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF; 727 *dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
678 } 728 }
679 729
680 if (!noprint && *msg) 730 if (!noprint && *msg) {
681 ipath_dev_err(dd, "%s error\n", msg); 731 if (iserr)
732 ipath_dev_err(dd, "%s error\n", msg);
733 else
734 dev_info(&dd->pcidev->dev, "%s packet problems\n",
735 msg);
736 }
682 if (dd->ipath_state_wanted & dd->ipath_flags) { 737 if (dd->ipath_state_wanted & dd->ipath_flags) {
683 ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, " 738 ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
684 "waking\n", dd->ipath_state_wanted, 739 "waking\n", dd->ipath_state_wanted,
@@ -819,11 +874,10 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
819 struct ipath_portdata *pd = dd->ipath_pd[i]; 874 struct ipath_portdata *pd = dd->ipath_pd[i];
820 if (portr & (1 << i) && pd && pd->port_cnt && 875 if (portr & (1 << i) && pd && pd->port_cnt &&
821 test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) { 876 test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) {
822 int rcbit;
823 clear_bit(IPATH_PORT_WAITING_RCV, 877 clear_bit(IPATH_PORT_WAITING_RCV,
824 &pd->port_flag); 878 &pd->port_flag);
825 rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT; 879 clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT,
826 clear_bit(1UL << rcbit, &dd->ipath_rcvctrl); 880 &dd->ipath_rcvctrl);
827 wake_up_interruptible(&pd->port_wait); 881 wake_up_interruptible(&pd->port_wait);
828 rcvdint = 1; 882 rcvdint = 1;
829 } 883 }
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 6d8d05fb5999..e900c2593f44 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -590,7 +590,6 @@ int ipath_enable_wc(struct ipath_devdata *dd);
590void ipath_disable_wc(struct ipath_devdata *dd); 590void ipath_disable_wc(struct ipath_devdata *dd);
591int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp); 591int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
592void ipath_shutdown_device(struct ipath_devdata *); 592void ipath_shutdown_device(struct ipath_devdata *);
593void ipath_disarm_senderrbufs(struct ipath_devdata *);
594 593
595struct file_operations; 594struct file_operations;
596int ipath_cdev_init(int minor, char *name, const struct file_operations *fops, 595int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
@@ -611,7 +610,7 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t);
611extern int ipath_diag_inuse; 610extern int ipath_diag_inuse;
612 611
613irqreturn_t ipath_intr(int irq, void *devid); 612irqreturn_t ipath_intr(int irq, void *devid);
614void ipath_decode_err(char *buf, size_t blen, ipath_err_t err); 613int ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
615#if __IPATH_INFO || __IPATH_DBG 614#if __IPATH_INFO || __IPATH_DBG
616extern const char *ipath_ibcstatus_str[]; 615extern const char *ipath_ibcstatus_str[];
617#endif 616#endif
@@ -701,6 +700,8 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
701#define IPATH_PORT_WAITING_RCV 2 700#define IPATH_PORT_WAITING_RCV 2
702 /* waiting for a PIO buffer to be available */ 701 /* waiting for a PIO buffer to be available */
703#define IPATH_PORT_WAITING_PIO 3 702#define IPATH_PORT_WAITING_PIO 3
703 /* master has not finished initializing */
704#define IPATH_PORT_MASTER_UNINIT 4
704 705
705/* free up any allocated data at closes */ 706/* free up any allocated data at closes */
706void ipath_free_data(struct ipath_portdata *dd); 707void ipath_free_data(struct ipath_portdata *dd);
@@ -711,6 +712,7 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *);
711void ipath_init_iba6110_funcs(struct ipath_devdata *); 712void ipath_init_iba6110_funcs(struct ipath_devdata *);
712void ipath_get_eeprom_info(struct ipath_devdata *); 713void ipath_get_eeprom_info(struct ipath_devdata *);
713u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); 714u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
715void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
714 716
715/* 717/*
716 * number of words used for protocol header if not set by ipath_userinit(); 718 * number of words used for protocol header if not set by ipath_userinit();
@@ -754,8 +756,6 @@ int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int);
754/* these are used for the registers that vary with port */ 756/* these are used for the registers that vary with port */
755void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg, 757void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg,
756 unsigned, u64); 758 unsigned, u64);
757u64 ipath_read_kreg64_port(const struct ipath_devdata *, ipath_kreg,
758 unsigned);
759 759
760/* 760/*
761 * We could have a single register get/put routine, that takes a group type, 761 * We could have a single register get/put routine, that takes a group type,
@@ -897,6 +897,8 @@ dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
897 897
898extern unsigned ipath_debug; /* debugging bit mask */ 898extern unsigned ipath_debug; /* debugging bit mask */
899 899
900#define IPATH_MAX_PARITY_ATTEMPTS 10000 /* max times to try recovery */
901
900const char *ipath_get_unit_name(int unit); 902const char *ipath_get_unit_name(int unit);
901 903
902extern struct mutex ipath_mutex; 904extern struct mutex ipath_mutex;
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index 851763d7d2db..dd487c100f5b 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -61,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
61 r = (r + 1) & (rkt->max - 1); 61 r = (r + 1) & (rkt->max - 1);
62 if (r == n) { 62 if (r == n) {
63 spin_unlock_irqrestore(&rkt->lock, flags); 63 spin_unlock_irqrestore(&rkt->lock, flags);
64 ipath_dbg(KERN_INFO "LKEY table full\n"); 64 ipath_dbg("LKEY table full\n");
65 ret = 0; 65 ret = 0;
66 goto bail; 66 goto bail;
67 } 67 }
@@ -133,6 +133,12 @@ int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
133 * being reversible by calling bus_to_virt(). 133 * being reversible by calling bus_to_virt().
134 */ 134 */
135 if (sge->lkey == 0) { 135 if (sge->lkey == 0) {
136 struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
137
138 if (pd->user) {
139 ret = 0;
140 goto bail;
141 }
136 isge->mr = NULL; 142 isge->mr = NULL;
137 isge->vaddr = (void *) sge->addr; 143 isge->vaddr = (void *) sge->addr;
138 isge->length = sge->length; 144 isge->length = sge->length;
@@ -206,6 +212,12 @@ int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
206 * (see ipath_get_dma_mr and ipath_dma.c). 212 * (see ipath_get_dma_mr and ipath_dma.c).
207 */ 213 */
208 if (rkey == 0) { 214 if (rkey == 0) {
215 struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
216
217 if (pd->user) {
218 ret = 0;
219 goto bail;
220 }
209 sge->mr = NULL; 221 sge->mr = NULL;
210 sge->vaddr = (void *) vaddr; 222 sge->vaddr = (void *) vaddr;
211 sge->length = len; 223 sge->length = len;
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 8cc8598d6c69..31e70732e369 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -210,9 +210,15 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
210 m = 0; 210 m = 0;
211 n = 0; 211 n = 0;
212 list_for_each_entry(chunk, &region->chunk_list, list) { 212 list_for_each_entry(chunk, &region->chunk_list, list) {
213 for (i = 0; i < chunk->nmap; i++) { 213 for (i = 0; i < chunk->nents; i++) {
214 mr->mr.map[m]->segs[n].vaddr = 214 void *vaddr;
215 page_address(chunk->page_list[i].page); 215
216 vaddr = page_address(chunk->page_list[i].page);
217 if (!vaddr) {
218 ret = ERR_PTR(-EINVAL);
219 goto bail;
220 }
221 mr->mr.map[m]->segs[n].vaddr = vaddr;
216 mr->mr.map[m]->segs[n].length = region->page_size; 222 mr->mr.map[m]->segs[n].length = region->page_size;
217 n++; 223 n++;
218 if (n == IPATH_SEGSZ) { 224 if (n == IPATH_SEGSZ) {
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 64f07b19349f..16db9ac0b402 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -81,11 +81,51 @@ static u32 credit_table[31] = {
81 32768 /* 1E */ 81 32768 /* 1E */
82}; 82};
83 83
84static u32 alloc_qpn(struct ipath_qp_table *qpt) 84
85static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map)
86{
87 unsigned long page = get_zeroed_page(GFP_KERNEL);
88 unsigned long flags;
89
90 /*
91 * Free the page if someone raced with us installing it.
92 */
93
94 spin_lock_irqsave(&qpt->lock, flags);
95 if (map->page)
96 free_page(page);
97 else
98 map->page = (void *)page;
99 spin_unlock_irqrestore(&qpt->lock, flags);
100}
101
102
103static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type)
85{ 104{
86 u32 i, offset, max_scan, qpn; 105 u32 i, offset, max_scan, qpn;
87 struct qpn_map *map; 106 struct qpn_map *map;
88 u32 ret; 107 u32 ret = -1;
108
109 if (type == IB_QPT_SMI)
110 ret = 0;
111 else if (type == IB_QPT_GSI)
112 ret = 1;
113
114 if (ret != -1) {
115 map = &qpt->map[0];
116 if (unlikely(!map->page)) {
117 get_map_page(qpt, map);
118 if (unlikely(!map->page)) {
119 ret = -ENOMEM;
120 goto bail;
121 }
122 }
123 if (!test_and_set_bit(ret, map->page))
124 atomic_dec(&map->n_free);
125 else
126 ret = -EBUSY;
127 goto bail;
128 }
89 129
90 qpn = qpt->last + 1; 130 qpn = qpt->last + 1;
91 if (qpn >= QPN_MAX) 131 if (qpn >= QPN_MAX)
@@ -95,19 +135,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt)
95 max_scan = qpt->nmaps - !offset; 135 max_scan = qpt->nmaps - !offset;
96 for (i = 0;;) { 136 for (i = 0;;) {
97 if (unlikely(!map->page)) { 137 if (unlikely(!map->page)) {
98 unsigned long page = get_zeroed_page(GFP_KERNEL); 138 get_map_page(qpt, map);
99 unsigned long flags;
100
101 /*
102 * Free the page if someone raced with us
103 * installing it:
104 */
105 spin_lock_irqsave(&qpt->lock, flags);
106 if (map->page)
107 free_page(page);
108 else
109 map->page = (void *)page;
110 spin_unlock_irqrestore(&qpt->lock, flags);
111 if (unlikely(!map->page)) 139 if (unlikely(!map->page))
112 break; 140 break;
113 } 141 }
@@ -151,7 +179,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt)
151 qpn = mk_qpn(qpt, map, offset); 179 qpn = mk_qpn(qpt, map, offset);
152 } 180 }
153 181
154 ret = 0; 182 ret = -ENOMEM;
155 183
156bail: 184bail:
157 return ret; 185 return ret;
@@ -180,29 +208,19 @@ static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp,
180 enum ib_qp_type type) 208 enum ib_qp_type type)
181{ 209{
182 unsigned long flags; 210 unsigned long flags;
183 u32 qpn;
184 int ret; 211 int ret;
185 212
186 if (type == IB_QPT_SMI) 213 ret = alloc_qpn(qpt, type);
187 qpn = 0; 214 if (ret < 0)
188 else if (type == IB_QPT_GSI) 215 goto bail;
189 qpn = 1; 216 qp->ibqp.qp_num = ret;
190 else {
191 /* Allocate the next available QPN */
192 qpn = alloc_qpn(qpt);
193 if (qpn == 0) {
194 ret = -ENOMEM;
195 goto bail;
196 }
197 }
198 qp->ibqp.qp_num = qpn;
199 217
200 /* Add the QP to the hash table. */ 218 /* Add the QP to the hash table. */
201 spin_lock_irqsave(&qpt->lock, flags); 219 spin_lock_irqsave(&qpt->lock, flags);
202 220
203 qpn %= qpt->max; 221 ret %= qpt->max;
204 qp->next = qpt->table[qpn]; 222 qp->next = qpt->table[ret];
205 qpt->table[qpn] = qp; 223 qpt->table[ret] = qp;
206 atomic_inc(&qp->refcount); 224 atomic_inc(&qp->refcount);
207 225
208 spin_unlock_irqrestore(&qpt->lock, flags); 226 spin_unlock_irqrestore(&qpt->lock, flags);
@@ -245,9 +263,7 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
245 if (!fnd) 263 if (!fnd)
246 return; 264 return;
247 265
248 /* If QPN is not reserved, mark QPN free in the bitmap. */ 266 free_qpn(qpt, qp->ibqp.qp_num);
249 if (qp->ibqp.qp_num > 1)
250 free_qpn(qpt, qp->ibqp.qp_num);
251 267
252 wait_event(qp->wait, !atomic_read(&qp->refcount)); 268 wait_event(qp->wait, !atomic_read(&qp->refcount));
253} 269}
@@ -270,11 +286,10 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt)
270 286
271 while (qp) { 287 while (qp) {
272 nqp = qp->next; 288 nqp = qp->next;
273 if (qp->ibqp.qp_num > 1) 289 free_qpn(qpt, qp->ibqp.qp_num);
274 free_qpn(qpt, qp->ibqp.qp_num);
275 if (!atomic_dec_and_test(&qp->refcount) || 290 if (!atomic_dec_and_test(&qp->refcount) ||
276 !ipath_destroy_qp(&qp->ibqp)) 291 !ipath_destroy_qp(&qp->ibqp))
277 ipath_dbg(KERN_INFO "QP memory leak!\n"); 292 ipath_dbg("QP memory leak!\n");
278 qp = nqp; 293 qp = nqp;
279 } 294 }
280 } 295 }
@@ -320,7 +335,8 @@ static void ipath_reset_qp(struct ipath_qp *qp)
320 qp->remote_qpn = 0; 335 qp->remote_qpn = 0;
321 qp->qkey = 0; 336 qp->qkey = 0;
322 qp->qp_access_flags = 0; 337 qp->qp_access_flags = 0;
323 clear_bit(IPATH_S_BUSY, &qp->s_flags); 338 qp->s_busy = 0;
339 qp->s_flags &= ~IPATH_S_SIGNAL_REQ_WR;
324 qp->s_hdrwords = 0; 340 qp->s_hdrwords = 0;
325 qp->s_psn = 0; 341 qp->s_psn = 0;
326 qp->r_psn = 0; 342 qp->r_psn = 0;
@@ -333,7 +349,6 @@ static void ipath_reset_qp(struct ipath_qp *qp)
333 qp->r_state = IB_OPCODE_UC_SEND_LAST; 349 qp->r_state = IB_OPCODE_UC_SEND_LAST;
334 } 350 }
335 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; 351 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
336 qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
337 qp->r_nak_state = 0; 352 qp->r_nak_state = 0;
338 qp->r_wrid_valid = 0; 353 qp->r_wrid_valid = 0;
339 qp->s_rnr_timeout = 0; 354 qp->s_rnr_timeout = 0;
@@ -344,6 +359,10 @@ static void ipath_reset_qp(struct ipath_qp *qp)
344 qp->s_ssn = 1; 359 qp->s_ssn = 1;
345 qp->s_lsn = 0; 360 qp->s_lsn = 0;
346 qp->s_wait_credit = 0; 361 qp->s_wait_credit = 0;
362 memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
363 qp->r_head_ack_queue = 0;
364 qp->s_tail_ack_queue = 0;
365 qp->s_num_rd_atomic = 0;
347 if (qp->r_rq.wq) { 366 if (qp->r_rq.wq) {
348 qp->r_rq.wq->head = 0; 367 qp->r_rq.wq->head = 0;
349 qp->r_rq.wq->tail = 0; 368 qp->r_rq.wq->tail = 0;
@@ -357,7 +376,7 @@ static void ipath_reset_qp(struct ipath_qp *qp)
357 * @err: the receive completion error to signal if a RWQE is active 376 * @err: the receive completion error to signal if a RWQE is active
358 * 377 *
359 * Flushes both send and receive work queues. 378 * Flushes both send and receive work queues.
360 * QP s_lock should be held and interrupts disabled. 379 * The QP s_lock should be held and interrupts disabled.
361 */ 380 */
362 381
363void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) 382void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
@@ -365,7 +384,7 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
365 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 384 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
366 struct ib_wc wc; 385 struct ib_wc wc;
367 386
368 ipath_dbg(KERN_INFO "QP%d/%d in error state\n", 387 ipath_dbg("QP%d/%d in error state\n",
369 qp->ibqp.qp_num, qp->remote_qpn); 388 qp->ibqp.qp_num, qp->remote_qpn);
370 389
371 spin_lock(&dev->pending_lock); 390 spin_lock(&dev->pending_lock);
@@ -389,6 +408,8 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
389 wc.port_num = 0; 408 wc.port_num = 0;
390 if (qp->r_wrid_valid) { 409 if (qp->r_wrid_valid) {
391 qp->r_wrid_valid = 0; 410 qp->r_wrid_valid = 0;
411 wc.wr_id = qp->r_wr_id;
412 wc.opcode = IB_WC_RECV;
392 wc.status = err; 413 wc.status = err;
393 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); 414 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
394 } 415 }
@@ -503,13 +524,17 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
503 attr->path_mig_state != IB_MIG_REARM) 524 attr->path_mig_state != IB_MIG_REARM)
504 goto inval; 525 goto inval;
505 526
527 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
528 if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC)
529 goto inval;
530
506 switch (new_state) { 531 switch (new_state) {
507 case IB_QPS_RESET: 532 case IB_QPS_RESET:
508 ipath_reset_qp(qp); 533 ipath_reset_qp(qp);
509 break; 534 break;
510 535
511 case IB_QPS_ERR: 536 case IB_QPS_ERR:
512 ipath_error_qp(qp, IB_WC_GENERAL_ERR); 537 ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
513 break; 538 break;
514 539
515 default: 540 default:
@@ -559,6 +584,12 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
559 if (attr_mask & IB_QP_QKEY) 584 if (attr_mask & IB_QP_QKEY)
560 qp->qkey = attr->qkey; 585 qp->qkey = attr->qkey;
561 586
587 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
588 qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
589
590 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
591 qp->s_max_rd_atomic = attr->max_rd_atomic;
592
562 qp->state = new_state; 593 qp->state = new_state;
563 spin_unlock_irqrestore(&qp->s_lock, flags); 594 spin_unlock_irqrestore(&qp->s_lock, flags);
564 595
@@ -598,8 +629,8 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
598 attr->alt_pkey_index = 0; 629 attr->alt_pkey_index = 0;
599 attr->en_sqd_async_notify = 0; 630 attr->en_sqd_async_notify = 0;
600 attr->sq_draining = 0; 631 attr->sq_draining = 0;
601 attr->max_rd_atomic = 1; 632 attr->max_rd_atomic = qp->s_max_rd_atomic;
602 attr->max_dest_rd_atomic = 1; 633 attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
603 attr->min_rnr_timer = qp->r_min_rnr_timer; 634 attr->min_rnr_timer = qp->r_min_rnr_timer;
604 attr->port_num = 1; 635 attr->port_num = 1;
605 attr->timeout = qp->timeout; 636 attr->timeout = qp->timeout;
@@ -614,7 +645,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
614 init_attr->recv_cq = qp->ibqp.recv_cq; 645 init_attr->recv_cq = qp->ibqp.recv_cq;
615 init_attr->srq = qp->ibqp.srq; 646 init_attr->srq = qp->ibqp.srq;
616 init_attr->cap = attr->cap; 647 init_attr->cap = attr->cap;
617 if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR)) 648 if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR)
618 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; 649 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
619 else 650 else
620 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; 651 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
@@ -786,7 +817,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
786 qp->s_size = init_attr->cap.max_send_wr + 1; 817 qp->s_size = init_attr->cap.max_send_wr + 1;
787 qp->s_max_sge = init_attr->cap.max_send_sge; 818 qp->s_max_sge = init_attr->cap.max_send_sge;
788 if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) 819 if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
789 qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR; 820 qp->s_flags = IPATH_S_SIGNAL_REQ_WR;
790 else 821 else
791 qp->s_flags = 0; 822 qp->s_flags = 0;
792 dev = to_idev(ibpd->device); 823 dev = to_idev(ibpd->device);
@@ -958,7 +989,7 @@ bail:
958 * @wc: the WC responsible for putting the QP in this state 989 * @wc: the WC responsible for putting the QP in this state
959 * 990 *
960 * Flushes the send work queue. 991 * Flushes the send work queue.
961 * The QP s_lock should be held. 992 * The QP s_lock should be held and interrupts disabled.
962 */ 993 */
963 994
964void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc) 995void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
@@ -966,7 +997,7 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
966 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 997 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
967 struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); 998 struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
968 999
969 ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n", 1000 ipath_dbg("Send queue error on QP%d/%d: err: %d\n",
970 qp->ibqp.qp_num, qp->remote_qpn, wc->status); 1001 qp->ibqp.qp_num, qp->remote_qpn, wc->status);
971 1002
972 spin_lock(&dev->pending_lock); 1003 spin_lock(&dev->pending_lock);
@@ -984,12 +1015,12 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
984 wc->status = IB_WC_WR_FLUSH_ERR; 1015 wc->status = IB_WC_WR_FLUSH_ERR;
985 1016
986 while (qp->s_last != qp->s_head) { 1017 while (qp->s_last != qp->s_head) {
1018 wqe = get_swqe_ptr(qp, qp->s_last);
987 wc->wr_id = wqe->wr.wr_id; 1019 wc->wr_id = wqe->wr.wr_id;
988 wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; 1020 wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
989 ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1); 1021 ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
990 if (++qp->s_last >= qp->s_size) 1022 if (++qp->s_last >= qp->s_size)
991 qp->s_last = 0; 1023 qp->s_last = 0;
992 wqe = get_swqe_ptr(qp, qp->s_last);
993 } 1024 }
994 qp->s_cur = qp->s_tail = qp->s_head; 1025 qp->s_cur = qp->s_tail = qp->s_head;
995 qp->state = IB_QPS_SQE; 1026 qp->state = IB_QPS_SQE;
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 5ff20cb04494..b4b88d0b53f5 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -37,6 +37,19 @@
37/* cut down ridiculously long IB macro names */ 37/* cut down ridiculously long IB macro names */
38#define OP(x) IB_OPCODE_RC_##x 38#define OP(x) IB_OPCODE_RC_##x
39 39
40static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe,
41 u32 psn, u32 pmtu)
42{
43 u32 len;
44
45 len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
46 ss->sge = wqe->sg_list[0];
47 ss->sg_list = wqe->sg_list + 1;
48 ss->num_sge = wqe->wr.num_sge;
49 ipath_skip_sge(ss, len);
50 return wqe->length - len;
51}
52
40/** 53/**
41 * ipath_init_restart- initialize the qp->s_sge after a restart 54 * ipath_init_restart- initialize the qp->s_sge after a restart
42 * @qp: the QP who's SGE we're restarting 55 * @qp: the QP who's SGE we're restarting
@@ -47,15 +60,9 @@
47static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) 60static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
48{ 61{
49 struct ipath_ibdev *dev; 62 struct ipath_ibdev *dev;
50 u32 len;
51 63
52 len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * 64 qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn,
53 ib_mtu_enum_to_int(qp->path_mtu); 65 ib_mtu_enum_to_int(qp->path_mtu));
54 qp->s_sge.sge = wqe->sg_list[0];
55 qp->s_sge.sg_list = wqe->sg_list + 1;
56 qp->s_sge.num_sge = wqe->wr.num_sge;
57 ipath_skip_sge(&qp->s_sge, len);
58 qp->s_len = wqe->length - len;
59 dev = to_idev(qp->ibqp.device); 66 dev = to_idev(qp->ibqp.device);
60 spin_lock(&dev->pending_lock); 67 spin_lock(&dev->pending_lock);
61 if (list_empty(&qp->timerwait)) 68 if (list_empty(&qp->timerwait))
@@ -70,107 +77,123 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
70 * @ohdr: a pointer to the IB header being constructed 77 * @ohdr: a pointer to the IB header being constructed
71 * @pmtu: the path MTU 78 * @pmtu: the path MTU
72 * 79 *
73 * Return bth0 if constructed; otherwise, return 0. 80 * Return 1 if constructed; otherwise, return 0.
81 * Note that we are in the responder's side of the QP context.
74 * Note the QP s_lock must be held. 82 * Note the QP s_lock must be held.
75 */ 83 */
76u32 ipath_make_rc_ack(struct ipath_qp *qp, 84static int ipath_make_rc_ack(struct ipath_qp *qp,
77 struct ipath_other_headers *ohdr, 85 struct ipath_other_headers *ohdr,
78 u32 pmtu) 86 u32 pmtu, u32 *bth0p, u32 *bth2p)
79{ 87{
88 struct ipath_ack_entry *e;
80 u32 hwords; 89 u32 hwords;
81 u32 len; 90 u32 len;
82 u32 bth0; 91 u32 bth0;
92 u32 bth2;
83 93
84 /* header size in 32-bit words LRH+BTH = (8+12)/4. */ 94 /* header size in 32-bit words LRH+BTH = (8+12)/4. */
85 hwords = 5; 95 hwords = 5;
86 96
87 /*
88 * Send a response. Note that we are in the responder's
89 * side of the QP context.
90 */
91 switch (qp->s_ack_state) { 97 switch (qp->s_ack_state) {
92 case OP(RDMA_READ_REQUEST): 98 case OP(RDMA_READ_RESPONSE_LAST):
93 qp->s_cur_sge = &qp->s_rdma_sge; 99 case OP(RDMA_READ_RESPONSE_ONLY):
94 len = qp->s_rdma_len; 100 case OP(ATOMIC_ACKNOWLEDGE):
95 if (len > pmtu) { 101 qp->s_ack_state = OP(ACKNOWLEDGE);
96 len = pmtu; 102 /* FALLTHROUGH */
97 qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); 103 case OP(ACKNOWLEDGE):
98 } else 104 /* Check for no next entry in the queue. */
99 qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); 105 if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
100 qp->s_rdma_len -= len; 106 if (qp->s_flags & IPATH_S_ACK_PENDING)
107 goto normal;
108 goto bail;
109 }
110
111 e = &qp->s_ack_queue[qp->s_tail_ack_queue];
112 if (e->opcode == OP(RDMA_READ_REQUEST)) {
113 /* Copy SGE state in case we need to resend */
114 qp->s_ack_rdma_sge = e->rdma_sge;
115 qp->s_cur_sge = &qp->s_ack_rdma_sge;
116 len = e->rdma_sge.sge.sge_length;
117 if (len > pmtu) {
118 len = pmtu;
119 qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
120 } else {
121 qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
122 if (++qp->s_tail_ack_queue >
123 IPATH_MAX_RDMA_ATOMIC)
124 qp->s_tail_ack_queue = 0;
125 }
126 ohdr->u.aeth = ipath_compute_aeth(qp);
127 hwords++;
128 qp->s_ack_rdma_psn = e->psn;
129 bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
130 } else {
131 /* COMPARE_SWAP or FETCH_ADD */
132 qp->s_cur_sge = NULL;
133 len = 0;
134 qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
135 ohdr->u.at.aeth = ipath_compute_aeth(qp);
136 ohdr->u.at.atomic_ack_eth[0] =
137 cpu_to_be32(e->atomic_data >> 32);
138 ohdr->u.at.atomic_ack_eth[1] =
139 cpu_to_be32(e->atomic_data);
140 hwords += sizeof(ohdr->u.at) / sizeof(u32);
141 bth2 = e->psn;
142 if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
143 qp->s_tail_ack_queue = 0;
144 }
101 bth0 = qp->s_ack_state << 24; 145 bth0 = qp->s_ack_state << 24;
102 ohdr->u.aeth = ipath_compute_aeth(qp);
103 hwords++;
104 break; 146 break;
105 147
106 case OP(RDMA_READ_RESPONSE_FIRST): 148 case OP(RDMA_READ_RESPONSE_FIRST):
107 qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); 149 qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
108 /* FALLTHROUGH */ 150 /* FALLTHROUGH */
109 case OP(RDMA_READ_RESPONSE_MIDDLE): 151 case OP(RDMA_READ_RESPONSE_MIDDLE):
110 qp->s_cur_sge = &qp->s_rdma_sge; 152 len = qp->s_ack_rdma_sge.sge.sge_length;
111 len = qp->s_rdma_len;
112 if (len > pmtu) 153 if (len > pmtu)
113 len = pmtu; 154 len = pmtu;
114 else { 155 else {
115 ohdr->u.aeth = ipath_compute_aeth(qp); 156 ohdr->u.aeth = ipath_compute_aeth(qp);
116 hwords++; 157 hwords++;
117 qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); 158 qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
159 if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
160 qp->s_tail_ack_queue = 0;
118 } 161 }
119 qp->s_rdma_len -= len;
120 bth0 = qp->s_ack_state << 24; 162 bth0 = qp->s_ack_state << 24;
121 break; 163 bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
122
123 case OP(RDMA_READ_RESPONSE_LAST):
124 case OP(RDMA_READ_RESPONSE_ONLY):
125 /*
126 * We have to prevent new requests from changing
127 * the r_sge state while a ipath_verbs_send()
128 * is in progress.
129 */
130 qp->s_ack_state = OP(ACKNOWLEDGE);
131 bth0 = 0;
132 goto bail;
133
134 case OP(COMPARE_SWAP):
135 case OP(FETCH_ADD):
136 qp->s_cur_sge = NULL;
137 len = 0;
138 /*
139 * Set the s_ack_state so the receive interrupt handler
140 * won't try to send an ACK (out of order) until this one
141 * is actually sent.
142 */
143 qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
144 bth0 = OP(ATOMIC_ACKNOWLEDGE) << 24;
145 ohdr->u.at.aeth = ipath_compute_aeth(qp);
146 ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
147 hwords += sizeof(ohdr->u.at) / 4;
148 break; 164 break;
149 165
150 default: 166 default:
151 /* Send a regular ACK. */ 167 normal:
152 qp->s_cur_sge = NULL;
153 len = 0;
154 /* 168 /*
155 * Set the s_ack_state so the receive interrupt handler 169 * Send a regular ACK.
156 * won't try to send an ACK (out of order) until this one 170 * Set the s_ack_state so we wait until after sending
157 * is actually sent. 171 * the ACK before setting s_ack_state to ACKNOWLEDGE
172 * (see above).
158 */ 173 */
159 qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); 174 qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
160 bth0 = OP(ACKNOWLEDGE) << 24; 175 qp->s_flags &= ~IPATH_S_ACK_PENDING;
176 qp->s_cur_sge = NULL;
161 if (qp->s_nak_state) 177 if (qp->s_nak_state)
162 ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | 178 ohdr->u.aeth =
163 (qp->s_nak_state << 179 cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
164 IPATH_AETH_CREDIT_SHIFT)); 180 (qp->s_nak_state <<
181 IPATH_AETH_CREDIT_SHIFT));
165 else 182 else
166 ohdr->u.aeth = ipath_compute_aeth(qp); 183 ohdr->u.aeth = ipath_compute_aeth(qp);
167 hwords++; 184 hwords++;
185 len = 0;
186 bth0 = OP(ACKNOWLEDGE) << 24;
187 bth2 = qp->s_ack_psn & IPATH_PSN_MASK;
168 } 188 }
169 qp->s_hdrwords = hwords; 189 qp->s_hdrwords = hwords;
170 qp->s_cur_size = len; 190 qp->s_cur_size = len;
191 *bth0p = bth0;
192 *bth2p = bth2;
193 return 1;
171 194
172bail: 195bail:
173 return bth0; 196 return 0;
174} 197}
175 198
176/** 199/**
@@ -197,9 +220,16 @@ int ipath_make_rc_req(struct ipath_qp *qp,
197 u32 bth2; 220 u32 bth2;
198 char newreq; 221 char newreq;
199 222
223 /* Sending responses has higher priority over sending requests. */
224 if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
225 (qp->s_flags & IPATH_S_ACK_PENDING) ||
226 qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE) &&
227 ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p))
228 goto done;
229
200 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || 230 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
201 qp->s_rnr_timeout) 231 qp->s_rnr_timeout)
202 goto done; 232 goto bail;
203 233
204 /* Limit the number of packets sent without an ACK. */ 234 /* Limit the number of packets sent without an ACK. */
205 if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { 235 if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
@@ -210,7 +240,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
210 list_add_tail(&qp->timerwait, 240 list_add_tail(&qp->timerwait,
211 &dev->pending[dev->pending_index]); 241 &dev->pending[dev->pending_index]);
212 spin_unlock(&dev->pending_lock); 242 spin_unlock(&dev->pending_lock);
213 goto done; 243 goto bail;
214 } 244 }
215 245
216 /* header size in 32-bit words LRH+BTH = (8+12)/4. */ 246 /* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -232,7 +262,16 @@ int ipath_make_rc_req(struct ipath_qp *qp,
232 if (qp->s_cur == qp->s_tail) { 262 if (qp->s_cur == qp->s_tail) {
233 /* Check if send work queue is empty. */ 263 /* Check if send work queue is empty. */
234 if (qp->s_tail == qp->s_head) 264 if (qp->s_tail == qp->s_head)
235 goto done; 265 goto bail;
266 /*
267 * If a fence is requested, wait for previous
268 * RDMA read and atomic operations to finish.
269 */
270 if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
271 qp->s_num_rd_atomic) {
272 qp->s_flags |= IPATH_S_FENCE_PENDING;
273 goto bail;
274 }
236 wqe->psn = qp->s_next_psn; 275 wqe->psn = qp->s_next_psn;
237 newreq = 1; 276 newreq = 1;
238 } 277 }
@@ -250,7 +289,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
250 /* If no credit, return. */ 289 /* If no credit, return. */
251 if (qp->s_lsn != (u32) -1 && 290 if (qp->s_lsn != (u32) -1 &&
252 ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) 291 ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
253 goto done; 292 goto bail;
254 wqe->lpsn = wqe->psn; 293 wqe->lpsn = wqe->psn;
255 if (len > pmtu) { 294 if (len > pmtu) {
256 wqe->lpsn += (len - 1) / pmtu; 295 wqe->lpsn += (len - 1) / pmtu;
@@ -281,13 +320,13 @@ int ipath_make_rc_req(struct ipath_qp *qp,
281 /* If no credit, return. */ 320 /* If no credit, return. */
282 if (qp->s_lsn != (u32) -1 && 321 if (qp->s_lsn != (u32) -1 &&
283 ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) 322 ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
284 goto done; 323 goto bail;
285 ohdr->u.rc.reth.vaddr = 324 ohdr->u.rc.reth.vaddr =
286 cpu_to_be64(wqe->wr.wr.rdma.remote_addr); 325 cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
287 ohdr->u.rc.reth.rkey = 326 ohdr->u.rc.reth.rkey =
288 cpu_to_be32(wqe->wr.wr.rdma.rkey); 327 cpu_to_be32(wqe->wr.wr.rdma.rkey);
289 ohdr->u.rc.reth.length = cpu_to_be32(len); 328 ohdr->u.rc.reth.length = cpu_to_be32(len);
290 hwords += sizeof(struct ib_reth) / 4; 329 hwords += sizeof(struct ib_reth) / sizeof(u32);
291 wqe->lpsn = wqe->psn; 330 wqe->lpsn = wqe->psn;
292 if (len > pmtu) { 331 if (len > pmtu) {
293 wqe->lpsn += (len - 1) / pmtu; 332 wqe->lpsn += (len - 1) / pmtu;
@@ -312,14 +351,17 @@ int ipath_make_rc_req(struct ipath_qp *qp,
312 break; 351 break;
313 352
314 case IB_WR_RDMA_READ: 353 case IB_WR_RDMA_READ:
315 ohdr->u.rc.reth.vaddr = 354 /*
316 cpu_to_be64(wqe->wr.wr.rdma.remote_addr); 355 * Don't allow more operations to be started
317 ohdr->u.rc.reth.rkey = 356 * than the QP limits allow.
318 cpu_to_be32(wqe->wr.wr.rdma.rkey); 357 */
319 ohdr->u.rc.reth.length = cpu_to_be32(len);
320 qp->s_state = OP(RDMA_READ_REQUEST);
321 hwords += sizeof(ohdr->u.rc.reth) / 4;
322 if (newreq) { 358 if (newreq) {
359 if (qp->s_num_rd_atomic >=
360 qp->s_max_rd_atomic) {
361 qp->s_flags |= IPATH_S_RDMAR_PENDING;
362 goto bail;
363 }
364 qp->s_num_rd_atomic++;
323 if (qp->s_lsn != (u32) -1) 365 if (qp->s_lsn != (u32) -1)
324 qp->s_lsn++; 366 qp->s_lsn++;
325 /* 367 /*
@@ -330,6 +372,13 @@ int ipath_make_rc_req(struct ipath_qp *qp,
330 qp->s_next_psn += (len - 1) / pmtu; 372 qp->s_next_psn += (len - 1) / pmtu;
331 wqe->lpsn = qp->s_next_psn++; 373 wqe->lpsn = qp->s_next_psn++;
332 } 374 }
375 ohdr->u.rc.reth.vaddr =
376 cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
377 ohdr->u.rc.reth.rkey =
378 cpu_to_be32(wqe->wr.wr.rdma.rkey);
379 ohdr->u.rc.reth.length = cpu_to_be32(len);
380 qp->s_state = OP(RDMA_READ_REQUEST);
381 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
333 ss = NULL; 382 ss = NULL;
334 len = 0; 383 len = 0;
335 if (++qp->s_cur == qp->s_size) 384 if (++qp->s_cur == qp->s_size)
@@ -338,32 +387,48 @@ int ipath_make_rc_req(struct ipath_qp *qp,
338 387
339 case IB_WR_ATOMIC_CMP_AND_SWP: 388 case IB_WR_ATOMIC_CMP_AND_SWP:
340 case IB_WR_ATOMIC_FETCH_AND_ADD: 389 case IB_WR_ATOMIC_FETCH_AND_ADD:
341 if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) 390 /*
342 qp->s_state = OP(COMPARE_SWAP); 391 * Don't allow more operations to be started
343 else 392 * than the QP limits allow.
344 qp->s_state = OP(FETCH_ADD); 393 */
345 ohdr->u.atomic_eth.vaddr = cpu_to_be64(
346 wqe->wr.wr.atomic.remote_addr);
347 ohdr->u.atomic_eth.rkey = cpu_to_be32(
348 wqe->wr.wr.atomic.rkey);
349 ohdr->u.atomic_eth.swap_data = cpu_to_be64(
350 wqe->wr.wr.atomic.swap);
351 ohdr->u.atomic_eth.compare_data = cpu_to_be64(
352 wqe->wr.wr.atomic.compare_add);
353 hwords += sizeof(struct ib_atomic_eth) / 4;
354 if (newreq) { 394 if (newreq) {
395 if (qp->s_num_rd_atomic >=
396 qp->s_max_rd_atomic) {
397 qp->s_flags |= IPATH_S_RDMAR_PENDING;
398 goto bail;
399 }
400 qp->s_num_rd_atomic++;
355 if (qp->s_lsn != (u32) -1) 401 if (qp->s_lsn != (u32) -1)
356 qp->s_lsn++; 402 qp->s_lsn++;
357 wqe->lpsn = wqe->psn; 403 wqe->lpsn = wqe->psn;
358 } 404 }
359 if (++qp->s_cur == qp->s_size) 405 if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
360 qp->s_cur = 0; 406 qp->s_state = OP(COMPARE_SWAP);
407 ohdr->u.atomic_eth.swap_data = cpu_to_be64(
408 wqe->wr.wr.atomic.swap);
409 ohdr->u.atomic_eth.compare_data = cpu_to_be64(
410 wqe->wr.wr.atomic.compare_add);
411 } else {
412 qp->s_state = OP(FETCH_ADD);
413 ohdr->u.atomic_eth.swap_data = cpu_to_be64(
414 wqe->wr.wr.atomic.compare_add);
415 ohdr->u.atomic_eth.compare_data = 0;
416 }
417 ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
418 wqe->wr.wr.atomic.remote_addr >> 32);
419 ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
420 wqe->wr.wr.atomic.remote_addr);
421 ohdr->u.atomic_eth.rkey = cpu_to_be32(
422 wqe->wr.wr.atomic.rkey);
423 hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
361 ss = NULL; 424 ss = NULL;
362 len = 0; 425 len = 0;
426 if (++qp->s_cur == qp->s_size)
427 qp->s_cur = 0;
363 break; 428 break;
364 429
365 default: 430 default:
366 goto done; 431 goto bail;
367 } 432 }
368 qp->s_sge.sge = wqe->sg_list[0]; 433 qp->s_sge.sge = wqe->sg_list[0];
369 qp->s_sge.sg_list = wqe->sg_list + 1; 434 qp->s_sge.sg_list = wqe->sg_list + 1;
@@ -379,7 +444,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
379 qp->s_psn = wqe->lpsn + 1; 444 qp->s_psn = wqe->lpsn + 1;
380 else { 445 else {
381 qp->s_psn++; 446 qp->s_psn++;
382 if ((int)(qp->s_psn - qp->s_next_psn) > 0) 447 if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
383 qp->s_next_psn = qp->s_psn; 448 qp->s_next_psn = qp->s_psn;
384 } 449 }
385 /* 450 /*
@@ -406,7 +471,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
406 /* FALLTHROUGH */ 471 /* FALLTHROUGH */
407 case OP(SEND_MIDDLE): 472 case OP(SEND_MIDDLE):
408 bth2 = qp->s_psn++ & IPATH_PSN_MASK; 473 bth2 = qp->s_psn++ & IPATH_PSN_MASK;
409 if ((int)(qp->s_psn - qp->s_next_psn) > 0) 474 if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
410 qp->s_next_psn = qp->s_psn; 475 qp->s_next_psn = qp->s_psn;
411 ss = &qp->s_sge; 476 ss = &qp->s_sge;
412 len = qp->s_len; 477 len = qp->s_len;
@@ -442,7 +507,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
442 /* FALLTHROUGH */ 507 /* FALLTHROUGH */
443 case OP(RDMA_WRITE_MIDDLE): 508 case OP(RDMA_WRITE_MIDDLE):
444 bth2 = qp->s_psn++ & IPATH_PSN_MASK; 509 bth2 = qp->s_psn++ & IPATH_PSN_MASK;
445 if ((int)(qp->s_psn - qp->s_next_psn) > 0) 510 if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
446 qp->s_next_psn = qp->s_psn; 511 qp->s_next_psn = qp->s_psn;
447 ss = &qp->s_sge; 512 ss = &qp->s_sge;
448 len = qp->s_len; 513 len = qp->s_len;
@@ -479,9 +544,9 @@ int ipath_make_rc_req(struct ipath_qp *qp,
479 cpu_to_be32(wqe->wr.wr.rdma.rkey); 544 cpu_to_be32(wqe->wr.wr.rdma.rkey);
480 ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); 545 ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
481 qp->s_state = OP(RDMA_READ_REQUEST); 546 qp->s_state = OP(RDMA_READ_REQUEST);
482 hwords += sizeof(ohdr->u.rc.reth) / 4; 547 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
483 bth2 = qp->s_psn++ & IPATH_PSN_MASK; 548 bth2 = qp->s_psn++ & IPATH_PSN_MASK;
484 if ((int)(qp->s_psn - qp->s_next_psn) > 0) 549 if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
485 qp->s_next_psn = qp->s_psn; 550 qp->s_next_psn = qp->s_psn;
486 ss = NULL; 551 ss = NULL;
487 len = 0; 552 len = 0;
@@ -489,20 +554,6 @@ int ipath_make_rc_req(struct ipath_qp *qp,
489 if (qp->s_cur == qp->s_size) 554 if (qp->s_cur == qp->s_size)
490 qp->s_cur = 0; 555 qp->s_cur = 0;
491 break; 556 break;
492
493 case OP(RDMA_READ_REQUEST):
494 case OP(COMPARE_SWAP):
495 case OP(FETCH_ADD):
496 /*
497 * We shouldn't start anything new until this request is
498 * finished. The ACK will handle rescheduling us. XXX The
499 * number of outstanding ones is negotiated at connection
500 * setup time (see pg. 258,289)? XXX Also, if we support
501 * multiple outstanding requests, we need to check the WQE
502 * IB_SEND_FENCE flag and not send a new request if a RDMA
503 * read or atomic is pending.
504 */
505 goto done;
506 } 557 }
507 if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0) 558 if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
508 bth2 |= 1 << 31; /* Request ACK. */ 559 bth2 |= 1 << 31; /* Request ACK. */
@@ -512,9 +563,10 @@ int ipath_make_rc_req(struct ipath_qp *qp,
512 qp->s_cur_size = len; 563 qp->s_cur_size = len;
513 *bth0p = bth0 | (qp->s_state << 24); 564 *bth0p = bth0 | (qp->s_state << 24);
514 *bth2p = bth2; 565 *bth2p = bth2;
566done:
515 return 1; 567 return 1;
516 568
517done: 569bail:
518 return 0; 570 return 0;
519} 571}
520 572
@@ -524,7 +576,8 @@ done:
524 * 576 *
525 * This is called from ipath_rc_rcv() and only uses the receive 577 * This is called from ipath_rc_rcv() and only uses the receive
526 * side QP state. 578 * side QP state.
527 * Note that RDMA reads are handled in the send side QP state and tasklet. 579 * Note that RDMA reads and atomics are handled in the
580 * send side QP state and tasklet.
528 */ 581 */
529static void send_rc_ack(struct ipath_qp *qp) 582static void send_rc_ack(struct ipath_qp *qp)
530{ 583{
@@ -535,6 +588,10 @@ static void send_rc_ack(struct ipath_qp *qp)
535 struct ipath_ib_header hdr; 588 struct ipath_ib_header hdr;
536 struct ipath_other_headers *ohdr; 589 struct ipath_other_headers *ohdr;
537 590
591 /* Don't send ACK or NAK if a RDMA read or atomic is pending. */
592 if (qp->r_head_ack_queue != qp->s_tail_ack_queue)
593 goto queue_ack;
594
538 /* Construct the header. */ 595 /* Construct the header. */
539 ohdr = &hdr.u.oth; 596 ohdr = &hdr.u.oth;
540 lrh0 = IPATH_LRH_BTH; 597 lrh0 = IPATH_LRH_BTH;
@@ -548,19 +605,14 @@ static void send_rc_ack(struct ipath_qp *qp)
548 lrh0 = IPATH_LRH_GRH; 605 lrh0 = IPATH_LRH_GRH;
549 } 606 }
550 /* read pkey_index w/o lock (its atomic) */ 607 /* read pkey_index w/o lock (its atomic) */
551 bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index); 608 bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) |
609 OP(ACKNOWLEDGE) << 24;
552 if (qp->r_nak_state) 610 if (qp->r_nak_state)
553 ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | 611 ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
554 (qp->r_nak_state << 612 (qp->r_nak_state <<
555 IPATH_AETH_CREDIT_SHIFT)); 613 IPATH_AETH_CREDIT_SHIFT));
556 else 614 else
557 ohdr->u.aeth = ipath_compute_aeth(qp); 615 ohdr->u.aeth = ipath_compute_aeth(qp);
558 if (qp->r_ack_state >= OP(COMPARE_SWAP)) {
559 bth0 |= OP(ATOMIC_ACKNOWLEDGE) << 24;
560 ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
561 hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4;
562 } else
563 bth0 |= OP(ACKNOWLEDGE) << 24;
564 lrh0 |= qp->remote_ah_attr.sl << 4; 616 lrh0 |= qp->remote_ah_attr.sl << 4;
565 hdr.lrh[0] = cpu_to_be16(lrh0); 617 hdr.lrh[0] = cpu_to_be16(lrh0);
566 hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); 618 hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
@@ -574,31 +626,31 @@ static void send_rc_ack(struct ipath_qp *qp)
574 * If we can send the ACK, clear the ACK state. 626 * If we can send the ACK, clear the ACK state.
575 */ 627 */
576 if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) { 628 if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) {
577 qp->r_ack_state = OP(ACKNOWLEDGE);
578 dev->n_unicast_xmit++; 629 dev->n_unicast_xmit++;
579 } else { 630 goto done;
580 /*
581 * We are out of PIO buffers at the moment.
582 * Pass responsibility for sending the ACK to the
583 * send tasklet so that when a PIO buffer becomes
584 * available, the ACK is sent ahead of other outgoing
585 * packets.
586 */
587 dev->n_rc_qacks++;
588 spin_lock_irq(&qp->s_lock);
589 /* Don't coalesce if a RDMA read or atomic is pending. */
590 if (qp->s_ack_state == OP(ACKNOWLEDGE) ||
591 qp->s_ack_state < OP(RDMA_READ_REQUEST)) {
592 qp->s_ack_state = qp->r_ack_state;
593 qp->s_nak_state = qp->r_nak_state;
594 qp->s_ack_psn = qp->r_ack_psn;
595 qp->r_ack_state = OP(ACKNOWLEDGE);
596 }
597 spin_unlock_irq(&qp->s_lock);
598
599 /* Call ipath_do_rc_send() in another thread. */
600 tasklet_hi_schedule(&qp->s_task);
601 } 631 }
632
633 /*
634 * We are out of PIO buffers at the moment.
635 * Pass responsibility for sending the ACK to the
636 * send tasklet so that when a PIO buffer becomes
637 * available, the ACK is sent ahead of other outgoing
638 * packets.
639 */
640 dev->n_rc_qacks++;
641
642queue_ack:
643 spin_lock_irq(&qp->s_lock);
644 qp->s_flags |= IPATH_S_ACK_PENDING;
645 qp->s_nak_state = qp->r_nak_state;
646 qp->s_ack_psn = qp->r_ack_psn;
647 spin_unlock_irq(&qp->s_lock);
648
649 /* Call ipath_do_rc_send() in another thread. */
650 tasklet_hi_schedule(&qp->s_task);
651
652done:
653 return;
602} 654}
603 655
604/** 656/**
@@ -727,7 +779,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
727 if (wqe->wr.opcode == IB_WR_RDMA_READ) 779 if (wqe->wr.opcode == IB_WR_RDMA_READ)
728 dev->n_rc_resends++; 780 dev->n_rc_resends++;
729 else 781 else
730 dev->n_rc_resends += (int)qp->s_psn - (int)psn; 782 dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
731 783
732 reset_psn(qp, psn); 784 reset_psn(qp, psn);
733 tasklet_hi_schedule(&qp->s_task); 785 tasklet_hi_schedule(&qp->s_task);
@@ -775,10 +827,6 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
775 list_del_init(&qp->timerwait); 827 list_del_init(&qp->timerwait);
776 spin_unlock(&dev->pending_lock); 828 spin_unlock(&dev->pending_lock);
777 829
778 /* Nothing is pending to ACK/NAK. */
779 if (unlikely(qp->s_last == qp->s_tail))
780 goto bail;
781
782 /* 830 /*
783 * Note that NAKs implicitly ACK outstanding SEND and RDMA write 831 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
784 * requests and implicitly NAK RDMA read and atomic requests issued 832 * requests and implicitly NAK RDMA read and atomic requests issued
@@ -806,7 +854,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
806 */ 854 */
807 if ((wqe->wr.opcode == IB_WR_RDMA_READ && 855 if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
808 (opcode != OP(RDMA_READ_RESPONSE_LAST) || 856 (opcode != OP(RDMA_READ_RESPONSE_LAST) ||
809 ipath_cmp24(ack_psn, wqe->lpsn) != 0)) || 857 ipath_cmp24(ack_psn, wqe->lpsn) != 0)) ||
810 ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || 858 ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
811 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && 859 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
812 (opcode != OP(ATOMIC_ACKNOWLEDGE) || 860 (opcode != OP(ATOMIC_ACKNOWLEDGE) ||
@@ -824,20 +872,33 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
824 */ 872 */
825 goto bail; 873 goto bail;
826 } 874 }
827 if (wqe->wr.opcode == IB_WR_RDMA_READ || 875 if (qp->s_num_rd_atomic &&
828 wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || 876 (wqe->wr.opcode == IB_WR_RDMA_READ ||
829 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) 877 wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
830 tasklet_hi_schedule(&qp->s_task); 878 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
879 qp->s_num_rd_atomic--;
880 /* Restart sending task if fence is complete */
881 if ((qp->s_flags & IPATH_S_FENCE_PENDING) &&
882 !qp->s_num_rd_atomic) {
883 qp->s_flags &= ~IPATH_S_FENCE_PENDING;
884 tasklet_hi_schedule(&qp->s_task);
885 } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) {
886 qp->s_flags &= ~IPATH_S_RDMAR_PENDING;
887 tasklet_hi_schedule(&qp->s_task);
888 }
889 }
831 /* Post a send completion queue entry if requested. */ 890 /* Post a send completion queue entry if requested. */
832 if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || 891 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
833 (wqe->wr.send_flags & IB_SEND_SIGNALED)) { 892 (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
834 wc.wr_id = wqe->wr.wr_id; 893 wc.wr_id = wqe->wr.wr_id;
835 wc.status = IB_WC_SUCCESS; 894 wc.status = IB_WC_SUCCESS;
836 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; 895 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
837 wc.vendor_err = 0; 896 wc.vendor_err = 0;
838 wc.byte_len = wqe->length; 897 wc.byte_len = wqe->length;
898 wc.imm_data = 0;
839 wc.qp = &qp->ibqp; 899 wc.qp = &qp->ibqp;
840 wc.src_qp = qp->remote_qpn; 900 wc.src_qp = qp->remote_qpn;
901 wc.wc_flags = 0;
841 wc.pkey_index = 0; 902 wc.pkey_index = 0;
842 wc.slid = qp->remote_ah_attr.dlid; 903 wc.slid = qp->remote_ah_attr.dlid;
843 wc.sl = qp->remote_ah_attr.sl; 904 wc.sl = qp->remote_ah_attr.sl;
@@ -854,15 +915,19 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
854 if (qp->s_last == qp->s_cur) { 915 if (qp->s_last == qp->s_cur) {
855 if (++qp->s_cur >= qp->s_size) 916 if (++qp->s_cur >= qp->s_size)
856 qp->s_cur = 0; 917 qp->s_cur = 0;
918 qp->s_last = qp->s_cur;
919 if (qp->s_last == qp->s_tail)
920 break;
857 wqe = get_swqe_ptr(qp, qp->s_cur); 921 wqe = get_swqe_ptr(qp, qp->s_cur);
858 qp->s_state = OP(SEND_LAST); 922 qp->s_state = OP(SEND_LAST);
859 qp->s_psn = wqe->psn; 923 qp->s_psn = wqe->psn;
924 } else {
925 if (++qp->s_last >= qp->s_size)
926 qp->s_last = 0;
927 if (qp->s_last == qp->s_tail)
928 break;
929 wqe = get_swqe_ptr(qp, qp->s_last);
860 } 930 }
861 if (++qp->s_last >= qp->s_size)
862 qp->s_last = 0;
863 wqe = get_swqe_ptr(qp, qp->s_last);
864 if (qp->s_last == qp->s_tail)
865 break;
866 } 931 }
867 932
868 switch (aeth >> 29) { 933 switch (aeth >> 29) {
@@ -874,6 +939,18 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
874 list_add_tail(&qp->timerwait, 939 list_add_tail(&qp->timerwait,
875 &dev->pending[dev->pending_index]); 940 &dev->pending[dev->pending_index]);
876 spin_unlock(&dev->pending_lock); 941 spin_unlock(&dev->pending_lock);
942 /*
943 * If we get a partial ACK for a resent operation,
944 * we can stop resending the earlier packets and
945 * continue with the next packet the receiver wants.
946 */
947 if (ipath_cmp24(qp->s_psn, psn) <= 0) {
948 reset_psn(qp, psn + 1);
949 tasklet_hi_schedule(&qp->s_task);
950 }
951 } else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
952 qp->s_state = OP(SEND_LAST);
953 qp->s_psn = psn + 1;
877 } 954 }
878 ipath_get_credit(qp, aeth); 955 ipath_get_credit(qp, aeth);
879 qp->s_rnr_retry = qp->s_rnr_retry_cnt; 956 qp->s_rnr_retry = qp->s_rnr_retry_cnt;
@@ -884,22 +961,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
884 961
885 case 1: /* RNR NAK */ 962 case 1: /* RNR NAK */
886 dev->n_rnr_naks++; 963 dev->n_rnr_naks++;
964 if (qp->s_last == qp->s_tail)
965 goto bail;
887 if (qp->s_rnr_retry == 0) { 966 if (qp->s_rnr_retry == 0) {
888 if (qp->s_last == qp->s_tail)
889 goto bail;
890
891 wc.status = IB_WC_RNR_RETRY_EXC_ERR; 967 wc.status = IB_WC_RNR_RETRY_EXC_ERR;
892 goto class_b; 968 goto class_b;
893 } 969 }
894 if (qp->s_rnr_retry_cnt < 7) 970 if (qp->s_rnr_retry_cnt < 7)
895 qp->s_rnr_retry--; 971 qp->s_rnr_retry--;
896 if (qp->s_last == qp->s_tail)
897 goto bail;
898 972
899 /* The last valid PSN is the previous PSN. */ 973 /* The last valid PSN is the previous PSN. */
900 update_last_psn(qp, psn - 1); 974 update_last_psn(qp, psn - 1);
901 975
902 dev->n_rc_resends += (int)qp->s_psn - (int)psn; 976 if (wqe->wr.opcode == IB_WR_RDMA_READ)
977 dev->n_rc_resends++;
978 else
979 dev->n_rc_resends +=
980 (qp->s_psn - psn) & IPATH_PSN_MASK;
903 981
904 reset_psn(qp, psn); 982 reset_psn(qp, psn);
905 983
@@ -910,26 +988,20 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
910 goto bail; 988 goto bail;
911 989
912 case 3: /* NAK */ 990 case 3: /* NAK */
913 /* The last valid PSN seen is the previous request's. */ 991 if (qp->s_last == qp->s_tail)
914 if (qp->s_last != qp->s_tail) 992 goto bail;
915 update_last_psn(qp, wqe->psn - 1); 993 /* The last valid PSN is the previous PSN. */
994 update_last_psn(qp, psn - 1);
916 switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) & 995 switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
917 IPATH_AETH_CREDIT_MASK) { 996 IPATH_AETH_CREDIT_MASK) {
918 case 0: /* PSN sequence error */ 997 case 0: /* PSN sequence error */
919 dev->n_seq_naks++; 998 dev->n_seq_naks++;
920 /* 999 /*
921 * Back up to the responder's expected PSN. XXX 1000 * Back up to the responder's expected PSN.
922 * Note that we might get a NAK in the middle of an 1001 * Note that we might get a NAK in the middle of an
923 * RDMA READ response which terminates the RDMA 1002 * RDMA READ response which terminates the RDMA
924 * READ. 1003 * READ.
925 */ 1004 */
926 if (qp->s_last == qp->s_tail)
927 break;
928
929 if (ipath_cmp24(psn, wqe->psn) < 0)
930 break;
931
932 /* Retry the request. */
933 ipath_restart_rc(qp, psn, &wc); 1005 ipath_restart_rc(qp, psn, &wc);
934 break; 1006 break;
935 1007
@@ -1003,6 +1075,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1003 u32 psn, u32 hdrsize, u32 pmtu, 1075 u32 psn, u32 hdrsize, u32 pmtu,
1004 int header_in_data) 1076 int header_in_data)
1005{ 1077{
1078 struct ipath_swqe *wqe;
1006 unsigned long flags; 1079 unsigned long flags;
1007 struct ib_wc wc; 1080 struct ib_wc wc;
1008 int diff; 1081 int diff;
@@ -1032,6 +1105,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1032 goto ack_done; 1105 goto ack_done;
1033 } 1106 }
1034 1107
1108 if (unlikely(qp->s_last == qp->s_tail))
1109 goto ack_done;
1110 wqe = get_swqe_ptr(qp, qp->s_last);
1111
1035 switch (opcode) { 1112 switch (opcode) {
1036 case OP(ACKNOWLEDGE): 1113 case OP(ACKNOWLEDGE):
1037 case OP(ATOMIC_ACKNOWLEDGE): 1114 case OP(ATOMIC_ACKNOWLEDGE):
@@ -1042,38 +1119,49 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1042 aeth = be32_to_cpu(((__be32 *) data)[0]); 1119 aeth = be32_to_cpu(((__be32 *) data)[0]);
1043 data += sizeof(__be32); 1120 data += sizeof(__be32);
1044 } 1121 }
1045 if (opcode == OP(ATOMIC_ACKNOWLEDGE)) 1122 if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
1046 *(u64 *) qp->s_sge.sge.vaddr = *(u64 *) data; 1123 u64 val;
1124
1125 if (!header_in_data) {
1126 __be32 *p = ohdr->u.at.atomic_ack_eth;
1127
1128 val = ((u64) be32_to_cpu(p[0]) << 32) |
1129 be32_to_cpu(p[1]);
1130 } else
1131 val = be64_to_cpu(((__be64 *) data)[0]);
1132 *(u64 *) wqe->sg_list[0].vaddr = val;
1133 }
1047 if (!do_rc_ack(qp, aeth, psn, opcode) || 1134 if (!do_rc_ack(qp, aeth, psn, opcode) ||
1048 opcode != OP(RDMA_READ_RESPONSE_FIRST)) 1135 opcode != OP(RDMA_READ_RESPONSE_FIRST))
1049 goto ack_done; 1136 goto ack_done;
1050 hdrsize += 4; 1137 hdrsize += 4;
1138 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1139 goto ack_op_err;
1051 /* 1140 /*
1052 * do_rc_ack() has already checked the PSN so skip 1141 * If this is a response to a resent RDMA read, we
1053 * the sequence check. 1142 * have to be careful to copy the data to the right
1143 * location.
1054 */ 1144 */
1055 goto rdma_read; 1145 qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1146 wqe, psn, pmtu);
1147 goto read_middle;
1056 1148
1057 case OP(RDMA_READ_RESPONSE_MIDDLE): 1149 case OP(RDMA_READ_RESPONSE_MIDDLE):
1058 /* no AETH, no ACK */ 1150 /* no AETH, no ACK */
1059 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { 1151 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
1060 dev->n_rdma_seq++; 1152 dev->n_rdma_seq++;
1061 if (qp->s_last != qp->s_tail) 1153 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
1062 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
1063 goto ack_done; 1154 goto ack_done;
1064 } 1155 }
1065 rdma_read: 1156 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1066 if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST))) 1157 goto ack_op_err;
1067 goto ack_done; 1158 read_middle:
1068 if (unlikely(tlen != (hdrsize + pmtu + 4))) 1159 if (unlikely(tlen != (hdrsize + pmtu + 4)))
1069 goto ack_done; 1160 goto ack_len_err;
1070 if (unlikely(pmtu >= qp->s_len)) 1161 if (unlikely(pmtu >= qp->s_rdma_read_len))
1071 goto ack_done; 1162 goto ack_len_err;
1163
1072 /* We got a response so update the timeout. */ 1164 /* We got a response so update the timeout. */
1073 if (unlikely(qp->s_last == qp->s_tail ||
1074 get_swqe_ptr(qp, qp->s_last)->wr.opcode !=
1075 IB_WR_RDMA_READ))
1076 goto ack_done;
1077 spin_lock(&dev->pending_lock); 1165 spin_lock(&dev->pending_lock);
1078 if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait)) 1166 if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
1079 list_move_tail(&qp->timerwait, 1167 list_move_tail(&qp->timerwait,
@@ -1082,67 +1170,97 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1082 /* 1170 /*
1083 * Update the RDMA receive state but do the copy w/o 1171 * Update the RDMA receive state but do the copy w/o
1084 * holding the locks and blocking interrupts. 1172 * holding the locks and blocking interrupts.
1085 * XXX Yet another place that affects relaxed RDMA order
1086 * since we don't want s_sge modified.
1087 */ 1173 */
1088 qp->s_len -= pmtu; 1174 qp->s_rdma_read_len -= pmtu;
1089 update_last_psn(qp, psn); 1175 update_last_psn(qp, psn);
1090 spin_unlock_irqrestore(&qp->s_lock, flags); 1176 spin_unlock_irqrestore(&qp->s_lock, flags);
1091 ipath_copy_sge(&qp->s_sge, data, pmtu); 1177 ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu);
1092 goto bail; 1178 goto bail;
1093 1179
1094 case OP(RDMA_READ_RESPONSE_LAST): 1180 case OP(RDMA_READ_RESPONSE_ONLY):
1095 /* ACKs READ req. */
1096 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { 1181 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
1097 dev->n_rdma_seq++; 1182 dev->n_rdma_seq++;
1098 if (qp->s_last != qp->s_tail) 1183 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
1099 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
1100 goto ack_done; 1184 goto ack_done;
1101 } 1185 }
1102 /* FALLTHROUGH */ 1186 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1103 case OP(RDMA_READ_RESPONSE_ONLY): 1187 goto ack_op_err;
1104 if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST))) 1188 /* Get the number of bytes the message was padded by. */
1105 goto ack_done; 1189 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1190 /*
1191 * Check that the data size is >= 0 && <= pmtu.
1192 * Remember to account for the AETH header (4) and
1193 * ICRC (4).
1194 */
1195 if (unlikely(tlen < (hdrsize + pad + 8)))
1196 goto ack_len_err;
1106 /* 1197 /*
1107 * Get the number of bytes the message was padded by. 1198 * If this is a response to a resent RDMA read, we
1199 * have to be careful to copy the data to the right
1200 * location.
1108 */ 1201 */
1202 qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1203 wqe, psn, pmtu);
1204 goto read_last;
1205
1206 case OP(RDMA_READ_RESPONSE_LAST):
1207 /* ACKs READ req. */
1208 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
1209 dev->n_rdma_seq++;
1210 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
1211 goto ack_done;
1212 }
1213 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1214 goto ack_op_err;
1215 /* Get the number of bytes the message was padded by. */
1109 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; 1216 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1110 /* 1217 /*
1111 * Check that the data size is >= 1 && <= pmtu. 1218 * Check that the data size is >= 1 && <= pmtu.
1112 * Remember to account for the AETH header (4) and 1219 * Remember to account for the AETH header (4) and
1113 * ICRC (4). 1220 * ICRC (4).
1114 */ 1221 */
1115 if (unlikely(tlen <= (hdrsize + pad + 8))) { 1222 if (unlikely(tlen <= (hdrsize + pad + 8)))
1116 /* XXX Need to generate an error CQ entry. */ 1223 goto ack_len_err;
1117 goto ack_done; 1224 read_last:
1118 }
1119 tlen -= hdrsize + pad + 8; 1225 tlen -= hdrsize + pad + 8;
1120 if (unlikely(tlen != qp->s_len)) { 1226 if (unlikely(tlen != qp->s_rdma_read_len))
1121 /* XXX Need to generate an error CQ entry. */ 1227 goto ack_len_err;
1122 goto ack_done;
1123 }
1124 if (!header_in_data) 1228 if (!header_in_data)
1125 aeth = be32_to_cpu(ohdr->u.aeth); 1229 aeth = be32_to_cpu(ohdr->u.aeth);
1126 else { 1230 else {
1127 aeth = be32_to_cpu(((__be32 *) data)[0]); 1231 aeth = be32_to_cpu(((__be32 *) data)[0]);
1128 data += sizeof(__be32); 1232 data += sizeof(__be32);
1129 } 1233 }
1130 ipath_copy_sge(&qp->s_sge, data, tlen); 1234 ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
1131 if (do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST))) { 1235 (void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST));
1132 /*
1133 * Change the state so we contimue
1134 * processing new requests and wake up the
1135 * tasklet if there are posted sends.
1136 */
1137 qp->s_state = OP(SEND_LAST);
1138 if (qp->s_tail != qp->s_head)
1139 tasklet_hi_schedule(&qp->s_task);
1140 }
1141 goto ack_done; 1236 goto ack_done;
1142 } 1237 }
1143 1238
1144ack_done: 1239ack_done:
1145 spin_unlock_irqrestore(&qp->s_lock, flags); 1240 spin_unlock_irqrestore(&qp->s_lock, flags);
1241 goto bail;
1242
1243ack_op_err:
1244 wc.status = IB_WC_LOC_QP_OP_ERR;
1245 goto ack_err;
1246
1247ack_len_err:
1248 wc.status = IB_WC_LOC_LEN_ERR;
1249ack_err:
1250 wc.wr_id = wqe->wr.wr_id;
1251 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
1252 wc.vendor_err = 0;
1253 wc.byte_len = 0;
1254 wc.imm_data = 0;
1255 wc.qp = &qp->ibqp;
1256 wc.src_qp = qp->remote_qpn;
1257 wc.wc_flags = 0;
1258 wc.pkey_index = 0;
1259 wc.slid = qp->remote_ah_attr.dlid;
1260 wc.sl = qp->remote_ah_attr.sl;
1261 wc.dlid_path_bits = 0;
1262 wc.port_num = 0;
1263 ipath_sqerror_qp(qp, &wc);
1146bail: 1264bail:
1147 return; 1265 return;
1148} 1266}
@@ -1162,7 +1280,7 @@ bail:
1162 * incoming RC packet for the given QP. 1280 * incoming RC packet for the given QP.
1163 * Called at interrupt level. 1281 * Called at interrupt level.
1164 * Return 1 if no more processing is needed; otherwise return 0 to 1282 * Return 1 if no more processing is needed; otherwise return 0 to
1165 * schedule a response to be sent and the s_lock unlocked. 1283 * schedule a response to be sent.
1166 */ 1284 */
1167static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, 1285static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1168 struct ipath_other_headers *ohdr, 1286 struct ipath_other_headers *ohdr,
@@ -1173,25 +1291,23 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1173 int diff, 1291 int diff,
1174 int header_in_data) 1292 int header_in_data)
1175{ 1293{
1176 struct ib_reth *reth; 1294 struct ipath_ack_entry *e;
1295 u8 i, prev;
1296 int old_req;
1177 1297
1178 if (diff > 0) { 1298 if (diff > 0) {
1179 /* 1299 /*
1180 * Packet sequence error. 1300 * Packet sequence error.
1181 * A NAK will ACK earlier sends and RDMA writes. 1301 * A NAK will ACK earlier sends and RDMA writes.
1182 * Don't queue the NAK if a RDMA read, atomic, or 1302 * Don't queue the NAK if we already sent one.
1183 * NAK is pending though.
1184 */ 1303 */
1185 if (qp->s_ack_state != OP(ACKNOWLEDGE) || 1304 if (!qp->r_nak_state) {
1186 qp->r_nak_state != 0)
1187 goto done;
1188 if (qp->r_ack_state < OP(COMPARE_SWAP)) {
1189 qp->r_ack_state = OP(SEND_ONLY);
1190 qp->r_nak_state = IB_NAK_PSN_ERROR; 1305 qp->r_nak_state = IB_NAK_PSN_ERROR;
1191 /* Use the expected PSN. */ 1306 /* Use the expected PSN. */
1192 qp->r_ack_psn = qp->r_psn; 1307 qp->r_ack_psn = qp->r_psn;
1308 goto send_ack;
1193 } 1309 }
1194 goto send_ack; 1310 goto done;
1195 } 1311 }
1196 1312
1197 /* 1313 /*
@@ -1204,8 +1320,46 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1204 * can coalesce an outstanding duplicate ACK. We have to 1320 * can coalesce an outstanding duplicate ACK. We have to
1205 * send the earliest so that RDMA reads can be restarted at 1321 * send the earliest so that RDMA reads can be restarted at
1206 * the requester's expected PSN. 1322 * the requester's expected PSN.
1323 *
1324 * First, find where this duplicate PSN falls within the
1325 * ACKs previously sent.
1207 */ 1326 */
1208 if (opcode == OP(RDMA_READ_REQUEST)) { 1327 psn &= IPATH_PSN_MASK;
1328 e = NULL;
1329 old_req = 1;
1330 spin_lock_irq(&qp->s_lock);
1331 for (i = qp->r_head_ack_queue; ; i = prev) {
1332 if (i == qp->s_tail_ack_queue)
1333 old_req = 0;
1334 if (i)
1335 prev = i - 1;
1336 else
1337 prev = IPATH_MAX_RDMA_ATOMIC;
1338 if (prev == qp->r_head_ack_queue) {
1339 e = NULL;
1340 break;
1341 }
1342 e = &qp->s_ack_queue[prev];
1343 if (!e->opcode) {
1344 e = NULL;
1345 break;
1346 }
1347 if (ipath_cmp24(psn, e->psn) >= 0)
1348 break;
1349 }
1350 switch (opcode) {
1351 case OP(RDMA_READ_REQUEST): {
1352 struct ib_reth *reth;
1353 u32 offset;
1354 u32 len;
1355
1356 /*
1357 * If we didn't find the RDMA read request in the ack queue,
1358 * or the send tasklet is already backed up to send an
1359 * earlier entry, we can ignore this request.
1360 */
1361 if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req)
1362 goto unlock_done;
1209 /* RETH comes after BTH */ 1363 /* RETH comes after BTH */
1210 if (!header_in_data) 1364 if (!header_in_data)
1211 reth = &ohdr->u.rc.reth; 1365 reth = &ohdr->u.rc.reth;
@@ -1214,88 +1368,87 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1214 data += sizeof(*reth); 1368 data += sizeof(*reth);
1215 } 1369 }
1216 /* 1370 /*
1217 * If we receive a duplicate RDMA request, it means the 1371 * Address range must be a subset of the original
1218 * requester saw a sequence error and needs to restart 1372 * request and start on pmtu boundaries.
1219 * from an earlier point. We can abort the current 1373 * We reuse the old ack_queue slot since the requester
1220 * RDMA read send in that case. 1374 * should not back up and request an earlier PSN for the
1375 * same request.
1221 */ 1376 */
1222 spin_lock_irq(&qp->s_lock); 1377 offset = ((psn - e->psn) & IPATH_PSN_MASK) *
1223 if (qp->s_ack_state != OP(ACKNOWLEDGE) && 1378 ib_mtu_enum_to_int(qp->path_mtu);
1224 (qp->s_hdrwords || ipath_cmp24(psn, qp->s_ack_psn) >= 0)) { 1379 len = be32_to_cpu(reth->length);
1225 /* 1380 if (unlikely(offset + len > e->rdma_sge.sge.sge_length))
1226 * We are already sending earlier requested data. 1381 goto unlock_done;
1227 * Don't abort it to send later out of sequence data. 1382 if (len != 0) {
1228 */
1229 spin_unlock_irq(&qp->s_lock);
1230 goto done;
1231 }
1232 qp->s_rdma_len = be32_to_cpu(reth->length);
1233 if (qp->s_rdma_len != 0) {
1234 u32 rkey = be32_to_cpu(reth->rkey); 1383 u32 rkey = be32_to_cpu(reth->rkey);
1235 u64 vaddr = be64_to_cpu(reth->vaddr); 1384 u64 vaddr = be64_to_cpu(reth->vaddr);
1236 int ok; 1385 int ok;
1237 1386
1238 /* 1387 ok = ipath_rkey_ok(qp, &e->rdma_sge,
1239 * Address range must be a subset of the original 1388 len, vaddr, rkey,
1240 * request and start on pmtu boundaries.
1241 */
1242 ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
1243 qp->s_rdma_len, vaddr, rkey,
1244 IB_ACCESS_REMOTE_READ); 1389 IB_ACCESS_REMOTE_READ);
1245 if (unlikely(!ok)) { 1390 if (unlikely(!ok))
1246 spin_unlock_irq(&qp->s_lock); 1391 goto unlock_done;
1247 goto done;
1248 }
1249 } else { 1392 } else {
1250 qp->s_rdma_sge.sg_list = NULL; 1393 e->rdma_sge.sg_list = NULL;
1251 qp->s_rdma_sge.num_sge = 0; 1394 e->rdma_sge.num_sge = 0;
1252 qp->s_rdma_sge.sge.mr = NULL; 1395 e->rdma_sge.sge.mr = NULL;
1253 qp->s_rdma_sge.sge.vaddr = NULL; 1396 e->rdma_sge.sge.vaddr = NULL;
1254 qp->s_rdma_sge.sge.length = 0; 1397 e->rdma_sge.sge.length = 0;
1255 qp->s_rdma_sge.sge.sge_length = 0; 1398 e->rdma_sge.sge.sge_length = 0;
1256 } 1399 }
1257 qp->s_ack_state = opcode; 1400 e->psn = psn;
1258 qp->s_ack_psn = psn; 1401 qp->s_ack_state = OP(ACKNOWLEDGE);
1259 spin_unlock_irq(&qp->s_lock); 1402 qp->s_tail_ack_queue = prev;
1260 tasklet_hi_schedule(&qp->s_task); 1403 break;
1261 goto send_ack;
1262 } 1404 }
1263 1405
1264 /*
1265 * A pending RDMA read will ACK anything before it so
1266 * ignore earlier duplicate requests.
1267 */
1268 if (qp->s_ack_state != OP(ACKNOWLEDGE))
1269 goto done;
1270
1271 /*
1272 * If an ACK is pending, don't replace the pending ACK
1273 * with an earlier one since the later one will ACK the earlier.
1274 * Also, if we already have a pending atomic, send it.
1275 */
1276 if (qp->r_ack_state != OP(ACKNOWLEDGE) &&
1277 (ipath_cmp24(psn, qp->r_ack_psn) <= 0 ||
1278 qp->r_ack_state >= OP(COMPARE_SWAP)))
1279 goto send_ack;
1280 switch (opcode) {
1281 case OP(COMPARE_SWAP): 1406 case OP(COMPARE_SWAP):
1282 case OP(FETCH_ADD): 1407 case OP(FETCH_ADD): {
1283 /* 1408 /*
1284 * Check for the PSN of the last atomic operation 1409 * If we didn't find the atomic request in the ack queue
1285 * performed and resend the result if found. 1410 * or the send tasklet is already backed up to send an
1411 * earlier entry, we can ignore this request.
1286 */ 1412 */
1287 if ((psn & IPATH_PSN_MASK) != qp->r_atomic_psn) 1413 if (!e || e->opcode != (u8) opcode || old_req)
1288 goto done; 1414 goto unlock_done;
1415 qp->s_ack_state = OP(ACKNOWLEDGE);
1416 qp->s_tail_ack_queue = prev;
1417 break;
1418 }
1419
1420 default:
1421 if (old_req)
1422 goto unlock_done;
1423 /*
1424 * Resend the most recent ACK if this request is
1425 * after all the previous RDMA reads and atomics.
1426 */
1427 if (i == qp->r_head_ack_queue) {
1428 spin_unlock_irq(&qp->s_lock);
1429 qp->r_nak_state = 0;
1430 qp->r_ack_psn = qp->r_psn - 1;
1431 goto send_ack;
1432 }
1433 /*
1434 * Resend the RDMA read or atomic op which
1435 * ACKs this duplicate request.
1436 */
1437 qp->s_ack_state = OP(ACKNOWLEDGE);
1438 qp->s_tail_ack_queue = i;
1289 break; 1439 break;
1290 } 1440 }
1291 qp->r_ack_state = opcode;
1292 qp->r_nak_state = 0; 1441 qp->r_nak_state = 0;
1293 qp->r_ack_psn = psn; 1442 spin_unlock_irq(&qp->s_lock);
1294send_ack: 1443 tasklet_hi_schedule(&qp->s_task);
1295 return 0;
1296 1444
1445unlock_done:
1446 spin_unlock_irq(&qp->s_lock);
1297done: 1447done:
1298 return 1; 1448 return 1;
1449
1450send_ack:
1451 return 0;
1299} 1452}
1300 1453
1301static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) 1454static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
@@ -1391,15 +1544,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1391 opcode == OP(SEND_LAST_WITH_IMMEDIATE)) 1544 opcode == OP(SEND_LAST_WITH_IMMEDIATE))
1392 break; 1545 break;
1393 nack_inv: 1546 nack_inv:
1394 /*
1395 * A NAK will ACK earlier sends and RDMA writes.
1396 * Don't queue the NAK if a RDMA read, atomic, or NAK
1397 * is pending though.
1398 */
1399 if (qp->r_ack_state >= OP(COMPARE_SWAP))
1400 goto send_ack;
1401 ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR); 1547 ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
1402 qp->r_ack_state = OP(SEND_ONLY);
1403 qp->r_nak_state = IB_NAK_INVALID_REQUEST; 1548 qp->r_nak_state = IB_NAK_INVALID_REQUEST;
1404 qp->r_ack_psn = qp->r_psn; 1549 qp->r_ack_psn = qp->r_psn;
1405 goto send_ack; 1550 goto send_ack;
@@ -1441,9 +1586,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1441 * Don't queue the NAK if a RDMA read or atomic 1586 * Don't queue the NAK if a RDMA read or atomic
1442 * is pending though. 1587 * is pending though.
1443 */ 1588 */
1444 if (qp->r_ack_state >= OP(COMPARE_SWAP)) 1589 if (qp->r_nak_state)
1445 goto send_ack; 1590 goto done;
1446 qp->r_ack_state = OP(SEND_ONLY);
1447 qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; 1591 qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
1448 qp->r_ack_psn = qp->r_psn; 1592 qp->r_ack_psn = qp->r_psn;
1449 goto send_ack; 1593 goto send_ack;
@@ -1567,7 +1711,19 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1567 goto rnr_nak; 1711 goto rnr_nak;
1568 goto send_last_imm; 1712 goto send_last_imm;
1569 1713
1570 case OP(RDMA_READ_REQUEST): 1714 case OP(RDMA_READ_REQUEST): {
1715 struct ipath_ack_entry *e;
1716 u32 len;
1717 u8 next;
1718
1719 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
1720 goto nack_acc;
1721 next = qp->r_head_ack_queue + 1;
1722 if (next > IPATH_MAX_RDMA_ATOMIC)
1723 next = 0;
1724 if (unlikely(next == qp->s_tail_ack_queue))
1725 goto nack_inv;
1726 e = &qp->s_ack_queue[qp->r_head_ack_queue];
1571 /* RETH comes after BTH */ 1727 /* RETH comes after BTH */
1572 if (!header_in_data) 1728 if (!header_in_data)
1573 reth = &ohdr->u.rc.reth; 1729 reth = &ohdr->u.rc.reth;
@@ -1575,72 +1731,75 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1575 reth = (struct ib_reth *)data; 1731 reth = (struct ib_reth *)data;
1576 data += sizeof(*reth); 1732 data += sizeof(*reth);
1577 } 1733 }
1578 if (unlikely(!(qp->qp_access_flags & 1734 len = be32_to_cpu(reth->length);
1579 IB_ACCESS_REMOTE_READ))) 1735 if (len) {
1580 goto nack_acc;
1581 spin_lock_irq(&qp->s_lock);
1582 qp->s_rdma_len = be32_to_cpu(reth->length);
1583 if (qp->s_rdma_len != 0) {
1584 u32 rkey = be32_to_cpu(reth->rkey); 1736 u32 rkey = be32_to_cpu(reth->rkey);
1585 u64 vaddr = be64_to_cpu(reth->vaddr); 1737 u64 vaddr = be64_to_cpu(reth->vaddr);
1586 int ok; 1738 int ok;
1587 1739
1588 /* Check rkey & NAK */ 1740 /* Check rkey & NAK */
1589 ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, 1741 ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
1590 qp->s_rdma_len, vaddr, rkey, 1742 rkey, IB_ACCESS_REMOTE_READ);
1591 IB_ACCESS_REMOTE_READ); 1743 if (unlikely(!ok))
1592 if (unlikely(!ok)) {
1593 spin_unlock_irq(&qp->s_lock);
1594 goto nack_acc; 1744 goto nack_acc;
1595 }
1596 /* 1745 /*
1597 * Update the next expected PSN. We add 1 later 1746 * Update the next expected PSN. We add 1 later
1598 * below, so only add the remainder here. 1747 * below, so only add the remainder here.
1599 */ 1748 */
1600 if (qp->s_rdma_len > pmtu) 1749 if (len > pmtu)
1601 qp->r_psn += (qp->s_rdma_len - 1) / pmtu; 1750 qp->r_psn += (len - 1) / pmtu;
1602 } else { 1751 } else {
1603 qp->s_rdma_sge.sg_list = NULL; 1752 e->rdma_sge.sg_list = NULL;
1604 qp->s_rdma_sge.num_sge = 0; 1753 e->rdma_sge.num_sge = 0;
1605 qp->s_rdma_sge.sge.mr = NULL; 1754 e->rdma_sge.sge.mr = NULL;
1606 qp->s_rdma_sge.sge.vaddr = NULL; 1755 e->rdma_sge.sge.vaddr = NULL;
1607 qp->s_rdma_sge.sge.length = 0; 1756 e->rdma_sge.sge.length = 0;
1608 qp->s_rdma_sge.sge.sge_length = 0; 1757 e->rdma_sge.sge.sge_length = 0;
1609 } 1758 }
1759 e->opcode = opcode;
1760 e->psn = psn;
1610 /* 1761 /*
1611 * We need to increment the MSN here instead of when we 1762 * We need to increment the MSN here instead of when we
1612 * finish sending the result since a duplicate request would 1763 * finish sending the result since a duplicate request would
1613 * increment it more than once. 1764 * increment it more than once.
1614 */ 1765 */
1615 qp->r_msn++; 1766 qp->r_msn++;
1616
1617 qp->s_ack_state = opcode;
1618 qp->s_ack_psn = psn;
1619 spin_unlock_irq(&qp->s_lock);
1620
1621 qp->r_psn++; 1767 qp->r_psn++;
1622 qp->r_state = opcode; 1768 qp->r_state = opcode;
1623 qp->r_nak_state = 0; 1769 qp->r_nak_state = 0;
1770 barrier();
1771 qp->r_head_ack_queue = next;
1624 1772
1625 /* Call ipath_do_rc_send() in another thread. */ 1773 /* Call ipath_do_rc_send() in another thread. */
1626 tasklet_hi_schedule(&qp->s_task); 1774 tasklet_hi_schedule(&qp->s_task);
1627 1775
1628 goto done; 1776 goto done;
1777 }
1629 1778
1630 case OP(COMPARE_SWAP): 1779 case OP(COMPARE_SWAP):
1631 case OP(FETCH_ADD): { 1780 case OP(FETCH_ADD): {
1632 struct ib_atomic_eth *ateth; 1781 struct ib_atomic_eth *ateth;
1782 struct ipath_ack_entry *e;
1633 u64 vaddr; 1783 u64 vaddr;
1784 atomic64_t *maddr;
1634 u64 sdata; 1785 u64 sdata;
1635 u32 rkey; 1786 u32 rkey;
1787 u8 next;
1636 1788
1789 if (unlikely(!(qp->qp_access_flags &
1790 IB_ACCESS_REMOTE_ATOMIC)))
1791 goto nack_acc;
1792 next = qp->r_head_ack_queue + 1;
1793 if (next > IPATH_MAX_RDMA_ATOMIC)
1794 next = 0;
1795 if (unlikely(next == qp->s_tail_ack_queue))
1796 goto nack_inv;
1637 if (!header_in_data) 1797 if (!header_in_data)
1638 ateth = &ohdr->u.atomic_eth; 1798 ateth = &ohdr->u.atomic_eth;
1639 else { 1799 else
1640 ateth = (struct ib_atomic_eth *)data; 1800 ateth = (struct ib_atomic_eth *)data;
1641 data += sizeof(*ateth); 1801 vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
1642 } 1802 be32_to_cpu(ateth->vaddr[1]);
1643 vaddr = be64_to_cpu(ateth->vaddr);
1644 if (unlikely(vaddr & (sizeof(u64) - 1))) 1803 if (unlikely(vaddr & (sizeof(u64) - 1)))
1645 goto nack_inv; 1804 goto nack_inv;
1646 rkey = be32_to_cpu(ateth->rkey); 1805 rkey = be32_to_cpu(ateth->rkey);
@@ -1649,63 +1808,50 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1649 sizeof(u64), vaddr, rkey, 1808 sizeof(u64), vaddr, rkey,
1650 IB_ACCESS_REMOTE_ATOMIC))) 1809 IB_ACCESS_REMOTE_ATOMIC)))
1651 goto nack_acc; 1810 goto nack_acc;
1652 if (unlikely(!(qp->qp_access_flags &
1653 IB_ACCESS_REMOTE_ATOMIC)))
1654 goto nack_acc;
1655 /* Perform atomic OP and save result. */ 1811 /* Perform atomic OP and save result. */
1812 maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
1656 sdata = be64_to_cpu(ateth->swap_data); 1813 sdata = be64_to_cpu(ateth->swap_data);
1657 spin_lock_irq(&dev->pending_lock); 1814 e = &qp->s_ack_queue[qp->r_head_ack_queue];
1658 qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; 1815 e->atomic_data = (opcode == OP(FETCH_ADD)) ?
1659 if (opcode == OP(FETCH_ADD)) 1816 (u64) atomic64_add_return(sdata, maddr) - sdata :
1660 *(u64 *) qp->r_sge.sge.vaddr = 1817 (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
1661 qp->r_atomic_data + sdata; 1818 be64_to_cpu(ateth->compare_data),
1662 else if (qp->r_atomic_data == 1819 sdata);
1663 be64_to_cpu(ateth->compare_data)) 1820 e->opcode = opcode;
1664 *(u64 *) qp->r_sge.sge.vaddr = sdata; 1821 e->psn = psn & IPATH_PSN_MASK;
1665 spin_unlock_irq(&dev->pending_lock);
1666 qp->r_msn++; 1822 qp->r_msn++;
1667 qp->r_atomic_psn = psn & IPATH_PSN_MASK; 1823 qp->r_psn++;
1668 psn |= 1 << 31; 1824 qp->r_state = opcode;
1669 break; 1825 qp->r_nak_state = 0;
1826 barrier();
1827 qp->r_head_ack_queue = next;
1828
1829 /* Call ipath_do_rc_send() in another thread. */
1830 tasklet_hi_schedule(&qp->s_task);
1831
1832 goto done;
1670 } 1833 }
1671 1834
1672 default: 1835 default:
1673 /* Drop packet for unknown opcodes. */ 1836 /* NAK unknown opcodes. */
1674 goto done; 1837 goto nack_inv;
1675 } 1838 }
1676 qp->r_psn++; 1839 qp->r_psn++;
1677 qp->r_state = opcode; 1840 qp->r_state = opcode;
1841 qp->r_ack_psn = psn;
1678 qp->r_nak_state = 0; 1842 qp->r_nak_state = 0;
1679 /* Send an ACK if requested or required. */ 1843 /* Send an ACK if requested or required. */
1680 if (psn & (1 << 31)) { 1844 if (psn & (1 << 31))
1681 /*
1682 * Coalesce ACKs unless there is a RDMA READ or
1683 * ATOMIC pending.
1684 */
1685 if (qp->r_ack_state < OP(COMPARE_SWAP)) {
1686 qp->r_ack_state = opcode;
1687 qp->r_ack_psn = psn;
1688 }
1689 goto send_ack; 1845 goto send_ack;
1690 }
1691 goto done; 1846 goto done;
1692 1847
1693nack_acc: 1848nack_acc:
1694 /* 1849 ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
1695 * A NAK will ACK earlier sends and RDMA writes. 1850 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
1696 * Don't queue the NAK if a RDMA read, atomic, or NAK 1851 qp->r_ack_psn = qp->r_psn;
1697 * is pending though. 1852
1698 */
1699 if (qp->r_ack_state < OP(COMPARE_SWAP)) {
1700 ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
1701 qp->r_ack_state = OP(RDMA_WRITE_ONLY);
1702 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
1703 qp->r_ack_psn = qp->r_psn;
1704 }
1705send_ack: 1853send_ack:
1706 /* Send ACK right away unless the send tasklet has a pending ACK. */ 1854 send_rc_ack(qp);
1707 if (qp->s_ack_state == OP(ACKNOWLEDGE))
1708 send_rc_ack(qp);
1709 1855
1710done: 1856done:
1711 return; 1857 return;
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index dffc76016d3c..c182bcd62098 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -126,9 +126,18 @@
126#define INFINIPATH_E_RESET 0x0004000000000000ULL 126#define INFINIPATH_E_RESET 0x0004000000000000ULL
127#define INFINIPATH_E_HARDWARE 0x0008000000000000ULL 127#define INFINIPATH_E_HARDWARE 0x0008000000000000ULL
128 128
129/*
130 * this is used to print "common" packet errors only when the
131 * __IPATH_ERRPKTDBG bit is set in ipath_debug.
132 */
133#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \
134 | INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \
135 | INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \
136 | INFINIPATH_E_REBP )
137
129/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ 138/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
130/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo 139/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
131 * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: eagerTID, 3: expTID 140 * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: expTID, 3: eagerTID
132 * bit 4: flag buffer, 5: datainfo, 6: header info */ 141 * bit 4: flag buffer, 5: datainfo, 6: header info */
133#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL 142#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL
134#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40 143#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
@@ -143,8 +152,8 @@
143/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */ 152/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
144#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL 153#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL
145#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL 154#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL
146#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x04ULL 155#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x04ULL
147#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x08ULL 156#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL
148#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL 157#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL
149#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL 158#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
150#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL 159#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL
@@ -299,13 +308,6 @@
299#define INFINIPATH_XGXS_RX_POL_SHIFT 19 308#define INFINIPATH_XGXS_RX_POL_SHIFT 19
300#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL 309#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
301 310
302#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
303
304/* TID entries (memory), HT-only */
305#define INFINIPATH_RT_VALID 0x8000000000000000ULL
306#define INFINIPATH_RT_ADDR_SHIFT 0
307#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF
308#define INFINIPATH_RT_BUFSIZE_SHIFT 48
309 311
310/* 312/*
311 * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our 313 * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index e86cb171872e..d9c2a9b15d86 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -202,6 +202,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
202 wq->tail = tail; 202 wq->tail = tail;
203 203
204 ret = 1; 204 ret = 1;
205 qp->r_wrid_valid = 1;
205 if (handler) { 206 if (handler) {
206 u32 n; 207 u32 n;
207 208
@@ -229,7 +230,6 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
229 } 230 }
230 } 231 }
231 spin_unlock_irqrestore(&rq->lock, flags); 232 spin_unlock_irqrestore(&rq->lock, flags);
232 qp->r_wrid_valid = 1;
233 233
234bail: 234bail:
235 return ret; 235 return ret;
@@ -255,6 +255,7 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
255 unsigned long flags; 255 unsigned long flags;
256 struct ib_wc wc; 256 struct ib_wc wc;
257 u64 sdata; 257 u64 sdata;
258 atomic64_t *maddr;
258 259
259 qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); 260 qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
260 if (!qp) { 261 if (!qp) {
@@ -265,7 +266,8 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
265again: 266again:
266 spin_lock_irqsave(&sqp->s_lock, flags); 267 spin_lock_irqsave(&sqp->s_lock, flags);
267 268
268 if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK)) { 269 if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) ||
270 qp->s_rnr_timeout) {
269 spin_unlock_irqrestore(&sqp->s_lock, flags); 271 spin_unlock_irqrestore(&sqp->s_lock, flags);
270 goto done; 272 goto done;
271 } 273 }
@@ -310,7 +312,7 @@ again:
310 sqp->s_rnr_retry--; 312 sqp->s_rnr_retry--;
311 dev->n_rnr_naks++; 313 dev->n_rnr_naks++;
312 sqp->s_rnr_timeout = 314 sqp->s_rnr_timeout =
313 ib_ipath_rnr_table[sqp->r_min_rnr_timer]; 315 ib_ipath_rnr_table[qp->r_min_rnr_timer];
314 ipath_insert_rnr_queue(sqp); 316 ipath_insert_rnr_queue(sqp);
315 goto done; 317 goto done;
316 } 318 }
@@ -343,20 +345,22 @@ again:
343 wc.sl = sqp->remote_ah_attr.sl; 345 wc.sl = sqp->remote_ah_attr.sl;
344 wc.dlid_path_bits = 0; 346 wc.dlid_path_bits = 0;
345 wc.port_num = 0; 347 wc.port_num = 0;
348 spin_lock_irqsave(&sqp->s_lock, flags);
346 ipath_sqerror_qp(sqp, &wc); 349 ipath_sqerror_qp(sqp, &wc);
350 spin_unlock_irqrestore(&sqp->s_lock, flags);
347 goto done; 351 goto done;
348 } 352 }
349 break; 353 break;
350 354
351 case IB_WR_RDMA_READ: 355 case IB_WR_RDMA_READ:
356 if (unlikely(!(qp->qp_access_flags &
357 IB_ACCESS_REMOTE_READ)))
358 goto acc_err;
352 if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, 359 if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
353 wqe->wr.wr.rdma.remote_addr, 360 wqe->wr.wr.rdma.remote_addr,
354 wqe->wr.wr.rdma.rkey, 361 wqe->wr.wr.rdma.rkey,
355 IB_ACCESS_REMOTE_READ))) 362 IB_ACCESS_REMOTE_READ)))
356 goto acc_err; 363 goto acc_err;
357 if (unlikely(!(qp->qp_access_flags &
358 IB_ACCESS_REMOTE_READ)))
359 goto acc_err;
360 qp->r_sge.sge = wqe->sg_list[0]; 364 qp->r_sge.sge = wqe->sg_list[0];
361 qp->r_sge.sg_list = wqe->sg_list + 1; 365 qp->r_sge.sg_list = wqe->sg_list + 1;
362 qp->r_sge.num_sge = wqe->wr.num_sge; 366 qp->r_sge.num_sge = wqe->wr.num_sge;
@@ -364,22 +368,22 @@ again:
364 368
365 case IB_WR_ATOMIC_CMP_AND_SWP: 369 case IB_WR_ATOMIC_CMP_AND_SWP:
366 case IB_WR_ATOMIC_FETCH_AND_ADD: 370 case IB_WR_ATOMIC_FETCH_AND_ADD:
371 if (unlikely(!(qp->qp_access_flags &
372 IB_ACCESS_REMOTE_ATOMIC)))
373 goto acc_err;
367 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), 374 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
368 wqe->wr.wr.rdma.remote_addr, 375 wqe->wr.wr.atomic.remote_addr,
369 wqe->wr.wr.rdma.rkey, 376 wqe->wr.wr.atomic.rkey,
370 IB_ACCESS_REMOTE_ATOMIC))) 377 IB_ACCESS_REMOTE_ATOMIC)))
371 goto acc_err; 378 goto acc_err;
372 /* Perform atomic OP and save result. */ 379 /* Perform atomic OP and save result. */
373 sdata = wqe->wr.wr.atomic.swap; 380 maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
374 spin_lock_irqsave(&dev->pending_lock, flags); 381 sdata = wqe->wr.wr.atomic.compare_add;
375 qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; 382 *(u64 *) sqp->s_sge.sge.vaddr =
376 if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) 383 (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
377 *(u64 *) qp->r_sge.sge.vaddr = 384 (u64) atomic64_add_return(sdata, maddr) - sdata :
378 qp->r_atomic_data + sdata; 385 (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
379 else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add) 386 sdata, wqe->wr.wr.atomic.swap);
380 *(u64 *) qp->r_sge.sge.vaddr = sdata;
381 spin_unlock_irqrestore(&dev->pending_lock, flags);
382 *(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data;
383 goto send_comp; 387 goto send_comp;
384 388
385 default: 389 default:
@@ -440,7 +444,7 @@ again:
440send_comp: 444send_comp:
441 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; 445 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
442 446
443 if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) || 447 if (!(sqp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
444 (wqe->wr.send_flags & IB_SEND_SIGNALED)) { 448 (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
445 wc.wr_id = wqe->wr.wr_id; 449 wc.wr_id = wqe->wr.wr_id;
446 wc.status = IB_WC_SUCCESS; 450 wc.status = IB_WC_SUCCESS;
@@ -502,7 +506,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
502 * We clear the tasklet flag now since we are committing to return 506 * We clear the tasklet flag now since we are committing to return
503 * from the tasklet function. 507 * from the tasklet function.
504 */ 508 */
505 clear_bit(IPATH_S_BUSY, &qp->s_flags); 509 clear_bit(IPATH_S_BUSY, &qp->s_busy);
506 tasklet_unlock(&qp->s_task); 510 tasklet_unlock(&qp->s_task);
507 want_buffer(dev->dd); 511 want_buffer(dev->dd);
508 dev->n_piowait++; 512 dev->n_piowait++;
@@ -541,6 +545,9 @@ int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
541 wr->sg_list[0].addr & (sizeof(u64) - 1))) { 545 wr->sg_list[0].addr & (sizeof(u64) - 1))) {
542 ret = -EINVAL; 546 ret = -EINVAL;
543 goto bail; 547 goto bail;
548 } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) {
549 ret = -EINVAL;
550 goto bail;
544 } 551 }
545 /* IB spec says that num_sge == 0 is OK. */ 552 /* IB spec says that num_sge == 0 is OK. */
546 if (wr->num_sge > qp->s_max_sge) { 553 if (wr->num_sge > qp->s_max_sge) {
@@ -647,7 +654,7 @@ void ipath_do_ruc_send(unsigned long data)
647 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); 654 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
648 struct ipath_other_headers *ohdr; 655 struct ipath_other_headers *ohdr;
649 656
650 if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags)) 657 if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
651 goto bail; 658 goto bail;
652 659
653 if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) { 660 if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) {
@@ -683,19 +690,15 @@ again:
683 */ 690 */
684 spin_lock_irqsave(&qp->s_lock, flags); 691 spin_lock_irqsave(&qp->s_lock, flags);
685 692
686 /* Sending responses has higher priority over sending requests. */ 693 if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
687 if (qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE && 694 ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
688 (bth0 = ipath_make_rc_ack(qp, ohdr, pmtu)) != 0) 695 ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
689 bth2 = qp->s_ack_psn++ & IPATH_PSN_MASK;
690 else if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
691 ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
692 ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
693 /* 696 /*
694 * Clear the busy bit before unlocking to avoid races with 697 * Clear the busy bit before unlocking to avoid races with
695 * adding new work queue items and then failing to process 698 * adding new work queue items and then failing to process
696 * them. 699 * them.
697 */ 700 */
698 clear_bit(IPATH_S_BUSY, &qp->s_flags); 701 clear_bit(IPATH_S_BUSY, &qp->s_busy);
699 spin_unlock_irqrestore(&qp->s_lock, flags); 702 spin_unlock_irqrestore(&qp->s_lock, flags);
700 goto bail; 703 goto bail;
701 } 704 }
@@ -728,7 +731,7 @@ again:
728 goto again; 731 goto again;
729 732
730clear: 733clear:
731 clear_bit(IPATH_S_BUSY, &qp->s_flags); 734 clear_bit(IPATH_S_BUSY, &qp->s_busy);
732bail: 735bail:
733 return; 736 return;
734} 737}
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index 30a825928fcf..9307f7187ca5 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -207,7 +207,7 @@ void ipath_get_faststats(unsigned long opaque)
207 * don't access the chip while running diags, or memory diags can 207 * don't access the chip while running diags, or memory diags can
208 * fail 208 * fail
209 */ 209 */
210 if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT) || 210 if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) ||
211 ipath_diag_inuse) 211 ipath_diag_inuse)
212 /* but re-arm the timer, for diags case; won't hurt other */ 212 /* but re-arm the timer, for diags case; won't hurt other */
213 goto done; 213 goto done;
@@ -237,11 +237,13 @@ void ipath_get_faststats(unsigned long opaque)
237 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) 237 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs)
238 && time_after(jiffies, dd->ipath_unmasktime)) { 238 && time_after(jiffies, dd->ipath_unmasktime)) {
239 char ebuf[256]; 239 char ebuf[256];
240 ipath_decode_err(ebuf, sizeof ebuf, 240 int iserr;
241 iserr = ipath_decode_err(ebuf, sizeof ebuf,
241 (dd->ipath_maskederrs & ~dd-> 242 (dd->ipath_maskederrs & ~dd->
242 ipath_ignorederrs)); 243 ipath_ignorederrs));
243 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) & 244 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
244 ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) 245 ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
246 INFINIPATH_E_PKTERRS ))
245 ipath_dev_err(dd, "Re-enabling masked errors " 247 ipath_dev_err(dd, "Re-enabling masked errors "
246 "(%s)\n", ebuf); 248 "(%s)\n", ebuf);
247 else { 249 else {
@@ -252,8 +254,12 @@ void ipath_get_faststats(unsigned long opaque)
252 * them. So only complain about these at debug 254 * them. So only complain about these at debug
253 * level. 255 * level.
254 */ 256 */
255 ipath_dbg("Disabling frequent queue full errors " 257 if (iserr)
256 "(%s)\n", ebuf); 258 ipath_dbg("Re-enabling queue full errors (%s)\n",
259 ebuf);
260 else
261 ipath_cdbg(ERRPKT, "Re-enabling packet"
262 " problem interrupt (%s)\n", ebuf);
257 } 263 }
258 dd->ipath_maskederrs = dd->ipath_ignorederrs; 264 dd->ipath_maskederrs = dd->ipath_ignorederrs;
259 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 265 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index 325d6634ff53..1c2b03c2ef5e 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -42,7 +42,7 @@ static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
42{ 42{
43 if (++qp->s_last == qp->s_size) 43 if (++qp->s_last == qp->s_size)
44 qp->s_last = 0; 44 qp->s_last = 0;
45 if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || 45 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
46 (wqe->wr.send_flags & IB_SEND_SIGNALED)) { 46 (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
47 wc->wr_id = wqe->wr.wr_id; 47 wc->wr_id = wqe->wr.wr_id;
48 wc->status = IB_WC_SUCCESS; 48 wc->status = IB_WC_SUCCESS;
@@ -344,13 +344,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
344 send_first: 344 send_first:
345 if (qp->r_reuse_sge) { 345 if (qp->r_reuse_sge) {
346 qp->r_reuse_sge = 0; 346 qp->r_reuse_sge = 0;
347 qp->r_sge = qp->s_rdma_sge; 347 qp->r_sge = qp->s_rdma_read_sge;
348 } else if (!ipath_get_rwqe(qp, 0)) { 348 } else if (!ipath_get_rwqe(qp, 0)) {
349 dev->n_pkt_drops++; 349 dev->n_pkt_drops++;
350 goto done; 350 goto done;
351 } 351 }
352 /* Save the WQE so we can reuse it in case of an error. */ 352 /* Save the WQE so we can reuse it in case of an error. */
353 qp->s_rdma_sge = qp->r_sge; 353 qp->s_rdma_read_sge = qp->r_sge;
354 qp->r_rcv_len = 0; 354 qp->r_rcv_len = 0;
355 if (opcode == OP(SEND_ONLY)) 355 if (opcode == OP(SEND_ONLY))
356 goto send_last; 356 goto send_last;
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 9a3e54664ee4..a518f7c8fa83 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -308,6 +308,11 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
308 goto bail; 308 goto bail;
309 } 309 }
310 310
311 if (wr->wr.ud.ah->pd != qp->ibqp.pd) {
312 ret = -EPERM;
313 goto bail;
314 }
315
311 /* IB spec says that num_sge == 0 is OK. */ 316 /* IB spec says that num_sge == 0 is OK. */
312 if (wr->num_sge > qp->s_max_sge) { 317 if (wr->num_sge > qp->s_max_sge) {
313 ret = -EINVAL; 318 ret = -EINVAL;
@@ -467,7 +472,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
467 472
468done: 473done:
469 /* Queue the completion status entry. */ 474 /* Queue the completion status entry. */
470 if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || 475 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
471 (wr->send_flags & IB_SEND_SIGNALED)) { 476 (wr->send_flags & IB_SEND_SIGNALED)) {
472 wc.wr_id = wr->wr_id; 477 wc.wr_id = wr->wr_id;
473 wc.status = IB_WC_SUCCESS; 478 wc.status = IB_WC_SUCCESS;
@@ -647,6 +652,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
647 ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); 652 ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
648 ipath_copy_sge(&qp->r_sge, data, 653 ipath_copy_sge(&qp->r_sge, data,
649 wc.byte_len - sizeof(struct ib_grh)); 654 wc.byte_len - sizeof(struct ib_grh));
655 qp->r_wrid_valid = 0;
650 wc.wr_id = qp->r_wr_id; 656 wc.wr_id = qp->r_wr_id;
651 wc.status = IB_WC_SUCCESS; 657 wc.status = IB_WC_SUCCESS;
652 wc.opcode = IB_WC_RECV; 658 wc.opcode = IB_WC_RECV;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 2aaacdb7e52a..18c6df2052c2 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -438,6 +438,10 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
438 struct ipath_mcast *mcast; 438 struct ipath_mcast *mcast;
439 struct ipath_mcast_qp *p; 439 struct ipath_mcast_qp *p;
440 440
441 if (lnh != IPATH_LRH_GRH) {
442 dev->n_pkt_drops++;
443 goto bail;
444 }
441 mcast = ipath_mcast_find(&hdr->u.l.grh.dgid); 445 mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
442 if (mcast == NULL) { 446 if (mcast == NULL) {
443 dev->n_pkt_drops++; 447 dev->n_pkt_drops++;
@@ -445,8 +449,7 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
445 } 449 }
446 dev->n_multicast_rcv++; 450 dev->n_multicast_rcv++;
447 list_for_each_entry_rcu(p, &mcast->qp_list, list) 451 list_for_each_entry_rcu(p, &mcast->qp_list, list)
448 ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data, 452 ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
449 tlen, p->qp);
450 /* 453 /*
451 * Notify ipath_multicast_detach() if it is waiting for us 454 * Notify ipath_multicast_detach() if it is waiting for us
452 * to finish. 455 * to finish.
@@ -773,7 +776,6 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
773 /* +1 is for the qword padding of pbc */ 776 /* +1 is for the qword padding of pbc */
774 plen = hdrwords + ((len + 3) >> 2) + 1; 777 plen = hdrwords + ((len + 3) >> 2) + 1;
775 if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) { 778 if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
776 ipath_dbg("packet len 0x%x too long, failing\n", plen);
777 ret = -EINVAL; 779 ret = -EINVAL;
778 goto bail; 780 goto bail;
779 } 781 }
@@ -980,14 +982,14 @@ static int ipath_query_device(struct ib_device *ibdev,
980 props->max_cqe = ib_ipath_max_cqes; 982 props->max_cqe = ib_ipath_max_cqes;
981 props->max_mr = dev->lk_table.max; 983 props->max_mr = dev->lk_table.max;
982 props->max_pd = ib_ipath_max_pds; 984 props->max_pd = ib_ipath_max_pds;
983 props->max_qp_rd_atom = 1; 985 props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
984 props->max_qp_init_rd_atom = 1; 986 props->max_qp_init_rd_atom = 255;
985 /* props->max_res_rd_atom */ 987 /* props->max_res_rd_atom */
986 props->max_srq = ib_ipath_max_srqs; 988 props->max_srq = ib_ipath_max_srqs;
987 props->max_srq_wr = ib_ipath_max_srq_wrs; 989 props->max_srq_wr = ib_ipath_max_srq_wrs;
988 props->max_srq_sge = ib_ipath_max_srq_sges; 990 props->max_srq_sge = ib_ipath_max_srq_sges;
989 /* props->local_ca_ack_delay */ 991 /* props->local_ca_ack_delay */
990 props->atomic_cap = IB_ATOMIC_HCA; 992 props->atomic_cap = IB_ATOMIC_GLOB;
991 props->max_pkeys = ipath_get_npkeys(dev->dd); 993 props->max_pkeys = ipath_get_npkeys(dev->dd);
992 props->max_mcast_grp = ib_ipath_max_mcast_grps; 994 props->max_mcast_grp = ib_ipath_max_mcast_grps;
993 props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached; 995 props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
@@ -1557,7 +1559,6 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
1557 dev->node_type = RDMA_NODE_IB_CA; 1559 dev->node_type = RDMA_NODE_IB_CA;
1558 dev->phys_port_cnt = 1; 1560 dev->phys_port_cnt = 1;
1559 dev->dma_device = &dd->pcidev->dev; 1561 dev->dma_device = &dd->pcidev->dev;
1560 dev->class_dev.dev = dev->dma_device;
1561 dev->query_device = ipath_query_device; 1562 dev->query_device = ipath_query_device;
1562 dev->modify_device = ipath_modify_device; 1563 dev->modify_device = ipath_modify_device;
1563 dev->query_port = ipath_query_port; 1564 dev->query_port = ipath_query_port;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index c0c8d5b24a7d..7c4929f1cb5b 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -40,9 +40,12 @@
40#include <linux/interrupt.h> 40#include <linux/interrupt.h>
41#include <linux/kref.h> 41#include <linux/kref.h>
42#include <rdma/ib_pack.h> 42#include <rdma/ib_pack.h>
43#include <rdma/ib_user_verbs.h>
43 44
44#include "ipath_layer.h" 45#include "ipath_layer.h"
45 46
47#define IPATH_MAX_RDMA_ATOMIC 4
48
46#define QPN_MAX (1 << 24) 49#define QPN_MAX (1 << 24)
47#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) 50#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
48 51
@@ -89,7 +92,7 @@ struct ib_reth {
89} __attribute__ ((packed)); 92} __attribute__ ((packed));
90 93
91struct ib_atomic_eth { 94struct ib_atomic_eth {
92 __be64 vaddr; 95 __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
93 __be32 rkey; 96 __be32 rkey;
94 __be64 swap_data; 97 __be64 swap_data;
95 __be64 compare_data; 98 __be64 compare_data;
@@ -108,7 +111,7 @@ struct ipath_other_headers {
108 } rc; 111 } rc;
109 struct { 112 struct {
110 __be32 aeth; 113 __be32 aeth;
111 __be64 atomic_ack_eth; 114 __be32 atomic_ack_eth[2];
112 } at; 115 } at;
113 __be32 imm_data; 116 __be32 imm_data;
114 __be32 aeth; 117 __be32 aeth;
@@ -186,7 +189,7 @@ struct ipath_mmap_info {
186struct ipath_cq_wc { 189struct ipath_cq_wc {
187 u32 head; /* index of next entry to fill */ 190 u32 head; /* index of next entry to fill */
188 u32 tail; /* index of next ib_poll_cq() entry */ 191 u32 tail; /* index of next ib_poll_cq() entry */
189 struct ib_wc queue[1]; /* this is actually size ibcq.cqe + 1 */ 192 struct ib_uverbs_wc queue[1]; /* this is actually size ibcq.cqe + 1 */
190}; 193};
191 194
192/* 195/*
@@ -312,6 +315,19 @@ struct ipath_sge_state {
312}; 315};
313 316
314/* 317/*
318 * This structure holds the information that the send tasklet needs
319 * to send a RDMA read response or atomic operation.
320 */
321struct ipath_ack_entry {
322 u8 opcode;
323 u32 psn;
324 union {
325 struct ipath_sge_state rdma_sge;
326 u64 atomic_data;
327 };
328};
329
330/*
315 * Variables prefixed with s_ are for the requester (sender). 331 * Variables prefixed with s_ are for the requester (sender).
316 * Variables prefixed with r_ are for the responder (receiver). 332 * Variables prefixed with r_ are for the responder (receiver).
317 * Variables prefixed with ack_ are for responder replies. 333 * Variables prefixed with ack_ are for responder replies.
@@ -333,24 +349,24 @@ struct ipath_qp {
333 struct ipath_mmap_info *ip; 349 struct ipath_mmap_info *ip;
334 struct ipath_sge_state *s_cur_sge; 350 struct ipath_sge_state *s_cur_sge;
335 struct ipath_sge_state s_sge; /* current send request data */ 351 struct ipath_sge_state s_sge; /* current send request data */
336 /* current RDMA read send data */ 352 struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1];
337 struct ipath_sge_state s_rdma_sge; 353 struct ipath_sge_state s_ack_rdma_sge;
354 struct ipath_sge_state s_rdma_read_sge;
338 struct ipath_sge_state r_sge; /* current receive data */ 355 struct ipath_sge_state r_sge; /* current receive data */
339 spinlock_t s_lock; 356 spinlock_t s_lock;
340 unsigned long s_flags; 357 unsigned long s_busy;
341 u32 s_hdrwords; /* size of s_hdr in 32 bit words */ 358 u32 s_hdrwords; /* size of s_hdr in 32 bit words */
342 u32 s_cur_size; /* size of send packet in bytes */ 359 u32 s_cur_size; /* size of send packet in bytes */
343 u32 s_len; /* total length of s_sge */ 360 u32 s_len; /* total length of s_sge */
344 u32 s_rdma_len; /* total length of s_rdma_sge */ 361 u32 s_rdma_read_len; /* total length of s_rdma_read_sge */
345 u32 s_next_psn; /* PSN for next request */ 362 u32 s_next_psn; /* PSN for next request */
346 u32 s_last_psn; /* last response PSN processed */ 363 u32 s_last_psn; /* last response PSN processed */
347 u32 s_psn; /* current packet sequence number */ 364 u32 s_psn; /* current packet sequence number */
348 u32 s_ack_psn; /* PSN for RDMA_READ */ 365 u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */
366 u32 s_ack_psn; /* PSN for acking sends and RDMA writes */
349 u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ 367 u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
350 u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ 368 u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
351 u64 r_wr_id; /* ID for current receive WQE */ 369 u64 r_wr_id; /* ID for current receive WQE */
352 u64 r_atomic_data; /* data for last atomic op */
353 u32 r_atomic_psn; /* PSN of last atomic op */
354 u32 r_len; /* total length of r_sge */ 370 u32 r_len; /* total length of r_sge */
355 u32 r_rcv_len; /* receive data len processed */ 371 u32 r_rcv_len; /* receive data len processed */
356 u32 r_psn; /* expected rcv packet sequence number */ 372 u32 r_psn; /* expected rcv packet sequence number */
@@ -360,12 +376,13 @@ struct ipath_qp {
360 u8 s_ack_state; /* opcode of packet to ACK */ 376 u8 s_ack_state; /* opcode of packet to ACK */
361 u8 s_nak_state; /* non-zero if NAK is pending */ 377 u8 s_nak_state; /* non-zero if NAK is pending */
362 u8 r_state; /* opcode of last packet received */ 378 u8 r_state; /* opcode of last packet received */
363 u8 r_ack_state; /* opcode of packet to ACK */
364 u8 r_nak_state; /* non-zero if NAK is pending */ 379 u8 r_nak_state; /* non-zero if NAK is pending */
365 u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ 380 u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
366 u8 r_reuse_sge; /* for UC receive errors */ 381 u8 r_reuse_sge; /* for UC receive errors */
367 u8 r_sge_inx; /* current index into sg_list */ 382 u8 r_sge_inx; /* current index into sg_list */
368 u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */ 383 u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */
384 u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
385 u8 r_head_ack_queue; /* index into s_ack_queue[] */
369 u8 qp_access_flags; 386 u8 qp_access_flags;
370 u8 s_max_sge; /* size of s_wq->sg_list */ 387 u8 s_max_sge; /* size of s_wq->sg_list */
371 u8 s_retry_cnt; /* number of times to retry */ 388 u8 s_retry_cnt; /* number of times to retry */
@@ -374,6 +391,10 @@ struct ipath_qp {
374 u8 s_rnr_retry; /* requester RNR retry counter */ 391 u8 s_rnr_retry; /* requester RNR retry counter */
375 u8 s_wait_credit; /* limit number of unacked packets sent */ 392 u8 s_wait_credit; /* limit number of unacked packets sent */
376 u8 s_pkey_index; /* PKEY index to use */ 393 u8 s_pkey_index; /* PKEY index to use */
394 u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
395 u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
396 u8 s_tail_ack_queue; /* index into s_ack_queue[] */
397 u8 s_flags;
377 u8 timeout; /* Timeout for this QP */ 398 u8 timeout; /* Timeout for this QP */
378 enum ib_mtu path_mtu; 399 enum ib_mtu path_mtu;
379 u32 remote_qpn; 400 u32 remote_qpn;
@@ -390,11 +411,16 @@ struct ipath_qp {
390 struct ipath_sge r_sg_list[0]; /* verified SGEs */ 411 struct ipath_sge r_sg_list[0]; /* verified SGEs */
391}; 412};
392 413
414/* Bit definition for s_busy. */
415#define IPATH_S_BUSY 0
416
393/* 417/*
394 * Bit definitions for s_flags. 418 * Bit definitions for s_flags.
395 */ 419 */
396#define IPATH_S_BUSY 0 420#define IPATH_S_SIGNAL_REQ_WR 0x01
397#define IPATH_S_SIGNAL_REQ_WR 1 421#define IPATH_S_FENCE_PENDING 0x02
422#define IPATH_S_RDMAR_PENDING 0x04
423#define IPATH_S_ACK_PENDING 0x08
398 424
399#define IPATH_PSN_CREDIT 2048 425#define IPATH_PSN_CREDIT 2048
400 426
@@ -706,8 +732,6 @@ int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
706 732
707int ipath_destroy_srq(struct ib_srq *ibsrq); 733int ipath_destroy_srq(struct ib_srq *ibsrq);
708 734
709void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
710
711int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); 735int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
712 736
713struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, 737struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
@@ -757,9 +781,6 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
757 781
758void ipath_do_ruc_send(unsigned long data); 782void ipath_do_ruc_send(unsigned long data);
759 783
760u32 ipath_make_rc_ack(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
761 u32 pmtu);
762
763int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr, 784int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
764 u32 pmtu, u32 *bth0p, u32 *bth2p); 785 u32 pmtu, u32 *bth0p, u32 *bth2p);
765 786
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 0d9b7d06bbc2..773145e29947 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -1013,14 +1013,14 @@ static struct {
1013 u64 latest_fw; 1013 u64 latest_fw;
1014 u32 flags; 1014 u32 flags;
1015} mthca_hca_table[] = { 1015} mthca_hca_table[] = {
1016 [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 4, 0), 1016 [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 5, 0),
1017 .flags = 0 }, 1017 .flags = 0 },
1018 [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 600), 1018 [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 8, 200),
1019 .flags = MTHCA_FLAG_PCIE }, 1019 .flags = MTHCA_FLAG_PCIE },
1020 [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 400), 1020 [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 2, 0),
1021 .flags = MTHCA_FLAG_MEMFREE | 1021 .flags = MTHCA_FLAG_MEMFREE |
1022 MTHCA_FLAG_PCIE }, 1022 MTHCA_FLAG_PCIE },
1023 [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 1, 0), 1023 [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 2, 0),
1024 .flags = MTHCA_FLAG_MEMFREE | 1024 .flags = MTHCA_FLAG_MEMFREE |
1025 MTHCA_FLAG_PCIE | 1025 MTHCA_FLAG_PCIE |
1026 MTHCA_FLAG_SINAI_OPT } 1026 MTHCA_FLAG_SINAI_OPT }
@@ -1135,7 +1135,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
1135 goto err_cmd; 1135 goto err_cmd;
1136 1136
1137 if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) { 1137 if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) {
1138 mthca_warn(mdev, "HCA FW version %d.%d.%d is old (%d.%d.%d is current).\n", 1138 mthca_warn(mdev, "HCA FW version %d.%d.%3d is old (%d.%d.%3d is current).\n",
1139 (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, 1139 (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff,
1140 (int) (mdev->fw_ver & 0xffff), 1140 (int) (mdev->fw_ver & 0xffff),
1141 (int) (mthca_hca_table[hca_type].latest_fw >> 32), 1141 (int) (mthca_hca_table[hca_type].latest_fw >> 32),
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index ee561c569d5f..aa6c70a6a36f 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -297,7 +297,8 @@ out:
297 297
298int mthca_write_mtt_size(struct mthca_dev *dev) 298int mthca_write_mtt_size(struct mthca_dev *dev)
299{ 299{
300 if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy) 300 if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
301 !(dev->mthca_flags & MTHCA_FLAG_FMR))
301 /* 302 /*
302 * Be friendly to WRITE_MTT command 303 * Be friendly to WRITE_MTT command
303 * and leave two empty slots for the 304 * and leave two empty slots for the
@@ -355,7 +356,8 @@ int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
355 int size = mthca_write_mtt_size(dev); 356 int size = mthca_write_mtt_size(dev);
356 int chunk; 357 int chunk;
357 358
358 if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy) 359 if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
360 !(dev->mthca_flags & MTHCA_FLAG_FMR))
359 return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len); 361 return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len);
360 362
361 while (list_len > 0) { 363 while (list_len > 0) {
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 0725ad7ad9bf..47e6fd46d9c2 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1293,7 +1293,6 @@ int mthca_register_device(struct mthca_dev *dev)
1293 dev->ib_dev.node_type = RDMA_NODE_IB_CA; 1293 dev->ib_dev.node_type = RDMA_NODE_IB_CA;
1294 dev->ib_dev.phys_port_cnt = dev->limits.num_ports; 1294 dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
1295 dev->ib_dev.dma_device = &dev->pdev->dev; 1295 dev->ib_dev.dma_device = &dev->pdev->dev;
1296 dev->ib_dev.class_dev.dev = &dev->pdev->dev;
1297 dev->ib_dev.query_device = mthca_query_device; 1296 dev->ib_dev.query_device = mthca_query_device;
1298 dev->ib_dev.query_port = mthca_query_port; 1297 dev->ib_dev.query_port = mthca_query_port;
1299 dev->ib_dev.modify_device = mthca_modify_device; 1298 dev->ib_dev.modify_device = mthca_modify_device;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 1c6b63aca268..8fe6fee7a97a 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1419,11 +1419,10 @@ void mthca_free_qp(struct mthca_dev *dev,
1419 * unref the mem-free tables and free the QPN in our table. 1419 * unref the mem-free tables and free the QPN in our table.
1420 */ 1420 */
1421 if (!qp->ibqp.uobject) { 1421 if (!qp->ibqp.uobject) {
1422 mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, 1422 mthca_cq_clean(dev, recv_cq, qp->qpn,
1423 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); 1423 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
1424 if (qp->ibqp.send_cq != qp->ibqp.recv_cq) 1424 if (send_cq != recv_cq)
1425 mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn, 1425 mthca_cq_clean(dev, send_cq, qp->qpn, NULL);
1426 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
1427 1426
1428 mthca_free_memfree(dev, qp); 1427 mthca_free_memfree(dev, qp);
1429 mthca_free_wqe_buf(dev, qp); 1428 mthca_free_wqe_buf(dev, qp);