author	Dmitry Torokhov <dtor@insightbb.com>	2007-05-01 00:24:54 -0400
committer	Dmitry Torokhov <dtor@insightbb.com>	2007-05-01 00:24:54 -0400
commit	bc95f3669f5e6f63cf0b84fe4922c3c6dd4aa775 (patch)
tree	427fcf2a7287c16d4b5aa6cbf494d59579a6a8b1 /drivers/infiniband/hw
parent	3d29cdff999c37b3876082278a8134a0642a02cd (diff)
parent	dc87c3985e9b442c60994308a96f887579addc39 (diff)
Merge master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6

Conflicts:

	drivers/usb/input/Makefile
	drivers/usb/input/gtco.c
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--  drivers/infiniband/hw/amso1100/c2.c | 6
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_provider.c | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/Makefile | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_dbg.c | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_hal.c | 45
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_hal.h | 6
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_resource.c | 15
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_resource.h | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_wr.h | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch.c | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch.h | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cm.c | 55
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cm.h | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cq.c | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_ev.c | 13
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_mem.c | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.c | 49
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.h | 35
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_qp.c | 34
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_user.h | 1
-rw-r--r--  drivers/infiniband/hw/ehca/Kconfig | 8
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_classes.h | 26
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_cq.c | 16
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_eq.c | 1
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_hca.c | 58
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_irq.c | 336
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_irq.h | 1
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_main.c | 37
-rw-r--r--  drivers/infiniband/hw/ehca/hcp_if.c | 24
-rw-r--r--  drivers/infiniband/hw/ehca/hcp_if.h | 4
-rw-r--r--  drivers/infiniband/hw/ehca/ipz_pt_fn.h | 11
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_common.h | 23
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_cq.c | 38
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_debug.h | 1
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_diag.c | 11
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_dma.c | 6
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_driver.c | 123
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_eeprom.c | 4
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_file_ops.c | 287
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_fs.c | 16
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_iba6110.c | 154
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_iba6120.c | 75
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_init_chip.c | 86
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_intr.c | 100
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_kernel.h | 10
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_keys.c | 14
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_mr.c | 12
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_qp.c | 133
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_rc.c | 920
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_registers.h | 22
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_ruc.c | 63
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_stats.c | 16
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_uc.c | 6
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_ud.c | 8
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs.c | 15
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs.h | 57
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_main.c | 10
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.c | 4
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mr.c | 21
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c | 1
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c | 22
61 files changed, 1879 insertions(+), 1170 deletions(-)
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index 59243d9aedd6..58bc272bd407 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -439,7 +439,8 @@ static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem)
 	}
 
 	/* Setup the skb for reuse since we're dropping this pkt */
-	elem->skb->tail = elem->skb->data = elem->skb->head;
+	elem->skb->data = elem->skb->head;
+	skb_reset_tail_pointer(elem->skb);
 
 	/* Zero out the rxp hdr in the sk_buff */
 	memset(elem->skb->data, 0, sizeof(*rxp_hdr));
@@ -521,9 +522,8 @@ static void c2_rx_interrupt(struct net_device *netdev)
 	 * "sizeof(struct c2_rxp_hdr)".
 	 */
 	skb->data += sizeof(*rxp_hdr);
-	skb->tail = skb->data + buflen;
+	skb_set_tail_pointer(skb, buflen);
 	skb->len = buflen;
-	skb->dev = netdev;
 	skb->protocol = eth_type_trans(skb, netdev);
 
 	netif_rx(skb);
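
The two c2.c hunks above follow the tree-wide conversion away from writing skb->tail directly: the accessors keep working whether sk_buff stores its tail as a pointer or as an offset (NET_SKBUFF_DATA_USES_OFFSET). A minimal sketch of the pattern, assuming a hypothetical driver that recycles its receive buffer (recycle_rx_skb() is illustrative, not from this diff):

	#include <linux/skbuff.h>

	static void recycle_rx_skb(struct sk_buff *skb, unsigned int buflen)
	{
		/* was: skb->tail = skb->data = skb->head; */
		skb->data = skb->head;
		skb_reset_tail_pointer(skb);		/* tail = data */

		/* was: skb->tail = skb->data + buflen; */
		skb_set_tail_pointer(skb, buflen);	/* tail = data + buflen */
		skb->len = buflen;
	}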
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index fef972752912..607c09bf764c 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -796,7 +796,6 @@ int c2_register_device(struct c2_dev *dev)
 	memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6);
 	dev->ibdev.phys_port_cnt = 1;
 	dev->ibdev.dma_device = &dev->pcidev->dev;
-	dev->ibdev.class_dev.dev = &dev->pcidev->dev;
 	dev->ibdev.query_device = c2_query_device;
 	dev->ibdev.query_port = c2_query_port;
 	dev->ibdev.modify_port = c2_modify_port;
diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile
index 0e110f32f128..36b98989b15e 100644
--- a/drivers/infiniband/hw/cxgb3/Makefile
+++ b/drivers/infiniband/hw/cxgb3/Makefile
@@ -8,5 +8,4 @@ iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \
 
 ifdef CONFIG_INFINIBAND_CXGB3_DEBUG
 EXTRA_CFLAGS += -DDEBUG
-iw_cxgb3-y += cxio_dbg.o
 endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
index 5a7306f5efae..75f7b16a271d 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 82fa72041989..f5e9aeec6f6e 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
@@ -37,6 +36,7 @@
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/pci.h>
+#include <linux/dma-mapping.h>
 
 #include "cxio_resource.h"
 #include "cxio_hal.h"
@@ -46,7 +46,7 @@
 static LIST_HEAD(rdev_list);
 static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL;
 
-static inline struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name)
+static struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name)
 {
 	struct cxio_rdev *rdev;
 
@@ -56,8 +56,7 @@ static inline struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name)
 	return NULL;
 }
 
-static inline struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev
-							     *tdev)
+static struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev)
 {
 	struct cxio_rdev *rdev;
 
@@ -119,7 +118,7 @@ int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
 	return 0;
 }
 
-static inline int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid)
+static int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid)
 {
 	struct rdma_cq_setup setup;
 	setup.id = cqid;
@@ -131,7 +130,7 @@ static inline int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid)
 	return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
 }
 
-int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
+static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
 {
 	u64 sge_cmd;
 	struct t3_modify_qp_wr *wqe;
@@ -426,7 +425,7 @@ void cxio_flush_hw_cq(struct t3_cq *cq)
 	}
 }
 
-static inline int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq)
+static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq)
 {
 	if (CQE_OPCODE(*cqe) == T3_TERMINATE)
 		return 0;
@@ -499,9 +498,9 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
 	u64 sge_cmd, ctx0, ctx1;
 	u64 base_addr;
 	struct t3_modify_qp_wr *wqe;
-	struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
-
+	struct sk_buff *skb;
 
+	skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
 	if (!skb) {
 		PDBG("%s alloc_skb failed\n", __FUNCTION__);
 		return -ENOMEM;
@@ -509,7 +508,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
 	err = cxio_hal_init_ctrl_cq(rdev_p);
 	if (err) {
 		PDBG("%s err %d initializing ctrl_cq\n", __FUNCTION__, err);
-		return err;
+		goto err;
 	}
 	rdev_p->ctrl_qp.workq = dma_alloc_coherent(
 					&(rdev_p->rnic_info.pdev->dev),
@@ -519,7 +518,8 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
 					GFP_KERNEL);
 	if (!rdev_p->ctrl_qp.workq) {
 		PDBG("%s dma_alloc_coherent failed\n", __FUNCTION__);
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto err;
 	}
 	pci_unmap_addr_set(&rdev_p->ctrl_qp, mapping,
 			   rdev_p->ctrl_qp.dma_addr);
@@ -557,6 +557,9 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
 	       rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
 	skb->priority = CPL_PRIORITY_CONTROL;
 	return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
+err:
+	kfree_skb(skb);
+	return err;
 }
 
 static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
@@ -761,17 +764,6 @@ ret:
 	return err;
 }
 
-/* IN : stag key, pdid, pbl_size
- * Out: stag index, actaul pbl_size, and pbl_addr allocated.
- */
-int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid,
-		       enum tpt_mem_perm perm, u32 * pbl_size, u32 * pbl_addr)
-{
-	*stag = T3_STAG_UNSET;
-	return (__cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR,
-			      perm, 0, 0ULL, 0, 0, NULL, pbl_size, pbl_addr));
-}
-
 int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
 			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
 			   u8 page_size, __be64 *pbl, u32 *pbl_size,
@@ -1030,7 +1022,7 @@ void __exit cxio_hal_exit(void)
 	cxio_hal_destroy_rhdl_resource();
 }
 
-static inline void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq)
+static void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq)
 {
 	struct t3_swsq *sqp;
 	__u32 ptr = wq->sq_rptr;
@@ -1059,9 +1051,8 @@ static inline void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq)
 		break;
 }
 
-static inline void create_read_req_cqe(struct t3_wq *wq,
-				       struct t3_cqe *hw_cqe,
-				       struct t3_cqe *read_cqe)
+static void create_read_req_cqe(struct t3_wq *wq, struct t3_cqe *hw_cqe,
+				struct t3_cqe *read_cqe)
 {
 	read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr;
 	read_cqe->len = wq->oldest_read->read_len;
@@ -1074,7 +1065,7 @@ static inline void create_read_req_cqe(struct t3_wq *wq,
 /*
  * Return a ptr to the next read wr in the SWSQ or NULL.
  */
-static inline void advance_oldest_read(struct t3_wq *wq)
+static void advance_oldest_read(struct t3_wq *wq)
 {
 
 	u32 rptr = wq->oldest_read - wq->sq + 1;
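
The cxio_hal_init_ctrl_qp() hunks above replace two early returns with a single goto-based exit so the skb allocated at entry is freed on every failure path. A minimal sketch of that unwind pattern, assuming hypothetical helpers do_setup() and submit_skb():

	#include <linux/skbuff.h>

	static int do_setup(void);			/* hypothetical step */
	static int submit_skb(struct sk_buff *skb);	/* hypothetical; consumes skb */

	static int init_ctrl_qp_like(void)
	{
		struct sk_buff *skb;
		int err;

		skb = alloc_skb(128, GFP_KERNEL);
		if (!skb)
			return -ENOMEM;		/* nothing to unwind yet */

		err = do_setup();
		if (err)
			goto err;		/* was "return err": leaked skb */

		return submit_skb(skb);
	err:
		kfree_skb(skb);
		return err;
	}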
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index 1b97e80b8780..99543d634704 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
@@ -144,7 +143,6 @@ int cxio_rdev_open(struct cxio_rdev *rdev);
 void cxio_rdev_close(struct cxio_rdev *rdev);
 int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
 		   enum t3_cq_opcode op, u32 credit);
-int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev, u32 qpid);
 int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
 int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
 int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
@@ -155,8 +153,6 @@ int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
 int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq,
 		    struct cxio_ucontext *uctx);
 int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode);
-int cxio_allocate_stag(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
-		       enum tpt_mem_perm perm, u32 * pbl_size, u32 * pbl_addr);
 int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
 			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
 			   u8 page_size, __be64 *pbl, u32 *pbl_size,
@@ -172,8 +168,6 @@ int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag);
 int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr);
 void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
 void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
-u32 cxio_hal_get_rhdl(void);
-void cxio_hal_put_rhdl(u32 rhdl);
 u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp);
 void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid);
 int __init cxio_hal_init(void);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index 997aa32cbf07..d3095ae5bc2e 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
@@ -180,7 +179,7 @@ tpt_err:
 /*
  * returns 0 if no resource available
  */
-static inline u32 cxio_hal_get_resource(struct kfifo *fifo)
+static u32 cxio_hal_get_resource(struct kfifo *fifo)
 {
 	u32 entry;
 	if (kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32)))
@@ -189,21 +188,11 @@ static inline u32 cxio_hal_get_resource(struct kfifo *fifo)
 		return 0;  /* fifo emptry */
 }
 
-static inline void cxio_hal_put_resource(struct kfifo *fifo, u32 entry)
+static void cxio_hal_put_resource(struct kfifo *fifo, u32 entry)
 {
 	BUG_ON(kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)) == 0);
 }
 
-u32 cxio_hal_get_rhdl(void)
-{
-	return cxio_hal_get_resource(rhdl_fifo);
-}
-
-void cxio_hal_put_rhdl(u32 rhdl)
-{
-	cxio_hal_put_resource(rhdl_fifo, rhdl);
-}
-
 u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp)
 {
 	return cxio_hal_get_resource(rscp->tpt_fifo);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.h b/drivers/infiniband/hw/cxgb3/cxio_resource.h
index a6bbe8370d81..a2703a3d882d 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.h
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index 103fc42d6976..90d7b8972cb4 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index 4611afa52220..0315c9d9fce9 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
index 6517ef85026f..caf4e6007a44 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ b/drivers/infiniband/hw/cxgb3/iwch.h
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index a522b1baa3b4..3b4b0acd707f 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
@@ -210,8 +209,7 @@ static enum iwch_ep_state state_read(struct iwch_ep_common *epc)
 	return state;
 }
 
-static inline void __state_set(struct iwch_ep_common *epc,
-			       enum iwch_ep_state new)
+static void __state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
 {
 	epc->state = new;
 }
@@ -307,8 +305,7 @@ static int status2errno(int status)
  */
 static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
 {
-	if (skb) {
-		BUG_ON(skb_cloned(skb));
+	if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
 		skb_trim(skb, 0);
 		skb_get(skb);
 	} else {
@@ -480,7 +477,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
 	BUG_ON(skb_cloned(skb));
 
 	mpalen = sizeof(*mpa) + ep->plen;
-	if (skb->data + mpalen + sizeof(*req) > skb->end) {
+	if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) {
 		kfree_skb(skb);
 		skb=alloc_skb(mpalen + sizeof(*req), GFP_KERNEL);
 		if (!skb) {
@@ -510,7 +507,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
 	 */
 	skb_get(skb);
 	set_arp_failure_handler(skb, arp_failure_discard);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	len = skb->len;
 	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
 	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
@@ -562,7 +559,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
 	skb_get(skb);
 	skb->priority = CPL_PRIORITY_DATA;
 	set_arp_failure_handler(skb, arp_failure_discard);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
 	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
 	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
@@ -613,7 +610,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
 	 */
 	skb_get(skb);
 	set_arp_failure_handler(skb, arp_failure_discard);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	len = skb->len;
 	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
 	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
@@ -824,7 +821,8 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
 	/*
 	 * copy the new data into our accumulation buffer.
 	 */
-	memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len);
+	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
+				  skb->len);
 	ep->mpa_pkt_len += skb->len;
 
 	/*
@@ -943,7 +941,8 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
 	/*
 	 * Copy the new data into our accumulation buffer.
 	 */
-	memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len);
+	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
+				  skb->len);
 	ep->mpa_pkt_len += skb->len;
 
 	/*
@@ -1417,6 +1416,7 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 		wake_up(&ep->com.waitq);
 		break;
 	case FPDU_MODE:
+		start_ep_timer(ep);
 		__state_set(&ep->com, CLOSING);
 		attrs.next_state = IWCH_QP_STATE_CLOSING;
 		iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
@@ -1427,7 +1427,6 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 		disconnect = 0;
 		break;
 	case CLOSING:
-		start_ep_timer(ep);
 		__state_set(&ep->com, MORIBUND);
 		disconnect = 0;
 		break;
@@ -1460,7 +1459,7 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 /*
  * Returns whether an ABORT_REQ_RSS message is a negative advice.
  */
-static inline int is_neg_adv_abort(unsigned int status)
+static int is_neg_adv_abort(unsigned int status)
 {
 	return status == CPL_ERR_RTX_NEG_ADVICE ||
 	       status == CPL_ERR_PERSIST_NEG_ADVICE;
@@ -1489,8 +1488,10 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 	case CONNECTING:
 		break;
 	case MPA_REQ_WAIT:
+		stop_ep_timer(ep);
 		break;
 	case MPA_REQ_SENT:
+		stop_ep_timer(ep);
 		connect_reply_upcall(ep, -ECONNRESET);
 		break;
 	case MPA_REP_SENT:
@@ -1509,9 +1510,10 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 		get_ep(&ep->com);
 		break;
 	case MORIBUND:
+	case CLOSING:
 		stop_ep_timer(ep);
+		/*FALLTHROUGH*/
 	case FPDU_MODE:
-	case CLOSING:
 		if (ep->com.cm_id && ep->com.qp) {
 			attrs.next_state = IWCH_QP_STATE_ERROR;
 			ret = iwch_modify_qp(ep->com.qp->rhp,
@@ -1572,7 +1574,6 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 	spin_lock_irqsave(&ep->com.lock, flags);
 	switch (ep->com.state) {
 	case CLOSING:
-		start_ep_timer(ep);
 		__state_set(&ep->com, MORIBUND);
 		break;
 	case MORIBUND:
@@ -1588,6 +1589,8 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 		__state_set(&ep->com, DEAD);
 		release = 1;
 		break;
+	case ABORTING:
+		break;
 	case DEAD:
 	default:
 		BUG_ON(1);
@@ -1618,7 +1621,8 @@ static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 	PDBG("%s ep %p\n", __FUNCTION__, ep);
 	skb_pull(skb, sizeof(struct cpl_rdma_terminate));
 	PDBG("%s saving %d bytes of term msg\n", __FUNCTION__, skb->len);
-	memcpy(ep->com.qp->attr.terminate_buffer, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer,
+				  skb->len);
 	ep->com.qp->attr.terminate_msg_len = skb->len;
 	ep->com.qp->attr.is_terminate_local = 0;
 	return CPL_RET_BUF_DONE;
@@ -1636,6 +1640,7 @@ static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 
 	printk(KERN_ERR MOD "%s BAD CLOSE - Aborting tid %u\n",
 	       __FUNCTION__, ep->hwtid);
+	stop_ep_timer(ep);
 	attrs.next_state = IWCH_QP_STATE_ERROR;
 	iwch_modify_qp(ep->com.qp->rhp,
 		       ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
@@ -1660,6 +1665,7 @@ static void ep_timeout(unsigned long arg)
 		break;
 	case MPA_REQ_WAIT:
 		break;
+	case CLOSING:
 	case MORIBUND:
 		if (ep->com.cm_id && ep->com.qp) {
 			attrs.next_state = IWCH_QP_STATE_ERROR;
@@ -1688,12 +1694,11 @@ int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
 		return -ECONNRESET;
 	}
 	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
-	state_set(&ep->com, CLOSING);
 	if (mpa_rev == 0)
 		abort_connection(ep, NULL, GFP_KERNEL);
 	else {
 		err = send_mpa_reject(ep, pdata, pdata_len);
-		err = send_halfclose(ep, GFP_KERNEL);
+		err = iwch_ep_disconnect(ep, 0, GFP_KERNEL);
 	}
 	return 0;
 }
@@ -1958,11 +1963,11 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
 	case MPA_REQ_RCVD:
 	case MPA_REP_SENT:
 	case FPDU_MODE:
+		start_ep_timer(ep);
 		ep->com.state = CLOSING;
 		close = 1;
 		break;
 	case CLOSING:
-		start_ep_timer(ep);
 		ep->com.state = MORIBUND;
 		close = 1;
 		break;
@@ -2024,6 +2029,17 @@ static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 	return 0;
 }
 
+static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+	struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
+
+	if (rpl->status != CPL_ERR_NONE) {
+		printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u "
+		       "for tid %u\n", rpl->status, GET_TID(rpl));
+	}
+	return CPL_RET_BUF_DONE;
+}
+
 int __init iwch_cm_init(void)
 {
 	skb_queue_head_init(&rxq);
@@ -2051,6 +2067,7 @@ int __init iwch_cm_init(void)
 	t3c_handlers[CPL_ABORT_REQ_RSS] = sched;
 	t3c_handlers[CPL_RDMA_TERMINATE] = sched;
 	t3c_handlers[CPL_RDMA_EC_STATUS] = sched;
+	t3c_handlers[CPL_SET_TCB_RPL] = set_tcb_rpl;
 
 	/*
 	 * These are the real handlers that are called from a
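
A recurring theme in the iwch_cm.c changes above is safer sk_buff reuse: get_skb() now recycles a passed-in buffer only when it is linear and uncloned, falling back to a fresh allocation instead of hitting a BUG_ON(). A minimal sketch of that reuse-or-allocate guard, assuming a caller that may hand in a previously used skb (reuse_or_alloc_skb() is an illustrative name):

	#include <linux/skbuff.h>

	static struct sk_buff *reuse_or_alloc_skb(struct sk_buff *skb,
						  int len, gfp_t gfp)
	{
		if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
			skb_trim(skb, 0);	/* safe to recycle in place */
			skb_get(skb);		/* reference for the caller */
		} else {
			skb = alloc_skb(len, gfp);
		}
		return skb;
	}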
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index 7c810d904279..0c6f281bd4a0 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index 98b3bdb5de9e..d7624c170ee7 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c
index a6efa8fe15d8..b40676662a8a 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_ev.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
@@ -48,12 +47,6 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
 	struct iwch_qp_attributes attrs;
 	struct iwch_qp *qhp;
 
-	printk(KERN_ERR "%s - AE qpid 0x%x opcode %d status 0x%x "
-	       "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__,
-	       CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
-	       CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
-	       CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
-
 	spin_lock(&rnicp->lock);
 	qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
 
@@ -74,6 +67,12 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
 		return;
 	}
 
+	printk(KERN_ERR "%s - AE qpid 0x%x opcode %d status 0x%x "
+	       "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__,
+	       CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
+	       CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
+	       CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
+
 	atomic_inc(&qhp->refcnt);
 	spin_unlock(&rnicp->lock);
 
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
index 2b6cd53bb3fc..a6c2c4ba29e6 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 6861087d776c..af28a317016d 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
@@ -332,6 +331,7 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 	int ret = 0;
 	struct iwch_mm_entry *mm;
 	struct iwch_ucontext *ucontext;
+	u64 addr;
 
 	PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __FUNCTION__, vma->vm_pgoff,
 	     key, len);
@@ -346,10 +346,11 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 	mm = remove_mmap(ucontext, key, len);
 	if (!mm)
 		return -EINVAL;
+	addr = mm->addr;
 	kfree(mm);
 
-	if ((mm->addr >= rdev_p->rnic_info.udbell_physbase) &&
-	    (mm->addr < (rdev_p->rnic_info.udbell_physbase +
+	if ((addr >= rdev_p->rnic_info.udbell_physbase) &&
+	    (addr < (rdev_p->rnic_info.udbell_physbase +
 		       rdev_p->rnic_info.udbell_len))) {
 
 		/*
@@ -363,7 +364,7 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
 		vma->vm_flags &= ~VM_MAYREAD;
 		ret = io_remap_pfn_range(vma, vma->vm_start,
-					 mm->addr >> PAGE_SHIFT,
+					 addr >> PAGE_SHIFT,
 					 len, vma->vm_page_prot);
 	} else {
 
@@ -371,7 +372,7 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 		 * Map WQ or CQ contig dma memory...
 		 */
 		ret = remap_pfn_range(vma, vma->vm_start,
-				      mm->addr >> PAGE_SHIFT,
+				      addr >> PAGE_SHIFT,
 				      len, vma->vm_page_prot);
 	}
 
@@ -464,9 +465,6 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
 	php = to_iwch_pd(pd);
 	rhp = php->rhp;
 
-	acc = iwch_convert_access(acc);
-
-
 	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
 	if (!mhp)
 		return ERR_PTR(-ENOMEM);
@@ -492,12 +490,7 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
 	mhp->attr.pdid = php->pdid;
 	mhp->attr.zbva = 0;
 
-	/* NOTE: TPT perms are backwards from BIND WR perms! */
-	mhp->attr.perms = (acc & 0x1) << 3;
-	mhp->attr.perms |= (acc & 0x2) << 1;
-	mhp->attr.perms |= (acc & 0x4) >> 1;
-	mhp->attr.perms |= (acc & 0x8) >> 3;
-
+	mhp->attr.perms = iwch_ib_to_tpt_access(acc);
 	mhp->attr.va_fbo = *iova_start;
 	mhp->attr.page_size = shift - 12;
 
@@ -526,7 +519,6 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
 	struct iwch_mr mh, *mhp;
 	struct iwch_pd *php;
 	struct iwch_dev *rhp;
-	int new_acc;
 	__be64 *page_list = NULL;
 	int shift = 0;
 	u64 total_size;
@@ -547,19 +539,20 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
 	if (rhp != php->rhp)
 		return -EINVAL;
 
-	new_acc = mhp->attr.perms;
-
 	memcpy(&mh, mhp, sizeof *mhp);
 
 	if (mr_rereg_mask & IB_MR_REREG_PD)
 		php = to_iwch_pd(pd);
 	if (mr_rereg_mask & IB_MR_REREG_ACCESS)
-		mh.attr.perms = iwch_convert_access(acc);
-	if (mr_rereg_mask & IB_MR_REREG_TRANS)
+		mh.attr.perms = iwch_ib_to_tpt_access(acc);
+	if (mr_rereg_mask & IB_MR_REREG_TRANS) {
 		ret = build_phys_page_list(buffer_list, num_phys_buf,
 					   iova_start,
 					   &total_size, &npages,
 					   &shift, &page_list);
+		if (ret)
+			return ret;
+	}
 
 	ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages);
 	kfree(page_list);
@@ -569,7 +562,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
 	if (mr_rereg_mask & IB_MR_REREG_PD)
 		mhp->attr.pdid = php->pdid;
 	if (mr_rereg_mask & IB_MR_REREG_ACCESS)
-		mhp->attr.perms = acc;
+		mhp->attr.perms = iwch_ib_to_tpt_access(acc);
 	if (mr_rereg_mask & IB_MR_REREG_TRANS) {
 		mhp->attr.zbva = 0;
 		mhp->attr.va_fbo = *iova_start;
@@ -614,8 +607,6 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 		goto err;
 	}
 
-	acc = iwch_convert_access(acc);
-
 	i = n = 0;
 
 	list_for_each_entry(chunk, &region->chunk_list, list)
@@ -631,10 +622,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 	mhp->rhp = rhp;
 	mhp->attr.pdid = php->pdid;
 	mhp->attr.zbva = 0;
-	mhp->attr.perms = (acc & 0x1) << 3;
-	mhp->attr.perms |= (acc & 0x2) << 1;
-	mhp->attr.perms |= (acc & 0x4) >> 1;
-	mhp->attr.perms |= (acc & 0x8) >> 3;
+	mhp->attr.perms = iwch_ib_to_tpt_access(acc);
 	mhp->attr.va_fbo = region->virt_base;
 	mhp->attr.page_size = shift - 12;
 	mhp->attr.len = (u32) region->length;
@@ -737,10 +725,8 @@ static int iwch_destroy_qp(struct ib_qp *ib_qp)
 	qhp = to_iwch_qp(ib_qp);
 	rhp = qhp->rhp;
 
-	if (qhp->attr.state == IWCH_QP_STATE_RTS) {
-		attrs.next_state = IWCH_QP_STATE_ERROR;
-		iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0);
-	}
+	attrs.next_state = IWCH_QP_STATE_ERROR;
+	iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0);
 	wait_event(qhp->wait, !qhp->ep);
 
 	remove_handle(rhp, &rhp->qpidr, qhp->wq.qpid);
@@ -949,7 +935,7 @@ void iwch_qp_rem_ref(struct ib_qp *qp)
 	wake_up(&(to_iwch_qp(qp)->wait));
 }
 
-struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
+static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
 {
 	PDBG("%s ib_dev %p qpn 0x%x\n", __FUNCTION__, dev, qpn);
 	return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn);
@@ -1122,7 +1108,6 @@ int iwch_register_device(struct iwch_dev *dev)
 	memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
 	dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
 	dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev);
-	dev->ibdev.class_dev.dev = &(dev->rdev.rnic_info.pdev->dev);
 	dev->ibdev.query_device = iwch_query_device;
 	dev->ibdev.query_port = iwch_query_port;
 	dev->ibdev.modify_port = iwch_modify_port;
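
The iwch_mmap() hunks above fix a use-after-free: mm->addr was still being dereferenced after kfree(mm). The repaired shape, reduced to a minimal sketch with a simplified stand-in type:

	#include <linux/slab.h>
	#include <linux/types.h>

	struct mm_entry {		/* stand-in for struct iwch_mm_entry */
		u64 addr;
	};

	static u64 consume_entry(struct mm_entry *mm)
	{
		u64 addr = mm->addr;	/* copy out everything needed... */

		kfree(mm);		/* ...before the struct is freed */
		return addr;		/* no dangling reads of mm->addr */
	}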
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 61e3278fd7a8..93bcc56756bd 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
@@ -179,7 +178,6 @@ static inline struct iwch_qp *to_iwch_qp(struct ib_qp *ibqp)
 
 void iwch_qp_add_ref(struct ib_qp *qp);
 void iwch_qp_rem_ref(struct ib_qp *qp);
-struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn);
 
 struct iwch_ucontext {
 	struct ib_ucontext ibucontext;
@@ -288,27 +286,20 @@ static inline int iwch_convert_state(enum ib_qp_state ib_state)
 	}
 }
 
-enum iwch_mem_perms {
-	IWCH_MEM_ACCESS_LOCAL_READ = 1 << 0,
-	IWCH_MEM_ACCESS_LOCAL_WRITE = 1 << 1,
-	IWCH_MEM_ACCESS_REMOTE_READ = 1 << 2,
-	IWCH_MEM_ACCESS_REMOTE_WRITE = 1 << 3,
-	IWCH_MEM_ACCESS_ATOMICS = 1 << 4,
-	IWCH_MEM_ACCESS_BINDING = 1 << 5,
-	IWCH_MEM_ACCESS_LOCAL =
-	    (IWCH_MEM_ACCESS_LOCAL_READ | IWCH_MEM_ACCESS_LOCAL_WRITE),
-	IWCH_MEM_ACCESS_REMOTE =
-	    (IWCH_MEM_ACCESS_REMOTE_WRITE | IWCH_MEM_ACCESS_REMOTE_READ)
-	/* cannot go beyond 1 << 31 */
-} __attribute__ ((packed));
-
-static inline u32 iwch_convert_access(int acc)
+static inline u32 iwch_ib_to_tpt_access(int acc)
 {
-	return (acc & IB_ACCESS_REMOTE_WRITE ? IWCH_MEM_ACCESS_REMOTE_WRITE : 0)
-	       | (acc & IB_ACCESS_REMOTE_READ ? IWCH_MEM_ACCESS_REMOTE_READ : 0) |
-	       (acc & IB_ACCESS_LOCAL_WRITE ? IWCH_MEM_ACCESS_LOCAL_WRITE : 0) |
-	       (acc & IB_ACCESS_MW_BIND ? IWCH_MEM_ACCESS_BINDING : 0) |
-	       IWCH_MEM_ACCESS_LOCAL_READ;
+	return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) |
+	       (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) |
+	       (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) |
+	       TPT_LOCAL_READ;
+}
+
+static inline u32 iwch_ib_to_mwbind_access(int acc)
+{
+	return (acc & IB_ACCESS_REMOTE_WRITE ? T3_MEM_ACCESS_REM_WRITE : 0) |
+	       (acc & IB_ACCESS_REMOTE_READ ? T3_MEM_ACCESS_REM_READ : 0) |
+	       (acc & IB_ACCESS_LOCAL_WRITE ? T3_MEM_ACCESS_LOCAL_WRITE : 0) |
+	       T3_MEM_ACCESS_LOCAL_READ;
 }
 
 enum iwch_mmid_state {
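
The replacement helpers above derive the hardware permission bits directly from the IB verbs access flags, eliminating the driver-private enum whose bit layout had to be shuffled by hand ("TPT perms are backwards from BIND WR perms"). A usage sketch, assuming only the IB core flags and the helper defined above:

	#include <rdma/ib_verbs.h>

	static u32 example_mr_perms(void)
	{
		int acc = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;

		/* Local read is always granted; the rest follow acc. */
		return iwch_ib_to_tpt_access(acc);
	}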
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index e066727504b6..0a472c9b44db 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
@@ -37,8 +36,8 @@
 
 #define NO_SUPPORT -1
 
-static inline int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
-				       u8 * flit_cnt)
+static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
+				u8 * flit_cnt)
 {
 	int i;
 	u32 plen;
@@ -97,8 +96,8 @@ static inline int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
 	return 0;
 }
 
-static inline int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
-					u8 *flit_cnt)
+static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
+				 u8 *flit_cnt)
 {
 	int i;
 	u32 plen;
@@ -138,8 +137,8 @@ static inline int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
 	return 0;
 }
 
-static inline int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
-					u8 *flit_cnt)
+static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
+				u8 *flit_cnt)
 {
 	if (wr->num_sge > 1)
 		return -EINVAL;
@@ -159,9 +158,8 @@ static inline int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
 /*
  * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now.
  */
-static inline int iwch_sgl2pbl_map(struct iwch_dev *rhp,
-				   struct ib_sge *sg_list, u32 num_sgle,
-				   u32 * pbl_addr, u8 * page_size)
+static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
+			    u32 num_sgle, u32 * pbl_addr, u8 * page_size)
 {
 	int i;
 	struct iwch_mr *mhp;
@@ -207,9 +205,8 @@ static inline int iwch_sgl2pbl_map(struct iwch_dev *rhp,
 	return 0;
 }
 
-static inline int iwch_build_rdma_recv(struct iwch_dev *rhp,
-				       union t3_wr *wqe,
-				       struct ib_recv_wr *wr)
+static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe,
+				struct ib_recv_wr *wr)
 {
 	int i, err = 0;
 	u32 pbl_addr[4];
@@ -442,7 +439,7 @@ int iwch_bind_mw(struct ib_qp *qp,
 	wqe->bind.type = T3_VA_BASED_TO;
 
 	/* TBD: check perms */
-	wqe->bind.perms = iwch_convert_access(mw_bind->mw_access_flags);
+	wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags);
 	wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey);
 	wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
 	wqe->bind.mw_len = cpu_to_be32(mw_bind->length);
@@ -474,8 +471,7 @@ int iwch_bind_mw(struct ib_qp *qp,
 	return err;
 }
 
-static inline void build_term_codes(int t3err, u8 *layer_type, u8 *ecode,
-				    int tagged)
+static void build_term_codes(int t3err, u8 *layer_type, u8 *ecode, int tagged)
 {
 	switch (t3err) {
 	case TPT_ERR_STAG:
@@ -673,7 +669,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
 	spin_lock_irqsave(&qhp->lock, *flag);
 }
 
-static inline void flush_qp(struct iwch_qp *qhp, unsigned long *flag)
+static void flush_qp(struct iwch_qp *qhp, unsigned long *flag)
 {
 	if (t3b_device(qhp->rhp))
 		cxio_set_wq_in_error(&qhp->wq);
@@ -685,7 +681,7 @@ static inline void flush_qp(struct iwch_qp *qhp, unsigned long *flag)
 /*
  * Return non zero if at least one RECV was pre-posted.
  */
-static inline int rqes_posted(struct iwch_qp *qhp)
+static int rqes_posted(struct iwch_qp *qhp)
 {
 	return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV;
 }
@@ -846,6 +842,8 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
 		break;
 	case IWCH_QP_STATE_TERMINATE:
 		qhp->attr.state = IWCH_QP_STATE_TERMINATE;
+		if (t3b_device(qhp->rhp))
+			cxio_set_wq_in_error(&qhp->wq);
 		if (!internal)
 			terminate = 1;
 		break;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_user.h b/drivers/infiniband/hw/cxgb3/iwch_user.h
index c4e7fbea8bbd..cb7086f558c1 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_user.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_user.h
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 * 3 *
5 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig
index 727b10d89686..1a854598e0e6 100644
--- a/drivers/infiniband/hw/ehca/Kconfig
+++ b/drivers/infiniband/hw/ehca/Kconfig
@@ -7,11 +7,3 @@ config INFINIBAND_EHCA
7 To compile the driver as a module, choose M here. The module 7 To compile the driver as a module, choose M here. The module
8 will be called ib_ehca. 8 will be called ib_ehca.
9 9
10config INFINIBAND_EHCA_SCALING
11 bool "Scaling support (EXPERIMENTAL)"
12 depends on IBMEBUS && INFINIBAND_EHCA && HOTPLUG_CPU && EXPERIMENTAL
13 default y
14 ---help---
15 eHCA scaling support schedules the CQ callbacks to different CPUs.
16
17 To enable this feature choose Y here.
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index cf95ee474b0f..10fb8fbafa0c 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -42,8 +42,6 @@
42#ifndef __EHCA_CLASSES_H__ 42#ifndef __EHCA_CLASSES_H__
43#define __EHCA_CLASSES_H__ 43#define __EHCA_CLASSES_H__
44 44
45#include "ehca_classes.h"
46#include "ipz_pt_fn.h"
47 45
48struct ehca_module; 46struct ehca_module;
49struct ehca_qp; 47struct ehca_qp;
@@ -54,15 +52,25 @@ struct ehca_mw;
54struct ehca_pd; 52struct ehca_pd;
55struct ehca_av; 53struct ehca_av;
56 54
57#ifdef CONFIG_PPC64 55#include <linux/wait.h>
58#include "ehca_classes_pSeries.h"
59#endif
60 56
61#include <rdma/ib_verbs.h> 57#include <rdma/ib_verbs.h>
62#include <rdma/ib_user_verbs.h> 58#include <rdma/ib_user_verbs.h>
63 59
60#ifdef CONFIG_PPC64
61#include "ehca_classes_pSeries.h"
62#endif
63#include "ipz_pt_fn.h"
64#include "ehca_qes.h"
64#include "ehca_irq.h" 65#include "ehca_irq.h"
65 66
67#define EHCA_EQE_CACHE_SIZE 20
68
69struct ehca_eqe_cache_entry {
70 struct ehca_eqe *eqe;
71 struct ehca_cq *cq;
72};
73
66struct ehca_eq { 74struct ehca_eq {
67 u32 length; 75 u32 length;
68 struct ipz_queue ipz_queue; 76 struct ipz_queue ipz_queue;
@@ -74,6 +82,8 @@ struct ehca_eq {
74 spinlock_t spinlock; 82 spinlock_t spinlock;
75 struct tasklet_struct interrupt_task; 83 struct tasklet_struct interrupt_task;
76 u32 ist; 84 u32 ist;
85 spinlock_t irq_spinlock;
86 struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE];
77}; 87};
78 88
79struct ehca_sport { 89struct ehca_sport {
@@ -96,6 +106,7 @@ struct ehca_shca {
96 struct ehca_mr *maxmr; 106 struct ehca_mr *maxmr;
97 struct ehca_pd *pd; 107 struct ehca_pd *pd;
98 struct h_galpas galpas; 108 struct h_galpas galpas;
109 struct mutex modify_mutex;
99}; 110};
100 111
101struct ehca_pd { 112struct ehca_pd {
@@ -145,7 +156,9 @@ struct ehca_cq {
145 spinlock_t cb_lock; 156 spinlock_t cb_lock;
146 struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; 157 struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
147 struct list_head entry; 158 struct list_head entry;
148 u32 nr_callbacks; 159 u32 nr_callbacks; /* #events assigned to cpu by scaling code */
160 u32 nr_events; /* #events seen */
161 wait_queue_head_t wait_completion;
149 spinlock_t task_lock; 162 spinlock_t task_lock;
150 u32 ownpid; 163 u32 ownpid;
151 /* mmap counter for resources mapped into user space */ 164 /* mmap counter for resources mapped into user space */
@@ -269,6 +282,7 @@ extern struct idr ehca_cq_idr;
269extern int ehca_static_rate; 282extern int ehca_static_rate;
270extern int ehca_port_act_time; 283extern int ehca_port_act_time;
271extern int ehca_use_hp_mr; 284extern int ehca_use_hp_mr;
285extern int ehca_scaling_code;
272 286
273struct ipzu_queue_resp { 287struct ipzu_queue_resp {
274 u32 qe_size; /* queue entry size */ 288 u32 qe_size; /* queue entry size */
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 6ebfa27e4e16..e2cdc1a16fe9 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -146,6 +146,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
146 spin_lock_init(&my_cq->spinlock); 146 spin_lock_init(&my_cq->spinlock);
147 spin_lock_init(&my_cq->cb_lock); 147 spin_lock_init(&my_cq->cb_lock);
148 spin_lock_init(&my_cq->task_lock); 148 spin_lock_init(&my_cq->task_lock);
149 init_waitqueue_head(&my_cq->wait_completion);
149 my_cq->ownpid = current->tgid; 150 my_cq->ownpid = current->tgid;
150 151
151 cq = &my_cq->ib_cq; 152 cq = &my_cq->ib_cq;
@@ -302,6 +303,16 @@ create_cq_exit1:
302 return cq; 303 return cq;
303} 304}
304 305
306static int get_cq_nr_events(struct ehca_cq *my_cq)
307{
308 int ret;
309 unsigned long flags;
310 spin_lock_irqsave(&ehca_cq_idr_lock, flags);
311 ret = my_cq->nr_events;
312 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
313 return ret;
314}
315
305int ehca_destroy_cq(struct ib_cq *cq) 316int ehca_destroy_cq(struct ib_cq *cq)
306{ 317{
307 u64 h_ret; 318 u64 h_ret;
@@ -329,10 +340,11 @@ int ehca_destroy_cq(struct ib_cq *cq)
329 } 340 }
330 341
331 spin_lock_irqsave(&ehca_cq_idr_lock, flags); 342 spin_lock_irqsave(&ehca_cq_idr_lock, flags);
332 while (my_cq->nr_callbacks) { 343 while (my_cq->nr_events) {
333 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); 344 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
334 yield(); 345 wait_event(my_cq->wait_completion, !get_cq_nr_events(my_cq));
335 spin_lock_irqsave(&ehca_cq_idr_lock, flags); 346 spin_lock_irqsave(&ehca_cq_idr_lock, flags);
347 /* recheck nr_events to assure no cqe has just arrived */
336 } 348 }
337 349
338 idr_remove(&ehca_cq_idr, my_cq->token); 350 idr_remove(&ehca_cq_idr, my_cq->token);
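The key fix in ehca_destroy_cq() above replaces a yield() busy-loop with sleeping on the new wait_completion queue until nr_events, which the interrupt path increments per EQE and decrements once the completion handler has run, drops to zero. A userspace pthread model of that handshake, with invented names (event_handler, all_done); the kernel uses wait_event()/wake_up() rather than a condition variable:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t all_done = PTHREAD_COND_INITIALIZER;
static int nr_events = 3;               /* completions still in flight */

static void *event_handler(void *arg)
{
    int done = 0;

    while (!done) {
        pthread_mutex_lock(&lock);
        if (nr_events && --nr_events == 0)
            pthread_cond_signal(&all_done);  /* kernel: wake_up() */
        done = (nr_events == 0);
        pthread_mutex_unlock(&lock);
    }
    return NULL;
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, event_handler, NULL);

    pthread_mutex_lock(&lock);          /* destroy path */
    while (nr_events)                   /* kernel: wait_event(..., !nr_events) */
        pthread_cond_wait(&all_done, &lock);
    pthread_mutex_unlock(&lock);

    pthread_join(t, NULL);
    puts("no events in flight; CQ can be freed");
    return 0;
}

Sleeping here instead of yielding avoids burning a CPU while destruction races with a busy interrupt path, and the recheck under the lock closes the window where an EQE arrives between the wakeup and the final test.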
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
index 24ceab0bae4a..4961eb88827c 100644
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -61,6 +61,7 @@ int ehca_create_eq(struct ehca_shca *shca,
61 struct ib_device *ib_dev = &shca->ib_device; 61 struct ib_device *ib_dev = &shca->ib_device;
62 62
63 spin_lock_init(&eq->spinlock); 63 spin_lock_init(&eq->spinlock);
64 spin_lock_init(&eq->irq_spinlock);
64 eq->is_initialized = 0; 65 eq->is_initialized = 0;
65 66
66 if (type != EHCA_EQ && type != EHCA_NEQ) { 67 if (type != EHCA_EQ && type != EHCA_NEQ) {
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index b7be950ab47c..32b55a4f0e5b 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -147,6 +147,7 @@ int ehca_query_port(struct ib_device *ibdev,
147 break; 147 break;
148 } 148 }
149 149
150 props->port_cap_flags = rblock->capability_mask;
150 props->gid_tbl_len = rblock->gid_tbl_len; 151 props->gid_tbl_len = rblock->gid_tbl_len;
151 props->max_msg_sz = rblock->max_msg_sz; 152 props->max_msg_sz = rblock->max_msg_sz;
152 props->bad_pkey_cntr = rblock->bad_pkey_cntr; 153 props->bad_pkey_cntr = rblock->bad_pkey_cntr;
@@ -162,6 +163,9 @@ int ehca_query_port(struct ib_device *ibdev,
162 props->active_width = IB_WIDTH_12X; 163 props->active_width = IB_WIDTH_12X;
163 props->active_speed = 0x1; 164 props->active_speed = 0x1;
164 165
166 /* at the moment (logical) link state is always LINK_UP */
167 props->phys_state = 0x5;
168
165query_port1: 169query_port1:
166 ehca_free_fw_ctrlblock(rblock); 170 ehca_free_fw_ctrlblock(rblock);
167 171
@@ -233,10 +237,60 @@ query_gid1:
233 return ret; 237 return ret;
234} 238}
235 239
240const u32 allowed_port_caps = (
241 IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP |
242 IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP |
243 IB_PORT_VENDOR_CLASS_SUP);
244
236int ehca_modify_port(struct ib_device *ibdev, 245int ehca_modify_port(struct ib_device *ibdev,
237 u8 port, int port_modify_mask, 246 u8 port, int port_modify_mask,
238 struct ib_port_modify *props) 247 struct ib_port_modify *props)
239{ 248{
240 /* Not implemented yet */ 249 int ret = 0;
241 return -EFAULT; 250 struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device);
251 struct hipz_query_port *rblock;
252 u32 cap;
253 u64 hret;
254
255 if ((props->set_port_cap_mask | props->clr_port_cap_mask)
256 & ~allowed_port_caps) {
257 ehca_err(&shca->ib_device, "Non-changeable bits set in masks "
258 "set=%x clr=%x allowed=%x", props->set_port_cap_mask,
259 props->clr_port_cap_mask, allowed_port_caps);
260 return -EINVAL;
261 }
262
263 if (mutex_lock_interruptible(&shca->modify_mutex))
264 return -ERESTARTSYS;
265
266 rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
267 if (!rblock) {
268 ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
269 ret = -ENOMEM;
270 goto modify_port1;
271 }
272
273 if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
274 ehca_err(&shca->ib_device, "Can't query port properties");
275 ret = -EINVAL;
276 goto modify_port2;
277 }
278
279 cap = (rblock->capability_mask | props->set_port_cap_mask)
280 & ~props->clr_port_cap_mask;
281
282 hret = hipz_h_modify_port(shca->ipz_hca_handle, port,
283 cap, props->init_type, port_modify_mask);
284 if (hret != H_SUCCESS) {
285 ehca_err(&shca->ib_device, "Modify port failed hret=%lx", hret);
286 ret = -EINVAL;
287 }
288
289modify_port2:
290 ehca_free_fw_ctrlblock(rblock);
291
292modify_port1:
293 mutex_unlock(&shca->modify_mutex);
294
295 return ret;
242} 296}
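ehca_modify_port() above is, at its core, a validated read-modify-write of the port capability mask under modify_mutex: reject any request touching bits outside allowed_port_caps, then apply cap = (current | set) & ~clr. A standalone sketch of just that mask logic (the CAP_* values below are invented; the real ones are the IB_PORT_* flags listed in the hunk):

#include <stdint.h>
#include <stdio.h>

#define CAP_SM       (1u << 1)    /* illustrative values only */
#define CAP_CM_SUP   (1u << 16)
#define ALLOWED_CAPS (CAP_SM | CAP_CM_SUP)

static int modify_caps(uint32_t *cur, uint32_t set, uint32_t clr)
{
    if ((set | clr) & ~ALLOWED_CAPS)
        return -1;                      /* kernel: -EINVAL */
    *cur = (*cur | set) & ~clr;         /* same formula as the hunk */
    return 0;
}

int main(void)
{
    uint32_t cap = CAP_SM;

    if (modify_caps(&cap, CAP_CM_SUP, CAP_SM) == 0)
        printf("new cap mask: %#x\n", cap);   /* CM_SUP set, SM cleared */
    return 0;
}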
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 6c4f9f91b15d..f284be1c9166 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -63,13 +63,11 @@
63#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63) 63#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63)
64#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7) 64#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7)
65 65
66#ifdef CONFIG_INFINIBAND_EHCA_SCALING
67
68static void queue_comp_task(struct ehca_cq *__cq); 66static void queue_comp_task(struct ehca_cq *__cq);
69 67
70static struct ehca_comp_pool* pool; 68static struct ehca_comp_pool* pool;
69#ifdef CONFIG_HOTPLUG_CPU
71static struct notifier_block comp_pool_callback_nb; 70static struct notifier_block comp_pool_callback_nb;
72
73#endif 71#endif
74 72
75static inline void comp_event_callback(struct ehca_cq *cq) 73static inline void comp_event_callback(struct ehca_cq *cq)
@@ -206,7 +204,7 @@ static void qp_event_callback(struct ehca_shca *shca,
206} 204}
207 205
208static void cq_event_callback(struct ehca_shca *shca, 206static void cq_event_callback(struct ehca_shca *shca,
209 u64 eqe) 207 u64 eqe)
210{ 208{
211 struct ehca_cq *cq; 209 struct ehca_cq *cq;
212 unsigned long flags; 210 unsigned long flags;
@@ -318,7 +316,7 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
318 "disruptive port %x configuration change", port); 316 "disruptive port %x configuration change", port);
319 317
320 ehca_info(&shca->ib_device, 318 ehca_info(&shca->ib_device,
321 "port %x is inactive.", port); 319 "port %x is inactive.", port);
322 event.device = &shca->ib_device; 320 event.device = &shca->ib_device;
323 event.event = IB_EVENT_PORT_ERR; 321 event.event = IB_EVENT_PORT_ERR;
324 event.element.port_num = port; 322 event.element.port_num = port;
@@ -326,7 +324,7 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
326 ib_dispatch_event(&event); 324 ib_dispatch_event(&event);
327 325
328 ehca_info(&shca->ib_device, 326 ehca_info(&shca->ib_device,
329 "port %x is active.", port); 327 "port %x is active.", port);
330 event.device = &shca->ib_device; 328 event.device = &shca->ib_device;
331 event.event = IB_EVENT_PORT_ACTIVE; 329 event.event = IB_EVENT_PORT_ACTIVE;
332 event.element.port_num = port; 330 event.element.port_num = port;
@@ -401,200 +399,274 @@ irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
401 return IRQ_HANDLED; 399 return IRQ_HANDLED;
402} 400}
403 401
404void ehca_tasklet_eq(unsigned long data) 402
403static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
405{ 404{
406 struct ehca_shca *shca = (struct ehca_shca*)data; 405 u64 eqe_value;
407 struct ehca_eqe *eqe; 406 u32 token;
408 int int_state; 407 unsigned long flags;
409 int query_cnt = 0; 408 struct ehca_cq *cq;
410 409
411 do { 410 eqe_value = eqe->entry;
412 eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); 411 ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value);
412 if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
413 ehca_dbg(&shca->ib_device, "Got completion event");
414 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
415 spin_lock_irqsave(&ehca_cq_idr_lock, flags);
416 cq = idr_find(&ehca_cq_idr, token);
417 if (cq == NULL) {
418 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
419 ehca_err(&shca->ib_device,
420 "Invalid eqe for non-existing cq token=%x",
421 token);
422 return;
423 }
424 reset_eq_pending(cq);
425 cq->nr_events++;
426 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
427 if (ehca_scaling_code)
428 queue_comp_task(cq);
429 else {
430 comp_event_callback(cq);
431 spin_lock_irqsave(&ehca_cq_idr_lock, flags);
432 cq->nr_events--;
433 if (!cq->nr_events)
434 wake_up(&cq->wait_completion);
435 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
436 }
437 } else {
438 ehca_dbg(&shca->ib_device, "Got non completion event");
439 parse_identifier(shca, eqe_value);
440 }
441}
413 442
414 if ((shca->hw_level >= 2) && eqe) 443void ehca_process_eq(struct ehca_shca *shca, int is_irq)
415 int_state = 1; 444{
416 else 445 struct ehca_eq *eq = &shca->eq;
417 int_state = 0; 446 struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
418 447 u64 eqe_value;
419 while ((int_state == 1) || eqe) { 448 unsigned long flags;
420 while (eqe) { 449 int eqe_cnt, i;
421 u64 eqe_value = eqe->entry; 450 int eq_empty = 0;
422 451
423 ehca_dbg(&shca->ib_device, 452 spin_lock_irqsave(&eq->irq_spinlock, flags);
424 "eqe_value=%lx", eqe_value); 453 if (is_irq) {
425 454 const int max_query_cnt = 100;
426 /* TODO: better structure */ 455 int query_cnt = 0;
427 if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, 456 int int_state = 1;
428 eqe_value)) { 457 do {
429 unsigned long flags; 458 int_state = hipz_h_query_int_state(
430 u32 token; 459 shca->ipz_hca_handle, eq->ist);
431 struct ehca_cq *cq; 460 query_cnt++;
432 461 iosync();
433 ehca_dbg(&shca->ib_device, 462 } while (int_state && query_cnt < max_query_cnt);
434 "... completion event"); 463 if (unlikely((query_cnt == max_query_cnt)))
435 token = 464 ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
436 EHCA_BMASK_GET(EQE_CQ_TOKEN, 465 int_state, query_cnt);
437 eqe_value); 466 }
438 spin_lock_irqsave(&ehca_cq_idr_lock,
439 flags);
440 cq = idr_find(&ehca_cq_idr, token);
441
442 if (cq == NULL) {
443 spin_unlock_irqrestore(&ehca_cq_idr_lock,
444 flags);
445 break;
446 }
447
448 reset_eq_pending(cq);
449#ifdef CONFIG_INFINIBAND_EHCA_SCALING
450 queue_comp_task(cq);
451 spin_unlock_irqrestore(&ehca_cq_idr_lock,
452 flags);
453#else
454 spin_unlock_irqrestore(&ehca_cq_idr_lock,
455 flags);
456 comp_event_callback(cq);
457#endif
458 } else {
459 ehca_dbg(&shca->ib_device,
460 "... non completion event");
461 parse_identifier(shca, eqe_value);
462 }
463 eqe =
464 (struct ehca_eqe *)ehca_poll_eq(shca,
465 &shca->eq);
466 }
467 467
468 if (shca->hw_level >= 2) { 468 /* read out all eqes */
469 int_state = 469 eqe_cnt = 0;
470 hipz_h_query_int_state(shca->ipz_hca_handle, 470 do {
471 shca->eq.ist); 471 u32 token;
472 query_cnt++; 472 eqe_cache[eqe_cnt].eqe =
473 iosync(); 473 (struct ehca_eqe *)ehca_poll_eq(shca, eq);
474 if (query_cnt >= 100) { 474 if (!eqe_cache[eqe_cnt].eqe)
475 query_cnt = 0; 475 break;
476 int_state = 0; 476 eqe_value = eqe_cache[eqe_cnt].eqe->entry;
477 } 477 if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
478 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
479 spin_lock(&ehca_cq_idr_lock);
480 eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
481 if (!eqe_cache[eqe_cnt].cq) {
482 spin_unlock(&ehca_cq_idr_lock);
483 ehca_err(&shca->ib_device,
484 "Invalid eqe for non-existing cq "
485 "token=%x", token);
486 continue;
478 } 487 }
479 eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); 488 eqe_cache[eqe_cnt].cq->nr_events++;
480 489 spin_unlock(&ehca_cq_idr_lock);
490 } else
491 eqe_cache[eqe_cnt].cq = NULL;
492 eqe_cnt++;
493 } while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
494 if (!eqe_cnt) {
495 if (is_irq)
496 ehca_dbg(&shca->ib_device,
497 "No eqe found for irq event");
498 goto unlock_irq_spinlock;
499 } else if (!is_irq)
500 ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
501 if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
502 ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
503 /* enable irq for new packets */
504 for (i = 0; i < eqe_cnt; i++) {
505 if (eq->eqe_cache[i].cq)
506 reset_eq_pending(eq->eqe_cache[i].cq);
507 }
508 /* check eq */
509 spin_lock(&eq->spinlock);
510 eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
511 spin_unlock(&eq->spinlock);
512 /* call completion handler for cached eqes */
513 for (i = 0; i < eqe_cnt; i++)
514 if (eq->eqe_cache[i].cq) {
515 if (ehca_scaling_code)
516 queue_comp_task(eq->eqe_cache[i].cq);
517 else {
518 struct ehca_cq *cq = eq->eqe_cache[i].cq;
519 comp_event_callback(cq);
520 spin_lock_irqsave(&ehca_cq_idr_lock, flags);
521 cq->nr_events--;
522 if (!cq->nr_events)
523 wake_up(&cq->wait_completion);
524 spin_unlock_irqrestore(&ehca_cq_idr_lock,
525 flags);
526 }
527 } else {
528 ehca_dbg(&shca->ib_device, "Got non completion event");
529 parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
481 } 530 }
482 } while (int_state != 0); 531 /* poll eq if not empty */
532 if (eq_empty)
533 goto unlock_irq_spinlock;
534 do {
535 struct ehca_eqe *eqe;
536 eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
537 if (!eqe)
538 break;
539 process_eqe(shca, eqe);
540 } while (1);
483 541
484 return; 542unlock_irq_spinlock:
543 spin_unlock_irqrestore(&eq->irq_spinlock, flags);
485} 544}
486 545
487#ifdef CONFIG_INFINIBAND_EHCA_SCALING 546void ehca_tasklet_eq(unsigned long data)
547{
548 ehca_process_eq((struct ehca_shca*)data, 1);
549}
488 550
489static inline int find_next_online_cpu(struct ehca_comp_pool* pool) 551static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
490{ 552{
491 unsigned long flags_last_cpu; 553 int cpu;
554 unsigned long flags;
492 555
556 WARN_ON_ONCE(!in_interrupt());
493 if (ehca_debug_level) 557 if (ehca_debug_level)
494 ehca_dmp(&cpu_online_map, sizeof(cpumask_t), ""); 558 ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
495 559
496 spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu); 560 spin_lock_irqsave(&pool->last_cpu_lock, flags);
497 pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map); 561 cpu = next_cpu(pool->last_cpu, cpu_online_map);
498 if (pool->last_cpu == NR_CPUS) 562 if (cpu == NR_CPUS)
499 pool->last_cpu = first_cpu(cpu_online_map); 563 cpu = first_cpu(cpu_online_map);
500 spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu); 564 pool->last_cpu = cpu;
565 spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
501 566
502 return pool->last_cpu; 567 return cpu;
503} 568}
504 569
505static void __queue_comp_task(struct ehca_cq *__cq, 570static void __queue_comp_task(struct ehca_cq *__cq,
506 struct ehca_cpu_comp_task *cct) 571 struct ehca_cpu_comp_task *cct)
507{ 572{
508 unsigned long flags_cct; 573 unsigned long flags;
509 unsigned long flags_cq;
510 574
511 spin_lock_irqsave(&cct->task_lock, flags_cct); 575 spin_lock_irqsave(&cct->task_lock, flags);
512 spin_lock_irqsave(&__cq->task_lock, flags_cq); 576 spin_lock(&__cq->task_lock);
513 577
514 if (__cq->nr_callbacks == 0) { 578 if (__cq->nr_callbacks == 0) {
515 __cq->nr_callbacks++; 579 __cq->nr_callbacks++;
516 list_add_tail(&__cq->entry, &cct->cq_list); 580 list_add_tail(&__cq->entry, &cct->cq_list);
517 cct->cq_jobs++; 581 cct->cq_jobs++;
518 wake_up(&cct->wait_queue); 582 wake_up(&cct->wait_queue);
519 } 583 } else
520 else
521 __cq->nr_callbacks++; 584 __cq->nr_callbacks++;
522 585
523 spin_unlock_irqrestore(&__cq->task_lock, flags_cq); 586 spin_unlock(&__cq->task_lock);
524 spin_unlock_irqrestore(&cct->task_lock, flags_cct); 587 spin_unlock_irqrestore(&cct->task_lock, flags);
525} 588}
526 589
527static void queue_comp_task(struct ehca_cq *__cq) 590static void queue_comp_task(struct ehca_cq *__cq)
528{ 591{
529 int cpu;
530 int cpu_id; 592 int cpu_id;
531 struct ehca_cpu_comp_task *cct; 593 struct ehca_cpu_comp_task *cct;
594 int cq_jobs;
595 unsigned long flags;
532 596
533 cpu = get_cpu();
534 cpu_id = find_next_online_cpu(pool); 597 cpu_id = find_next_online_cpu(pool);
535
536 BUG_ON(!cpu_online(cpu_id)); 598 BUG_ON(!cpu_online(cpu_id));
537 599
538 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); 600 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
601 BUG_ON(!cct);
539 602
540 if (cct->cq_jobs > 0) { 603 spin_lock_irqsave(&cct->task_lock, flags);
604 cq_jobs = cct->cq_jobs;
605 spin_unlock_irqrestore(&cct->task_lock, flags);
606 if (cq_jobs > 0) {
541 cpu_id = find_next_online_cpu(pool); 607 cpu_id = find_next_online_cpu(pool);
542 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); 608 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
609 BUG_ON(!cct);
543 } 610 }
544 611
545 __queue_comp_task(__cq, cct); 612 __queue_comp_task(__cq, cct);
546
547 put_cpu();
548
549 return;
550} 613}
551 614
552static void run_comp_task(struct ehca_cpu_comp_task* cct) 615static void run_comp_task(struct ehca_cpu_comp_task* cct)
553{ 616{
554 struct ehca_cq *cq; 617 struct ehca_cq *cq;
555 unsigned long flags_cct; 618 unsigned long flags;
556 unsigned long flags_cq;
557 619
558 spin_lock_irqsave(&cct->task_lock, flags_cct); 620 spin_lock_irqsave(&cct->task_lock, flags);
559 621
560 while (!list_empty(&cct->cq_list)) { 622 while (!list_empty(&cct->cq_list)) {
561 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); 623 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
562 spin_unlock_irqrestore(&cct->task_lock, flags_cct); 624 spin_unlock_irqrestore(&cct->task_lock, flags);
563 comp_event_callback(cq); 625 comp_event_callback(cq);
564 spin_lock_irqsave(&cct->task_lock, flags_cct);
565 626
566 spin_lock_irqsave(&cq->task_lock, flags_cq); 627 spin_lock_irqsave(&ehca_cq_idr_lock, flags);
628 cq->nr_events--;
629 if (!cq->nr_events)
630 wake_up(&cq->wait_completion);
631 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
632
633 spin_lock_irqsave(&cct->task_lock, flags);
634 spin_lock(&cq->task_lock);
567 cq->nr_callbacks--; 635 cq->nr_callbacks--;
568 if (cq->nr_callbacks == 0) { 636 if (!cq->nr_callbacks) {
569 list_del_init(cct->cq_list.next); 637 list_del_init(cct->cq_list.next);
570 cct->cq_jobs--; 638 cct->cq_jobs--;
571 } 639 }
572 spin_unlock_irqrestore(&cq->task_lock, flags_cq); 640 spin_unlock(&cq->task_lock);
573
574 } 641 }
575 642
576 spin_unlock_irqrestore(&cct->task_lock, flags_cct); 643 spin_unlock_irqrestore(&cct->task_lock, flags);
577
578 return;
579} 644}
580 645
581static int comp_task(void *__cct) 646static int comp_task(void *__cct)
582{ 647{
583 struct ehca_cpu_comp_task* cct = __cct; 648 struct ehca_cpu_comp_task* cct = __cct;
649 int cql_empty;
584 DECLARE_WAITQUEUE(wait, current); 650 DECLARE_WAITQUEUE(wait, current);
585 651
586 set_current_state(TASK_INTERRUPTIBLE); 652 set_current_state(TASK_INTERRUPTIBLE);
587 while(!kthread_should_stop()) { 653 while(!kthread_should_stop()) {
588 add_wait_queue(&cct->wait_queue, &wait); 654 add_wait_queue(&cct->wait_queue, &wait);
589 655
590 if (list_empty(&cct->cq_list)) 656 spin_lock_irq(&cct->task_lock);
657 cql_empty = list_empty(&cct->cq_list);
658 spin_unlock_irq(&cct->task_lock);
659 if (cql_empty)
591 schedule(); 660 schedule();
592 else 661 else
593 __set_current_state(TASK_RUNNING); 662 __set_current_state(TASK_RUNNING);
594 663
595 remove_wait_queue(&cct->wait_queue, &wait); 664 remove_wait_queue(&cct->wait_queue, &wait);
596 665
597 if (!list_empty(&cct->cq_list)) 666 spin_lock_irq(&cct->task_lock);
667 cql_empty = list_empty(&cct->cq_list);
668 spin_unlock_irq(&cct->task_lock);
669 if (!cql_empty)
598 run_comp_task(__cct); 670 run_comp_task(__cct);
599 671
600 set_current_state(TASK_INTERRUPTIBLE); 672 set_current_state(TASK_INTERRUPTIBLE);
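ehca_process_eq() above restructures interrupt handling into two phases serialized by the new irq_spinlock: first drain up to EHCA_EQE_CACHE_SIZE entries from the event queue into eqe_cache (bumping each CQ's nr_events so it cannot be destroyed underneath), then ack and dispatch the cached entries, and finally poll once more for anything that raced in. A minimal userspace model of the drain-then-dispatch shape; the queue layout, entry type, and poll_eq() below are invented for the sketch:

#include <stdio.h>

#define EQE_CACHE_SIZE 20

struct eqe { unsigned long value; };

static struct eqe queue[8] = { {1}, {2}, {3}, {0} };
static int qhead;

static struct eqe *poll_eq(void)        /* NULL when the EQ is empty */
{
    return queue[qhead].value ? &queue[qhead++] : NULL;
}

int main(void)
{
    struct eqe *cache[EQE_CACHE_SIZE];
    int cnt = 0, i;

    /* phase 1: read out all eqes (bounded by the cache size) */
    while (cnt < EQE_CACHE_SIZE && (cache[cnt] = poll_eq()))
        cnt++;

    /* phase 2: ack and dispatch the cached entries */
    for (i = 0; i < cnt; i++)
        printf("dispatch eqe %lu\n", cache[i]->value);
    return 0;
}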
@@ -637,8 +709,6 @@ static void destroy_comp_task(struct ehca_comp_pool *pool,
637 709
638 if (task) 710 if (task)
639 kthread_stop(task); 711 kthread_stop(task);
640
641 return;
642} 712}
643 713
644static void take_over_work(struct ehca_comp_pool *pool, 714static void take_over_work(struct ehca_comp_pool *pool,
@@ -654,17 +724,18 @@ static void take_over_work(struct ehca_comp_pool *pool,
654 list_splice_init(&cct->cq_list, &list); 724 list_splice_init(&cct->cq_list, &list);
655 725
656 while(!list_empty(&list)) { 726 while(!list_empty(&list)) {
657 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); 727 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
658 728
659 list_del(&cq->entry); 729 list_del(&cq->entry);
660 __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks, 730 __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
661 smp_processor_id())); 731 smp_processor_id()));
662 } 732 }
663 733
664 spin_unlock_irqrestore(&cct->task_lock, flags_cct); 734 spin_unlock_irqrestore(&cct->task_lock, flags_cct);
665 735
666} 736}
667 737
738#ifdef CONFIG_HOTPLUG_CPU
668static int comp_pool_callback(struct notifier_block *nfb, 739static int comp_pool_callback(struct notifier_block *nfb,
669 unsigned long action, 740 unsigned long action,
670 void *hcpu) 741 void *hcpu)
@@ -707,15 +778,16 @@ static int comp_pool_callback(struct notifier_block *nfb,
707 778
708 return NOTIFY_OK; 779 return NOTIFY_OK;
709} 780}
710
711#endif 781#endif
712 782
713int ehca_create_comp_pool(void) 783int ehca_create_comp_pool(void)
714{ 784{
715#ifdef CONFIG_INFINIBAND_EHCA_SCALING
716 int cpu; 785 int cpu;
717 struct task_struct *task; 786 struct task_struct *task;
718 787
788 if (!ehca_scaling_code)
789 return 0;
790
719 pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL); 791 pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
720 if (pool == NULL) 792 if (pool == NULL)
721 return -ENOMEM; 793 return -ENOMEM;
@@ -737,20 +809,27 @@ int ehca_create_comp_pool(void)
737 } 809 }
738 } 810 }
739 811
812#ifdef CONFIG_HOTPLUG_CPU
740 comp_pool_callback_nb.notifier_call = comp_pool_callback; 813 comp_pool_callback_nb.notifier_call = comp_pool_callback;
741 comp_pool_callback_nb.priority =0; 814 comp_pool_callback_nb.priority =0;
742 register_cpu_notifier(&comp_pool_callback_nb); 815 register_cpu_notifier(&comp_pool_callback_nb);
743#endif 816#endif
744 817
818 printk(KERN_INFO "eHCA scaling code enabled\n");
819
745 return 0; 820 return 0;
746} 821}
747 822
748void ehca_destroy_comp_pool(void) 823void ehca_destroy_comp_pool(void)
749{ 824{
750#ifdef CONFIG_INFINIBAND_EHCA_SCALING
751 int i; 825 int i;
752 826
827 if (!ehca_scaling_code)
828 return;
829
830#ifdef CONFIG_HOTPLUG_CPU
753 unregister_cpu_notifier(&comp_pool_callback_nb); 831 unregister_cpu_notifier(&comp_pool_callback_nb);
832#endif
754 833
755 for (i = 0; i < NR_CPUS; i++) { 834 for (i = 0; i < NR_CPUS; i++) {
756 if (cpu_online(i)) 835 if (cpu_online(i))
@@ -758,7 +837,4 @@ void ehca_destroy_comp_pool(void)
758 } 837 }
759 free_percpu(pool->cpu_comp_tasks); 838 free_percpu(pool->cpu_comp_tasks);
760 kfree(pool); 839 kfree(pool);
761#endif
762
763 return;
764} 840}
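A subtler fix in the same file: find_next_online_cpu() now returns the CPU it computed while holding last_cpu_lock instead of re-reading pool->last_cpu after unlocking, which another caller could have advanced in the meantime. The round-robin walk itself reduces to the sketch below, with a plain bitmask standing in for cpu_online_map and a modulo loop for the kernel's next_cpu()/first_cpu():

#include <stdio.h>

#define NR_CPUS 8

static unsigned int online_mask = 0x2d;  /* CPUs 0,2,3,5 online (example) */
static int last_cpu = -1;

static int next_online_cpu(void)
{
    int cpu = last_cpu;

    do {
        cpu = (cpu + 1) % NR_CPUS;       /* kernel: next_cpu()/first_cpu() */
    } while (!(online_mask & (1u << cpu)));
    last_cpu = cpu;                      /* publish under the lock in-kernel */
    return cpu;
}

int main(void)
{
    for (int i = 0; i < 6; i++)
        printf("queue work on cpu %d\n", next_online_cpu());
    return 0;
}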
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h
index be579cc0adf6..6ed06ee033ed 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.h
+++ b/drivers/infiniband/hw/ehca/ehca_irq.h
@@ -56,6 +56,7 @@ void ehca_tasklet_neq(unsigned long data);
56 56
57irqreturn_t ehca_interrupt_eq(int irq, void *dev_id); 57irqreturn_t ehca_interrupt_eq(int irq, void *dev_id);
58void ehca_tasklet_eq(unsigned long data); 58void ehca_tasklet_eq(unsigned long data);
59void ehca_process_eq(struct ehca_shca *shca, int is_irq);
59 60
60struct ehca_cpu_comp_task { 61struct ehca_cpu_comp_task {
61 wait_queue_head_t wait_queue; 62 wait_queue_head_t wait_queue;
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 1155bcf48212..4700085ba834 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -52,7 +52,7 @@
52MODULE_LICENSE("Dual BSD/GPL"); 52MODULE_LICENSE("Dual BSD/GPL");
53MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); 53MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
54MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); 54MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
55MODULE_VERSION("SVNEHCA_0020"); 55MODULE_VERSION("SVNEHCA_0022");
56 56
57int ehca_open_aqp1 = 0; 57int ehca_open_aqp1 = 0;
58int ehca_debug_level = 0; 58int ehca_debug_level = 0;
@@ -62,6 +62,7 @@ int ehca_use_hp_mr = 0;
62int ehca_port_act_time = 30; 62int ehca_port_act_time = 30;
63int ehca_poll_all_eqs = 1; 63int ehca_poll_all_eqs = 1;
64int ehca_static_rate = -1; 64int ehca_static_rate = -1;
65int ehca_scaling_code = 1;
65 66
66module_param_named(open_aqp1, ehca_open_aqp1, int, 0); 67module_param_named(open_aqp1, ehca_open_aqp1, int, 0);
67module_param_named(debug_level, ehca_debug_level, int, 0); 68module_param_named(debug_level, ehca_debug_level, int, 0);
@@ -71,6 +72,7 @@ module_param_named(use_hp_mr, ehca_use_hp_mr, int, 0);
71module_param_named(port_act_time, ehca_port_act_time, int, 0); 72module_param_named(port_act_time, ehca_port_act_time, int, 0);
72module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, 0); 73module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, 0);
73module_param_named(static_rate, ehca_static_rate, int, 0); 74module_param_named(static_rate, ehca_static_rate, int, 0);
75module_param_named(scaling_code, ehca_scaling_code, int, 0);
74 76
75MODULE_PARM_DESC(open_aqp1, 77MODULE_PARM_DESC(open_aqp1,
76 "AQP1 on startup (0: no (default), 1: yes)"); 78 "AQP1 on startup (0: no (default), 1: yes)");
@@ -91,6 +93,8 @@ MODULE_PARM_DESC(poll_all_eqs,
91 " (0: no, 1: yes (default))"); 93 " (0: no, 1: yes (default))");
92MODULE_PARM_DESC(static_rate, 94MODULE_PARM_DESC(static_rate,
93 "set permanent static rate (default: disabled)"); 95 "set permanent static rate (default: disabled)");
96MODULE_PARM_DESC(scaling_code,
97 "set scaling code (0: disabled, 1: enabled/default)");
94 98
95spinlock_t ehca_qp_idr_lock; 99spinlock_t ehca_qp_idr_lock;
96spinlock_t ehca_cq_idr_lock; 100spinlock_t ehca_cq_idr_lock;
@@ -432,8 +436,8 @@ static int ehca_destroy_aqp1(struct ehca_sport *sport)
432 436
433static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf) 437static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf)
434{ 438{
435 return snprintf(buf, PAGE_SIZE, "%d\n", 439 return snprintf(buf, PAGE_SIZE, "%d\n",
436 ehca_debug_level); 440 ehca_debug_level);
437} 441}
438 442
439static ssize_t ehca_store_debug_level(struct device_driver *ddp, 443static ssize_t ehca_store_debug_level(struct device_driver *ddp,
@@ -561,11 +565,11 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
561 const struct of_device_id *id) 565 const struct of_device_id *id)
562{ 566{
563 struct ehca_shca *shca; 567 struct ehca_shca *shca;
564 u64 *handle; 568 const u64 *handle;
565 struct ib_pd *ibpd; 569 struct ib_pd *ibpd;
566 int ret; 570 int ret;
567 571
568 handle = (u64 *)get_property(dev->ofdev.node, "ibm,hca-handle", NULL); 572 handle = get_property(dev->ofdev.node, "ibm,hca-handle", NULL);
569 if (!handle) { 573 if (!handle) {
570 ehca_gen_err("Cannot get eHCA handle for adapter: %s.", 574 ehca_gen_err("Cannot get eHCA handle for adapter: %s.",
571 dev->ofdev.node->full_name); 575 dev->ofdev.node->full_name);
@@ -583,6 +587,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
583 ehca_gen_err("Cannot allocate shca memory."); 587 ehca_gen_err("Cannot allocate shca memory.");
584 return -ENOMEM; 588 return -ENOMEM;
585 } 589 }
590 mutex_init(&shca->modify_mutex);
586 591
587 shca->ibmebus_dev = dev; 592 shca->ibmebus_dev = dev;
588 shca->ipz_hca_handle.handle = *handle; 593 shca->ipz_hca_handle.handle = *handle;
@@ -778,8 +783,24 @@ void ehca_poll_eqs(unsigned long data)
778 783
779 spin_lock(&shca_list_lock); 784 spin_lock(&shca_list_lock);
780 list_for_each_entry(shca, &shca_list, shca_list) { 785 list_for_each_entry(shca, &shca_list, shca_list) {
781 if (shca->eq.is_initialized) 786 if (shca->eq.is_initialized) {
782 ehca_tasklet_eq((unsigned long)(void*)shca); 787 /* call deadman proc only if eq ptr does not change */
788 struct ehca_eq *eq = &shca->eq;
789 int max = 3;
790 volatile u64 q_ofs, q_ofs2;
791 u64 flags;
792 spin_lock_irqsave(&eq->spinlock, flags);
793 q_ofs = eq->ipz_queue.current_q_offset;
794 spin_unlock_irqrestore(&eq->spinlock, flags);
795 do {
796 spin_lock_irqsave(&eq->spinlock, flags);
797 q_ofs2 = eq->ipz_queue.current_q_offset;
798 spin_unlock_irqrestore(&eq->spinlock, flags);
799 max--;
800 } while (q_ofs == q_ofs2 && max > 0);
801 if (q_ofs == q_ofs2)
802 ehca_process_eq(shca, 0);
803 }
783 } 804 }
784 mod_timer(&poll_eqs_timer, jiffies + HZ); 805 mod_timer(&poll_eqs_timer, jiffies + HZ);
785 spin_unlock(&shca_list_lock); 806 spin_unlock(&shca_list_lock);
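The ehca_poll_eqs() change above is a deadman check: sample the EQ's read offset a few times, and only if it never advances (meaning no interrupt is making progress) call ehca_process_eq() from the timer. A standalone model of the stuck-queue test, with one volatile counter standing in for ipz_queue.current_q_offset and the locking omitted:

#include <stdio.h>

static volatile unsigned long q_offset;  /* advanced by the "irq" path */

static int eq_is_stuck(void)
{
    unsigned long ofs = q_offset, ofs2;
    int max = 3;

    do {
        ofs2 = q_offset;                 /* re-sample a few times */
        max--;
    } while (ofs == ofs2 && max > 0);
    return ofs == ofs2;                  /* never moved: assume stuck */
}

int main(void)
{
    if (eq_is_stuck())
        puts("no progress seen: process EQ from the timer");
    return 0;
}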
@@ -790,7 +811,7 @@ int __init ehca_module_init(void)
790 int ret; 811 int ret;
791 812
792 printk(KERN_INFO "eHCA Infiniband Device Driver " 813 printk(KERN_INFO "eHCA Infiniband Device Driver "
793 "(Rel.: SVNEHCA_0020)\n"); 814 "(Rel.: SVNEHCA_0022)\n");
794 idr_init(&ehca_qp_idr); 815 idr_init(&ehca_qp_idr);
795 idr_init(&ehca_cq_idr); 816 idr_init(&ehca_cq_idr);
796 spin_lock_init(&ehca_qp_idr_lock); 817 spin_lock_init(&ehca_qp_idr_lock);
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index 3fb46e67df87..b564fcd3b282 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -70,6 +70,10 @@
70#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31) 70#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31)
71#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63) 71#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63)
72 72
73#define H_MP_INIT_TYPE EHCA_BMASK_IBM(44, 47)
74#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48)
75#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49)
76
73/* direct access qp controls */ 77/* direct access qp controls */
74#define DAQP_CTRL_ENABLE 0x01 78#define DAQP_CTRL_ENABLE 0x01
75#define DAQP_CTRL_SEND_COMP 0x20 79#define DAQP_CTRL_SEND_COMP 0x20
@@ -364,6 +368,26 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
364 return ret; 368 return ret;
365} 369}
366 370
371u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
372 const u8 port_id, const u32 port_cap,
373 const u8 init_type, const int modify_mask)
374{
375 u64 port_attributes = port_cap;
376
377 if (modify_mask & IB_PORT_SHUTDOWN)
378 port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1);
379 if (modify_mask & IB_PORT_INIT_TYPE)
380 port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type);
381 if (modify_mask & IB_PORT_RESET_QKEY_CNTR)
382 port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1);
383
384 return ehca_plpar_hcall_norets(H_MODIFY_PORT,
385 adapter_handle.handle, /* r4 */
386 port_id, /* r5 */
387 port_attributes, /* r6 */
388 0, 0, 0, 0);
389}
390
367u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, 391u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
368 struct hipz_query_hca *query_hca_rblock) 392 struct hipz_query_hca *query_hca_rblock)
369{ 393{
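The new hipz_h_modify_port() above translates the generic modify-mask flags into the bit layout the H_MODIFY_PORT hcall expects, OR-ing fields into one 64-bit attribute word. A sketch of that packing pattern; the flag values and field positions below are invented, only the translation shape follows the hunk:

#include <stdint.h>
#include <stdio.h>

#define PORT_SHUTDOWN   (1 << 0)    /* stand-ins for the IB_PORT_* flags */
#define PORT_INIT_TYPE  (1 << 1)
#define PORT_RESET_QKEY (1 << 2)

#define MP_SHUTDOWN(v)   ((uint64_t)(v) << 15)  /* illustrative positions */
#define MP_INIT_TYPE(v)  ((uint64_t)(v) << 16)
#define MP_RESET_QKEY(v) ((uint64_t)(v) << 14)

static uint64_t pack_port_attrs(uint32_t cap, uint8_t init_type, int mask)
{
    uint64_t attrs = cap;

    if (mask & PORT_SHUTDOWN)
        attrs |= MP_SHUTDOWN(1);
    if (mask & PORT_INIT_TYPE)
        attrs |= MP_INIT_TYPE(init_type);
    if (mask & PORT_RESET_QKEY)
        attrs |= MP_RESET_QKEY(1);
    return attrs;                       /* goes out as one hcall register */
}

int main(void)
{
    printf("attrs=%#llx\n",
           (unsigned long long)pack_port_attrs(0x3, 2, PORT_INIT_TYPE));
    return 0;
}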
diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h
index 587ebd470959..2869f7dd6196 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.h
+++ b/drivers/infiniband/hw/ehca/hcp_if.h
@@ -85,6 +85,10 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
85 const u8 port_id, 85 const u8 port_id,
86 struct hipz_query_port *query_port_response_block); 86 struct hipz_query_port *query_port_response_block);
87 87
88u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
89 const u8 port_id, const u32 port_cap,
90 const u8 init_type, const int modify_mask);
91
88u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, 92u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
89 struct hipz_query_hca *query_hca_rblock); 93 struct hipz_query_hca *query_hca_rblock);
90 94
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
index dc3bda2634b7..8199c45768a3 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
@@ -79,7 +79,7 @@ static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset)
79 if (q_offset >= queue->queue_length) 79 if (q_offset >= queue->queue_length)
80 return NULL; 80 return NULL;
81 current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT]; 81 current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT];
82 return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)]; 82 return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
83} 83}
84 84
85/* 85/*
@@ -247,6 +247,15 @@ static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
247 return ret; 247 return ret;
248} 248}
249 249
250static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue)
251{
252 void *ret = ipz_qeit_get(queue);
253 u32 qe = *(u8 *) ret;
254 if ((qe >> 7) != (queue->toggle_state & 1))
255 return NULL;
256 return ret;
257}
258
250/* returns address (GX) of first queue entry */ 259/* returns address (GX) of first queue entry */
251static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt) 260static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt)
252{ 261{
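ipz_eqit_eq_peek_valid() above tests whether the next event-queue entry is valid without consuming it (unlike the existing get_inc variant): the entry's top bit must match the queue's toggle state, which flips on every wrap so stale entries from the previous pass fail the check. The same test in a standalone sketch:

#include <stdint.h>
#include <stdio.h>

struct queue {
    uint8_t *cur;               /* next entry to look at */
    unsigned int toggle_state;  /* flips each time the queue wraps */
};

static void *peek_valid(struct queue *q)
{
    unsigned int qe = *q->cur;

    if ((qe >> 7) != (q->toggle_state & 1))
        return NULL;            /* producer has not written it yet */
    return q->cur;              /* valid, but not consumed */
}

int main(void)
{
    uint8_t entry = 0x80;       /* valid bit set for toggle state 1 */
    struct queue q = { .cur = &entry, .toggle_state = 1 };

    printf("entry %s\n", peek_valid(&q) ? "valid" : "not yet valid");
    return 0;
}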
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 54139d398181..10c008f22ba6 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -78,6 +78,8 @@
78#define IPATH_IB_LINKINIT 3 78#define IPATH_IB_LINKINIT 3
79#define IPATH_IB_LINKDOWN_SLEEP 4 79#define IPATH_IB_LINKDOWN_SLEEP 4
80#define IPATH_IB_LINKDOWN_DISABLE 5 80#define IPATH_IB_LINKDOWN_DISABLE 5
81#define IPATH_IB_LINK_LOOPBACK 6 /* enable local loopback */
82#define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */
81 83
82/* 84/*
83 * stats maintained by the driver. For now, at least, this is global 85 * stats maintained by the driver. For now, at least, this is global
@@ -316,11 +318,17 @@ struct ipath_base_info {
316 /* address of readonly memory copy of the rcvhdrq tail register. */ 318 /* address of readonly memory copy of the rcvhdrq tail register. */
317 __u64 spi_rcvhdr_tailaddr; 319 __u64 spi_rcvhdr_tailaddr;
318 320
319 /* shared memory pages for subports if IPATH_RUNTIME_MASTER is set */ 321 /* shared memory pages for subports if port is shared */
320 __u64 spi_subport_uregbase; 322 __u64 spi_subport_uregbase;
321 __u64 spi_subport_rcvegrbuf; 323 __u64 spi_subport_rcvegrbuf;
322 __u64 spi_subport_rcvhdr_base; 324 __u64 spi_subport_rcvhdr_base;
323 325
326 /* shared memory page for hardware port if it is shared */
327 __u64 spi_port_uregbase;
328 __u64 spi_port_rcvegrbuf;
329 __u64 spi_port_rcvhdr_base;
330 __u64 spi_port_rcvhdr_tailaddr;
331
324} __attribute__ ((aligned(8))); 332} __attribute__ ((aligned(8)));
325 333
326 334
@@ -344,7 +352,7 @@ struct ipath_base_info {
344 * may not be implemented; the user code must deal with this if it 352 * may not be implemented; the user code must deal with this if it
345 * cares, or it must abort after initialization reports the difference. 353 * cares, or it must abort after initialization reports the difference.
346 */ 354 */
347#define IPATH_USER_SWMINOR 3 355#define IPATH_USER_SWMINOR 5
348 356
349#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR) 357#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
350 358
@@ -418,11 +426,14 @@ struct ipath_user_info {
418#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */ 426#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */
419#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */ 427#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */
420#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */ 428#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */
421#define IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes */ 429#define __IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes (for old user code) */
422#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */ 430#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */
423#define IPATH_CMD_USER_INIT 24 /* set up userspace */ 431#define IPATH_CMD_USER_INIT 24 /* set up userspace */
432#define IPATH_CMD_UNUSED_1 25
433#define IPATH_CMD_UNUSED_2 26
434#define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */
424 435
425#define IPATH_CMD_MAX 24 436#define IPATH_CMD_MAX 27
426 437
427struct ipath_port_info { 438struct ipath_port_info {
428 __u32 num_active; /* number of active units */ 439 __u32 num_active; /* number of active units */
@@ -430,7 +441,7 @@ struct ipath_port_info {
430 __u16 port; /* port on unit assigned to caller */ 441 __u16 port; /* port on unit assigned to caller */
431 __u16 subport; /* subport on unit assigned to caller */ 442 __u16 subport; /* subport on unit assigned to caller */
432 __u16 num_ports; /* number of ports available on unit */ 443 __u16 num_ports; /* number of ports available on unit */
433 __u16 num_subports; /* number of subport slaves opened on port */ 444 __u16 num_subports; /* number of subports opened on port */
434}; 445};
435 446
436struct ipath_tid_info { 447struct ipath_tid_info {
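The SWMINOR bump from 3 to 5 above reaches userspace through IPATH_USER_SWVERSION, which packs major and minor into one word as (major<<16)|minor. A sketch of the packing and one plausible compatibility rule (the major value below is invented, and per the header comment the actual policy is left to user code):

#include <stdio.h>

#define SWMAJOR 1                       /* invented for the sketch */
#define SWMINOR 5                       /* bumped from 3 by this patch */
#define SWVERSION ((SWMAJOR << 16) | SWMINOR)

static int compatible(unsigned int user_version)
{
    /* majors must match exactly; an older user minor is tolerated */
    return (user_version >> 16) == SWMAJOR &&
           (user_version & 0xffff) <= SWMINOR;
}

int main(void)
{
    printf("driver=%#x old-user-ok=%d\n",
           SWVERSION, compatible((SWMAJOR << 16) | 3));
    return 0;
}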
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index 87462e0cb4d2..ea78e6dddc90 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -76,7 +76,20 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
76 } 76 }
77 return; 77 return;
78 } 78 }
79 wc->queue[head] = *entry; 79 wc->queue[head].wr_id = entry->wr_id;
80 wc->queue[head].status = entry->status;
81 wc->queue[head].opcode = entry->opcode;
82 wc->queue[head].vendor_err = entry->vendor_err;
83 wc->queue[head].byte_len = entry->byte_len;
84 wc->queue[head].imm_data = (__u32 __force)entry->imm_data;
85 wc->queue[head].qp_num = entry->qp->qp_num;
86 wc->queue[head].src_qp = entry->src_qp;
87 wc->queue[head].wc_flags = entry->wc_flags;
88 wc->queue[head].pkey_index = entry->pkey_index;
89 wc->queue[head].slid = entry->slid;
90 wc->queue[head].sl = entry->sl;
91 wc->queue[head].dlid_path_bits = entry->dlid_path_bits;
92 wc->queue[head].port_num = entry->port_num;
80 wc->head = next; 93 wc->head = next;
81 94
82 if (cq->notify == IB_CQ_NEXT_COMP || 95 if (cq->notify == IB_CQ_NEXT_COMP ||
@@ -122,9 +135,30 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
122 if (tail > (u32) cq->ibcq.cqe) 135 if (tail > (u32) cq->ibcq.cqe)
123 tail = (u32) cq->ibcq.cqe; 136 tail = (u32) cq->ibcq.cqe;
124 for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { 137 for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
138 struct ipath_qp *qp;
139
125 if (tail == wc->head) 140 if (tail == wc->head)
126 break; 141 break;
127 *entry = wc->queue[tail]; 142
143 qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table,
144 wc->queue[tail].qp_num);
145 entry->qp = &qp->ibqp;
146 if (atomic_dec_and_test(&qp->refcount))
147 wake_up(&qp->wait);
148
149 entry->wr_id = wc->queue[tail].wr_id;
150 entry->status = wc->queue[tail].status;
151 entry->opcode = wc->queue[tail].opcode;
152 entry->vendor_err = wc->queue[tail].vendor_err;
153 entry->byte_len = wc->queue[tail].byte_len;
154 entry->imm_data = wc->queue[tail].imm_data;
155 entry->src_qp = wc->queue[tail].src_qp;
156 entry->wc_flags = wc->queue[tail].wc_flags;
157 entry->pkey_index = wc->queue[tail].pkey_index;
158 entry->slid = wc->queue[tail].slid;
159 entry->sl = wc->queue[tail].sl;
160 entry->dlid_path_bits = wc->queue[tail].dlid_path_bits;
161 entry->port_num = wc->queue[tail].port_num;
128 if (tail >= cq->ibcq.cqe) 162 if (tail >= cq->ibcq.cqe)
129 tail = 0; 163 tail = 0;
130 else 164 else
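The ipath_cq.c change above stops copying struct ib_wc wholesale into the completion queue (which is mappable to userspace): the queued format keeps a numeric qp_num, and ipath_poll_cq() translates it back to a QP object via a table lookup and drops a reference that can wake a waiting destroyer. A toy model of that translation step, with a fixed array standing in for the driver's QP table and plain integers for the kernel's atomics:

#include <stdio.h>

struct qp { unsigned int qpn; int refcount; };
struct queued_wc { unsigned long wr_id; unsigned int qp_num; };
struct wc { unsigned long wr_id; struct qp *qp; };

static struct qp qp_table[4] = { {0, 1}, {1, 1}, {2, 1}, {3, 1} };

static struct qp *lookup_qpn(unsigned int qpn)
{
    if (qpn >= 4)
        return NULL;
    qp_table[qpn].refcount++;           /* lookup takes a reference */
    return &qp_table[qpn];
}

static void poll_one(const struct queued_wc *src, struct wc *dst)
{
    struct qp *qp = lookup_qpn(src->qp_num);

    dst->wr_id = src->wr_id;
    dst->qp = qp;                       /* consumer gets the object back */
    if (qp && --qp->refcount == 0)
        puts("last reference dropped: wake destroyer");
}

int main(void)
{
    struct queued_wc q = { .wr_id = 42, .qp_num = 2 };
    struct wc out;

    poll_one(&q, &out);
    printf("wr_id=%lu qpn=%u\n", out.wr_id, out.qp->qpn);
    return 0;
}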
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index df69f0d80b8b..42bfbdb0d3e6 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -57,6 +57,7 @@
57#define __IPATH_PROCDBG 0x100 57#define __IPATH_PROCDBG 0x100
58/* print mmap/nopage stuff, not using VDBG any more */ 58/* print mmap/nopage stuff, not using VDBG any more */
59#define __IPATH_MMDBG 0x200 59#define __IPATH_MMDBG 0x200
60#define __IPATH_ERRPKTDBG 0x400
60#define __IPATH_USER_SEND 0x1000 /* use user mode send */ 61#define __IPATH_USER_SEND 0x1000 /* use user mode send */
61#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */ 62#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */
62#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */ 63#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 0f13a2182cc7..63e8368b0e95 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -296,7 +296,7 @@ static int ipath_diag_open(struct inode *in, struct file *fp)
296 } 296 }
297 297
298 fp->private_data = dd; 298 fp->private_data = dd;
299 ipath_diag_inuse = 1; 299 ipath_diag_inuse = -2;
300 diag_set_link = 0; 300 diag_set_link = 0;
301 ret = 0; 301 ret = 0;
302 302
@@ -461,6 +461,8 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data,
461 else if ((count % 4) || (*off % 4)) 461 else if ((count % 4) || (*off % 4))
462 /* address or length is not 32-bit aligned, hence invalid */ 462 /* address or length is not 32-bit aligned, hence invalid */
463 ret = -EINVAL; 463 ret = -EINVAL;
464 else if (ipath_diag_inuse < 1 && (*off || count != 8))
465 ret = -EINVAL; /* prevent cat /dev/ipath_diag* */
464 else if ((count % 8) || (*off % 8)) 466 else if ((count % 8) || (*off % 8))
465 /* address or length not 64-bit aligned; do 32-bit reads */ 467 /* address or length not 64-bit aligned; do 32-bit reads */
466 ret = ipath_read_umem32(dd, data, kreg_base + *off, count); 468 ret = ipath_read_umem32(dd, data, kreg_base + *off, count);
@@ -470,6 +472,8 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data,
470 if (ret >= 0) { 472 if (ret >= 0) {
471 *off += count; 473 *off += count;
472 ret = count; 474 ret = count;
475 if (ipath_diag_inuse == -2)
476 ipath_diag_inuse++;
473 } 477 }
474 478
475 return ret; 479 return ret;
@@ -489,6 +493,9 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
489 else if ((count % 4) || (*off % 4)) 493 else if ((count % 4) || (*off % 4))
490 /* address or length is not 32-bit aligned, hence invalid */ 494 /* address or length is not 32-bit aligned, hence invalid */
491 ret = -EINVAL; 495 ret = -EINVAL;
496 else if ((ipath_diag_inuse == -1 && (*off || count != 8)) ||
497 ipath_diag_inuse == -2) /* read qw off 0, write qw off 0 */
498 ret = -EINVAL; /* before any other write allowed */
492 else if ((count % 8) || (*off % 8)) 499 else if ((count % 8) || (*off % 8))
493 /* address or length not 64-bit aligned; do 32-bit writes */ 500 /* address or length not 64-bit aligned; do 32-bit writes */
494 ret = ipath_write_umem32(dd, kreg_base + *off, data, count); 501 ret = ipath_write_umem32(dd, kreg_base + *off, data, count);
@@ -498,6 +505,8 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
498 if (ret >= 0) { 505 if (ret >= 0) {
499 *off += count; 506 *off += count;
500 ret = count; 507 ret = count;
508 if (ipath_diag_inuse == -1)
509 ipath_diag_inuse = 1; /* all read/write OK now */
501 } 510 }
502 511
503 return ret; 512 return ret;
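The diag changes above encode a small unlock handshake in ipath_diag_inuse: -2 right after open, -1 once userspace has read the 8-byte word at offset 0, and 1 (full access) only after it writes an 8-byte word back at offset 0, which defeats an accidental 'cat /dev/ipath_diag*'. The state machine in a standalone sketch:

#include <stdio.h>

static int inuse;   /* -2: opened, -1: probed by read, 1: unlocked */

static int diag_read(long off, long count)
{
    if (inuse < 1 && (off || count != 8))
        return -1;                  /* blocks 'cat /dev/ipath_diag*' */
    if (inuse == -2)
        inuse = -1;
    return 0;
}

static int diag_write(long off, long count)
{
    if ((inuse == -1 && (off || count != 8)) || inuse == -2)
        return -1;                  /* must read qword 0, then write it */
    if (inuse == -1)
        inuse = 1;
    return 0;
}

int main(void)
{
    inuse = -2;                                      /* open() */
    printf("stray write: %d\n", diag_write(0, 8));   /* rejected */
    printf("probe read:  %d\n", diag_read(0, 8));    /* ok, -2 -> -1 */
    printf("probe write: %d\n", diag_write(0, 8));   /* ok, -1 -> 1 */
    printf("free access: %d\n", diag_read(64, 4));   /* now allowed */
    return 0;
}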
diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c
index 6e0f2b8918ce..f87f003e3ef8 100644
--- a/drivers/infiniband/hw/ipath/ipath_dma.c
+++ b/drivers/infiniband/hw/ipath/ipath_dma.c
@@ -96,8 +96,8 @@ static void ipath_dma_unmap_page(struct ib_device *dev,
96 BUG_ON(!valid_dma_direction(direction)); 96 BUG_ON(!valid_dma_direction(direction));
97} 97}
98 98
99int ipath_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, 99static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents,
100 enum dma_data_direction direction) 100 enum dma_data_direction direction)
101{ 101{
102 u64 addr; 102 u64 addr;
103 int i; 103 int i;
@@ -167,7 +167,7 @@ static void *ipath_dma_alloc_coherent(struct ib_device *dev, size_t size,
167} 167}
168 168
169static void ipath_dma_free_coherent(struct ib_device *dev, size_t size, 169static void ipath_dma_free_coherent(struct ib_device *dev, size_t size,
170 void *cpu_addr, dma_addr_t dma_handle) 170 void *cpu_addr, u64 dma_handle)
171{ 171{
172 free_pages((unsigned long) cpu_addr, get_order(size)); 172 free_pages((unsigned long) cpu_addr, get_order(size));
173} 173}
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index ae7f21a0cdc0..e3a223209710 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -390,15 +390,23 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
390 390
391 /* setup the chip-specific functions, as early as possible. */ 391 /* setup the chip-specific functions, as early as possible. */
392 switch (ent->device) { 392 switch (ent->device) {
393#ifdef CONFIG_HT_IRQ
394 case PCI_DEVICE_ID_INFINIPATH_HT: 393 case PCI_DEVICE_ID_INFINIPATH_HT:
394#ifdef CONFIG_HT_IRQ
395 ipath_init_iba6110_funcs(dd); 395 ipath_init_iba6110_funcs(dd);
396 break; 396 break;
397#else
398 ipath_dev_err(dd, "QLogic HT device 0x%x cannot work if "
399 "CONFIG_HT_IRQ is not enabled\n", ent->device);
400 return -ENODEV;
397#endif 401#endif
398#ifdef CONFIG_PCI_MSI
399 case PCI_DEVICE_ID_INFINIPATH_PE800: 402 case PCI_DEVICE_ID_INFINIPATH_PE800:
403#ifdef CONFIG_PCI_MSI
400 ipath_init_iba6120_funcs(dd); 404 ipath_init_iba6120_funcs(dd);
401 break; 405 break;
406#else
407 ipath_dev_err(dd, "QLogic PCIE device 0x%x cannot work if "
408 "CONFIG_PCI_MSI is not enabled\n", ent->device);
409 return -ENODEV;
402#endif 410#endif
403 default: 411 default:
404 ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, " 412 ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
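Moving the case labels outside the #ifdef blocks above turns a silently mismatched kernel config into an explicit probe failure: the device ID always matches its case, and only the handling is conditional, so an HT or PCIE card on a kernel without CONFIG_HT_IRQ/CONFIG_PCI_MSI now gets a clear error instead of falling through to the "unknown device" default. The pattern in miniature:

#include <stdio.h>

#define DEV_HT 0x10     /* invented device id for the sketch */

static int init_one(int device)
{
    switch (device) {
    case DEV_HT:        /* always matched, even if support is compiled out */
#ifdef CONFIG_HT_IRQ
        puts("init HT functions");
        return 0;
#else
        puts("HT device needs CONFIG_HT_IRQ");
        return -19;     /* -ENODEV */
#endif
    default:
        puts("unknown device");
        return -19;
    }
}

int main(void)
{
    return init_one(DEV_HT) ? 1 : 0;
}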
@@ -486,7 +494,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
486 494
487 ret = ipath_init_chip(dd, 0); /* do the chip-specific init */ 495 ret = ipath_init_chip(dd, 0); /* do the chip-specific init */
488 if (ret) 496 if (ret)
489 goto bail_iounmap; 497 goto bail_irqsetup;
490 498
491 ret = ipath_enable_wc(dd); 499 ret = ipath_enable_wc(dd);
492 500
@@ -505,6 +513,9 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
505 513
506 goto bail; 514 goto bail;
507 515
516bail_irqsetup:
517 if (pdev->irq) free_irq(pdev->irq, dd);
518
508bail_iounmap: 519bail_iounmap:
509 iounmap((volatile void __iomem *) dd->ipath_kregbase); 520 iounmap((volatile void __iomem *) dd->ipath_kregbase);
510 521
@@ -525,8 +536,6 @@ static void __devexit cleanup_device(struct ipath_devdata *dd)
525{ 536{
526 int port; 537 int port;
527 538
528 ipath_shutdown_device(dd);
529
530 if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { 539 if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
531 /* can't do anything more with chip; needs re-init */ 540 /* can't do anything more with chip; needs re-init */
532 *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT; 541 *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
@@ -594,8 +603,9 @@ static void __devexit cleanup_device(struct ipath_devdata *dd)
594 603
595 ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", 604 ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
596 dd->ipath_pageshadow); 605 dd->ipath_pageshadow);
597 vfree(dd->ipath_pageshadow); 606 tmpp = dd->ipath_pageshadow;
598 dd->ipath_pageshadow = NULL; 607 dd->ipath_pageshadow = NULL;
608 vfree(tmpp);
599 } 609 }
600 610
601 /* 611 /*
@@ -622,6 +632,12 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev)
622 632
623 ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd); 633 ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
624 634
635 /*
636 * disable the IB link early, to be sure no new packets arrive, which
637 * complicates the shutdown process
638 */
639 ipath_shutdown_device(dd);
640
625 if (dd->verbs_dev) 641 if (dd->verbs_dev)
626 ipath_unregister_ib_device(dd->verbs_dev); 642 ipath_unregister_ib_device(dd->verbs_dev);
627 643
@@ -754,9 +770,42 @@ static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
754 return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT; 770 return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
755} 771}
756 772
757void ipath_decode_err(char *buf, size_t blen, ipath_err_t err) 773/*
774 * Decode the error status into strings, deciding whether to always
775 * print * it or not depending on "normal packet errors" vs everything
776 * else. Return 1 if "real" errors, otherwise 0 if only packet
777 * errors, so caller can decide what to print with the string.
778 */
779int ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
758{ 780{
781 int iserr = 1;
759 *buf = '\0'; 782 *buf = '\0';
783 if (err & INFINIPATH_E_PKTERRS) {
784 if (!(err & ~INFINIPATH_E_PKTERRS))
785 iserr = 0; // if only packet errors.
786 if (ipath_debug & __IPATH_ERRPKTDBG) {
787 if (err & INFINIPATH_E_REBP)
788 strlcat(buf, "EBP ", blen);
789 if (err & INFINIPATH_E_RVCRC)
790 strlcat(buf, "VCRC ", blen);
791 if (err & INFINIPATH_E_RICRC) {
792 strlcat(buf, "CRC ", blen);
793 // clear for check below, so only once
794 err &= INFINIPATH_E_RICRC;
795 }
796 if (err & INFINIPATH_E_RSHORTPKTLEN)
797 strlcat(buf, "rshortpktlen ", blen);
798 if (err & INFINIPATH_E_SDROPPEDDATAPKT)
799 strlcat(buf, "sdroppeddatapkt ", blen);
800 if (err & INFINIPATH_E_SPKTLEN)
801 strlcat(buf, "spktlen ", blen);
802 }
803 if ((err & INFINIPATH_E_RICRC) &&
804 !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
805 strlcat(buf, "CRC ", blen);
806 if (!iserr)
807 goto done;
808 }
760 if (err & INFINIPATH_E_RHDRLEN) 809 if (err & INFINIPATH_E_RHDRLEN)
761 strlcat(buf, "rhdrlen ", blen); 810 strlcat(buf, "rhdrlen ", blen);
762 if (err & INFINIPATH_E_RBADTID) 811 if (err & INFINIPATH_E_RBADTID)
@@ -767,12 +816,12 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
767 strlcat(buf, "rhdr ", blen); 816 strlcat(buf, "rhdr ", blen);
768 if (err & INFINIPATH_E_RLONGPKTLEN) 817 if (err & INFINIPATH_E_RLONGPKTLEN)
769 strlcat(buf, "rlongpktlen ", blen); 818 strlcat(buf, "rlongpktlen ", blen);
770 if (err & INFINIPATH_E_RSHORTPKTLEN)
771 strlcat(buf, "rshortpktlen ", blen);
772 if (err & INFINIPATH_E_RMAXPKTLEN) 819 if (err & INFINIPATH_E_RMAXPKTLEN)
773 strlcat(buf, "rmaxpktlen ", blen); 820 strlcat(buf, "rmaxpktlen ", blen);
774 if (err & INFINIPATH_E_RMINPKTLEN) 821 if (err & INFINIPATH_E_RMINPKTLEN)
775 strlcat(buf, "rminpktlen ", blen); 822 strlcat(buf, "rminpktlen ", blen);
823 if (err & INFINIPATH_E_SMINPKTLEN)
824 strlcat(buf, "sminpktlen ", blen);
776 if (err & INFINIPATH_E_RFORMATERR) 825 if (err & INFINIPATH_E_RFORMATERR)
777 strlcat(buf, "rformaterr ", blen); 826 strlcat(buf, "rformaterr ", blen);
778 if (err & INFINIPATH_E_RUNSUPVL) 827 if (err & INFINIPATH_E_RUNSUPVL)
@@ -781,32 +830,20 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
781 strlcat(buf, "runexpchar ", blen); 830 strlcat(buf, "runexpchar ", blen);
782 if (err & INFINIPATH_E_RIBFLOW) 831 if (err & INFINIPATH_E_RIBFLOW)
783 strlcat(buf, "ribflow ", blen); 832 strlcat(buf, "ribflow ", blen);
784 if (err & INFINIPATH_E_REBP)
785 strlcat(buf, "EBP ", blen);
786 if (err & INFINIPATH_E_SUNDERRUN) 833 if (err & INFINIPATH_E_SUNDERRUN)
787 strlcat(buf, "sunderrun ", blen); 834 strlcat(buf, "sunderrun ", blen);
788 if (err & INFINIPATH_E_SPIOARMLAUNCH) 835 if (err & INFINIPATH_E_SPIOARMLAUNCH)
789 strlcat(buf, "spioarmlaunch ", blen); 836 strlcat(buf, "spioarmlaunch ", blen);
790 if (err & INFINIPATH_E_SUNEXPERRPKTNUM) 837 if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
791 strlcat(buf, "sunexperrpktnum ", blen); 838 strlcat(buf, "sunexperrpktnum ", blen);
792 if (err & INFINIPATH_E_SDROPPEDDATAPKT)
793 strlcat(buf, "sdroppeddatapkt ", blen);
794 if (err & INFINIPATH_E_SDROPPEDSMPPKT) 839 if (err & INFINIPATH_E_SDROPPEDSMPPKT)
795 strlcat(buf, "sdroppedsmppkt ", blen); 840 strlcat(buf, "sdroppedsmppkt ", blen);
796 if (err & INFINIPATH_E_SMAXPKTLEN) 841 if (err & INFINIPATH_E_SMAXPKTLEN)
797 strlcat(buf, "smaxpktlen ", blen); 842 strlcat(buf, "smaxpktlen ", blen);
798 if (err & INFINIPATH_E_SMINPKTLEN)
799 strlcat(buf, "sminpktlen ", blen);
800 if (err & INFINIPATH_E_SUNSUPVL) 843 if (err & INFINIPATH_E_SUNSUPVL)
801 strlcat(buf, "sunsupVL ", blen); 844 strlcat(buf, "sunsupVL ", blen);
802 if (err & INFINIPATH_E_SPKTLEN)
803 strlcat(buf, "spktlen ", blen);
804 if (err & INFINIPATH_E_INVALIDADDR) 845 if (err & INFINIPATH_E_INVALIDADDR)
805 strlcat(buf, "invalidaddr ", blen); 846 strlcat(buf, "invalidaddr ", blen);
806 if (err & INFINIPATH_E_RICRC)
807 strlcat(buf, "CRC ", blen);
808 if (err & INFINIPATH_E_RVCRC)
809 strlcat(buf, "VCRC ", blen);
810 if (err & INFINIPATH_E_RRCVEGRFULL) 847 if (err & INFINIPATH_E_RRCVEGRFULL)
811 strlcat(buf, "rcvegrfull ", blen); 848 strlcat(buf, "rcvegrfull ", blen);
812 if (err & INFINIPATH_E_RRCVHDRFULL) 849 if (err & INFINIPATH_E_RRCVHDRFULL)
@@ -819,6 +856,8 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
819 strlcat(buf, "hardware ", blen); 856 strlcat(buf, "hardware ", blen);
820 if (err & INFINIPATH_E_RESET) 857 if (err & INFINIPATH_E_RESET)
821 strlcat(buf, "reset ", blen); 858 strlcat(buf, "reset ", blen);
859done:
860 return iserr;
822} 861}
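
The change of return type from void to int is what drives the quieter logging elsewhere in this commit: callers can now route packet-only noise to the debug log and keep the error log for "real" faults. A minimal sketch of the intended caller pattern (the surrounding names are illustrative, not part of this hunk):

	iserr = ipath_decode_err(msg, sizeof msg, errs);
	if (iserr)
		ipath_dev_err(dd, "%s error\n", msg);
	else
		ipath_dbg("%s packet problems\n", msg);
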
823 862
824/** 863/**
@@ -1662,6 +1701,22 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
1662 lstate = IPATH_LINKACTIVE; 1701 lstate = IPATH_LINKACTIVE;
1663 break; 1702 break;
1664 1703
1704 case IPATH_IB_LINK_LOOPBACK:
1705 dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");
1706 dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
1707 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1708 dd->ipath_ibcctrl);
1709 ret = 0;
 1710 goto bail; /* no state change to wait for */
1711
1712 case IPATH_IB_LINK_EXTERNAL:
1713 dev_info(&dd->pcidev->dev, "Disabling IB local loopback (normal)\n");
1714 dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
1715 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1716 dd->ipath_ibcctrl);
1717 ret = 0;
 1718 goto bail; /* no state change to wait for */
1719
1665 default: 1720 default:
1666 ipath_dbg("Invalid linkstate 0x%x requested\n", newstate); 1721 ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
1667 ret = -EINVAL; 1722 ret = -EINVAL;
@@ -1765,29 +1820,6 @@ int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
1765 return 0; 1820 return 0;
1766} 1821}
1767 1822
1768/**
1769 * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register
1770 * @dd: the infinipath device
1771 * @regno: the register number to read
1772 * @port: the port containing the register
1773 *
1774 * Registers that vary with the chip implementation constants (port)
1775 * use this routine.
1776 */
1777u64 ipath_read_kreg64_port(const struct ipath_devdata *dd, ipath_kreg regno,
1778 unsigned port)
1779{
1780 u16 where;
1781
1782 if (port < dd->ipath_portcnt &&
1783 (regno == dd->ipath_kregs->kr_rcvhdraddr ||
1784 regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
1785 where = regno + port;
1786 else
1787 where = -1;
1788
1789 return ipath_read_kreg64(dd, where);
1790}
1791 1823
1792/** 1824/**
1793 * ipath_write_kreg_port - write a device's per-port 64-bit kernel register 1825 * ipath_write_kreg_port - write a device's per-port 64-bit kernel register
@@ -1973,7 +2005,8 @@ static int __init infinipath_init(void)
1973{ 2005{
1974 int ret; 2006 int ret;
1975 2007
1976 ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version); 2008 if (ipath_debug & __IPATH_DBG)
2009 printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
1977 2010
1978 /* 2011 /*
1979 * These must be called before the driver is registered with 2012 * These must be called before the driver is registered with
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
index a4019a6b7560..030185f90ee2 100644
--- a/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -626,6 +626,10 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
626 } else 626 } else
627 memcpy(dd->ipath_serial, ifp->if_serial, 627 memcpy(dd->ipath_serial, ifp->if_serial,
628 sizeof ifp->if_serial); 628 sizeof ifp->if_serial);
629 if (!strstr(ifp->if_comment, "Tested successfully"))
630 ipath_dev_err(dd, "Board SN %s did not pass functional "
631 "test: %s\n", dd->ipath_serial,
632 ifp->if_comment);
629 633
630 ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n", 634 ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
631 (unsigned long long) be64_to_cpu(dd->ipath_guid)); 635 (unsigned long long) be64_to_cpu(dd->ipath_guid));
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 5d64ff875297..1272aaf2a785 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved. 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -41,12 +41,6 @@
41#include "ipath_kernel.h" 41#include "ipath_kernel.h"
42#include "ipath_common.h" 42#include "ipath_common.h"
43 43
44/*
45 * mmap64 doesn't allow all 64 bits for 32-bit applications
46 * so only use the low 43 bits.
47 */
48#define MMAP64_MASK 0x7FFFFFFFFFFUL
49
50static int ipath_open(struct inode *, struct file *); 44static int ipath_open(struct inode *, struct file *);
51static int ipath_close(struct inode *, struct file *); 45static int ipath_close(struct inode *, struct file *);
52static ssize_t ipath_write(struct file *, const char __user *, size_t, 46static ssize_t ipath_write(struct file *, const char __user *, size_t,
@@ -63,6 +57,24 @@ static const struct file_operations ipath_file_ops = {
63 .mmap = ipath_mmap 57 .mmap = ipath_mmap
64}; 58};
65 59
60/*
61 * Convert kernel virtual addresses to physical addresses so they don't
62 * potentially conflict with the chip addresses used as mmap offsets.
63 * It doesn't really matter what mmap offset we use as long as we can
64 * interpret it correctly.
65 */
66static u64 cvt_kvaddr(void *p)
67{
68 struct page *page;
69 u64 paddr = 0;
70
71 page = vmalloc_to_page(p);
72 if (page)
73 paddr = page_to_pfn(page) << PAGE_SHIFT;
74
75 return paddr;
76}
77
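
The cookie cvt_kvaddr() produces is exported to user space in the spi_subport_* fields below and matched again in mmap_kvaddr(), so it only has to be stable and collision-free, not a real bus address. A hedged sketch of the user-side counterpart, assuming a PSM-style library that has already read the base-info struct (the fd and protection flags are illustrative):

	/* illustrative: pass the driver's cookie back as the mmap offset */
	void *uregs = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
			   MAP_SHARED, fd,
			   (off_t) kinfo->spi_subport_uregbase);
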
66static int ipath_get_base_info(struct file *fp, 78static int ipath_get_base_info(struct file *fp,
67 void __user *ubase, size_t ubase_size) 79 void __user *ubase, size_t ubase_size)
68{ 80{
@@ -87,7 +99,7 @@ static int ipath_get_base_info(struct file *fp,
87 sz = sizeof(*kinfo); 99 sz = sizeof(*kinfo);
88 /* If port sharing is not requested, allow the old size structure */ 100 /* If port sharing is not requested, allow the old size structure */
89 if (!shared) 101 if (!shared)
90 sz -= 3 * sizeof(u64); 102 sz -= 7 * sizeof(u64);
91 if (ubase_size < sz) { 103 if (ubase_size < sz) {
92 ipath_cdbg(PROC, 104 ipath_cdbg(PROC,
93 "Base size %zu, need %zu (version mismatch?)\n", 105 "Base size %zu, need %zu (version mismatch?)\n",
@@ -165,24 +177,41 @@ static int ipath_get_base_info(struct file *fp,
165 kinfo->spi_piobufbase = (u64) pd->port_piobufs + 177 kinfo->spi_piobufbase = (u64) pd->port_piobufs +
166 dd->ipath_palign * 178 dd->ipath_palign *
167 (dd->ipath_pbufsport - kinfo->spi_piocnt); 179 (dd->ipath_pbufsport - kinfo->spi_piocnt);
168 kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
169 dd->ipath_palign * pd->port_port;
170 } else { 180 } else {
171 unsigned slave = subport_fp(fp) - 1; 181 unsigned slave = subport_fp(fp) - 1;
172 182
173 kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt; 183 kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt;
174 kinfo->spi_piobufbase = (u64) pd->port_piobufs + 184 kinfo->spi_piobufbase = (u64) pd->port_piobufs +
175 dd->ipath_palign * kinfo->spi_piocnt * slave; 185 dd->ipath_palign * kinfo->spi_piocnt * slave;
176 kinfo->__spi_uregbase = ((u64) pd->subport_uregbase + 186 }
177 PAGE_SIZE * slave) & MMAP64_MASK; 187 if (shared) {
188 kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
189 dd->ipath_palign * pd->port_port;
190 kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs;
191 kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base;
192 kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr;
178 193
179 kinfo->spi_rcvhdr_base = ((u64) pd->subport_rcvhdr_base + 194 kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase +
180 pd->port_rcvhdrq_size * slave) & MMAP64_MASK; 195 PAGE_SIZE * subport_fp(fp));
181 kinfo->spi_rcvhdr_tailaddr = 196
182 (u64) pd->port_rcvhdrqtailaddr_phys & MMAP64_MASK; 197 kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base +
183 kinfo->spi_rcv_egrbufs = ((u64) pd->subport_rcvegrbuf + 198 pd->port_rcvhdrq_size * subport_fp(fp));
184 dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave) & 199 kinfo->spi_rcvhdr_tailaddr = 0;
185 MMAP64_MASK; 200 kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf +
201 pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size *
202 subport_fp(fp));
203
204 kinfo->spi_subport_uregbase =
205 cvt_kvaddr(pd->subport_uregbase);
206 kinfo->spi_subport_rcvegrbuf =
207 cvt_kvaddr(pd->subport_rcvegrbuf);
208 kinfo->spi_subport_rcvhdr_base =
209 cvt_kvaddr(pd->subport_rcvhdr_base);
210 ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
211 kinfo->spi_port, kinfo->spi_runtime_flags,
212 (unsigned long long) kinfo->spi_subport_uregbase,
213 (unsigned long long) kinfo->spi_subport_rcvegrbuf,
214 (unsigned long long) kinfo->spi_subport_rcvhdr_base);
186 } 215 }
187 216
188 kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) / 217 kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) /
@@ -199,20 +228,10 @@ static int ipath_get_base_info(struct file *fp,
199 228
200 if (master) { 229 if (master) {
201 kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER; 230 kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
202 kinfo->spi_subport_uregbase =
203 (u64) pd->subport_uregbase & MMAP64_MASK;
204 kinfo->spi_subport_rcvegrbuf =
205 (u64) pd->subport_rcvegrbuf & MMAP64_MASK;
206 kinfo->spi_subport_rcvhdr_base =
207 (u64) pd->subport_rcvhdr_base & MMAP64_MASK;
208 ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
209 kinfo->spi_port, kinfo->spi_runtime_flags,
210 (unsigned long long) kinfo->spi_subport_uregbase,
211 (unsigned long long) kinfo->spi_subport_rcvegrbuf,
212 (unsigned long long) kinfo->spi_subport_rcvhdr_base);
213 } 231 }
214 232
215 if (copy_to_user(ubase, kinfo, sizeof(*kinfo))) 233 sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
234 if (copy_to_user(ubase, kinfo, sz))
216 ret = -EFAULT; 235 ret = -EFAULT;
217 236
218bail: 237bail:
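
Clamping the final copy_to_user() to the size the user actually provided is what keeps old binaries working against the larger struct: they receive only the prefix they know about. The open-coded ternary is equivalent to the kernel's min_t() helper, shown here purely as an illustration:

	sz = min_t(size_t, ubase_size, sizeof(*kinfo));
	if (copy_to_user(ubase, kinfo, sz))
		ret = -EFAULT;
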
@@ -1132,67 +1151,55 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
1132 struct ipath_devdata *dd; 1151 struct ipath_devdata *dd;
1133 void *addr; 1152 void *addr;
1134 size_t size; 1153 size_t size;
1135 int ret; 1154 int ret = 0;
1136 1155
1137 /* If the port is not shared, all addresses should be physical */ 1156 /* If the port is not shared, all addresses should be physical */
1138 if (!pd->port_subport_cnt) { 1157 if (!pd->port_subport_cnt)
1139 ret = -EINVAL;
1140 goto bail; 1158 goto bail;
1141 }
1142 1159
1143 dd = pd->port_dd; 1160 dd = pd->port_dd;
1144 size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size; 1161 size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
1145 1162
1146 /* 1163 /*
1147 * Master has all the slave uregbase, rcvhdrq, and 1164 * Each process has all the subport uregbase, rcvhdrq, and
1148 * rcvegrbufs mmapped. 1165 * rcvegrbufs mmapped - as an array for all the processes,
1166 * and also separately for this process.
1149 */ 1167 */
1150 if (subport == 0) { 1168 if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
1151 unsigned num_slaves = pd->port_subport_cnt - 1; 1169 addr = pd->subport_uregbase;
1152 1170 size = PAGE_SIZE * pd->port_subport_cnt;
1153 if (pgaddr == ((u64) pd->subport_uregbase & MMAP64_MASK)) { 1171 } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
1154 addr = pd->subport_uregbase; 1172 addr = pd->subport_rcvhdr_base;
1155 size = PAGE_SIZE * num_slaves; 1173 size = pd->port_rcvhdrq_size * pd->port_subport_cnt;
1156 } else if (pgaddr == ((u64) pd->subport_rcvhdr_base & 1174 } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
1157 MMAP64_MASK)) { 1175 addr = pd->subport_rcvegrbuf;
1158 addr = pd->subport_rcvhdr_base; 1176 size *= pd->port_subport_cnt;
1159 size = pd->port_rcvhdrq_size * num_slaves; 1177 } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
1160 } else if (pgaddr == ((u64) pd->subport_rcvegrbuf & 1178 PAGE_SIZE * subport)) {
1161 MMAP64_MASK)) { 1179 addr = pd->subport_uregbase + PAGE_SIZE * subport;
1162 addr = pd->subport_rcvegrbuf; 1180 size = PAGE_SIZE;
1163 size *= num_slaves; 1181 } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
1164 } else { 1182 pd->port_rcvhdrq_size * subport)) {
1165 ret = -EINVAL; 1183 addr = pd->subport_rcvhdr_base +
1166 goto bail; 1184 pd->port_rcvhdrq_size * subport;
1167 } 1185 size = pd->port_rcvhdrq_size;
1168 } else if (pgaddr == (((u64) pd->subport_uregbase + 1186 } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
1169 PAGE_SIZE * (subport - 1)) & MMAP64_MASK)) { 1187 size * subport)) {
1170 addr = pd->subport_uregbase + PAGE_SIZE * (subport - 1); 1188 addr = pd->subport_rcvegrbuf + size * subport;
1171 size = PAGE_SIZE; 1189 /* rcvegrbufs are read-only on the slave */
1172 } else if (pgaddr == (((u64) pd->subport_rcvhdr_base + 1190 if (vma->vm_flags & VM_WRITE) {
1173 pd->port_rcvhdrq_size * (subport - 1)) & 1191 dev_info(&dd->pcidev->dev,
1174 MMAP64_MASK)) { 1192 "Can't map eager buffers as "
1175 addr = pd->subport_rcvhdr_base + 1193 "writable (flags=%lx)\n", vma->vm_flags);
1176 pd->port_rcvhdrq_size * (subport - 1); 1194 ret = -EPERM;
1177 size = pd->port_rcvhdrq_size; 1195 goto bail;
1178 } else if (pgaddr == (((u64) pd->subport_rcvegrbuf + 1196 }
1179 size * (subport - 1)) & MMAP64_MASK)) { 1197 /*
1180 addr = pd->subport_rcvegrbuf + size * (subport - 1); 1198 * Don't allow permission to later change to writeable
1181 /* rcvegrbufs are read-only on the slave */ 1199 * with mprotect.
1182 if (vma->vm_flags & VM_WRITE) { 1200 */
1183 dev_info(&dd->pcidev->dev, 1201 vma->vm_flags &= ~VM_MAYWRITE;
1184 "Can't map eager buffers as "
1185 "writable (flags=%lx)\n", vma->vm_flags);
1186 ret = -EPERM;
1187 goto bail;
1188 }
1189 /*
1190 * Don't allow permission to later change to writeable
1191 * with mprotect.
1192 */
1193 vma->vm_flags &= ~VM_MAYWRITE;
1194 } else { 1202 } else {
1195 ret = -EINVAL;
1196 goto bail; 1203 goto bail;
1197 } 1204 }
1198 len = vma->vm_end - vma->vm_start; 1205 len = vma->vm_end - vma->vm_start;
@@ -1205,7 +1212,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
1205 vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; 1212 vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
1206 vma->vm_ops = &ipath_file_vm_ops; 1213 vma->vm_ops = &ipath_file_vm_ops;
1207 vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; 1214 vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
1208 ret = 0; 1215 ret = 1;
1209 1216
1210bail: 1217bail:
1211 return ret; 1218 return ret;
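
mmap_kvaddr() now uses a three-way return instead of treating every unmatched address as an error: negative means a real failure, zero means "not a kernel-virtual cookie, keep looking", and one means the mapping was installed. The caller's side of that contract, as it appears in the next hunk:

	ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
	if (ret) {
		if (ret > 0)
			ret = 0;	/* handled; report success */
		goto bail;
	}
	/* ret == 0: fall through and try the HW/memory addresses */
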
@@ -1265,19 +1272,20 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
1265 * Check for kernel virtual addresses first, anything else must 1272 * Check for kernel virtual addresses first, anything else must
1266 * match a HW or memory address. 1273 * match a HW or memory address.
1267 */ 1274 */
1268 if (pgaddr >= (1ULL<<40)) { 1275 ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
1269 ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp)); 1276 if (ret) {
1277 if (ret > 0)
1278 ret = 0;
1270 goto bail; 1279 goto bail;
1271 } 1280 }
1272 1281
1282 ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
1273 if (!pd->port_subport_cnt) { 1283 if (!pd->port_subport_cnt) {
1274 /* port is not shared */ 1284 /* port is not shared */
1275 ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
1276 piocnt = dd->ipath_pbufsport; 1285 piocnt = dd->ipath_pbufsport;
1277 piobufs = pd->port_piobufs; 1286 piobufs = pd->port_piobufs;
1278 } else if (!subport_fp(fp)) { 1287 } else if (!subport_fp(fp)) {
1279 /* caller is the master */ 1288 /* caller is the master */
1280 ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
1281 piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) + 1289 piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) +
1282 (dd->ipath_pbufsport % pd->port_subport_cnt); 1290 (dd->ipath_pbufsport % pd->port_subport_cnt);
1283 piobufs = pd->port_piobufs + 1291 piobufs = pd->port_piobufs +
@@ -1286,7 +1294,6 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
1286 unsigned slave = subport_fp(fp) - 1; 1294 unsigned slave = subport_fp(fp) - 1;
1287 1295
1288 /* caller is a slave */ 1296 /* caller is a slave */
1289 ureg = 0;
1290 piocnt = dd->ipath_pbufsport / pd->port_subport_cnt; 1297 piocnt = dd->ipath_pbufsport / pd->port_subport_cnt;
1291 piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave; 1298 piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
1292 } 1299 }
@@ -1300,9 +1307,6 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
1300 ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, 1307 ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
1301 (void *) dd->ipath_pioavailregs_dma, 1308 (void *) dd->ipath_pioavailregs_dma,
1302 "pioavail registers"); 1309 "pioavail registers");
1303 else if (subport_fp(fp))
1304 /* Subports don't mmap the physical receive buffers */
1305 ret = -EINVAL;
1306 else if (pgaddr == pd->port_rcvegr_phys) 1310 else if (pgaddr == pd->port_rcvegr_phys)
1307 ret = mmap_rcvegrbufs(vma, pd); 1311 ret = mmap_rcvegrbufs(vma, pd);
1308 else if (pgaddr == (u64) pd->port_rcvhdrq_phys) 1312 else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
@@ -1400,32 +1404,41 @@ static int init_subports(struct ipath_devdata *dd,
1400 const struct ipath_user_info *uinfo) 1404 const struct ipath_user_info *uinfo)
1401{ 1405{
1402 int ret = 0; 1406 int ret = 0;
1403 unsigned num_slaves; 1407 unsigned num_subports;
1404 size_t size; 1408 size_t size;
1405 1409
1406 /* Old user binaries don't know about subports */
1407 if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
1408 goto bail;
1409 /* 1410 /*
1410 * If the user is requesting zero or one port, 1411 * If the user is requesting zero or one port,
1411 * skip the subport allocation. 1412 * skip the subport allocation.
1412 */ 1413 */
1413 if (uinfo->spu_subport_cnt <= 1) 1414 if (uinfo->spu_subport_cnt <= 1)
1414 goto bail; 1415 goto bail;
1415 if (uinfo->spu_subport_cnt > 4) { 1416
1417 /* Old user binaries don't know about new subport implementation */
1418 if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) {
1419 dev_info(&dd->pcidev->dev,
1420 "Mismatched user minor version (%d) and driver "
1421 "minor version (%d) while port sharing. Ensure "
1422 "that driver and library are from the same "
1423 "release.\n",
1424 (int) (uinfo->spu_userversion & 0xffff),
1425 IPATH_USER_SWMINOR);
1426 goto bail;
1427 }
1428 if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) {
1416 ret = -EINVAL; 1429 ret = -EINVAL;
1417 goto bail; 1430 goto bail;
1418 } 1431 }
1419 1432
1420 num_slaves = uinfo->spu_subport_cnt - 1; 1433 num_subports = uinfo->spu_subport_cnt;
1421 pd->subport_uregbase = vmalloc(PAGE_SIZE * num_slaves); 1434 pd->subport_uregbase = vmalloc(PAGE_SIZE * num_subports);
1422 if (!pd->subport_uregbase) { 1435 if (!pd->subport_uregbase) {
1423 ret = -ENOMEM; 1436 ret = -ENOMEM;
1424 goto bail; 1437 goto bail;
1425 } 1438 }
1426 /* Note: pd->port_rcvhdrq_size isn't initialized yet. */ 1439 /* Note: pd->port_rcvhdrq_size isn't initialized yet. */
1427 size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * 1440 size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
1428 sizeof(u32), PAGE_SIZE) * num_slaves; 1441 sizeof(u32), PAGE_SIZE) * num_subports;
1429 pd->subport_rcvhdr_base = vmalloc(size); 1442 pd->subport_rcvhdr_base = vmalloc(size);
1430 if (!pd->subport_rcvhdr_base) { 1443 if (!pd->subport_rcvhdr_base) {
1431 ret = -ENOMEM; 1444 ret = -ENOMEM;
@@ -1434,7 +1447,7 @@ static int init_subports(struct ipath_devdata *dd,
1434 1447
1435 pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks * 1448 pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks *
1436 pd->port_rcvegrbuf_size * 1449 pd->port_rcvegrbuf_size *
1437 num_slaves); 1450 num_subports);
1438 if (!pd->subport_rcvegrbuf) { 1451 if (!pd->subport_rcvegrbuf) {
1439 ret = -ENOMEM; 1452 ret = -ENOMEM;
1440 goto bail_rhdr; 1453 goto bail_rhdr;
@@ -1443,6 +1456,12 @@ static int init_subports(struct ipath_devdata *dd,
1443 pd->port_subport_cnt = uinfo->spu_subport_cnt; 1456 pd->port_subport_cnt = uinfo->spu_subport_cnt;
1444 pd->port_subport_id = uinfo->spu_subport_id; 1457 pd->port_subport_id = uinfo->spu_subport_id;
1445 pd->active_slaves = 1; 1458 pd->active_slaves = 1;
1459 set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
1460 memset(pd->subport_uregbase, 0, PAGE_SIZE * num_subports);
1461 memset(pd->subport_rcvhdr_base, 0, size);
1462 memset(pd->subport_rcvegrbuf, 0, pd->port_rcvegrbuf_chunks *
1463 pd->port_rcvegrbuf_size *
1464 num_subports);
1446 goto bail; 1465 goto bail;
1447 1466
1448bail_rhdr: 1467bail_rhdr:
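
Zeroing the three vmalloc'ed regions matters because they are handed to user space via mmap, so stale kernel data must not leak through. On this tree the allocate-then-memset pair is spelled out; later kernels could collapse each pair with vzalloc(), sketched here only as a comparison:

	/* hypothetical, on a kernel that provides vzalloc(): */
	pd->subport_uregbase = vzalloc(PAGE_SIZE * num_subports);
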
@@ -1573,18 +1592,19 @@ static int find_best_unit(struct file *fp,
1573 */ 1592 */
1574 if (!cpus_empty(current->cpus_allowed) && 1593 if (!cpus_empty(current->cpus_allowed) &&
1575 !cpus_full(current->cpus_allowed)) { 1594 !cpus_full(current->cpus_allowed)) {
1576 int ncpus = num_online_cpus(), curcpu = -1; 1595 int ncpus = num_online_cpus(), curcpu = -1, nset = 0;
1577 for (i = 0; i < ncpus; i++) 1596 for (i = 0; i < ncpus; i++)
1578 if (cpu_isset(i, current->cpus_allowed)) { 1597 if (cpu_isset(i, current->cpus_allowed)) {
1579 ipath_cdbg(PROC, "%s[%u] affinity set for " 1598 ipath_cdbg(PROC, "%s[%u] affinity set for "
1580 "cpu %d\n", current->comm, 1599 "cpu %d/%d\n", current->comm,
1581 current->pid, i); 1600 current->pid, i, ncpus);
1582 curcpu = i; 1601 curcpu = i;
1602 nset++;
1583 } 1603 }
1584 if (curcpu != -1) { 1604 if (curcpu != -1 && nset != ncpus) {
1585 if (npresent) { 1605 if (npresent) {
1586 prefunit = curcpu / (ncpus / npresent); 1606 prefunit = curcpu / (ncpus / npresent);
 1587 ipath_dbg("%s[%u] %d chips, %d cpus, " 1607 ipath_cdbg(PROC, "%s[%u] %d chips, %d cpus, "
1588 "%d cpus/chip, select unit %d\n", 1608 "%d cpus/chip, select unit %d\n",
1589 current->comm, current->pid, 1609 current->comm, current->pid,
1590 npresent, ncpus, ncpus / npresent, 1610 npresent, ncpus, ncpus / npresent,
@@ -1764,11 +1784,17 @@ static int ipath_do_user_init(struct file *fp,
1764 const struct ipath_user_info *uinfo) 1784 const struct ipath_user_info *uinfo)
1765{ 1785{
1766 int ret; 1786 int ret;
1767 struct ipath_portdata *pd; 1787 struct ipath_portdata *pd = port_fp(fp);
1768 struct ipath_devdata *dd; 1788 struct ipath_devdata *dd;
1769 u32 head32; 1789 u32 head32;
1770 1790
1771 pd = port_fp(fp); 1791 /* Subports don't need to initialize anything since master did it. */
1792 if (subport_fp(fp)) {
1793 ret = wait_event_interruptible(pd->port_wait,
1794 !test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag));
1795 goto done;
1796 }
1797
1772 dd = pd->port_dd; 1798 dd = pd->port_dd;
1773 1799
1774 if (uinfo->spu_rcvhdrsize) { 1800 if (uinfo->spu_rcvhdrsize) {
@@ -1826,6 +1852,11 @@ static int ipath_do_user_init(struct file *fp,
1826 dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD); 1852 dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
1827 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 1853 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1828 dd->ipath_rcvctrl); 1854 dd->ipath_rcvctrl);
1855 /* Notify any waiting slaves */
1856 if (pd->port_subport_cnt) {
1857 clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
1858 wake_up(&pd->port_wait);
1859 }
1829done: 1860done:
1830 return ret; 1861 return ret;
1831} 1862}
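
Together with the init_subports() and rcvctrl changes, the port_flag bit forms a one-shot barrier between the master process and its slaves. Condensed from the three hunks (all identifiers are from this diff):

	/* master, in init_subports(): mark the shared port not yet ready */
	set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);

	/* each slave, in ipath_do_user_init(): sleep until ready */
	ret = wait_event_interruptible(pd->port_wait,
		!test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag));

	/* master, once rcvctrl is programmed: release the slaves */
	clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
	wake_up(&pd->port_wait);
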
@@ -2017,6 +2048,17 @@ static int ipath_get_slave_info(struct ipath_portdata *pd,
2017 return ret; 2048 return ret;
2018} 2049}
2019 2050
2051static int ipath_force_pio_avail_update(struct ipath_devdata *dd)
2052{
2053 u64 reg = dd->ipath_sendctrl;
2054
2055 clear_bit(IPATH_S_PIOBUFAVAILUPD, &reg);
2056 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, reg);
2057 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
2058
2059 return 0;
2060}
2061
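
ipath_force_pio_avail_update() toggles IPATH_S_PIOBUFAVAILUPD off and back on in the send-control register; the two back-to-back writes force the chip to refresh the DMA'ed PIO-availability shadow. User code reaches it through the new IPATH_CMD_PIOAVAILUPD command, which carries no payload. A hedged sketch of the user-side call (struct ipath_cmd layout per ipath_common.h, not shown in this diff):

	struct ipath_cmd cmd = { .type = IPATH_CMD_PIOAVAILUPD };
	if (write(fd, &cmd, sizeof(cmd)) < 0)
		perror("IPATH_CMD_PIOAVAILUPD");
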
2020static ssize_t ipath_write(struct file *fp, const char __user *data, 2062static ssize_t ipath_write(struct file *fp, const char __user *data,
2021 size_t count, loff_t *off) 2063 size_t count, loff_t *off)
2022{ 2064{
@@ -2071,27 +2113,35 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
2071 dest = &cmd.cmd.part_key; 2113 dest = &cmd.cmd.part_key;
2072 src = &ucmd->cmd.part_key; 2114 src = &ucmd->cmd.part_key;
2073 break; 2115 break;
2074 case IPATH_CMD_SLAVE_INFO: 2116 case __IPATH_CMD_SLAVE_INFO:
2075 copy = sizeof(cmd.cmd.slave_mask_addr); 2117 copy = sizeof(cmd.cmd.slave_mask_addr);
2076 dest = &cmd.cmd.slave_mask_addr; 2118 dest = &cmd.cmd.slave_mask_addr;
2077 src = &ucmd->cmd.slave_mask_addr; 2119 src = &ucmd->cmd.slave_mask_addr;
2078 break; 2120 break;
 2121 case IPATH_CMD_PIOAVAILUPD: /* force an update of PIOAvail reg */
2122 copy = 0;
2123 src = NULL;
2124 dest = NULL;
2125 break;
2079 default: 2126 default:
2080 ret = -EINVAL; 2127 ret = -EINVAL;
2081 goto bail; 2128 goto bail;
2082 } 2129 }
2083 2130
2084 if ((count - consumed) < copy) { 2131 if (copy) {
2085 ret = -EINVAL; 2132 if ((count - consumed) < copy) {
2086 goto bail; 2133 ret = -EINVAL;
2087 } 2134 goto bail;
2135 }
2088 2136
2089 if (copy_from_user(dest, src, copy)) { 2137 if (copy_from_user(dest, src, copy)) {
2090 ret = -EFAULT; 2138 ret = -EFAULT;
2091 goto bail; 2139 goto bail;
2140 }
2141
2142 consumed += copy;
2092 } 2143 }
2093 2144
2094 consumed += copy;
2095 pd = port_fp(fp); 2145 pd = port_fp(fp);
2096 if (!pd && cmd.type != __IPATH_CMD_USER_INIT && 2146 if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
2097 cmd.type != IPATH_CMD_ASSIGN_PORT) { 2147 cmd.type != IPATH_CMD_ASSIGN_PORT) {
@@ -2137,11 +2187,14 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
2137 case IPATH_CMD_SET_PART_KEY: 2187 case IPATH_CMD_SET_PART_KEY:
2138 ret = ipath_set_part_key(pd, cmd.cmd.part_key); 2188 ret = ipath_set_part_key(pd, cmd.cmd.part_key);
2139 break; 2189 break;
2140 case IPATH_CMD_SLAVE_INFO: 2190 case __IPATH_CMD_SLAVE_INFO:
2141 ret = ipath_get_slave_info(pd, 2191 ret = ipath_get_slave_info(pd,
2142 (void __user *) (unsigned long) 2192 (void __user *) (unsigned long)
2143 cmd.cmd.slave_mask_addr); 2193 cmd.cmd.slave_mask_addr);
2144 break; 2194 break;
2195 case IPATH_CMD_PIOAVAILUPD:
2196 ret = ipath_force_pio_avail_update(pd->port_dd);
2197 break;
2145 } 2198 }
2146 2199
2147 if (ret >= 0) 2200 if (ret >= 0)
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 5b40a846ff95..ed55979bfd34 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -451,12 +451,18 @@ bail:
451 return ret; 451 return ret;
452} 452}
453 453
454static void remove_file(struct dentry *parent, char *name) 454static int remove_file(struct dentry *parent, char *name)
455{ 455{
456 struct dentry *tmp; 456 struct dentry *tmp;
457 int ret;
457 458
458 tmp = lookup_one_len(name, parent, strlen(name)); 459 tmp = lookup_one_len(name, parent, strlen(name));
459 460
461 if (IS_ERR(tmp)) {
462 ret = PTR_ERR(tmp);
463 goto bail;
464 }
465
460 spin_lock(&dcache_lock); 466 spin_lock(&dcache_lock);
461 spin_lock(&tmp->d_lock); 467 spin_lock(&tmp->d_lock);
462 if (!(d_unhashed(tmp) && tmp->d_inode)) { 468 if (!(d_unhashed(tmp) && tmp->d_inode)) {
@@ -469,6 +475,14 @@ static void remove_file(struct dentry *parent, char *name)
469 spin_unlock(&tmp->d_lock); 475 spin_unlock(&tmp->d_lock);
470 spin_unlock(&dcache_lock); 476 spin_unlock(&dcache_lock);
471 } 477 }
478
479 ret = 0;
480bail:
481 /*
482 * We don't expect clients to care about the return value, but
483 * it's there if they need it.
484 */
485 return ret;
472} 486}
473 487
474static int remove_device_files(struct super_block *sb, 488static int remove_device_files(struct super_block *sb,
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 7468477ba837..4171198fc202 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -43,6 +43,9 @@
43#include "ipath_kernel.h" 43#include "ipath_kernel.h"
44#include "ipath_registers.h" 44#include "ipath_registers.h"
45 45
46static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64);
47
48
46/* 49/*
47 * This lists the InfiniPath registers, in the actual chip layout. 50 * This lists the InfiniPath registers, in the actual chip layout.
48 * This structure should never be directly accessed. 51 * This structure should never be directly accessed.
@@ -208,8 +211,8 @@ static const struct ipath_kregs ipath_ht_kregs = {
208 .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus), 211 .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
209 .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig), 212 .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
210 /* 213 /*
 211 * These should not be used directly via ipath_read_kreg64(), 214 * These should not be used directly via ipath_write_kreg(),
 212 * use them with ipath_read_kreg64_port(), 215 * use them with ipath_write_kreg_port(),
213 */ 216 */
214 .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), 217 .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
215 .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0) 218 .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0)
@@ -284,6 +287,14 @@ static const struct ipath_cregs ipath_ht_cregs = {
284#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000 287#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000
285#define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000 288#define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000
286 289
290
291/* TID entries (memory), HT-only */
292#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
293#define INFINIPATH_RT_VALID 0x8000000000000000ULL
294#define INFINIPATH_RT_ADDR_SHIFT 0
295#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL
296#define INFINIPATH_RT_BUFSIZE_SHIFT 48
297
287/* 298/*
288 * masks and bits that are different in different chips, or present only 299 * masks and bits that are different in different chips, or present only
289 * in one 300 * in one
@@ -402,6 +413,14 @@ static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
402 INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), 413 INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
403}; 414};
404 415
416#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
417 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
418 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
419#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \
420 << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
421
422static int ipath_ht_txe_recover(struct ipath_devdata *);
423
405/** 424/**
406 * ipath_ht_handle_hwerrors - display hardware errors. 425 * ipath_ht_handle_hwerrors - display hardware errors.
407 * @dd: the infinipath device 426 * @dd: the infinipath device
@@ -450,13 +469,12 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
450 469
451 /* 470 /*
452 * make sure we get this much out, unless told to be quiet, 471 * make sure we get this much out, unless told to be quiet,
472 * it's a parity error we may recover from,
453 * or it's occurred within the last 5 seconds 473 * or it's occurred within the last 5 seconds
454 */ 474 */
455 if ((hwerrs & ~(dd->ipath_lasthwerror | 475 if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY |
456 ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | 476 RXE_EAGER_PARITY)) ||
457 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) 477 (ipath_debug & __IPATH_VERBDBG))
458 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
459 (ipath_debug & __IPATH_VERBDBG))
460 dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " 478 dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
461 "(cleared)\n", (unsigned long long) hwerrs); 479 "(cleared)\n", (unsigned long long) hwerrs);
462 dd->ipath_lasthwerror |= hwerrs; 480 dd->ipath_lasthwerror |= hwerrs;
@@ -467,7 +485,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
467 (hwerrs & ~dd->ipath_hwe_bitsextant)); 485 (hwerrs & ~dd->ipath_hwe_bitsextant));
468 486
469 ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); 487 ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
470 if (ctrl & INFINIPATH_C_FREEZEMODE) { 488 if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
471 /* 489 /*
472 * parity errors in send memory are recoverable, 490 * parity errors in send memory are recoverable,
473 * just cancel the send (if indicated in * sendbuffererror), 491 * just cancel the send (if indicated in * sendbuffererror),
@@ -476,50 +494,14 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
476 * occur if a processor speculative read is done to the PIO 494 * occur if a processor speculative read is done to the PIO
477 * buffer while we are sending a packet, for example. 495 * buffer while we are sending a packet, for example.
478 */ 496 */
479 if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | 497 if ((hwerrs & TXE_PIO_PARITY) && ipath_ht_txe_recover(dd))
480 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) 498 hwerrs &= ~TXE_PIO_PARITY;
481 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { 499 if (hwerrs & RXE_EAGER_PARITY)
482 ipath_stats.sps_txeparity++; 500 ipath_dev_err(dd, "RXE parity, Eager TID error is not "
483 ipath_dbg("Recovering from TXE parity error (%llu), " 501 "recoverable\n");
484 "hwerrstatus=%llx\n", 502 if (!hwerrs) {
485 (unsigned long long) ipath_stats.sps_txeparity, 503 ipath_dbg("Clearing freezemode on ignored or "
486 (unsigned long long) hwerrs); 504 "recovered hardware error\n");
487 ipath_disarm_senderrbufs(dd);
488 hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
489 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
490 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
491 if (!hwerrs) { /* else leave in freeze mode */
492 ipath_write_kreg(dd,
493 dd->ipath_kregs->kr_control,
494 dd->ipath_control);
495 return;
496 }
497 }
498 if (hwerrs) {
499 /*
500 * if any set that we aren't ignoring; only
501 * make the complaint once, in case it's stuck
502 * or recurring, and we get here multiple
503 * times.
504 */
505 if (dd->ipath_flags & IPATH_INITTED) {
506 ipath_dev_err(dd, "Fatal Hardware Error (freeze "
507 "mode), no longer usable, SN %.16s\n",
508 dd->ipath_serial);
509 isfatal = 1;
510 }
511 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
512 /* mark as having had error */
513 *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
514 /*
515 * mark as not usable, at a minimum until driver
516 * is reloaded, probably until reboot, since no
517 * other reset is possible.
518 */
519 dd->ipath_flags &= ~IPATH_INITTED;
520 } else {
521 ipath_dbg("Clearing freezemode on ignored hardware "
522 "error\n");
523 ctrl &= ~INFINIPATH_C_FREEZEMODE; 505 ctrl &= ~INFINIPATH_C_FREEZEMODE;
524 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 506 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
525 ctrl); 507 ctrl);
@@ -587,7 +569,39 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
587 dd->ipath_hwerrmask); 569 dd->ipath_hwerrmask);
588 } 570 }
589 571
590 ipath_dev_err(dd, "%s hardware error\n", msg); 572 if (hwerrs) {
573 /*
574 * if any set that we aren't ignoring; only
575 * make the complaint once, in case it's stuck
576 * or recurring, and we get here multiple
577 * times.
578 * force link down, so switch knows, and
579 * LEDs are turned off
580 */
581 if (dd->ipath_flags & IPATH_INITTED) {
582 ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
583 ipath_setup_ht_setextled(dd,
584 INFINIPATH_IBCS_L_STATE_DOWN,
585 INFINIPATH_IBCS_LT_STATE_DISABLED);
586 ipath_dev_err(dd, "Fatal Hardware Error (freeze "
587 "mode), no longer usable, SN %.16s\n",
588 dd->ipath_serial);
589 isfatal = 1;
590 }
591 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
592 /* mark as having had error */
593 *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
594 /*
595 * mark as not usable, at a minimum until driver
596 * is reloaded, probably until reboot, since no
597 * other reset is possible.
598 */
599 dd->ipath_flags &= ~IPATH_INITTED;
600 }
601 else
602 *msg = 0; /* recovered from all of them */
603 if (*msg)
604 ipath_dev_err(dd, "%s hardware error\n", msg);
591 if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) 605 if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
592 /* 606 /*
593 * for status file; if no trailing brace is copied, 607 * for status file; if no trailing brace is copied,
@@ -658,7 +672,8 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
658 if (n) 672 if (n)
659 snprintf(name, namelen, "%s", n); 673 snprintf(name, namelen, "%s", n);
660 674
661 if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) { 675 if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
676 dd->ipath_minrev > 3)) {
662 /* 677 /*
663 * This version of the driver only supports Rev 3.2 and 3.3 678 * This version of the driver only supports Rev 3.2 and 3.3
664 */ 679 */
@@ -1163,6 +1178,8 @@ static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
1163 1178
1164 if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) 1179 if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
1165 ipath_dev_err(dd, "MemBIST did not complete!\n"); 1180 ipath_dev_err(dd, "MemBIST did not complete!\n");
1181 if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT)
1182 ipath_dbg("MemBIST corrected\n");
1166 1183
1167 ipath_check_htlink(dd); 1184 ipath_check_htlink(dd);
1168 1185
@@ -1366,6 +1383,9 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
1366 u64 __iomem *tidptr, u32 type, 1383 u64 __iomem *tidptr, u32 type,
1367 unsigned long pa) 1384 unsigned long pa)
1368{ 1385{
1386 if (!dd->ipath_kregbase)
1387 return;
1388
1369 if (pa != dd->ipath_tidinvalid) { 1389 if (pa != dd->ipath_tidinvalid) {
1370 if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) { 1390 if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) {
1371 dev_info(&dd->pcidev->dev, 1391 dev_info(&dd->pcidev->dev,
@@ -1382,10 +1402,10 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
1382 pa |= lenvalid | INFINIPATH_RT_VALID; 1402 pa |= lenvalid | INFINIPATH_RT_VALID;
1383 } 1403 }
1384 } 1404 }
1385 if (dd->ipath_kregbase) 1405 writeq(pa, tidptr);
1386 writeq(pa, tidptr);
1387} 1406}
1388 1407
1408
1389/** 1409/**
1390 * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager 1410 * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager
1391 * @dd: the infinipath device 1411 * @dd: the infinipath device
@@ -1515,7 +1535,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
1515 INFINIPATH_S_ABORT); 1535 INFINIPATH_S_ABORT);
1516 1536
1517 ipath_get_eeprom_info(dd); 1537 ipath_get_eeprom_info(dd);
1518 if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && 1538 if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
1519 dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') { 1539 dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') {
1520 /* 1540 /*
1521 * Later production QHT7040 has same changes as QHT7140, so 1541 * Later production QHT7040 has same changes as QHT7140, so
@@ -1528,13 +1548,31 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
1528 return 0; 1548 return 0;
1529} 1549}
1530 1550
1551
1552static int ipath_ht_txe_recover(struct ipath_devdata *dd)
1553{
1554 int cnt = ++ipath_stats.sps_txeparity;
1555 if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) {
1556 if (cnt == IPATH_MAX_PARITY_ATTEMPTS)
1557 ipath_dev_err(dd,
1558 "Too many attempts to recover from "
1559 "TXE parity, giving up\n");
1560 return 0;
1561 }
1562 dev_info(&dd->pcidev->dev,
1563 "Recovering from TXE PIO parity error\n");
1564 ipath_disarm_senderrbufs(dd, 1);
1565 return 1;
1566}
1567
1568
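
The equality test inside the limit check is what keeps the log quiet: the "giving up" message fires exactly once, on the attempt that first reaches the cap, and every later parity error returns silently. The shape of the pattern, with illustrative names:

	/* illustrative: complain once, then stay silent */
	if (cnt >= LIMIT) {
		if (cnt == LIMIT)
			log_giving_up();
		return 0;	/* caller leaves the chip in freeze mode */
	}
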
1531/** 1569/**
1532 * ipath_init_ht_get_base_info - set chip-specific flags for user code 1570 * ipath_init_ht_get_base_info - set chip-specific flags for user code
1533 * @dd: the infinipath device 1571 * @dd: the infinipath device
1534 * @kbase: ipath_base_info pointer 1572 * @kbase: ipath_base_info pointer
1535 * 1573 *
1536 * We set the PCIE flag because the lower bandwidth on PCIe vs 1574 * We set the PCIE flag because the lower bandwidth on PCIe vs
1537 * HyperTransport can affect some user packet algorithims. 1575 * HyperTransport can affect some user packet algorithms.
1538 */ 1576 */
1539static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase) 1577static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
1540{ 1578{
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index ae8bf9950c6d..1b9c30857754 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -43,6 +43,8 @@
43#include "ipath_kernel.h" 43#include "ipath_kernel.h"
44#include "ipath_registers.h" 44#include "ipath_registers.h"
45 45
46static void ipath_setup_pe_setextled(struct ipath_devdata *, u64, u64);
47
46/* 48/*
47 * This file contains all the chip-specific register information and 49 * This file contains all the chip-specific register information and
48 * access functions for the QLogic InfiniPath PCI-Express chip. 50 * access functions for the QLogic InfiniPath PCI-Express chip.
@@ -207,8 +209,8 @@ static const struct ipath_kregs ipath_pe_kregs = {
207 .kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg), 209 .kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg),
208 210
209 /* 211 /*
 210 * These should not be used directly via ipath_read_kreg64(), 212 * These should not be used directly via ipath_write_kreg(),
 211 * use them with ipath_read_kreg64_port() 213 * use them with ipath_write_kreg_port(),
212 */ 214 */
213 .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), 215 .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
214 .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0), 216 .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
@@ -321,6 +323,12 @@ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
321 INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), 323 INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
322}; 324};
323 325
326#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
327 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
328 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
329
330static int ipath_pe_txe_recover(struct ipath_devdata *);
331
324/** 332/**
325 * ipath_pe_handle_hwerrors - display hardware errors. 333 * ipath_pe_handle_hwerrors - display hardware errors.
326 * @dd: the infinipath device 334 * @dd: the infinipath device
@@ -394,32 +402,21 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
394 * occur if a processor speculative read is done to the PIO 402 * occur if a processor speculative read is done to the PIO
395 * buffer while we are sending a packet, for example. 403 * buffer while we are sending a packet, for example.
396 */ 404 */
397 if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | 405 if ((hwerrs & TXE_PIO_PARITY) && ipath_pe_txe_recover(dd))
398 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) 406 hwerrs &= ~TXE_PIO_PARITY;
399 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
400 ipath_stats.sps_txeparity++;
401 ipath_dbg("Recovering from TXE parity error (%llu), "
402 "hwerrstatus=%llx\n",
403 (unsigned long long) ipath_stats.sps_txeparity,
404 (unsigned long long) hwerrs);
405 ipath_disarm_senderrbufs(dd);
406 hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
407 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
408 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
409 if (!hwerrs) { /* else leave in freeze mode */
410 ipath_write_kreg(dd,
411 dd->ipath_kregs->kr_control,
412 dd->ipath_control);
413 return;
414 }
415 }
416 if (hwerrs) { 407 if (hwerrs) {
417 /* 408 /*
418 * if any set that we aren't ignoring only make the 409 * if any set that we aren't ignoring only make the
419 * complaint once, in case it's stuck or recurring, 410 * complaint once, in case it's stuck or recurring,
420 * and we get here multiple times 411 * and we get here multiple times
412 * Force link down, so switch knows, and
413 * LEDs are turned off
421 */ 414 */
422 if (dd->ipath_flags & IPATH_INITTED) { 415 if (dd->ipath_flags & IPATH_INITTED) {
416 ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
417 ipath_setup_pe_setextled(dd,
418 INFINIPATH_IBCS_L_STATE_DOWN,
419 INFINIPATH_IBCS_LT_STATE_DISABLED);
423 ipath_dev_err(dd, "Fatal Hardware Error (freeze " 420 ipath_dev_err(dd, "Fatal Hardware Error (freeze "
424 "mode), no longer usable, SN %.16s\n", 421 "mode), no longer usable, SN %.16s\n",
425 dd->ipath_serial); 422 dd->ipath_serial);
@@ -493,7 +490,8 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
493 dd->ipath_hwerrmask); 490 dd->ipath_hwerrmask);
494 } 491 }
495 492
496 ipath_dev_err(dd, "%s hardware error\n", msg); 493 if (*msg)
494 ipath_dev_err(dd, "%s hardware error\n", msg);
497 if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) { 495 if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) {
498 /* 496 /*
499 * for /sys status file ; if no trailing } is copied, we'll 497 * for /sys status file ; if no trailing } is copied, we'll
@@ -581,6 +579,8 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
581 579
582 if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) 580 if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
583 ipath_dev_err(dd, "MemBIST did not complete!\n"); 581 ipath_dev_err(dd, "MemBIST did not complete!\n");
582 if (extsval & INFINIPATH_EXTS_MEMBIST_FOUND)
583 ipath_dbg("MemBIST corrected\n");
584 584
585 val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */ 585 val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */
586 586
@@ -1293,7 +1293,7 @@ int __attribute__((weak)) ipath_unordered_wc(void)
1293 * @kbase: ipath_base_info pointer 1293 * @kbase: ipath_base_info pointer
1294 * 1294 *
1295 * We set the PCIE flag because the lower bandwidth on PCIe vs 1295 * We set the PCIE flag because the lower bandwidth on PCIe vs
1296 * HyperTransport can affect some user packet algorithims. 1296 * HyperTransport can affect some user packet algorithms.
1297 */ 1297 */
1298static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase) 1298static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
1299{ 1299{
@@ -1330,6 +1330,35 @@ static void ipath_pe_free_irq(struct ipath_devdata *dd)
1330 dd->ipath_irq = 0; 1330 dd->ipath_irq = 0;
1331} 1331}
1332 1332
1333/*
1334 * On platforms using this chip, and not having ordered WC stores, we
1335 * can get TXE parity errors due to speculative reads to the PIO buffers,
 1336 * and this, due to a chip bug, can result in (many) false parity error
 1337 * reports. So it's a debug print on those systems, and an info print
 1338 * on systems where the speculative reads don't occur.
1339 * Because we can get lots of false errors, we have no upper limit
1340 * on recovery attempts on those platforms.
1341 */
1342static int ipath_pe_txe_recover(struct ipath_devdata *dd)
1343{
1344 if (ipath_unordered_wc())
1345 ipath_dbg("Recovering from TXE PIO parity error\n");
1346 else {
1347 int cnt = ++ipath_stats.sps_txeparity;
1348 if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) {
1349 if (cnt == IPATH_MAX_PARITY_ATTEMPTS)
1350 ipath_dev_err(dd,
1351 "Too many attempts to recover from "
1352 "TXE parity, giving up\n");
1353 return 0;
1354 }
1355 dev_info(&dd->pcidev->dev,
1356 "Recovering from TXE PIO parity error\n");
1357 }
1358 ipath_disarm_senderrbufs(dd, 1);
1359 return 1;
1360}
1361
1333/** 1362/**
1334 * ipath_init_iba6120_funcs - set up the chip-specific function pointers 1363 * ipath_init_iba6120_funcs - set up the chip-specific function pointers
1335 * @dd: the infinipath device 1364 * @dd: the infinipath device
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index d4f6b5239ef8..7045ba689494 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -216,6 +216,20 @@ static int bringup_link(struct ipath_devdata *dd)
216 return ret; 216 return ret;
217} 217}
218 218
219static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd)
220{
221 struct ipath_portdata *pd = NULL;
222
223 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
224 if (pd) {
225 pd->port_dd = dd;
226 pd->port_cnt = 1;
227 /* The port 0 pkey table is used by the layer interface. */
228 pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
229 }
230 return pd;
231}
232
219static int init_chip_first(struct ipath_devdata *dd, 233static int init_chip_first(struct ipath_devdata *dd,
220 struct ipath_portdata **pdp) 234 struct ipath_portdata **pdp)
221{ 235{
@@ -271,20 +285,16 @@ static int init_chip_first(struct ipath_devdata *dd,
271 goto done; 285 goto done;
272 } 286 }
273 287
274 dd->ipath_pd[0] = kzalloc(sizeof(*pd), GFP_KERNEL); 288 pd = create_portdata0(dd);
275 289
276 if (!dd->ipath_pd[0]) { 290 if (!pd) {
277 ipath_dev_err(dd, "Unable to allocate portdata for port " 291 ipath_dev_err(dd, "Unable to allocate portdata for port "
278 "0, failing\n"); 292 "0, failing\n");
279 ret = -ENOMEM; 293 ret = -ENOMEM;
280 goto done; 294 goto done;
281 } 295 }
282 pd = dd->ipath_pd[0]; 296 dd->ipath_pd[0] = pd;
283 pd->port_dd = dd; 297
284 pd->port_port = 0;
285 pd->port_cnt = 1;
286 /* The port 0 pkey table is used by the layer interface. */
287 pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
288 dd->ipath_rcvtidcnt = 298 dd->ipath_rcvtidcnt =
289 ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt); 299 ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
290 dd->ipath_rcvtidbase = 300 dd->ipath_rcvtidbase =
@@ -590,6 +600,10 @@ static int init_housekeeping(struct ipath_devdata *dd,
590 goto done; 600 goto done;
591 } 601 }
592 602
603
604 /* clear diagctrl register, in case diags were running and crashed */
 605 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl, 0);
606
593 /* clear the initial reset flag, in case first driver load */ 607 /* clear the initial reset flag, in case first driver load */
594 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 608 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
595 INFINIPATH_E_RESET); 609 INFINIPATH_E_RESET);
@@ -668,6 +682,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
668{ 682{
669 int ret = 0, i; 683 int ret = 0, i;
670 u32 val32, kpiobufs; 684 u32 val32, kpiobufs;
685 u32 piobufs, uports;
671 u64 val; 686 u64 val;
672 struct ipath_portdata *pd = NULL; /* keep gcc4 happy */ 687 struct ipath_portdata *pd = NULL; /* keep gcc4 happy */
673 gfp_t gfp_flags = GFP_USER | __GFP_COMP; 688 gfp_t gfp_flags = GFP_USER | __GFP_COMP;
@@ -702,16 +717,17 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
702 * the in memory DMA'ed copies of the registers. This has to 717 * the in memory DMA'ed copies of the registers. This has to
703 * be done early, before we calculate lastport, etc. 718 * be done early, before we calculate lastport, etc.
704 */ 719 */
705 val = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; 720 piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
706 /* 721 /*
707 * calc number of pioavail registers, and save it; we have 2 722 * calc number of pioavail registers, and save it; we have 2
708 * bits per buffer. 723 * bits per buffer.
709 */ 724 */
710 dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2) 725 dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
711 / (sizeof(u64) * BITS_PER_BYTE / 2); 726 / (sizeof(u64) * BITS_PER_BYTE / 2);
727 uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
712 if (ipath_kpiobufs == 0) { 728 if (ipath_kpiobufs == 0) {
713 /* not set by user (this is default) */ 729 /* not set by user (this is default) */
714 if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128) 730 if (piobufs >= (uports * IPATH_MIN_USER_PORT_BUFCNT) + 32)
715 kpiobufs = 32; 731 kpiobufs = 32;
716 else 732 else
717 kpiobufs = 16; 733 kpiobufs = 16;
@@ -719,31 +735,25 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
719 else 735 else
720 kpiobufs = ipath_kpiobufs; 736 kpiobufs = ipath_kpiobufs;
721 737
722 if (kpiobufs > 738 if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) {
723 (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - 739 i = (int) piobufs -
724 (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT))) { 740 (int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
725 i = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
726 (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT);
727 if (i < 0) 741 if (i < 0)
728 i = 0; 742 i = 0;
729 dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs for " 743 dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
730 "kernel leaves too few for %d user ports " 744 "%d for kernel leaves too few for %d user ports "
731 "(%d each); using %u\n", kpiobufs, 745 "(%d each); using %u\n", kpiobufs,
732 dd->ipath_cfgports - 1, 746 piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
733 IPATH_MIN_USER_PORT_BUFCNT, i);
734 /* 747 /*
735 * shouldn't change ipath_kpiobufs, because could be 748 * shouldn't change ipath_kpiobufs, because could be
736 * different for different devices... 749 * different for different devices...
737 */ 750 */
738 kpiobufs = i; 751 kpiobufs = i;
739 } 752 }
740 dd->ipath_lastport_piobuf = 753 dd->ipath_lastport_piobuf = piobufs - kpiobufs;
741 dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - kpiobufs; 754 dd->ipath_pbufsport =
742 dd->ipath_pbufsport = dd->ipath_cfgports > 1 755 uports ? dd->ipath_lastport_piobuf / uports : 0;
743 ? dd->ipath_lastport_piobuf / (dd->ipath_cfgports - 1) 756 val32 = dd->ipath_lastport_piobuf - (dd->ipath_pbufsport * uports);
744 : 0;
745 val32 = dd->ipath_lastport_piobuf -
746 (dd->ipath_pbufsport * (dd->ipath_cfgports - 1));
747 if (val32 > 0) { 757 if (val32 > 0) {
748 ipath_dbg("allocating %u pbufs/port leaves %u unused, " 758 ipath_dbg("allocating %u pbufs/port leaves %u unused, "
749 "add to kernel\n", dd->ipath_pbufsport, val32); 759 "add to kernel\n", dd->ipath_pbufsport, val32);
@@ -754,8 +764,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
754 dd->ipath_lastpioindex = dd->ipath_lastport_piobuf; 764 dd->ipath_lastpioindex = dd->ipath_lastport_piobuf;
755 ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u " 765 ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
756 "each for %u user ports\n", kpiobufs, 766 "each for %u user ports\n", kpiobufs,
757 dd->ipath_piobcnt2k + dd->ipath_piobcnt4k, 767 piobufs, dd->ipath_pbufsport, uports);
758 dd->ipath_pbufsport, dd->ipath_cfgports - 1);
759 768
760 dd->ipath_f_early_init(dd); 769 dd->ipath_f_early_init(dd);
761 770
@@ -839,11 +848,24 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 	 * Set up the port 0 (kernel) rcvhdr q and egr TIDs.  If doing
 	 * re-init, the simplest way to handle this is to free
 	 * existing, and re-allocate.
+	 * Need to re-create rest of port 0 portdata as well.
 	 */
 	if (reinit) {
-		struct ipath_portdata *pd = dd->ipath_pd[0];
-		dd->ipath_pd[0] = NULL;
-		ipath_free_pddata(dd, pd);
+		/* Alloc and init new ipath_portdata for port0,
+		 * Then free old pd. Could lead to fragmentation, but also
+		 * makes later support for hot-swap easier.
+		 */
+		struct ipath_portdata *npd;
+		npd = create_portdata0(dd);
+		if (npd) {
+			ipath_free_pddata(dd, pd);
+			dd->ipath_pd[0] = pd = npd;
+		} else {
+			ipath_dev_err(dd, "Unable to allocate portdata for"
+				      " port 0, failing\n");
+			ret = -ENOMEM;
+			goto done;
+		}
 	}
 	dd->ipath_f_tidtemplate(dd);
 	ret = ipath_create_rcvhdrq(dd, pd);
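The arithmetic in the hunks above divides the chip's PIO buffers between the kernel and the user ports. A minimal standalone sketch of the same split policy (plain C; the helper name and parameters are illustrative, not part of the patch):

	/* Hedged sketch of the kernel/user PIO buffer split above. */
	static unsigned int split_piobufs(unsigned int piobufs,
					  unsigned int uports,
					  unsigned int kpiobufs,
					  unsigned int min_per_port)
	{
		int avail;

		/* If the kernel's share would starve user ports, shrink it. */
		if (kpiobufs + uports * min_per_port > piobufs) {
			avail = (int) piobufs - (int) (uports * min_per_port);
			kpiobufs = avail < 0 ? 0 : avail;
		}
		/* The rest is divided evenly among user ports; any
		 * remainder stays with the kernel. */
		return uports ? (piobufs - kpiobufs) / uports : 0;
	}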
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 72b9e279d19d..45d033169c6e 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -38,10 +38,39 @@
 #include "ipath_common.h"
 
 /*
+ * clear (write) a pio buffer, to clear a parity error.   This routine
+ * should only be called when in freeze mode, and the buffer should be
+ * canceled afterwards.
+ */
+static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum)
+{
+	u32 __iomem *pbuf;
+	u32 dwcnt; /* dword count to write */
+	if (pnum < dd->ipath_piobcnt2k) {
+		pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum *
+			dd->ipath_palign);
+		dwcnt = dd->ipath_piosize2k >> 2;
+	}
+	else {
+		pbuf = (u32 __iomem *) (dd->ipath_pio4kbase +
+			(pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
+		dwcnt = dd->ipath_piosize4k >> 2;
+	}
+	dev_info(&dd->pcidev->dev,
+		"Rewrite PIO buffer %u, to recover from parity error\n",
+		pnum);
+	*pbuf = dwcnt+1; /* no flush required, since already in freeze */
+	while(--dwcnt)
+		*pbuf++ = 0;
+}
+
+/*
  * Called when we might have an error that is specific to a particular
  * PIO buffer, and may need to cancel that buffer, so it can be re-used.
+ * If rewrite is true, and bits are set in the sendbufferror registers,
+ * we'll write to the buffer, for error recovery on parity errors.
  */
-void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
+void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
 {
 	u32 piobcnt;
 	unsigned long sbuf[4];
@@ -74,8 +103,11 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
 	}
 
 		for (i = 0; i < piobcnt; i++)
-			if (test_bit(i, sbuf))
+			if (test_bit(i, sbuf)) {
+				if (rewrite)
+					ipath_clrpiobuf(dd, i);
 				ipath_disarm_piobufs(dd, i, 1);
+			}
 		dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */
 	}
 }
@@ -114,7 +146,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
 {
 	u64 ignore_this_time = 0;
 
-	ipath_disarm_senderrbufs(dd);
+	ipath_disarm_senderrbufs(dd, 0);
 	if ((errs & E_SUM_LINK_PKTERRS) &&
 	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
 		/*
@@ -403,10 +435,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd,
 	 * happens so often we never want to count it.
 	 */
 	if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
-		ipath_decode_err(msg, sizeof msg, dd->ipath_lasterror &
-				 ~INFINIPATH_E_IBSTATUSCHANGED);
+		int iserr;
+		iserr = ipath_decode_err(msg, sizeof msg,
+					 dd->ipath_lasterror &
+					 ~INFINIPATH_E_IBSTATUSCHANGED);
 		if (dd->ipath_lasterror &
-		    ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
+		    ~(INFINIPATH_E_RRCVEGRFULL |
+		      INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
 			ipath_dev_err(dd, "Suppressed %u messages for "
 				      "fast-repeating errors (%s) (%llx)\n",
 				      supp_msgs, msg,
@@ -420,8 +455,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd,
 			 * them. So only complain about these at debug
 			 * level.
 			 */
-			ipath_dbg("Suppressed %u messages for %s\n",
-				  supp_msgs, msg);
+			if (iserr)
+				ipath_dbg("Suppressed %u messages for %s\n",
+					  supp_msgs, msg);
+			else
+				ipath_cdbg(ERRPKT,
+					   "Suppressed %u messages for %s\n",
+					   supp_msgs, msg);
 		}
 	}
 }
@@ -462,7 +502,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 {
 	char msg[512];
 	u64 ignore_this_time = 0;
-	int i;
+	int i, iserr = 0;
 	int chkerrpkts = 0, noprint = 0;
 	unsigned supp_msgs;
 
@@ -502,6 +542,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 	}
 
 	if (supp_msgs == 250000) {
+		int s_iserr;
 		/*
 		 * It's not entirely reasonable assuming that the errors set
 		 * in the last clear period are all responsible for the
@@ -511,17 +552,17 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 		dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
 				 ~dd->ipath_maskederrs);
-		ipath_decode_err(msg, sizeof msg,
+		s_iserr = ipath_decode_err(msg, sizeof msg,
 				 (dd->ipath_maskederrs & ~dd->
 				  ipath_ignorederrs));
 
 		if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
-		    ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
-			ipath_dev_err(dd, "Disabling error(s) %llx because "
-				      "occurring too frequently (%s)\n",
-				      (unsigned long long)
-				      (dd->ipath_maskederrs &
-				       ~dd->ipath_ignorederrs), msg);
+		    ~(INFINIPATH_E_RRCVEGRFULL |
+		      INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
+			ipath_dev_err(dd, "Temporarily disabling "
+			    "error(s) %llx reporting; too frequent (%s)\n",
+			    (unsigned long long) (dd->ipath_maskederrs &
+			    ~dd->ipath_ignorederrs), msg);
 		else {
 			/*
 			 * rcvegrfull and rcvhdrqfull are "normal",
@@ -530,8 +571,15 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 			 * processing them.  So only complain about
 			 * these at debug level.
 			 */
-			ipath_dbg("Disabling frequent queue full errors "
-				  "(%s)\n", msg);
+			if (s_iserr)
+				ipath_dbg("Temporarily disabling reporting "
+				    "too frequent queue full errors (%s)\n",
+				    msg);
+			else
+				ipath_cdbg(ERRPKT,
+				    "Temporarily disabling reporting too"
+				    " frequent packet errors (%s)\n",
+				    msg);
 		}
 
 		/*
@@ -589,6 +637,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 		ipath_stats.sps_crcerrs++;
 		chkerrpkts = 1;
 	}
+	iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS);
+
 
 	/*
 	 * We don't want to print these two as they happen, or we can make
@@ -677,8 +727,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 		*dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
 	}
 
-	if (!noprint && *msg)
-		ipath_dev_err(dd, "%s error\n", msg);
+	if (!noprint && *msg) {
+		if (iserr)
+			ipath_dev_err(dd, "%s error\n", msg);
+		else
+			dev_info(&dd->pcidev->dev, "%s packet problems\n",
+				msg);
+	}
 	if (dd->ipath_state_wanted & dd->ipath_flags) {
 		ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
 			   "waking\n", dd->ipath_state_wanted,
@@ -819,11 +874,10 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
 		struct ipath_portdata *pd = dd->ipath_pd[i];
 		if (portr & (1 << i) && pd && pd->port_cnt &&
 			test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) {
-			int rcbit;
 			clear_bit(IPATH_PORT_WAITING_RCV,
 				  &pd->port_flag);
-			rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT;
-			clear_bit(1UL << rcbit, &dd->ipath_rcvctrl);
+			clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT,
+				  &dd->ipath_rcvctrl);
 			wake_up_interruptible(&pd->port_wait);
 			rcvdint = 1;
 		}
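The handle_urcv() change above fixes a classic bit-operation mistake: clear_bit() takes a bit number, not a mask, so passing 1UL << rcbit asked it to clear bit number 2^rcbit, far outside the word. A hedged illustration (values hypothetical):

	unsigned long rcvctrl = ~0UL;
	int i = 2, shift = 16;	/* hypothetical port index and register shift */

	/* Wrong: treats the mask as an index; for i + shift == 18 this
	 * tries to clear bit 262144, touching memory beyond rcvctrl. */
	clear_bit(1UL << (i + shift), &rcvctrl);

	/* Right: clears bit 18 of rcvctrl, as the patch now does. */
	clear_bit(i + shift, &rcvctrl);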
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 6d8d05fb5999..e900c2593f44 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -590,7 +590,6 @@ int ipath_enable_wc(struct ipath_devdata *dd);
 void ipath_disable_wc(struct ipath_devdata *dd);
 int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
 void ipath_shutdown_device(struct ipath_devdata *);
-void ipath_disarm_senderrbufs(struct ipath_devdata *);
 
 struct file_operations;
 int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
@@ -611,7 +610,7 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t);
 extern int ipath_diag_inuse;
 
 irqreturn_t ipath_intr(int irq, void *devid);
-void ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
+int ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
 #if __IPATH_INFO || __IPATH_DBG
 extern const char *ipath_ibcstatus_str[];
 #endif
@@ -701,6 +700,8 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
 #define IPATH_PORT_WAITING_RCV   2
 	/* waiting for a PIO buffer to be available */
 #define IPATH_PORT_WAITING_PIO   3
+	/* master has not finished initializing */
+#define IPATH_PORT_MASTER_UNINIT 4
 
 /* free up any allocated data at closes */
 void ipath_free_data(struct ipath_portdata *dd);
@@ -711,6 +712,7 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *);
 void ipath_init_iba6110_funcs(struct ipath_devdata *);
 void ipath_get_eeprom_info(struct ipath_devdata *);
 u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
+void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
 
 /*
  * number of words used for protocol header if not set by ipath_userinit();
@@ -754,8 +756,6 @@ int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int);
 /* these are used for the registers that vary with port */
 void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg,
 			   unsigned, u64);
-u64 ipath_read_kreg64_port(const struct ipath_devdata *, ipath_kreg,
-			   unsigned);
 
 /*
  * We could have a single register get/put routine, that takes a group type,
@@ -897,6 +897,8 @@ dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
 
 extern unsigned ipath_debug; /* debugging bit mask */
 
+#define IPATH_MAX_PARITY_ATTEMPTS 10000 /* max times to try recovery */
+
 const char *ipath_get_unit_name(int unit);
 
 extern struct mutex ipath_mutex;
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index 851763d7d2db..dd487c100f5b 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -61,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
 		r = (r + 1) & (rkt->max - 1);
 		if (r == n) {
 			spin_unlock_irqrestore(&rkt->lock, flags);
-			ipath_dbg(KERN_INFO "LKEY table full\n");
+			ipath_dbg("LKEY table full\n");
 			ret = 0;
 			goto bail;
 		}
@@ -133,6 +133,12 @@ int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
 	 * being reversible by calling bus_to_virt().
 	 */
 	if (sge->lkey == 0) {
+		struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
+
+		if (pd->user) {
+			ret = 0;
+			goto bail;
+		}
 		isge->mr = NULL;
 		isge->vaddr = (void *) sge->addr;
 		isge->length = sge->length;
@@ -206,6 +212,12 @@ int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
 	 * (see ipath_get_dma_mr and ipath_dma.c).
 	 */
 	if (rkey == 0) {
+		struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
+
+		if (pd->user) {
+			ret = 0;
+			goto bail;
+		}
 		sge->mr = NULL;
 		sge->vaddr = (void *) vaddr;
 		sge->length = len;
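Both hunks close the same hole: lkey/rkey 0 is the reserved key that maps the whole kernel address space (see ipath_get_dma_mr and ipath_dma.c), so a QP whose protection domain was created from userspace must never be allowed to use it. A hedged sketch of the test's intent (ipath_pd's user flag is the driver's; the helper itself is illustrative):

	/* Reserved key 0 is kernel-only; user PDs must fail the check. */
	static int reserved_key_ok(struct ipath_qp *qp)
	{
		struct ipath_pd *pd = to_ipd(qp->ibqp.pd);

		return !pd->user;
	}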
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 8cc8598d6c69..31e70732e369 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -210,9 +210,15 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 	m = 0;
 	n = 0;
 	list_for_each_entry(chunk, &region->chunk_list, list) {
-		for (i = 0; i < chunk->nmap; i++) {
-			mr->mr.map[m]->segs[n].vaddr =
-				page_address(chunk->page_list[i].page);
+		for (i = 0; i < chunk->nents; i++) {
+			void *vaddr;
+
+			vaddr = page_address(chunk->page_list[i].page);
+			if (!vaddr) {
+				ret = ERR_PTR(-EINVAL);
+				goto bail;
+			}
+			mr->mr.map[m]->segs[n].vaddr = vaddr;
 			mr->mr.map[m]->segs[n].length = region->page_size;
 			n++;
 			if (n == IPATH_SEGSZ) {
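Two things change in the loop above: it iterates over chunk->nents (the number of valid page_list entries) rather than chunk->nmap, and it checks page_address(), which returns NULL for a highmem page with no permanent kernel mapping. A hedged illustration of the failure mode being guarded against (names from the patch, context simplified):

	struct page *pg = chunk->page_list[i].page;
	void *vaddr = page_address(pg);	/* NULL if pg lives in highmem */

	if (!vaddr)
		return ERR_PTR(-EINVAL);	/* cannot build a kernel SGE */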
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 64f07b19349f..16db9ac0b402 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -81,11 +81,51 @@ static u32 credit_table[31] = {
 	32768                   /* 1E */
 };
 
-static u32 alloc_qpn(struct ipath_qp_table *qpt)
+
+static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map)
+{
+	unsigned long page = get_zeroed_page(GFP_KERNEL);
+	unsigned long flags;
+
+	/*
+	 * Free the page if someone raced with us installing it.
+	 */
+
+	spin_lock_irqsave(&qpt->lock, flags);
+	if (map->page)
+		free_page(page);
+	else
+		map->page = (void *)page;
+	spin_unlock_irqrestore(&qpt->lock, flags);
+}
+
+
+static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type)
 {
 	u32 i, offset, max_scan, qpn;
 	struct qpn_map *map;
-	u32 ret;
+	u32 ret = -1;
+
+	if (type == IB_QPT_SMI)
+		ret = 0;
+	else if (type == IB_QPT_GSI)
+		ret = 1;
+
+	if (ret != -1) {
+		map = &qpt->map[0];
+		if (unlikely(!map->page)) {
+			get_map_page(qpt, map);
+			if (unlikely(!map->page)) {
+				ret = -ENOMEM;
+				goto bail;
+			}
+		}
+		if (!test_and_set_bit(ret, map->page))
+			atomic_dec(&map->n_free);
+		else
+			ret = -EBUSY;
+		goto bail;
+	}
 
 	qpn = qpt->last + 1;
 	if (qpn >= QPN_MAX)
@@ -95,19 +135,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt)
 	max_scan = qpt->nmaps - !offset;
 	for (i = 0;;) {
 		if (unlikely(!map->page)) {
-			unsigned long page = get_zeroed_page(GFP_KERNEL);
-			unsigned long flags;
-
-			/*
-			 * Free the page if someone raced with us
-			 * installing it:
-			 */
-			spin_lock_irqsave(&qpt->lock, flags);
-			if (map->page)
-				free_page(page);
-			else
-				map->page = (void *)page;
-			spin_unlock_irqrestore(&qpt->lock, flags);
+			get_map_page(qpt, map);
 			if (unlikely(!map->page))
 				break;
 		}
@@ -151,7 +179,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt)
 		qpn = mk_qpn(qpt, map, offset);
 	}
 
-	ret = 0;
+	ret = -ENOMEM;
 
 bail:
 	return ret;
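alloc_qpn() now hands out QPN 0 and 1 (the fixed SMI and GSI queue pair numbers) from bits 0 and 1 of the first map page, and scans the bitmap for everything else, returning a negative errno on failure instead of the old in-band 0. A small self-contained model of the bitmap discipline (plain C, not driver code):

	/* Hedged model of a QPN bitmap: one bit per QPN in use. */
	#define QPN_MAX 1024

	static unsigned char qpn_map[QPN_MAX / 8];

	static int model_alloc_qpn(int last)
	{
		int i, qpn;

		for (i = 0; i < QPN_MAX; i++) {
			qpn = (last + 1 + i) % QPN_MAX;
			if (qpn <= 1)	/* 0 and 1 reserved for SMI/GSI */
				continue;
			if (!(qpn_map[qpn / 8] & (1 << (qpn % 8)))) {
				qpn_map[qpn / 8] |= 1 << (qpn % 8);
				return qpn;
			}
		}
		return -1;	/* the driver returns -ENOMEM here */
	}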
@@ -180,29 +208,19 @@ static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp,
 			   enum ib_qp_type type)
 {
 	unsigned long flags;
-	u32 qpn;
 	int ret;
 
-	if (type == IB_QPT_SMI)
-		qpn = 0;
-	else if (type == IB_QPT_GSI)
-		qpn = 1;
-	else {
-		/* Allocate the next available QPN */
-		qpn = alloc_qpn(qpt);
-		if (qpn == 0) {
-			ret = -ENOMEM;
-			goto bail;
-		}
-	}
-	qp->ibqp.qp_num = qpn;
+	ret = alloc_qpn(qpt, type);
+	if (ret < 0)
+		goto bail;
+	qp->ibqp.qp_num = ret;
 
 	/* Add the QP to the hash table. */
 	spin_lock_irqsave(&qpt->lock, flags);
 
-	qpn %= qpt->max;
-	qp->next = qpt->table[qpn];
-	qpt->table[qpn] = qp;
+	ret %= qpt->max;
+	qp->next = qpt->table[ret];
+	qpt->table[ret] = qp;
 	atomic_inc(&qp->refcount);
 
 	spin_unlock_irqrestore(&qpt->lock, flags);
@@ -245,9 +263,7 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
 	if (!fnd)
 		return;
 
-	/* If QPN is not reserved, mark QPN free in the bitmap. */
-	if (qp->ibqp.qp_num > 1)
-		free_qpn(qpt, qp->ibqp.qp_num);
+	free_qpn(qpt, qp->ibqp.qp_num);
 
 	wait_event(qp->wait, !atomic_read(&qp->refcount));
 }
@@ -270,11 +286,10 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt)
 
 			while (qp) {
 				nqp = qp->next;
-				if (qp->ibqp.qp_num > 1)
-					free_qpn(qpt, qp->ibqp.qp_num);
+				free_qpn(qpt, qp->ibqp.qp_num);
 				if (!atomic_dec_and_test(&qp->refcount) ||
 				    !ipath_destroy_qp(&qp->ibqp))
-					ipath_dbg(KERN_INFO "QP memory leak!\n");
+					ipath_dbg("QP memory leak!\n");
 				qp = nqp;
 			}
 		}
@@ -320,7 +335,8 @@ static void ipath_reset_qp(struct ipath_qp *qp)
 	qp->remote_qpn = 0;
 	qp->qkey = 0;
 	qp->qp_access_flags = 0;
-	clear_bit(IPATH_S_BUSY, &qp->s_flags);
+	qp->s_busy = 0;
+	qp->s_flags &= ~IPATH_S_SIGNAL_REQ_WR;
 	qp->s_hdrwords = 0;
 	qp->s_psn = 0;
 	qp->r_psn = 0;
@@ -333,7 +349,6 @@ static void ipath_reset_qp(struct ipath_qp *qp)
 		qp->r_state = IB_OPCODE_UC_SEND_LAST;
 	}
 	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
-	qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
 	qp->r_nak_state = 0;
 	qp->r_wrid_valid = 0;
 	qp->s_rnr_timeout = 0;
@@ -344,6 +359,10 @@ static void ipath_reset_qp(struct ipath_qp *qp)
 	qp->s_ssn = 1;
 	qp->s_lsn = 0;
 	qp->s_wait_credit = 0;
+	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
+	qp->r_head_ack_queue = 0;
+	qp->s_tail_ack_queue = 0;
+	qp->s_num_rd_atomic = 0;
 	if (qp->r_rq.wq) {
 		qp->r_rq.wq->head = 0;
 		qp->r_rq.wq->tail = 0;
@@ -357,7 +376,7 @@ static void ipath_reset_qp(struct ipath_qp *qp)
  * @err: the receive completion error to signal if a RWQE is active
  *
  * Flushes both send and receive work queues.
- * QP s_lock should be held and interrupts disabled.
+ * The QP s_lock should be held and interrupts disabled.
  */
 
 void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
@@ -365,7 +384,7 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ib_wc wc;
 
-	ipath_dbg(KERN_INFO "QP%d/%d in error state\n",
+	ipath_dbg("QP%d/%d in error state\n",
 		  qp->ibqp.qp_num, qp->remote_qpn);
 
 	spin_lock(&dev->pending_lock);
@@ -389,6 +408,8 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
 	wc.port_num = 0;
 	if (qp->r_wrid_valid) {
 		qp->r_wrid_valid = 0;
+		wc.wr_id = qp->r_wr_id;
+		wc.opcode = IB_WC_RECV;
 		wc.status = err;
 		ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
 	}
@@ -503,13 +524,17 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	    attr->path_mig_state != IB_MIG_REARM)
 		goto inval;
 
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+		if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC)
+			goto inval;
+
 	switch (new_state) {
 	case IB_QPS_RESET:
 		ipath_reset_qp(qp);
 		break;
 
 	case IB_QPS_ERR:
-		ipath_error_qp(qp, IB_WC_GENERAL_ERR);
+		ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 		break;
 
 	default:
@@ -559,6 +584,12 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	if (attr_mask & IB_QP_QKEY)
 		qp->qkey = attr->qkey;
 
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
+
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+		qp->s_max_rd_atomic = attr->max_rd_atomic;
+
 	qp->state = new_state;
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 
@@ -598,8 +629,8 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	attr->alt_pkey_index = 0;
 	attr->en_sqd_async_notify = 0;
 	attr->sq_draining = 0;
-	attr->max_rd_atomic = 1;
-	attr->max_dest_rd_atomic = 1;
+	attr->max_rd_atomic = qp->s_max_rd_atomic;
+	attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
 	attr->min_rnr_timer = qp->r_min_rnr_timer;
 	attr->port_num = 1;
 	attr->timeout = qp->timeout;
@@ -614,7 +645,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	init_attr->recv_cq = qp->ibqp.recv_cq;
 	init_attr->srq = qp->ibqp.srq;
 	init_attr->cap = attr->cap;
-	if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
+	if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR)
 		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
 	else
 		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
@@ -786,7 +817,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 	qp->s_size = init_attr->cap.max_send_wr + 1;
 	qp->s_max_sge = init_attr->cap.max_send_sge;
 	if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
-		qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR;
+		qp->s_flags = IPATH_S_SIGNAL_REQ_WR;
 	else
 		qp->s_flags = 0;
 	dev = to_idev(ibpd->device);
@@ -958,7 +989,7 @@ bail:
  * @wc: the WC responsible for putting the QP in this state
  *
  * Flushes the send work queue.
- * The QP s_lock should be held.
+ * The QP s_lock should be held and interrupts disabled.
  */
 
 void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
@@ -966,7 +997,7 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
 
-	ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n",
+	ipath_dbg("Send queue error on QP%d/%d: err: %d\n",
 		  qp->ibqp.qp_num, qp->remote_qpn, wc->status);
 
 	spin_lock(&dev->pending_lock);
@@ -984,12 +1015,12 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
 	wc->status = IB_WC_WR_FLUSH_ERR;
 
 	while (qp->s_last != qp->s_head) {
+		wqe = get_swqe_ptr(qp, qp->s_last);
 		wc->wr_id = wqe->wr.wr_id;
 		wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
 		ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
 		if (++qp->s_last >= qp->s_size)
 			qp->s_last = 0;
-		wqe = get_swqe_ptr(qp, qp->s_last);
 	}
 	qp->s_cur = qp->s_tail = qp->s_head;
 	qp->state = IB_QPS_SQE;
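The flush loop above had an off-by-one shape: it completed a WQE, advanced s_last, then fetched the next WQE before checking whether s_last had reached s_tail, so the termination check ran one entry late. Fetching at the top of the loop, as the new code does, is the usual way to drain a ring; a hedged sketch of the pattern (names illustrative):

	/* Hedged sketch of the corrected fetch-then-consume drain. */
	static void drain_ring(int *ring, unsigned int tail, unsigned int head,
			       unsigned int size, void (*complete)(int))
	{
		while (tail != head) {
			int entry = ring[tail];	/* fetch first */
			complete(entry);	/* then consume */
			if (++tail >= size)
				tail = 0;
		}
	}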
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 5ff20cb04494..b4b88d0b53f5 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -37,6 +37,19 @@
 /* cut down ridiculously long IB macro names */
 #define OP(x) IB_OPCODE_RC_##x
 
+static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe,
+		       u32 psn, u32 pmtu)
+{
+	u32 len;
+
+	len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
+	ss->sge = wqe->sg_list[0];
+	ss->sg_list = wqe->sg_list + 1;
+	ss->num_sge = wqe->wr.num_sge;
+	ipath_skip_sge(ss, len);
+	return wqe->length - len;
+}
+
 /**
  * ipath_init_restart- initialize the qp->s_sge after a restart
  * @qp: the QP who's SGE we're restarting
@@ -47,15 +60,9 @@
 static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
 {
 	struct ipath_ibdev *dev;
-	u32 len;
 
-	len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) *
-		ib_mtu_enum_to_int(qp->path_mtu);
-	qp->s_sge.sge = wqe->sg_list[0];
-	qp->s_sge.sg_list = wqe->sg_list + 1;
-	qp->s_sge.num_sge = wqe->wr.num_sge;
-	ipath_skip_sge(&qp->s_sge, len);
-	qp->s_len = wqe->length - len;
+	qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn,
+				ib_mtu_enum_to_int(qp->path_mtu));
 	dev = to_idev(qp->ibqp.device);
 	spin_lock(&dev->pending_lock);
 	if (list_empty(&qp->timerwait))
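restart_sge() turns a PSN delta into a byte offset: every packet before the last carries exactly one path-MTU of payload, so resuming at a later PSN means skipping (psn - wqe->psn) pMTUs into the SGE list. A worked example of the arithmetic (values hypothetical; 0xffffff stands in for IPATH_PSN_MASK):

	/* Resume a 10000-byte request at its 4th packet, pMTU 2048. */
	u32 first_psn = 100, resume_psn = 103;
	u32 pmtu = 2048, wqe_len = 10000;
	u32 skip = ((resume_psn - first_psn) & 0xffffff) * pmtu; /* 6144 */
	u32 remaining = wqe_len - skip;				 /* 3856 */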
@@ -70,107 +77,123 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
  * @ohdr: a pointer to the IB header being constructed
  * @pmtu: the path MTU
  *
- * Return bth0 if constructed; otherwise, return 0.
+ * Return 1 if constructed; otherwise, return 0.
+ * Note that we are in the responder's side of the QP context.
  * Note the QP s_lock must be held.
  */
-u32 ipath_make_rc_ack(struct ipath_qp *qp,
-		      struct ipath_other_headers *ohdr,
-		      u32 pmtu)
+static int ipath_make_rc_ack(struct ipath_qp *qp,
+			     struct ipath_other_headers *ohdr,
+			     u32 pmtu, u32 *bth0p, u32 *bth2p)
 {
+	struct ipath_ack_entry *e;
 	u32 hwords;
 	u32 len;
 	u32 bth0;
+	u32 bth2;
 
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
 	hwords = 5;
 
-	/*
-	 * Send a response.  Note that we are in the responder's
-	 * side of the QP context.
-	 */
 	switch (qp->s_ack_state) {
-	case OP(RDMA_READ_REQUEST):
-		qp->s_cur_sge = &qp->s_rdma_sge;
-		len = qp->s_rdma_len;
-		if (len > pmtu) {
-			len = pmtu;
-			qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
-		} else
-			qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
-		qp->s_rdma_len -= len;
+	case OP(RDMA_READ_RESPONSE_LAST):
+	case OP(RDMA_READ_RESPONSE_ONLY):
+	case OP(ATOMIC_ACKNOWLEDGE):
+		qp->s_ack_state = OP(ACKNOWLEDGE);
+		/* FALLTHROUGH */
+	case OP(ACKNOWLEDGE):
+		/* Check for no next entry in the queue. */
+		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
+			if (qp->s_flags & IPATH_S_ACK_PENDING)
+				goto normal;
+			goto bail;
+		}
+
+		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+		if (e->opcode == OP(RDMA_READ_REQUEST)) {
+			/* Copy SGE state in case we need to resend */
+			qp->s_ack_rdma_sge = e->rdma_sge;
+			qp->s_cur_sge = &qp->s_ack_rdma_sge;
+			len = e->rdma_sge.sge.sge_length;
+			if (len > pmtu) {
+				len = pmtu;
+				qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
+			} else {
+				qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
+				if (++qp->s_tail_ack_queue >
+				    IPATH_MAX_RDMA_ATOMIC)
+					qp->s_tail_ack_queue = 0;
+			}
+			ohdr->u.aeth = ipath_compute_aeth(qp);
+			hwords++;
+			qp->s_ack_rdma_psn = e->psn;
+			bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
+		} else {
+			/* COMPARE_SWAP or FETCH_ADD */
+			qp->s_cur_sge = NULL;
+			len = 0;
+			qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
+			ohdr->u.at.aeth = ipath_compute_aeth(qp);
+			ohdr->u.at.atomic_ack_eth[0] =
+				cpu_to_be32(e->atomic_data >> 32);
+			ohdr->u.at.atomic_ack_eth[1] =
+				cpu_to_be32(e->atomic_data);
+			hwords += sizeof(ohdr->u.at) / sizeof(u32);
+			bth2 = e->psn;
+			if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
+				qp->s_tail_ack_queue = 0;
+		}
 		bth0 = qp->s_ack_state << 24;
-		ohdr->u.aeth = ipath_compute_aeth(qp);
-		hwords++;
 		break;
 
 	case OP(RDMA_READ_RESPONSE_FIRST):
 		qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
 		/* FALLTHROUGH */
 	case OP(RDMA_READ_RESPONSE_MIDDLE):
-		qp->s_cur_sge = &qp->s_rdma_sge;
-		len = qp->s_rdma_len;
+		len = qp->s_ack_rdma_sge.sge.sge_length;
 		if (len > pmtu)
 			len = pmtu;
 		else {
 			ohdr->u.aeth = ipath_compute_aeth(qp);
 			hwords++;
 			qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+			if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
+				qp->s_tail_ack_queue = 0;
 		}
-		qp->s_rdma_len -= len;
 		bth0 = qp->s_ack_state << 24;
-		break;
-
-	case OP(RDMA_READ_RESPONSE_LAST):
-	case OP(RDMA_READ_RESPONSE_ONLY):
-		/*
-		 * We have to prevent new requests from changing
-		 * the r_sge state while a ipath_verbs_send()
-		 * is in progress.
-		 */
-		qp->s_ack_state = OP(ACKNOWLEDGE);
-		bth0 = 0;
-		goto bail;
-
-	case OP(COMPARE_SWAP):
-	case OP(FETCH_ADD):
-		qp->s_cur_sge = NULL;
-		len = 0;
-		/*
-		 * Set the s_ack_state so the receive interrupt handler
-		 * won't try to send an ACK (out of order) until this one
-		 * is actually sent.
-		 */
-		qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
-		bth0 = OP(ATOMIC_ACKNOWLEDGE) << 24;
-		ohdr->u.at.aeth = ipath_compute_aeth(qp);
-		ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
-		hwords += sizeof(ohdr->u.at) / 4;
+		bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
 		break;
 
 	default:
-		/* Send a regular ACK. */
-		qp->s_cur_sge = NULL;
-		len = 0;
+	normal:
 		/*
-		 * Set the s_ack_state so the receive interrupt handler
-		 * won't try to send an ACK (out of order) until this one
-		 * is actually sent.
+		 * Send a regular ACK.
+		 * Set the s_ack_state so we wait until after sending
+		 * the ACK before setting s_ack_state to ACKNOWLEDGE
+		 * (see above).
 		 */
-		qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
-		bth0 = OP(ACKNOWLEDGE) << 24;
+		qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
+		qp->s_flags &= ~IPATH_S_ACK_PENDING;
+		qp->s_cur_sge = NULL;
 		if (qp->s_nak_state)
-			ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
-						   (qp->s_nak_state <<
-						    IPATH_AETH_CREDIT_SHIFT));
+			ohdr->u.aeth =
+				cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
+					    (qp->s_nak_state <<
+					     IPATH_AETH_CREDIT_SHIFT));
 		else
 			ohdr->u.aeth = ipath_compute_aeth(qp);
 		hwords++;
+		len = 0;
+		bth0 = OP(ACKNOWLEDGE) << 24;
+		bth2 = qp->s_ack_psn & IPATH_PSN_MASK;
 	}
 	qp->s_hdrwords = hwords;
 	qp->s_cur_size = len;
+	*bth0p = bth0;
+	*bth2p = bth2;
+	return 1;
 
 bail:
-	return bth0;
+	return 0;
 }
 
 /**
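The rewritten responder path above replaces the single pending RDMA-read/atomic with a small ring of ack entries: the receive side queues at r_head_ack_queue, the send tasklet drains at s_tail_ack_queue, and both indices wrap after IPATH_MAX_RDMA_ATOMIC. A minimal model of that ring discipline (illustrative, not driver code):

	#define MAX_RD_ATOMIC 4	/* stands in for IPATH_MAX_RDMA_ATOMIC */

	struct ack_ring {
		unsigned int head;	/* producer: receive path */
		unsigned int tail;	/* consumer: send tasklet */
		int entry[MAX_RD_ATOMIC + 1];	/* one slot per operation */
	};

	static int ack_ring_empty(const struct ack_ring *r)
	{
		return r->head == r->tail;	/* same test as the patch */
	}

	static void ack_ring_advance(unsigned int *idx)
	{
		if (++*idx > MAX_RD_ATOMIC)	/* same wrap test as the patch */
			*idx = 0;
	}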
@@ -197,9 +220,16 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 	u32 bth2;
 	char newreq;
 
+	/* Sending responses has higher priority over sending requests. */
+	if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
+	     (qp->s_flags & IPATH_S_ACK_PENDING) ||
+	     qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE) &&
+	    ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p))
+		goto done;
+
 	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
 	    qp->s_rnr_timeout)
-		goto done;
+		goto bail;
 
 	/* Limit the number of packets sent without an ACK. */
 	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
@@ -210,7 +240,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 		list_add_tail(&qp->timerwait,
 			      &dev->pending[dev->pending_index]);
 		spin_unlock(&dev->pending_lock);
-		goto done;
+		goto bail;
 	}
 
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -232,7 +262,16 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 	if (qp->s_cur == qp->s_tail) {
 		/* Check if send work queue is empty. */
 		if (qp->s_tail == qp->s_head)
-			goto done;
+			goto bail;
+		/*
+		 * If a fence is requested, wait for previous
+		 * RDMA read and atomic operations to finish.
+		 */
+		if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
+		    qp->s_num_rd_atomic) {
+			qp->s_flags |= IPATH_S_FENCE_PENDING;
+			goto bail;
+		}
 		wqe->psn = qp->s_next_psn;
 		newreq = 1;
 	}
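IB_SEND_FENCE orders a work request behind all of the queue's outstanding RDMA reads and atomics; the code above implements it by setting IPATH_S_FENCE_PENDING and parking the sender until s_num_rd_atomic drains to zero. From a consumer's point of view it is just a flag on the posted request; a hedged userspace-verbs sketch (libibverbs usage, not part of this patch):

	struct ibv_send_wr wr = {
		.wr_id      = 1,
		.opcode     = IBV_WR_RDMA_WRITE,
		/* Do not start until prior RDMA reads/atomics complete. */
		.send_flags = IBV_SEND_FENCE | IBV_SEND_SIGNALED,
	};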
@@ -250,7 +289,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 			/* If no credit, return. */
 			if (qp->s_lsn != (u32) -1 &&
 			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
-				goto done;
+				goto bail;
 			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
 				wqe->lpsn += (len - 1) / pmtu;
@@ -281,13 +320,13 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 			/* If no credit, return. */
 			if (qp->s_lsn != (u32) -1 &&
 			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
-				goto done;
+				goto bail;
 			ohdr->u.rc.reth.vaddr =
 				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
 			ohdr->u.rc.reth.rkey =
 				cpu_to_be32(wqe->wr.wr.rdma.rkey);
 			ohdr->u.rc.reth.length = cpu_to_be32(len);
-			hwords += sizeof(struct ib_reth) / 4;
+			hwords += sizeof(struct ib_reth) / sizeof(u32);
 			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
 				wqe->lpsn += (len - 1) / pmtu;
@@ -312,14 +351,17 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 			break;
 
 		case IB_WR_RDMA_READ:
-			ohdr->u.rc.reth.vaddr =
-				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
-			ohdr->u.rc.reth.rkey =
-				cpu_to_be32(wqe->wr.wr.rdma.rkey);
-			ohdr->u.rc.reth.length = cpu_to_be32(len);
-			qp->s_state = OP(RDMA_READ_REQUEST);
-			hwords += sizeof(ohdr->u.rc.reth) / 4;
+			/*
+			 * Don't allow more operations to be started
+			 * than the QP limits allow.
+			 */
 			if (newreq) {
+				if (qp->s_num_rd_atomic >=
+				    qp->s_max_rd_atomic) {
+					qp->s_flags |= IPATH_S_RDMAR_PENDING;
+					goto bail;
+				}
+				qp->s_num_rd_atomic++;
 				if (qp->s_lsn != (u32) -1)
 					qp->s_lsn++;
 				/*
@@ -330,6 +372,13 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 				qp->s_next_psn += (len - 1) / pmtu;
 				wqe->lpsn = qp->s_next_psn++;
 			}
+			ohdr->u.rc.reth.vaddr =
+				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+			ohdr->u.rc.reth.rkey =
+				cpu_to_be32(wqe->wr.wr.rdma.rkey);
+			ohdr->u.rc.reth.length = cpu_to_be32(len);
+			qp->s_state = OP(RDMA_READ_REQUEST);
+			hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
 			ss = NULL;
 			len = 0;
 			if (++qp->s_cur == qp->s_size)
@@ -338,32 +387,48 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 
 		case IB_WR_ATOMIC_CMP_AND_SWP:
 		case IB_WR_ATOMIC_FETCH_AND_ADD:
-			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP)
-				qp->s_state = OP(COMPARE_SWAP);
-			else
-				qp->s_state = OP(FETCH_ADD);
-			ohdr->u.atomic_eth.vaddr = cpu_to_be64(
-				wqe->wr.wr.atomic.remote_addr);
-			ohdr->u.atomic_eth.rkey = cpu_to_be32(
-				wqe->wr.wr.atomic.rkey);
-			ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-				wqe->wr.wr.atomic.swap);
-			ohdr->u.atomic_eth.compare_data = cpu_to_be64(
-				wqe->wr.wr.atomic.compare_add);
-			hwords += sizeof(struct ib_atomic_eth) / 4;
+			/*
+			 * Don't allow more operations to be started
+			 * than the QP limits allow.
+			 */
 			if (newreq) {
+				if (qp->s_num_rd_atomic >=
+				    qp->s_max_rd_atomic) {
+					qp->s_flags |= IPATH_S_RDMAR_PENDING;
+					goto bail;
+				}
+				qp->s_num_rd_atomic++;
 				if (qp->s_lsn != (u32) -1)
 					qp->s_lsn++;
 				wqe->lpsn = wqe->psn;
 			}
-			if (++qp->s_cur == qp->s_size)
-				qp->s_cur = 0;
+			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+				qp->s_state = OP(COMPARE_SWAP);
+				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
+					wqe->wr.wr.atomic.swap);
+				ohdr->u.atomic_eth.compare_data = cpu_to_be64(
+					wqe->wr.wr.atomic.compare_add);
+			} else {
+				qp->s_state = OP(FETCH_ADD);
+				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
+					wqe->wr.wr.atomic.compare_add);
+				ohdr->u.atomic_eth.compare_data = 0;
+			}
+			ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
+				wqe->wr.wr.atomic.remote_addr >> 32);
+			ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
+				wqe->wr.wr.atomic.remote_addr);
+			ohdr->u.atomic_eth.rkey = cpu_to_be32(
+				wqe->wr.wr.atomic.rkey);
+			hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
 			ss = NULL;
 			len = 0;
+			if (++qp->s_cur == qp->s_size)
+				qp->s_cur = 0;
 			break;
 
 		default:
-			goto done;
+			goto bail;
 		}
 		qp->s_sge.sge = wqe->sg_list[0];
 		qp->s_sge.sg_list = wqe->sg_list + 1;
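Note the atomic ETH's vaddr is now emitted as two big-endian 32-bit words rather than one be64, which keeps the wire layout independent of the compiler's 64-bit struct alignment. The split itself is plain shift-and-truncate (values hypothetical):

	u64 remote_addr = 0x123456789abcdef0ULL;	/* hypothetical target */
	__be32 hi = cpu_to_be32(remote_addr >> 32);	/* high word 0x12345678 */
	__be32 lo = cpu_to_be32(remote_addr);		/* low word 0x9abcdef0 */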
@@ -379,7 +444,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 			qp->s_psn = wqe->lpsn + 1;
 		else {
 			qp->s_psn++;
-			if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+			if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
 				qp->s_next_psn = qp->s_psn;
 		}
 		/*
@@ -406,7 +471,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 		/* FALLTHROUGH */
 	case OP(SEND_MIDDLE):
 		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-		if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
 			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		len = qp->s_len;
@@ -442,7 +507,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 		/* FALLTHROUGH */
 	case OP(RDMA_WRITE_MIDDLE):
 		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-		if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
 			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		len = qp->s_len;
@@ -479,9 +544,9 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 			cpu_to_be32(wqe->wr.wr.rdma.rkey);
 		ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
 		qp->s_state = OP(RDMA_READ_REQUEST);
-		hwords += sizeof(ohdr->u.rc.reth) / 4;
+		hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
 		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-		if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
 			qp->s_next_psn = qp->s_psn;
 		ss = NULL;
 		len = 0;
@@ -489,20 +554,6 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 		if (qp->s_cur == qp->s_size)
 			qp->s_cur = 0;
 		break;
-
-	case OP(RDMA_READ_REQUEST):
-	case OP(COMPARE_SWAP):
-	case OP(FETCH_ADD):
-		/*
-		 * We shouldn't start anything new until this request is
-		 * finished.  The ACK will handle rescheduling us.  XXX The
-		 * number of outstanding ones is negotiated at connection
-		 * setup time (see pg. 258,289)?  XXX Also, if we support
-		 * multiple outstanding requests, we need to check the WQE
-		 * IB_SEND_FENCE flag and not send a new request if a RDMA
-		 * read or atomic is pending.
-		 */
-		goto done;
 	}
 	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
 		bth2 |= 1 << 31;	/* Request ACK. */
@@ -512,9 +563,10 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 	qp->s_cur_size = len;
 	*bth0p = bth0 | (qp->s_state << 24);
 	*bth2p = bth2;
+done:
 	return 1;
 
-done:
+bail:
 	return 0;
 }
 
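Several hunks above replace the open-coded (int)(a - b) > 0 test with ipath_cmp24(): PSNs are 24-bit sequence numbers that wrap, so the difference must be sign-extended from bit 23, not bit 31. A hedged model of such a comparison (the real helper lives in the ipath headers):

	/* <0, 0, >0 as a is before, equal to, or after b, modulo 2^24. */
	static int cmp24_model(u32 a, u32 b)
	{
		/* Shift the 24-bit difference to the top of the word,
		 * then arithmetic-shift back down to sign-extend it. */
		return (int) ((a - b) << 8) >> 8;
	}

For example, cmp24_model(0x000001, 0xffffff) is 2, correctly treating PSN 1 as two packets after PSN 0xffffff across the wrap, where a 32-bit subtraction would call it far in the past.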
@@ -524,7 +576,8 @@ done:
  *
  * This is called from ipath_rc_rcv() and only uses the receive
  * side QP state.
- * Note that RDMA reads are handled in the send side QP state and tasklet.
+ * Note that RDMA reads and atomics are handled in the
+ * send side QP state and tasklet.
  */
 static void send_rc_ack(struct ipath_qp *qp)
 {
@@ -535,6 +588,10 @@ static void send_rc_ack(struct ipath_qp *qp)
 	struct ipath_ib_header hdr;
 	struct ipath_other_headers *ohdr;
 
+	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
+	if (qp->r_head_ack_queue != qp->s_tail_ack_queue)
+		goto queue_ack;
+
 	/* Construct the header. */
 	ohdr = &hdr.u.oth;
 	lrh0 = IPATH_LRH_BTH;
@@ -548,19 +605,14 @@ static void send_rc_ack(struct ipath_qp *qp)
 		lrh0 = IPATH_LRH_GRH;
 	}
 	/* read pkey_index w/o lock (its atomic) */
-	bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index);
+	bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) |
+		OP(ACKNOWLEDGE) << 24;
 	if (qp->r_nak_state)
 		ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
 					    (qp->r_nak_state <<
 					     IPATH_AETH_CREDIT_SHIFT));
 	else
 		ohdr->u.aeth = ipath_compute_aeth(qp);
-	if (qp->r_ack_state >= OP(COMPARE_SWAP)) {
-		bth0 |= OP(ATOMIC_ACKNOWLEDGE) << 24;
-		ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
-		hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4;
-	} else
-		bth0 |= OP(ACKNOWLEDGE) << 24;
 	lrh0 |= qp->remote_ah_attr.sl << 4;
 	hdr.lrh[0] = cpu_to_be16(lrh0);
 	hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
@@ -574,31 +626,31 @@ static void send_rc_ack(struct ipath_qp *qp)
 	 * If we can send the ACK, clear the ACK state.
 	 */
 	if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) {
-		qp->r_ack_state = OP(ACKNOWLEDGE);
 		dev->n_unicast_xmit++;
-	} else {
-		/*
-		 * We are out of PIO buffers at the moment.
-		 * Pass responsibility for sending the ACK to the
-		 * send tasklet so that when a PIO buffer becomes
-		 * available, the ACK is sent ahead of other outgoing
-		 * packets.
-		 */
-		dev->n_rc_qacks++;
-		spin_lock_irq(&qp->s_lock);
-		/* Don't coalesce if a RDMA read or atomic is pending. */
-		if (qp->s_ack_state == OP(ACKNOWLEDGE) ||
-		    qp->s_ack_state < OP(RDMA_READ_REQUEST)) {
-			qp->s_ack_state = qp->r_ack_state;
-			qp->s_nak_state = qp->r_nak_state;
-			qp->s_ack_psn = qp->r_ack_psn;
-			qp->r_ack_state = OP(ACKNOWLEDGE);
-		}
-		spin_unlock_irq(&qp->s_lock);
-
-		/* Call ipath_do_rc_send() in another thread. */
-		tasklet_hi_schedule(&qp->s_task);
+		goto done;
 	}
+
+	/*
+	 * We are out of PIO buffers at the moment.
+	 * Pass responsibility for sending the ACK to the
+	 * send tasklet so that when a PIO buffer becomes
+	 * available, the ACK is sent ahead of other outgoing
+	 * packets.
+	 */
+	dev->n_rc_qacks++;
+
+queue_ack:
+	spin_lock_irq(&qp->s_lock);
+	qp->s_flags |= IPATH_S_ACK_PENDING;
+	qp->s_nak_state = qp->r_nak_state;
+	qp->s_ack_psn = qp->r_ack_psn;
+	spin_unlock_irq(&qp->s_lock);
+
+	/* Call ipath_do_rc_send() in another thread. */
+	tasklet_hi_schedule(&qp->s_task);
+
+done:
+	return;
 }
603 655
604/** 656/**
@@ -727,7 +779,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
 	if (wqe->wr.opcode == IB_WR_RDMA_READ)
 		dev->n_rc_resends++;
 	else
-		dev->n_rc_resends += (int)qp->s_psn - (int)psn;
+		dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
 
 	reset_psn(qp, psn);
 	tasklet_hi_schedule(&qp->s_task);
@@ -775,10 +827,6 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
 		list_del_init(&qp->timerwait);
 	spin_unlock(&dev->pending_lock);
 
-	/* Nothing is pending to ACK/NAK. */
-	if (unlikely(qp->s_last == qp->s_tail))
-		goto bail;
-
 	/*
 	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
 	 * requests and implicitly NAK RDMA read and atomic requests issued
@@ -806,7 +854,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
 	 */
 	if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
 	     (opcode != OP(RDMA_READ_RESPONSE_LAST) ||
-	      ipath_cmp24(ack_psn, wqe->lpsn) != 0)) ||
+	     ipath_cmp24(ack_psn, wqe->lpsn) != 0)) ||
 	    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
 	      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
 	     (opcode != OP(ATOMIC_ACKNOWLEDGE) ||
@@ -824,20 +872,33 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
 		 */
 		goto bail;
 	}
-	if (wqe->wr.opcode == IB_WR_RDMA_READ ||
-	    wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-	    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-		tasklet_hi_schedule(&qp->s_task);
+	if (qp->s_num_rd_atomic &&
+	    (wqe->wr.opcode == IB_WR_RDMA_READ ||
+	     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+	     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
+		qp->s_num_rd_atomic--;
+		/* Restart sending task if fence is complete */
+		if ((qp->s_flags & IPATH_S_FENCE_PENDING) &&
+		    !qp->s_num_rd_atomic) {
+			qp->s_flags &= ~IPATH_S_FENCE_PENDING;
+			tasklet_hi_schedule(&qp->s_task);
+		} else if (qp->s_flags & IPATH_S_RDMAR_PENDING) {
+			qp->s_flags &= ~IPATH_S_RDMAR_PENDING;
+			tasklet_hi_schedule(&qp->s_task);
+		}
+	}
 	/* Post a send completion queue entry if requested. */
-	if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
+	if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
 	    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 		wc.wr_id = wqe->wr.wr_id;
 		wc.status = IB_WC_SUCCESS;
 		wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
 		wc.vendor_err = 0;
 		wc.byte_len = wqe->length;
+		wc.imm_data = 0;
 		wc.qp = &qp->ibqp;
 		wc.src_qp = qp->remote_qpn;
+		wc.wc_flags = 0;
 		wc.pkey_index = 0;
 		wc.slid = qp->remote_ah_attr.dlid;
 		wc.sl = qp->remote_ah_attr.sl;
@@ -854,15 +915,19 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
 		if (qp->s_last == qp->s_cur) {
 			if (++qp->s_cur >= qp->s_size)
 				qp->s_cur = 0;
+			qp->s_last = qp->s_cur;
+			if (qp->s_last == qp->s_tail)
+				break;
 			wqe = get_swqe_ptr(qp, qp->s_cur);
 			qp->s_state = OP(SEND_LAST);
 			qp->s_psn = wqe->psn;
+		} else {
+			if (++qp->s_last >= qp->s_size)
+				qp->s_last = 0;
+			if (qp->s_last == qp->s_tail)
+				break;
+			wqe = get_swqe_ptr(qp, qp->s_last);
 		}
-		if (++qp->s_last >= qp->s_size)
-			qp->s_last = 0;
-		wqe = get_swqe_ptr(qp, qp->s_last);
-		if (qp->s_last == qp->s_tail)
-			break;
 	}
 
 	switch (aeth >> 29) {
@@ -874,6 +939,18 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
 			list_add_tail(&qp->timerwait,
 				      &dev->pending[dev->pending_index]);
 			spin_unlock(&dev->pending_lock);
+			/*
+			 * If we get a partial ACK for a resent operation,
+			 * we can stop resending the earlier packets and
+			 * continue with the next packet the receiver wants.
+			 */
+			if (ipath_cmp24(qp->s_psn, psn) <= 0) {
+				reset_psn(qp, psn + 1);
+				tasklet_hi_schedule(&qp->s_task);
+			}
+		} else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
+			qp->s_state = OP(SEND_LAST);
+			qp->s_psn = psn + 1;
 		}
 		ipath_get_credit(qp, aeth);
 		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
@@ -884,22 +961,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
 
 	case 1:		/* RNR NAK */
 		dev->n_rnr_naks++;
+		if (qp->s_last == qp->s_tail)
+			goto bail;
 		if (qp->s_rnr_retry == 0) {
-			if (qp->s_last == qp->s_tail)
-				goto bail;
-
 			wc.status = IB_WC_RNR_RETRY_EXC_ERR;
 			goto class_b;
 		}
 		if (qp->s_rnr_retry_cnt < 7)
 			qp->s_rnr_retry--;
-		if (qp->s_last == qp->s_tail)
-			goto bail;
 
 		/* The last valid PSN is the previous PSN. */
 		update_last_psn(qp, psn - 1);
 
-		dev->n_rc_resends += (int)qp->s_psn - (int)psn;
+		if (wqe->wr.opcode == IB_WR_RDMA_READ)
+			dev->n_rc_resends++;
+		else
+			dev->n_rc_resends +=
+				(qp->s_psn - psn) & IPATH_PSN_MASK;
 
 		reset_psn(qp, psn);
 
@@ -910,26 +988,20 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
910 goto bail; 988 goto bail;
911 989
912 case 3: /* NAK */ 990 case 3: /* NAK */
913 /* The last valid PSN seen is the previous request's. */ 991 if (qp->s_last == qp->s_tail)
914 if (qp->s_last != qp->s_tail) 992 goto bail;
915 update_last_psn(qp, wqe->psn - 1); 993 /* The last valid PSN is the previous PSN. */
994 update_last_psn(qp, psn - 1);
916 switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) & 995 switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
917 IPATH_AETH_CREDIT_MASK) { 996 IPATH_AETH_CREDIT_MASK) {
918 case 0: /* PSN sequence error */ 997 case 0: /* PSN sequence error */
919 dev->n_seq_naks++; 998 dev->n_seq_naks++;
920 /* 999 /*
921 * Back up to the responder's expected PSN. XXX 1000 * Back up to the responder's expected PSN.
922 * Note that we might get a NAK in the middle of an 1001 * Note that we might get a NAK in the middle of an
923 * RDMA READ response which terminates the RDMA 1002 * RDMA READ response which terminates the RDMA
924 * READ. 1003 * READ.
925 */ 1004 */
926 if (qp->s_last == qp->s_tail)
927 break;
928
929 if (ipath_cmp24(psn, wqe->psn) < 0)
930 break;
931
932 /* Retry the request. */
933 ipath_restart_rc(qp, psn, &wc); 1005 ipath_restart_rc(qp, psn, &wc);
934 break; 1006 break;
935 1007
@@ -1003,6 +1075,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1003 u32 psn, u32 hdrsize, u32 pmtu, 1075 u32 psn, u32 hdrsize, u32 pmtu,
1004 int header_in_data) 1076 int header_in_data)
1005{ 1077{
1078 struct ipath_swqe *wqe;
1006 unsigned long flags; 1079 unsigned long flags;
1007 struct ib_wc wc; 1080 struct ib_wc wc;
1008 int diff; 1081 int diff;
@@ -1032,6 +1105,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1032 goto ack_done; 1105 goto ack_done;
1033 } 1106 }
1034 1107
1108 if (unlikely(qp->s_last == qp->s_tail))
1109 goto ack_done;
1110 wqe = get_swqe_ptr(qp, qp->s_last);
1111
1035 switch (opcode) { 1112 switch (opcode) {
1036 case OP(ACKNOWLEDGE): 1113 case OP(ACKNOWLEDGE):
1037 case OP(ATOMIC_ACKNOWLEDGE): 1114 case OP(ATOMIC_ACKNOWLEDGE):
@@ -1042,38 +1119,49 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1042 aeth = be32_to_cpu(((__be32 *) data)[0]); 1119 aeth = be32_to_cpu(((__be32 *) data)[0]);
1043 data += sizeof(__be32); 1120 data += sizeof(__be32);
1044 } 1121 }
1045 if (opcode == OP(ATOMIC_ACKNOWLEDGE)) 1122 if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
1046 *(u64 *) qp->s_sge.sge.vaddr = *(u64 *) data; 1123 u64 val;
1124
1125 if (!header_in_data) {
1126 __be32 *p = ohdr->u.at.atomic_ack_eth;
1127
1128 val = ((u64) be32_to_cpu(p[0]) << 32) |
1129 be32_to_cpu(p[1]);
1130 } else
1131 val = be64_to_cpu(((__be64 *) data)[0]);
1132 *(u64 *) wqe->sg_list[0].vaddr = val;
1133 }
1047 if (!do_rc_ack(qp, aeth, psn, opcode) || 1134 if (!do_rc_ack(qp, aeth, psn, opcode) ||
1048 opcode != OP(RDMA_READ_RESPONSE_FIRST)) 1135 opcode != OP(RDMA_READ_RESPONSE_FIRST))
1049 goto ack_done; 1136 goto ack_done;
1050 hdrsize += 4; 1137 hdrsize += 4;
1138 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1139 goto ack_op_err;
1051 /* 1140 /*
1052 * do_rc_ack() has already checked the PSN so skip 1141 * If this is a response to a resent RDMA read, we
1053 * the sequence check. 1142 * have to be careful to copy the data to the right
1143 * location.
1054 */ 1144 */
1055 goto rdma_read; 1145 qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1146 wqe, psn, pmtu);
1147 goto read_middle;
1056 1148
1057 case OP(RDMA_READ_RESPONSE_MIDDLE): 1149 case OP(RDMA_READ_RESPONSE_MIDDLE):
1058 /* no AETH, no ACK */ 1150 /* no AETH, no ACK */
1059 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { 1151 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
1060 dev->n_rdma_seq++; 1152 dev->n_rdma_seq++;
1061 if (qp->s_last != qp->s_tail) 1153 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
1062 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
1063 goto ack_done; 1154 goto ack_done;
1064 } 1155 }
1065 rdma_read: 1156 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1066 if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST))) 1157 goto ack_op_err;
1067 goto ack_done; 1158 read_middle:
1068 if (unlikely(tlen != (hdrsize + pmtu + 4))) 1159 if (unlikely(tlen != (hdrsize + pmtu + 4)))
1069 goto ack_done; 1160 goto ack_len_err;
1070 if (unlikely(pmtu >= qp->s_len)) 1161 if (unlikely(pmtu >= qp->s_rdma_read_len))
1071 goto ack_done; 1162 goto ack_len_err;
1163
1072 /* We got a response so update the timeout. */ 1164 /* We got a response so update the timeout. */
1073 if (unlikely(qp->s_last == qp->s_tail ||
1074 get_swqe_ptr(qp, qp->s_last)->wr.opcode !=
1075 IB_WR_RDMA_READ))
1076 goto ack_done;
1077 spin_lock(&dev->pending_lock); 1165 spin_lock(&dev->pending_lock);
1078 if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait)) 1166 if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
1079 list_move_tail(&qp->timerwait, 1167 list_move_tail(&qp->timerwait,
@@ -1082,67 +1170,97 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1082 /* 1170 /*
1083 * Update the RDMA receive state but do the copy w/o 1171 * Update the RDMA receive state but do the copy w/o
1084 * holding the locks and blocking interrupts. 1172 * holding the locks and blocking interrupts.
1085 * XXX Yet another place that affects relaxed RDMA order
1086 * since we don't want s_sge modified.
1087 */ 1173 */
1088 qp->s_len -= pmtu; 1174 qp->s_rdma_read_len -= pmtu;
1089 update_last_psn(qp, psn); 1175 update_last_psn(qp, psn);
1090 spin_unlock_irqrestore(&qp->s_lock, flags); 1176 spin_unlock_irqrestore(&qp->s_lock, flags);
1091 ipath_copy_sge(&qp->s_sge, data, pmtu); 1177 ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu);
1092 goto bail; 1178 goto bail;
1093 1179
1094 case OP(RDMA_READ_RESPONSE_LAST): 1180 case OP(RDMA_READ_RESPONSE_ONLY):
1095 /* ACKs READ req. */
1096 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { 1181 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
1097 dev->n_rdma_seq++; 1182 dev->n_rdma_seq++;
1098 if (qp->s_last != qp->s_tail) 1183 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
1099 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
1100 goto ack_done; 1184 goto ack_done;
1101 } 1185 }
1102 /* FALLTHROUGH */ 1186 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1103 case OP(RDMA_READ_RESPONSE_ONLY): 1187 goto ack_op_err;
1104 if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST))) 1188 /* Get the number of bytes the message was padded by. */
1105 goto ack_done; 1189 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1190 /*
1191 * Check that the data size is >= 0 && <= pmtu.
1192 * Remember to account for the AETH header (4) and
1193 * ICRC (4).
1194 */
1195 if (unlikely(tlen < (hdrsize + pad + 8)))
1196 goto ack_len_err;
1106 /* 1197 /*
1107 * Get the number of bytes the message was padded by. 1198 * If this is a response to a resent RDMA read, we
1199 * have to be careful to copy the data to the right
1200 * location.
1108 */ 1201 */
1202 qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1203 wqe, psn, pmtu);
1204 goto read_last;
1205
1206 case OP(RDMA_READ_RESPONSE_LAST):
1207 /* ACKs READ req. */
1208 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
1209 dev->n_rdma_seq++;
1210 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
1211 goto ack_done;
1212 }
1213 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1214 goto ack_op_err;
1215 /* Get the number of bytes the message was padded by. */
1109 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; 1216 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1110 /* 1217 /*
1111 * Check that the data size is >= 1 && <= pmtu. 1218 * Check that the data size is >= 1 && <= pmtu.
1112 * Remember to account for the AETH header (4) and 1219 * Remember to account for the AETH header (4) and
1113 * ICRC (4). 1220 * ICRC (4).
1114 */ 1221 */
1115 if (unlikely(tlen <= (hdrsize + pad + 8))) { 1222 if (unlikely(tlen <= (hdrsize + pad + 8)))
1116 /* XXX Need to generate an error CQ entry. */ 1223 goto ack_len_err;
1117 goto ack_done; 1224 read_last:
1118 }
1119 tlen -= hdrsize + pad + 8; 1225 tlen -= hdrsize + pad + 8;
1120 if (unlikely(tlen != qp->s_len)) { 1226 if (unlikely(tlen != qp->s_rdma_read_len))
1121 /* XXX Need to generate an error CQ entry. */ 1227 goto ack_len_err;
1122 goto ack_done;
1123 }
1124 if (!header_in_data) 1228 if (!header_in_data)
1125 aeth = be32_to_cpu(ohdr->u.aeth); 1229 aeth = be32_to_cpu(ohdr->u.aeth);
1126 else { 1230 else {
1127 aeth = be32_to_cpu(((__be32 *) data)[0]); 1231 aeth = be32_to_cpu(((__be32 *) data)[0]);
1128 data += sizeof(__be32); 1232 data += sizeof(__be32);
1129 } 1233 }
1130 ipath_copy_sge(&qp->s_sge, data, tlen); 1234 ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
1131 if (do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST))) { 1235 (void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST));
1132 /*
1133						 * Change the state so we continue
1134 * processing new requests and wake up the
1135 * tasklet if there are posted sends.
1136 */
1137 qp->s_state = OP(SEND_LAST);
1138 if (qp->s_tail != qp->s_head)
1139 tasklet_hi_schedule(&qp->s_task);
1140 }
1141 goto ack_done; 1236 goto ack_done;
1142 } 1237 }
1143 1238
1144ack_done: 1239ack_done:
1145 spin_unlock_irqrestore(&qp->s_lock, flags); 1240 spin_unlock_irqrestore(&qp->s_lock, flags);
1241 goto bail;
1242
1243ack_op_err:
1244 wc.status = IB_WC_LOC_QP_OP_ERR;
1245 goto ack_err;
1246
1247ack_len_err:
1248 wc.status = IB_WC_LOC_LEN_ERR;
1249ack_err:
1250 wc.wr_id = wqe->wr.wr_id;
1251 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
1252 wc.vendor_err = 0;
1253 wc.byte_len = 0;
1254 wc.imm_data = 0;
1255 wc.qp = &qp->ibqp;
1256 wc.src_qp = qp->remote_qpn;
1257 wc.wc_flags = 0;
1258 wc.pkey_index = 0;
1259 wc.slid = qp->remote_ah_attr.dlid;
1260 wc.sl = qp->remote_ah_attr.sl;
1261 wc.dlid_path_bits = 0;
1262 wc.port_num = 0;
1263 ipath_sqerror_qp(qp, &wc);
1146bail: 1264bail:
1147 return; 1265 return;
1148} 1266}
@@ -1162,7 +1280,7 @@ bail:
1162 * incoming RC packet for the given QP. 1280 * incoming RC packet for the given QP.
1163 * Called at interrupt level. 1281 * Called at interrupt level.
1164 * Return 1 if no more processing is needed; otherwise return 0 to 1282 * Return 1 if no more processing is needed; otherwise return 0 to
1165 * schedule a response to be sent and the s_lock unlocked. 1283 * schedule a response to be sent.
1166 */ 1284 */
1167static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, 1285static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1168 struct ipath_other_headers *ohdr, 1286 struct ipath_other_headers *ohdr,
@@ -1173,25 +1291,23 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1173 int diff, 1291 int diff,
1174 int header_in_data) 1292 int header_in_data)
1175{ 1293{
1176 struct ib_reth *reth; 1294 struct ipath_ack_entry *e;
1295 u8 i, prev;
1296 int old_req;
1177 1297
1178 if (diff > 0) { 1298 if (diff > 0) {
1179 /* 1299 /*
1180 * Packet sequence error. 1300 * Packet sequence error.
1181 * A NAK will ACK earlier sends and RDMA writes. 1301 * A NAK will ACK earlier sends and RDMA writes.
1182 * Don't queue the NAK if a RDMA read, atomic, or 1302 * Don't queue the NAK if we already sent one.
1183 * NAK is pending though.
1184 */ 1303 */
1185 if (qp->s_ack_state != OP(ACKNOWLEDGE) || 1304 if (!qp->r_nak_state) {
1186 qp->r_nak_state != 0)
1187 goto done;
1188 if (qp->r_ack_state < OP(COMPARE_SWAP)) {
1189 qp->r_ack_state = OP(SEND_ONLY);
1190 qp->r_nak_state = IB_NAK_PSN_ERROR; 1305 qp->r_nak_state = IB_NAK_PSN_ERROR;
1191 /* Use the expected PSN. */ 1306 /* Use the expected PSN. */
1192 qp->r_ack_psn = qp->r_psn; 1307 qp->r_ack_psn = qp->r_psn;
1308 goto send_ack;
1193 } 1309 }
1194 goto send_ack; 1310 goto done;
1195 } 1311 }
1196 1312
1197 /* 1313 /*
@@ -1204,8 +1320,46 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1204 * can coalesce an outstanding duplicate ACK. We have to 1320 * can coalesce an outstanding duplicate ACK. We have to
1205 * send the earliest so that RDMA reads can be restarted at 1321 * send the earliest so that RDMA reads can be restarted at
1206 * the requester's expected PSN. 1322 * the requester's expected PSN.
1323 *
1324 * First, find where this duplicate PSN falls within the
1325 * ACKs previously sent.
1207 */ 1326 */
1208 if (opcode == OP(RDMA_READ_REQUEST)) { 1327 psn &= IPATH_PSN_MASK;
1328 e = NULL;
1329 old_req = 1;
1330 spin_lock_irq(&qp->s_lock);
1331 for (i = qp->r_head_ack_queue; ; i = prev) {
1332 if (i == qp->s_tail_ack_queue)
1333 old_req = 0;
1334 if (i)
1335 prev = i - 1;
1336 else
1337 prev = IPATH_MAX_RDMA_ATOMIC;
1338 if (prev == qp->r_head_ack_queue) {
1339 e = NULL;
1340 break;
1341 }
1342 e = &qp->s_ack_queue[prev];
1343 if (!e->opcode) {
1344 e = NULL;
1345 break;
1346 }
1347 if (ipath_cmp24(psn, e->psn) >= 0)
1348 break;
1349 }
1350 switch (opcode) {
1351 case OP(RDMA_READ_REQUEST): {
1352 struct ib_reth *reth;
1353 u32 offset;
1354 u32 len;
1355
1356 /*
1357 * If we didn't find the RDMA read request in the ack queue,
1358 * or the send tasklet is already backed up to send an
1359 * earlier entry, we can ignore this request.
1360 */
1361 if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req)
1362 goto unlock_done;
1209 /* RETH comes after BTH */ 1363 /* RETH comes after BTH */
1210 if (!header_in_data) 1364 if (!header_in_data)
1211 reth = &ohdr->u.rc.reth; 1365 reth = &ohdr->u.rc.reth;
@@ -1214,88 +1368,87 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1214 data += sizeof(*reth); 1368 data += sizeof(*reth);
1215 } 1369 }
1216 /* 1370 /*
1217 * If we receive a duplicate RDMA request, it means the 1371 * Address range must be a subset of the original
1218 * requester saw a sequence error and needs to restart 1372 * request and start on pmtu boundaries.
1219 * from an earlier point. We can abort the current 1373 * We reuse the old ack_queue slot since the requester
1220 * RDMA read send in that case. 1374 * should not back up and request an earlier PSN for the
1375 * same request.
1221 */ 1376 */
1222 spin_lock_irq(&qp->s_lock); 1377 offset = ((psn - e->psn) & IPATH_PSN_MASK) *
1223 if (qp->s_ack_state != OP(ACKNOWLEDGE) && 1378 ib_mtu_enum_to_int(qp->path_mtu);
1224 (qp->s_hdrwords || ipath_cmp24(psn, qp->s_ack_psn) >= 0)) { 1379 len = be32_to_cpu(reth->length);
1225 /* 1380 if (unlikely(offset + len > e->rdma_sge.sge.sge_length))
1226 * We are already sending earlier requested data. 1381 goto unlock_done;
1227 * Don't abort it to send later out of sequence data. 1382 if (len != 0) {
1228 */
1229 spin_unlock_irq(&qp->s_lock);
1230 goto done;
1231 }
1232 qp->s_rdma_len = be32_to_cpu(reth->length);
1233 if (qp->s_rdma_len != 0) {
1234 u32 rkey = be32_to_cpu(reth->rkey); 1383 u32 rkey = be32_to_cpu(reth->rkey);
1235 u64 vaddr = be64_to_cpu(reth->vaddr); 1384 u64 vaddr = be64_to_cpu(reth->vaddr);
1236 int ok; 1385 int ok;
1237 1386
1238 /* 1387 ok = ipath_rkey_ok(qp, &e->rdma_sge,
1239 * Address range must be a subset of the original 1388 len, vaddr, rkey,
1240 * request and start on pmtu boundaries.
1241 */
1242 ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
1243 qp->s_rdma_len, vaddr, rkey,
1244 IB_ACCESS_REMOTE_READ); 1389 IB_ACCESS_REMOTE_READ);
1245 if (unlikely(!ok)) { 1390 if (unlikely(!ok))
1246 spin_unlock_irq(&qp->s_lock); 1391 goto unlock_done;
1247 goto done;
1248 }
1249 } else { 1392 } else {
1250 qp->s_rdma_sge.sg_list = NULL; 1393 e->rdma_sge.sg_list = NULL;
1251 qp->s_rdma_sge.num_sge = 0; 1394 e->rdma_sge.num_sge = 0;
1252 qp->s_rdma_sge.sge.mr = NULL; 1395 e->rdma_sge.sge.mr = NULL;
1253 qp->s_rdma_sge.sge.vaddr = NULL; 1396 e->rdma_sge.sge.vaddr = NULL;
1254 qp->s_rdma_sge.sge.length = 0; 1397 e->rdma_sge.sge.length = 0;
1255 qp->s_rdma_sge.sge.sge_length = 0; 1398 e->rdma_sge.sge.sge_length = 0;
1256 } 1399 }
1257 qp->s_ack_state = opcode; 1400 e->psn = psn;
1258 qp->s_ack_psn = psn; 1401 qp->s_ack_state = OP(ACKNOWLEDGE);
1259 spin_unlock_irq(&qp->s_lock); 1402 qp->s_tail_ack_queue = prev;
1260 tasklet_hi_schedule(&qp->s_task); 1403 break;
1261 goto send_ack;
1262 } 1404 }
1263 1405
1264 /*
1265 * A pending RDMA read will ACK anything before it so
1266 * ignore earlier duplicate requests.
1267 */
1268 if (qp->s_ack_state != OP(ACKNOWLEDGE))
1269 goto done;
1270
1271 /*
1272 * If an ACK is pending, don't replace the pending ACK
1273 * with an earlier one since the later one will ACK the earlier.
1274 * Also, if we already have a pending atomic, send it.
1275 */
1276 if (qp->r_ack_state != OP(ACKNOWLEDGE) &&
1277 (ipath_cmp24(psn, qp->r_ack_psn) <= 0 ||
1278 qp->r_ack_state >= OP(COMPARE_SWAP)))
1279 goto send_ack;
1280 switch (opcode) {
1281 case OP(COMPARE_SWAP): 1406 case OP(COMPARE_SWAP):
1282 case OP(FETCH_ADD): 1407 case OP(FETCH_ADD): {
1283 /* 1408 /*
1284 * Check for the PSN of the last atomic operation 1409 * If we didn't find the atomic request in the ack queue
1285 * performed and resend the result if found. 1410 * or the send tasklet is already backed up to send an
1411 * earlier entry, we can ignore this request.
1286 */ 1412 */
1287 if ((psn & IPATH_PSN_MASK) != qp->r_atomic_psn) 1413 if (!e || e->opcode != (u8) opcode || old_req)
1288 goto done; 1414 goto unlock_done;
1415 qp->s_ack_state = OP(ACKNOWLEDGE);
1416 qp->s_tail_ack_queue = prev;
1417 break;
1418 }
1419
1420 default:
1421 if (old_req)
1422 goto unlock_done;
1423 /*
1424 * Resend the most recent ACK if this request is
1425 * after all the previous RDMA reads and atomics.
1426 */
1427 if (i == qp->r_head_ack_queue) {
1428 spin_unlock_irq(&qp->s_lock);
1429 qp->r_nak_state = 0;
1430 qp->r_ack_psn = qp->r_psn - 1;
1431 goto send_ack;
1432 }
1433 /*
1434 * Resend the RDMA read or atomic op which
1435 * ACKs this duplicate request.
1436 */
1437 qp->s_ack_state = OP(ACKNOWLEDGE);
1438 qp->s_tail_ack_queue = i;
1289 break; 1439 break;
1290 } 1440 }
1291 qp->r_ack_state = opcode;
1292 qp->r_nak_state = 0; 1441 qp->r_nak_state = 0;
1293 qp->r_ack_psn = psn; 1442 spin_unlock_irq(&qp->s_lock);
1294send_ack: 1443 tasklet_hi_schedule(&qp->s_task);
1295 return 0;
1296 1444
1445unlock_done:
1446 spin_unlock_irq(&qp->s_lock);
1297done: 1447done:
1298 return 1; 1448 return 1;
1449
1450send_ack:
1451 return 0;
1299} 1452}
1300 1453
1301static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) 1454static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
@@ -1391,15 +1544,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1391 opcode == OP(SEND_LAST_WITH_IMMEDIATE)) 1544 opcode == OP(SEND_LAST_WITH_IMMEDIATE))
1392 break; 1545 break;
1393 nack_inv: 1546 nack_inv:
1394 /*
1395 * A NAK will ACK earlier sends and RDMA writes.
1396 * Don't queue the NAK if a RDMA read, atomic, or NAK
1397 * is pending though.
1398 */
1399 if (qp->r_ack_state >= OP(COMPARE_SWAP))
1400 goto send_ack;
1401 ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR); 1547 ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
1402 qp->r_ack_state = OP(SEND_ONLY);
1403 qp->r_nak_state = IB_NAK_INVALID_REQUEST; 1548 qp->r_nak_state = IB_NAK_INVALID_REQUEST;
1404 qp->r_ack_psn = qp->r_psn; 1549 qp->r_ack_psn = qp->r_psn;
1405 goto send_ack; 1550 goto send_ack;
@@ -1441,9 +1586,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1441 * Don't queue the NAK if a RDMA read or atomic 1586 * Don't queue the NAK if a RDMA read or atomic
1442 * is pending though. 1587 * is pending though.
1443 */ 1588 */
1444 if (qp->r_ack_state >= OP(COMPARE_SWAP)) 1589 if (qp->r_nak_state)
1445 goto send_ack; 1590 goto done;
1446 qp->r_ack_state = OP(SEND_ONLY);
1447 qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; 1591 qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
1448 qp->r_ack_psn = qp->r_psn; 1592 qp->r_ack_psn = qp->r_psn;
1449 goto send_ack; 1593 goto send_ack;
@@ -1567,7 +1711,19 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1567 goto rnr_nak; 1711 goto rnr_nak;
1568 goto send_last_imm; 1712 goto send_last_imm;
1569 1713
1570 case OP(RDMA_READ_REQUEST): 1714 case OP(RDMA_READ_REQUEST): {
1715 struct ipath_ack_entry *e;
1716 u32 len;
1717 u8 next;
1718
1719 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
1720 goto nack_acc;
1721 next = qp->r_head_ack_queue + 1;
1722 if (next > IPATH_MAX_RDMA_ATOMIC)
1723 next = 0;
1724 if (unlikely(next == qp->s_tail_ack_queue))
1725 goto nack_inv;
1726 e = &qp->s_ack_queue[qp->r_head_ack_queue];
1571 /* RETH comes after BTH */ 1727 /* RETH comes after BTH */
1572 if (!header_in_data) 1728 if (!header_in_data)
1573 reth = &ohdr->u.rc.reth; 1729 reth = &ohdr->u.rc.reth;
@@ -1575,72 +1731,75 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1575 reth = (struct ib_reth *)data; 1731 reth = (struct ib_reth *)data;
1576 data += sizeof(*reth); 1732 data += sizeof(*reth);
1577 } 1733 }
1578 if (unlikely(!(qp->qp_access_flags & 1734 len = be32_to_cpu(reth->length);
1579 IB_ACCESS_REMOTE_READ))) 1735 if (len) {
1580 goto nack_acc;
1581 spin_lock_irq(&qp->s_lock);
1582 qp->s_rdma_len = be32_to_cpu(reth->length);
1583 if (qp->s_rdma_len != 0) {
1584 u32 rkey = be32_to_cpu(reth->rkey); 1736 u32 rkey = be32_to_cpu(reth->rkey);
1585 u64 vaddr = be64_to_cpu(reth->vaddr); 1737 u64 vaddr = be64_to_cpu(reth->vaddr);
1586 int ok; 1738 int ok;
1587 1739
1588 /* Check rkey & NAK */ 1740 /* Check rkey & NAK */
1589 ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, 1741 ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
1590 qp->s_rdma_len, vaddr, rkey, 1742 rkey, IB_ACCESS_REMOTE_READ);
1591 IB_ACCESS_REMOTE_READ); 1743 if (unlikely(!ok))
1592 if (unlikely(!ok)) {
1593 spin_unlock_irq(&qp->s_lock);
1594 goto nack_acc; 1744 goto nack_acc;
1595 }
1596 /* 1745 /*
1597 * Update the next expected PSN. We add 1 later 1746 * Update the next expected PSN. We add 1 later
1598 * below, so only add the remainder here. 1747 * below, so only add the remainder here.
1599 */ 1748 */
1600 if (qp->s_rdma_len > pmtu) 1749 if (len > pmtu)
1601 qp->r_psn += (qp->s_rdma_len - 1) / pmtu; 1750 qp->r_psn += (len - 1) / pmtu;
1602 } else { 1751 } else {
1603 qp->s_rdma_sge.sg_list = NULL; 1752 e->rdma_sge.sg_list = NULL;
1604 qp->s_rdma_sge.num_sge = 0; 1753 e->rdma_sge.num_sge = 0;
1605 qp->s_rdma_sge.sge.mr = NULL; 1754 e->rdma_sge.sge.mr = NULL;
1606 qp->s_rdma_sge.sge.vaddr = NULL; 1755 e->rdma_sge.sge.vaddr = NULL;
1607 qp->s_rdma_sge.sge.length = 0; 1756 e->rdma_sge.sge.length = 0;
1608 qp->s_rdma_sge.sge.sge_length = 0; 1757 e->rdma_sge.sge.sge_length = 0;
1609 } 1758 }
1759 e->opcode = opcode;
1760 e->psn = psn;
1610 /* 1761 /*
1611 * We need to increment the MSN here instead of when we 1762 * We need to increment the MSN here instead of when we
1612 * finish sending the result since a duplicate request would 1763 * finish sending the result since a duplicate request would
1613 * increment it more than once. 1764 * increment it more than once.
1614 */ 1765 */
1615 qp->r_msn++; 1766 qp->r_msn++;
1616
1617 qp->s_ack_state = opcode;
1618 qp->s_ack_psn = psn;
1619 spin_unlock_irq(&qp->s_lock);
1620
1621 qp->r_psn++; 1767 qp->r_psn++;
1622 qp->r_state = opcode; 1768 qp->r_state = opcode;
1623 qp->r_nak_state = 0; 1769 qp->r_nak_state = 0;
1770 barrier();
1771 qp->r_head_ack_queue = next;
1624 1772
1625 /* Call ipath_do_rc_send() in another thread. */ 1773 /* Call ipath_do_rc_send() in another thread. */
1626 tasklet_hi_schedule(&qp->s_task); 1774 tasklet_hi_schedule(&qp->s_task);
1627 1775
1628 goto done; 1776 goto done;
1777 }
1629 1778
1630 case OP(COMPARE_SWAP): 1779 case OP(COMPARE_SWAP):
1631 case OP(FETCH_ADD): { 1780 case OP(FETCH_ADD): {
1632 struct ib_atomic_eth *ateth; 1781 struct ib_atomic_eth *ateth;
1782 struct ipath_ack_entry *e;
1633 u64 vaddr; 1783 u64 vaddr;
1784 atomic64_t *maddr;
1634 u64 sdata; 1785 u64 sdata;
1635 u32 rkey; 1786 u32 rkey;
1787 u8 next;
1636 1788
1789 if (unlikely(!(qp->qp_access_flags &
1790 IB_ACCESS_REMOTE_ATOMIC)))
1791 goto nack_acc;
1792 next = qp->r_head_ack_queue + 1;
1793 if (next > IPATH_MAX_RDMA_ATOMIC)
1794 next = 0;
1795 if (unlikely(next == qp->s_tail_ack_queue))
1796 goto nack_inv;
1637 if (!header_in_data) 1797 if (!header_in_data)
1638 ateth = &ohdr->u.atomic_eth; 1798 ateth = &ohdr->u.atomic_eth;
1639 else { 1799 else
1640 ateth = (struct ib_atomic_eth *)data; 1800 ateth = (struct ib_atomic_eth *)data;
1641 data += sizeof(*ateth); 1801 vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
1642 } 1802 be32_to_cpu(ateth->vaddr[1]);
1643 vaddr = be64_to_cpu(ateth->vaddr);
1644 if (unlikely(vaddr & (sizeof(u64) - 1))) 1803 if (unlikely(vaddr & (sizeof(u64) - 1)))
1645 goto nack_inv; 1804 goto nack_inv;
1646 rkey = be32_to_cpu(ateth->rkey); 1805 rkey = be32_to_cpu(ateth->rkey);
@@ -1649,63 +1808,50 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1649 sizeof(u64), vaddr, rkey, 1808 sizeof(u64), vaddr, rkey,
1650 IB_ACCESS_REMOTE_ATOMIC))) 1809 IB_ACCESS_REMOTE_ATOMIC)))
1651 goto nack_acc; 1810 goto nack_acc;
1652 if (unlikely(!(qp->qp_access_flags &
1653 IB_ACCESS_REMOTE_ATOMIC)))
1654 goto nack_acc;
1655 /* Perform atomic OP and save result. */ 1811 /* Perform atomic OP and save result. */
1812 maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
1656 sdata = be64_to_cpu(ateth->swap_data); 1813 sdata = be64_to_cpu(ateth->swap_data);
1657 spin_lock_irq(&dev->pending_lock); 1814 e = &qp->s_ack_queue[qp->r_head_ack_queue];
1658 qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; 1815 e->atomic_data = (opcode == OP(FETCH_ADD)) ?
1659 if (opcode == OP(FETCH_ADD)) 1816 (u64) atomic64_add_return(sdata, maddr) - sdata :
1660 *(u64 *) qp->r_sge.sge.vaddr = 1817 (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
1661 qp->r_atomic_data + sdata; 1818 be64_to_cpu(ateth->compare_data),
1662 else if (qp->r_atomic_data == 1819 sdata);
1663 be64_to_cpu(ateth->compare_data)) 1820 e->opcode = opcode;
1664 *(u64 *) qp->r_sge.sge.vaddr = sdata; 1821 e->psn = psn & IPATH_PSN_MASK;
1665 spin_unlock_irq(&dev->pending_lock);
1666 qp->r_msn++; 1822 qp->r_msn++;
1667 qp->r_atomic_psn = psn & IPATH_PSN_MASK; 1823 qp->r_psn++;
1668 psn |= 1 << 31; 1824 qp->r_state = opcode;
1669 break; 1825 qp->r_nak_state = 0;
1826 barrier();
1827 qp->r_head_ack_queue = next;
1828
1829 /* Call ipath_do_rc_send() in another thread. */
1830 tasklet_hi_schedule(&qp->s_task);
1831
1832 goto done;
1670 } 1833 }
1671 1834
1672 default: 1835 default:
1673 /* Drop packet for unknown opcodes. */ 1836 /* NAK unknown opcodes. */
1674 goto done; 1837 goto nack_inv;
1675 } 1838 }
1676 qp->r_psn++; 1839 qp->r_psn++;
1677 qp->r_state = opcode; 1840 qp->r_state = opcode;
1841 qp->r_ack_psn = psn;
1678 qp->r_nak_state = 0; 1842 qp->r_nak_state = 0;
1679 /* Send an ACK if requested or required. */ 1843 /* Send an ACK if requested or required. */
1680 if (psn & (1 << 31)) { 1844 if (psn & (1 << 31))
1681 /*
1682 * Coalesce ACKs unless there is a RDMA READ or
1683 * ATOMIC pending.
1684 */
1685 if (qp->r_ack_state < OP(COMPARE_SWAP)) {
1686 qp->r_ack_state = opcode;
1687 qp->r_ack_psn = psn;
1688 }
1689 goto send_ack; 1845 goto send_ack;
1690 }
1691 goto done; 1846 goto done;
1692 1847
1693nack_acc: 1848nack_acc:
1694 /* 1849 ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
1695 * A NAK will ACK earlier sends and RDMA writes. 1850 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
1696 * Don't queue the NAK if a RDMA read, atomic, or NAK 1851 qp->r_ack_psn = qp->r_psn;
1697 * is pending though. 1852
1698 */
1699 if (qp->r_ack_state < OP(COMPARE_SWAP)) {
1700 ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
1701 qp->r_ack_state = OP(RDMA_WRITE_ONLY);
1702 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
1703 qp->r_ack_psn = qp->r_psn;
1704 }
1705send_ack: 1853send_ack:
1706 /* Send ACK right away unless the send tasklet has a pending ACK. */ 1854 send_rc_ack(qp);
1707 if (qp->s_ack_state == OP(ACKNOWLEDGE))
1708 send_rc_ack(qp);
1709 1855
1710done: 1856done:
1711 return; 1857 return;
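
The RDMA read and atomic handlers above treat the new responder ACK queue as a ring of IPATH_MAX_RDMA_ATOMIC + 1 entries: the slot at r_head_ack_queue is filled, the head is advanced (after a barrier()), and the request is NAKed when the advance would collide with s_tail_ack_queue. A minimal user-space sketch of that index arithmetic follows; the names are illustrative, not the driver code.

#include <stdio.h>

#define MAX_RDMA_ATOMIC 4                 /* mirrors IPATH_MAX_RDMA_ATOMIC */

struct ack_ring {
	unsigned head;                    /* like qp->r_head_ack_queue */
	unsigned tail;                    /* like qp->s_tail_ack_queue */
};

/* Reserve the slot at head, as the RDMA_READ_REQUEST path does. */
static int ring_push(struct ack_ring *r, unsigned *slot)
{
	unsigned next = r->head + 1;

	if (next > MAX_RDMA_ATOMIC)       /* wrap within the +1-sized array */
		next = 0;
	if (next == r->tail)              /* full: the driver NAKs instead */
		return -1;
	*slot = r->head;                  /* fill this entry, then publish */
	r->head = next;                   /* the driver issues barrier() first */
	return 0;
}

int main(void)
{
	struct ack_ring r = { 0, 0 };
	unsigned slot, n = 0;

	while (ring_push(&r, &slot) == 0)
		n++;
	printf("queued %u requests before the ring filled\n", n); /* 4 */
	return 0;
}

One slot is deliberately left unused so that head == tail can only mean "empty", which is why the array is declared with IPATH_MAX_RDMA_ATOMIC + 1 entries in the ipath_verbs.h hunk further down.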
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index dffc76016d3c..c182bcd62098 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -126,9 +126,18 @@
126#define INFINIPATH_E_RESET 0x0004000000000000ULL 126#define INFINIPATH_E_RESET 0x0004000000000000ULL
127#define INFINIPATH_E_HARDWARE 0x0008000000000000ULL 127#define INFINIPATH_E_HARDWARE 0x0008000000000000ULL
128 128
129/*
130 * this is used to print "common" packet errors only when the
131 * __IPATH_ERRPKTDBG bit is set in ipath_debug.
132 */
133#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \
134 | INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \
135 | INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \
136 | INFINIPATH_E_REBP )
137
129/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ 138/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
130/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo 139/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
131 * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: eagerTID, 3: expTID 140 * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: expTID, 3: eagerTID
132 * bit 4: flag buffer, 5: datainfo, 6: header info */ 141 * bit 4: flag buffer, 5: datainfo, 6: header info */
133#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL 142#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL
134#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40 143#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
@@ -143,8 +152,8 @@
143/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */ 152/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
144#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL 153#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL
145#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL 154#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL
146#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x04ULL 155#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x04ULL
147#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x08ULL 156#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL
148#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL 157#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL
149#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL 158#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
150#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL 159#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL
@@ -299,13 +308,6 @@
299#define INFINIPATH_XGXS_RX_POL_SHIFT 19 308#define INFINIPATH_XGXS_RX_POL_SHIFT 19
300#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL 309#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
301 310
302#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
303
304/* TID entries (memory), HT-only */
305#define INFINIPATH_RT_VALID 0x8000000000000000ULL
306#define INFINIPATH_RT_ADDR_SHIFT 0
307#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF
308#define INFINIPATH_RT_BUFSIZE_SHIFT 48
309 311
310/* 312/*
311 * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our 313 * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our
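
The SHIFT/MASK pairs in this header are consumed by shifting the 64-bit hwerrstatus value down and masking, then testing the per-memory bits listed in the comment. A short sketch using the TXEMEMPARITYERR values shown above; the status word itself is fabricated for illustration.

#include <stdint.h>
#include <stdio.h>

#define HWE_TXEMEMPARITYERR_MASK  0xFULL  /* from the hunk above */
#define HWE_TXEMEMPARITYERR_SHIFT 40

int main(void)
{
	uint64_t hwerrstatus = 0x5ULL << HWE_TXEMEMPARITYERR_SHIFT; /* made up */
	uint64_t field = (hwerrstatus >> HWE_TXEMEMPARITYERR_SHIFT) &
			 HWE_TXEMEMPARITYERR_MASK;

	if (field & 0x1)
		printf("PIObuf parity error\n");      /* bit 0: PIObuf */
	if (field & 0x4)
		printf("launchfifo parity error\n");  /* bit 2: launchfifo */
	return 0;
}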
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index e86cb171872e..d9c2a9b15d86 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -202,6 +202,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
202 wq->tail = tail; 202 wq->tail = tail;
203 203
204 ret = 1; 204 ret = 1;
205 qp->r_wrid_valid = 1;
205 if (handler) { 206 if (handler) {
206 u32 n; 207 u32 n;
207 208
@@ -229,7 +230,6 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
229 } 230 }
230 } 231 }
231 spin_unlock_irqrestore(&rq->lock, flags); 232 spin_unlock_irqrestore(&rq->lock, flags);
232 qp->r_wrid_valid = 1;
233 233
234bail: 234bail:
235 return ret; 235 return ret;
@@ -255,6 +255,7 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
255 unsigned long flags; 255 unsigned long flags;
256 struct ib_wc wc; 256 struct ib_wc wc;
257 u64 sdata; 257 u64 sdata;
258 atomic64_t *maddr;
258 259
259 qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); 260 qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
260 if (!qp) { 261 if (!qp) {
@@ -265,7 +266,8 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
265again: 266again:
266 spin_lock_irqsave(&sqp->s_lock, flags); 267 spin_lock_irqsave(&sqp->s_lock, flags);
267 268
268 if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK)) { 269 if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) ||
270 qp->s_rnr_timeout) {
269 spin_unlock_irqrestore(&sqp->s_lock, flags); 271 spin_unlock_irqrestore(&sqp->s_lock, flags);
270 goto done; 272 goto done;
271 } 273 }
@@ -310,7 +312,7 @@ again:
310 sqp->s_rnr_retry--; 312 sqp->s_rnr_retry--;
311 dev->n_rnr_naks++; 313 dev->n_rnr_naks++;
312 sqp->s_rnr_timeout = 314 sqp->s_rnr_timeout =
313 ib_ipath_rnr_table[sqp->r_min_rnr_timer]; 315 ib_ipath_rnr_table[qp->r_min_rnr_timer];
314 ipath_insert_rnr_queue(sqp); 316 ipath_insert_rnr_queue(sqp);
315 goto done; 317 goto done;
316 } 318 }
@@ -343,20 +345,22 @@ again:
343 wc.sl = sqp->remote_ah_attr.sl; 345 wc.sl = sqp->remote_ah_attr.sl;
344 wc.dlid_path_bits = 0; 346 wc.dlid_path_bits = 0;
345 wc.port_num = 0; 347 wc.port_num = 0;
348 spin_lock_irqsave(&sqp->s_lock, flags);
346 ipath_sqerror_qp(sqp, &wc); 349 ipath_sqerror_qp(sqp, &wc);
350 spin_unlock_irqrestore(&sqp->s_lock, flags);
347 goto done; 351 goto done;
348 } 352 }
349 break; 353 break;
350 354
351 case IB_WR_RDMA_READ: 355 case IB_WR_RDMA_READ:
356 if (unlikely(!(qp->qp_access_flags &
357 IB_ACCESS_REMOTE_READ)))
358 goto acc_err;
352 if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, 359 if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
353 wqe->wr.wr.rdma.remote_addr, 360 wqe->wr.wr.rdma.remote_addr,
354 wqe->wr.wr.rdma.rkey, 361 wqe->wr.wr.rdma.rkey,
355 IB_ACCESS_REMOTE_READ))) 362 IB_ACCESS_REMOTE_READ)))
356 goto acc_err; 363 goto acc_err;
357 if (unlikely(!(qp->qp_access_flags &
358 IB_ACCESS_REMOTE_READ)))
359 goto acc_err;
360 qp->r_sge.sge = wqe->sg_list[0]; 364 qp->r_sge.sge = wqe->sg_list[0];
361 qp->r_sge.sg_list = wqe->sg_list + 1; 365 qp->r_sge.sg_list = wqe->sg_list + 1;
362 qp->r_sge.num_sge = wqe->wr.num_sge; 366 qp->r_sge.num_sge = wqe->wr.num_sge;
@@ -364,22 +368,22 @@ again:
364 368
365 case IB_WR_ATOMIC_CMP_AND_SWP: 369 case IB_WR_ATOMIC_CMP_AND_SWP:
366 case IB_WR_ATOMIC_FETCH_AND_ADD: 370 case IB_WR_ATOMIC_FETCH_AND_ADD:
371 if (unlikely(!(qp->qp_access_flags &
372 IB_ACCESS_REMOTE_ATOMIC)))
373 goto acc_err;
367 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), 374 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
368 wqe->wr.wr.rdma.remote_addr, 375 wqe->wr.wr.atomic.remote_addr,
369 wqe->wr.wr.rdma.rkey, 376 wqe->wr.wr.atomic.rkey,
370 IB_ACCESS_REMOTE_ATOMIC))) 377 IB_ACCESS_REMOTE_ATOMIC)))
371 goto acc_err; 378 goto acc_err;
372 /* Perform atomic OP and save result. */ 379 /* Perform atomic OP and save result. */
373 sdata = wqe->wr.wr.atomic.swap; 380 maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
374 spin_lock_irqsave(&dev->pending_lock, flags); 381 sdata = wqe->wr.wr.atomic.compare_add;
375 qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; 382 *(u64 *) sqp->s_sge.sge.vaddr =
376 if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) 383 (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
377 *(u64 *) qp->r_sge.sge.vaddr = 384 (u64) atomic64_add_return(sdata, maddr) - sdata :
378 qp->r_atomic_data + sdata; 385 (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
379 else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add) 386 sdata, wqe->wr.wr.atomic.swap);
380 *(u64 *) qp->r_sge.sge.vaddr = sdata;
381 spin_unlock_irqrestore(&dev->pending_lock, flags);
382 *(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data;
383 goto send_comp; 387 goto send_comp;
384 388
385 default: 389 default:
@@ -440,7 +444,7 @@ again:
440send_comp: 444send_comp:
441 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; 445 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
442 446
443 if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) || 447 if (!(sqp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
444 (wqe->wr.send_flags & IB_SEND_SIGNALED)) { 448 (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
445 wc.wr_id = wqe->wr.wr_id; 449 wc.wr_id = wqe->wr.wr_id;
446 wc.status = IB_WC_SUCCESS; 450 wc.status = IB_WC_SUCCESS;
@@ -502,7 +506,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
502 * We clear the tasklet flag now since we are committing to return 506 * We clear the tasklet flag now since we are committing to return
503 * from the tasklet function. 507 * from the tasklet function.
504 */ 508 */
505 clear_bit(IPATH_S_BUSY, &qp->s_flags); 509 clear_bit(IPATH_S_BUSY, &qp->s_busy);
506 tasklet_unlock(&qp->s_task); 510 tasklet_unlock(&qp->s_task);
507 want_buffer(dev->dd); 511 want_buffer(dev->dd);
508 dev->n_piowait++; 512 dev->n_piowait++;
@@ -541,6 +545,9 @@ int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
541 wr->sg_list[0].addr & (sizeof(u64) - 1))) { 545 wr->sg_list[0].addr & (sizeof(u64) - 1))) {
542 ret = -EINVAL; 546 ret = -EINVAL;
543 goto bail; 547 goto bail;
548 } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) {
549 ret = -EINVAL;
550 goto bail;
544 } 551 }
545 /* IB spec says that num_sge == 0 is OK. */ 552 /* IB spec says that num_sge == 0 is OK. */
546 if (wr->num_sge > qp->s_max_sge) { 553 if (wr->num_sge > qp->s_max_sge) {
@@ -647,7 +654,7 @@ void ipath_do_ruc_send(unsigned long data)
647 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); 654 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
648 struct ipath_other_headers *ohdr; 655 struct ipath_other_headers *ohdr;
649 656
650 if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags)) 657 if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
651 goto bail; 658 goto bail;
652 659
653 if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) { 660 if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) {
@@ -683,19 +690,15 @@ again:
683 */ 690 */
684 spin_lock_irqsave(&qp->s_lock, flags); 691 spin_lock_irqsave(&qp->s_lock, flags);
685 692
686 /* Sending responses has higher priority over sending requests. */ 693 if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
687 if (qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE && 694 ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
688 (bth0 = ipath_make_rc_ack(qp, ohdr, pmtu)) != 0) 695 ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
689 bth2 = qp->s_ack_psn++ & IPATH_PSN_MASK;
690 else if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
691 ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
692 ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
693 /* 696 /*
694 * Clear the busy bit before unlocking to avoid races with 697 * Clear the busy bit before unlocking to avoid races with
695 * adding new work queue items and then failing to process 698 * adding new work queue items and then failing to process
696 * them. 699 * them.
697 */ 700 */
698 clear_bit(IPATH_S_BUSY, &qp->s_flags); 701 clear_bit(IPATH_S_BUSY, &qp->s_busy);
699 spin_unlock_irqrestore(&qp->s_lock, flags); 702 spin_unlock_irqrestore(&qp->s_lock, flags);
700 goto bail; 703 goto bail;
701 } 704 }
@@ -728,7 +731,7 @@ again:
728 goto again; 731 goto again;
729 732
730clear: 733clear:
731 clear_bit(IPATH_S_BUSY, &qp->s_flags); 734 clear_bit(IPATH_S_BUSY, &qp->s_busy);
732bail: 735bail:
733 return; 736 return;
734} 737}
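
Both this loopback path and the receive path in ipath_rc.c now drop the pending_lock and rely on the atomic primitives directly: atomic64_add_return() yields the value after the add, hence the "- sdata" subtraction to recover the fetched value, and cmpxchg() returns the prior value whether or not the swap happened. A user-space sketch of those two semantics, with C11 atomics standing in for the kernel helpers:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t fetch_add(_Atomic uint64_t *target, uint64_t sdata)
{
	/* C11 fetch_add already returns the prior value; the kernel's
	   atomic64_add_return() returns the new one, hence "- sdata" there */
	return atomic_fetch_add(target, sdata);
}

static uint64_t cmp_swap(_Atomic uint64_t *target, uint64_t compare,
			 uint64_t swap)
{
	uint64_t old = compare;

	/* like cmpxchg(): the prior value comes back whether it swapped */
	atomic_compare_exchange_strong(target, &old, swap);
	return old;
}

int main(void)
{
	_Atomic uint64_t mem = 100;
	uint64_t prior;

	prior = fetch_add(&mem, 5);
	printf("fetch_add -> %llu, mem = %llu\n", (unsigned long long)prior,
	       (unsigned long long)atomic_load(&mem));   /* 100, 105 */
	prior = cmp_swap(&mem, 105, 42);
	printf("cmp_swap  -> %llu, mem = %llu\n", (unsigned long long)prior,
	       (unsigned long long)atomic_load(&mem));   /* 105, 42 */
	return 0;
}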
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index 30a825928fcf..9307f7187ca5 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -207,7 +207,7 @@ void ipath_get_faststats(unsigned long opaque)
207 * don't access the chip while running diags, or memory diags can 207 * don't access the chip while running diags, or memory diags can
208 * fail 208 * fail
209 */ 209 */
210 if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT) || 210 if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) ||
211 ipath_diag_inuse) 211 ipath_diag_inuse)
212 /* but re-arm the timer, for diags case; won't hurt other */ 212 /* but re-arm the timer, for diags case; won't hurt other */
213 goto done; 213 goto done;
@@ -237,11 +237,13 @@ void ipath_get_faststats(unsigned long opaque)
237 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) 237 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs)
238 && time_after(jiffies, dd->ipath_unmasktime)) { 238 && time_after(jiffies, dd->ipath_unmasktime)) {
239 char ebuf[256]; 239 char ebuf[256];
240 ipath_decode_err(ebuf, sizeof ebuf, 240 int iserr;
241 iserr = ipath_decode_err(ebuf, sizeof ebuf,
241 (dd->ipath_maskederrs & ~dd-> 242 (dd->ipath_maskederrs & ~dd->
242 ipath_ignorederrs)); 243 ipath_ignorederrs));
243 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) & 244 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
244 ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) 245 ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
246 INFINIPATH_E_PKTERRS ))
245 ipath_dev_err(dd, "Re-enabling masked errors " 247 ipath_dev_err(dd, "Re-enabling masked errors "
246 "(%s)\n", ebuf); 248 "(%s)\n", ebuf);
247 else { 249 else {
@@ -252,8 +254,12 @@ void ipath_get_faststats(unsigned long opaque)
252 * them. So only complain about these at debug 254 * them. So only complain about these at debug
253 * level. 255 * level.
254 */ 256 */
255 ipath_dbg("Disabling frequent queue full errors " 257 if (iserr)
256 "(%s)\n", ebuf); 258 ipath_dbg("Re-enabling queue full errors (%s)\n",
259 ebuf);
260 else
261 ipath_cdbg(ERRPKT, "Re-enabling packet"
262 " problem interrupt (%s)\n", ebuf);
257 } 263 }
258 dd->ipath_maskederrs = dd->ipath_ignorederrs; 264 dd->ipath_maskederrs = dd->ipath_ignorederrs;
259 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 265 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
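
INFINIPATH_E_PKTERRS from ipath_registers.h acts here as a classifier: masked errors that fall entirely inside it are "common" packet problems logged at debug level, while anything outside still earns an ipath_dev_err(). A small sketch of the test, with made-up bit values in place of the hardware's:

#include <stdint.h>
#include <stdio.h>

#define E_SPKTLEN 0x1ULL    /* hypothetical bit positions */
#define E_RVCRC   0x2ULL
#define E_RICRC   0x4ULL
#define E_RESET   0x8ULL

#define E_PKTERRS (E_SPKTLEN | E_RVCRC | E_RICRC)

int main(void)
{
	uint64_t errs = E_RVCRC | E_RICRC;

	if (errs & ~E_PKTERRS)
		printf("serious error bits set: %#llx\n",
		       (unsigned long long)(errs & ~E_PKTERRS));
	else
		printf("only common packet errors; log at debug level\n");
	return 0;
}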
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index 325d6634ff53..1c2b03c2ef5e 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -42,7 +42,7 @@ static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
42{ 42{
43 if (++qp->s_last == qp->s_size) 43 if (++qp->s_last == qp->s_size)
44 qp->s_last = 0; 44 qp->s_last = 0;
45 if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || 45 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
46 (wqe->wr.send_flags & IB_SEND_SIGNALED)) { 46 (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
47 wc->wr_id = wqe->wr.wr_id; 47 wc->wr_id = wqe->wr.wr_id;
48 wc->status = IB_WC_SUCCESS; 48 wc->status = IB_WC_SUCCESS;
@@ -344,13 +344,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
344 send_first: 344 send_first:
345 if (qp->r_reuse_sge) { 345 if (qp->r_reuse_sge) {
346 qp->r_reuse_sge = 0; 346 qp->r_reuse_sge = 0;
347 qp->r_sge = qp->s_rdma_sge; 347 qp->r_sge = qp->s_rdma_read_sge;
348 } else if (!ipath_get_rwqe(qp, 0)) { 348 } else if (!ipath_get_rwqe(qp, 0)) {
349 dev->n_pkt_drops++; 349 dev->n_pkt_drops++;
350 goto done; 350 goto done;
351 } 351 }
352 /* Save the WQE so we can reuse it in case of an error. */ 352 /* Save the WQE so we can reuse it in case of an error. */
353 qp->s_rdma_sge = qp->r_sge; 353 qp->s_rdma_read_sge = qp->r_sge;
354 qp->r_rcv_len = 0; 354 qp->r_rcv_len = 0;
355 if (opcode == OP(SEND_ONLY)) 355 if (opcode == OP(SEND_ONLY))
356 goto send_last; 356 goto send_last;
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 9a3e54664ee4..a518f7c8fa83 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -308,6 +308,11 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
308 goto bail; 308 goto bail;
309 } 309 }
310 310
311 if (wr->wr.ud.ah->pd != qp->ibqp.pd) {
312 ret = -EPERM;
313 goto bail;
314 }
315
311 /* IB spec says that num_sge == 0 is OK. */ 316 /* IB spec says that num_sge == 0 is OK. */
312 if (wr->num_sge > qp->s_max_sge) { 317 if (wr->num_sge > qp->s_max_sge) {
313 ret = -EINVAL; 318 ret = -EINVAL;
@@ -467,7 +472,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
467 472
468done: 473done:
469 /* Queue the completion status entry. */ 474 /* Queue the completion status entry. */
470 if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || 475 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
471 (wr->send_flags & IB_SEND_SIGNALED)) { 476 (wr->send_flags & IB_SEND_SIGNALED)) {
472 wc.wr_id = wr->wr_id; 477 wc.wr_id = wr->wr_id;
473 wc.status = IB_WC_SUCCESS; 478 wc.status = IB_WC_SUCCESS;
@@ -647,6 +652,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
647 ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); 652 ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
648 ipath_copy_sge(&qp->r_sge, data, 653 ipath_copy_sge(&qp->r_sge, data,
649 wc.byte_len - sizeof(struct ib_grh)); 654 wc.byte_len - sizeof(struct ib_grh));
655 qp->r_wrid_valid = 0;
650 wc.wr_id = qp->r_wr_id; 656 wc.wr_id = qp->r_wr_id;
651 wc.status = IB_WC_SUCCESS; 657 wc.status = IB_WC_SUCCESS;
652 wc.opcode = IB_WC_RECV; 658 wc.opcode = IB_WC_RECV;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 2aaacdb7e52a..18c6df2052c2 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -438,6 +438,10 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
438 struct ipath_mcast *mcast; 438 struct ipath_mcast *mcast;
439 struct ipath_mcast_qp *p; 439 struct ipath_mcast_qp *p;
440 440
441 if (lnh != IPATH_LRH_GRH) {
442 dev->n_pkt_drops++;
443 goto bail;
444 }
441 mcast = ipath_mcast_find(&hdr->u.l.grh.dgid); 445 mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
442 if (mcast == NULL) { 446 if (mcast == NULL) {
443 dev->n_pkt_drops++; 447 dev->n_pkt_drops++;
@@ -445,8 +449,7 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
445 } 449 }
446 dev->n_multicast_rcv++; 450 dev->n_multicast_rcv++;
447 list_for_each_entry_rcu(p, &mcast->qp_list, list) 451 list_for_each_entry_rcu(p, &mcast->qp_list, list)
448 ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data, 452 ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
449 tlen, p->qp);
450 /* 453 /*
451 * Notify ipath_multicast_detach() if it is waiting for us 454 * Notify ipath_multicast_detach() if it is waiting for us
452 * to finish. 455 * to finish.
@@ -773,7 +776,6 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
773 /* +1 is for the qword padding of pbc */ 776 /* +1 is for the qword padding of pbc */
774 plen = hdrwords + ((len + 3) >> 2) + 1; 777 plen = hdrwords + ((len + 3) >> 2) + 1;
775 if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) { 778 if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
776 ipath_dbg("packet len 0x%x too long, failing\n", plen);
777 ret = -EINVAL; 779 ret = -EINVAL;
778 goto bail; 780 goto bail;
779 } 781 }
@@ -980,14 +982,14 @@ static int ipath_query_device(struct ib_device *ibdev,
980 props->max_cqe = ib_ipath_max_cqes; 982 props->max_cqe = ib_ipath_max_cqes;
981 props->max_mr = dev->lk_table.max; 983 props->max_mr = dev->lk_table.max;
982 props->max_pd = ib_ipath_max_pds; 984 props->max_pd = ib_ipath_max_pds;
983 props->max_qp_rd_atom = 1; 985 props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
984 props->max_qp_init_rd_atom = 1; 986 props->max_qp_init_rd_atom = 255;
985 /* props->max_res_rd_atom */ 987 /* props->max_res_rd_atom */
986 props->max_srq = ib_ipath_max_srqs; 988 props->max_srq = ib_ipath_max_srqs;
987 props->max_srq_wr = ib_ipath_max_srq_wrs; 989 props->max_srq_wr = ib_ipath_max_srq_wrs;
988 props->max_srq_sge = ib_ipath_max_srq_sges; 990 props->max_srq_sge = ib_ipath_max_srq_sges;
989 /* props->local_ca_ack_delay */ 991 /* props->local_ca_ack_delay */
990 props->atomic_cap = IB_ATOMIC_HCA; 992 props->atomic_cap = IB_ATOMIC_GLOB;
991 props->max_pkeys = ipath_get_npkeys(dev->dd); 993 props->max_pkeys = ipath_get_npkeys(dev->dd);
992 props->max_mcast_grp = ib_ipath_max_mcast_grps; 994 props->max_mcast_grp = ib_ipath_max_mcast_grps;
993 props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached; 995 props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
@@ -1557,7 +1559,6 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
1557 dev->node_type = RDMA_NODE_IB_CA; 1559 dev->node_type = RDMA_NODE_IB_CA;
1558 dev->phys_port_cnt = 1; 1560 dev->phys_port_cnt = 1;
1559 dev->dma_device = &dd->pcidev->dev; 1561 dev->dma_device = &dd->pcidev->dev;
1560 dev->class_dev.dev = dev->dma_device;
1561 dev->query_device = ipath_query_device; 1562 dev->query_device = ipath_query_device;
1562 dev->modify_device = ipath_modify_device; 1563 dev->modify_device = ipath_modify_device;
1563 dev->query_port = ipath_query_port; 1564 dev->query_port = ipath_query_port;
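
The query_device changes above raise the advertised rd_atomic limits (responder resources to IPATH_MAX_RDMA_ATOMIC, initiator depth to 255) and switch to IB_ATOMIC_GLOB. A sketch of assumed consumer-side usage, not part of this patch, showing how a ULP would read those limits through libibverbs before choosing depths for its RTR/RTS transitions:

#include <infiniband/verbs.h>
#include <stdio.h>

int main(void)
{
	struct ibv_device **list = ibv_get_device_list(NULL);
	struct ibv_context *ctx;
	struct ibv_device_attr attr;

	if (!list || !list[0])
		return 1;
	ctx = ibv_open_device(list[0]);
	if (!ctx || ibv_query_device(ctx, &attr))
		return 1;
	/* initiator depth vs. responder resources, per the hunk above */
	printf("max_qp_init_rd_atom=%d max_qp_rd_atom=%d\n",
	       attr.max_qp_init_rd_atom, attr.max_qp_rd_atom);
	ibv_close_device(ctx);
	ibv_free_device_list(list);
	return 0;
}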
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index c0c8d5b24a7d..7c4929f1cb5b 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -40,9 +40,12 @@
40#include <linux/interrupt.h> 40#include <linux/interrupt.h>
41#include <linux/kref.h> 41#include <linux/kref.h>
42#include <rdma/ib_pack.h> 42#include <rdma/ib_pack.h>
43#include <rdma/ib_user_verbs.h>
43 44
44#include "ipath_layer.h" 45#include "ipath_layer.h"
45 46
47#define IPATH_MAX_RDMA_ATOMIC 4
48
46#define QPN_MAX (1 << 24) 49#define QPN_MAX (1 << 24)
47#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) 50#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
48 51
@@ -89,7 +92,7 @@ struct ib_reth {
89} __attribute__ ((packed)); 92} __attribute__ ((packed));
90 93
91struct ib_atomic_eth { 94struct ib_atomic_eth {
92 __be64 vaddr; 95 __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
93 __be32 rkey; 96 __be32 rkey;
94 __be64 swap_data; 97 __be64 swap_data;
95 __be64 compare_data; 98 __be64 compare_data;
@@ -108,7 +111,7 @@ struct ipath_other_headers {
108 } rc; 111 } rc;
109 struct { 112 struct {
110 __be32 aeth; 113 __be32 aeth;
111 __be64 atomic_ack_eth; 114 __be32 atomic_ack_eth[2];
112 } at; 115 } at;
113 __be32 imm_data; 116 __be32 imm_data;
114 __be32 aeth; 117 __be32 aeth;
@@ -186,7 +189,7 @@ struct ipath_mmap_info {
186struct ipath_cq_wc { 189struct ipath_cq_wc {
187 u32 head; /* index of next entry to fill */ 190 u32 head; /* index of next entry to fill */
188 u32 tail; /* index of next ib_poll_cq() entry */ 191 u32 tail; /* index of next ib_poll_cq() entry */
189 struct ib_wc queue[1]; /* this is actually size ibcq.cqe + 1 */ 192 struct ib_uverbs_wc queue[1]; /* this is actually size ibcq.cqe + 1 */
190}; 193};
191 194
192/* 195/*
@@ -312,6 +315,19 @@ struct ipath_sge_state {
312}; 315};
313 316
314/* 317/*
318 * This structure holds the information that the send tasklet needs
319 * to send a RDMA read response or atomic operation.
320 */
321struct ipath_ack_entry {
322 u8 opcode;
323 u32 psn;
324 union {
325 struct ipath_sge_state rdma_sge;
326 u64 atomic_data;
327 };
328};
329
330/*
315 * Variables prefixed with s_ are for the requester (sender). 331 * Variables prefixed with s_ are for the requester (sender).
316 * Variables prefixed with r_ are for the responder (receiver). 332 * Variables prefixed with r_ are for the responder (receiver).
317 * Variables prefixed with ack_ are for responder replies. 333 * Variables prefixed with ack_ are for responder replies.
@@ -333,24 +349,24 @@ struct ipath_qp {
333 struct ipath_mmap_info *ip; 349 struct ipath_mmap_info *ip;
334 struct ipath_sge_state *s_cur_sge; 350 struct ipath_sge_state *s_cur_sge;
335 struct ipath_sge_state s_sge; /* current send request data */ 351 struct ipath_sge_state s_sge; /* current send request data */
336 /* current RDMA read send data */ 352 struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1];
337 struct ipath_sge_state s_rdma_sge; 353 struct ipath_sge_state s_ack_rdma_sge;
354 struct ipath_sge_state s_rdma_read_sge;
338 struct ipath_sge_state r_sge; /* current receive data */ 355 struct ipath_sge_state r_sge; /* current receive data */
339 spinlock_t s_lock; 356 spinlock_t s_lock;
340 unsigned long s_flags; 357 unsigned long s_busy;
341 u32 s_hdrwords; /* size of s_hdr in 32 bit words */ 358 u32 s_hdrwords; /* size of s_hdr in 32 bit words */
342 u32 s_cur_size; /* size of send packet in bytes */ 359 u32 s_cur_size; /* size of send packet in bytes */
343 u32 s_len; /* total length of s_sge */ 360 u32 s_len; /* total length of s_sge */
344 u32 s_rdma_len; /* total length of s_rdma_sge */ 361 u32 s_rdma_read_len; /* total length of s_rdma_read_sge */
345 u32 s_next_psn; /* PSN for next request */ 362 u32 s_next_psn; /* PSN for next request */
346 u32 s_last_psn; /* last response PSN processed */ 363 u32 s_last_psn; /* last response PSN processed */
347 u32 s_psn; /* current packet sequence number */ 364 u32 s_psn; /* current packet sequence number */
348 u32 s_ack_psn; /* PSN for RDMA_READ */ 365 u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */
366 u32 s_ack_psn; /* PSN for acking sends and RDMA writes */
349 u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ 367 u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
350 u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ 368 u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
351 u64 r_wr_id; /* ID for current receive WQE */ 369 u64 r_wr_id; /* ID for current receive WQE */
352 u64 r_atomic_data; /* data for last atomic op */
353 u32 r_atomic_psn; /* PSN of last atomic op */
354 u32 r_len; /* total length of r_sge */ 370 u32 r_len; /* total length of r_sge */
355 u32 r_rcv_len; /* receive data len processed */ 371 u32 r_rcv_len; /* receive data len processed */
356 u32 r_psn; /* expected rcv packet sequence number */ 372 u32 r_psn; /* expected rcv packet sequence number */
@@ -360,12 +376,13 @@ struct ipath_qp {
360 u8 s_ack_state; /* opcode of packet to ACK */ 376 u8 s_ack_state; /* opcode of packet to ACK */
361 u8 s_nak_state; /* non-zero if NAK is pending */ 377 u8 s_nak_state; /* non-zero if NAK is pending */
362 u8 r_state; /* opcode of last packet received */ 378 u8 r_state; /* opcode of last packet received */
363 u8 r_ack_state; /* opcode of packet to ACK */
364 u8 r_nak_state; /* non-zero if NAK is pending */ 379 u8 r_nak_state; /* non-zero if NAK is pending */
365 u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ 380 u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
366 u8 r_reuse_sge; /* for UC receive errors */ 381 u8 r_reuse_sge; /* for UC receive errors */
367 u8 r_sge_inx; /* current index into sg_list */ 382 u8 r_sge_inx; /* current index into sg_list */
368 u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */ 383 u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */
384 u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
385 u8 r_head_ack_queue; /* index into s_ack_queue[] */
369 u8 qp_access_flags; 386 u8 qp_access_flags;
370 u8 s_max_sge; /* size of s_wq->sg_list */ 387 u8 s_max_sge; /* size of s_wq->sg_list */
371 u8 s_retry_cnt; /* number of times to retry */ 388 u8 s_retry_cnt; /* number of times to retry */
@@ -374,6 +391,10 @@ struct ipath_qp {
 	u8 s_rnr_retry;		/* requester RNR retry counter */
 	u8 s_wait_credit;	/* limit number of unacked packets sent */
 	u8 s_pkey_index;	/* PKEY index to use */
+	u8 s_max_rd_atomic;	/* max number of RDMA read/atomic to send */
+	u8 s_num_rd_atomic;	/* number of RDMA read/atomic pending */
+	u8 s_tail_ack_queue;	/* index into s_ack_queue[] */
+	u8 s_flags;
 	u8 timeout;		/* Timeout for this QP */
 	enum ib_mtu path_mtu;
 	u32 remote_qpn;
@@ -390,11 +411,16 @@ struct ipath_qp {
 	struct ipath_sge r_sg_list[0];	/* verified SGEs */
 };
 
+/* Bit definition for s_busy. */
+#define IPATH_S_BUSY		0
+
 /*
  * Bit definitions for s_flags.
  */
-#define IPATH_S_BUSY		0
-#define IPATH_S_SIGNAL_REQ_WR	1
+#define IPATH_S_SIGNAL_REQ_WR	0x01
+#define IPATH_S_FENCE_PENDING	0x02
+#define IPATH_S_RDMAR_PENDING	0x04
+#define IPATH_S_ACK_PENDING	0x08
 
 #define IPATH_PSN_CREDIT	2048
 
@@ -706,8 +732,6 @@ int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
 
 int ipath_destroy_srq(struct ib_srq *ibsrq);
 
-void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
-
 int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
 
 struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
@@ -757,9 +781,6 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
 
 void ipath_do_ruc_send(unsigned long data);
 
-u32 ipath_make_rc_ack(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
-		      u32 pmtu);
-
 int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
 		      u32 pmtu, u32 *bth0p, u32 *bth2p);
 
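
The ipath_verbs.h hunks above split the old s_flags word in two: s_busy is an unsigned long whose IPATH_S_BUSY value is a bit number for the atomic bitops, while the new u8 s_flags carries mask-style bits (0x01, 0x02, ...), which is why the two groups of defines now use different encodings. A minimal usage sketch, assuming the standard kernel test_and_set_bit()/clear_bit() helpers and a hypothetical struct ipath_qp *qp; this fragment is illustrative, not code from the patch:

	if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
		return;					/* send engine already active */
	if (qp->s_flags & IPATH_S_FENCE_PENDING)	/* mask test, not a bit number */
		goto done;
	qp->s_flags |= IPATH_S_ACK_PENDING;		/* mask bits compose with | */
done:
	clear_bit(IPATH_S_BUSY, &qp->s_busy);
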
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 0d9b7d06bbc2..773145e29947 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -1013,14 +1013,14 @@ static struct {
 	u64 latest_fw;
 	u32 flags;
 } mthca_hca_table[] = {
-	[TAVOR]        = { .latest_fw = MTHCA_FW_VER(3, 4, 0),
+	[TAVOR]        = { .latest_fw = MTHCA_FW_VER(3, 5, 0),
 			   .flags = 0 },
-	[ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 600),
+	[ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 8, 200),
 			   .flags = MTHCA_FLAG_PCIE },
-	[ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 400),
+	[ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 2, 0),
 			   .flags = MTHCA_FLAG_MEMFREE |
 				    MTHCA_FLAG_PCIE },
-	[SINAI]        = { .latest_fw = MTHCA_FW_VER(1, 1, 0),
+	[SINAI]        = { .latest_fw = MTHCA_FW_VER(1, 2, 0),
 			   .flags = MTHCA_FLAG_MEMFREE |
 				    MTHCA_FLAG_PCIE |
 				    MTHCA_FLAG_SINAI_OPT }
@@ -1135,7 +1135,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
 		goto err_cmd;
 
 	if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) {
-		mthca_warn(mdev, "HCA FW version %d.%d.%d is old (%d.%d.%d is current).\n",
+		mthca_warn(mdev, "HCA FW version %d.%d.%3d is old (%d.%d.%3d is current).\n",
 			   (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff,
 			   (int) (mdev->fw_ver & 0xffff),
 			   (int) (mthca_hca_table[hca_type].latest_fw >> 32),
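
The firmware table bump and the %3d format change both hinge on how mthca packs a version triple into one u64. Judging from the shifts in the mthca_warn() call above, the major number sits in the top 32 bits, the minor in the next 16, and the subminor in the low 16; the fixed width of three keeps subminors such as 200 or 400 from reading ambiguously next to shorter values. A small standalone sketch of that decoding, with the MTHCA_FW_VER packing reconstructed from those shifts rather than quoted from mthca_dev.h:

#include <stdint.h>
#include <stdio.h>

/* Assumed layout, inferred from the shifts in the warning above. */
#define MTHCA_FW_VER(major, minor, subminor) \
	(((uint64_t) (major) << 32) | ((minor) << 16) | (subminor))

static void print_fw_ver(uint64_t fw_ver)
{
	printf("%d.%d.%3d\n",
	       (int) (fw_ver >> 32),
	       (int) (fw_ver >> 16) & 0xffff,
	       (int) (fw_ver & 0xffff));
}

int main(void)
{
	print_fw_ver(MTHCA_FW_VER(4, 8, 200));	/* prints "4.8.200" */
	print_fw_ver(MTHCA_FW_VER(5, 2, 0));	/* prints "5.2.  0" */
	return 0;
}
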
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 0b9d053a599d..48f7c65e9aed 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -175,7 +175,9 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
 		if (!ret) {
 			++chunk->npages;
 
-			if (!coherent && chunk->npages == MTHCA_ICM_CHUNK_LEN) {
+			if (coherent)
+				++chunk->nsg;
+			else if (chunk->npages == MTHCA_ICM_CHUNK_LEN) {
 				chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
 							chunk->npages,
 							PCI_DMA_BIDIRECTIONAL);
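
The rewritten condition separates the two DMA styles: pages from the coherent allocator come back individually mapped, so each one simply counts as its own scatter entry, while the streaming path still batches a full chunk through pci_map_sg() once MTHCA_ICM_CHUNK_LEN pages accumulate. Condensed (a paraphrase of the hunk, not additional driver code), the new flow is:

	++chunk->npages;
	if (coherent)
		++chunk->nsg;	/* already mapped by the consistent allocator */
	else if (chunk->npages == MTHCA_ICM_CHUNK_LEN)
		chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
					chunk->npages,
					PCI_DMA_BIDIRECTIONAL);
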
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index 6037dd3f87df..aa6c70a6a36f 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -297,7 +297,8 @@ out:
 
 int mthca_write_mtt_size(struct mthca_dev *dev)
 {
-	if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy)
+	if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
+	    !(dev->mthca_flags & MTHCA_FLAG_FMR))
 		/*
 		 * Be friendly to WRITE_MTT command
 		 * and leave two empty slots for the
@@ -310,8 +311,9 @@ int mthca_write_mtt_size(struct mthca_dev *dev)
 	return mthca_is_memfree(dev) ? (PAGE_SIZE / sizeof (u64)) : 0x7ffffff;
 }
 
-void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt,
-			       int start_index, u64 *buffer_list, int list_len)
+static void mthca_tavor_write_mtt_seg(struct mthca_dev *dev,
+				      struct mthca_mtt *mtt, int start_index,
+				      u64 *buffer_list, int list_len)
 {
 	u64 __iomem *mtts;
 	int i;
@@ -323,8 +325,9 @@ void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt,
 			mtts + i);
 }
 
-void mthca_arbel_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt,
-			       int start_index, u64 *buffer_list, int list_len)
+static void mthca_arbel_write_mtt_seg(struct mthca_dev *dev,
+				      struct mthca_mtt *mtt, int start_index,
+				      u64 *buffer_list, int list_len)
 {
 	__be64 *mtts;
 	dma_addr_t dma_handle;
@@ -353,7 +356,8 @@ int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
 	int size = mthca_write_mtt_size(dev);
 	int chunk;
 
-	if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy)
+	if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
+	    !(dev->mthca_flags & MTHCA_FLAG_FMR))
 		return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len);
 
 	while (list_len > 0) {
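
Both mthca_write_mtt_size() and mthca_write_mtt() now fall back to the command-interface path when FMRs are disabled, not only when a separate FMR MTT buddy allocator is in use; without the extra test, a device running with MTHCA_FLAG_FMR clear could be steered onto a direct-write path that was never set up for it. The duplicated condition is equivalent to a helper along these lines (hypothetical; the driver open-codes the test at both call sites):

static int mthca_use_direct_mtt_write(struct mthca_dev *dev)
{
	return dev->mr_table.fmr_mtt_buddy == &dev->mr_table.mtt_buddy &&
	       (dev->mthca_flags & MTHCA_FLAG_FMR);
}
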
@@ -833,6 +837,7 @@ void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
 
 	key = arbel_key_to_hw_index(fmr->ibmr.lkey);
 	key &= dev->limits.num_mpts - 1;
+	key = adjust_key(dev, key);
 	fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
 
 	fmr->maps = 0;
@@ -879,8 +884,8 @@ int mthca_init_mr_table(struct mthca_dev *dev)
 		}
 		mpts = mtts = 1 << i;
 	} else {
-		mpts = dev->limits.num_mtt_segs;
-		mtts = dev->limits.num_mpts;
+		mtts = dev->limits.num_mtt_segs;
+		mpts = dev->limits.num_mpts;
 	}
 
 	if (!mthca_is_memfree(dev) &&
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 0725ad7ad9bf..47e6fd46d9c2 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1293,7 +1293,6 @@ int mthca_register_device(struct mthca_dev *dev)
 	dev->ib_dev.node_type = RDMA_NODE_IB_CA;
 	dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
 	dev->ib_dev.dma_device = &dev->pdev->dev;
-	dev->ib_dev.class_dev.dev = &dev->pdev->dev;
 	dev->ib_dev.query_device = mthca_query_device;
 	dev->ib_dev.query_port = mthca_query_port;
 	dev->ib_dev.modify_device = mthca_modify_device;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 224c93dd29eb..8fe6fee7a97a 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -573,6 +573,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
 		goto out;
 	}
 
+	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+		err = 0;
+		goto out;
+	}
+
 	if ((attr_mask & IB_QP_PKEY_INDEX) &&
 	    attr->pkey_index >= dev->limits.pkey_table_len) {
 		mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
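
The added block turns a RESET-to-RESET transition into an explicit no-op: there is nothing for the HCA to do, so the function succeeds without issuing a modify-QP command. For context, cur_state and new_state are derived a few lines above this hunk, presumably along these lines (a paraphrase under that assumption, not quoted from mthca_qp.c):

	cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
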
@@ -1083,21 +1088,21 @@ static void mthca_unmap_memfree(struct mthca_dev *dev,
 static int mthca_alloc_memfree(struct mthca_dev *dev,
 			       struct mthca_qp *qp)
 {
-	int ret = 0;
-
 	if (mthca_is_memfree(dev)) {
 		qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
 						 qp->qpn, &qp->rq.db);
 		if (qp->rq.db_index < 0)
-			return ret;
+			return -ENOMEM;
 
 		qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
 						 qp->qpn, &qp->sq.db);
-		if (qp->sq.db_index < 0)
+		if (qp->sq.db_index < 0) {
 			mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
+			return -ENOMEM;
+		}
 	}
 
-	return ret;
+	return 0;
 }
 
 static void mthca_free_memfree(struct mthca_dev *dev,
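
Besides the restructuring, this hunk fixes a real bug: the old function initialized ret to 0, so "return ret" after a failed RQ doorbell allocation reported success, and a failed SQ doorbell allocation freed the RQ doorbell but then also fell through to return 0. The new code returns -ENOMEM on both failure paths and unwinds the partial allocation first, the standard cleanup-on-error shape, sketched here with hypothetical alloc_a()/alloc_b()/free_a() helpers standing in for the two mthca_alloc_db() calls:

int setup_pair(void)
{
	int a, b;

	a = alloc_a();
	if (a < 0)
		return -ENOMEM;		/* the old code returned 0 here */

	b = alloc_b();
	if (b < 0) {
		free_a(a);		/* undo the first allocation */
		return -ENOMEM;
	}
	return 0;
}
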
@@ -1414,11 +1419,10 @@ void mthca_free_qp(struct mthca_dev *dev,
 	 * unref the mem-free tables and free the QPN in our table.
 	 */
 	if (!qp->ibqp.uobject) {
-		mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn,
+		mthca_cq_clean(dev, recv_cq, qp->qpn,
 			       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
-		if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
-			mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
-				       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
+		if (send_cq != recv_cq)
+			mthca_cq_clean(dev, send_cq, qp->qpn, NULL);
 
 		mthca_free_memfree(dev, qp);
 		mthca_free_wqe_buf(dev, qp);
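
The last hunk reuses cached CQ pointers instead of re-deriving them through to_mcq(), and it corrects which CQ is cleaned against the SRQ: only receive completions can hold SRQ work-queue entries, so the receive CQ is cleaned with the SRQ argument (letting its WQEs be returned) while the send CQ is cleaned with NULL. The send_cq and recv_cq locals are presumably set up earlier in mthca_free_qp(), roughly as follows (an assumption, not quoted from the file):

	struct mthca_cq *send_cq = to_mcq(qp->ibqp.send_cq);
	struct mthca_cq *recv_cq = to_mcq(qp->ibqp.recv_cq);
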