author    Linus Torvalds <torvalds@linux-foundation.org>  2008-05-08 13:50:34 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2008-05-08 13:50:34 -0400
commit    8e1bf9ffb1aca693e3cf4a4f7144c8f70c8a08b0
tree      909d84df0515f4729257f228fceb5767f1a995d2
parent    148c69b4b0ec267b08d3619651ae4a10a1768b04
parent    12137c593d127c6c1a3eb050674da047682badaf

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
  IB/ehca: Wait for async events to finish before destroying QP
  IB/ipath: Fix SDMA error recovery in absence of link status change
  IB/ipath: Need to always request and handle PIO avail interrupts
  IB/ipath: Fix count of packets received by kernel
  IB/ipath: Return the correct opcode for RDMA WRITE with immediate
  IB/ipath: Fix bug that can leave sends disabled after freeze recovery
  IB/ipath: Only increment SSN if WQE is put on send queue
  IB/ipath: Only warn about prototype chip during init
  RDMA/cxgb3: Fix severe limit on userspace memory registration size
  RDMA/cxgb3: Don't add PBL memory to gen_pool in chunks
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_hal.c        |  90
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_hal.h        |   8
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_resource.c   |  36
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_mem.c        |  75
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.c   |  68
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.h   |   8
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_classes.h     |   2
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_irq.c         |   4
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_qp.c          |   5
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_driver.c    | 138
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_file_ops.c  |  72
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_iba7220.c   |  26
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_init_chip.c |  95
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_intr.c      |  80
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_kernel.h    |   8
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_rc.c        |   6
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_ruc.c       |   7
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_sdma.c      |  44
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs.c     |   2
19 files changed, 458 insertions(+), 316 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 5fd8506a8657..ebf9d3043f80 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -588,7 +588,7 @@ static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
  * caller aquires the ctrl_qp lock before the call
  */
 static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
-				      u32 len, void *data, int completion)
+				      u32 len, void *data)
 {
 	u32 i, nr_wqe, copy_len;
 	u8 *copy_data;
@@ -624,7 +624,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
 		flag = 0;
 		if (i == (nr_wqe - 1)) {
 			/* last WQE */
-			flag = completion ? T3_COMPLETION_FLAG : 0;
+			flag = T3_COMPLETION_FLAG;
 			if (len % 32)
 				utx_len = len / 32 + 1;
 			else
@@ -683,21 +683,20 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
 	return 0;
 }
 
-/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl, and pbl_size
- * OUT: stag index, actual pbl_size, pbl_addr allocated.
+/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl_size and pbl_addr
+ * OUT: stag index
  * TBD: shared memory region support
  */
 static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
 			 u32 *stag, u8 stag_state, u32 pdid,
 			 enum tpt_mem_type type, enum tpt_mem_perm perm,
-			 u32 zbva, u64 to, u32 len, u8 page_size, __be64 *pbl,
-			 u32 *pbl_size, u32 *pbl_addr)
+			 u32 zbva, u64 to, u32 len, u8 page_size,
+			 u32 pbl_size, u32 pbl_addr)
 {
 	int err;
 	struct tpt_entry tpt;
 	u32 stag_idx;
 	u32 wptr;
-	int rereg = (*stag != T3_STAG_UNSET);
 
 	stag_state = stag_state > 0;
 	stag_idx = (*stag) >> 8;
@@ -711,30 +710,8 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
 	PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
 	     __func__, stag_state, type, pdid, stag_idx);
 
-	if (reset_tpt_entry)
-		cxio_hal_pblpool_free(rdev_p, *pbl_addr, *pbl_size << 3);
-	else if (!rereg) {
-		*pbl_addr = cxio_hal_pblpool_alloc(rdev_p, *pbl_size << 3);
-		if (!*pbl_addr) {
-			return -ENOMEM;
-		}
-	}
-
 	mutex_lock(&rdev_p->ctrl_qp.lock);
 
-	/* write PBL first if any - update pbl only if pbl list exist */
-	if (pbl) {
-
-		PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
-		     __func__, *pbl_addr, rdev_p->rnic_info.pbl_base,
-		     *pbl_size);
-		err = cxio_hal_ctrl_qp_write_mem(rdev_p,
-						 (*pbl_addr >> 5),
-						 (*pbl_size << 3), pbl, 0);
-		if (err)
-			goto ret;
-	}
-
 	/* write TPT entry */
 	if (reset_tpt_entry)
 		memset(&tpt, 0, sizeof(tpt));
@@ -749,23 +726,23 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
 			V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
 			V_TPT_PAGE_SIZE(page_size));
 		tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
-		    cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, *pbl_addr)>>3));
+		    cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
 		tpt.len = cpu_to_be32(len);
 		tpt.va_hi = cpu_to_be32((u32) (to >> 32));
 		tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
 		tpt.rsvd_bind_cnt_or_pstag = 0;
 		tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
-		    cpu_to_be32(V_TPT_PBL_SIZE((*pbl_size) >> 2));
+		    cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
 	}
 	err = cxio_hal_ctrl_qp_write_mem(rdev_p,
 					 stag_idx +
 					 (rdev_p->rnic_info.tpt_base >> 5),
-					 sizeof(tpt), &tpt, 1);
+					 sizeof(tpt), &tpt);
 
 	/* release the stag index to free pool */
 	if (reset_tpt_entry)
 		cxio_hal_put_stag(rdev_p->rscp, stag_idx);
-ret:
+
 	wptr = rdev_p->ctrl_qp.wptr;
 	mutex_unlock(&rdev_p->ctrl_qp.lock);
 	if (!err)
@@ -776,44 +753,67 @@ ret:
 	return err;
 }
 
+int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
+		   u32 pbl_addr, u32 pbl_size)
+{
+	u32 wptr;
+	int err;
+
+	PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
+	     __func__, pbl_addr, rdev_p->rnic_info.pbl_base,
+	     pbl_size);
+
+	mutex_lock(&rdev_p->ctrl_qp.lock);
+	err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3,
+					 pbl);
+	wptr = rdev_p->ctrl_qp.wptr;
+	mutex_unlock(&rdev_p->ctrl_qp.lock);
+	if (err)
+		return err;
+
+	if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
+				     SEQ32_GE(rdev_p->ctrl_qp.rptr,
+					      wptr)))
+		return -ERESTARTSYS;
+
+	return 0;
+}
+
 int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
 			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
-			   u8 page_size, __be64 *pbl, u32 *pbl_size,
-			   u32 *pbl_addr)
+			   u8 page_size, u32 pbl_size, u32 pbl_addr)
 {
 	*stag = T3_STAG_UNSET;
 	return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
-			     zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
+			     zbva, to, len, page_size, pbl_size, pbl_addr);
 }
 
 int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
 			enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
-			u8 page_size, __be64 *pbl, u32 *pbl_size,
-			u32 *pbl_addr)
+			u8 page_size, u32 pbl_size, u32 pbl_addr)
 {
 	return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
-			     zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
+			     zbva, to, len, page_size, pbl_size, pbl_addr);
 }
 
 int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size,
 		   u32 pbl_addr)
 {
-	return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
-			     &pbl_size, &pbl_addr);
+	return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
+			     pbl_size, pbl_addr);
 }
 
 int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid)
 {
-	u32 pbl_size = 0;
 	*stag = T3_STAG_UNSET;
 	return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0,
-			     NULL, &pbl_size, NULL);
+			     0, 0);
 }
 
 int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
 {
-	return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
-			     NULL, NULL);
+	return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
+			     0, 0);
 }
 
 int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
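
Note on the completion handling above: cxio_hal_ctrl_qp_write_mem() now always sets T3_COMPLETION_FLAG on the last WQE, and each caller waits until the control QP's read pointer catches up with the write pointer it sampled under the mutex. SEQ32_GE is a wrap-safe 32-bit sequence comparison; a minimal sketch of the usual shape of such a predicate (illustrative only, not necessarily the driver's exact macro):

	/* true when a is at or past b modulo 2^32, assuming the two
	 * indices never drift 2^31 or more apart */
	static inline int seq32_ge(u32 a, u32 b)
	{
		return !((a - b) & 0x80000000U);
	}
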
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index 69ab08ebc680..6e128f6bab05 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -154,14 +154,14 @@ int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
 int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq,
 		    struct cxio_ucontext *uctx);
 int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode);
+int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
+		   u32 pbl_addr, u32 pbl_size);
 int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
 			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
-			   u8 page_size, __be64 *pbl, u32 *pbl_size,
-			   u32 *pbl_addr);
+			   u8 page_size, u32 pbl_size, u32 pbl_addr);
 int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
 			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
-			   u8 page_size, __be64 *pbl, u32 *pbl_size,
-			   u32 *pbl_addr);
+			   u8 page_size, u32 pbl_size, u32 pbl_addr);
 int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size,
 		   u32 pbl_addr);
 int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index 45ed4f25ef78..bd233c087653 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -250,7 +250,6 @@ void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
  */
 
 #define MIN_PBL_SHIFT 8			/* 256B == min PBL size (32 entries) */
-#define PBL_CHUNK 2*1024*1024
 
 u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size)
 {
@@ -267,14 +266,35 @@ void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
 
 int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p)
 {
-	unsigned long i;
+	unsigned pbl_start, pbl_chunk;
+
 	rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1);
-	if (rdev_p->pbl_pool)
-		for (i = rdev_p->rnic_info.pbl_base;
-		     i <= rdev_p->rnic_info.pbl_top - PBL_CHUNK + 1;
-		     i += PBL_CHUNK)
-			gen_pool_add(rdev_p->pbl_pool, i, PBL_CHUNK, -1);
-	return rdev_p->pbl_pool ? 0 : -ENOMEM;
+	if (!rdev_p->pbl_pool)
+		return -ENOMEM;
+
+	pbl_start = rdev_p->rnic_info.pbl_base;
+	pbl_chunk = rdev_p->rnic_info.pbl_top - pbl_start + 1;
+
+	while (pbl_start < rdev_p->rnic_info.pbl_top) {
+		pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1,
+				pbl_chunk);
+		if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) {
+			PDBG("%s failed to add PBL chunk (%x/%x)\n",
+			     __func__, pbl_start, pbl_chunk);
+			if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) {
+				printk(KERN_WARNING MOD "%s: Failed to add all PBL chunks (%x/%x)\n",
+				       __func__, pbl_start, rdev_p->rnic_info.pbl_top - pbl_start);
+				return 0;
+			}
+			pbl_chunk >>= 1;
+		} else {
+			PDBG("%s added PBL chunk (%x/%x)\n",
+			     __func__, pbl_start, pbl_chunk);
+			pbl_start += pbl_chunk;
+		}
+	}
+
+	return 0;
 }
 
 void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p)
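
The loop above replaces the old fixed 2 MB PBL_CHUNK seeding. That scheme capped any single PBL allocation at 2 MB, since gen_pool allocations cannot span chunk boundaries, and it silently dropped any remainder smaller than one chunk. The new code first offers the whole PBL range as a single chunk and halves the chunk size whenever gen_pool_add() fails, giving up only once chunks shrink below 1024 << MIN_PBL_SHIFT (256 KB). For example, if a 16 MB range cannot be added whole, it is retried as two 8 MB chunks, then 4 MB pieces, and so on, until the range is covered.
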
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
index 58c3d61bcd14..ec49a5cbdebb 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -35,17 +35,26 @@
 #include <rdma/ib_verbs.h>
 
 #include "cxio_hal.h"
+#include "cxio_resource.h"
 #include "iwch.h"
 #include "iwch_provider.h"
 
-int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
-		      struct iwch_mr *mhp,
-		      int shift,
-		      __be64 *page_list)
+static void iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
 {
-	u32 stag;
 	u32 mmid;
 
+	mhp->attr.state = 1;
+	mhp->attr.stag = stag;
+	mmid = stag >> 8;
+	mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+	insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
+	PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+}
+
+int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+		      struct iwch_mr *mhp, int shift)
+{
+	u32 stag;
 
 	if (cxio_register_phys_mem(&rhp->rdev,
 				   &stag, mhp->attr.pdid,
@@ -53,28 +62,21 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
 				   mhp->attr.zbva,
 				   mhp->attr.va_fbo,
 				   mhp->attr.len,
-				   shift-12,
-				   page_list,
-				   &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
+				   shift - 12,
+				   mhp->attr.pbl_size, mhp->attr.pbl_addr))
 		return -ENOMEM;
-	mhp->attr.state = 1;
-	mhp->attr.stag = stag;
-	mmid = stag >> 8;
-	mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
-	insert_handle(rhp, &rhp->mmidr, mhp, mmid);
-	PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+
+	iwch_finish_mem_reg(mhp, stag);
+
 	return 0;
 }
 
 int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
 			struct iwch_mr *mhp,
 			int shift,
-			__be64 *page_list,
 			int npages)
 {
 	u32 stag;
-	u32 mmid;
-
 
 	/* We could support this... */
 	if (npages > mhp->attr.pbl_size)
@@ -87,19 +89,40 @@ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
 				   mhp->attr.zbva,
 				   mhp->attr.va_fbo,
 				   mhp->attr.len,
-				   shift-12,
-				   page_list,
-				   &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
+				   shift - 12,
+				   mhp->attr.pbl_size, mhp->attr.pbl_addr))
 		return -ENOMEM;
-	mhp->attr.state = 1;
-	mhp->attr.stag = stag;
-	mmid = stag >> 8;
-	mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
-	insert_handle(rhp, &rhp->mmidr, mhp, mmid);
-	PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+
+	iwch_finish_mem_reg(mhp, stag);
+
+	return 0;
+}
+
+int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
+{
+	mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
+						    npages << 3);
+
+	if (!mhp->attr.pbl_addr)
+		return -ENOMEM;
+
+	mhp->attr.pbl_size = npages;
+
 	return 0;
 }
 
+void iwch_free_pbl(struct iwch_mr *mhp)
+{
+	cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
+			      mhp->attr.pbl_size << 3);
+}
+
+int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
+{
+	return cxio_write_pbl(&mhp->rhp->rdev, pages,
+			      mhp->attr.pbl_addr + (offset << 3), npages);
+}
+
 int build_phys_page_list(struct ib_phys_buf *buffer_list,
 			 int num_phys_buf,
 			 u64 *iova_start,
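
Taken together, these helpers split registration into three steps that the provider code (next file) performs in order: reserve PBL space, stream the page list into it, then write the TPT entry. A condensed sketch of the calling sequence (hypothetical wrapper; error paths abbreviated, names as defined above):

	static int example_register(struct iwch_dev *rhp, struct iwch_pd *php,
				    struct iwch_mr *mhp, int shift,
				    __be64 *pages, int npages)
	{
		int err;	/* assumes mhp->rhp has already been set */

		err = iwch_alloc_pbl(mhp, npages);	/* reserve PBL space */
		if (err)
			return err;
		err = iwch_write_pbl(mhp, pages, npages, 0);	/* fill it */
		if (!err)
			err = iwch_register_mem(rhp, php, mhp, shift); /* TPT */
		if (err)
			iwch_free_pbl(mhp);
		return err;
	}
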
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index d07d3a377b5f..8934178a23ee 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -442,6 +442,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
 	mmid = mhp->attr.stag >> 8;
 	cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
 		       mhp->attr.pbl_addr);
+	iwch_free_pbl(mhp);
 	remove_handle(rhp, &rhp->mmidr, mmid);
 	if (mhp->kva)
 		kfree((void *) (unsigned long) mhp->kva);
@@ -475,6 +476,8 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
 	if (!mhp)
 		return ERR_PTR(-ENOMEM);
 
+	mhp->rhp = rhp;
+
 	/* First check that we have enough alignment */
 	if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
 		ret = -EINVAL;
@@ -492,7 +495,17 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
 	if (ret)
 		goto err;
 
-	mhp->rhp = rhp;
+	ret = iwch_alloc_pbl(mhp, npages);
+	if (ret) {
+		kfree(page_list);
+		goto err_pbl;
+	}
+
+	ret = iwch_write_pbl(mhp, page_list, npages, 0);
+	kfree(page_list);
+	if (ret)
+		goto err_pbl;
+
 	mhp->attr.pdid = php->pdid;
 	mhp->attr.zbva = 0;
 
@@ -502,12 +515,15 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
 
 	mhp->attr.len = (u32) total_size;
 	mhp->attr.pbl_size = npages;
-	ret = iwch_register_mem(rhp, php, mhp, shift, page_list);
-	kfree(page_list);
-	if (ret) {
-		goto err;
-	}
+	ret = iwch_register_mem(rhp, php, mhp, shift);
+	if (ret)
+		goto err_pbl;
+
 	return &mhp->ibmr;
+
+err_pbl:
+	iwch_free_pbl(mhp);
+
 err:
 	kfree(mhp);
 	return ERR_PTR(ret);
@@ -560,7 +576,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
 		return ret;
 	}
 
-	ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages);
+	ret = iwch_reregister_mem(rhp, php, &mh, shift, npages);
 	kfree(page_list);
 	if (ret) {
 		return ret;
@@ -602,6 +618,8 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	if (!mhp)
 		return ERR_PTR(-ENOMEM);
 
+	mhp->rhp = rhp;
+
 	mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
 	if (IS_ERR(mhp->umem)) {
 		err = PTR_ERR(mhp->umem);
@@ -615,10 +633,14 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
 		n += chunk->nents;
 
-	pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
+	err = iwch_alloc_pbl(mhp, n);
+	if (err)
+		goto err;
+
+	pages = (__be64 *) __get_free_page(GFP_KERNEL);
 	if (!pages) {
 		err = -ENOMEM;
-		goto err;
+		goto err_pbl;
 	}
 
 	i = n = 0;
@@ -630,25 +652,38 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 			pages[i++] = cpu_to_be64(sg_dma_address(
 				&chunk->page_list[j]) +
 				mhp->umem->page_size * k);
+			if (i == PAGE_SIZE / sizeof *pages) {
+				err = iwch_write_pbl(mhp, pages, i, n);
+				if (err)
+					goto pbl_done;
+				n += i;
+				i = 0;
+			}
 		}
 	}
 
-	mhp->rhp = rhp;
+	if (i)
+		err = iwch_write_pbl(mhp, pages, i, n);
+
+pbl_done:
+	free_page((unsigned long) pages);
+	if (err)
+		goto err_pbl;
+
 	mhp->attr.pdid = php->pdid;
 	mhp->attr.zbva = 0;
 	mhp->attr.perms = iwch_ib_to_tpt_access(acc);
 	mhp->attr.va_fbo = virt;
 	mhp->attr.page_size = shift - 12;
 	mhp->attr.len = (u32) length;
-	mhp->attr.pbl_size = i;
-	err = iwch_register_mem(rhp, php, mhp, shift, pages);
-	kfree(pages);
+
+	err = iwch_register_mem(rhp, php, mhp, shift);
 	if (err)
-		goto err;
+		goto err_pbl;
 
 	if (udata && !t3a_device(rhp)) {
 		uresp.pbl_addr = (mhp->attr.pbl_addr -
 					 rhp->rdev.rnic_info.pbl_base) >> 3;
 		PDBG("%s user resp pbl_addr 0x%x\n", __func__,
 		     uresp.pbl_addr);
 
@@ -661,6 +696,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
 	return &mhp->ibmr;
 
+err_pbl:
+	iwch_free_pbl(mhp);
+
 err:
 	ib_umem_release(mhp->umem);
 	kfree(mhp);
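
The iwch_reg_user_mr() changes are the substance of the "severe limit on userspace memory registration size" fix: the old code kmalloc'd one physically contiguous array of n PBL entries, so at 8 bytes per entry and 4 KB pages a 64 MB registration already required a 128 KB contiguous allocation, near the allocator's limit. The new code streams addresses through a single scratch page (PAGE_SIZE / 8 = 512 entries per batch on 4 KB-page systems), flushing with iwch_write_pbl() each time the page fills, so registration size is bounded by PBL pool space on the adapter rather than by kmalloc.
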
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index db5100d27ca2..836163fc5429 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -340,14 +340,14 @@ int iwch_quiesce_qps(struct iwch_cq *chp);
 int iwch_resume_qps(struct iwch_cq *chp);
 void stop_read_rep_timer(struct iwch_qp *qhp);
 int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
-		      struct iwch_mr *mhp,
-		      int shift,
-		      __be64 *page_list);
+		      struct iwch_mr *mhp, int shift);
 int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
 			struct iwch_mr *mhp,
 			int shift,
-			__be64 *page_list,
 			int npages);
+int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
+void iwch_free_pbl(struct iwch_mr *mhp);
+int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
 int build_phys_page_list(struct ib_phys_buf *buffer_list,
 			 int num_phys_buf,
 			 u64 *iova_start,
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 00bab60f6de4..1e9e99a13933 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -192,6 +192,8 @@ struct ehca_qp {
 	int mtu_shift;
 	u32 message_count;
 	u32 packet_count;
+	atomic_t nr_events; /* events seen */
+	wait_queue_head_t wait_completion;
 };
 
 #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index ca5eb0cb628c..ce1ab0571be3 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -204,6 +204,8 @@ static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
 
 	read_lock(&ehca_qp_idr_lock);
 	qp = idr_find(&ehca_qp_idr, token);
+	if (qp)
+		atomic_inc(&qp->nr_events);
 	read_unlock(&ehca_qp_idr_lock);
 
 	if (!qp)
@@ -223,6 +225,8 @@ static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
 	if (fatal && qp->ext_type == EQPT_SRQBASE)
 		dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);
 
+	if (atomic_dec_and_test(&qp->nr_events))
+		wake_up(&qp->wait_completion);
 	return;
 }
 
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 18fba92fa7ae..3f59587338ea 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -566,6 +566,8 @@ static struct ehca_qp *internal_create_qp(
 		return ERR_PTR(-ENOMEM);
 	}
 
+	atomic_set(&my_qp->nr_events, 0);
+	init_waitqueue_head(&my_qp->wait_completion);
 	spin_lock_init(&my_qp->spinlock_s);
 	spin_lock_init(&my_qp->spinlock_r);
 	my_qp->qp_type = qp_type;
@@ -1934,6 +1936,9 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
 	idr_remove(&ehca_qp_idr, my_qp->token);
 	write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
 
+	/* now wait until all pending events have completed */
+	wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
+
 	h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
 	if (h_ret != H_SUCCESS) {
 		ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%li "
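
The three ehca hunks above implement a drain-before-destroy discipline: the event handler takes a reference while the QP is still reachable through the idr, and destroy first unpublishes the QP, then sleeps until in-flight handlers have finished. A generic sketch of the pattern (hypothetical object type, not ehca's):

	struct obj {
		atomic_t nr_events;		/* handlers currently inside */
		wait_queue_head_t wait_completion;
	};

	/* event side: taken under the same lock that makes obj findable */
	static void event_get(struct obj *o)
	{
		atomic_inc(&o->nr_events);
	}

	static void event_put(struct obj *o)
	{
		if (atomic_dec_and_test(&o->nr_events))
			wake_up(&o->wait_completion);
	}

	/* teardown side: runs after removing obj from the lookup table */
	static void obj_quiesce(struct obj *o)
	{
		wait_event(o->wait_completion, !atomic_read(&o->nr_events));
	}
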
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index acf30c06a0c0..ce7b7c34360e 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -1197,7 +1197,7 @@ void ipath_kreceive(struct ipath_portdata *pd)
 	}
 
 reloop:
-	for (last = 0, i = 1; !last; i++) {
+	for (last = 0, i = 1; !last; i += !last) {
 		hdr = dd->ipath_f_get_msgheader(dd, rhf_addr);
 		eflags = ipath_hdrget_err_flags(rhf_addr);
 		etype = ipath_hdrget_rcv_type(rhf_addr);
@@ -1428,6 +1428,40 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd)
 	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
 }
 
+/*
+ * used to force update of pioavailshadow if we can't get a pio buffer.
+ * Needed primarily due to exitting freeze mode after recovering
+ * from errors.  Done lazily, because it's safer (known to not
+ * be writing pio buffers).
+ */
+static void ipath_reset_availshadow(struct ipath_devdata *dd)
+{
+	int i, im;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ipath_pioavail_lock, flags);
+	for (i = 0; i < dd->ipath_pioavregs; i++) {
+		u64 val, oldval;
+		/* deal with 6110 chip bug on high register #s */
+		im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
+			i ^ 1 : i;
+		val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]);
+		/*
+		 * busy out the buffers not in the kernel avail list,
+		 * without changing the generation bits.
+		 */
+		oldval = dd->ipath_pioavailshadow[i];
+		dd->ipath_pioavailshadow[i] = val |
+			((~dd->ipath_pioavailkernel[i] <<
+			INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) &
+			0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */
+		if (oldval != dd->ipath_pioavailshadow[i])
+			ipath_dbg("shadow[%d] was %Lx, now %lx\n",
+				i, oldval, dd->ipath_pioavailshadow[i]);
+	}
+	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
+}
+
 /**
  * ipath_setrcvhdrsize - set the receive header size
  * @dd: the infinipath device
@@ -1482,9 +1516,12 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd)
 	 */
 	ipath_stats.sps_nopiobufs++;
 	if (!(++dd->ipath_consec_nopiobuf % 100000)) {
-		ipath_dbg("%u pio sends with no bufavail; dmacopy: "
-			"%llx %llx %llx %llx; shadow: %lx %lx %lx %lx\n",
+		ipath_force_pio_avail_update(dd); /* at start */
+		ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: "
+			"%llx %llx %llx %llx\n"
+			"ipath shadow: %lx %lx %lx %lx\n",
 			dd->ipath_consec_nopiobuf,
+			(unsigned long)get_cycles(),
 			(unsigned long long) le64_to_cpu(dma[0]),
 			(unsigned long long) le64_to_cpu(dma[1]),
 			(unsigned long long) le64_to_cpu(dma[2]),
@@ -1496,14 +1533,17 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd)
 	 */
 	if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
 	    (sizeof(shadow[0]) * 4 * 4))
-		ipath_dbg("2nd group: dmacopy: %llx %llx "
-			"%llx %llx; shadow: %lx %lx %lx %lx\n",
+		ipath_dbg("2nd group: dmacopy: "
+			"%llx %llx %llx %llx\n"
+			"ipath shadow: %lx %lx %lx %lx\n",
 			(unsigned long long)le64_to_cpu(dma[4]),
 			(unsigned long long)le64_to_cpu(dma[5]),
 			(unsigned long long)le64_to_cpu(dma[6]),
 			(unsigned long long)le64_to_cpu(dma[7]),
-			shadow[4], shadow[5], shadow[6],
-			shadow[7]);
+			shadow[4], shadow[5], shadow[6], shadow[7]);
+
+		/* at end, so update likely happened */
+		ipath_reset_availshadow(dd);
 	}
 }
 
@@ -1652,19 +1692,46 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
 			       unsigned len, int avail)
 {
 	unsigned long flags;
-	unsigned end;
+	unsigned end, cnt = 0, next;
 
 	/* There are two bits per send buffer (busy and generation) */
 	start *= 2;
-	len *= 2;
-	end = start + len;
+	end = start + len * 2;
 
-	/* Set or clear the generation bits. */
 	spin_lock_irqsave(&ipath_pioavail_lock, flags);
+	/* Set or clear the busy bit in the shadow. */
 	while (start < end) {
 		if (avail) {
-			__clear_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
-				dd->ipath_pioavailshadow);
+			unsigned long dma;
+			int i, im;
+			/*
+			 * the BUSY bit will never be set, because we disarm
+			 * the user buffers before we hand them back to the
+			 * kernel.  We do have to make sure the generation
+			 * bit is set correctly in shadow, since it could
+			 * have changed many times while allocated to user.
+			 * We can't use the bitmap functions on the full
+			 * dma array because it is always little-endian, so
+			 * we have to flip to host-order first.
+			 * BITS_PER_LONG is slightly wrong, since it's
+			 * always 64 bits per register in chip...
+			 * We only work on 64 bit kernels, so that's OK.
+			 */
+			/* deal with 6110 chip bug on high register #s */
+			i = start / BITS_PER_LONG;
+			im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
+				i ^ 1 : i;
+			__clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT
+				+ start, dd->ipath_pioavailshadow);
+			dma = (unsigned long) le64_to_cpu(
+				dd->ipath_pioavailregs_dma[im]);
+			if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+				+ start) % BITS_PER_LONG, &dma))
+				__set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+					+ start, dd->ipath_pioavailshadow);
+			else
+				__clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+					+ start, dd->ipath_pioavailshadow);
 			__set_bit(start, dd->ipath_pioavailkernel);
 		} else {
 			__set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
@@ -1673,7 +1740,44 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
 		}
 		start += 2;
 	}
+
+	if (dd->ipath_pioupd_thresh) {
+		end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
+		next = find_first_bit(dd->ipath_pioavailkernel, end);
+		while (next < end) {
+			cnt++;
+			next = find_next_bit(dd->ipath_pioavailkernel, end,
+					next + 1);
+		}
+	}
 	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
+
+	/*
+	 * When moving buffers from kernel to user, if number assigned to
+	 * the user is less than the pio update threshold, and threshold
+	 * is supported (cnt was computed > 0), drop the update threshold
+	 * so we update at least once per allocated number of buffers.
+	 * In any case, if the kernel buffers are less than the threshold,
+	 * drop the threshold.  We don't bother increasing it, having once
+	 * decreased it, since it would typically just cycle back and forth.
+	 * If we don't decrease below buffers in use, we can wait a long
+	 * time for an update, until some other context uses PIO buffers.
+	 */
+	if (!avail && len < cnt)
+		cnt = len;
+	if (cnt < dd->ipath_pioupd_thresh) {
+		dd->ipath_pioupd_thresh = cnt;
+		ipath_dbg("Decreased pio update threshold to %u\n",
+			dd->ipath_pioupd_thresh);
+		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+		dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
+			<< INFINIPATH_S_UPDTHRESH_SHIFT);
+		dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
+			<< INFINIPATH_S_UPDTHRESH_SHIFT;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+			dd->ipath_sendctrl);
+		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+	}
 }
 
 /**
@@ -1794,8 +1898,8 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
 
 	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
 	skip_cancel =
-		!test_bit(IPATH_SDMA_DISABLED, statp) &&
-		test_and_set_bit(IPATH_SDMA_ABORTING, statp);
+		test_and_set_bit(IPATH_SDMA_ABORTING, statp)
+		&& !test_bit(IPATH_SDMA_DISABLED, statp);
 	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
 	if (skip_cancel)
 		goto bail;
@@ -1826,6 +1930,9 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
 	ipath_disarm_piobufs(dd, 0,
 		dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
 
+	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+		set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
+
 	if (restore_sendctrl) {
 		/* else done by caller later if needed */
 		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
@@ -1845,7 +1952,6 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
 		/* only wait so long for intr */
 		dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
 		dd->ipath_sdma_reset_wait = 200;
-		__set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
 		if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
 			tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
 		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
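
A note on the 0xaaaaaaaaaaaaaaaaULL mask in ipath_reset_availshadow() above: each 64-bit shadow register packs 32 buffers at two bits apiece, and the constant has exactly the odd bit of every two-bit field set, i.e. one BUSY bit per buffer (this assumes INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT is 1, as the "All BUSY bits in qword" comment suggests). Shifting the inverted kernel-ownership bits into the BUSY positions and masking therefore busies out every buffer not on the kernel's list while leaving the generation bits untouched. Standalone illustration of the bit trick:

	#include <stdint.h>

	/* hw_avail: copy of the chip's avail register; kernel_owned: one
	 * flag per buffer, kept at the low bit of each two-bit field */
	static uint64_t busy_out_non_kernel(uint64_t hw_avail,
					    uint64_t kernel_owned)
	{
		return hw_avail |
		       ((~kernel_owned << 1) & 0xaaaaaaaaaaaaaaaaULL);
	}
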
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 8b1752202e78..3295177c937e 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -173,47 +173,25 @@ static int ipath_get_base_info(struct file *fp,
 		(void *) dd->ipath_statusp -
 		(void *) dd->ipath_pioavailregs_dma;
 	if (!shared) {
-		kinfo->spi_piocnt = dd->ipath_pbufsport;
+		kinfo->spi_piocnt = pd->port_piocnt;
 		kinfo->spi_piobufbase = (u64) pd->port_piobufs;
 		kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
 			dd->ipath_ureg_align * pd->port_port;
 	} else if (master) {
-		kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) +
-				    (dd->ipath_pbufsport % subport_cnt);
+		kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) +
+				    (pd->port_piocnt % subport_cnt);
 		/* Master's PIO buffers are after all the slave's */
 		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
 			dd->ipath_palign *
-			(dd->ipath_pbufsport - kinfo->spi_piocnt);
+			(pd->port_piocnt - kinfo->spi_piocnt);
 	} else {
 		unsigned slave = subport_fp(fp) - 1;
 
-		kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt;
+		kinfo->spi_piocnt = pd->port_piocnt / subport_cnt;
 		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
 			dd->ipath_palign * kinfo->spi_piocnt * slave;
 	}
 
-	/*
-	 * Set the PIO avail update threshold to no larger
-	 * than the number of buffers per process. Note that
-	 * we decrease it here, but won't ever increase it.
-	 */
-	if (dd->ipath_pioupd_thresh &&
-	    kinfo->spi_piocnt < dd->ipath_pioupd_thresh) {
-		unsigned long flags;
-
-		dd->ipath_pioupd_thresh = kinfo->spi_piocnt;
-		ipath_dbg("Decreased pio update threshold to %u\n",
-			dd->ipath_pioupd_thresh);
-		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-		dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
-			<< INFINIPATH_S_UPDTHRESH_SHIFT);
-		dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
-			<< INFINIPATH_S_UPDTHRESH_SHIFT;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			dd->ipath_sendctrl);
-		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-	}
-
 	if (shared) {
 		kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
 			dd->ipath_ureg_align * pd->port_port;
@@ -1309,19 +1287,19 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
 	ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
 	if (!pd->port_subport_cnt) {
 		/* port is not shared */
-		piocnt = dd->ipath_pbufsport;
+		piocnt = pd->port_piocnt;
 		piobufs = pd->port_piobufs;
 	} else if (!subport_fp(fp)) {
 		/* caller is the master */
-		piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) +
-			 (dd->ipath_pbufsport % pd->port_subport_cnt);
+		piocnt = (pd->port_piocnt / pd->port_subport_cnt) +
+			 (pd->port_piocnt % pd->port_subport_cnt);
 		piobufs = pd->port_piobufs +
-			  dd->ipath_palign * (dd->ipath_pbufsport - piocnt);
+			  dd->ipath_palign * (pd->port_piocnt - piocnt);
 	} else {
 		unsigned slave = subport_fp(fp) - 1;
 
 		/* caller is a slave */
-		piocnt = dd->ipath_pbufsport / pd->port_subport_cnt;
+		piocnt = pd->port_piocnt / pd->port_subport_cnt;
 		piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
 	}
 
@@ -1633,9 +1611,6 @@ static int try_alloc_port(struct ipath_devdata *dd, int port,
 		port_fp(fp) = pd;
 		pd->port_pid = current->pid;
 		strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
-		ipath_chg_pioavailkernel(dd,
-			dd->ipath_pbufsport * (pd->port_port - 1),
-			dd->ipath_pbufsport, 0);
 		ipath_stats.sps_ports++;
 		ret = 0;
 	} else
@@ -1938,11 +1913,25 @@ static int ipath_do_user_init(struct file *fp,
 
 	/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
 
+	/* some ports may get extra buffers, calculate that here */
+	if (pd->port_port <= dd->ipath_ports_extrabuf)
+		pd->port_piocnt = dd->ipath_pbufsport + 1;
+	else
+		pd->port_piocnt = dd->ipath_pbufsport;
+
 	/* for right now, kernel piobufs are at end, so port 1 is at 0 */
+	if (pd->port_port <= dd->ipath_ports_extrabuf)
+		pd->port_pio_base = (dd->ipath_pbufsport + 1)
+			* (pd->port_port - 1);
+	else
+		pd->port_pio_base = dd->ipath_ports_extrabuf +
+			dd->ipath_pbufsport * (pd->port_port - 1);
 	pd->port_piobufs = dd->ipath_piobufbase +
-		dd->ipath_pbufsport * (pd->port_port - 1) * dd->ipath_palign;
-	ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
-		   pd->port_port, pd->port_piobufs);
+		pd->port_pio_base * dd->ipath_palign;
+	ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u,"
+		   " first pio %u\n", pd->port_port, pd->port_piobufs,
+		   pd->port_piocnt, pd->port_pio_base);
+	ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0);
 
 	/*
 	 * Now allocate the rcvhdr Q and eager TIDs; skip the TID
@@ -2107,7 +2096,6 @@ static int ipath_close(struct inode *in, struct file *fp)
 	}
 
 	if (dd->ipath_kregbase) {
-		int i;
 		/* atomically clear receive enable port and intr avail. */
 		clear_bit(dd->ipath_r_portenable_shift + port,
 			  &dd->ipath_rcvctrl);
@@ -2136,9 +2124,9 @@ static int ipath_close(struct inode *in, struct file *fp)
 		ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
 			pd->port_port, dd->ipath_dummy_hdrq_phys);
 
-		i = dd->ipath_pbufsport * (port - 1);
-		ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport);
-		ipath_chg_pioavailkernel(dd, i, dd->ipath_pbufsport, 1);
+		ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt);
+		ipath_chg_pioavailkernel(dd, pd->port_pio_base,
+					 pd->port_piocnt, 1);
 
 		dd->ipath_f_clear_tids(dd, pd->port_port);
 
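
The port_pio_base/port_piocnt logic above hands any remainder buffers to the lowest-numbered user ports, one each. Worked example (illustrative figures): with 100 user PIO buffers across 9 user ports, ipath_pbufsport is 11 and ipath_ports_extrabuf is 1, so port 1 gets 12 buffers at base 0, port 2 gets 11 at base 1 + 11 * 1 = 12, port 3 gets 11 at base 23, and so on; 12 + 8 * 11 accounts for all 100 buffers.
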
diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c
index e3ec0d1bdf50..8eee7830f042 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba7220.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c
@@ -595,7 +595,7 @@ static void ipath_7220_txe_recover(struct ipath_devdata *dd)
 
 	dev_info(&dd->pcidev->dev,
 		 "Recovering from TXE PIO parity error\n");
-	ipath_disarm_senderrbufs(dd, 1);
+	ipath_disarm_senderrbufs(dd);
 }
 
 
@@ -675,10 +675,8 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 	ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
 	if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
 		/*
-		 * Parity errors in send memory are recoverable,
-		 * just cancel the send (if indicated in * sendbuffererror),
-		 * count the occurrence, unfreeze (if no other handled
-		 * hardware error bits are set), and continue.
+		 * Parity errors in send memory are recoverable by h/w
+		 * just do housekeeping, exit freeze mode and continue.
 		 */
 		if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
 			       INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
@@ -687,13 +685,6 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 			hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
 				    INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
 				   << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
-			if (!hwerrs) {
-				/* else leave in freeze mode */
-				ipath_write_kreg(dd,
-					dd->ipath_kregs->kr_control,
-					dd->ipath_control);
-				goto bail;
-			}
 		}
 		if (hwerrs) {
 			/*
@@ -723,8 +714,8 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 			*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
 			dd->ipath_flags &= ~IPATH_INITTED;
 		} else {
-			ipath_dbg("Clearing freezemode on ignored hardware "
-				  "error\n");
+			ipath_dbg("Clearing freezemode on ignored or "
+				  "recovered hardware error\n");
 			ipath_clear_freeze(dd);
 		}
 	}
@@ -870,8 +861,9 @@ static int ipath_7220_boardname(struct ipath_devdata *dd, char *name,
 			 "revision %u.%u!\n",
 			 dd->ipath_majrev, dd->ipath_minrev);
 		ret = 1;
-	} else if (dd->ipath_minrev == 1) {
-		/* Rev1 chips are prototype. Complain, but allow use */
+	} else if (dd->ipath_minrev == 1 &&
+		!(dd->ipath_flags & IPATH_INITTED)) {
+		/* Rev1 chips are prototype. Complain at init, but allow use */
 		ipath_dev_err(dd, "Unsupported hardware "
 			      "revision %u.%u, Contact support@qlogic.com\n",
 			      dd->ipath_majrev, dd->ipath_minrev);
@@ -1966,7 +1958,7 @@ static void ipath_7220_config_ports(struct ipath_devdata *dd, ushort cfgports)
 			 dd->ipath_rcvctrl);
 	dd->ipath_p0_rcvegrcnt = 2048;	/* always */
 	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-		dd->ipath_pioreserved = 1;	/* reserve a buffer */
+		dd->ipath_pioreserved = 3;	/* kpiobufs used for PIO */
 }
 
 
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 27dd89476660..3e5baa43fc82 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -41,7 +41,7 @@
41/* 41/*
42 * min buffers we want to have per port, after driver 42 * min buffers we want to have per port, after driver
43 */ 43 */
44#define IPATH_MIN_USER_PORT_BUFCNT 8 44#define IPATH_MIN_USER_PORT_BUFCNT 7
45 45
46/* 46/*
47 * Number of ports we are configured to use (to allow for more pio 47 * Number of ports we are configured to use (to allow for more pio
@@ -54,13 +54,9 @@ MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
54 54
55/* 55/*
56 * Number of buffers reserved for driver (verbs and layered drivers.) 56 * Number of buffers reserved for driver (verbs and layered drivers.)
57 * Reserved at end of buffer list. Initialized based on 57 * Initialized based on number of PIO buffers if not set via module interface.
58 * number of PIO buffers if not set via module interface.
59 * The problem with this is that it's global, but we'll use different 58 * The problem with this is that it's global, but we'll use different
60 * numbers for different chip types. So the default value is not 59 * numbers for different chip types.
61 * very useful. I've redefined it for the 1.3 release so that it's
62 * zero unless set by the user to something else, in which case we
63 * try to respect it.
64 */ 60 */
65static ushort ipath_kpiobufs; 61static ushort ipath_kpiobufs;
66 62
@@ -546,9 +542,12 @@ static void enable_chip(struct ipath_devdata *dd, int reinit)
546 pioavail = dd->ipath_pioavailregs_dma[i ^ 1]; 542 pioavail = dd->ipath_pioavailregs_dma[i ^ 1];
547 else 543 else
548 pioavail = dd->ipath_pioavailregs_dma[i]; 544 pioavail = dd->ipath_pioavailregs_dma[i];
549 dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail) | 545 /*
550 (~dd->ipath_pioavailkernel[i] << 546 * don't need to worry about ipath_pioavailkernel here
551 INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT); 547 * because we will call ipath_chg_pioavailkernel() later
548 * in initialization, to busy out buffers as needed
549 */
550 dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail);
552 } 551 }
553 /* can get counters, stats, etc. */ 552 /* can get counters, stats, etc. */
554 dd->ipath_flags |= IPATH_PRESENT; 553 dd->ipath_flags |= IPATH_PRESENT;
@@ -708,12 +707,11 @@ static void verify_interrupt(unsigned long opaque)
708int ipath_init_chip(struct ipath_devdata *dd, int reinit) 707int ipath_init_chip(struct ipath_devdata *dd, int reinit)
709{ 708{
710 int ret = 0; 709 int ret = 0;
711 u32 val32, kpiobufs; 710 u32 kpiobufs, defkbufs;
712 u32 piobufs, uports; 711 u32 piobufs, uports;
713 u64 val; 712 u64 val;
714 struct ipath_portdata *pd; 713 struct ipath_portdata *pd;
715 gfp_t gfp_flags = GFP_USER | __GFP_COMP; 714 gfp_t gfp_flags = GFP_USER | __GFP_COMP;
716 unsigned long flags;
717 715
718 ret = init_housekeeping(dd, reinit); 716 ret = init_housekeeping(dd, reinit);
719 if (ret) 717 if (ret)
@@ -753,56 +751,46 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
753 dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2) 751 dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
754 / (sizeof(u64) * BITS_PER_BYTE / 2); 752 / (sizeof(u64) * BITS_PER_BYTE / 2);
755 uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0; 753 uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
756 if (ipath_kpiobufs == 0) { 754 if (piobufs > 144)
757 /* not set by user (this is default) */ 755 defkbufs = 32 + dd->ipath_pioreserved;
758 if (piobufs > 144)
759 kpiobufs = 32;
760 else
761 kpiobufs = 16;
762 }
763 else 756 else
764 kpiobufs = ipath_kpiobufs; 757 defkbufs = 16 + dd->ipath_pioreserved;
765 758
766 if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) { 759 if (ipath_kpiobufs && (ipath_kpiobufs +
760 (uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) {
767 int i = (int) piobufs - 761 int i = (int) piobufs -
768 (int) (uports * IPATH_MIN_USER_PORT_BUFCNT); 762 (int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
769 if (i < 1) 763 if (i < 1)
770 i = 1; 764 i = 1;
771 dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of " 765 dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
772 "%d for kernel leaves too few for %d user ports " 766 "%d for kernel leaves too few for %d user ports "
773 "(%d each); using %u\n", kpiobufs, 767 "(%d each); using %u\n", ipath_kpiobufs,
774 piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i); 768 piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
775 /* 769 /*
776 * shouldn't change ipath_kpiobufs, because could be 770 * shouldn't change ipath_kpiobufs, because could be
777 * different for different devices... 771 * different for different devices...
778 */ 772 */
779 kpiobufs = i; 773 kpiobufs = i;
780 } 774 } else if (ipath_kpiobufs)
775 kpiobufs = ipath_kpiobufs;
776 else
777 kpiobufs = defkbufs;
781 dd->ipath_lastport_piobuf = piobufs - kpiobufs; 778 dd->ipath_lastport_piobuf = piobufs - kpiobufs;
782 dd->ipath_pbufsport = 779 dd->ipath_pbufsport =
783 uports ? dd->ipath_lastport_piobuf / uports : 0; 780 uports ? dd->ipath_lastport_piobuf / uports : 0;
784 val32 = dd->ipath_lastport_piobuf - (dd->ipath_pbufsport * uports); 781 /* if not an even divisor, some user ports get extra buffers */
785 if (val32 > 0) { 782 dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf -
786 ipath_dbg("allocating %u pbufs/port leaves %u unused, " 783 (dd->ipath_pbufsport * uports);
787 "add to kernel\n", dd->ipath_pbufsport, val32); 784 if (dd->ipath_ports_extrabuf)
788 dd->ipath_lastport_piobuf -= val32; 785 ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to "
789 kpiobufs += val32; 786 "ports <= %u\n", dd->ipath_pbufsport,
790 ipath_dbg("%u pbufs/port leaves %u unused, add to kernel\n", 787 dd->ipath_ports_extrabuf);
791 dd->ipath_pbufsport, val32);
792 }
793 dd->ipath_lastpioindex = 0; 788 dd->ipath_lastpioindex = 0;
794 dd->ipath_lastpioindexl = dd->ipath_piobcnt2k; 789 dd->ipath_lastpioindexl = dd->ipath_piobcnt2k;
795 ipath_chg_pioavailkernel(dd, 0, piobufs, 1); 790 /* ipath_pioavailshadow initialized earlier */
796 ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u " 791 ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
797 "each for %u user ports\n", kpiobufs, 792 "each for %u user ports\n", kpiobufs,
798 piobufs, dd->ipath_pbufsport, uports); 793 piobufs, dd->ipath_pbufsport, uports);
799 if (dd->ipath_pioupd_thresh) {
800 if (dd->ipath_pbufsport < dd->ipath_pioupd_thresh)
801 dd->ipath_pioupd_thresh = dd->ipath_pbufsport;
802 if (kpiobufs < dd->ipath_pioupd_thresh)
803 dd->ipath_pioupd_thresh = kpiobufs;
804 }
805
806 ret = dd->ipath_f_early_init(dd); 794 ret = dd->ipath_f_early_init(dd);
807 if (ret) { 795 if (ret) {
808 ipath_dev_err(dd, "Early initialization failure\n"); 796 ipath_dev_err(dd, "Early initialization failure\n");
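
The reworked sizing logic above is mostly integer bookkeeping, so a small stand-alone model may help. This is a hedged sketch, not the driver: MIN_USER_PORT_BUFCNT is a stand-in for IPATH_MIN_USER_PORT_BUFCNT (whose real value is not shown in this diff), and "reserved" plays the role of dd->ipath_pioreserved.

#include <stdio.h>

#define MIN_USER_PORT_BUFCNT 7  /* stand-in; real constant not shown here */

static unsigned split_kpiobufs(unsigned piobufs, unsigned uports,
			       unsigned reserved, unsigned ipath_kpiobufs)
{
	unsigned defkbufs, kpiobufs;

	/* default depends on total buffer count, plus the reserved pool */
	defkbufs = (piobufs > 144 ? 32 : 16) + reserved;

	if (ipath_kpiobufs &&
	    ipath_kpiobufs + uports * MIN_USER_PORT_BUFCNT > piobufs) {
		/* module parameter asked for too many: clamp so each user
		 * port keeps its minimum, never below one kernel buffer */
		int i = (int)piobufs - (int)(uports * MIN_USER_PORT_BUFCNT);

		kpiobufs = i < 1 ? 1 : (unsigned)i;
	} else if (ipath_kpiobufs)
		kpiobufs = ipath_kpiobufs;
	else
		kpiobufs = defkbufs;
	return kpiobufs;
}

int main(void)
{
	/* e.g. 192 buffers, 8 user ports, 8 reserved, no module override */
	printf("%u kernel bufs\n", split_kpiobufs(192, 8, 8, 0));
	return 0;
}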
@@ -810,13 +798,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
810 } 798 }
811 799
812 /* 800 /*
813 * Cancel any possible active sends from early driver load.
814 * Follows early_init because some chips have to initialize
815 * PIO buffers in early_init to avoid false parity errors.
816 */
817 ipath_cancel_sends(dd, 0);
818
819 /*
820 * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be 801 * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be
821 * done after early_init. 802 * done after early_init.
822 */ 803 */
@@ -836,6 +817,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
836 817
837 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr, 818 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
838 dd->ipath_pioavailregs_phys); 819 dd->ipath_pioavailregs_phys);
820
839 /* 821 /*
840 * this is to detect s/w errors, which the h/w works around by 822 * this is to detect s/w errors, which the h/w works around by
841 * ignoring the low 6 bits of address, if it wasn't aligned. 823 * ignoring the low 6 bits of address, if it wasn't aligned.
@@ -862,12 +844,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
862 ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED); 844 ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
863 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL); 845 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
864 846
865 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
866 dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE;
867 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
868 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
869 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
870
871 /* 847 /*
872 * before error clears, since we expect serdes pll errors during 848 * before error clears, since we expect serdes pll errors during
873 * this, the first time after reset 849 * this, the first time after reset
@@ -940,6 +916,19 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
940 else 916 else
941 enable_chip(dd, reinit); 917 enable_chip(dd, reinit);
942 918
919 /* after enable_chip, so pioavailshadow setup */
920 ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
921
922 /*
923 * Cancel any possible active sends from early driver load.
924 * Follows early_init because some chips have to initialize
925 * PIO buffers in early_init to avoid false parity errors.
926 * After enable and ipath_chg_pioavailkernel so we can safely
927 * enable pioavail updates and PIOENABLE; packets are now
928 * ready to go out.
929 */
930 ipath_cancel_sends(dd, 1);
931
943 if (!reinit) { 932 if (!reinit) {
944 /* 933 /*
945 * Used when we close a port, for DMA already in flight 934 * Used when we close a port, for DMA already in flight
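
Taken together, the hunks above move send cancellation from before early_init cleanup to the very end of initialization. A hedged sketch of the resulting order, with printf stubs standing in for the real helpers and an example buffer count:

#include <stdio.h>

static void enable_chip_model(void)
{
	printf("enable chip (sets up pioavailshadow)\n");
}

static void chg_pioavailkernel_model(unsigned start, unsigned cnt)
{
	printf("mark bufs %u..%u kernel-available\n", start, start + cnt - 1);
}

static void cancel_sends_model(int restore)
{
	printf("cancel sends (restore=%d)\n", restore);
}

int main(void)
{
	unsigned piobufs = 192;               /* example total */

	enable_chip_model();
	chg_pioavailkernel_model(0, piobufs); /* needs shadow from enable */
	cancel_sends_model(1);                /* safe: PIOENABLE already on */
	return 0;
}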
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 1b58f4737c71..26900b3b7a4e 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -38,42 +38,12 @@
38#include "ipath_verbs.h" 38#include "ipath_verbs.h"
39#include "ipath_common.h" 39#include "ipath_common.h"
40 40
41/*
42 * clear (write) a pio buffer, to clear a parity error. This routine
43 * should only be called when in freeze mode, and the buffer should be
44 * canceled afterwards.
45 */
46static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum)
47{
48 u32 __iomem *pbuf;
49 u32 dwcnt; /* dword count to write */
50 if (pnum < dd->ipath_piobcnt2k) {
51 pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum *
52 dd->ipath_palign);
53 dwcnt = dd->ipath_piosize2k >> 2;
54 }
55 else {
56 pbuf = (u32 __iomem *) (dd->ipath_pio4kbase +
57 (pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
58 dwcnt = dd->ipath_piosize4k >> 2;
59 }
60 dev_info(&dd->pcidev->dev,
61 "Rewrite PIO buffer %u, to recover from parity error\n",
62 pnum);
63
64 /* no flush required, since already in freeze */
65 writel(dwcnt + 1, pbuf);
66 while (--dwcnt)
67 writel(0, pbuf++);
68}
69 41
70/* 42/*
71 * Called when we might have an error that is specific to a particular 43 * Called when we might have an error that is specific to a particular
72 * PIO buffer, and may need to cancel that buffer, so it can be re-used. 44 * PIO buffer, and may need to cancel that buffer, so it can be re-used.
73 * If rewrite is true, and bits are set in the sendbufferror registers,
74 * we'll write to the buffer, for error recovery on parity errors.
75 */ 45 */
76void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) 46void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
77{ 47{
78 u32 piobcnt; 48 u32 piobcnt;
79 unsigned long sbuf[4]; 49 unsigned long sbuf[4];
@@ -109,11 +79,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
109 } 79 }
110 80
111 for (i = 0; i < piobcnt; i++) 81 for (i = 0; i < piobcnt; i++)
112 if (test_bit(i, sbuf)) { 82 if (test_bit(i, sbuf))
113 if (rewrite)
114 ipath_clrpiobuf(dd, i);
115 ipath_disarm_piobufs(dd, i, 1); 83 ipath_disarm_piobufs(dd, i, 1);
116 }
117 /* ignore armlaunch errs for a bit */ 84 /* ignore armlaunch errs for a bit */
118 dd->ipath_lastcancel = jiffies+3; 85 dd->ipath_lastcancel = jiffies+3;
119 } 86 }
@@ -164,7 +131,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
164{ 131{
165 u64 ignore_this_time = 0; 132 u64 ignore_this_time = 0;
166 133
167 ipath_disarm_senderrbufs(dd, 0); 134 ipath_disarm_senderrbufs(dd);
168 if ((errs & E_SUM_LINK_PKTERRS) && 135 if ((errs & E_SUM_LINK_PKTERRS) &&
169 !(dd->ipath_flags & IPATH_LINKACTIVE)) { 136 !(dd->ipath_flags & IPATH_LINKACTIVE)) {
170 /* 137 /*
@@ -909,8 +876,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
909 * processes (causing armlaunch), send errors due to going into freeze mode, 876 * processes (causing armlaunch), send errors due to going into freeze mode,
910 * etc., and try to avoid causing extra interrupts while doing so. 877 * etc., and try to avoid causing extra interrupts while doing so.
911 * Forcibly update the in-memory pioavail register copies after cleanup 878 * Forcibly update the in-memory pioavail register copies after cleanup
912 * because the chip won't do it for anything changing while in freeze mode 879 * because the chip won't do it while in freeze mode (the register values
913 * (we don't want to wait for the next pio buffer state change). 880 * themselves are kept correct).
914 * Make sure that we don't lose any important interrupts by using the chip 881 * Make sure that we don't lose any important interrupts by using the chip
915 * feature that says that writing 0 to a bit in *clear that is set in 882 * feature that says that writing 0 to a bit in *clear that is set in
916 * *status will cause an interrupt to be generated again (if allowed by 883 * *status will cause an interrupt to be generated again (if allowed by
@@ -918,44 +885,23 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
918 */ 885 */
919void ipath_clear_freeze(struct ipath_devdata *dd) 886void ipath_clear_freeze(struct ipath_devdata *dd)
920{ 887{
921 int i, im;
922 u64 val;
923
924 /* disable error interrupts, to avoid confusion */ 888 /* disable error interrupts, to avoid confusion */
925 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL); 889 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
926 890
 927 /* also disable interrupts; errormask is sometimes overwritten */ 891 /* also disable interrupts; errormask is sometimes overwritten */
928 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); 892 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
929 893
930 /* 894 ipath_cancel_sends(dd, 1);
 931 * clear all sends, because they may have been 895
932 * completed by usercode while in freeze mode, and 896 /* clear the freeze, and be sure chip saw it */
933 * therefore would not be sent, and eventually
934 * might cause the process to run out of bufs
935 */
936 ipath_cancel_sends(dd, 0);
937 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 897 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
938 dd->ipath_control); 898 dd->ipath_control);
899 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
939 900
940 /* ensure pio avail updates continue */ 901 /* force in-memory update now we are out of freeze */
941 ipath_force_pio_avail_update(dd); 902 ipath_force_pio_avail_update(dd);
942 903
943 /* 904 /*
944 * We just enabled pioavailupdate, so dma copy is almost certainly
945 * not yet right, so read the registers directly. Similar to init
946 */
947 for (i = 0; i < dd->ipath_pioavregs; i++) {
948 /* deal with 6110 chip bug */
949 im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
950 i ^ 1 : i;
951 val = ipath_read_kreg64(dd, (0x1000 / sizeof(u64)) + im);
952 dd->ipath_pioavailregs_dma[i] = cpu_to_le64(val);
953 dd->ipath_pioavailshadow[i] = val |
954 (~dd->ipath_pioavailkernel[i] <<
955 INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT);
956 }
957
958 /*
959 * force new interrupt if any hwerr, error or interrupt bits are 905 * force new interrupt if any hwerr, error or interrupt bits are
960 * still set, and clear "safe" send packet errors related to freeze 906 * still set, and clear "safe" send packet errors related to freeze
961 * and cancelling sends. Re-enable error interrupts before possible 907 * and cancelling sends. Re-enable error interrupts before possible
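
The slimmed-down ipath_clear_freeze() now follows a fixed five-step order instead of re-reading the pioavail registers by hand. A hedged sketch with stubs in place of the real ipath_* helpers; the register names and control value are dummies:

#include <stdio.h>

static void write_kreg_model(const char *reg, unsigned long long val)
{
	printf("write %s = %#llx\n", reg, val);
}

static void cancel_sends_model(int restore)
{
	printf("cancel sends (restore=%d)\n", restore);
}

static void force_pio_avail_update_model(void)
{
	printf("force in-memory pioavail update\n");
}

int main(void)
{
	write_kreg_model("errormask", 0);  /* 1: mask error interrupts */
	write_kreg_model("intmask", 0);    /* 2: mask all interrupts */
	cancel_sends_model(1);             /* 3: cancel, then restore sends */
	write_kreg_model("control", 0x2);  /* 4: clear freeze (dummy value) */
	/* a scratch read-back would flush the write on real hardware */
	force_pio_avail_update_model();    /* 5: refresh shadow post-freeze */
	return 0;
}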
@@ -1312,10 +1258,8 @@ irqreturn_t ipath_intr(int irq, void *data)
1312 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); 1258 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1313 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); 1259 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1314 1260
1315 if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) 1261 /* always process; sdma verbs uses PIO for acks and VL15 */
1316 handle_layer_pioavail(dd); 1262 handle_layer_pioavail(dd);
1317 else
1318 ipath_dbg("unexpected BUFAVAIL intr\n");
1319 } 1263 }
1320 1264
1321 ret = IRQ_HANDLED; 1265 ret = IRQ_HANDLED;
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 202337ae90dc..02b24a340599 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -117,6 +117,10 @@ struct ipath_portdata {
117 u16 port_subport_cnt; 117 u16 port_subport_cnt;
118 /* non-zero if port is being shared. */ 118 /* non-zero if port is being shared. */
119 u16 port_subport_id; 119 u16 port_subport_id;
120 /* number of pio bufs for this port (all procs, if shared) */
121 u32 port_piocnt;
122 /* first pio buffer for this port */
123 u32 port_pio_base;
120 /* chip offset of PIO buffers for this port */ 124 /* chip offset of PIO buffers for this port */
121 u32 port_piobufs; 125 u32 port_piobufs;
122 /* how many alloc_pages() chunks in port_rcvegrbuf_pages */ 126 /* how many alloc_pages() chunks in port_rcvegrbuf_pages */
@@ -384,6 +388,8 @@ struct ipath_devdata {
384 u32 ipath_lastrpkts; 388 u32 ipath_lastrpkts;
385 /* pio bufs allocated per port */ 389 /* pio bufs allocated per port */
386 u32 ipath_pbufsport; 390 u32 ipath_pbufsport;
391 /* if remainder on bufs/port, ports < extrabuf get 1 extra */
392 u32 ipath_ports_extrabuf;
387 u32 ipath_pioupd_thresh; /* update threshold, some chips */ 393 u32 ipath_pioupd_thresh; /* update threshold, some chips */
388 /* 394 /*
389 * number of ports configured as max; zero is set to number chip 395 * number of ports configured as max; zero is set to number chip
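
The comment on ipath_ports_extrabuf implies a simple layout rule: user ports whose index is below the remainder get one extra buffer, and bases are packed accordingly. The assignment itself lives in the ipath_file_ops.c changes, which this page only summarizes, so the following is a hedged reading aid rather than the driver code:

#include <stdio.h>

static void port_layout(unsigned pbufsport, unsigned extrabuf,
			unsigned port, unsigned *base, unsigned *cnt)
{
	if (port < extrabuf) {
		*cnt = pbufsport + 1;            /* gets one extra buffer */
		*base = port * *cnt;
	} else {
		*cnt = pbufsport;
		*base = extrabuf * (pbufsport + 1) +
			(port - extrabuf) * pbufsport;
	}
}

int main(void)
{
	unsigned base, cnt, p;

	/* 46 buffers over 4 ports: 11 each plus a remainder of 2 */
	for (p = 0; p < 4; p++) {
		port_layout(11, 2, p, &base, &cnt);
		printf("port %u: base %u, %u bufs\n", p, base, cnt);
	}
	return 0;
}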
@@ -1011,7 +1017,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *);
1011int ipath_update_eeprom_log(struct ipath_devdata *dd); 1017int ipath_update_eeprom_log(struct ipath_devdata *dd);
1012void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr); 1018void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
1013u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); 1019u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
1014void ipath_disarm_senderrbufs(struct ipath_devdata *, int); 1020void ipath_disarm_senderrbufs(struct ipath_devdata *);
1015void ipath_force_pio_avail_update(struct ipath_devdata *); 1021void ipath_force_pio_avail_update(struct ipath_devdata *);
1016void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev); 1022void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
1017 1023
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index c405dfba5531..08b11b567614 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -1746,7 +1746,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1746 qp->r_wrid_valid = 0; 1746 qp->r_wrid_valid = 0;
1747 wc.wr_id = qp->r_wr_id; 1747 wc.wr_id = qp->r_wr_id;
1748 wc.status = IB_WC_SUCCESS; 1748 wc.status = IB_WC_SUCCESS;
1749 wc.opcode = IB_WC_RECV; 1749 if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
1750 opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
1751 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
1752 else
1753 wc.opcode = IB_WC_RECV;
1750 wc.vendor_err = 0; 1754 wc.vendor_err = 0;
1751 wc.qp = &qp->ibqp; 1755 wc.qp = &qp->ibqp;
1752 wc.src_qp = qp->remote_qpn; 1756 wc.src_qp = qp->remote_qpn;
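
The opcode fix above is a two-way choice on the receive completion path. A hedged model with local enums standing in for the IB verbs and ipath packet-opcode values:

#include <stdio.h>

enum wc_opcode_model { WC_RECV, WC_RECV_RDMA_WITH_IMM };
enum pkt_opcode_model {
	SEND_LAST,
	RDMA_WRITE_LAST_WITH_IMMEDIATE,
	RDMA_WRITE_ONLY_WITH_IMMEDIATE
};

static enum wc_opcode_model rcv_wc_opcode(enum pkt_opcode_model op)
{
	if (op == RDMA_WRITE_LAST_WITH_IMMEDIATE ||
	    op == RDMA_WRITE_ONLY_WITH_IMMEDIATE)
		return WC_RECV_RDMA_WITH_IMM;
	return WC_RECV;  /* previously returned unconditionally */
}

int main(void)
{
	printf("%d %d\n", rcv_wc_opcode(SEND_LAST),
	       rcv_wc_opcode(RDMA_WRITE_ONLY_WITH_IMMEDIATE));
	return 0;
}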
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index 8ac5c1d82ccd..9e3fe61cbd08 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -481,9 +481,10 @@ done:
481 wake_up(&qp->wait); 481 wake_up(&qp->wait);
482} 482}
483 483
484static void want_buffer(struct ipath_devdata *dd) 484static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
485{ 485{
486 if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) { 486 if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
487 qp->ibqp.qp_type == IB_QPT_SMI) {
487 unsigned long flags; 488 unsigned long flags;
488 489
489 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); 490 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
@@ -519,7 +520,7 @@ static void ipath_no_bufs_available(struct ipath_qp *qp,
519 spin_lock_irqsave(&dev->pending_lock, flags); 520 spin_lock_irqsave(&dev->pending_lock, flags);
520 list_add_tail(&qp->piowait, &dev->piowait); 521 list_add_tail(&qp->piowait, &dev->piowait);
521 spin_unlock_irqrestore(&dev->pending_lock, flags); 522 spin_unlock_irqrestore(&dev->pending_lock, flags);
522 want_buffer(dev->dd); 523 want_buffer(dev->dd, qp);
523 dev->n_piowait++; 524 dev->n_piowait++;
524} 525}
525 526
diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c
index 1974df7a9f78..3697449c1ba4 100644
--- a/drivers/infiniband/hw/ipath/ipath_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_sdma.c
@@ -308,13 +308,15 @@ static void sdma_abort_task(unsigned long opaque)
308 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); 308 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
309 309
310 /* 310 /*
311 * Don't restart sdma here. Wait until link is up to ACTIVE. 311 * Don't restart sdma here (with the exception
312 * VL15 MADs used to bring the link up use PIO, and multiple 312 * below). Wait until link is up to ACTIVE. VL15 MADs
313 * link transitions otherwise cause the sdma engine to be 313 * used to bring the link up use PIO, and multiple link
314 * transitions otherwise cause the sdma engine to be
314 * stopped and started multiple times. 315 * stopped and started multiple times.
315 * The disable is done here, including the shadow, so the 316 * The disable is done here, including the shadow,
316 * state is kept consistent. 317 * so the state is kept consistent.
317 * See ipath_restart_sdma() for the actual starting of sdma. 318 * See ipath_restart_sdma() for the actual starting
319 * of sdma.
318 */ 320 */
319 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); 321 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
320 dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE; 322 dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
@@ -326,6 +328,13 @@ static void sdma_abort_task(unsigned long opaque)
326 /* make sure I see next message */ 328 /* make sure I see next message */
327 dd->ipath_sdma_abort_jiffies = 0; 329 dd->ipath_sdma_abort_jiffies = 0;
328 330
331 /*
332 * Not everything that takes SDMA offline is a link
333 * status change. If the link was up, restart SDMA.
334 */
335 if (dd->ipath_flags & IPATH_LINKACTIVE)
336 ipath_restart_sdma(dd);
337
329 goto done; 338 goto done;
330 } 339 }
331 340
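
The restart rule added above distinguishes link flaps from other SDMA aborts: if the link never went down, nothing will arrive later to restart the engine, so it restarts immediately. A hedged model, with an invented flag value:

#include <stdio.h>

#define LINKACTIVE_MODEL 0x1  /* invented stand-in for IPATH_LINKACTIVE */

static void restart_sdma_model(void)
{
	printf("restart sdma\n");
}

static void abort_done_model(unsigned flags)
{
	if (flags & LINKACTIVE_MODEL)
		restart_sdma_model();             /* not a link flap */
	else
		printf("wait for link ACTIVE\n"); /* link-up will restart */
}

int main(void)
{
	abort_done_model(LINKACTIVE_MODEL);
	abort_done_model(0);
	return 0;
}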
@@ -427,7 +436,12 @@ int setup_sdma(struct ipath_devdata *dd)
427 goto done; 436 goto done;
428 } 437 }
429 438
430 dd->ipath_sdma_status = 0; 439 /*
440 * Set initial status as if we had been up, then gone down.
441 * This lets initial start on transition to ACTIVE be the
442 * same as restart after link flap.
443 */
444 dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED;
431 dd->ipath_sdma_abort_jiffies = 0; 445 dd->ipath_sdma_abort_jiffies = 0;
432 dd->ipath_sdma_generation = 0; 446 dd->ipath_sdma_generation = 0;
433 dd->ipath_sdma_descq_tail = 0; 447 dd->ipath_sdma_descq_tail = 0;
@@ -449,16 +463,19 @@ int setup_sdma(struct ipath_devdata *dd)
449 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, 463 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr,
450 dd->ipath_sdma_head_phys); 464 dd->ipath_sdma_head_phys);
451 465
452 /* Reserve all the former "kernel" piobufs */ 466 /*
453 n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - dd->ipath_pioreserved; 467 * Reserve all the former "kernel" piobufs, using high number range
454 for (i = dd->ipath_lastport_piobuf; i < n; ++i) { 468 * so we get as many 4K buffers as possible
469 */
470 n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
471 i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved;
 472 ipath_chg_pioavailkernel(dd, i, n - i, 0);
473 for (; i < n; ++i) {
455 unsigned word = i / 64; 474 unsigned word = i / 64;
456 unsigned bit = i & 63; 475 unsigned bit = i & 63;
457 BUG_ON(word >= 3); 476 BUG_ON(word >= 3);
458 senddmabufmask[word] |= 1ULL << bit; 477 senddmabufmask[word] |= 1ULL << bit;
459 } 478 }
460 ipath_chg_pioavailkernel(dd, dd->ipath_lastport_piobuf,
461 n - dd->ipath_lastport_piobuf, 0);
462 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, 479 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0,
463 senddmabufmask[0]); 480 senddmabufmask[0]);
464 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, 481 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1,
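
The mask construction above packs the reserved range into three 64-bit words via word = i / 64 and bit = i & 63, with the range now starting at lastport_piobuf + pioreserved so it reaches the top of the 2k+4k buffer space. A hedged stand-alone model with example buffer counts (the real counts are chip-specific):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t mask[3] = { 0, 0, 0 };       /* mirrors senddmabufmask[] */
	unsigned piobcnt2k = 128, piobcnt4k = 64;
	unsigned lastport_piobuf = 120, pioreserved = 8;
	unsigned n = piobcnt2k + piobcnt4k;
	unsigned i = lastport_piobuf + pioreserved;

	for (; i < n; ++i)
		mask[i / 64] |= 1ULL << (i & 63); /* word 0..2, bit 0..63 */

	printf("%#llx %#llx %#llx\n",
	       (unsigned long long)mask[0],
	       (unsigned long long)mask[1],
	       (unsigned long long)mask[2]);
	return 0;
}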
@@ -615,6 +632,9 @@ void ipath_restart_sdma(struct ipath_devdata *dd)
615 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); 632 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
616 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); 633 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
617 634
635 /* notify upper layers */
636 ipath_ib_piobufavail(dd->verbs_dev);
637
618bail: 638bail:
619 return; 639 return;
620} 640}
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index e63927cce5b5..5015cd2e57bd 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -396,7 +396,6 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
396 396
397 wqe = get_swqe_ptr(qp, qp->s_head); 397 wqe = get_swqe_ptr(qp, qp->s_head);
398 wqe->wr = *wr; 398 wqe->wr = *wr;
399 wqe->ssn = qp->s_ssn++;
400 wqe->length = 0; 399 wqe->length = 0;
401 if (wr->num_sge) { 400 if (wr->num_sge) {
402 acc = wr->opcode >= IB_WR_RDMA_READ ? 401 acc = wr->opcode >= IB_WR_RDMA_READ ?
@@ -422,6 +421,7 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
422 goto bail_inval; 421 goto bail_inval;
423 } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu) 422 } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
424 goto bail_inval; 423 goto bail_inval;
424 wqe->ssn = qp->s_ssn++;
425 qp->s_head = next; 425 qp->s_head = next;
426 426
427 ret = 0; 427 ret = 0;
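
The net effect of moving the wqe->ssn assignment is that a rejected work request no longer consumes a send sequence number. A hedged model with an invented MTU check standing in for the real validation:

#include <stdio.h>

struct qp_model {
	unsigned s_ssn;
};

static int post_one_send_model(struct qp_model *qp, unsigned length,
			       unsigned mtu)
{
	if (length > mtu)
		return -1;  /* bail before the WQE is queued: s_ssn untouched */
	/* only now does the WQE really go on the send queue */
	printf("queued wqe with ssn %u\n", qp->s_ssn++);
	return 0;
}

int main(void)
{
	struct qp_model qp = { .s_ssn = 1 };

	post_one_send_model(&qp, 4096, 2048);  /* rejected, ssn stays 1 */
	post_one_send_model(&qp, 1024, 2048);  /* queued with ssn 1 */
	return 0;
}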