author		Linus Torvalds <torvalds@linux-foundation.org>	2010-03-03 10:33:17 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-03-03 10:33:17 -0500
commit		3ff1562ea48cddaa5ac1adcb8892227389a4c96c (patch)
tree		927f885b02579402859f339db707a0a76470aa51
parent		88b68033b99b16b42e734ab56a4ed6bc1d53516a (diff)
parent		fe8875e5a41a36ef26072cf05731df5493ade0e1 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (48 commits)
  IB/srp: Clean up error path in srp_create_target_ib()
  IB/srp: Split send and receive CQs to reduce number of interrupts
  RDMA/nes: Add support for KR device id 0x0110
  IB/uverbs: Use anon_inodes instead of private infinibandeventfs
  IB/core: Fix and clean up ib_ud_header_init()
  RDMA/cxgb3: Mark RDMA device with CXIO_ERROR_FATAL when removing
  RDMA/cxgb3: Don't allocate the SW queue for user mode CQs
  RDMA/cxgb3: Increase the max CQ depth
  RDMA/cxgb3: Doorbell overflow avoidance and recovery
  IB/core: Pack struct ib_device a little tighter
  IB/ucm: Clean whitespace errors
  IB/ucm: Increase maximum devices supported
  IB/ucm: Use stack variable 'base' in ib_ucm_add_one
  IB/ucm: Use stack variable 'devnum' in ib_ucm_add_one
  IB/umad: Clean whitespace
  IB/umad: Increase maximum devices supported
  IB/umad: Use stack variable 'base' in ib_umad_init_port
  IB/umad: Use stack variable 'devnum' in ib_umad_init_port
  IB/umad: Remove port_table[]
  IB/umad: Convert *cdev to cdev in struct ib_umad_port
  ...
-rw-r--r--	drivers/infiniband/Kconfig			|   1
-rw-r--r--	drivers/infiniband/core/ucm.c			|  63
-rw-r--r--	drivers/infiniband/core/ud_header.c		|  14
-rw-r--r--	drivers/infiniband/core/umem.c			|   2
-rw-r--r--	drivers/infiniband/core/user_mad.c		| 173
-rw-r--r--	drivers/infiniband/core/uverbs.h		|  11
-rw-r--r--	drivers/infiniband/core/uverbs_main.c		| 234
-rw-r--r--	drivers/infiniband/hw/cxgb3/cxio_hal.c		|  15
-rw-r--r--	drivers/infiniband/hw/cxgb3/cxio_hal.h		|   4
-rw-r--r--	drivers/infiniband/hw/cxgb3/cxio_wr.h		|  17
-rw-r--r--	drivers/infiniband/hw/cxgb3/iwch.c		|  80
-rw-r--r--	drivers/infiniband/hw/cxgb3/iwch.h		|   2
-rw-r--r--	drivers/infiniband/hw/cxgb3/iwch_provider.c	|   2
-rw-r--r--	drivers/infiniband/hw/cxgb3/iwch_qp.c		|   9
-rw-r--r--	drivers/infiniband/hw/ehca/ehca_irq.c		|   5
-rw-r--r--	drivers/infiniband/hw/ehca/ehca_qp.c		|   4
-rw-r--r--	drivers/infiniband/hw/ehca/ehca_sqp.c		|   2
-rw-r--r--	drivers/infiniband/hw/ipath/ipath_user_pages.c	|   3
-rw-r--r--	drivers/infiniband/hw/mlx4/qp.c			|   4
-rw-r--r--	drivers/infiniband/hw/mthca/mthca_qp.c		|   2
-rw-r--r--	drivers/infiniband/hw/nes/nes.c			|   1
-rw-r--r--	drivers/infiniband/hw/nes/nes.h			|   9
-rw-r--r--	drivers/infiniband/hw/nes/nes_cm.c		|  11
-rw-r--r--	drivers/infiniband/hw/nes/nes_hw.c		| 484
-rw-r--r--	drivers/infiniband/hw/nes/nes_hw.h		|   2
-rw-r--r--	drivers/infiniband/hw/nes/nes_nic.c		|  61
-rw-r--r--	drivers/infiniband/hw/nes/nes_verbs.c		|   6
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_ethtool.c	|  10
-rw-r--r--	drivers/infiniband/ulp/iser/iscsi_iser.c	|  47
-rw-r--r--	drivers/infiniband/ulp/iser/iscsi_iser.h	|  97
-rw-r--r--	drivers/infiniband/ulp/iser/iser_initiator.c	| 506
-rw-r--r--	drivers/infiniband/ulp/iser/iser_memory.c	|  64
-rw-r--r--	drivers/infiniband/ulp/iser/iser_verbs.c	| 281
-rw-r--r--	drivers/infiniband/ulp/srp/ib_srp.c		|  91
-rw-r--r--	drivers/infiniband/ulp/srp/ib_srp.h		|   6
-rw-r--r--	drivers/net/cxgb3/adapter.h			|   5
-rw-r--r--	drivers/net/cxgb3/cxgb3_main.c			|  57
-rw-r--r--	drivers/net/cxgb3/cxgb3_offload.h		|   5
-rw-r--r--	drivers/net/cxgb3/regs.h			|  16
-rw-r--r--	drivers/net/cxgb3/sge.c				|  10
-rw-r--r--	drivers/net/cxgb3/t3_hw.c			|   5
-rw-r--r--	include/rdma/ib_pack.h				|   1
-rw-r--r--	include/rdma/ib_verbs.h				|   4
-rw-r--r--	include/rdma/rdma_cm.h				|   1
44 files changed, 1219 insertions, 1208 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index dd0db67bf8d7..975adce5f40c 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -20,6 +20,7 @@ config INFINIBAND_USER_MAD
 
 config INFINIBAND_USER_ACCESS
 	tristate "InfiniBand userspace access (verbs and CM)"
+	select ANON_INODES
 	---help---
 	  Userspace InfiniBand access support.  This enables the
 	  kernel side of userspace verbs and the userspace
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index f504c9b00c1b..1b09b735c5a8 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -1215,15 +1215,18 @@ static void ib_ucm_release_dev(struct device *dev)
 
 	ucm_dev = container_of(dev, struct ib_ucm_device, dev);
 	cdev_del(&ucm_dev->cdev);
-	clear_bit(ucm_dev->devnum, dev_map);
+	if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
+		clear_bit(ucm_dev->devnum, dev_map);
+	else
+		clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, dev_map);
 	kfree(ucm_dev);
 }
 
 static const struct file_operations ucm_fops = {
 	.owner = THIS_MODULE,
 	.open = ib_ucm_open,
 	.release = ib_ucm_close,
 	.write = ib_ucm_write,
 	.poll = ib_ucm_poll,
 };
 
@@ -1237,8 +1240,32 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
 
+static dev_t overflow_maj;
+static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES);
+static int find_overflow_devnum(void)
+{
+	int ret;
+
+	if (!overflow_maj) {
+		ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES,
+					  "infiniband_cm");
+		if (ret) {
+			printk(KERN_ERR "ucm: couldn't register dynamic device number\n");
+			return ret;
+		}
+	}
+
+	ret = find_first_zero_bit(overflow_map, IB_UCM_MAX_DEVICES);
+	if (ret >= IB_UCM_MAX_DEVICES)
+		return -1;
+
+	return ret;
+}
+
 static void ib_ucm_add_one(struct ib_device *device)
 {
+	int devnum;
+	dev_t base;
 	struct ib_ucm_device *ucm_dev;
 
 	if (!device->alloc_ucontext ||
@@ -1251,16 +1278,25 @@ static void ib_ucm_add_one(struct ib_device *device)
 
 	ucm_dev->ib_dev = device;
 
-	ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
-	if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES)
-		goto err;
-
-	set_bit(ucm_dev->devnum, dev_map);
+	devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
+	if (devnum >= IB_UCM_MAX_DEVICES) {
+		devnum = find_overflow_devnum();
+		if (devnum < 0)
+			goto err;
+
+		ucm_dev->devnum = devnum + IB_UCM_MAX_DEVICES;
+		base = devnum + overflow_maj;
+		set_bit(devnum, overflow_map);
+	} else {
+		ucm_dev->devnum = devnum;
+		base = devnum + IB_UCM_BASE_DEV;
+		set_bit(devnum, dev_map);
+	}
 
 	cdev_init(&ucm_dev->cdev, &ucm_fops);
 	ucm_dev->cdev.owner = THIS_MODULE;
 	kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum);
-	if (cdev_add(&ucm_dev->cdev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1))
+	if (cdev_add(&ucm_dev->cdev, base, 1))
 		goto err;
 
 	ucm_dev->dev.class = &cm_class;
@@ -1281,7 +1317,10 @@ err_dev:
 	device_unregister(&ucm_dev->dev);
 err_cdev:
 	cdev_del(&ucm_dev->cdev);
-	clear_bit(ucm_dev->devnum, dev_map);
+	if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
+		clear_bit(devnum, dev_map);
+	else
+		clear_bit(devnum, overflow_map);
 err:
 	kfree(ucm_dev);
 	return;
@@ -1340,6 +1379,8 @@ static void __exit ib_ucm_cleanup(void)
 	ib_unregister_client(&ucm_client);
 	class_remove_file(&cm_class, &class_attr_abi_version);
 	unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
+	if (overflow_maj)
+		unregister_chrdev_region(overflow_maj, IB_UCM_MAX_DEVICES);
 	idr_destroy(&ctx_id_table);
 }
 
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c
index 8ec7876bedcf..650b501eb142 100644
--- a/drivers/infiniband/core/ud_header.c
+++ b/drivers/infiniband/core/ud_header.c
@@ -181,6 +181,7 @@ static const struct ib_field deth_table[] = {
  * ib_ud_header_init - Initialize UD header structure
  * @payload_bytes:Length of packet payload
  * @grh_present:GRH flag (if non-zero, GRH will be included)
+ * @immediate_present: specify if immediate data should be used
  * @header:Structure to initialize
  *
  * ib_ud_header_init() initializes the lrh.link_version, lrh.link_next_header,
@@ -191,21 +192,13 @@ static const struct ib_field deth_table[] = {
  */
 void ib_ud_header_init(int payload_bytes,
 		       int grh_present,
+		       int immediate_present,
 		       struct ib_ud_header *header)
 {
-	int header_len;
 	u16 packet_length;
 
 	memset(header, 0, sizeof *header);
 
-	header_len =
-		IB_LRH_BYTES +
-		IB_BTH_BYTES +
-		IB_DETH_BYTES;
-	if (grh_present) {
-		header_len += IB_GRH_BYTES;
-	}
-
 	header->lrh.link_version = 0;
 	header->lrh.link_next_header =
 		grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL;
@@ -231,7 +224,8 @@ void ib_ud_header_init(int payload_bytes,
 
 	header->lrh.packet_length = cpu_to_be16(packet_length);
 
-	if (header->immediate_present)
+	header->immediate_present = immediate_present;
+	if (immediate_present)
 		header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
 	else
 		header->bth.opcode = IB_OPCODE_UD_SEND_ONLY;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 6f7c096abf13..4f906f0614f0 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -136,7 +136,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	down_write(&current->mm->mmap_sem);
 
 	locked = npages + current->mm->locked_vm;
-	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
 	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
 		ret = -ENOMEM;
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 7de02969ed7d..02d360cfc2f7 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -65,12 +65,9 @@ enum {
 };
 
 /*
- * Our lifetime rules for these structs are the following: each time a
- * device special file is opened, we look up the corresponding struct
- * ib_umad_port by minor in the umad_port[] table while holding the
- * port_lock.  If this lookup succeeds, we take a reference on the
- * ib_umad_port's struct ib_umad_device while still holding the
- * port_lock; if the lookup fails, we fail the open().  We drop these
+ * Our lifetime rules for these structs are the following:
+ * device special file is opened, we take a reference on the
+ * ib_umad_port's struct ib_umad_device.  We drop these
  * references in the corresponding close().
  *
  * In addition to references coming from open character devices, there
@@ -78,19 +75,14 @@ enum {
  * module's reference taken when allocating the ib_umad_device in
  * ib_umad_add_one().
  *
- * When destroying an ib_umad_device, we clear all of its
- * ib_umad_ports from umad_port[] while holding port_lock before
- * dropping the module's reference to the ib_umad_device.  This is
- * always safe because any open() calls will either succeed and obtain
- * a reference before we clear the umad_port[] entries, or fail after
- * we clear the umad_port[] entries.
+ * When destroying an ib_umad_device, we drop the module's reference.
  */
 
 struct ib_umad_port {
-	struct cdev *cdev;
+	struct cdev cdev;
 	struct device *dev;
 
-	struct cdev *sm_cdev;
+	struct cdev sm_cdev;
 	struct device *sm_dev;
 	struct semaphore sm_sem;
 
@@ -136,7 +128,6 @@ static struct class *umad_class;
 static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
 
 static DEFINE_SPINLOCK(port_lock);
-static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS];
 static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
 
 static void ib_umad_add_one(struct ib_device *device);
@@ -496,8 +487,8 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
 		ah_attr.ah_flags = IB_AH_GRH;
 		memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16);
 		ah_attr.grh.sgid_index = packet->mad.hdr.gid_index;
 		ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label);
 		ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit;
 		ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class;
 	}
 
@@ -528,9 +519,9 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
 		goto err_ah;
 	}
 
 	packet->msg->ah = ah;
 	packet->msg->timeout_ms = packet->mad.hdr.timeout_ms;
 	packet->msg->retries = packet->mad.hdr.retries;
 	packet->msg->context[0] = packet;
 
 	/* Copy MAD header.  Any RMPP header is already in place. */
@@ -779,15 +770,11 @@ static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd,
 /*
  * ib_umad_open() does not need the BKL:
  *
- * - umad_port[] accesses are protected by port_lock, the
- *   ib_umad_port structures are properly reference counted, and
+ * - the ib_umad_port structures are properly reference counted, and
  *   everything else is purely local to the file being created, so
  *   races against other open calls are not a problem;
  * - the ioctl method does not affect any global state outside of the
  *   file structure being operated on;
- * - the port is added to umad_port[] as the last part of module
- *   initialization so the open method will either immediately run
- *   -ENXIO, or all required initialization will be done.
  */
 static int ib_umad_open(struct inode *inode, struct file *filp)
 {
@@ -795,13 +782,10 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
 	struct ib_umad_file *file;
 	int ret = 0;
 
-	spin_lock(&port_lock);
-	port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE];
+	port = container_of(inode->i_cdev, struct ib_umad_port, cdev);
 	if (port)
 		kref_get(&port->umad_dev->ref);
-	spin_unlock(&port_lock);
-
-	if (!port)
+	else
 		return -ENXIO;
 
 	mutex_lock(&port->file_mutex);
@@ -872,16 +856,16 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
 }
 
 static const struct file_operations umad_fops = {
 	.owner = THIS_MODULE,
 	.read = ib_umad_read,
 	.write = ib_umad_write,
 	.poll = ib_umad_poll,
 	.unlocked_ioctl = ib_umad_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = ib_umad_compat_ioctl,
 #endif
 	.open = ib_umad_open,
 	.release = ib_umad_close
 };
 
 static int ib_umad_sm_open(struct inode *inode, struct file *filp)
@@ -892,13 +876,10 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)
 	};
 	int ret;
 
-	spin_lock(&port_lock);
-	port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS];
+	port = container_of(inode->i_cdev, struct ib_umad_port, sm_cdev);
 	if (port)
 		kref_get(&port->umad_dev->ref);
-	spin_unlock(&port_lock);
-
-	if (!port)
+	else
 		return -ENXIO;
 
 	if (filp->f_flags & O_NONBLOCK) {
@@ -949,8 +930,8 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
 }
 
 static const struct file_operations umad_sm_fops = {
 	.owner = THIS_MODULE,
 	.open = ib_umad_sm_open,
 	.release = ib_umad_sm_close
 };
 
@@ -990,16 +971,51 @@ static ssize_t show_abi_version(struct class *class, char *buf)
 }
 static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
 
+static dev_t overflow_maj;
+static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS);
+static int find_overflow_devnum(void)
+{
+	int ret;
+
+	if (!overflow_maj) {
+		ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2,
+					  "infiniband_mad");
+		if (ret) {
+			printk(KERN_ERR "user_mad: couldn't register dynamic device number\n");
+			return ret;
+		}
+	}
+
+	ret = find_first_zero_bit(overflow_map, IB_UMAD_MAX_PORTS);
+	if (ret >= IB_UMAD_MAX_PORTS)
+		return -1;
+
+	return ret;
+}
+
 static int ib_umad_init_port(struct ib_device *device, int port_num,
 			     struct ib_umad_port *port)
 {
+	int devnum;
+	dev_t base;
+
 	spin_lock(&port_lock);
-	port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
-	if (port->dev_num >= IB_UMAD_MAX_PORTS) {
+	devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
+	if (devnum >= IB_UMAD_MAX_PORTS) {
 		spin_unlock(&port_lock);
-		return -1;
+		devnum = find_overflow_devnum();
+		if (devnum < 0)
+			return -1;
+
+		spin_lock(&port_lock);
+		port->dev_num = devnum + IB_UMAD_MAX_PORTS;
+		base = devnum + overflow_maj;
+		set_bit(devnum, overflow_map);
+	} else {
+		port->dev_num = devnum;
+		base = devnum + base_dev;
+		set_bit(devnum, dev_map);
 	}
-	set_bit(port->dev_num, dev_map);
 	spin_unlock(&port_lock);
 
 	port->ib_dev = device;
@@ -1008,17 +1024,14 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
 	mutex_init(&port->file_mutex);
 	INIT_LIST_HEAD(&port->file_list);
 
-	port->cdev = cdev_alloc();
-	if (!port->cdev)
-		return -1;
-	port->cdev->owner = THIS_MODULE;
-	port->cdev->ops = &umad_fops;
-	kobject_set_name(&port->cdev->kobj, "umad%d", port->dev_num);
-	if (cdev_add(port->cdev, base_dev + port->dev_num, 1))
+	cdev_init(&port->cdev, &umad_fops);
+	port->cdev.owner = THIS_MODULE;
+	kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
+	if (cdev_add(&port->cdev, base, 1))
 		goto err_cdev;
 
 	port->dev = device_create(umad_class, device->dma_device,
-				  port->cdev->dev, port,
+				  port->cdev.dev, port,
 				  "umad%d", port->dev_num);
 	if (IS_ERR(port->dev))
 		goto err_cdev;
@@ -1028,17 +1041,15 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
 	if (device_create_file(port->dev, &dev_attr_port))
 		goto err_dev;
 
-	port->sm_cdev = cdev_alloc();
-	if (!port->sm_cdev)
-		goto err_dev;
-	port->sm_cdev->owner = THIS_MODULE;
-	port->sm_cdev->ops = &umad_sm_fops;
-	kobject_set_name(&port->sm_cdev->kobj, "issm%d", port->dev_num);
-	if (cdev_add(port->sm_cdev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1))
+	base += IB_UMAD_MAX_PORTS;
+	cdev_init(&port->sm_cdev, &umad_sm_fops);
+	port->sm_cdev.owner = THIS_MODULE;
+	kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
+	if (cdev_add(&port->sm_cdev, base, 1))
 		goto err_sm_cdev;
 
 	port->sm_dev = device_create(umad_class, device->dma_device,
-				     port->sm_cdev->dev, port,
+				     port->sm_cdev.dev, port,
 				     "issm%d", port->dev_num);
 	if (IS_ERR(port->sm_dev))
 		goto err_sm_cdev;
@@ -1048,24 +1059,23 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
 	if (device_create_file(port->sm_dev, &dev_attr_port))
 		goto err_sm_dev;
 
-	spin_lock(&port_lock);
-	umad_port[port->dev_num] = port;
-	spin_unlock(&port_lock);
-
 	return 0;
 
 err_sm_dev:
-	device_destroy(umad_class, port->sm_cdev->dev);
+	device_destroy(umad_class, port->sm_cdev.dev);
 
 err_sm_cdev:
-	cdev_del(port->sm_cdev);
+	cdev_del(&port->sm_cdev);
 
 err_dev:
-	device_destroy(umad_class, port->cdev->dev);
+	device_destroy(umad_class, port->cdev.dev);
 
 err_cdev:
-	cdev_del(port->cdev);
-	clear_bit(port->dev_num, dev_map);
+	cdev_del(&port->cdev);
+	if (port->dev_num < IB_UMAD_MAX_PORTS)
+		clear_bit(devnum, dev_map);
+	else
+		clear_bit(devnum, overflow_map);
 
 	return -1;
 }
@@ -1079,15 +1089,11 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
 	dev_set_drvdata(port->dev, NULL);
 	dev_set_drvdata(port->sm_dev, NULL);
 
-	device_destroy(umad_class, port->cdev->dev);
-	device_destroy(umad_class, port->sm_cdev->dev);
+	device_destroy(umad_class, port->cdev.dev);
+	device_destroy(umad_class, port->sm_cdev.dev);
 
-	cdev_del(port->cdev);
-	cdev_del(port->sm_cdev);
-
-	spin_lock(&port_lock);
-	umad_port[port->dev_num] = NULL;
-	spin_unlock(&port_lock);
+	cdev_del(&port->cdev);
+	cdev_del(&port->sm_cdev);
 
 	mutex_lock(&port->file_mutex);
 
@@ -1106,7 +1112,10 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
 
 	mutex_unlock(&port->file_mutex);
 
-	clear_bit(port->dev_num, dev_map);
+	if (port->dev_num < IB_UMAD_MAX_PORTS)
+		clear_bit(port->dev_num, dev_map);
+	else
+		clear_bit(port->dev_num - IB_UMAD_MAX_PORTS, overflow_map);
 }
 
 static void ib_umad_add_one(struct ib_device *device)
@@ -1214,6 +1223,8 @@ static void __exit ib_umad_cleanup(void)
 	ib_unregister_client(&umad_client);
 	class_destroy(umad_class);
 	unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
+	if (overflow_maj)
+		unregister_chrdev_region(overflow_maj, IB_UMAD_MAX_PORTS * 2);
 }
 
 module_init(ib_umad_init);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index b3ea9587dc80..e54d9ac6d1ca 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -41,6 +41,7 @@
 #include <linux/idr.h>
 #include <linux/mutex.h>
 #include <linux/completion.h>
+#include <linux/cdev.h>
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_umem.h>
@@ -69,23 +70,23 @@
 
 struct ib_uverbs_device {
 	struct kref ref;
+	int num_comp_vectors;
 	struct completion comp;
-	int devnum;
-	struct cdev *cdev;
 	struct device *dev;
 	struct ib_device *ib_dev;
-	int num_comp_vectors;
+	int devnum;
+	struct cdev cdev;
 };
 
 struct ib_uverbs_event_file {
 	struct kref ref;
+	int is_async;
 	struct ib_uverbs_file *uverbs_file;
 	spinlock_t lock;
+	int is_closed;
 	wait_queue_head_t poll_wait;
 	struct fasync_struct *async_queue;
 	struct list_head event_list;
-	int is_async;
-	int is_closed;
 };
 
 struct ib_uverbs_file {
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 5f284ffd430e..ff59a795e840 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -42,8 +42,8 @@
 #include <linux/poll.h>
 #include <linux/sched.h>
 #include <linux/file.h>
-#include <linux/mount.h>
 #include <linux/cdev.h>
+#include <linux/anon_inodes.h>
 
 #include <asm/uaccess.h>
 
@@ -53,8 +53,6 @@ MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("InfiniBand userspace verbs access");
 MODULE_LICENSE("Dual BSD/GPL");
 
-#define INFINIBANDEVENTFS_MAGIC	0x49426576	/* "IBev" */
-
 enum {
 	IB_UVERBS_MAJOR = 231,
 	IB_UVERBS_BASE_MINOR = 192,
@@ -75,44 +73,41 @@ DEFINE_IDR(ib_uverbs_qp_idr);
 DEFINE_IDR(ib_uverbs_srq_idr);
 
 static DEFINE_SPINLOCK(map_lock);
-static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES];
 static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
 
 static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
 				     const char __user *buf, int in_len,
 				     int out_len) = {
 	[IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
 	[IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
 	[IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
 	[IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
 	[IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
 	[IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
 	[IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
 	[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
 	[IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
 	[IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
 	[IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
 	[IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
 	[IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
 	[IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
 	[IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
 	[IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
 	[IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
 	[IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
 	[IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
 	[IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
 	[IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
 	[IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
 	[IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
 	[IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
 	[IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
 	[IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
 	[IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
 	[IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
 };
 
-static struct vfsmount *uverbs_event_mnt;
-
 static void ib_uverbs_add_one(struct ib_device *device);
 static void ib_uverbs_remove_one(struct ib_device *device);
 
@@ -370,7 +365,7 @@ static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
 
 static const struct file_operations uverbs_event_fops = {
 	.owner = THIS_MODULE,
 	.read = ib_uverbs_event_read,
 	.poll = ib_uverbs_event_poll,
 	.release = ib_uverbs_event_close,
 	.fasync = ib_uverbs_event_fasync
@@ -492,7 +487,6 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
 					int is_async, int *fd)
 {
 	struct ib_uverbs_event_file *ev_file;
-	struct path path;
 	struct file *filp;
 	int ret;
 
@@ -515,27 +509,16 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
 		goto err;
 	}
 
-	/*
-	 * fops_get() can't fail here, because we're coming from a
-	 * system call on a uverbs file, which will already have a
-	 * module reference.
-	 */
-	path.mnt = uverbs_event_mnt;
-	path.dentry = uverbs_event_mnt->mnt_root;
-	path_get(&path);
-	filp = alloc_file(&path, FMODE_READ, fops_get(&uverbs_event_fops));
+	filp = anon_inode_getfile("[uverbs-event]", &uverbs_event_fops,
+				  ev_file, O_RDONLY);
 	if (!filp) {
 		ret = -ENFILE;
 		goto err_fd;
 	}
 
-	filp->private_data = ev_file;
-
 	return filp;
 
 err_fd:
-	fops_put(&uverbs_event_fops);
-	path_put(&path);
 	put_unused_fd(*fd);
 
 err:
@@ -617,14 +600,12 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
 /*
  * ib_uverbs_open() does not need the BKL:
  *
- * - dev_table[] accesses are protected by map_lock, the
- *   ib_uverbs_device structures are properly reference counted, and
+ * - the ib_uverbs_device structures are properly reference counted and
  *   everything else is purely local to the file being created, so
  *   races against other open calls are not a problem;
  * - there is no ioctl method to race against;
- * - the device is added to dev_table[] as the last part of module
- *   initialization, the open method will either immediately run
- *   -ENXIO, or all required initialization will be done.
+ * - the open method will either immediately run -ENXIO, or all
+ *   required initialization will be done.
  */
 static int ib_uverbs_open(struct inode *inode, struct file *filp)
 {
@@ -632,13 +613,10 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
 	struct ib_uverbs_file *file;
 	int ret;
 
-	spin_lock(&map_lock);
-	dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR];
+	dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
 	if (dev)
 		kref_get(&dev->ref);
-	spin_unlock(&map_lock);
-
-	if (!dev)
+	else
 		return -ENXIO;
 
 	if (!try_module_get(dev->ib_dev->owner)) {
@@ -685,17 +663,17 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
 }
 
 static const struct file_operations uverbs_fops = {
 	.owner = THIS_MODULE,
 	.write = ib_uverbs_write,
 	.open = ib_uverbs_open,
 	.release = ib_uverbs_close
 };
 
 static const struct file_operations uverbs_mmap_fops = {
 	.owner = THIS_MODULE,
 	.write = ib_uverbs_write,
 	.mmap = ib_uverbs_mmap,
 	.open = ib_uverbs_open,
 	.release = ib_uverbs_close
 };
 
@@ -735,8 +713,38 @@ static ssize_t show_abi_version(struct class *class, char *buf)
 }
 static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
 
+static dev_t overflow_maj;
+static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);
+
+/*
+ * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by
+ * requesting a new major number and doubling the number of max devices we
+ * support. It's stupid, but simple.
+ */
+static int find_overflow_devnum(void)
+{
+	int ret;
+
+	if (!overflow_maj) {
+		ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
+					  "infiniband_verbs");
+		if (ret) {
+			printk(KERN_ERR "user_verbs: couldn't register dynamic device number\n");
+			return ret;
+		}
+	}
+
+	ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
+	if (ret >= IB_UVERBS_MAX_DEVICES)
+		return -1;
+
+	return ret;
+}
+
 static void ib_uverbs_add_one(struct ib_device *device)
 {
+	int devnum;
+	dev_t base;
 	struct ib_uverbs_device *uverbs_dev;
 
 	if (!device->alloc_ucontext)
@@ -750,28 +758,36 @@ static void ib_uverbs_add_one(struct ib_device *device)
 	init_completion(&uverbs_dev->comp);
 
 	spin_lock(&map_lock);
-	uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
-	if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) {
+	devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
+	if (devnum >= IB_UVERBS_MAX_DEVICES) {
 		spin_unlock(&map_lock);
-		goto err;
+		devnum = find_overflow_devnum();
+		if (devnum < 0)
+			goto err;
+
+		spin_lock(&map_lock);
+		uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
+		base = devnum + overflow_maj;
+		set_bit(devnum, overflow_map);
+	} else {
+		uverbs_dev->devnum = devnum;
+		base = devnum + IB_UVERBS_BASE_DEV;
+		set_bit(devnum, dev_map);
 	}
-	set_bit(uverbs_dev->devnum, dev_map);
 	spin_unlock(&map_lock);
 
 	uverbs_dev->ib_dev = device;
 	uverbs_dev->num_comp_vectors = device->num_comp_vectors;
 
-	uverbs_dev->cdev = cdev_alloc();
-	if (!uverbs_dev->cdev)
-		goto err;
-	uverbs_dev->cdev->owner = THIS_MODULE;
-	uverbs_dev->cdev->ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
-	kobject_set_name(&uverbs_dev->cdev->kobj, "uverbs%d", uverbs_dev->devnum);
-	if (cdev_add(uverbs_dev->cdev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
+	cdev_init(&uverbs_dev->cdev, NULL);
+	uverbs_dev->cdev.owner = THIS_MODULE;
+	uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
+	kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
+	if (cdev_add(&uverbs_dev->cdev, base, 1))
 		goto err_cdev;
 
 	uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
-					uverbs_dev->cdev->dev, uverbs_dev,
+					uverbs_dev->cdev.dev, uverbs_dev,
 					"uverbs%d", uverbs_dev->devnum);
 	if (IS_ERR(uverbs_dev->dev))
 		goto err_cdev;
@@ -781,20 +797,19 @@ static void ib_uverbs_add_one(struct ib_device *device)
 	if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
 		goto err_class;
 
-	spin_lock(&map_lock);
-	dev_table[uverbs_dev->devnum] = uverbs_dev;
-	spin_unlock(&map_lock);
-
 	ib_set_client_data(device, &uverbs_client, uverbs_dev);
 
 	return;
 
 err_class:
-	device_destroy(uverbs_class, uverbs_dev->cdev->dev);
+	device_destroy(uverbs_class, uverbs_dev->cdev.dev);
 
 err_cdev:
-	cdev_del(uverbs_dev->cdev);
-	clear_bit(uverbs_dev->devnum, dev_map);
+	cdev_del(&uverbs_dev->cdev);
+	if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
+		clear_bit(devnum, dev_map);
+	else
+		clear_bit(devnum, overflow_map);
 
 err:
 	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
@@ -811,35 +826,19 @@ static void ib_uverbs_remove_one(struct ib_device *device)
 		return;
 
 	dev_set_drvdata(uverbs_dev->dev, NULL);
-	device_destroy(uverbs_class, uverbs_dev->cdev->dev);
-	cdev_del(uverbs_dev->cdev);
+	device_destroy(uverbs_class, uverbs_dev->cdev.dev);
+	cdev_del(&uverbs_dev->cdev);
 
-	spin_lock(&map_lock);
-	dev_table[uverbs_dev->devnum] = NULL;
-	spin_unlock(&map_lock);
-
-	clear_bit(uverbs_dev->devnum, dev_map);
+	if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
+		clear_bit(uverbs_dev->devnum, dev_map);
+	else
+		clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
 
 	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
 	wait_for_completion(&uverbs_dev->comp);
 	kfree(uverbs_dev);
 }
 
-static int uverbs_event_get_sb(struct file_system_type *fs_type, int flags,
-			       const char *dev_name, void *data,
-			       struct vfsmount *mnt)
-{
-	return get_sb_pseudo(fs_type, "infinibandevent:", NULL,
-			     INFINIBANDEVENTFS_MAGIC, mnt);
-}
-
-static struct file_system_type uverbs_event_fs = {
-	/* No owner field so module can be unloaded */
-	.name = "infinibandeventfs",
-	.get_sb = uverbs_event_get_sb,
-	.kill_sb = kill_litter_super
-};
-
 static int __init ib_uverbs_init(void)
 {
 	int ret;
@@ -864,33 +863,14 @@ static int __init ib_uverbs_init(void)
 		goto out_class;
 	}
 
-	ret = register_filesystem(&uverbs_event_fs);
-	if (ret) {
-		printk(KERN_ERR "user_verbs: couldn't register infinibandeventfs\n");
-		goto out_class;
-	}
-
-	uverbs_event_mnt = kern_mount(&uverbs_event_fs);
-	if (IS_ERR(uverbs_event_mnt)) {
-		ret = PTR_ERR(uverbs_event_mnt);
-		printk(KERN_ERR "user_verbs: couldn't mount infinibandeventfs\n");
-		goto out_fs;
-	}
-
 	ret = ib_register_client(&uverbs_client);
 	if (ret) {
 		printk(KERN_ERR "user_verbs: couldn't register client\n");
-		goto out_mnt;
+		goto out_class;
 	}
 
 	return 0;
 
-out_mnt:
-	mntput(uverbs_event_mnt);
-
-out_fs:
-	unregister_filesystem(&uverbs_event_fs);
-
 out_class:
 	class_destroy(uverbs_class);
 
@@ -904,10 +884,10 @@ out:
 static void __exit ib_uverbs_cleanup(void)
 {
 	ib_unregister_client(&uverbs_client);
-	mntput(uverbs_event_mnt);
-	unregister_filesystem(&uverbs_event_fs);
 	class_destroy(uverbs_class);
 	unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
+	if (overflow_maj)
+		unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
 	idr_destroy(&ib_uverbs_pd_idr);
 	idr_destroy(&ib_uverbs_mr_idr);
 	idr_destroy(&ib_uverbs_mw_idr);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 0677fc7dfd51..a28e862f2d68 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -109,7 +109,6 @@ int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
 	while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
 		udelay(1);
 		if (i++ > 1000000) {
-			BUG_ON(1);
 			printk(KERN_ERR "%s: stalled rnic\n",
 			       rdev_p->dev_name);
 			return -EIO;
@@ -155,7 +154,7 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
 	return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
 }
 
-int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel)
 {
 	struct rdma_cq_setup setup;
 	int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe);
@@ -163,12 +162,12 @@ int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
 	cq->cqid = cxio_hal_get_cqid(rdev_p->rscp);
 	if (!cq->cqid)
 		return -ENOMEM;
-	cq->sw_queue = kzalloc(size, GFP_KERNEL);
-	if (!cq->sw_queue)
-		return -ENOMEM;
-	cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
-				       (1UL << (cq->size_log2)) *
-				       sizeof(struct t3_cqe),
+	if (kernel) {
+		cq->sw_queue = kzalloc(size, GFP_KERNEL);
+		if (!cq->sw_queue)
+			return -ENOMEM;
+	}
+	cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), size,
 				       &(cq->dma_addr), GFP_KERNEL);
 	if (!cq->queue) {
 		kfree(cq->sw_queue);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index f3d440cc68f2..073373c2c560 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -53,7 +53,7 @@
 #define T3_MAX_PBL_SIZE 256
 #define T3_MAX_RQ_SIZE 1024
 #define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1)
-#define T3_MAX_CQ_DEPTH 8192
+#define T3_MAX_CQ_DEPTH 262144
 #define T3_MAX_NUM_STAG (1<<15)
 #define T3_MAX_MR_SIZE 0x100000000ULL
 #define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */
@@ -157,7 +157,7 @@ int cxio_rdev_open(struct cxio_rdev *rdev);
 void cxio_rdev_close(struct cxio_rdev *rdev);
 int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
 		   enum t3_cq_opcode op, u32 credit);
-int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel);
 int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
 int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
 void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index a197a5b7ac7f..15073b2da1c5 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -730,7 +730,22 @@ struct t3_cq {
 
 static inline void cxio_set_wq_in_error(struct t3_wq *wq)
 {
-	wq->queue->wq_in_err.err = 1;
+	wq->queue->wq_in_err.err |= 1;
+}
+
+static inline void cxio_disable_wq_db(struct t3_wq *wq)
+{
+	wq->queue->wq_in_err.err |= 2;
+}
+
+static inline void cxio_enable_wq_db(struct t3_wq *wq)
+{
+	wq->queue->wq_in_err.err &= ~2;
+}
+
+static inline int cxio_wq_db_enabled(struct t3_wq *wq)
+{
+	return !(wq->queue->wq_in_err.err & 2);
 }
 
 static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq)
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index b0ea0105ddf6..ee1d8b4d4541 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -65,6 +65,46 @@ struct cxgb3_client t3c_client = {
 static LIST_HEAD(dev_list);
 static DEFINE_MUTEX(dev_mutex);
 
+static int disable_qp_db(int id, void *p, void *data)
+{
+	struct iwch_qp *qhp = p;
+
+	cxio_disable_wq_db(&qhp->wq);
+	return 0;
+}
+
+static int enable_qp_db(int id, void *p, void *data)
+{
+	struct iwch_qp *qhp = p;
+
+	if (data)
+		ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell, qhp->wq.qpid);
+	cxio_enable_wq_db(&qhp->wq);
+	return 0;
+}
+
+static void disable_dbs(struct iwch_dev *rnicp)
+{
+	spin_lock_irq(&rnicp->lock);
+	idr_for_each(&rnicp->qpidr, disable_qp_db, NULL);
+	spin_unlock_irq(&rnicp->lock);
+}
+
+static void enable_dbs(struct iwch_dev *rnicp, int ring_db)
+{
+	spin_lock_irq(&rnicp->lock);
+	idr_for_each(&rnicp->qpidr, enable_qp_db,
+		     (void *)(unsigned long)ring_db);
+	spin_unlock_irq(&rnicp->lock);
+}
+
+static void iwch_db_drop_task(struct work_struct *work)
+{
+	struct iwch_dev *rnicp = container_of(work, struct iwch_dev,
+					      db_drop_task.work);
+	enable_dbs(rnicp, 1);
+}
+
 static void rnic_init(struct iwch_dev *rnicp)
 {
 	PDBG("%s iwch_dev %p\n", __func__, rnicp);
@@ -72,6 +112,7 @@ static void rnic_init(struct iwch_dev *rnicp)
 	idr_init(&rnicp->qpidr);
 	idr_init(&rnicp->mmidr);
 	spin_lock_init(&rnicp->lock);
+	INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task);
 
 	rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
 	rnicp->attr.max_wrs = T3_MAX_QP_DEPTH;
@@ -147,6 +188,8 @@ static void close_rnic_dev(struct t3cdev *tdev)
 	mutex_lock(&dev_mutex);
 	list_for_each_entry_safe(dev, tmp, &dev_list, entry) {
 		if (dev->rdev.t3cdev_p == tdev) {
+			dev->rdev.flags = CXIO_ERROR_FATAL;
+			cancel_delayed_work_sync(&dev->db_drop_task);
 			list_del(&dev->entry);
 			iwch_unregister_device(dev);
 			cxio_rdev_close(&dev->rdev);
@@ -165,7 +208,8 @@ static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id)
 	struct cxio_rdev *rdev = tdev->ulp;
 	struct iwch_dev *rnicp;
 	struct ib_event event;
 	u32 portnum = port_id + 1;
+	int dispatch = 0;
 
 	if (!rdev)
 		return;
@@ -174,21 +218,49 @@ static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id)
 	case OFFLOAD_STATUS_DOWN: {
 		rdev->flags = CXIO_ERROR_FATAL;
 		event.event = IB_EVENT_DEVICE_FATAL;
+		dispatch = 1;
 		break;
 	}
 	case OFFLOAD_PORT_DOWN: {
 		event.event = IB_EVENT_PORT_ERR;
+		dispatch = 1;
 		break;
 	}
 	case OFFLOAD_PORT_UP: {
 		event.event = IB_EVENT_PORT_ACTIVE;
+		dispatch = 1;
+		break;
+	}
+	case OFFLOAD_DB_FULL: {
+		disable_dbs(rnicp);
+		break;
+	}
+	case OFFLOAD_DB_EMPTY: {
+		enable_dbs(rnicp, 1);
+		break;
+	}
+	case OFFLOAD_DB_DROP: {
+		unsigned long delay = 1000;
+		unsigned short r;
+
+		disable_dbs(rnicp);
+		get_random_bytes(&r, 2);
+		delay += r & 1023;
+
+		/*
+		 * delay is between 1000-2023 usecs.
+		 */
+		schedule_delayed_work(&rnicp->db_drop_task,
+			usecs_to_jiffies(delay));
 		break;
 	}
 	}
 
-	event.device = &rnicp->ibdev;
-	event.element.port_num = portnum;
-	ib_dispatch_event(&event);
+	if (dispatch) {
+		event.device = &rnicp->ibdev;
+		event.element.port_num = portnum;
+		ib_dispatch_event(&event);
+	}
 
 	return;
 }
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
index 84735506333f..a1c44578e039 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ b/drivers/infiniband/hw/cxgb3/iwch.h
@@ -36,6 +36,7 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/idr.h>
+#include <linux/workqueue.h>
 
 #include <rdma/ib_verbs.h>
 
@@ -110,6 +111,7 @@ struct iwch_dev {
 	struct idr mmidr;
 	spinlock_t lock;
 	struct list_head entry;
+	struct delayed_work db_drop_task;
 };
 
 static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index ed7175549ebd..47b35c6608d2 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -187,7 +187,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve
187 entries = roundup_pow_of_two(entries); 187 entries = roundup_pow_of_two(entries);
188 chp->cq.size_log2 = ilog2(entries); 188 chp->cq.size_log2 = ilog2(entries);
189 189
190 if (cxio_create_cq(&rhp->rdev, &chp->cq)) { 190 if (cxio_create_cq(&rhp->rdev, &chp->cq, !ucontext)) {
191 kfree(chp); 191 kfree(chp);
192 return ERR_PTR(-ENOMEM); 192 return ERR_PTR(-ENOMEM);
193 } 193 }
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 3eb8cecf81d7..b4d893de3650 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -452,7 +452,8 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
452 ++(qhp->wq.sq_wptr); 452 ++(qhp->wq.sq_wptr);
453 } 453 }
454 spin_unlock_irqrestore(&qhp->lock, flag); 454 spin_unlock_irqrestore(&qhp->lock, flag);
455 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); 455 if (cxio_wq_db_enabled(&qhp->wq))
456 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
456 457
457out: 458out:
458 if (err) 459 if (err)
@@ -514,7 +515,8 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
514 num_wrs--; 515 num_wrs--;
515 } 516 }
516 spin_unlock_irqrestore(&qhp->lock, flag); 517 spin_unlock_irqrestore(&qhp->lock, flag);
517 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); 518 if (cxio_wq_db_enabled(&qhp->wq))
519 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
518 520
519out: 521out:
520 if (err) 522 if (err)
@@ -597,7 +599,8 @@ int iwch_bind_mw(struct ib_qp *qp,
597 ++(qhp->wq.sq_wptr); 599 ++(qhp->wq.sq_wptr);
598 spin_unlock_irqrestore(&qhp->lock, flag); 600 spin_unlock_irqrestore(&qhp->lock, flag);
599 601
600 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); 602 if (cxio_wq_db_enabled(&qhp->wq))
603 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
601 604
602 return err; 605 return err;
603} 606}
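
[Editor's note] All three post paths now consult cxio_wq_db_enabled() before ringing the doorbell, so work posted while the doorbell FIFO is overflowing stays queued in host memory and the doorbell is deferred until recovery re-enables it. A standalone sketch of that guard-then-ring pattern; the struct and flag below are placeholders, not the driver's actual cxio_wr.h implementation:

#include <stdbool.h>
#include <stdio.h>

struct wq {
	bool db_enabled;		/* placeholder for the per-WQ doorbell state */
	unsigned int qpid;
};

static void ring_doorbell(struct wq *wq)
{
	printf("doorbell rung for qpid %u\n", wq->qpid);
}

static void post_work(struct wq *wq)
{
	/* ... work request is written to the queue in host memory ... */
	if (wq->db_enabled)		/* mirrors the cxio_wq_db_enabled() check */
		ring_doorbell(wq);
	/* otherwise the doorbell is deferred until doorbells are re-enabled */
}

int main(void)
{
	struct wq wq = { .db_enabled = true, .qpid = 7 };

	post_work(&wq);
	return 0;
}
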
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 42be0b15084b..b2b6fea2b141 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -548,11 +548,10 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
548 struct ehca_eq *eq = &shca->eq; 548 struct ehca_eq *eq = &shca->eq;
549 struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; 549 struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
550 u64 eqe_value, ret; 550 u64 eqe_value, ret;
551 unsigned long flags;
552 int eqe_cnt, i; 551 int eqe_cnt, i;
553 int eq_empty = 0; 552 int eq_empty = 0;
554 553
555 spin_lock_irqsave(&eq->irq_spinlock, flags); 554 spin_lock(&eq->irq_spinlock);
556 if (is_irq) { 555 if (is_irq) {
557 const int max_query_cnt = 100; 556 const int max_query_cnt = 100;
558 int query_cnt = 0; 557 int query_cnt = 0;
@@ -643,7 +642,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
643 } while (1); 642 } while (1);
644 643
645unlock_irq_spinlock: 644unlock_irq_spinlock:
646 spin_unlock_irqrestore(&eq->irq_spinlock, flags); 645 spin_unlock(&eq->irq_spinlock);
647} 646}
648 647
649void ehca_tasklet_eq(unsigned long data) 648void ehca_tasklet_eq(unsigned long data)
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 0338f1fabe8a..b105f664d3ef 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -55,9 +55,7 @@ static struct kmem_cache *qp_cache;
55/* 55/*
56 * attributes not supported by query qp 56 * attributes not supported by query qp
57 */ 57 */
58#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_MAX_DEST_RD_ATOMIC | \ 58#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS | \
59 IB_QP_MAX_QP_RD_ATOMIC | \
60 IB_QP_ACCESS_FLAGS | \
61 IB_QP_EN_SQD_ASYNC_NOTIFY) 59 IB_QP_EN_SQD_ASYNC_NOTIFY)
62 60
63/* 61/*
diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c
index 8c1213f8916a..dba8f9f8b996 100644
--- a/drivers/infiniband/hw/ehca/ehca_sqp.c
+++ b/drivers/infiniband/hw/ehca/ehca_sqp.c
@@ -222,7 +222,7 @@ int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
222{ 222{
223 int ret; 223 int ret;
224 224
225 if (!port_num || port_num > ibdev->phys_port_cnt) 225 if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc)
226 return IB_MAD_RESULT_FAILURE; 226 return IB_MAD_RESULT_FAILURE;
227 227
228 /* accept only pma request */ 228 /* accept only pma request */
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c
index 82878e348627..eb7d59abd12d 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_pages.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -59,8 +59,7 @@ static int __get_user_pages(unsigned long start_page, size_t num_pages,
59 size_t got; 59 size_t got;
60 int ret; 60 int ret;
61 61
62 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> 62 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
63 PAGE_SHIFT;
64 63
65 if (num_pages > lock_limit) { 64 if (num_pages > lock_limit) {
66 ret = -ENOMEM; 65 ret = -ENOMEM;
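
[Editor's note] The ipath change swaps the open-coded signal->rlim access for the rlimit() helper from <linux/sched.h>; both expressions read the current task's soft limit. A sketch of the equivalent page-count computation:

#include <linux/mm.h>
#include <linux/sched.h>

static unsigned long memlock_limit_pages(void)
{
	/* same value as current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT */
	return rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
}
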
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 2a97c964b9ef..ae75389937d6 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1214,7 +1214,7 @@ out:
1214static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, 1214static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1215 void *wqe, unsigned *mlx_seg_len) 1215 void *wqe, unsigned *mlx_seg_len)
1216{ 1216{
1217 struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev; 1217 struct ib_device *ib_dev = sqp->qp.ibqp.device;
1218 struct mlx4_wqe_mlx_seg *mlx = wqe; 1218 struct mlx4_wqe_mlx_seg *mlx = wqe;
1219 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; 1219 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
1220 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); 1220 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
@@ -1228,7 +1228,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1228 for (i = 0; i < wr->num_sge; ++i) 1228 for (i = 0; i < wr->num_sge; ++i)
1229 send_size += wr->sg_list[i].length; 1229 send_size += wr->sg_list[i].length;
1230 1230
1231 ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header); 1231 ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), 0, &sqp->ud_header);
1232 1232
1233 sqp->ud_header.lrh.service_level = 1233 sqp->ud_header.lrh.service_level =
1234 be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28; 1234 be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index c10576fa60c1..d2d172e6289c 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1494,7 +1494,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
1494 u16 pkey; 1494 u16 pkey;
1495 1495
1496 ib_ud_header_init(256, /* assume a MAD */ 1496 ib_ud_header_init(256, /* assume a MAD */
1497 mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), 1497 mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), 0,
1498 &sqp->ud_header); 1498 &sqp->ud_header);
1499 1499
1500 err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header); 1500 err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header);
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index b9d09bafd6c1..4272c52e38a4 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -110,6 +110,7 @@ static unsigned int sysfs_idx_addr;
110 110
111static struct pci_device_id nes_pci_table[] = { 111static struct pci_device_id nes_pci_table[] = {
112 {PCI_VENDOR_ID_NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020, PCI_ANY_ID, PCI_ANY_ID}, 112 {PCI_VENDOR_ID_NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020, PCI_ANY_ID, PCI_ANY_ID},
113 {PCI_VENDOR_ID_NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020_KR, PCI_ANY_ID, PCI_ANY_ID},
113 {0} 114 {0}
114}; 115};
115 116
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 98840564bb2f..cc78fee1dd51 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -64,8 +64,9 @@
64 * NetEffect PCI vendor id and NE010 PCI device id. 64 * NetEffect PCI vendor id and NE010 PCI device id.
65 */ 65 */
66#ifndef PCI_VENDOR_ID_NETEFFECT /* not in pci.ids yet */ 66#ifndef PCI_VENDOR_ID_NETEFFECT /* not in pci.ids yet */
67#define PCI_VENDOR_ID_NETEFFECT 0x1678 67#define PCI_VENDOR_ID_NETEFFECT 0x1678
68#define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100 68#define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100
69#define PCI_DEVICE_ID_NETEFFECT_NE020_KR 0x0110
69#endif 70#endif
70 71
71#define NE020_REV 4 72#define NE020_REV 4
@@ -193,8 +194,8 @@ extern u32 cm_packets_created;
193extern u32 cm_packets_received; 194extern u32 cm_packets_received;
194extern u32 cm_packets_dropped; 195extern u32 cm_packets_dropped;
195extern u32 cm_packets_retrans; 196extern u32 cm_packets_retrans;
196extern u32 cm_listens_created; 197extern atomic_t cm_listens_created;
197extern u32 cm_listens_destroyed; 198extern atomic_t cm_listens_destroyed;
198extern u32 cm_backlog_drops; 199extern u32 cm_backlog_drops;
199extern atomic_t cm_loopbacks; 200extern atomic_t cm_loopbacks;
200extern atomic_t cm_nodes_created; 201extern atomic_t cm_nodes_created;
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 39468c277036..2a49ee40b520 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -67,8 +67,8 @@ u32 cm_packets_dropped;
67u32 cm_packets_retrans; 67u32 cm_packets_retrans;
68u32 cm_packets_created; 68u32 cm_packets_created;
69u32 cm_packets_received; 69u32 cm_packets_received;
70u32 cm_listens_created; 70atomic_t cm_listens_created;
71u32 cm_listens_destroyed; 71atomic_t cm_listens_destroyed;
72u32 cm_backlog_drops; 72u32 cm_backlog_drops;
73atomic_t cm_loopbacks; 73atomic_t cm_loopbacks;
74atomic_t cm_nodes_created; 74atomic_t cm_nodes_created;
@@ -1011,9 +1011,10 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
1011 event.cm_info.loc_port = 1011 event.cm_info.loc_port =
1012 loopback->loc_port; 1012 loopback->loc_port;
1013 event.cm_info.cm_id = loopback->cm_id; 1013 event.cm_info.cm_id = loopback->cm_id;
1014 add_ref_cm_node(loopback);
1015 loopback->state = NES_CM_STATE_CLOSED;
1014 cm_event_connect_error(&event); 1016 cm_event_connect_error(&event);
1015 cm_node->state = NES_CM_STATE_LISTENER_DESTROYED; 1017 cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
1016 loopback->state = NES_CM_STATE_CLOSED;
1017 1018
1018 rem_ref_cm_node(cm_node->cm_core, 1019 rem_ref_cm_node(cm_node->cm_core,
1019 cm_node); 1020 cm_node);
@@ -1042,7 +1043,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
1042 kfree(listener); 1043 kfree(listener);
1043 listener = NULL; 1044 listener = NULL;
1044 ret = 0; 1045 ret = 0;
1045 cm_listens_destroyed++; 1046 atomic_inc(&cm_listens_destroyed);
1046 } else { 1047 } else {
1047 spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); 1048 spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
1048 } 1049 }
@@ -3172,7 +3173,7 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
3172 g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node); 3173 g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node);
3173 return err; 3174 return err;
3174 } 3175 }
3175 cm_listens_created++; 3176 atomic_inc(&cm_listens_created);
3176 } 3177 }
3177 3178
3178 cm_id->add_ref(cm_id); 3179 cm_id->add_ref(cm_id);
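
[Editor's note] cm_listens_created and cm_listens_destroyed become atomic_t because they are bumped from paths that can race; plain u32 increments are not atomic. The conversion pattern in isolation (the driver's counters are globals shared via nes.h, the names here are illustrative):

#include <asm/atomic.h>

static atomic_t cm_listens_created_example = ATOMIC_INIT(0);

static void listener_created(void)
{
	atomic_inc(&cm_listens_created_example);	/* was: cm_listens_created++ */
}

static int listens_created_snapshot(void)
{
	return atomic_read(&cm_listens_created_example);	/* as read by ethtool stats */
}
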
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index b1c2cbb88f09..ce7f53833577 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -748,16 +748,28 @@ static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count,
748 748
749 if (hw_rev != NE020_REV) { 749 if (hw_rev != NE020_REV) {
750 /* init serdes 0 */ 750 /* init serdes 0 */
751 if (wide_ppm_offset && (nesadapter->phy_type[0] == NES_PHY_TYPE_CX4)) 751 switch (nesadapter->phy_type[0]) {
752 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000FFFAA); 752 case NES_PHY_TYPE_CX4:
753 else 753 if (wide_ppm_offset)
754 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000FFFAA);
755 else
756 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
757 break;
758 case NES_PHY_TYPE_KR:
759 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
760 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x00000000);
761 break;
762 case NES_PHY_TYPE_PUMA_1G:
754 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); 763 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
755
756 if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) {
757 sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0); 764 sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0);
758 sds |= 0x00000100; 765 sds |= 0x00000100;
759 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds); 766 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds);
767 break;
768 default:
769 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
770 break;
760 } 771 }
772
761 if (!OneG_Mode) 773 if (!OneG_Mode)
762 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0, 0x11110000); 774 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0, 0x11110000);
763 775
@@ -778,6 +790,9 @@ static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count,
778 if (wide_ppm_offset) 790 if (wide_ppm_offset)
779 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000FFFAA); 791 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000FFFAA);
780 break; 792 break;
793 case NES_PHY_TYPE_KR:
794 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x00000000);
795 break;
781 case NES_PHY_TYPE_PUMA_1G: 796 case NES_PHY_TYPE_PUMA_1G:
782 sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1); 797 sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1);
783 sds |= 0x000000100; 798 sds |= 0x000000100;
@@ -1279,115 +1294,115 @@ int nes_destroy_cqp(struct nes_device *nesdev)
1279 1294
1280 1295
1281/** 1296/**
1282 * nes_init_phy 1297 * nes_init_1g_phy
1283 */ 1298 */
1284int nes_init_phy(struct nes_device *nesdev) 1299int nes_init_1g_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index)
1285{ 1300{
1286 struct nes_adapter *nesadapter = nesdev->nesadapter;
1287 u32 counter = 0; 1301 u32 counter = 0;
1288 u32 sds;
1289 u32 mac_index = nesdev->mac_index;
1290 u32 tx_config = 0;
1291 u16 phy_data; 1302 u16 phy_data;
1292 u32 temp_phy_data = 0; 1303 int ret = 0;
1293 u32 temp_phy_data2 = 0;
1294 u8 phy_type = nesadapter->phy_type[mac_index];
1295 u8 phy_index = nesadapter->phy_index[mac_index];
1296
1297 if ((nesadapter->OneG_Mode) &&
1298 (phy_type != NES_PHY_TYPE_PUMA_1G)) {
1299 nes_debug(NES_DBG_PHY, "1G PHY, mac_index = %d.\n", mac_index);
1300 if (phy_type == NES_PHY_TYPE_1G) {
1301 tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
1302 tx_config &= 0xFFFFFFE3;
1303 tx_config |= 0x04;
1304 nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
1305 }
1306 1304
1307 nes_read_1G_phy_reg(nesdev, 1, phy_index, &phy_data); 1305 nes_read_1G_phy_reg(nesdev, 1, phy_index, &phy_data);
1308 nes_write_1G_phy_reg(nesdev, 23, phy_index, 0xb000); 1306 nes_write_1G_phy_reg(nesdev, 23, phy_index, 0xb000);
1309 1307
1310 /* Reset the PHY */ 1308 /* Reset the PHY */
1311 nes_write_1G_phy_reg(nesdev, 0, phy_index, 0x8000); 1309 nes_write_1G_phy_reg(nesdev, 0, phy_index, 0x8000);
1312 udelay(100); 1310 udelay(100);
1313 counter = 0; 1311 counter = 0;
1314 do { 1312 do {
1315 nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
1316 if (counter++ > 100)
1317 break;
1318 } while (phy_data & 0x8000);
1319
1320 /* Setting no phy loopback */
1321 phy_data &= 0xbfff;
1322 phy_data |= 0x1140;
1323 nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data);
1324 nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); 1313 nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
1325 nes_read_1G_phy_reg(nesdev, 0x17, phy_index, &phy_data); 1314 if (counter++ > 100) {
1326 nes_read_1G_phy_reg(nesdev, 0x1e, phy_index, &phy_data); 1315 ret = -1;
1327 1316 break;
1328 /* Setting the interrupt mask */ 1317 }
1329 nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data); 1318 } while (phy_data & 0x8000);
1330 nes_write_1G_phy_reg(nesdev, 0x19, phy_index, 0xffee); 1319
1331 nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data); 1320 /* Setting no phy loopback */
1321 phy_data &= 0xbfff;
1322 phy_data |= 0x1140;
1323 nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data);
1324 nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
1325 nes_read_1G_phy_reg(nesdev, 0x17, phy_index, &phy_data);
1326 nes_read_1G_phy_reg(nesdev, 0x1e, phy_index, &phy_data);
1327
1328 /* Setting the interrupt mask */
1329 nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data);
1330 nes_write_1G_phy_reg(nesdev, 0x19, phy_index, 0xffee);
1331 nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data);
1332
1333 /* turning on flow control */
1334 nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data);
1335 nes_write_1G_phy_reg(nesdev, 4, phy_index, (phy_data & ~(0x03E0)) | 0xc00);
1336 nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data);
1337
1338 /* Clear Half duplex */
1339 nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data);
1340 nes_write_1G_phy_reg(nesdev, 9, phy_index, phy_data & ~(0x0100));
1341 nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data);
1342
1343 nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
1344 nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data | 0x0300);
1345
1346 return ret;
1347}
1332 1348
1333 /* turning on flow control */
1334 nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data);
1335 nes_write_1G_phy_reg(nesdev, 4, phy_index, (phy_data & ~(0x03E0)) | 0xc00);
1336 nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data);
1337 1349
1338 /* Clear Half duplex */ 1350/**
1339 nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data); 1351 * nes_init_2025_phy
1340 nes_write_1G_phy_reg(nesdev, 9, phy_index, phy_data & ~(0x0100)); 1352 */
1341 nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data); 1353int nes_init_2025_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index)
1354{
1355 u32 temp_phy_data = 0;
1356 u32 temp_phy_data2 = 0;
1357 u32 counter = 0;
1358 u32 sds;
1359 u32 mac_index = nesdev->mac_index;
1360 int ret = 0;
1361 unsigned int first_attempt = 1;
1342 1362
1343 nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); 1363 /* Check firmware heartbeat */
1344 nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data | 0x0300); 1364 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
1365 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
1366 udelay(1500);
1367 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
1368 temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
1345 1369
1346 return 0; 1370 if (temp_phy_data != temp_phy_data2) {
1371 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
1372 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
1373 if ((temp_phy_data & 0xff) > 0x20)
1374 return 0;
1375 printk(PFX "Reinitialize external PHY\n");
1347 } 1376 }
1348 1377
1349 if ((phy_type == NES_PHY_TYPE_IRIS) || 1378 /* no heartbeat, configure the PHY */
1350 (phy_type == NES_PHY_TYPE_ARGUS) || 1379 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000);
1351 (phy_type == NES_PHY_TYPE_SFP_D)) { 1380 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0000);
1352 /* setup 10G MDIO operation */ 1381 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
1353 tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); 1382 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
1354 tx_config &= 0xFFFFFFE3;
1355 tx_config |= 0x15;
1356 nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
1357 }
1358 if ((phy_type == NES_PHY_TYPE_ARGUS) ||
1359 (phy_type == NES_PHY_TYPE_SFP_D)) {
1360 u32 first_time = 1;
1361 1383
1362 /* Check firmware heartbeat */ 1384 switch (phy_type) {
1363 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); 1385 case NES_PHY_TYPE_ARGUS:
1364 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); 1386 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
1365 udelay(1500); 1387 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
1366 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); 1388 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C);
1367 temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); 1389 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0008);
1390 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0001);
1391 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098);
1392 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00);
1368 1393
1369 if (temp_phy_data != temp_phy_data2) { 1394 /* setup LEDs */
1370 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd); 1395 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007);
1371 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); 1396 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A);
1372 if ((temp_phy_data & 0xff) > 0x20) 1397 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009);
1373 return 0; 1398 break;
1374 printk(PFX "Reinitializing PHY\n");
1375 }
1376 1399
1377 /* no heartbeat, configure the PHY */ 1400 case NES_PHY_TYPE_SFP_D:
1378 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000);
1379 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0000);
1380 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A); 1401 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
1381 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052); 1402 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
1382 if (phy_type == NES_PHY_TYPE_ARGUS) { 1403 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x0004);
1383 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C); 1404 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0038);
1384 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0008); 1405 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013);
1385 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0001);
1386 } else {
1387 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x0004);
1388 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0038);
1389 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013);
1390 }
1391 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098); 1406 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098);
1392 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00); 1407 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00);
1393 1408
@@ -1395,71 +1410,136 @@ int nes_init_phy(struct nes_device *nesdev)
1395 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007); 1410 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007);
1396 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A); 1411 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A);
1397 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009); 1412 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009);
1413 break;
1414
1415 case NES_PHY_TYPE_KR:
1416 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
1417 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
1418 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C);
1419 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0010);
1420 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013);
1421 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0080);
1422 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00);
1423
1424 /* setup LEDs */
1425 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x000B);
1426 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x0003);
1427 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0004);
1398 1428
1399 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0028, 0xA528); 1429 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0022, 0x406D);
1430 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0023, 0x0020);
1431 break;
1432 }
1433
1434 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0028, 0xA528);
1400 1435
1401 /* Bring PHY out of reset */ 1436 /* Bring PHY out of reset */
1402 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0002); 1437 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0002);
1403 1438
1404 /* Check for heartbeat */ 1439 /* Check for heartbeat */
1405 counter = 0; 1440 counter = 0;
1406 mdelay(690); 1441 mdelay(690);
1442 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
1443 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
1444 do {
1445 if (counter++ > 150) {
1446 printk(PFX "No PHY heartbeat\n");
1447 break;
1448 }
1449 mdelay(1);
1407 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); 1450 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
1451 temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
1452 } while ((temp_phy_data2 == temp_phy_data));
1453
1454 /* wait for tracking */
1455 counter = 0;
1456 do {
1457 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
1408 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); 1458 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
1409 do { 1459 if (counter++ > 300) {
1410 if (counter++ > 150) { 1460 if (((temp_phy_data & 0xff) == 0x0) && first_attempt) {
1411 printk(PFX "No PHY heartbeat\n"); 1461 first_attempt = 0;
1462 counter = 0;
1463 /* reset AMCC PHY and try again */
1464 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x00c0);
1465 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x0040);
1466 continue;
1467 } else {
1468 ret = 1;
1412 break; 1469 break;
1413 } 1470 }
1414 mdelay(1); 1471 }
1415 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); 1472 mdelay(10);
1416 temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); 1473 } while ((temp_phy_data & 0xff) < 0x30);
1417 } while ((temp_phy_data2 == temp_phy_data)); 1474
1418 1475 /* setup signal integrity */
1419 /* wait for tracking */ 1476 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000);
1420 counter = 0; 1477 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00D, 0x00FE);
1421 do { 1478 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00E, 0x0032);
1422 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd); 1479 if (phy_type == NES_PHY_TYPE_KR) {
1423 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); 1480 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x000C);
1424 if (counter++ > 300) { 1481 } else {
1425 if (((temp_phy_data & 0xff) == 0x0) && first_time) {
1426 first_time = 0;
1427 counter = 0;
1428 /* reset AMCC PHY and try again */
1429 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x00c0);
1430 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x0040);
1431 continue;
1432 } else {
1433 printk(PFX "PHY did not track\n");
1434 break;
1435 }
1436 }
1437 mdelay(10);
1438 } while ((temp_phy_data & 0xff) < 0x30);
1439
1440 /* setup signal integrity */
1441 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000);
1442 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00D, 0x00FE);
1443 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00E, 0x0032);
1444 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x0002); 1482 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x0002);
1445 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc314, 0x0063); 1483 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc314, 0x0063);
1484 }
1485
1486 /* reset serdes */
1487 sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200);
1488 sds |= 0x1;
1489 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200, sds);
1490 sds &= 0xfffffffe;
1491 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200, sds);
1492
1493 counter = 0;
1494 while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040)
1495 && (counter++ < 5000))
1496 ;
1497
1498 return ret;
1499}
1500
1446 1501
1447 /* reset serdes */ 1502/**
1448 sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + 1503 * nes_init_phy
1449 mac_index * 0x200); 1504 */
1450 sds |= 0x1; 1505int nes_init_phy(struct nes_device *nesdev)
1451 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + 1506{
1452 mac_index * 0x200, sds); 1507 struct nes_adapter *nesadapter = nesdev->nesadapter;
1453 sds &= 0xfffffffe; 1508 u32 mac_index = nesdev->mac_index;
1454 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + 1509 u32 tx_config = 0;
1455 mac_index * 0x200, sds); 1510 unsigned long flags;
1456 1511 u8 phy_type = nesadapter->phy_type[mac_index];
1457 counter = 0; 1512 u8 phy_index = nesadapter->phy_index[mac_index];
1458 while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040) 1513 int ret = 0;
1459 && (counter++ < 5000)) 1514
1460 ; 1515 tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
1516 if (phy_type == NES_PHY_TYPE_1G) {
1517 /* setup 1G MDIO operation */
1518 tx_config &= 0xFFFFFFE3;
1519 tx_config |= 0x04;
1520 } else {
1521 /* setup 10G MDIO operation */
1522 tx_config &= 0xFFFFFFE3;
1523 tx_config |= 0x15;
1461 } 1524 }
1462 return 0; 1525 nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
1526
1527 spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags);
1528
1529 switch (phy_type) {
1530 case NES_PHY_TYPE_1G:
1531 ret = nes_init_1g_phy(nesdev, phy_type, phy_index);
1532 break;
1533 case NES_PHY_TYPE_ARGUS:
1534 case NES_PHY_TYPE_SFP_D:
1535 case NES_PHY_TYPE_KR:
1536 ret = nes_init_2025_phy(nesdev, phy_type, phy_index);
1537 break;
1538 }
1539
1540 spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
1541
1542 return ret;
1463} 1543}
1464 1544
1465 1545
@@ -2460,23 +2540,9 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
2460 } 2540 }
2461 } else { 2541 } else {
2462 switch (nesadapter->phy_type[mac_index]) { 2542 switch (nesadapter->phy_type[mac_index]) {
2463 case NES_PHY_TYPE_IRIS:
2464 nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
2465 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
2466 u32temp = 20;
2467 do {
2468 nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
2469 phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
2470 if ((phy_data == temp_phy_data) || (!(--u32temp)))
2471 break;
2472 temp_phy_data = phy_data;
2473 } while (1);
2474 nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
2475 __func__, phy_data, nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP");
2476 break;
2477
2478 case NES_PHY_TYPE_ARGUS: 2543 case NES_PHY_TYPE_ARGUS:
2479 case NES_PHY_TYPE_SFP_D: 2544 case NES_PHY_TYPE_SFP_D:
2545 case NES_PHY_TYPE_KR:
2480 /* clear the alarms */ 2546 /* clear the alarms */
2481 nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0x0008); 2547 nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0x0008);
2482 nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc001); 2548 nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc001);
@@ -3352,8 +3418,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3352 u16 async_event_id; 3418 u16 async_event_id;
3353 u8 tcp_state; 3419 u8 tcp_state;
3354 u8 iwarp_state; 3420 u8 iwarp_state;
3355 int must_disconn = 1;
3356 int must_terminate = 0;
3357 struct ib_event ibevent; 3421 struct ib_event ibevent;
3358 3422
3359 nes_debug(NES_DBG_AEQ, "\n"); 3423 nes_debug(NES_DBG_AEQ, "\n");
@@ -3367,6 +3431,8 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3367 BUG_ON(!context); 3431 BUG_ON(!context);
3368 } 3432 }
3369 3433
3434 /* context is nesqp unless async_event_id == CQ ERROR */
3435 nesqp = (struct nes_qp *)(unsigned long)context;
3370 async_event_id = (u16)aeq_info; 3436 async_event_id = (u16)aeq_info;
3371 tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT; 3437 tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
3372 iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT; 3438 iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
@@ -3378,8 +3444,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3378 3444
3379 switch (async_event_id) { 3445 switch (async_event_id) {
3380 case NES_AEQE_AEID_LLP_FIN_RECEIVED: 3446 case NES_AEQE_AEID_LLP_FIN_RECEIVED:
3381 nesqp = (struct nes_qp *)(unsigned long)context;
3382
3383 if (nesqp->term_flags) 3447 if (nesqp->term_flags)
3384 return; /* Ignore it, wait for close complete */ 3448 return; /* Ignore it, wait for close complete */
3385 3449
@@ -3394,79 +3458,48 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3394 async_event_id, nesqp->last_aeq, tcp_state); 3458 async_event_id, nesqp->last_aeq, tcp_state);
3395 } 3459 }
3396 3460
3397 if ((tcp_state != NES_AEQE_TCP_STATE_CLOSE_WAIT) || 3461 break;
3398 (nesqp->ibqp_state != IB_QPS_RTS)) {
3399 /* FIN Received but tcp state or IB state moved on,
3400 should expect a close complete */
3401 return;
3402 }
3403
3404 case NES_AEQE_AEID_LLP_CLOSE_COMPLETE: 3462 case NES_AEQE_AEID_LLP_CLOSE_COMPLETE:
3405 nesqp = (struct nes_qp *)(unsigned long)context;
3406 if (nesqp->term_flags) { 3463 if (nesqp->term_flags) {
3407 nes_terminate_done(nesqp, 0); 3464 nes_terminate_done(nesqp, 0);
3408 return; 3465 return;
3409 } 3466 }
3467 spin_lock_irqsave(&nesqp->lock, flags);
3468 nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
3469 spin_unlock_irqrestore(&nesqp->lock, flags);
3470 nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_CLOSING, 0, 0);
3471 nes_cm_disconn(nesqp);
3472 break;
3410 3473
3411 case NES_AEQE_AEID_LLP_CONNECTION_RESET:
3412 case NES_AEQE_AEID_RESET_SENT: 3474 case NES_AEQE_AEID_RESET_SENT:
3413 nesqp = (struct nes_qp *)(unsigned long)context; 3475 tcp_state = NES_AEQE_TCP_STATE_CLOSED;
3414 if (async_event_id == NES_AEQE_AEID_RESET_SENT) {
3415 tcp_state = NES_AEQE_TCP_STATE_CLOSED;
3416 }
3417 spin_lock_irqsave(&nesqp->lock, flags); 3476 spin_lock_irqsave(&nesqp->lock, flags);
3418 nesqp->hw_iwarp_state = iwarp_state; 3477 nesqp->hw_iwarp_state = iwarp_state;
3419 nesqp->hw_tcp_state = tcp_state; 3478 nesqp->hw_tcp_state = tcp_state;
3420 nesqp->last_aeq = async_event_id; 3479 nesqp->last_aeq = async_event_id;
3421 3480 nesqp->hte_added = 0;
3422 if ((tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
3423 (tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT)) {
3424 nesqp->hte_added = 0;
3425 next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE;
3426 }
3427
3428 if ((nesqp->ibqp_state == IB_QPS_RTS) &&
3429 ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
3430 (async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
3431 switch (nesqp->hw_iwarp_state) {
3432 case NES_AEQE_IWARP_STATE_RTS:
3433 next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
3434 nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
3435 break;
3436 case NES_AEQE_IWARP_STATE_TERMINATE:
3437 must_disconn = 0; /* terminate path takes care of disconn */
3438 if (nesqp->term_flags == 0)
3439 must_terminate = 1;
3440 break;
3441 }
3442 } else {
3443 if (async_event_id == NES_AEQE_AEID_LLP_FIN_RECEIVED) {
3444 /* FIN Received but ib state not RTS,
3445 close complete will be on its way */
3446 must_disconn = 0;
3447 }
3448 }
3449 spin_unlock_irqrestore(&nesqp->lock, flags); 3481 spin_unlock_irqrestore(&nesqp->lock, flags);
3482 next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE;
3483 nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
3484 nes_cm_disconn(nesqp);
3485 break;
3450 3486
3451 if (must_terminate) 3487 case NES_AEQE_AEID_LLP_CONNECTION_RESET:
3452 nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL); 3488 if (atomic_read(&nesqp->close_timer_started))
3453 else if (must_disconn) { 3489 return;
3454 if (next_iwarp_state) { 3490 spin_lock_irqsave(&nesqp->lock, flags);
3455 nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X\n", 3491 nesqp->hw_iwarp_state = iwarp_state;
3456 nesqp->hwqp.qp_id, next_iwarp_state); 3492 nesqp->hw_tcp_state = tcp_state;
3457 nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0); 3493 nesqp->last_aeq = async_event_id;
3458 } 3494 spin_unlock_irqrestore(&nesqp->lock, flags);
3459 nes_cm_disconn(nesqp); 3495 nes_cm_disconn(nesqp);
3460 }
3461 break; 3496 break;
3462 3497
3463 case NES_AEQE_AEID_TERMINATE_SENT: 3498 case NES_AEQE_AEID_TERMINATE_SENT:
3464 nesqp = (struct nes_qp *)(unsigned long)context;
3465 nes_terminate_send_fin(nesdev, nesqp, aeqe); 3499 nes_terminate_send_fin(nesdev, nesqp, aeqe);
3466 break; 3500 break;
3467 3501
3468 case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED: 3502 case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED:
3469 nesqp = (struct nes_qp *)(unsigned long)context;
3470 nes_terminate_received(nesdev, nesqp, aeqe); 3503 nes_terminate_received(nesdev, nesqp, aeqe);
3471 break; 3504 break;
3472 3505
@@ -3480,7 +3513,8 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3480 case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: 3513 case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
3481 case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION: 3514 case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
3482 case NES_AEQE_AEID_AMP_TO_WRAP: 3515 case NES_AEQE_AEID_AMP_TO_WRAP:
3483 nesqp = (struct nes_qp *)(unsigned long)context; 3516 printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_ACCESS_ERR\n",
3517 nesqp->hwqp.qp_id, async_event_id);
3484 nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_ACCESS_ERR); 3518 nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_ACCESS_ERR);
3485 break; 3519 break;
3486 3520
@@ -3488,7 +3522,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3488 case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL: 3522 case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL:
3489 case NES_AEQE_AEID_DDP_UBE_INVALID_MO: 3523 case NES_AEQE_AEID_DDP_UBE_INVALID_MO:
3490 case NES_AEQE_AEID_DDP_UBE_INVALID_QN: 3524 case NES_AEQE_AEID_DDP_UBE_INVALID_QN:
3491 nesqp = (struct nes_qp *)(unsigned long)context;
3492 if (iwarp_opcode(nesqp, aeq_info) > IWARP_OPCODE_TERM) { 3525 if (iwarp_opcode(nesqp, aeq_info) > IWARP_OPCODE_TERM) {
3493 aeq_info &= 0xffff0000; 3526 aeq_info &= 0xffff0000;
3494 aeq_info |= NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE; 3527 aeq_info |= NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE;
@@ -3530,7 +3563,8 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3530 case NES_AEQE_AEID_STAG_ZERO_INVALID: 3563 case NES_AEQE_AEID_STAG_ZERO_INVALID:
3531 case NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST: 3564 case NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST:
3532 case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP: 3565 case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
3533 nesqp = (struct nes_qp *)(unsigned long)context; 3566 printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_FATAL\n",
3567 nesqp->hwqp.qp_id, async_event_id);
3534 nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL); 3568 nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
3535 break; 3569 break;
3536 3570
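
[Editor's note] The 2025-family PHY setup above decides whether the external PHY needs reinitialising by sampling a firmware heartbeat register twice, about 1.5 ms apart, and comparing the reads. A standalone illustration of that liveness test; the fake counter stands in for register 3.0xd7ee:

#include <stdio.h>

static unsigned int read_heartbeat(void)
{
	static unsigned int fake_counter;	/* stands in for the firmware counter */

	return fake_counter++;
}

int main(void)
{
	unsigned int first = read_heartbeat();
	/* the driver waits here: udelay(1500) */
	unsigned int second = read_heartbeat();

	printf("PHY firmware %s\n",
	       first != second ? "alive, skip reinit" : "silent, reinitialize");
	return 0;
}
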
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index 084be0ee689b..9b1e7f869d83 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -37,12 +37,12 @@
37 37
38#define NES_PHY_TYPE_CX4 1 38#define NES_PHY_TYPE_CX4 1
39#define NES_PHY_TYPE_1G 2 39#define NES_PHY_TYPE_1G 2
40#define NES_PHY_TYPE_IRIS 3
41#define NES_PHY_TYPE_ARGUS 4 40#define NES_PHY_TYPE_ARGUS 4
42#define NES_PHY_TYPE_PUMA_1G 5 41#define NES_PHY_TYPE_PUMA_1G 5
43#define NES_PHY_TYPE_PUMA_10G 6 42#define NES_PHY_TYPE_PUMA_10G 6
44#define NES_PHY_TYPE_GLADIUS 7 43#define NES_PHY_TYPE_GLADIUS 7
45#define NES_PHY_TYPE_SFP_D 8 44#define NES_PHY_TYPE_SFP_D 8
45#define NES_PHY_TYPE_KR 9
46 46
47#define NES_MULTICAST_PF_MAX 8 47#define NES_MULTICAST_PF_MAX 8
48 48
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 9384f5d3d33b..a1d79b6856ac 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -1243,8 +1243,8 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev,
1243 target_stat_values[++index] = cm_packets_received; 1243 target_stat_values[++index] = cm_packets_received;
1244 target_stat_values[++index] = cm_packets_dropped; 1244 target_stat_values[++index] = cm_packets_dropped;
1245 target_stat_values[++index] = cm_packets_retrans; 1245 target_stat_values[++index] = cm_packets_retrans;
1246 target_stat_values[++index] = cm_listens_created; 1246 target_stat_values[++index] = atomic_read(&cm_listens_created);
1247 target_stat_values[++index] = cm_listens_destroyed; 1247 target_stat_values[++index] = atomic_read(&cm_listens_destroyed);
1248 target_stat_values[++index] = cm_backlog_drops; 1248 target_stat_values[++index] = cm_backlog_drops;
1249 target_stat_values[++index] = atomic_read(&cm_loopbacks); 1249 target_stat_values[++index] = atomic_read(&cm_loopbacks);
1250 target_stat_values[++index] = atomic_read(&cm_nodes_created); 1250 target_stat_values[++index] = atomic_read(&cm_nodes_created);
@@ -1474,9 +1474,9 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd
1474 } 1474 }
1475 return 0; 1475 return 0;
1476 } 1476 }
1477 if ((phy_type == NES_PHY_TYPE_IRIS) || 1477 if ((phy_type == NES_PHY_TYPE_ARGUS) ||
1478 (phy_type == NES_PHY_TYPE_ARGUS) || 1478 (phy_type == NES_PHY_TYPE_SFP_D) ||
1479 (phy_type == NES_PHY_TYPE_SFP_D)) { 1479 (phy_type == NES_PHY_TYPE_KR)) {
1480 et_cmd->transceiver = XCVR_EXTERNAL; 1480 et_cmd->transceiver = XCVR_EXTERNAL;
1481 et_cmd->port = PORT_FIBRE; 1481 et_cmd->port = PORT_FIBRE;
1482 et_cmd->supported = SUPPORTED_FIBRE; 1482 et_cmd->supported = SUPPORTED_FIBRE;
@@ -1596,8 +1596,7 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
1596 struct net_device *netdev; 1596 struct net_device *netdev;
1597 struct nic_qp_map *curr_qp_map; 1597 struct nic_qp_map *curr_qp_map;
1598 u32 u32temp; 1598 u32 u32temp;
1599 u16 phy_data; 1599 u8 phy_type = nesdev->nesadapter->phy_type[nesdev->mac_index];
1600 u16 temp_phy_data;
1601 1600
1602 netdev = alloc_etherdev(sizeof(struct nes_vnic)); 1601 netdev = alloc_etherdev(sizeof(struct nes_vnic));
1603 if (!netdev) { 1602 if (!netdev) {
@@ -1705,65 +1704,23 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
1705 1704
1706 if ((nesdev->netdev_count == 0) && 1705 if ((nesdev->netdev_count == 0) &&
1707 ((PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index) || 1706 ((PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index) ||
1708 ((nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) && 1707 ((phy_type == NES_PHY_TYPE_PUMA_1G) &&
1709 (((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) || 1708 (((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) ||
1710 ((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) { 1709 ((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) {
1711 /*
1712 * nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n",
1713 * NES_IDX_PHY_PCS_CONTROL_STATUS0 + (0x200 * (nesvnic->logical_port & 1)));
1714 */
1715 u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + 1710 u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
1716 (0x200 * (nesdev->mac_index & 1))); 1711 (0x200 * (nesdev->mac_index & 1)));
1717 if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G) { 1712 if (phy_type != NES_PHY_TYPE_PUMA_1G) {
1718 u32temp |= 0x00200000; 1713 u32temp |= 0x00200000;
1719 nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + 1714 nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
1720 (0x200 * (nesdev->mac_index & 1)), u32temp); 1715 (0x200 * (nesdev->mac_index & 1)), u32temp);
1721 } 1716 }
1722 1717
1723 u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
1724 (0x200 * (nesdev->mac_index & 1)));
1725
1726 if ((u32temp&0x0f1f0000) == 0x0f0f0000) {
1727 if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) {
1728 nes_init_phy(nesdev);
1729 nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1);
1730 temp_phy_data = (u16)nes_read_indexed(nesdev,
1731 NES_IDX_MAC_MDIO_CONTROL);
1732 u32temp = 20;
1733 do {
1734 nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1);
1735 phy_data = (u16)nes_read_indexed(nesdev,
1736 NES_IDX_MAC_MDIO_CONTROL);
1737 if ((phy_data == temp_phy_data) || (!(--u32temp)))
1738 break;
1739 temp_phy_data = phy_data;
1740 } while (1);
1741 if (phy_data & 4) {
1742 nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
1743 nesvnic->linkup = 1;
1744 } else {
1745 nes_debug(NES_DBG_INIT, "The Link is DOWN!!.\n");
1746 }
1747 } else {
1748 nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
1749 nesvnic->linkup = 1;
1750 }
1751 } else if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) {
1752 nes_debug(NES_DBG_INIT, "mac_index=%d, logical_port=%d, u32temp=0x%04X, PCI_FUNC=%d\n",
1753 nesdev->mac_index, nesvnic->logical_port, u32temp, PCI_FUNC(nesdev->pcidev->devfn));
1754 if (((nesdev->mac_index < 2) && ((u32temp&0x01010000) == 0x01010000)) ||
1755 ((nesdev->mac_index > 1) && ((u32temp&0x02020000) == 0x02020000))) {
1756 nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
1757 nesvnic->linkup = 1;
1758 }
1759 }
1760 /* clear the MAC interrupt status, assumes direct logical to physical mapping */ 1718 /* clear the MAC interrupt status, assumes direct logical to physical mapping */
1761 u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index)); 1719 u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index));
1762 nes_debug(NES_DBG_INIT, "Phy interrupt status = 0x%X.\n", u32temp); 1720 nes_debug(NES_DBG_INIT, "Phy interrupt status = 0x%X.\n", u32temp);
1763 nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index), u32temp); 1721 nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index), u32temp);
1764 1722
1765 if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_IRIS) 1723 nes_init_phy(nesdev);
1766 nes_init_phy(nesdev);
1767 1724
1768 } 1725 }
1769 1726
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 64d3136e3747..815725f886c4 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -228,7 +228,7 @@ static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw,
228 /* Check for SQ overflow */ 228 /* Check for SQ overflow */
229 if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) { 229 if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
230 spin_unlock_irqrestore(&nesqp->lock, flags); 230 spin_unlock_irqrestore(&nesqp->lock, flags);
231 return -EINVAL; 231 return -ENOMEM;
232 } 232 }
233 233
234 wqe = &nesqp->hwqp.sq_vbase[head]; 234 wqe = &nesqp->hwqp.sq_vbase[head];
@@ -3294,7 +3294,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
3294 3294
3295 /* Check for SQ overflow */ 3295 /* Check for SQ overflow */
3296 if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) { 3296 if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
3297 err = -EINVAL; 3297 err = -ENOMEM;
3298 break; 3298 break;
3299 } 3299 }
3300 3300
@@ -3577,7 +3577,7 @@ static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
3577 } 3577 }
3578 /* Check for RQ overflow */ 3578 /* Check for RQ overflow */
3579 if (((head + (2 * qsize) - nesqp->hwqp.rq_tail) % qsize) == (qsize - 1)) { 3579 if (((head + (2 * qsize) - nesqp->hwqp.rq_tail) % qsize) == (qsize - 1)) {
3580 err = -EINVAL; 3580 err = -ENOMEM;
3581 break; 3581 break;
3582 } 3582 }
3583 3583
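
[Editor's note] The overflow checks above treat head and tail as free-running indices into a ring of qsize entries and report "full" when only one slot remains; adding 2*qsize keeps the dividend positive before the modulo. Returning -ENOMEM (resources exhausted) fits that condition better than -EINVAL (malformed request). A standalone check of the arithmetic:

#include <stdio.h>

static int sq_full(unsigned int head, unsigned int tail, unsigned int qsize)
{
	return ((head + 2 * qsize - tail) % qsize) == (qsize - 1);
}

int main(void)
{
	/* 16-entry SQ with 15 outstanding WRs: one free slot -> full */
	printf("%d\n", sq_full(15, 0, 16));	/* prints 1 */
	/* same ring with 8 outstanding WRs -> not full */
	printf("%d\n", sq_full(8, 0, 16));	/* prints 0 */
	return 0;
}
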
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index e9795f60e5d6..d10b4ec68d28 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -55,9 +55,7 @@ static int ipoib_get_coalesce(struct net_device *dev,
55 struct ipoib_dev_priv *priv = netdev_priv(dev); 55 struct ipoib_dev_priv *priv = netdev_priv(dev);
56 56
57 coal->rx_coalesce_usecs = priv->ethtool.coalesce_usecs; 57 coal->rx_coalesce_usecs = priv->ethtool.coalesce_usecs;
58 coal->tx_coalesce_usecs = priv->ethtool.coalesce_usecs;
59 coal->rx_max_coalesced_frames = priv->ethtool.max_coalesced_frames; 58 coal->rx_max_coalesced_frames = priv->ethtool.max_coalesced_frames;
60 coal->tx_max_coalesced_frames = priv->ethtool.max_coalesced_frames;
61 59
62 return 0; 60 return 0;
63} 61}
@@ -69,10 +67,8 @@ static int ipoib_set_coalesce(struct net_device *dev,
69 int ret; 67 int ret;
70 68
71 /* 69 /*
72 * Since IPoIB uses a single CQ for both rx and tx, we assume 70 * These values are saved in the private data and returned
73 * that rx params dictate the configuration. These values are 71 * when ipoib_get_coalesce() is called
74 * saved in the private data and returned when ipoib_get_coalesce()
75 * is called.
76 */ 72 */
77 if (coal->rx_coalesce_usecs > 0xffff || 73 if (coal->rx_coalesce_usecs > 0xffff ||
78 coal->rx_max_coalesced_frames > 0xffff) 74 coal->rx_max_coalesced_frames > 0xffff)
@@ -85,8 +81,6 @@ static int ipoib_set_coalesce(struct net_device *dev,
85 return ret; 81 return ret;
86 } 82 }
87 83
88 coal->tx_coalesce_usecs = coal->rx_coalesce_usecs;
89 coal->tx_max_coalesced_frames = coal->rx_max_coalesced_frames;
90 priv->ethtool.coalesce_usecs = coal->rx_coalesce_usecs; 84 priv->ethtool.coalesce_usecs = coal->rx_coalesce_usecs;
91 priv->ethtool.max_coalesced_frames = coal->rx_max_coalesced_frames; 85 priv->ethtool.max_coalesced_frames = coal->rx_max_coalesced_frames;
92 86
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 5f7a6fca0a4d..71237f8f78f7 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -128,6 +128,28 @@ static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
128 return 0; 128 return 0;
129} 129}
130 130
131int iser_initialize_task_headers(struct iscsi_task *task,
132 struct iser_tx_desc *tx_desc)
133{
134 struct iscsi_iser_conn *iser_conn = task->conn->dd_data;
135 struct iser_device *device = iser_conn->ib_conn->device;
136 struct iscsi_iser_task *iser_task = task->dd_data;
137 u64 dma_addr;
138
139 dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
140 ISER_HEADERS_LEN, DMA_TO_DEVICE);
141 if (ib_dma_mapping_error(device->ib_device, dma_addr))
142 return -ENOMEM;
143
144 tx_desc->dma_addr = dma_addr;
145 tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
146 tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
147 tx_desc->tx_sg[0].lkey = device->mr->lkey;
148
149 iser_task->headers_initialized = 1;
150 iser_task->iser_conn = iser_conn;
151 return 0;
152}
131/** 153/**
132 * iscsi_iser_task_init - Initialize task 154 * iscsi_iser_task_init - Initialize task
133 * @task: iscsi task 155 * @task: iscsi task
@@ -137,17 +159,17 @@ static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
137static int 159static int
138iscsi_iser_task_init(struct iscsi_task *task) 160iscsi_iser_task_init(struct iscsi_task *task)
139{ 161{
140 struct iscsi_iser_conn *iser_conn = task->conn->dd_data;
141 struct iscsi_iser_task *iser_task = task->dd_data; 162 struct iscsi_iser_task *iser_task = task->dd_data;
142 163
164 if (!iser_task->headers_initialized)
165 if (iser_initialize_task_headers(task, &iser_task->desc))
166 return -ENOMEM;
167
143 /* mgmt task */ 168 /* mgmt task */
144 if (!task->sc) { 169 if (!task->sc)
145 iser_task->desc.data = task->data;
146 return 0; 170 return 0;
147 }
148 171
149 iser_task->command_sent = 0; 172 iser_task->command_sent = 0;
150 iser_task->iser_conn = iser_conn;
151 iser_task_rdma_init(iser_task); 173 iser_task_rdma_init(iser_task);
152 return 0; 174 return 0;
153} 175}
@@ -168,7 +190,7 @@ iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_task *task)
168{ 190{
169 int error = 0; 191 int error = 0;
170 192
171 iser_dbg("task deq [cid %d itt 0x%x]\n", conn->id, task->itt); 193 iser_dbg("mtask xmit [cid %d itt 0x%x]\n", conn->id, task->itt);
172 194
173 error = iser_send_control(conn, task); 195 error = iser_send_control(conn, task);
174 196
@@ -178,9 +200,6 @@ iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_task *task)
178 * - if yes, the task is recycled at iscsi_complete_pdu 200 * - if yes, the task is recycled at iscsi_complete_pdu
179 * - if no, the task is recycled at iser_snd_completion 201 * - if no, the task is recycled at iser_snd_completion
180 */ 202 */
181 if (error && error != -ENOBUFS)
182 iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
183
184 return error; 203 return error;
185} 204}
186 205
@@ -232,7 +251,7 @@ iscsi_iser_task_xmit(struct iscsi_task *task)
232 task->imm_count, task->unsol_r2t.data_length); 251 task->imm_count, task->unsol_r2t.data_length);
233 } 252 }
234 253
235 iser_dbg("task deq [cid %d itt 0x%x]\n", 254 iser_dbg("ctask xmit [cid %d itt 0x%x]\n",
236 conn->id, task->itt); 255 conn->id, task->itt);
237 256
238 /* Send the cmd PDU */ 257 /* Send the cmd PDU */
@@ -248,8 +267,6 @@ iscsi_iser_task_xmit(struct iscsi_task *task)
248 error = iscsi_iser_task_xmit_unsol_data(conn, task); 267 error = iscsi_iser_task_xmit_unsol_data(conn, task);
249 268
250 iscsi_iser_task_xmit_exit: 269 iscsi_iser_task_xmit_exit:
251 if (error && error != -ENOBUFS)
252 iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
253 return error; 270 return error;
254} 271}
255 272
@@ -283,7 +300,7 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
283 * due to issues with the login code re iser sematics 300 * due to issues with the login code re iser sematics
284 * this not set in iscsi_conn_setup - FIXME 301 * this not set in iscsi_conn_setup - FIXME
285 */ 302 */
286 conn->max_recv_dlength = 128; 303 conn->max_recv_dlength = ISER_RECV_DATA_SEG_LEN;
287 304
288 iser_conn = conn->dd_data; 305 iser_conn = conn->dd_data;
289 conn->dd_data = iser_conn; 306 conn->dd_data = iser_conn;
@@ -401,7 +418,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
401 struct Scsi_Host *shost; 418 struct Scsi_Host *shost;
402 struct iser_conn *ib_conn; 419 struct iser_conn *ib_conn;
403 420
404 shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 1); 421 shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
405 if (!shost) 422 if (!shost)
406 return NULL; 423 return NULL;
407 shost->transportt = iscsi_iser_scsi_transport; 424 shost->transportt = iscsi_iser_scsi_transport;
@@ -675,7 +692,7 @@ static int __init iser_init(void)
675 memset(&ig, 0, sizeof(struct iser_global)); 692 memset(&ig, 0, sizeof(struct iser_global));
676 693
677 ig.desc_cache = kmem_cache_create("iser_descriptors", 694 ig.desc_cache = kmem_cache_create("iser_descriptors",
678 sizeof (struct iser_desc), 695 sizeof(struct iser_tx_desc),
679 0, SLAB_HWCACHE_ALIGN, 696 0, SLAB_HWCACHE_ALIGN,
680 NULL); 697 NULL);
681 if (ig.desc_cache == NULL) 698 if (ig.desc_cache == NULL)
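
[Editor's note] iser_initialize_task_headers() maps the fixed-size header area once per task with ib_dma_map_single() and must verify the result with ib_dma_mapping_error() before using the address in an SGE. A sketch of that map/check/unmap pairing; the helper names are placeholders and the unmap side is not part of this hunk:

#include <linux/errno.h>
#include <rdma/ib_verbs.h>

static int map_headers(struct ib_device *ibdev, void *buf, size_t len,
		       u64 *dma_addr)
{
	u64 addr = ib_dma_map_single(ibdev, buf, len, DMA_TO_DEVICE);

	if (ib_dma_mapping_error(ibdev, addr))
		return -ENOMEM;		/* never post an SGE with a bad address */

	*dma_addr = addr;
	return 0;
}

static void unmap_headers(struct ib_device *ibdev, u64 dma_addr, size_t len)
{
	/* must match the device, length and direction used at map time */
	ib_dma_unmap_single(ibdev, dma_addr, len, DMA_TO_DEVICE);
}
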
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 9d529cae1f0d..036934cdcb92 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -102,9 +102,9 @@
102#define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), * 102#define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), *
103 * SCSI_TMFUNC(2), LOGOUT(1) */ 103 * SCSI_TMFUNC(2), LOGOUT(1) */
104 104
105#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX + \ 105#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
106 ISER_MAX_RX_MISC_PDUS + \ 106
107 ISER_MAX_TX_MISC_PDUS) 107#define ISER_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
108 108
109/* the max TX (send) WR supported by the iSER QP is defined by * 109/* the max TX (send) WR supported by the iSER QP is defined by *
110 * max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect * 110 * max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect *
@@ -132,6 +132,12 @@ struct iser_hdr {
132 __be64 read_va; 132 __be64 read_va;
133} __attribute__((packed)); 133} __attribute__((packed));
134 134
135/* Constant PDU lengths calculations */
136#define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
137
138#define ISER_RECV_DATA_SEG_LEN 128
139#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
140#define ISER_RX_LOGIN_SIZE (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
135 141
136/* Length of an object name string */ 142/* Length of an object name string */
137#define ISER_OBJECT_NAME_SIZE 64 143#define ISER_OBJECT_NAME_SIZE 64
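Editor's note: the new length macros above work out to small fixed values. Assuming the common structure sizes (a 28-byte iSER header and the standard 48-byte iSCSI BHS) — an assumption about the build, not something this hunk states — the arithmetic is:

    /* Worked out with the assumed sizes sizeof(struct iser_hdr) == 28 and
     * sizeof(struct iscsi_hdr) == 48:
     *   ISER_HEADERS_LEN     = 28 + 48   =   76
     *   ISER_RX_PAYLOAD_SIZE = 76 + 128  =  204
     *   ISER_RX_LOGIN_SIZE   = 76 + 8192 = 8268   (ISCSI_DEF_MAX_RECV_SEG_LEN == 8K assumed)
     */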
@@ -187,51 +193,43 @@ struct iser_regd_buf {
187 struct iser_mem_reg reg; /* memory registration info */ 193 struct iser_mem_reg reg; /* memory registration info */
188 void *virt_addr; 194 void *virt_addr;
189 struct iser_device *device; /* device->device for dma_unmap */ 195 struct iser_device *device; /* device->device for dma_unmap */
190 u64 dma_addr; /* if non zero, addr for dma_unmap */
191 enum dma_data_direction direction; /* direction for dma_unmap */ 196 enum dma_data_direction direction; /* direction for dma_unmap */
192 unsigned int data_size; 197 unsigned int data_size;
193 atomic_t ref_count; /* refcount, freed when dec to 0 */
194};
195
196#define MAX_REGD_BUF_VECTOR_LEN 2
197
198struct iser_dto {
199 struct iscsi_iser_task *task;
200 struct iser_conn *ib_conn;
201 int notify_enable;
202
203 /* vector of registered buffers */
204 unsigned int regd_vector_len;
205 struct iser_regd_buf *regd[MAX_REGD_BUF_VECTOR_LEN];
206
207 /* offset into the registered buffer may be specified */
208 unsigned int offset[MAX_REGD_BUF_VECTOR_LEN];
209
210 /* a smaller size may be specified, if 0, then full size is used */
211 unsigned int used_sz[MAX_REGD_BUF_VECTOR_LEN];
212}; 198};
213 199
214enum iser_desc_type { 200enum iser_desc_type {
215 ISCSI_RX,
216 ISCSI_TX_CONTROL , 201 ISCSI_TX_CONTROL ,
217 ISCSI_TX_SCSI_COMMAND, 202 ISCSI_TX_SCSI_COMMAND,
218 ISCSI_TX_DATAOUT 203 ISCSI_TX_DATAOUT
219}; 204};
220 205
221struct iser_desc { 206struct iser_tx_desc {
222 struct iser_hdr iser_header; 207 struct iser_hdr iser_header;
223 struct iscsi_hdr iscsi_header; 208 struct iscsi_hdr iscsi_header;
224 struct iser_regd_buf hdr_regd_buf;
225 void *data; /* used by RX & TX_CONTROL */
226 struct iser_regd_buf data_regd_buf; /* used by RX & TX_CONTROL */
227 enum iser_desc_type type; 209 enum iser_desc_type type;
228 struct iser_dto dto; 210 u64 dma_addr;
211 /* sg[0] points to iser/iscsi headers, sg[1] optionally points to either
212 of immediate data, unsolicited data-out or control (login,text) */
213 struct ib_sge tx_sg[2];
214 int num_sge;
229}; 215};
230 216
217#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \
218 sizeof(u64) + sizeof(struct ib_sge)))
219struct iser_rx_desc {
220 struct iser_hdr iser_header;
221 struct iscsi_hdr iscsi_header;
222 char data[ISER_RECV_DATA_SEG_LEN];
223 u64 dma_addr;
224 struct ib_sge rx_sg;
225 char pad[ISER_RX_PAD_SIZE];
226} __attribute__((packed));
227
231struct iser_device { 228struct iser_device {
232 struct ib_device *ib_device; 229 struct ib_device *ib_device;
233 struct ib_pd *pd; 230 struct ib_pd *pd;
234 struct ib_cq *cq; 231 struct ib_cq *rx_cq;
232 struct ib_cq *tx_cq;
235 struct ib_mr *mr; 233 struct ib_mr *mr;
236 struct tasklet_struct cq_tasklet; 234 struct tasklet_struct cq_tasklet;
237 struct list_head ig_list; /* entry in ig devices list */ 235 struct list_head ig_list; /* entry in ig devices list */
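Editor's note: ISER_RX_PAD_SIZE is defined as 256 minus everything else in the packed descriptor, so each struct iser_rx_desc occupies exactly 256 bytes and the rx_descs array gets a fixed power-of-two stride. A hypothetical compile-time check (not part of the patch) that captures that intent:

    /* Hypothetical helper, illustrative only: the pad keeps the packed RX
     * descriptor at exactly 256 bytes so rx_descs[] has a 256-byte stride. */
    static inline void iser_check_rx_desc_size(void)
    {
    	BUILD_BUG_ON(sizeof(struct iser_rx_desc) != 256);
    }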
@@ -250,15 +248,18 @@ struct iser_conn {
250 struct ib_fmr_pool *fmr_pool; /* pool of IB FMRs */ 248 struct ib_fmr_pool *fmr_pool; /* pool of IB FMRs */
251 int disc_evt_flag; /* disconn event delivered */ 249 int disc_evt_flag; /* disconn event delivered */
252 wait_queue_head_t wait; /* waitq for conn/disconn */ 250 wait_queue_head_t wait; /* waitq for conn/disconn */
253 atomic_t post_recv_buf_count; /* posted rx count */ 251 int post_recv_buf_count; /* posted rx count */
254 atomic_t post_send_buf_count; /* posted tx count */ 252 atomic_t post_send_buf_count; /* posted tx count */
255 atomic_t unexpected_pdu_count;/* count of received *
256 * unexpected pdus *
257 * not yet retired */
258 char name[ISER_OBJECT_NAME_SIZE]; 253 char name[ISER_OBJECT_NAME_SIZE];
259 struct iser_page_vec *page_vec; /* represents SG to fmr maps* 254 struct iser_page_vec *page_vec; /* represents SG to fmr maps*
260 * maps serialized as tx is*/ 255 * maps serialized as tx is*/
261 struct list_head conn_list; /* entry in ig conn list */ 256 struct list_head conn_list; /* entry in ig conn list */
257
258 char *login_buf;
259 u64 login_dma;
260 unsigned int rx_desc_head;
261 struct iser_rx_desc *rx_descs;
262 struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
262}; 263};
263 264
264struct iscsi_iser_conn { 265struct iscsi_iser_conn {
@@ -267,7 +268,7 @@ struct iscsi_iser_conn {
267}; 268};
268 269
269struct iscsi_iser_task { 270struct iscsi_iser_task {
270 struct iser_desc desc; 271 struct iser_tx_desc desc;
271 struct iscsi_iser_conn *iser_conn; 272 struct iscsi_iser_conn *iser_conn;
272 enum iser_task_status status; 273 enum iser_task_status status;
273 int command_sent; /* set if command sent */ 274 int command_sent; /* set if command sent */
@@ -275,6 +276,7 @@ struct iscsi_iser_task {
275 struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */ 276 struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */
276 struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/ 277 struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/
277 struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */ 278 struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */
279 int headers_initialized;
278}; 280};
279 281
280struct iser_page_vec { 282struct iser_page_vec {
@@ -322,22 +324,17 @@ void iser_conn_put(struct iser_conn *ib_conn);
322 324
323void iser_conn_terminate(struct iser_conn *ib_conn); 325void iser_conn_terminate(struct iser_conn *ib_conn);
324 326
325void iser_rcv_completion(struct iser_desc *desc, 327void iser_rcv_completion(struct iser_rx_desc *desc,
326 unsigned long dto_xfer_len); 328 unsigned long dto_xfer_len,
329 struct iser_conn *ib_conn);
327 330
328void iser_snd_completion(struct iser_desc *desc); 331void iser_snd_completion(struct iser_tx_desc *desc, struct iser_conn *ib_conn);
329 332
330void iser_task_rdma_init(struct iscsi_iser_task *task); 333void iser_task_rdma_init(struct iscsi_iser_task *task);
331 334
332void iser_task_rdma_finalize(struct iscsi_iser_task *task); 335void iser_task_rdma_finalize(struct iscsi_iser_task *task);
333 336
334void iser_dto_buffs_release(struct iser_dto *dto); 337void iser_free_rx_descriptors(struct iser_conn *ib_conn);
335
336int iser_regd_buff_release(struct iser_regd_buf *regd_buf);
337
338void iser_reg_single(struct iser_device *device,
339 struct iser_regd_buf *regd_buf,
340 enum dma_data_direction direction);
341 338
342void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, 339void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task,
343 enum iser_data_dir cmd_dir); 340 enum iser_data_dir cmd_dir);
@@ -356,11 +353,9 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
356 353
357void iser_unreg_mem(struct iser_mem_reg *mem_reg); 354void iser_unreg_mem(struct iser_mem_reg *mem_reg);
358 355
359int iser_post_recv(struct iser_desc *rx_desc); 356int iser_post_recvl(struct iser_conn *ib_conn);
360int iser_post_send(struct iser_desc *tx_desc); 357int iser_post_recvm(struct iser_conn *ib_conn, int count);
361 358int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc);
362int iser_conn_state_comp(struct iser_conn *ib_conn,
363 enum iser_ib_conn_state comp);
364 359
365int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, 360int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
366 struct iser_data_buf *data, 361 struct iser_data_buf *data,
@@ -368,4 +363,6 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
368 enum dma_data_direction dma_dir); 363 enum dma_data_direction dma_dir);
369 364
370void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task); 365void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task);
366int iser_initialize_task_headers(struct iscsi_task *task,
367 struct iser_tx_desc *tx_desc);
371#endif 368#endif
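Editor's note: taken together, the new prototypes replace the per-PDU DTO posting scheme with a dedicated login buffer plus a pre-allocated RX ring. A minimal sketch of the intended call sequence, pieced together from the hunks below (error handling and locking trimmed, so a sketch rather than real code):

    /* Sketch only -- the real call sites are iser_send_control(),
     * iser_conn_set_full_featured_mode() and iser_rcv_completion():        */
    iser_post_recvl(ib_conn);                      /* login: single 8K RX buffer   */
    /* ... login negotiation completes ... */
    iser_alloc_rx_descriptors(ib_conn);            /* build the RX descriptor ring */
    iser_post_recvm(ib_conn, ISER_MIN_POSTED_RX);  /* arm the ring in one batch    */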
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 9de640200ad3..0b9ef0716588 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -39,29 +39,6 @@
39 39
40#include "iscsi_iser.h" 40#include "iscsi_iser.h"
41 41
42/* Constant PDU lengths calculations */
43#define ISER_TOTAL_HEADERS_LEN (sizeof (struct iser_hdr) + \
44 sizeof (struct iscsi_hdr))
45
46/* iser_dto_add_regd_buff - increments the reference count for *
47 * the registered buffer & adds it to the DTO object */
48static void iser_dto_add_regd_buff(struct iser_dto *dto,
49 struct iser_regd_buf *regd_buf,
50 unsigned long use_offset,
51 unsigned long use_size)
52{
53 int add_idx;
54
55 atomic_inc(&regd_buf->ref_count);
56
57 add_idx = dto->regd_vector_len;
58 dto->regd[add_idx] = regd_buf;
59 dto->used_sz[add_idx] = use_size;
60 dto->offset[add_idx] = use_offset;
61
62 dto->regd_vector_len++;
63}
64
65/* Register user buffer memory and initialize passive rdma 42/* Register user buffer memory and initialize passive rdma
66 * dto descriptor. Total data size is stored in 43 * dto descriptor. Total data size is stored in
67 * iser_task->data[ISER_DIR_IN].data_len 44 * iser_task->data[ISER_DIR_IN].data_len
@@ -122,9 +99,9 @@ iser_prepare_write_cmd(struct iscsi_task *task,
122 struct iscsi_iser_task *iser_task = task->dd_data; 99 struct iscsi_iser_task *iser_task = task->dd_data;
123 struct iser_regd_buf *regd_buf; 100 struct iser_regd_buf *regd_buf;
124 int err; 101 int err;
125 struct iser_dto *send_dto = &iser_task->desc.dto;
126 struct iser_hdr *hdr = &iser_task->desc.iser_header; 102 struct iser_hdr *hdr = &iser_task->desc.iser_header;
127 struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT]; 103 struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
104 struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1];
128 105
129 err = iser_dma_map_task_data(iser_task, 106 err = iser_dma_map_task_data(iser_task,
130 buf_out, 107 buf_out,
@@ -163,135 +140,100 @@ iser_prepare_write_cmd(struct iscsi_task *task,
163 if (imm_sz > 0) { 140 if (imm_sz > 0) {
164 iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n", 141 iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
165 task->itt, imm_sz); 142 task->itt, imm_sz);
166 iser_dto_add_regd_buff(send_dto, 143 tx_dsg->addr = regd_buf->reg.va;
167 regd_buf, 144 tx_dsg->length = imm_sz;
168 0, 145 tx_dsg->lkey = regd_buf->reg.lkey;
169 imm_sz); 146 iser_task->desc.num_sge = 2;
170 } 147 }
171 148
172 return 0; 149 return 0;
173} 150}
174 151
175/** 152/* creates a new tx descriptor and adds header regd buffer */
176 * iser_post_receive_control - allocates, initializes and posts receive DTO. 153static void iser_create_send_desc(struct iser_conn *ib_conn,
177 */ 154 struct iser_tx_desc *tx_desc)
178static int iser_post_receive_control(struct iscsi_conn *conn)
179{ 155{
180 struct iscsi_iser_conn *iser_conn = conn->dd_data; 156 struct iser_device *device = ib_conn->device;
181 struct iser_desc *rx_desc;
182 struct iser_regd_buf *regd_hdr;
183 struct iser_regd_buf *regd_data;
184 struct iser_dto *recv_dto = NULL;
185 struct iser_device *device = iser_conn->ib_conn->device;
186 int rx_data_size, err;
187 int posts, outstanding_unexp_pdus;
188
189 /* for the login sequence we must support rx of up to 8K; login is done
190 * after conn create/bind (connect) and conn stop/bind (reconnect),
191 * what's common for both schemes is that the connection is not started
192 */
193 if (conn->c_stage != ISCSI_CONN_STARTED)
194 rx_data_size = ISCSI_DEF_MAX_RECV_SEG_LEN;
195 else /* FIXME till user space sets conn->max_recv_dlength correctly */
196 rx_data_size = 128;
197
198 outstanding_unexp_pdus =
199 atomic_xchg(&iser_conn->ib_conn->unexpected_pdu_count, 0);
200
201 /*
202 * in addition to the response buffer, replace those consumed by
203 * unexpected pdus.
204 */
205 for (posts = 0; posts < 1 + outstanding_unexp_pdus; posts++) {
206 rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO);
207 if (rx_desc == NULL) {
208 iser_err("Failed to alloc desc for post recv %d\n",
209 posts);
210 err = -ENOMEM;
211 goto post_rx_cache_alloc_failure;
212 }
213 rx_desc->type = ISCSI_RX;
214 rx_desc->data = kmalloc(rx_data_size, GFP_NOIO);
215 if (rx_desc->data == NULL) {
216 iser_err("Failed to alloc data buf for post recv %d\n",
217 posts);
218 err = -ENOMEM;
219 goto post_rx_kmalloc_failure;
220 }
221
222 recv_dto = &rx_desc->dto;
223 recv_dto->ib_conn = iser_conn->ib_conn;
224 recv_dto->regd_vector_len = 0;
225 157
226 regd_hdr = &rx_desc->hdr_regd_buf; 158 ib_dma_sync_single_for_cpu(device->ib_device,
227 memset(regd_hdr, 0, sizeof(struct iser_regd_buf)); 159 tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
228 regd_hdr->device = device;
229 regd_hdr->virt_addr = rx_desc; /* == &rx_desc->iser_header */
230 regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN;
231 160
232 iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE); 161 memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
233 162 tx_desc->iser_header.flags = ISER_VER;
234 iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0);
235 163
236 regd_data = &rx_desc->data_regd_buf; 164 tx_desc->num_sge = 1;
237 memset(regd_data, 0, sizeof(struct iser_regd_buf));
238 regd_data->device = device;
239 regd_data->virt_addr = rx_desc->data;
240 regd_data->data_size = rx_data_size;
241 165
242 iser_reg_single(device, regd_data, DMA_FROM_DEVICE); 166 if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
167 tx_desc->tx_sg[0].lkey = device->mr->lkey;
168 iser_dbg("sdesc %p lkey mismatch, fixing\n", tx_desc);
169 }
170}
243 171
244 iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0);
245 172
246 err = iser_post_recv(rx_desc); 173int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
247 if (err) { 174{
248 iser_err("Failed iser_post_recv for post %d\n", posts); 175 int i, j;
249 goto post_rx_post_recv_failure; 176 u64 dma_addr;
250 } 177 struct iser_rx_desc *rx_desc;
178 struct ib_sge *rx_sg;
179 struct iser_device *device = ib_conn->device;
180
181 ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS *
182 sizeof(struct iser_rx_desc), GFP_KERNEL);
183 if (!ib_conn->rx_descs)
184 goto rx_desc_alloc_fail;
185
186 rx_desc = ib_conn->rx_descs;
187
188 for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) {
189 dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
190 ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
191 if (ib_dma_mapping_error(device->ib_device, dma_addr))
192 goto rx_desc_dma_map_failed;
193
194 rx_desc->dma_addr = dma_addr;
195
196 rx_sg = &rx_desc->rx_sg;
197 rx_sg->addr = rx_desc->dma_addr;
198 rx_sg->length = ISER_RX_PAYLOAD_SIZE;
199 rx_sg->lkey = device->mr->lkey;
251 } 200 }
252 /* all posts successful */
253 return 0;
254 201
255post_rx_post_recv_failure: 202 ib_conn->rx_desc_head = 0;
256 iser_dto_buffs_release(recv_dto); 203 return 0;
257 kfree(rx_desc->data);
258post_rx_kmalloc_failure:
259 kmem_cache_free(ig.desc_cache, rx_desc);
260post_rx_cache_alloc_failure:
261 if (posts > 0) {
262 /*
263 * response buffer posted, but did not replace all unexpected
264 * pdu recv bufs. Ignore error, retry occurs next send
265 */
266 outstanding_unexp_pdus -= (posts - 1);
267 err = 0;
268 }
269 atomic_add(outstanding_unexp_pdus,
270 &iser_conn->ib_conn->unexpected_pdu_count);
271 204
272 return err; 205rx_desc_dma_map_failed:
206 rx_desc = ib_conn->rx_descs;
207 for (j = 0; j < i; j++, rx_desc++)
208 ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
209 ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
210 kfree(ib_conn->rx_descs);
211 ib_conn->rx_descs = NULL;
212rx_desc_alloc_fail:
213 iser_err("failed allocating rx descriptors / data buffers\n");
214 return -ENOMEM;
273} 215}
274 216
275/* creates a new tx descriptor and adds header regd buffer */ 217void iser_free_rx_descriptors(struct iser_conn *ib_conn)
276static void iser_create_send_desc(struct iscsi_iser_conn *iser_conn,
277 struct iser_desc *tx_desc)
278{ 218{
279 struct iser_regd_buf *regd_hdr = &tx_desc->hdr_regd_buf; 219 int i;
280 struct iser_dto *send_dto = &tx_desc->dto; 220 struct iser_rx_desc *rx_desc;
221 struct iser_device *device = ib_conn->device;
281 222
282 memset(regd_hdr, 0, sizeof(struct iser_regd_buf)); 223 if (ib_conn->login_buf) {
283 regd_hdr->device = iser_conn->ib_conn->device; 224 ib_dma_unmap_single(device->ib_device, ib_conn->login_dma,
284 regd_hdr->virt_addr = tx_desc; /* == &tx_desc->iser_header */ 225 ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
285 regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN; 226 kfree(ib_conn->login_buf);
227 }
286 228
287 send_dto->ib_conn = iser_conn->ib_conn; 229 if (!ib_conn->rx_descs)
288 send_dto->notify_enable = 1; 230 return;
289 send_dto->regd_vector_len = 0;
290 231
291 memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr)); 232 rx_desc = ib_conn->rx_descs;
292 tx_desc->iser_header.flags = ISER_VER; 233 for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++)
293 234 ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
294 iser_dto_add_regd_buff(send_dto, regd_hdr, 0, 0); 235 ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
236 kfree(ib_conn->rx_descs);
295} 237}
296 238
297/** 239/**
@@ -301,46 +243,23 @@ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn)
301{ 243{
302 struct iscsi_iser_conn *iser_conn = conn->dd_data; 244 struct iscsi_iser_conn *iser_conn = conn->dd_data;
303 245
304 int i; 246 iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX);
305 /*
306 * FIXME this value should be declared to the target during login with
307 * the MaxOutstandingUnexpectedPDUs key when supported
308 */
309 int initial_post_recv_bufs_num = ISER_MAX_RX_MISC_PDUS;
310
311 iser_dbg("Initially post: %d\n", initial_post_recv_bufs_num);
312 247
313 /* Check that there is no posted recv or send buffers left - */ 248 /* Check that there is no posted recv or send buffers left - */
314 /* they must be consumed during the login phase */ 249 /* they must be consumed during the login phase */
315 BUG_ON(atomic_read(&iser_conn->ib_conn->post_recv_buf_count) != 0); 250 BUG_ON(iser_conn->ib_conn->post_recv_buf_count != 0);
316 BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0); 251 BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);
317 252
318 /* Initial post receive buffers */ 253 if (iser_alloc_rx_descriptors(iser_conn->ib_conn))
319 for (i = 0; i < initial_post_recv_bufs_num; i++) { 254 return -ENOMEM;
320 if (iser_post_receive_control(conn) != 0) {
321 iser_err("Failed to post recv bufs at:%d conn:0x%p\n",
322 i, conn);
323 return -ENOMEM;
324 }
325 }
326 iser_dbg("Posted %d post recv bufs, conn:0x%p\n", i, conn);
327 return 0;
328}
329 255
330static int 256 /* Initial post receive buffers */
331iser_check_xmit(struct iscsi_conn *conn, void *task) 257 if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX))
332{ 258 return -ENOMEM;
333 struct iscsi_iser_conn *iser_conn = conn->dd_data;
334 259
335 if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) ==
336 ISER_QP_MAX_REQ_DTOS) {
337 iser_dbg("%ld can't xmit task %p\n",jiffies,task);
338 return -ENOBUFS;
339 }
340 return 0; 260 return 0;
341} 261}
342 262
343
344/** 263/**
345 * iser_send_command - send command PDU 264 * iser_send_command - send command PDU
346 */ 265 */
@@ -349,27 +268,18 @@ int iser_send_command(struct iscsi_conn *conn,
349{ 268{
350 struct iscsi_iser_conn *iser_conn = conn->dd_data; 269 struct iscsi_iser_conn *iser_conn = conn->dd_data;
351 struct iscsi_iser_task *iser_task = task->dd_data; 270 struct iscsi_iser_task *iser_task = task->dd_data;
352 struct iser_dto *send_dto = NULL;
353 unsigned long edtl; 271 unsigned long edtl;
354 int err = 0; 272 int err;
355 struct iser_data_buf *data_buf; 273 struct iser_data_buf *data_buf;
356 struct iscsi_cmd *hdr = (struct iscsi_cmd *)task->hdr; 274 struct iscsi_cmd *hdr = (struct iscsi_cmd *)task->hdr;
357 struct scsi_cmnd *sc = task->sc; 275 struct scsi_cmnd *sc = task->sc;
358 276 struct iser_tx_desc *tx_desc = &iser_task->desc;
359 if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
360 iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
361 return -EPERM;
362 }
363 if (iser_check_xmit(conn, task))
364 return -ENOBUFS;
365 277
366 edtl = ntohl(hdr->data_length); 278 edtl = ntohl(hdr->data_length);
367 279
368 /* build the tx desc regd header and add it to the tx desc dto */ 280 /* build the tx desc regd header and add it to the tx desc dto */
369 iser_task->desc.type = ISCSI_TX_SCSI_COMMAND; 281 tx_desc->type = ISCSI_TX_SCSI_COMMAND;
370 send_dto = &iser_task->desc.dto; 282 iser_create_send_desc(iser_conn->ib_conn, tx_desc);
371 send_dto->task = iser_task;
372 iser_create_send_desc(iser_conn, &iser_task->desc);
373 283
374 if (hdr->flags & ISCSI_FLAG_CMD_READ) 284 if (hdr->flags & ISCSI_FLAG_CMD_READ)
375 data_buf = &iser_task->data[ISER_DIR_IN]; 285 data_buf = &iser_task->data[ISER_DIR_IN];
@@ -398,23 +308,13 @@ int iser_send_command(struct iscsi_conn *conn,
398 goto send_command_error; 308 goto send_command_error;
399 } 309 }
400 310
401 iser_reg_single(iser_conn->ib_conn->device,
402 send_dto->regd[0], DMA_TO_DEVICE);
403
404 if (iser_post_receive_control(conn) != 0) {
405 iser_err("post_recv failed!\n");
406 err = -ENOMEM;
407 goto send_command_error;
408 }
409
410 iser_task->status = ISER_TASK_STATUS_STARTED; 311 iser_task->status = ISER_TASK_STATUS_STARTED;
411 312
412 err = iser_post_send(&iser_task->desc); 313 err = iser_post_send(iser_conn->ib_conn, tx_desc);
413 if (!err) 314 if (!err)
414 return 0; 315 return 0;
415 316
416send_command_error: 317send_command_error:
417 iser_dto_buffs_release(send_dto);
418 iser_err("conn %p failed task->itt %d err %d\n",conn, task->itt, err); 318 iser_err("conn %p failed task->itt %d err %d\n",conn, task->itt, err);
419 return err; 319 return err;
420} 320}
@@ -428,20 +328,13 @@ int iser_send_data_out(struct iscsi_conn *conn,
428{ 328{
429 struct iscsi_iser_conn *iser_conn = conn->dd_data; 329 struct iscsi_iser_conn *iser_conn = conn->dd_data;
430 struct iscsi_iser_task *iser_task = task->dd_data; 330 struct iscsi_iser_task *iser_task = task->dd_data;
431 struct iser_desc *tx_desc = NULL; 331 struct iser_tx_desc *tx_desc = NULL;
432 struct iser_dto *send_dto = NULL; 332 struct iser_regd_buf *regd_buf;
433 unsigned long buf_offset; 333 unsigned long buf_offset;
434 unsigned long data_seg_len; 334 unsigned long data_seg_len;
435 uint32_t itt; 335 uint32_t itt;
436 int err = 0; 336 int err = 0;
437 337 struct ib_sge *tx_dsg;
438 if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
439 iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
440 return -EPERM;
441 }
442
443 if (iser_check_xmit(conn, task))
444 return -ENOBUFS;
445 338
446 itt = (__force uint32_t)hdr->itt; 339 itt = (__force uint32_t)hdr->itt;
447 data_seg_len = ntoh24(hdr->dlength); 340 data_seg_len = ntoh24(hdr->dlength);
@@ -450,28 +343,25 @@ int iser_send_data_out(struct iscsi_conn *conn,
450 iser_dbg("%s itt %d dseg_len %d offset %d\n", 343 iser_dbg("%s itt %d dseg_len %d offset %d\n",
451 __func__,(int)itt,(int)data_seg_len,(int)buf_offset); 344 __func__,(int)itt,(int)data_seg_len,(int)buf_offset);
452 345
453 tx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO); 346 tx_desc = kmem_cache_zalloc(ig.desc_cache, GFP_ATOMIC);
454 if (tx_desc == NULL) { 347 if (tx_desc == NULL) {
455 iser_err("Failed to alloc desc for post dataout\n"); 348 iser_err("Failed to alloc desc for post dataout\n");
456 return -ENOMEM; 349 return -ENOMEM;
457 } 350 }
458 351
459 tx_desc->type = ISCSI_TX_DATAOUT; 352 tx_desc->type = ISCSI_TX_DATAOUT;
353 tx_desc->iser_header.flags = ISER_VER;
460 memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr)); 354 memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));
461 355
462 /* build the tx desc regd header and add it to the tx desc dto */ 356 /* build the tx desc */
463 send_dto = &tx_desc->dto; 357 iser_initialize_task_headers(task, tx_desc);
464 send_dto->task = iser_task;
465 iser_create_send_desc(iser_conn, tx_desc);
466
467 iser_reg_single(iser_conn->ib_conn->device,
468 send_dto->regd[0], DMA_TO_DEVICE);
469 358
470 /* all data was registered for RDMA, we can use the lkey */ 359 regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];
471 iser_dto_add_regd_buff(send_dto, 360 tx_dsg = &tx_desc->tx_sg[1];
472 &iser_task->rdma_regd[ISER_DIR_OUT], 361 tx_dsg->addr = regd_buf->reg.va + buf_offset;
473 buf_offset, 362 tx_dsg->length = data_seg_len;
474 data_seg_len); 363 tx_dsg->lkey = regd_buf->reg.lkey;
364 tx_desc->num_sge = 2;
475 365
476 if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) { 366 if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
477 iser_err("Offset:%ld & DSL:%ld in Data-Out " 367 iser_err("Offset:%ld & DSL:%ld in Data-Out "
@@ -485,12 +375,11 @@ int iser_send_data_out(struct iscsi_conn *conn,
485 itt, buf_offset, data_seg_len); 375 itt, buf_offset, data_seg_len);
486 376
487 377
488 err = iser_post_send(tx_desc); 378 err = iser_post_send(iser_conn->ib_conn, tx_desc);
489 if (!err) 379 if (!err)
490 return 0; 380 return 0;
491 381
492send_data_out_error: 382send_data_out_error:
493 iser_dto_buffs_release(send_dto);
494 kmem_cache_free(ig.desc_cache, tx_desc); 383 kmem_cache_free(ig.desc_cache, tx_desc);
495 iser_err("conn %p failed err %d\n",conn, err); 384 iser_err("conn %p failed err %d\n",conn, err);
496 return err; 385 return err;
@@ -501,64 +390,44 @@ int iser_send_control(struct iscsi_conn *conn,
501{ 390{
502 struct iscsi_iser_conn *iser_conn = conn->dd_data; 391 struct iscsi_iser_conn *iser_conn = conn->dd_data;
503 struct iscsi_iser_task *iser_task = task->dd_data; 392 struct iscsi_iser_task *iser_task = task->dd_data;
504 struct iser_desc *mdesc = &iser_task->desc; 393 struct iser_tx_desc *mdesc = &iser_task->desc;
505 struct iser_dto *send_dto = NULL;
506 unsigned long data_seg_len; 394 unsigned long data_seg_len;
507 int err = 0; 395 int err = 0;
508 struct iser_regd_buf *regd_buf;
509 struct iser_device *device; 396 struct iser_device *device;
510 unsigned char opcode;
511
512 if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
513 iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
514 return -EPERM;
515 }
516
517 if (iser_check_xmit(conn, task))
518 return -ENOBUFS;
519 397
520 /* build the tx desc regd header and add it to the tx desc dto */ 398 /* build the tx desc regd header and add it to the tx desc dto */
521 mdesc->type = ISCSI_TX_CONTROL; 399 mdesc->type = ISCSI_TX_CONTROL;
522 send_dto = &mdesc->dto; 400 iser_create_send_desc(iser_conn->ib_conn, mdesc);
523 send_dto->task = NULL;
524 iser_create_send_desc(iser_conn, mdesc);
525 401
526 device = iser_conn->ib_conn->device; 402 device = iser_conn->ib_conn->device;
527 403
528 iser_reg_single(device, send_dto->regd[0], DMA_TO_DEVICE);
529
530 data_seg_len = ntoh24(task->hdr->dlength); 404 data_seg_len = ntoh24(task->hdr->dlength);
531 405
532 if (data_seg_len > 0) { 406 if (data_seg_len > 0) {
533 regd_buf = &mdesc->data_regd_buf; 407 struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
534 memset(regd_buf, 0, sizeof(struct iser_regd_buf)); 408 if (task != conn->login_task) {
535 regd_buf->device = device; 409 iser_err("data present on non login task!!!\n");
536 regd_buf->virt_addr = task->data; 410 goto send_control_error;
537 regd_buf->data_size = task->data_count; 411 }
538 iser_reg_single(device, regd_buf, 412 memcpy(iser_conn->ib_conn->login_buf, task->data,
539 DMA_TO_DEVICE); 413 task->data_count);
540 iser_dto_add_regd_buff(send_dto, regd_buf, 414 tx_dsg->addr = iser_conn->ib_conn->login_dma;
541 0, 415 tx_dsg->length = data_seg_len;
542 data_seg_len); 416 tx_dsg->lkey = device->mr->lkey;
417 mdesc->num_sge = 2;
543 } 418 }
544 419
545 opcode = task->hdr->opcode & ISCSI_OPCODE_MASK; 420 if (task == conn->login_task) {
546 421 err = iser_post_recvl(iser_conn->ib_conn);
547 /* post recv buffer for response if one is expected */ 422 if (err)
548 if (!(opcode == ISCSI_OP_NOOP_OUT && task->hdr->itt == RESERVED_ITT)) {
549 if (iser_post_receive_control(conn) != 0) {
550 iser_err("post_rcv_buff failed!\n");
551 err = -ENOMEM;
552 goto send_control_error; 423 goto send_control_error;
553 }
554 } 424 }
555 425
556 err = iser_post_send(mdesc); 426 err = iser_post_send(iser_conn->ib_conn, mdesc);
557 if (!err) 427 if (!err)
558 return 0; 428 return 0;
559 429
560send_control_error: 430send_control_error:
561 iser_dto_buffs_release(send_dto);
562 iser_err("conn %p failed err %d\n",conn, err); 431 iser_err("conn %p failed err %d\n",conn, err);
563 return err; 432 return err;
564} 433}
@@ -566,104 +435,71 @@ send_control_error:
566/** 435/**
567 * iser_rcv_dto_completion - recv DTO completion 436 * iser_rcv_dto_completion - recv DTO completion
568 */ 437 */
569void iser_rcv_completion(struct iser_desc *rx_desc, 438void iser_rcv_completion(struct iser_rx_desc *rx_desc,
570 unsigned long dto_xfer_len) 439 unsigned long rx_xfer_len,
440 struct iser_conn *ib_conn)
571{ 441{
572 struct iser_dto *dto = &rx_desc->dto; 442 struct iscsi_iser_conn *conn = ib_conn->iser_conn;
573 struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn;
574 struct iscsi_task *task;
575 struct iscsi_iser_task *iser_task;
576 struct iscsi_hdr *hdr; 443 struct iscsi_hdr *hdr;
577 char *rx_data = NULL; 444 u64 rx_dma;
578 int rx_data_len = 0; 445 int rx_buflen, outstanding, count, err;
579 unsigned char opcode; 446
580 447 /* differentiate between login to all other PDUs */
581 hdr = &rx_desc->iscsi_header; 448 if ((char *)rx_desc == ib_conn->login_buf) {
449 rx_dma = ib_conn->login_dma;
450 rx_buflen = ISER_RX_LOGIN_SIZE;
451 } else {
452 rx_dma = rx_desc->dma_addr;
453 rx_buflen = ISER_RX_PAYLOAD_SIZE;
454 }
582 455
583 iser_dbg("op 0x%x itt 0x%x\n", hdr->opcode,hdr->itt); 456 ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
457 rx_buflen, DMA_FROM_DEVICE);
584 458
585 if (dto_xfer_len > ISER_TOTAL_HEADERS_LEN) { /* we have data */ 459 hdr = &rx_desc->iscsi_header;
586 rx_data_len = dto_xfer_len - ISER_TOTAL_HEADERS_LEN;
587 rx_data = dto->regd[1]->virt_addr;
588 rx_data += dto->offset[1];
589 }
590 460
591 opcode = hdr->opcode & ISCSI_OPCODE_MASK; 461 iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
592 462 hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
593 if (opcode == ISCSI_OP_SCSI_CMD_RSP) {
594 spin_lock(&conn->iscsi_conn->session->lock);
595 task = iscsi_itt_to_ctask(conn->iscsi_conn, hdr->itt);
596 if (task)
597 __iscsi_get_task(task);
598 spin_unlock(&conn->iscsi_conn->session->lock);
599
600 if (!task)
601 iser_err("itt can't be matched to task!!! "
602 "conn %p opcode %d itt %d\n",
603 conn->iscsi_conn, opcode, hdr->itt);
604 else {
605 iser_task = task->dd_data;
606 iser_dbg("itt %d task %p\n",hdr->itt, task);
607 iser_task->status = ISER_TASK_STATUS_COMPLETED;
608 iser_task_rdma_finalize(iser_task);
609 iscsi_put_task(task);
610 }
611 }
612 iser_dto_buffs_release(dto);
613 463
614 iscsi_iser_recv(conn->iscsi_conn, hdr, rx_data, rx_data_len); 464 iscsi_iser_recv(conn->iscsi_conn, hdr,
465 rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN);
615 466
616 kfree(rx_desc->data); 467 ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
617 kmem_cache_free(ig.desc_cache, rx_desc); 468 rx_buflen, DMA_FROM_DEVICE);
618 469
619 /* decrementing conn->post_recv_buf_count only --after-- freeing the * 470 /* decrementing conn->post_recv_buf_count only --after-- freeing the *
 620 * task eliminates the need to worry about tasks which are completed in * 471 * task eliminates the need to worry about tasks which are completed in *
621 * parallel to the execution of iser_conn_term. So the code that waits * 472 * parallel to the execution of iser_conn_term. So the code that waits *
622 * for the posted rx bufs refcount to become zero handles everything */ 473 * for the posted rx bufs refcount to become zero handles everything */
623 atomic_dec(&conn->ib_conn->post_recv_buf_count); 474 conn->ib_conn->post_recv_buf_count--;
624 475
625 /* 476 if (rx_dma == ib_conn->login_dma)
626 * if an unexpected PDU was received then the recv wr consumed must 477 return;
627 * be replaced, this is done in the next send of a control-type PDU 478
628 */ 479 outstanding = ib_conn->post_recv_buf_count;
629 if (opcode == ISCSI_OP_NOOP_IN && hdr->itt == RESERVED_ITT) { 480 if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) {
630 /* nop-in with itt = 0xffffffff */ 481 count = min(ISER_QP_MAX_RECV_DTOS - outstanding,
631 atomic_inc(&conn->ib_conn->unexpected_pdu_count); 482 ISER_MIN_POSTED_RX);
632 } 483 err = iser_post_recvm(ib_conn, count);
633 else if (opcode == ISCSI_OP_ASYNC_EVENT) { 484 if (err)
634 /* asynchronous message */
635 atomic_inc(&conn->ib_conn->unexpected_pdu_count);
636 } 486 }
637 /* a reject PDU consumes the recv buf posted for the response */
638} 487}
639 488
640void iser_snd_completion(struct iser_desc *tx_desc) 489void iser_snd_completion(struct iser_tx_desc *tx_desc,
490 struct iser_conn *ib_conn)
641{ 491{
642 struct iser_dto *dto = &tx_desc->dto;
643 struct iser_conn *ib_conn = dto->ib_conn;
644 struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn;
645 struct iscsi_conn *conn = iser_conn->iscsi_conn;
646 struct iscsi_task *task; 492 struct iscsi_task *task;
647 int resume_tx = 0; 493 struct iser_device *device = ib_conn->device;
648
649 iser_dbg("Initiator, Data sent dto=0x%p\n", dto);
650
651 iser_dto_buffs_release(dto);
652 494
653 if (tx_desc->type == ISCSI_TX_DATAOUT) 495 if (tx_desc->type == ISCSI_TX_DATAOUT) {
496 ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
497 ISER_HEADERS_LEN, DMA_TO_DEVICE);
654 kmem_cache_free(ig.desc_cache, tx_desc); 498 kmem_cache_free(ig.desc_cache, tx_desc);
655 499 }
656 if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) ==
657 ISER_QP_MAX_REQ_DTOS)
658 resume_tx = 1;
659 500
660 atomic_dec(&ib_conn->post_send_buf_count); 501 atomic_dec(&ib_conn->post_send_buf_count);
661 502
662 if (resume_tx) {
663 iser_dbg("%ld resuming tx\n",jiffies);
664 iscsi_conn_queue_work(conn);
665 }
666
667 if (tx_desc->type == ISCSI_TX_CONTROL) { 503 if (tx_desc->type == ISCSI_TX_CONTROL) {
668 /* this arithmetic is legal by libiscsi dd_data allocation */ 504 /* this arithmetic is legal by libiscsi dd_data allocation */
669 task = (void *) ((long)(void *)tx_desc - 505 task = (void *) ((long)(void *)tx_desc -
@@ -692,7 +528,6 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
692 528
693void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) 529void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
694{ 530{
695 int deferred;
696 int is_rdma_aligned = 1; 531 int is_rdma_aligned = 1;
697 struct iser_regd_buf *regd; 532 struct iser_regd_buf *regd;
698 533
@@ -710,32 +545,17 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
710 545
711 if (iser_task->dir[ISER_DIR_IN]) { 546 if (iser_task->dir[ISER_DIR_IN]) {
712 regd = &iser_task->rdma_regd[ISER_DIR_IN]; 547 regd = &iser_task->rdma_regd[ISER_DIR_IN];
713 deferred = iser_regd_buff_release(regd); 548 if (regd->reg.is_fmr)
714 if (deferred) { 549 iser_unreg_mem(&regd->reg);
715 iser_err("%d references remain for BUF-IN rdma reg\n",
716 atomic_read(&regd->ref_count));
717 }
718 } 550 }
719 551
720 if (iser_task->dir[ISER_DIR_OUT]) { 552 if (iser_task->dir[ISER_DIR_OUT]) {
721 regd = &iser_task->rdma_regd[ISER_DIR_OUT]; 553 regd = &iser_task->rdma_regd[ISER_DIR_OUT];
722 deferred = iser_regd_buff_release(regd); 554 if (regd->reg.is_fmr)
723 if (deferred) { 555 iser_unreg_mem(&regd->reg);
724 iser_err("%d references remain for BUF-OUT rdma reg\n",
725 atomic_read(&regd->ref_count));
726 }
727 } 556 }
728 557
729 /* if the data was unaligned, it was already unmapped and then copied */ 558 /* if the data was unaligned, it was already unmapped and then copied */
730 if (is_rdma_aligned) 559 if (is_rdma_aligned)
731 iser_dma_unmap_task_data(iser_task); 560 iser_dma_unmap_task_data(iser_task);
732} 561}
733
734void iser_dto_buffs_release(struct iser_dto *dto)
735{
736 int i;
737
738 for (i = 0; i < dto->regd_vector_len; i++)
739 iser_regd_buff_release(dto->regd[i]);
740}
741
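Editor's note: the repost decision in iser_rcv_completion() above is a simple low-water-mark scheme. A self-contained sketch of the same arithmetic, with the assumed default ring of 128 and batch of 32 spelled out:

    /* Standalone illustration of the watermark logic, assuming a 128-entry
     * ring (ISER_QP_MAX_RECV_DTOS) and a 32-buffer batch (ISER_MIN_POSTED_RX). */
    static int example_rx_repost_count(int outstanding)
    {
    	int room = 128 - outstanding;	/* free slots in the RX ring       */
    
    	return room >= 32 ? 32 : 0;	/* repost a full batch, or nothing */
    }

Because reposting only happens once a whole batch worth of buffers has been consumed, the min() in the patch always degenerates to ISER_MIN_POSTED_RX.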
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 274c883ef3ea..fb88d6896b67 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -41,62 +41,6 @@
41#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */ 41#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */
42 42
43/** 43/**
44 * Decrements the reference count for the
45 * registered buffer & releases it
46 *
47 * returns 0 if released, 1 if deferred
48 */
49int iser_regd_buff_release(struct iser_regd_buf *regd_buf)
50{
51 struct ib_device *dev;
52
53 if ((atomic_read(&regd_buf->ref_count) == 0) ||
54 atomic_dec_and_test(&regd_buf->ref_count)) {
55 /* if we used the dma mr, unreg is just NOP */
56 if (regd_buf->reg.is_fmr)
57 iser_unreg_mem(&regd_buf->reg);
58
59 if (regd_buf->dma_addr) {
60 dev = regd_buf->device->ib_device;
61 ib_dma_unmap_single(dev,
62 regd_buf->dma_addr,
63 regd_buf->data_size,
64 regd_buf->direction);
65 }
66 /* else this regd buf is associated with task which we */
67 /* dma_unmap_single/sg later */
68 return 0;
69 } else {
70 iser_dbg("Release deferred, regd.buff: 0x%p\n", regd_buf);
71 return 1;
72 }
73}
74
75/**
76 * iser_reg_single - fills registered buffer descriptor with
77 * registration information
78 */
79void iser_reg_single(struct iser_device *device,
80 struct iser_regd_buf *regd_buf,
81 enum dma_data_direction direction)
82{
83 u64 dma_addr;
84
85 dma_addr = ib_dma_map_single(device->ib_device,
86 regd_buf->virt_addr,
87 regd_buf->data_size, direction);
88 BUG_ON(ib_dma_mapping_error(device->ib_device, dma_addr));
89
90 regd_buf->reg.lkey = device->mr->lkey;
91 regd_buf->reg.len = regd_buf->data_size;
92 regd_buf->reg.va = dma_addr;
93 regd_buf->reg.is_fmr = 0;
94
95 regd_buf->dma_addr = dma_addr;
96 regd_buf->direction = direction;
97}
98
99/**
100 * iser_start_rdma_unaligned_sg 44 * iser_start_rdma_unaligned_sg
101 */ 45 */
102static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, 46static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
@@ -109,10 +53,10 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
109 unsigned long cmd_data_len = data->data_len; 53 unsigned long cmd_data_len = data->data_len;
110 54
111 if (cmd_data_len > ISER_KMALLOC_THRESHOLD) 55 if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
112 mem = (void *)__get_free_pages(GFP_NOIO, 56 mem = (void *)__get_free_pages(GFP_ATOMIC,
113 ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT); 57 ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
114 else 58 else
115 mem = kmalloc(cmd_data_len, GFP_NOIO); 59 mem = kmalloc(cmd_data_len, GFP_ATOMIC);
116 60
117 if (mem == NULL) { 61 if (mem == NULL) {
118 iser_err("Failed to allocate mem size %d %d for copying sglist\n", 62 iser_err("Failed to allocate mem size %d %d for copying sglist\n",
@@ -474,9 +418,5 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
474 return err; 418 return err;
475 } 419 }
476 } 420 }
477
478 /* take a reference on this regd buf such that it will not be released *
479 * (eg in send dto completion) before we get the scsi response */
480 atomic_inc(&regd_buf->ref_count);
481 return 0; 421 return 0;
482} 422}
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 8579f32ce38e..308d17bb5146 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -37,9 +37,8 @@
37#include "iscsi_iser.h" 37#include "iscsi_iser.h"
38 38
39#define ISCSI_ISER_MAX_CONN 8 39#define ISCSI_ISER_MAX_CONN 8
40#define ISER_MAX_CQ_LEN ((ISER_QP_MAX_RECV_DTOS + \ 40#define ISER_MAX_RX_CQ_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
41 ISER_QP_MAX_REQ_DTOS) * \ 41#define ISER_MAX_TX_CQ_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)
42 ISCSI_ISER_MAX_CONN)
43 42
44static void iser_cq_tasklet_fn(unsigned long data); 43static void iser_cq_tasklet_fn(unsigned long data);
45static void iser_cq_callback(struct ib_cq *cq, void *cq_context); 44static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
@@ -67,15 +66,23 @@ static int iser_create_device_ib_res(struct iser_device *device)
67 if (IS_ERR(device->pd)) 66 if (IS_ERR(device->pd))
68 goto pd_err; 67 goto pd_err;
69 68
70 device->cq = ib_create_cq(device->ib_device, 69 device->rx_cq = ib_create_cq(device->ib_device,
71 iser_cq_callback, 70 iser_cq_callback,
72 iser_cq_event_callback, 71 iser_cq_event_callback,
73 (void *)device, 72 (void *)device,
74 ISER_MAX_CQ_LEN, 0); 73 ISER_MAX_RX_CQ_LEN, 0);
75 if (IS_ERR(device->cq)) 74 if (IS_ERR(device->rx_cq))
76 goto cq_err; 75 goto rx_cq_err;
77 76
78 if (ib_req_notify_cq(device->cq, IB_CQ_NEXT_COMP)) 77 device->tx_cq = ib_create_cq(device->ib_device,
78 NULL, iser_cq_event_callback,
79 (void *)device,
80 ISER_MAX_TX_CQ_LEN, 0);
81
82 if (IS_ERR(device->tx_cq))
83 goto tx_cq_err;
84
85 if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP))
79 goto cq_arm_err; 86 goto cq_arm_err;
80 87
81 tasklet_init(&device->cq_tasklet, 88 tasklet_init(&device->cq_tasklet,
@@ -93,8 +100,10 @@ static int iser_create_device_ib_res(struct iser_device *device)
93dma_mr_err: 100dma_mr_err:
94 tasklet_kill(&device->cq_tasklet); 101 tasklet_kill(&device->cq_tasklet);
95cq_arm_err: 102cq_arm_err:
96 ib_destroy_cq(device->cq); 103 ib_destroy_cq(device->tx_cq);
97cq_err: 104tx_cq_err:
105 ib_destroy_cq(device->rx_cq);
106rx_cq_err:
98 ib_dealloc_pd(device->pd); 107 ib_dealloc_pd(device->pd);
99pd_err: 108pd_err:
100 iser_err("failed to allocate an IB resource\n"); 109 iser_err("failed to allocate an IB resource\n");
@@ -112,11 +121,13 @@ static void iser_free_device_ib_res(struct iser_device *device)
112 tasklet_kill(&device->cq_tasklet); 121 tasklet_kill(&device->cq_tasklet);
113 122
114 (void)ib_dereg_mr(device->mr); 123 (void)ib_dereg_mr(device->mr);
115 (void)ib_destroy_cq(device->cq); 124 (void)ib_destroy_cq(device->tx_cq);
125 (void)ib_destroy_cq(device->rx_cq);
116 (void)ib_dealloc_pd(device->pd); 126 (void)ib_dealloc_pd(device->pd);
117 127
118 device->mr = NULL; 128 device->mr = NULL;
119 device->cq = NULL; 129 device->tx_cq = NULL;
130 device->rx_cq = NULL;
120 device->pd = NULL; 131 device->pd = NULL;
121} 132}
122 133
@@ -129,13 +140,23 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
129{ 140{
130 struct iser_device *device; 141 struct iser_device *device;
131 struct ib_qp_init_attr init_attr; 142 struct ib_qp_init_attr init_attr;
132 int ret; 143 int ret = -ENOMEM;
133 struct ib_fmr_pool_param params; 144 struct ib_fmr_pool_param params;
134 145
135 BUG_ON(ib_conn->device == NULL); 146 BUG_ON(ib_conn->device == NULL);
136 147
137 device = ib_conn->device; 148 device = ib_conn->device;
138 149
150 ib_conn->login_buf = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
151 if (!ib_conn->login_buf) {
 152 ret = -ENOMEM;
 153 goto alloc_err;
154 }
155
156 ib_conn->login_dma = ib_dma_map_single(ib_conn->device->ib_device,
157 (void *)ib_conn->login_buf, ISER_RX_LOGIN_SIZE,
158 DMA_FROM_DEVICE);
159
139 ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) + 160 ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
140 (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)), 161 (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
141 GFP_KERNEL); 162 GFP_KERNEL);
@@ -169,12 +190,12 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
169 190
170 init_attr.event_handler = iser_qp_event_callback; 191 init_attr.event_handler = iser_qp_event_callback;
171 init_attr.qp_context = (void *)ib_conn; 192 init_attr.qp_context = (void *)ib_conn;
172 init_attr.send_cq = device->cq; 193 init_attr.send_cq = device->tx_cq;
173 init_attr.recv_cq = device->cq; 194 init_attr.recv_cq = device->rx_cq;
174 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; 195 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
175 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; 196 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
176 init_attr.cap.max_send_sge = MAX_REGD_BUF_VECTOR_LEN; 197 init_attr.cap.max_send_sge = 2;
177 init_attr.cap.max_recv_sge = 2; 198 init_attr.cap.max_recv_sge = 1;
178 init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 199 init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
179 init_attr.qp_type = IB_QPT_RC; 200 init_attr.qp_type = IB_QPT_RC;
180 201
@@ -192,6 +213,7 @@ qp_err:
192 (void)ib_destroy_fmr_pool(ib_conn->fmr_pool); 213 (void)ib_destroy_fmr_pool(ib_conn->fmr_pool);
193fmr_pool_err: 214fmr_pool_err:
194 kfree(ib_conn->page_vec); 215 kfree(ib_conn->page_vec);
216 kfree(ib_conn->login_buf);
195alloc_err: 217alloc_err:
196 iser_err("unable to alloc mem or create resource, err %d\n", ret); 218 iser_err("unable to alloc mem or create resource, err %d\n", ret);
197 return ret; 219 return ret;
@@ -278,17 +300,6 @@ static void iser_device_try_release(struct iser_device *device)
278 mutex_unlock(&ig.device_list_mutex); 300 mutex_unlock(&ig.device_list_mutex);
279} 301}
280 302
281int iser_conn_state_comp(struct iser_conn *ib_conn,
282 enum iser_ib_conn_state comp)
283{
284 int ret;
285
286 spin_lock_bh(&ib_conn->lock);
287 ret = (ib_conn->state == comp);
288 spin_unlock_bh(&ib_conn->lock);
289 return ret;
290}
291
292static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, 303static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
293 enum iser_ib_conn_state comp, 304 enum iser_ib_conn_state comp,
294 enum iser_ib_conn_state exch) 305 enum iser_ib_conn_state exch)
@@ -314,7 +325,7 @@ static void iser_conn_release(struct iser_conn *ib_conn)
314 mutex_lock(&ig.connlist_mutex); 325 mutex_lock(&ig.connlist_mutex);
315 list_del(&ib_conn->conn_list); 326 list_del(&ib_conn->conn_list);
316 mutex_unlock(&ig.connlist_mutex); 327 mutex_unlock(&ig.connlist_mutex);
317 328 iser_free_rx_descriptors(ib_conn);
318 iser_free_ib_conn_res(ib_conn); 329 iser_free_ib_conn_res(ib_conn);
319 ib_conn->device = NULL; 330 ib_conn->device = NULL;
320 /* on EVENT_ADDR_ERROR there's no device yet for this conn */ 331 /* on EVENT_ADDR_ERROR there's no device yet for this conn */
@@ -442,7 +453,7 @@ static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
442 ISCSI_ERR_CONN_FAILED); 453 ISCSI_ERR_CONN_FAILED);
443 454
444 /* Complete the termination process if no posts are pending */ 455 /* Complete the termination process if no posts are pending */
445 if ((atomic_read(&ib_conn->post_recv_buf_count) == 0) && 456 if (ib_conn->post_recv_buf_count == 0 &&
446 (atomic_read(&ib_conn->post_send_buf_count) == 0)) { 457 (atomic_read(&ib_conn->post_send_buf_count) == 0)) {
447 ib_conn->state = ISER_CONN_DOWN; 458 ib_conn->state = ISER_CONN_DOWN;
448 wake_up_interruptible(&ib_conn->wait); 459 wake_up_interruptible(&ib_conn->wait);
@@ -489,9 +500,8 @@ void iser_conn_init(struct iser_conn *ib_conn)
489{ 500{
490 ib_conn->state = ISER_CONN_INIT; 501 ib_conn->state = ISER_CONN_INIT;
491 init_waitqueue_head(&ib_conn->wait); 502 init_waitqueue_head(&ib_conn->wait);
492 atomic_set(&ib_conn->post_recv_buf_count, 0); 503 ib_conn->post_recv_buf_count = 0;
493 atomic_set(&ib_conn->post_send_buf_count, 0); 504 atomic_set(&ib_conn->post_send_buf_count, 0);
494 atomic_set(&ib_conn->unexpected_pdu_count, 0);
495 atomic_set(&ib_conn->refcount, 1); 505 atomic_set(&ib_conn->refcount, 1);
496 INIT_LIST_HEAD(&ib_conn->conn_list); 506 INIT_LIST_HEAD(&ib_conn->conn_list);
497 spin_lock_init(&ib_conn->lock); 507 spin_lock_init(&ib_conn->lock);
@@ -626,136 +636,97 @@ void iser_unreg_mem(struct iser_mem_reg *reg)
626 reg->mem_h = NULL; 636 reg->mem_h = NULL;
627} 637}
628 638
629/** 639int iser_post_recvl(struct iser_conn *ib_conn)
630 * iser_dto_to_iov - builds IOV from a dto descriptor
631 */
632static void iser_dto_to_iov(struct iser_dto *dto, struct ib_sge *iov, int iov_len)
633{ 640{
634 int i; 641 struct ib_recv_wr rx_wr, *rx_wr_failed;
635 struct ib_sge *sge; 642 struct ib_sge sge;
636 struct iser_regd_buf *regd_buf; 643 int ib_ret;
637
638 if (dto->regd_vector_len > iov_len) {
639 iser_err("iov size %d too small for posting dto of len %d\n",
640 iov_len, dto->regd_vector_len);
641 BUG();
642 }
643 644
644 for (i = 0; i < dto->regd_vector_len; i++) { 645 sge.addr = ib_conn->login_dma;
645 sge = &iov[i]; 646 sge.length = ISER_RX_LOGIN_SIZE;
646 regd_buf = dto->regd[i]; 647 sge.lkey = ib_conn->device->mr->lkey;
647
648 sge->addr = regd_buf->reg.va;
649 sge->length = regd_buf->reg.len;
650 sge->lkey = regd_buf->reg.lkey;
651
652 if (dto->used_sz[i] > 0) /* Adjust size */
653 sge->length = dto->used_sz[i];
654
655 /* offset and length should not exceed the regd buf length */
656 if (sge->length + dto->offset[i] > regd_buf->reg.len) {
657 iser_err("Used len:%ld + offset:%d, exceed reg.buf.len:"
658 "%ld in dto:0x%p [%d], va:0x%08lX\n",
659 (unsigned long)sge->length, dto->offset[i],
660 (unsigned long)regd_buf->reg.len, dto, i,
661 (unsigned long)sge->addr);
662 BUG();
663 }
664 648
665 sge->addr += dto->offset[i]; /* Adjust offset */ 649 rx_wr.wr_id = (unsigned long)ib_conn->login_buf;
650 rx_wr.sg_list = &sge;
651 rx_wr.num_sge = 1;
652 rx_wr.next = NULL;
653
654 ib_conn->post_recv_buf_count++;
655 ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
656 if (ib_ret) {
657 iser_err("ib_post_recv failed ret=%d\n", ib_ret);
658 ib_conn->post_recv_buf_count--;
666 } 659 }
660 return ib_ret;
667} 661}
668 662
669/** 663int iser_post_recvm(struct iser_conn *ib_conn, int count)
670 * iser_post_recv - Posts a receive buffer.
671 *
672 * returns 0 on success, -1 on failure
673 */
674int iser_post_recv(struct iser_desc *rx_desc)
675{ 664{
676 int ib_ret, ret_val = 0; 665 struct ib_recv_wr *rx_wr, *rx_wr_failed;
677 struct ib_recv_wr recv_wr, *recv_wr_failed; 666 int i, ib_ret;
678 struct ib_sge iov[2]; 667 unsigned int my_rx_head = ib_conn->rx_desc_head;
679 struct iser_conn *ib_conn; 668 struct iser_rx_desc *rx_desc;
680 struct iser_dto *recv_dto = &rx_desc->dto; 669
681 670 for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
682 /* Retrieve conn */ 671 rx_desc = &ib_conn->rx_descs[my_rx_head];
683 ib_conn = recv_dto->ib_conn; 672 rx_wr->wr_id = (unsigned long)rx_desc;
684 673 rx_wr->sg_list = &rx_desc->rx_sg;
685 iser_dto_to_iov(recv_dto, iov, 2); 674 rx_wr->num_sge = 1;
675 rx_wr->next = rx_wr + 1;
676 my_rx_head = (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1);
677 }
686 678
687 recv_wr.next = NULL; 679 rx_wr--;
688 recv_wr.sg_list = iov; 680 rx_wr->next = NULL; /* mark end of work requests list */
689 recv_wr.num_sge = recv_dto->regd_vector_len;
690 recv_wr.wr_id = (unsigned long)rx_desc;
691 681
692 atomic_inc(&ib_conn->post_recv_buf_count); 682 ib_conn->post_recv_buf_count += count;
693 ib_ret = ib_post_recv(ib_conn->qp, &recv_wr, &recv_wr_failed); 683 ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
694 if (ib_ret) { 684 if (ib_ret) {
695 iser_err("ib_post_recv failed ret=%d\n", ib_ret); 685 iser_err("ib_post_recv failed ret=%d\n", ib_ret);
696 atomic_dec(&ib_conn->post_recv_buf_count); 686 ib_conn->post_recv_buf_count -= count;
697 ret_val = -1; 687 } else
698 } 688 ib_conn->rx_desc_head = my_rx_head;
699 689 return ib_ret;
700 return ret_val;
701} 690}
702 691
692
703/** 693/**
704 * iser_start_send - Initiate a Send DTO operation 694 * iser_start_send - Initiate a Send DTO operation
705 * 695 *
706 * returns 0 on success, -1 on failure 696 * returns 0 on success, -1 on failure
707 */ 697 */
708int iser_post_send(struct iser_desc *tx_desc) 698int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc)
709{ 699{
710 int ib_ret, ret_val = 0; 700 int ib_ret;
711 struct ib_send_wr send_wr, *send_wr_failed; 701 struct ib_send_wr send_wr, *send_wr_failed;
712 struct ib_sge iov[MAX_REGD_BUF_VECTOR_LEN];
713 struct iser_conn *ib_conn;
714 struct iser_dto *dto = &tx_desc->dto;
715 702
716 ib_conn = dto->ib_conn; 703 ib_dma_sync_single_for_device(ib_conn->device->ib_device,
717 704 tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
718 iser_dto_to_iov(dto, iov, MAX_REGD_BUF_VECTOR_LEN);
719 705
720 send_wr.next = NULL; 706 send_wr.next = NULL;
721 send_wr.wr_id = (unsigned long)tx_desc; 707 send_wr.wr_id = (unsigned long)tx_desc;
722 send_wr.sg_list = iov; 708 send_wr.sg_list = tx_desc->tx_sg;
723 send_wr.num_sge = dto->regd_vector_len; 709 send_wr.num_sge = tx_desc->num_sge;
724 send_wr.opcode = IB_WR_SEND; 710 send_wr.opcode = IB_WR_SEND;
725 send_wr.send_flags = dto->notify_enable ? IB_SEND_SIGNALED : 0; 711 send_wr.send_flags = IB_SEND_SIGNALED;
726 712
727 atomic_inc(&ib_conn->post_send_buf_count); 713 atomic_inc(&ib_conn->post_send_buf_count);
728 714
729 ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed); 715 ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
730 if (ib_ret) { 716 if (ib_ret) {
731 iser_err("Failed to start SEND DTO, dto: 0x%p, IOV len: %d\n",
732 dto, dto->regd_vector_len);
733 iser_err("ib_post_send failed, ret:%d\n", ib_ret); 717 iser_err("ib_post_send failed, ret:%d\n", ib_ret);
734 atomic_dec(&ib_conn->post_send_buf_count); 718 atomic_dec(&ib_conn->post_send_buf_count);
735 ret_val = -1;
736 } 719 }
737 720 return ib_ret;
738 return ret_val;
739} 721}
740 722
741static void iser_handle_comp_error(struct iser_desc *desc) 723static void iser_handle_comp_error(struct iser_tx_desc *desc,
724 struct iser_conn *ib_conn)
742{ 725{
743 struct iser_dto *dto = &desc->dto; 726 if (desc && desc->type == ISCSI_TX_DATAOUT)
744 struct iser_conn *ib_conn = dto->ib_conn;
745
746 iser_dto_buffs_release(dto);
747
748 if (desc->type == ISCSI_RX) {
749 kfree(desc->data);
750 kmem_cache_free(ig.desc_cache, desc); 727 kmem_cache_free(ig.desc_cache, desc);
751 atomic_dec(&ib_conn->post_recv_buf_count);
752 } else { /* type is TX control/command/dataout */
753 if (desc->type == ISCSI_TX_DATAOUT)
754 kmem_cache_free(ig.desc_cache, desc);
755 atomic_dec(&ib_conn->post_send_buf_count);
756 }
757 728
758 if (atomic_read(&ib_conn->post_recv_buf_count) == 0 && 729 if (ib_conn->post_recv_buf_count == 0 &&
759 atomic_read(&ib_conn->post_send_buf_count) == 0) { 730 atomic_read(&ib_conn->post_send_buf_count) == 0) {
760 /* getting here when the state is UP means that the conn is * 731 /* getting here when the state is UP means that the conn is *
761 * being terminated asynchronously from the iSCSI layer's * 732 * being terminated asynchronously from the iSCSI layer's *
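Editor's note: one subtlety in iser_post_recvm() above is that the head index wraps with a bitmask, which is only correct when the ring size is a power of two — true for the assumed default of 128, but worth keeping in mind if ISER_QP_MAX_RECV_DTOS ever changes:

    /* Wraparound as used in iser_post_recvm(); valid only for power-of-two rings: */
    my_rx_head = (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1);
    /* e.g. with a 128-entry ring: 127 -> 0, 5 -> 6.  A non-power-of-two size
     * would need the slower modulo form: (my_rx_head + 1) % ring_size.            */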
@@ -774,32 +745,74 @@ static void iser_handle_comp_error(struct iser_desc *desc)
774 } 745 }
775} 746}
776 747
748static int iser_drain_tx_cq(struct iser_device *device)
749{
750 struct ib_cq *cq = device->tx_cq;
751 struct ib_wc wc;
752 struct iser_tx_desc *tx_desc;
753 struct iser_conn *ib_conn;
754 int completed_tx = 0;
755
756 while (ib_poll_cq(cq, 1, &wc) == 1) {
757 tx_desc = (struct iser_tx_desc *) (unsigned long) wc.wr_id;
758 ib_conn = wc.qp->qp_context;
759 if (wc.status == IB_WC_SUCCESS) {
760 if (wc.opcode == IB_WC_SEND)
761 iser_snd_completion(tx_desc, ib_conn);
762 else
763 iser_err("expected opcode %d got %d\n",
764 IB_WC_SEND, wc.opcode);
765 } else {
766 iser_err("tx id %llx status %d vend_err %x\n",
767 wc.wr_id, wc.status, wc.vendor_err);
768 atomic_dec(&ib_conn->post_send_buf_count);
769 iser_handle_comp_error(tx_desc, ib_conn);
770 }
771 completed_tx++;
772 }
773 return completed_tx;
774}
775
776
777static void iser_cq_tasklet_fn(unsigned long data) 777static void iser_cq_tasklet_fn(unsigned long data)
778{ 778{
779 struct iser_device *device = (struct iser_device *)data; 779 struct iser_device *device = (struct iser_device *)data;
780 struct ib_cq *cq = device->cq; 780 struct ib_cq *cq = device->rx_cq;
781 struct ib_wc wc; 781 struct ib_wc wc;
782 struct iser_desc *desc; 782 struct iser_rx_desc *desc;
783 unsigned long xfer_len; 783 unsigned long xfer_len;
784 struct iser_conn *ib_conn;
785 int completed_tx, completed_rx;
786 completed_tx = completed_rx = 0;
784 787
785 while (ib_poll_cq(cq, 1, &wc) == 1) { 788 while (ib_poll_cq(cq, 1, &wc) == 1) {
786 desc = (struct iser_desc *) (unsigned long) wc.wr_id; 789 desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id;
787 BUG_ON(desc == NULL); 790 BUG_ON(desc == NULL);
788 791 ib_conn = wc.qp->qp_context;
789 if (wc.status == IB_WC_SUCCESS) { 792 if (wc.status == IB_WC_SUCCESS) {
790 if (desc->type == ISCSI_RX) { 793 if (wc.opcode == IB_WC_RECV) {
791 xfer_len = (unsigned long)wc.byte_len; 794 xfer_len = (unsigned long)wc.byte_len;
792 iser_rcv_completion(desc, xfer_len); 795 iser_rcv_completion(desc, xfer_len, ib_conn);
793 } else /* type == ISCSI_TX_CONTROL/SCSI_CMD/DOUT */ 796 } else
794 iser_snd_completion(desc); 797 iser_err("expected opcode %d got %d\n",
798 IB_WC_RECV, wc.opcode);
795 } else { 799 } else {
796 iser_err("comp w. error op %d status %d\n",desc->type,wc.status); 800 if (wc.status != IB_WC_WR_FLUSH_ERR)
797 iser_handle_comp_error(desc); 801 iser_err("rx id %llx status %d vend_err %x\n",
802 wc.wr_id, wc.status, wc.vendor_err);
803 ib_conn->post_recv_buf_count--;
804 iser_handle_comp_error(NULL, ib_conn);
798 } 805 }
806 completed_rx++;
807 if (!(completed_rx & 63))
808 completed_tx += iser_drain_tx_cq(device);
799 } 809 }
800 /* #warning "it is assumed here that arming CQ only once it's empty" * 810 /* #warning "it is assumed here that arming CQ only once it's empty" *
801 * " would not cause interrupts to be missed" */ 811 * " would not cause interrupts to be missed" */
802 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 812 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
813
814 completed_tx += iser_drain_tx_cq(device);
815 iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
803} 816}
804 817
805static void iser_cq_callback(struct ib_cq *cq, void *cq_context) 818static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
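
Editor's note on the iser_verbs.c changes above: the tasklet now polls only the RX CQ, and iser_drain_tx_cq() reaps send completions from the separate TX CQ in batches, once every 64 RX completions and once more after the RX CQ runs dry. Below is a minimal stand-alone C sketch of that batching shape; poll_rx() and poll_tx() are hypothetical stand-ins for ib_poll_cq() on rx_cq and tx_cq, not driver code.

#include <stdio.h>

static int rx_left = 200, tx_left = 10;

static int poll_rx(void)
{
	if (!rx_left)
		return 0;
	rx_left--;
	return 1;
}

static int poll_tx(void)
{
	int n = tx_left;

	tx_left = 0;
	return n;
}

int main(void)
{
	int completed_rx = 0, completed_tx = 0;

	while (poll_rx()) {
		completed_rx++;
		if (!(completed_rx & 63))	/* every 64 RX completions, drain TX */
			completed_tx += poll_tx();
	}
	completed_tx += poll_tx();		/* final TX drain, as at the end of the tasklet */

	printf("got %d rx %d tx completions\n", completed_rx, completed_tx);
	return 0;
}
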
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 54c8fe25c423..ed3f9ebae882 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -80,7 +80,8 @@ MODULE_PARM_DESC(mellanox_workarounds,
80 80
81static void srp_add_one(struct ib_device *device); 81static void srp_add_one(struct ib_device *device);
82static void srp_remove_one(struct ib_device *device); 82static void srp_remove_one(struct ib_device *device);
83static void srp_completion(struct ib_cq *cq, void *target_ptr); 83static void srp_recv_completion(struct ib_cq *cq, void *target_ptr);
84static void srp_send_completion(struct ib_cq *cq, void *target_ptr);
84static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); 85static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
85 86
86static struct scsi_transport_template *ib_srp_transport_template; 87static struct scsi_transport_template *ib_srp_transport_template;
@@ -227,14 +228,21 @@ static int srp_create_target_ib(struct srp_target_port *target)
227 if (!init_attr) 228 if (!init_attr)
228 return -ENOMEM; 229 return -ENOMEM;
229 230
230 target->cq = ib_create_cq(target->srp_host->srp_dev->dev, 231 target->recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
231 srp_completion, NULL, target, SRP_CQ_SIZE, 0); 232 srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0);
232 if (IS_ERR(target->cq)) { 233 if (IS_ERR(target->recv_cq)) {
233 ret = PTR_ERR(target->cq); 234 ret = PTR_ERR(target->recv_cq);
234 goto out; 235 goto err;
235 } 236 }
236 237
237 ib_req_notify_cq(target->cq, IB_CQ_NEXT_COMP); 238 target->send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
239 srp_send_completion, NULL, target, SRP_SQ_SIZE, 0);
240 if (IS_ERR(target->send_cq)) {
241 ret = PTR_ERR(target->send_cq);
242 goto err_recv_cq;
243 }
244
245 ib_req_notify_cq(target->recv_cq, IB_CQ_NEXT_COMP);
238 246
239 init_attr->event_handler = srp_qp_event; 247 init_attr->event_handler = srp_qp_event;
240 init_attr->cap.max_send_wr = SRP_SQ_SIZE; 248 init_attr->cap.max_send_wr = SRP_SQ_SIZE;
@@ -243,24 +251,32 @@ static int srp_create_target_ib(struct srp_target_port *target)
243 init_attr->cap.max_send_sge = 1; 251 init_attr->cap.max_send_sge = 1;
244 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; 252 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
245 init_attr->qp_type = IB_QPT_RC; 253 init_attr->qp_type = IB_QPT_RC;
246 init_attr->send_cq = target->cq; 254 init_attr->send_cq = target->send_cq;
247 init_attr->recv_cq = target->cq; 255 init_attr->recv_cq = target->recv_cq;
248 256
249 target->qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr); 257 target->qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr);
250 if (IS_ERR(target->qp)) { 258 if (IS_ERR(target->qp)) {
251 ret = PTR_ERR(target->qp); 259 ret = PTR_ERR(target->qp);
252 ib_destroy_cq(target->cq); 260 goto err_send_cq;
253 goto out;
254 } 261 }
255 262
256 ret = srp_init_qp(target, target->qp); 263 ret = srp_init_qp(target, target->qp);
257 if (ret) { 264 if (ret)
258 ib_destroy_qp(target->qp); 265 goto err_qp;
259 ib_destroy_cq(target->cq);
260 goto out;
261 }
262 266
263out: 267 kfree(init_attr);
268 return 0;
269
270err_qp:
271 ib_destroy_qp(target->qp);
272
273err_send_cq:
274 ib_destroy_cq(target->send_cq);
275
276err_recv_cq:
277 ib_destroy_cq(target->recv_cq);
278
279err:
264 kfree(init_attr); 280 kfree(init_attr);
265 return ret; 281 return ret;
266} 282}
@@ -270,7 +286,8 @@ static void srp_free_target_ib(struct srp_target_port *target)
270 int i; 286 int i;
271 287
272 ib_destroy_qp(target->qp); 288 ib_destroy_qp(target->qp);
273 ib_destroy_cq(target->cq); 289 ib_destroy_cq(target->send_cq);
290 ib_destroy_cq(target->recv_cq);
274 291
275 for (i = 0; i < SRP_RQ_SIZE; ++i) 292 for (i = 0; i < SRP_RQ_SIZE; ++i)
276 srp_free_iu(target->srp_host, target->rx_ring[i]); 293 srp_free_iu(target->srp_host, target->rx_ring[i]);
@@ -568,7 +585,9 @@ static int srp_reconnect_target(struct srp_target_port *target)
568 if (ret) 585 if (ret)
569 goto err; 586 goto err;
570 587
571 while (ib_poll_cq(target->cq, 1, &wc) > 0) 588 while (ib_poll_cq(target->recv_cq, 1, &wc) > 0)
589 ; /* nothing */
590 while (ib_poll_cq(target->send_cq, 1, &wc) > 0)
572 ; /* nothing */ 591 ; /* nothing */
573 592
574 spin_lock_irq(target->scsi_host->host_lock); 593 spin_lock_irq(target->scsi_host->host_lock);
@@ -851,7 +870,7 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
851 struct srp_iu *iu; 870 struct srp_iu *iu;
852 u8 opcode; 871 u8 opcode;
853 872
854 iu = target->rx_ring[wc->wr_id & ~SRP_OP_RECV]; 873 iu = target->rx_ring[wc->wr_id];
855 874
856 dev = target->srp_host->srp_dev->dev; 875 dev = target->srp_host->srp_dev->dev;
857 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_ti_iu_len, 876 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_ti_iu_len,
@@ -898,7 +917,7 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
898 DMA_FROM_DEVICE); 917 DMA_FROM_DEVICE);
899} 918}
900 919
901static void srp_completion(struct ib_cq *cq, void *target_ptr) 920static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
902{ 921{
903 struct srp_target_port *target = target_ptr; 922 struct srp_target_port *target = target_ptr;
904 struct ib_wc wc; 923 struct ib_wc wc;
@@ -907,17 +926,31 @@ static void srp_completion(struct ib_cq *cq, void *target_ptr)
907 while (ib_poll_cq(cq, 1, &wc) > 0) { 926 while (ib_poll_cq(cq, 1, &wc) > 0) {
908 if (wc.status) { 927 if (wc.status) {
909 shost_printk(KERN_ERR, target->scsi_host, 928 shost_printk(KERN_ERR, target->scsi_host,
910 PFX "failed %s status %d\n", 929 PFX "failed receive status %d\n",
911 wc.wr_id & SRP_OP_RECV ? "receive" : "send",
912 wc.status); 930 wc.status);
913 target->qp_in_error = 1; 931 target->qp_in_error = 1;
914 break; 932 break;
915 } 933 }
916 934
917 if (wc.wr_id & SRP_OP_RECV) 935 srp_handle_recv(target, &wc);
918 srp_handle_recv(target, &wc); 936 }
919 else 937}
920 ++target->tx_tail; 938
939static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
940{
941 struct srp_target_port *target = target_ptr;
942 struct ib_wc wc;
943
944 while (ib_poll_cq(cq, 1, &wc) > 0) {
945 if (wc.status) {
946 shost_printk(KERN_ERR, target->scsi_host,
947 PFX "failed send status %d\n",
948 wc.status);
949 target->qp_in_error = 1;
950 break;
951 }
952
953 ++target->tx_tail;
921 } 954 }
922} 955}
923 956
@@ -930,7 +963,7 @@ static int __srp_post_recv(struct srp_target_port *target)
930 int ret; 963 int ret;
931 964
932 next = target->rx_head & (SRP_RQ_SIZE - 1); 965 next = target->rx_head & (SRP_RQ_SIZE - 1);
933 wr.wr_id = next | SRP_OP_RECV; 966 wr.wr_id = next;
934 iu = target->rx_ring[next]; 967 iu = target->rx_ring[next];
935 968
936 list.addr = iu->dma; 969 list.addr = iu->dma;
@@ -970,6 +1003,8 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target,
970{ 1003{
971 s32 min = (req_type == SRP_REQ_TASK_MGMT) ? 1 : 2; 1004 s32 min = (req_type == SRP_REQ_TASK_MGMT) ? 1 : 2;
972 1005
1006 srp_send_completion(target->send_cq, target);
1007
973 if (target->tx_head - target->tx_tail >= SRP_SQ_SIZE) 1008 if (target->tx_head - target->tx_tail >= SRP_SQ_SIZE)
974 return NULL; 1009 return NULL;
975 1010
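
Editor's note on srp_create_target_ib(): it now allocates a receive CQ, a send CQ and the QP, and unwinds with labelled gotos instead of repeating the cleanup at each failure site. A small user-space sketch of the same unwind shape, with made-up alloc_res()/free_res() helpers standing in for ib_create_cq()/ib_destroy_cq() and friends:

#include <stdio.h>
#include <stdlib.h>

struct ctx {
	void *recv_cq;
	void *send_cq;
	void *qp;
};

static void *alloc_res(void)   { return malloc(16); }
static void  free_res(void *p) { free(p); }

static int create_resources(struct ctx *c)
{
	c->recv_cq = alloc_res();
	if (!c->recv_cq)
		goto err;

	c->send_cq = alloc_res();
	if (!c->send_cq)
		goto err_recv_cq;

	c->qp = alloc_res();
	if (!c->qp)
		goto err_send_cq;

	return 0;			/* success: caller owns all three resources */

err_send_cq:
	free_res(c->send_cq);
err_recv_cq:
	free_res(c->recv_cq);
err:
	return -1;
}

int main(void)
{
	struct ctx c = { 0 };

	if (!create_resources(&c)) {
		free_res(c.qp);
		free_res(c.send_cq);
		free_res(c.recv_cq);
	}
	return 0;
}
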
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index e185b907fc12..5a80eac6fdaa 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -60,7 +60,6 @@ enum {
60 SRP_RQ_SHIFT = 6, 60 SRP_RQ_SHIFT = 6,
61 SRP_RQ_SIZE = 1 << SRP_RQ_SHIFT, 61 SRP_RQ_SIZE = 1 << SRP_RQ_SHIFT,
62 SRP_SQ_SIZE = SRP_RQ_SIZE - 1, 62 SRP_SQ_SIZE = SRP_RQ_SIZE - 1,
63 SRP_CQ_SIZE = SRP_SQ_SIZE + SRP_RQ_SIZE,
64 63
65 SRP_TAG_TSK_MGMT = 1 << (SRP_RQ_SHIFT + 1), 64 SRP_TAG_TSK_MGMT = 1 << (SRP_RQ_SHIFT + 1),
66 65
@@ -69,8 +68,6 @@ enum {
69 SRP_FMR_DIRTY_SIZE = SRP_FMR_POOL_SIZE / 4 68 SRP_FMR_DIRTY_SIZE = SRP_FMR_POOL_SIZE / 4
70}; 69};
71 70
72#define SRP_OP_RECV (1 << 31)
73
74enum srp_target_state { 71enum srp_target_state {
75 SRP_TARGET_LIVE, 72 SRP_TARGET_LIVE,
76 SRP_TARGET_CONNECTING, 73 SRP_TARGET_CONNECTING,
@@ -133,7 +130,8 @@ struct srp_target_port {
133 int path_query_id; 130 int path_query_id;
134 131
135 struct ib_cm_id *cm_id; 132 struct ib_cm_id *cm_id;
136 struct ib_cq *cq; 133 struct ib_cq *recv_cq;
134 struct ib_cq *send_cq;
137 struct ib_qp *qp; 135 struct ib_qp *qp;
138 136
139 int max_ti_iu_len; 137 int max_ti_iu_len;
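
Editor's note on the ib_srp.h changes: with receive and send completions arriving on different CQs, SRP no longer needs to tag work-request IDs with SRP_OP_RECV, and SRP_CQ_SIZE disappears along with the shared CQ. For context, the removed scheme looked roughly like this stand-alone sketch (OP_RECV here is illustrative, not the kernel constant):

#include <stdint.h>
#include <stdio.h>

#define OP_RECV (1u << 31)			/* illustrative, like the old SRP_OP_RECV */

int main(void)
{
	uint64_t wr_id = 5 | OP_RECV;		/* old style: ring index plus direction bit */

	printf("is_recv=%d index=%llu\n",
	       (wr_id & OP_RECV) != 0,
	       (unsigned long long)(wr_id & ~(uint64_t)OP_RECV));
	return 0;
}
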
diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h
index 3e8618b4efbc..4cd7f420766a 100644
--- a/drivers/net/cxgb3/adapter.h
+++ b/drivers/net/cxgb3/adapter.h
@@ -264,6 +264,10 @@ struct adapter {
264 struct work_struct fatal_error_handler_task; 264 struct work_struct fatal_error_handler_task;
265 struct work_struct link_fault_handler_task; 265 struct work_struct link_fault_handler_task;
266 266
267 struct work_struct db_full_task;
268 struct work_struct db_empty_task;
269 struct work_struct db_drop_task;
270
267 struct dentry *debugfs_root; 271 struct dentry *debugfs_root;
268 272
269 struct mutex mdio_lock; 273 struct mutex mdio_lock;
@@ -335,6 +339,7 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
335int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, 339int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
336 unsigned char *data); 340 unsigned char *data);
337irqreturn_t t3_sge_intr_msix(int irq, void *cookie); 341irqreturn_t t3_sge_intr_msix(int irq, void *cookie);
342extern struct workqueue_struct *cxgb3_wq;
338 343
339int t3_get_edc_fw(struct cphy *phy, int edc_idx, int size); 344int t3_get_edc_fw(struct cphy *phy, int edc_idx, int size);
340 345
diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
index 6fd968abb073..3e453e1d97e7 100644
--- a/drivers/net/cxgb3/cxgb3_main.c
+++ b/drivers/net/cxgb3/cxgb3_main.c
@@ -45,6 +45,7 @@
45#include <linux/firmware.h> 45#include <linux/firmware.h>
46#include <linux/log2.h> 46#include <linux/log2.h>
47#include <linux/stringify.h> 47#include <linux/stringify.h>
48#include <linux/sched.h>
48#include <asm/uaccess.h> 49#include <asm/uaccess.h>
49 50
50#include "common.h" 51#include "common.h"
@@ -140,7 +141,7 @@ MODULE_PARM_DESC(ofld_disable, "whether to enable offload at init time or not");
140 * will block keventd as it needs the rtnl lock, and we'll deadlock waiting 141 * will block keventd as it needs the rtnl lock, and we'll deadlock waiting
141 * for our work to complete. Get our own work queue to solve this. 142 * for our work to complete. Get our own work queue to solve this.
142 */ 143 */
143static struct workqueue_struct *cxgb3_wq; 144struct workqueue_struct *cxgb3_wq;
144 145
145/** 146/**
146 * link_report - show link status and link speed/duplex 147 * link_report - show link status and link speed/duplex
@@ -586,6 +587,19 @@ static void setup_rss(struct adapter *adap)
586 V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ, cpus, rspq_map); 587 V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ, cpus, rspq_map);
587} 588}
588 589
590static void ring_dbs(struct adapter *adap)
591{
592 int i, j;
593
594 for (i = 0; i < SGE_QSETS; i++) {
595 struct sge_qset *qs = &adap->sge.qs[i];
596
597 if (qs->adap)
598 for (j = 0; j < SGE_TXQ_PER_SET; j++)
599 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(qs->txq[j].cntxt_id));
600 }
601}
602
589static void init_napi(struct adapter *adap) 603static void init_napi(struct adapter *adap)
590{ 604{
591 int i; 605 int i;
@@ -2750,6 +2764,42 @@ static void t3_adap_check_task(struct work_struct *work)
2750 spin_unlock_irq(&adapter->work_lock); 2764 spin_unlock_irq(&adapter->work_lock);
2751} 2765}
2752 2766
2767static void db_full_task(struct work_struct *work)
2768{
2769 struct adapter *adapter = container_of(work, struct adapter,
2770 db_full_task);
2771
2772 cxgb3_event_notify(&adapter->tdev, OFFLOAD_DB_FULL, 0);
2773}
2774
2775static void db_empty_task(struct work_struct *work)
2776{
2777 struct adapter *adapter = container_of(work, struct adapter,
2778 db_empty_task);
2779
2780 cxgb3_event_notify(&adapter->tdev, OFFLOAD_DB_EMPTY, 0);
2781}
2782
2783static void db_drop_task(struct work_struct *work)
2784{
2785 struct adapter *adapter = container_of(work, struct adapter,
2786 db_drop_task);
2787 unsigned long delay = 1000;
2788 unsigned short r;
2789
2790 cxgb3_event_notify(&adapter->tdev, OFFLOAD_DB_DROP, 0);
2791
2792 /*
2793 * Sleep a while before ringing the driver qset dbs.
2794 * The delay is between 1000-2023 usecs.
2795 */
2796 get_random_bytes(&r, 2);
2797 delay += r & 1023;
2798 set_current_state(TASK_UNINTERRUPTIBLE);
2799 schedule_timeout(usecs_to_jiffies(delay));
2800 ring_dbs(adapter);
2801}
2802
2753/* 2803/*
2754 * Processes external (PHY) interrupts in process context. 2804 * Processes external (PHY) interrupts in process context.
2755 */ 2805 */
@@ -3218,6 +3268,11 @@ static int __devinit init_one(struct pci_dev *pdev,
3218 INIT_LIST_HEAD(&adapter->adapter_list); 3268 INIT_LIST_HEAD(&adapter->adapter_list);
3219 INIT_WORK(&adapter->ext_intr_handler_task, ext_intr_task); 3269 INIT_WORK(&adapter->ext_intr_handler_task, ext_intr_task);
3220 INIT_WORK(&adapter->fatal_error_handler_task, fatal_error_task); 3270 INIT_WORK(&adapter->fatal_error_handler_task, fatal_error_task);
3271
3272 INIT_WORK(&adapter->db_full_task, db_full_task);
3273 INIT_WORK(&adapter->db_empty_task, db_empty_task);
3274 INIT_WORK(&adapter->db_drop_task, db_drop_task);
3275
3221 INIT_DELAYED_WORK(&adapter->adap_check_task, t3_adap_check_task); 3276 INIT_DELAYED_WORK(&adapter->adap_check_task, t3_adap_check_task);
3222 3277
3223 for (i = 0; i < ai->nports0 + ai->nports1; ++i) { 3278 for (i = 0; i < ai->nports0 + ai->nports1; ++i) {
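
Editor's note on db_drop_task() above: it sleeps a randomized 1000-2023 microseconds before re-ringing the egress doorbells via ring_dbs(); the delay is a 10-bit random offset on top of a 1 ms base. A user-space sketch of just that arithmetic, with srand()/rand() standing in for get_random_bytes():

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

int main(void)
{
	unsigned long delay = 1000;		/* base delay in usecs */
	unsigned short r;

	srand((unsigned)time(NULL));		/* stands in for get_random_bytes(&r, 2) */
	r = (unsigned short)rand();
	delay += r & 1023;			/* final delay: 1000..2023 usecs */

	printf("would sleep %lu usecs before ring_dbs()\n", delay);
	return 0;
}
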
diff --git a/drivers/net/cxgb3/cxgb3_offload.h b/drivers/net/cxgb3/cxgb3_offload.h
index 670aa62042da..929c298115ca 100644
--- a/drivers/net/cxgb3/cxgb3_offload.h
+++ b/drivers/net/cxgb3/cxgb3_offload.h
@@ -73,7 +73,10 @@ enum {
73 OFFLOAD_STATUS_UP, 73 OFFLOAD_STATUS_UP,
74 OFFLOAD_STATUS_DOWN, 74 OFFLOAD_STATUS_DOWN,
75 OFFLOAD_PORT_DOWN, 75 OFFLOAD_PORT_DOWN,
76 OFFLOAD_PORT_UP 76 OFFLOAD_PORT_UP,
77 OFFLOAD_DB_FULL,
78 OFFLOAD_DB_EMPTY,
79 OFFLOAD_DB_DROP
77}; 80};
78 81
79struct cxgb3_client { 82struct cxgb3_client {
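
Editor's note on the new offload events: they tell clients when the doorbell FIFO fills, drains, or drops a write. How a client reacts is up to its event handler; the sketch below is purely illustrative (local enum names, made-up handler), not the actual cxgb3_client callback.

#include <stdio.h>

enum db_event { DB_FULL, DB_EMPTY, DB_DROP };	/* local names mirroring OFFLOAD_DB_* */

static void handle_db_event(enum db_event ev)
{
	switch (ev) {
	case DB_FULL:		/* e.g. stop ringing doorbells for now */
		printf("doorbell FIFO full\n");
		break;
	case DB_EMPTY:		/* e.g. resume normal submission */
		printf("doorbell FIFO drained\n");
		break;
	case DB_DROP:		/* e.g. re-ring queues whose doorbells were lost */
		printf("doorbell write dropped\n");
		break;
	}
}

int main(void)
{
	handle_db_event(DB_FULL);
	handle_db_event(DB_EMPTY);
	handle_db_event(DB_DROP);
	return 0;
}
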
diff --git a/drivers/net/cxgb3/regs.h b/drivers/net/cxgb3/regs.h
index 1b5327b5a965..cb42353c9fdd 100644
--- a/drivers/net/cxgb3/regs.h
+++ b/drivers/net/cxgb3/regs.h
@@ -254,6 +254,22 @@
254#define V_LOPIODRBDROPERR(x) ((x) << S_LOPIODRBDROPERR) 254#define V_LOPIODRBDROPERR(x) ((x) << S_LOPIODRBDROPERR)
255#define F_LOPIODRBDROPERR V_LOPIODRBDROPERR(1U) 255#define F_LOPIODRBDROPERR V_LOPIODRBDROPERR(1U)
256 256
257#define S_HIPRIORITYDBFULL 7
258#define V_HIPRIORITYDBFULL(x) ((x) << S_HIPRIORITYDBFULL)
259#define F_HIPRIORITYDBFULL V_HIPRIORITYDBFULL(1U)
260
261#define S_HIPRIORITYDBEMPTY 6
262#define V_HIPRIORITYDBEMPTY(x) ((x) << S_HIPRIORITYDBEMPTY)
263#define F_HIPRIORITYDBEMPTY V_HIPRIORITYDBEMPTY(1U)
264
265#define S_LOPRIORITYDBFULL 5
266#define V_LOPRIORITYDBFULL(x) ((x) << S_LOPRIORITYDBFULL)
267#define F_LOPRIORITYDBFULL V_LOPRIORITYDBFULL(1U)
268
269#define S_LOPRIORITYDBEMPTY 4
270#define V_LOPRIORITYDBEMPTY(x) ((x) << S_LOPRIORITYDBEMPTY)
271#define F_LOPRIORITYDBEMPTY V_LOPRIORITYDBEMPTY(1U)
272
257#define S_RSPQDISABLED 3 273#define S_RSPQDISABLED 3
258#define V_RSPQDISABLED(x) ((x) << S_RSPQDISABLED) 274#define V_RSPQDISABLED(x) ((x) << S_RSPQDISABLED)
259#define F_RSPQDISABLED V_RSPQDISABLED(1U) 275#define F_RSPQDISABLED V_RSPQDISABLED(1U)
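
Editor's note on the new register definitions: they follow the driver's usual macro convention, where S_x is the bit position, V_x(v) shifts a value into that field, and F_x is the one-bit flag mask. A stand-alone sketch using the F_HIPRIORITYDBFULL definition added above:

#include <stdio.h>

/* Copied from the hunk above: bit position, field inserter, flag mask. */
#define S_HIPRIORITYDBFULL	7
#define V_HIPRIORITYDBFULL(x)	((x) << S_HIPRIORITYDBFULL)
#define F_HIPRIORITYDBFULL	V_HIPRIORITYDBFULL(1U)

int main(void)
{
	unsigned int status = F_HIPRIORITYDBFULL;	/* pretend SG_INT_CAUSE value */

	if (status & F_HIPRIORITYDBFULL)
		printf("high-priority doorbell-full cause bit (bit %d) is set\n",
		       S_HIPRIORITYDBFULL);
	return 0;
}
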
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 048205903741..78e265b484b6 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -42,6 +42,7 @@
42#include "sge_defs.h" 42#include "sge_defs.h"
43#include "t3_cpl.h" 43#include "t3_cpl.h"
44#include "firmware_exports.h" 44#include "firmware_exports.h"
45#include "cxgb3_offload.h"
45 46
46#define USE_GTS 0 47#define USE_GTS 0
47 48
@@ -2841,8 +2842,13 @@ void t3_sge_err_intr_handler(struct adapter *adapter)
2841 } 2842 }
2842 2843
2843 if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR)) 2844 if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
2844 CH_ALERT(adapter, "SGE dropped %s priority doorbell\n", 2845 queue_work(cxgb3_wq, &adapter->db_drop_task);
2845 status & F_HIPIODRBDROPERR ? "high" : "lo"); 2846
2847 if (status & (F_HIPRIORITYDBFULL | F_LOPRIORITYDBFULL))
2848 queue_work(cxgb3_wq, &adapter->db_full_task);
2849
2850 if (status & (F_HIPRIORITYDBEMPTY | F_LOPRIORITYDBEMPTY))
2851 queue_work(cxgb3_wq, &adapter->db_empty_task);
2846 2852
2847 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 2853 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2848 if (status & SGE_FATALERR) 2854 if (status & SGE_FATALERR)
diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c
index 3ab9f51918aa..95a8ba0759f1 100644
--- a/drivers/net/cxgb3/t3_hw.c
+++ b/drivers/net/cxgb3/t3_hw.c
@@ -1433,7 +1433,10 @@ static int t3_handle_intr_status(struct adapter *adapter, unsigned int reg,
1433 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \ 1433 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
1434 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \ 1434 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
1435 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \ 1435 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
1436 F_HIRCQPARITYERROR) 1436 F_HIRCQPARITYERROR | F_LOPRIORITYDBFULL | \
1437 F_HIPRIORITYDBFULL | F_LOPRIORITYDBEMPTY | \
1438 F_HIPRIORITYDBEMPTY | F_HIPIODRBDROPERR | \
1439 F_LOPIODRBDROPERR)
1437#define MC5_INTR_MASK (F_PARITYERR | F_ACTRGNFULL | F_UNKNOWNCMD | \ 1440#define MC5_INTR_MASK (F_PARITYERR | F_ACTRGNFULL | F_UNKNOWNCMD | \
1438 F_REQQPARERR | F_DISPQPARERR | F_DELACTEMPTY | \ 1441 F_REQQPARERR | F_DISPQPARERR | F_DELACTEMPTY | \
1439 F_NFASRCHFAIL) 1442 F_NFASRCHFAIL)
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index d7fc45c4eba9..cbb50f4da3dd 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -232,6 +232,7 @@ void ib_unpack(const struct ib_field *desc,
232 232
233void ib_ud_header_init(int payload_bytes, 233void ib_ud_header_init(int payload_bytes,
234 int grh_present, 234 int grh_present,
235 int immediate_present,
235 struct ib_ud_header *header); 236 struct ib_ud_header *header);
236 237
237int ib_ud_header_pack(struct ib_ud_header *header, 238int ib_ud_header_pack(struct ib_ud_header *header,
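
Editor's note on the ib_pack.h change: callers of ib_ud_header_init() must now pass an immediate_present flag, presumably so the built header accounts for an immediate-data field. A self-contained call-shape sketch; the struct and function body here are stubs, only the argument order mirrors the prototype above.

#include <stdio.h>

struct ib_ud_header { int dummy; };		/* stub, not the real struct */

/* Stub with the same argument order as the updated prototype. */
static void ib_ud_header_init(int payload_bytes, int grh_present,
			      int immediate_present,
			      struct ib_ud_header *header)
{
	printf("payload=%d grh=%d imm=%d header=%p\n",
	       payload_bytes, grh_present, immediate_present, (void *)header);
}

int main(void)
{
	struct ib_ud_header hdr;

	ib_ud_header_init(256, 1, 0, &hdr);	/* third arg: immediate_present */
	return 0;
}
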
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 09509edb1c5f..a585e0f92bc3 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -984,9 +984,9 @@ struct ib_device {
984 struct list_head event_handler_list; 984 struct list_head event_handler_list;
985 spinlock_t event_handler_lock; 985 spinlock_t event_handler_lock;
986 986
987 spinlock_t client_data_lock;
987 struct list_head core_list; 988 struct list_head core_list;
988 struct list_head client_data_list; 989 struct list_head client_data_list;
989 spinlock_t client_data_lock;
990 990
991 struct ib_cache cache; 991 struct ib_cache cache;
992 int *pkey_tbl_len; 992 int *pkey_tbl_len;
@@ -1144,8 +1144,8 @@ struct ib_device {
1144 IB_DEV_UNREGISTERED 1144 IB_DEV_UNREGISTERED
1145 } reg_state; 1145 } reg_state;
1146 1146
1147 u64 uverbs_cmd_mask;
1148 int uverbs_abi_ver; 1147 int uverbs_abi_ver;
1148 u64 uverbs_cmd_mask;
1149 1149
1150 char node_desc[64]; 1150 char node_desc[64];
1151 __be64 node_guid; 1151 __be64 node_guid;
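
Editor's note on the ib_verbs.h hunks: both are pure member reordering so that smaller fields pack together instead of being padded out against 8-byte-aligned neighbours. A stand-alone illustration of why ordering matters; the field names are generic, not the ib_device layout.

#include <stdio.h>
#include <stdint.h>

/* Same members, different order.  On a typical LP64 build the first
 * layout pads after each 4-byte field, the second does not. */
struct loose {
	uint64_t a;
	uint32_t b;		/* 4 bytes of padding follow */
	uint64_t c;
	uint32_t d;		/* 4 bytes of tail padding */
};

struct reordered {
	uint64_t a;
	uint64_t c;
	uint32_t b;		/* the two 32-bit fields now share one 8-byte slot */
	uint32_t d;
};

int main(void)
{
	printf("loose=%zu bytes, reordered=%zu bytes\n",
	       sizeof(struct loose), sizeof(struct reordered));
	return 0;
}
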
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index c6b2962315b3..4fae90304648 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -67,7 +67,6 @@ enum rdma_port_space {
67 RDMA_PS_IPOIB = 0x0002, 67 RDMA_PS_IPOIB = 0x0002,
68 RDMA_PS_TCP = 0x0106, 68 RDMA_PS_TCP = 0x0106,
69 RDMA_PS_UDP = 0x0111, 69 RDMA_PS_UDP = 0x0111,
70 RDMA_PS_SCTP = 0x0183
71}; 70};
72 71
73struct rdma_addr { 72struct rdma_addr {