about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/infiniband/core/ucm.c 63
-rw-r--r-- drivers/infiniband/core/user_mad.c 173
-rw-r--r-- drivers/infiniband/core/uverbs.h 11
-rw-r--r-- drivers/infiniband/core/uverbs_main.c 175
-rw-r--r-- drivers/infiniband/hw/cxgb3/cxio_hal.c 15
-rw-r--r-- drivers/infiniband/hw/cxgb3/cxio_hal.h 4
-rw-r--r-- drivers/infiniband/hw/cxgb3/cxio_wr.h 17
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch.c 80
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch.h 2
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_provider.c 2
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_qp.c 9
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_irq.c 5
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_qp.c 4
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_sqp.c 2
-rw-r--r-- drivers/infiniband/hw/mlx4/qp.c 2
-rw-r--r-- drivers/infiniband/hw/nes/nes.c 1
-rw-r--r-- drivers/infiniband/hw/nes/nes.h 9
-rw-r--r-- drivers/infiniband/hw/nes/nes_cm.c 11
-rw-r--r-- drivers/infiniband/hw/nes/nes_hw.c 484
-rw-r--r-- drivers/infiniband/hw/nes/nes_hw.h 2
-rw-r--r-- drivers/infiniband/hw/nes/nes_nic.c 61
-rw-r--r-- drivers/infiniband/hw/nes/nes_verbs.c 6
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c 10
-rw-r--r-- drivers/infiniband/ulp/iser/iscsi_iser.c 47
-rw-r--r-- drivers/infiniband/ulp/iser/iscsi_iser.h 97
-rw-r--r-- drivers/infiniband/ulp/iser/iser_initiator.c 506
-rw-r--r-- drivers/infiniband/ulp/iser/iser_memory.c 64
-rw-r--r-- drivers/infiniband/ulp/iser/iser_verbs.c 281
-rw-r--r-- drivers/net/cxgb3/adapter.h 5
-rw-r--r-- drivers/net/cxgb3/cxgb3_main.c 57
-rw-r--r-- drivers/net/cxgb3/cxgb3_offload.h 5
-rw-r--r-- drivers/net/cxgb3/regs.h 16
-rw-r--r-- drivers/net/cxgb3/sge.c 10
-rw-r--r-- drivers/net/cxgb3/t3_hw.c 5
-rw-r--r-- include/rdma/ib_verbs.h 4
-rw-r--r-- include/rdma/rdma_cm.h 1
36 files changed, 1140 insertions, 1106 deletions
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index f504c9b00c1b..1b09b735c5a8 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -1215,15 +1215,18 @@ static void ib_ucm_release_dev(struct device *dev)
1215 1215
1216 ucm_dev = container_of(dev, struct ib_ucm_device, dev); 1216 ucm_dev = container_of(dev, struct ib_ucm_device, dev);
1217 cdev_del(&ucm_dev->cdev); 1217 cdev_del(&ucm_dev->cdev);
1218 clear_bit(ucm_dev->devnum, dev_map); 1218 if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
1219 clear_bit(ucm_dev->devnum, dev_map);
1220 else
1221 clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, dev_map);
1219 kfree(ucm_dev); 1222 kfree(ucm_dev);
1220} 1223}
1221 1224
1222static const struct file_operations ucm_fops = { 1225static const struct file_operations ucm_fops = {
1223 .owner = THIS_MODULE, 1226 .owner = THIS_MODULE,
1224 .open = ib_ucm_open, 1227 .open = ib_ucm_open,
1225 .release = ib_ucm_close, 1228 .release = ib_ucm_close,
1226 .write = ib_ucm_write, 1229 .write = ib_ucm_write,
1227 .poll = ib_ucm_poll, 1230 .poll = ib_ucm_poll,
1228}; 1231};
1229 1232
@@ -1237,8 +1240,32 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
1237} 1240}
1238static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); 1241static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
1239 1242
1243static dev_t overflow_maj;
1244static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES);
1245static int find_overflow_devnum(void)
1246{
1247 int ret;
1248
1249 if (!overflow_maj) {
1250 ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES,
1251 "infiniband_cm");
1252 if (ret) {
1253 printk(KERN_ERR "ucm: couldn't register dynamic device number\n");
1254 return ret;
1255 }
1256 }
1257
1258 ret = find_first_zero_bit(overflow_map, IB_UCM_MAX_DEVICES);
1259 if (ret >= IB_UCM_MAX_DEVICES)
1260 return -1;
1261
1262 return ret;
1263}
1264
1240static void ib_ucm_add_one(struct ib_device *device) 1265static void ib_ucm_add_one(struct ib_device *device)
1241{ 1266{
1267 int devnum;
1268 dev_t base;
1242 struct ib_ucm_device *ucm_dev; 1269 struct ib_ucm_device *ucm_dev;
1243 1270
1244 if (!device->alloc_ucontext || 1271 if (!device->alloc_ucontext ||
@@ -1251,16 +1278,25 @@ static void ib_ucm_add_one(struct ib_device *device)
1251 1278
1252 ucm_dev->ib_dev = device; 1279 ucm_dev->ib_dev = device;
1253 1280
1254 ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); 1281 devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
1255 if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES) 1282 if (devnum >= IB_UCM_MAX_DEVICES) {
1256 goto err; 1283 devnum = find_overflow_devnum();
1257 1284 if (devnum < 0)
1258 set_bit(ucm_dev->devnum, dev_map); 1285 goto err;
1286
1287 ucm_dev->devnum = devnum + IB_UCM_MAX_DEVICES;
1288 base = devnum + overflow_maj;
1289 set_bit(devnum, overflow_map);
1290 } else {
1291 ucm_dev->devnum = devnum;
1292 base = devnum + IB_UCM_BASE_DEV;
1293 set_bit(devnum, dev_map);
1294 }
1259 1295
1260 cdev_init(&ucm_dev->cdev, &ucm_fops); 1296 cdev_init(&ucm_dev->cdev, &ucm_fops);
1261 ucm_dev->cdev.owner = THIS_MODULE; 1297 ucm_dev->cdev.owner = THIS_MODULE;
1262 kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum); 1298 kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum);
1263 if (cdev_add(&ucm_dev->cdev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1)) 1299 if (cdev_add(&ucm_dev->cdev, base, 1))
1264 goto err; 1300 goto err;
1265 1301
1266 ucm_dev->dev.class = &cm_class; 1302 ucm_dev->dev.class = &cm_class;
@@ -1281,7 +1317,10 @@ err_dev:
1281 device_unregister(&ucm_dev->dev); 1317 device_unregister(&ucm_dev->dev);
1282err_cdev: 1318err_cdev:
1283 cdev_del(&ucm_dev->cdev); 1319 cdev_del(&ucm_dev->cdev);
1284 clear_bit(ucm_dev->devnum, dev_map); 1320 if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
1321 clear_bit(devnum, dev_map);
1322 else
1323 clear_bit(devnum, overflow_map);
1285err: 1324err:
1286 kfree(ucm_dev); 1325 kfree(ucm_dev);
1287 return; 1326 return;
@@ -1340,6 +1379,8 @@ static void __exit ib_ucm_cleanup(void)
1340 ib_unregister_client(&ucm_client); 1379 ib_unregister_client(&ucm_client);
1341 class_remove_file(&cm_class, &class_attr_abi_version); 1380 class_remove_file(&cm_class, &class_attr_abi_version);
1342 unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); 1381 unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
1382 if (overflow_maj)
1383 unregister_chrdev_region(overflow_maj, IB_UCM_MAX_DEVICES);
1343 idr_destroy(&ctx_id_table); 1384 idr_destroy(&ctx_id_table);
1344} 1385}
1345 1386
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 7de02969ed7d..02d360cfc2f7 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -65,12 +65,9 @@ enum {
65}; 65};
66 66
67/* 67/*
68 * Our lifetime rules for these structs are the following: each time a 68 * Our lifetime rules for these structs are the following:
69 * device special file is opened, we look up the corresponding struct 69 * device special file is opened, we take a reference on the
70 * ib_umad_port by minor in the umad_port[] table while holding the 70 * ib_umad_port's struct ib_umad_device. We drop these
71 * port_lock. If this lookup succeeds, we take a reference on the
72 * ib_umad_port's struct ib_umad_device while still holding the
73 * port_lock; if the lookup fails, we fail the open(). We drop these
74 * references in the corresponding close(). 71 * references in the corresponding close().
75 * 72 *
76 * In addition to references coming from open character devices, there 73 * In addition to references coming from open character devices, there
@@ -78,19 +75,14 @@ enum {
78 * module's reference taken when allocating the ib_umad_device in 75 * module's reference taken when allocating the ib_umad_device in
79 * ib_umad_add_one(). 76 * ib_umad_add_one().
80 * 77 *
81 * When destroying an ib_umad_device, we clear all of its 78 * When destroying an ib_umad_device, we drop the module's reference.
82 * ib_umad_ports from umad_port[] while holding port_lock before
83 * dropping the module's reference to the ib_umad_device. This is
84 * always safe because any open() calls will either succeed and obtain
85 * a reference before we clear the umad_port[] entries, or fail after
86 * we clear the umad_port[] entries.
87 */ 79 */
88 80
89struct ib_umad_port { 81struct ib_umad_port {
90 struct cdev *cdev; 82 struct cdev cdev;
91 struct device *dev; 83 struct device *dev;
92 84
93 struct cdev *sm_cdev; 85 struct cdev sm_cdev;
94 struct device *sm_dev; 86 struct device *sm_dev;
95 struct semaphore sm_sem; 87 struct semaphore sm_sem;
96 88
@@ -136,7 +128,6 @@ static struct class *umad_class;
136static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); 128static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
137 129
138static DEFINE_SPINLOCK(port_lock); 130static DEFINE_SPINLOCK(port_lock);
139static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS];
140static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); 131static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
141 132
142static void ib_umad_add_one(struct ib_device *device); 133static void ib_umad_add_one(struct ib_device *device);
@@ -496,8 +487,8 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
496 ah_attr.ah_flags = IB_AH_GRH; 487 ah_attr.ah_flags = IB_AH_GRH;
497 memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16); 488 memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16);
498 ah_attr.grh.sgid_index = packet->mad.hdr.gid_index; 489 ah_attr.grh.sgid_index = packet->mad.hdr.gid_index;
499 ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label); 490 ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label);
500 ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit; 491 ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit;
501 ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; 492 ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class;
502 } 493 }
503 494
@@ -528,9 +519,9 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
528 goto err_ah; 519 goto err_ah;
529 } 520 }
530 521
531 packet->msg->ah = ah; 522 packet->msg->ah = ah;
532 packet->msg->timeout_ms = packet->mad.hdr.timeout_ms; 523 packet->msg->timeout_ms = packet->mad.hdr.timeout_ms;
533 packet->msg->retries = packet->mad.hdr.retries; 524 packet->msg->retries = packet->mad.hdr.retries;
534 packet->msg->context[0] = packet; 525 packet->msg->context[0] = packet;
535 526
536 /* Copy MAD header. Any RMPP header is already in place. */ 527 /* Copy MAD header. Any RMPP header is already in place. */
@@ -779,15 +770,11 @@ static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd,
779/* 770/*
780 * ib_umad_open() does not need the BKL: 771 * ib_umad_open() does not need the BKL:
781 * 772 *
782 * - umad_port[] accesses are protected by port_lock, the 773 * - the ib_umad_port structures are properly reference counted, and
783 * ib_umad_port structures are properly reference counted, and
784 * everything else is purely local to the file being created, so 774 * everything else is purely local to the file being created, so
785 * races against other open calls are not a problem; 775 * races against other open calls are not a problem;
786 * - the ioctl method does not affect any global state outside of the 776 * - the ioctl method does not affect any global state outside of the
787 * file structure being operated on; 777 * file structure being operated on;
788 * - the port is added to umad_port[] as the last part of module
789 * initialization so the open method will either immediately run
790 * -ENXIO, or all required initialization will be done.
791 */ 778 */
792static int ib_umad_open(struct inode *inode, struct file *filp) 779static int ib_umad_open(struct inode *inode, struct file *filp)
793{ 780{
@@ -795,13 +782,10 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
795 struct ib_umad_file *file; 782 struct ib_umad_file *file;
796 int ret = 0; 783 int ret = 0;
797 784
798 spin_lock(&port_lock); 785 port = container_of(inode->i_cdev, struct ib_umad_port, cdev);
799 port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE];
800 if (port) 786 if (port)
801 kref_get(&port->umad_dev->ref); 787 kref_get(&port->umad_dev->ref);
802 spin_unlock(&port_lock); 788 else
803
804 if (!port)
805 return -ENXIO; 789 return -ENXIO;
806 790
807 mutex_lock(&port->file_mutex); 791 mutex_lock(&port->file_mutex);
@@ -872,16 +856,16 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
872} 856}
873 857
874static const struct file_operations umad_fops = { 858static const struct file_operations umad_fops = {
875 .owner = THIS_MODULE, 859 .owner = THIS_MODULE,
876 .read = ib_umad_read, 860 .read = ib_umad_read,
877 .write = ib_umad_write, 861 .write = ib_umad_write,
878 .poll = ib_umad_poll, 862 .poll = ib_umad_poll,
879 .unlocked_ioctl = ib_umad_ioctl, 863 .unlocked_ioctl = ib_umad_ioctl,
880#ifdef CONFIG_COMPAT 864#ifdef CONFIG_COMPAT
881 .compat_ioctl = ib_umad_compat_ioctl, 865 .compat_ioctl = ib_umad_compat_ioctl,
882#endif 866#endif
883 .open = ib_umad_open, 867 .open = ib_umad_open,
884 .release = ib_umad_close 868 .release = ib_umad_close
885}; 869};
886 870
887static int ib_umad_sm_open(struct inode *inode, struct file *filp) 871static int ib_umad_sm_open(struct inode *inode, struct file *filp)
@@ -892,13 +876,10 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)
892 }; 876 };
893 int ret; 877 int ret;
894 878
895 spin_lock(&port_lock); 879 port = container_of(inode->i_cdev, struct ib_umad_port, sm_cdev);
896 port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS];
897 if (port) 880 if (port)
898 kref_get(&port->umad_dev->ref); 881 kref_get(&port->umad_dev->ref);
899 spin_unlock(&port_lock); 882 else
900
901 if (!port)
902 return -ENXIO; 883 return -ENXIO;
903 884
904 if (filp->f_flags & O_NONBLOCK) { 885 if (filp->f_flags & O_NONBLOCK) {
@@ -949,8 +930,8 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
949} 930}
950 931
951static const struct file_operations umad_sm_fops = { 932static const struct file_operations umad_sm_fops = {
952 .owner = THIS_MODULE, 933 .owner = THIS_MODULE,
953 .open = ib_umad_sm_open, 934 .open = ib_umad_sm_open,
954 .release = ib_umad_sm_close 935 .release = ib_umad_sm_close
955}; 936};
956 937
@@ -990,16 +971,51 @@ static ssize_t show_abi_version(struct class *class, char *buf)
990} 971}
991static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); 972static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
992 973
974static dev_t overflow_maj;
975static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS);
976static int find_overflow_devnum(void)
977{
978 int ret;
979
980 if (!overflow_maj) {
981 ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2,
982 "infiniband_mad");
983 if (ret) {
984 printk(KERN_ERR "user_mad: couldn't register dynamic device number\n");
985 return ret;
986 }
987 }
988
989 ret = find_first_zero_bit(overflow_map, IB_UMAD_MAX_PORTS);
990 if (ret >= IB_UMAD_MAX_PORTS)
991 return -1;
992
993 return ret;
994}
995
993static int ib_umad_init_port(struct ib_device *device, int port_num, 996static int ib_umad_init_port(struct ib_device *device, int port_num,
994 struct ib_umad_port *port) 997 struct ib_umad_port *port)
995{ 998{
999 int devnum;
1000 dev_t base;
1001
996 spin_lock(&port_lock); 1002 spin_lock(&port_lock);
997 port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); 1003 devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
998 if (port->dev_num >= IB_UMAD_MAX_PORTS) { 1004 if (devnum >= IB_UMAD_MAX_PORTS) {
999 spin_unlock(&port_lock); 1005 spin_unlock(&port_lock);
1000 return -1; 1006 devnum = find_overflow_devnum();
1007 if (devnum < 0)
1008 return -1;
1009
1010 spin_lock(&port_lock);
1011 port->dev_num = devnum + IB_UMAD_MAX_PORTS;
1012 base = devnum + overflow_maj;
1013 set_bit(devnum, overflow_map);
1014 } else {
1015 port->dev_num = devnum;
1016 base = devnum + base_dev;
1017 set_bit(devnum, dev_map);
1001 } 1018 }
1002 set_bit(port->dev_num, dev_map);
1003 spin_unlock(&port_lock); 1019 spin_unlock(&port_lock);
1004 1020
1005 port->ib_dev = device; 1021 port->ib_dev = device;
@@ -1008,17 +1024,14 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
1008 mutex_init(&port->file_mutex); 1024 mutex_init(&port->file_mutex);
1009 INIT_LIST_HEAD(&port->file_list); 1025 INIT_LIST_HEAD(&port->file_list);
1010 1026
1011 port->cdev = cdev_alloc(); 1027 cdev_init(&port->cdev, &umad_fops);
1012 if (!port->cdev) 1028 port->cdev.owner = THIS_MODULE;
1013 return -1; 1029 kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
1014 port->cdev->owner = THIS_MODULE; 1030 if (cdev_add(&port->cdev, base, 1))
1015 port->cdev->ops = &umad_fops;
1016 kobject_set_name(&port->cdev->kobj, "umad%d", port->dev_num);
1017 if (cdev_add(port->cdev, base_dev + port->dev_num, 1))
1018 goto err_cdev; 1031 goto err_cdev;
1019 1032
1020 port->dev = device_create(umad_class, device->dma_device, 1033 port->dev = device_create(umad_class, device->dma_device,
1021 port->cdev->dev, port, 1034 port->cdev.dev, port,
1022 "umad%d", port->dev_num); 1035 "umad%d", port->dev_num);
1023 if (IS_ERR(port->dev)) 1036 if (IS_ERR(port->dev))
1024 goto err_cdev; 1037 goto err_cdev;
@@ -1028,17 +1041,15 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
1028 if (device_create_file(port->dev, &dev_attr_port)) 1041 if (device_create_file(port->dev, &dev_attr_port))
1029 goto err_dev; 1042 goto err_dev;
1030 1043
1031 port->sm_cdev = cdev_alloc(); 1044 base += IB_UMAD_MAX_PORTS;
1032 if (!port->sm_cdev) 1045 cdev_init(&port->sm_cdev, &umad_sm_fops);
1033 goto err_dev; 1046 port->sm_cdev.owner = THIS_MODULE;
1034 port->sm_cdev->owner = THIS_MODULE; 1047 kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
1035 port->sm_cdev->ops = &umad_sm_fops; 1048 if (cdev_add(&port->sm_cdev, base, 1))
1036 kobject_set_name(&port->sm_cdev->kobj, "issm%d", port->dev_num);
1037 if (cdev_add(port->sm_cdev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1))
1038 goto err_sm_cdev; 1049 goto err_sm_cdev;
1039 1050
1040 port->sm_dev = device_create(umad_class, device->dma_device, 1051 port->sm_dev = device_create(umad_class, device->dma_device,
1041 port->sm_cdev->dev, port, 1052 port->sm_cdev.dev, port,
1042 "issm%d", port->dev_num); 1053 "issm%d", port->dev_num);
1043 if (IS_ERR(port->sm_dev)) 1054 if (IS_ERR(port->sm_dev))
1044 goto err_sm_cdev; 1055 goto err_sm_cdev;
@@ -1048,24 +1059,23 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
1048 if (device_create_file(port->sm_dev, &dev_attr_port)) 1059 if (device_create_file(port->sm_dev, &dev_attr_port))
1049 goto err_sm_dev; 1060 goto err_sm_dev;
1050 1061
1051 spin_lock(&port_lock);
1052 umad_port[port->dev_num] = port;
1053 spin_unlock(&port_lock);
1054
1055 return 0; 1062 return 0;
1056 1063
1057err_sm_dev: 1064err_sm_dev:
1058 device_destroy(umad_class, port->sm_cdev->dev); 1065 device_destroy(umad_class, port->sm_cdev.dev);
1059 1066
1060err_sm_cdev: 1067err_sm_cdev:
1061 cdev_del(port->sm_cdev); 1068 cdev_del(&port->sm_cdev);
1062 1069
1063err_dev: 1070err_dev:
1064 device_destroy(umad_class, port->cdev->dev); 1071 device_destroy(umad_class, port->cdev.dev);
1065 1072
1066err_cdev: 1073err_cdev:
1067 cdev_del(port->cdev); 1074 cdev_del(&port->cdev);
1068 clear_bit(port->dev_num, dev_map); 1075 if (port->dev_num < IB_UMAD_MAX_PORTS)
1076 clear_bit(devnum, dev_map);
1077 else
1078 clear_bit(devnum, overflow_map);
1069 1079
1070 return -1; 1080 return -1;
1071} 1081}
@@ -1079,15 +1089,11 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
1079 dev_set_drvdata(port->dev, NULL); 1089 dev_set_drvdata(port->dev, NULL);
1080 dev_set_drvdata(port->sm_dev, NULL); 1090 dev_set_drvdata(port->sm_dev, NULL);
1081 1091
1082 device_destroy(umad_class, port->cdev->dev); 1092 device_destroy(umad_class, port->cdev.dev);
1083 device_destroy(umad_class, port->sm_cdev->dev); 1093 device_destroy(umad_class, port->sm_cdev.dev);
1084 1094
1085 cdev_del(port->cdev); 1095 cdev_del(&port->cdev);
1086 cdev_del(port->sm_cdev); 1096 cdev_del(&port->sm_cdev);
1087
1088 spin_lock(&port_lock);
1089 umad_port[port->dev_num] = NULL;
1090 spin_unlock(&port_lock);
1091 1097
1092 mutex_lock(&port->file_mutex); 1098 mutex_lock(&port->file_mutex);
1093 1099
@@ -1106,7 +1112,10 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
1106 1112
1107 mutex_unlock(&port->file_mutex); 1113 mutex_unlock(&port->file_mutex);
1108 1114
1109 clear_bit(port->dev_num, dev_map); 1115 if (port->dev_num < IB_UMAD_MAX_PORTS)
1116 clear_bit(port->dev_num, dev_map);
1117 else
1118 clear_bit(port->dev_num - IB_UMAD_MAX_PORTS, overflow_map);
1110} 1119}
1111 1120
1112static void ib_umad_add_one(struct ib_device *device) 1121static void ib_umad_add_one(struct ib_device *device)
@@ -1214,6 +1223,8 @@ static void __exit ib_umad_cleanup(void)
1214 ib_unregister_client(&umad_client); 1223 ib_unregister_client(&umad_client);
1215 class_destroy(umad_class); 1224 class_destroy(umad_class);
1216 unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); 1225 unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
1226 if (overflow_maj)
1227 unregister_chrdev_region(overflow_maj, IB_UMAD_MAX_PORTS * 2);
1217} 1228}
1218 1229
1219module_init(ib_umad_init); 1230module_init(ib_umad_init);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index b3ea9587dc80..e54d9ac6d1ca 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -41,6 +41,7 @@
41#include <linux/idr.h> 41#include <linux/idr.h>
42#include <linux/mutex.h> 42#include <linux/mutex.h>
43#include <linux/completion.h> 43#include <linux/completion.h>
44#include <linux/cdev.h>
44 45
45#include <rdma/ib_verbs.h> 46#include <rdma/ib_verbs.h>
46#include <rdma/ib_umem.h> 47#include <rdma/ib_umem.h>
@@ -69,23 +70,23 @@
69 70
70struct ib_uverbs_device { 71struct ib_uverbs_device {
71 struct kref ref; 72 struct kref ref;
73 int num_comp_vectors;
72 struct completion comp; 74 struct completion comp;
73 int devnum;
74 struct cdev *cdev;
75 struct device *dev; 75 struct device *dev;
76 struct ib_device *ib_dev; 76 struct ib_device *ib_dev;
77 int num_comp_vectors; 77 int devnum;
78 struct cdev cdev;
78}; 79};
79 80
80struct ib_uverbs_event_file { 81struct ib_uverbs_event_file {
81 struct kref ref; 82 struct kref ref;
83 int is_async;
82 struct ib_uverbs_file *uverbs_file; 84 struct ib_uverbs_file *uverbs_file;
83 spinlock_t lock; 85 spinlock_t lock;
86 int is_closed;
84 wait_queue_head_t poll_wait; 87 wait_queue_head_t poll_wait;
85 struct fasync_struct *async_queue; 88 struct fasync_struct *async_queue;
86 struct list_head event_list; 89 struct list_head event_list;
87 int is_async;
88 int is_closed;
89}; 90};
90 91
91struct ib_uverbs_file { 92struct ib_uverbs_file {
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 5f284ffd430e..dbf04511cf0a 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -43,7 +43,6 @@
43#include <linux/sched.h> 43#include <linux/sched.h>
44#include <linux/file.h> 44#include <linux/file.h>
45#include <linux/mount.h> 45#include <linux/mount.h>
46#include <linux/cdev.h>
47 46
48#include <asm/uaccess.h> 47#include <asm/uaccess.h>
49 48
@@ -75,40 +74,39 @@ DEFINE_IDR(ib_uverbs_qp_idr);
75DEFINE_IDR(ib_uverbs_srq_idr); 74DEFINE_IDR(ib_uverbs_srq_idr);
76 75
77static DEFINE_SPINLOCK(map_lock); 76static DEFINE_SPINLOCK(map_lock);
78static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES];
79static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); 77static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
80 78
81static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, 79static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
82 const char __user *buf, int in_len, 80 const char __user *buf, int in_len,
83 int out_len) = { 81 int out_len) = {
84 [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, 82 [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
85 [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, 83 [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
86 [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, 84 [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
87 [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, 85 [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
88 [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, 86 [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
89 [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, 87 [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
90 [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, 88 [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
91 [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, 89 [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
92 [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, 90 [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
93 [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq, 91 [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
94 [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, 92 [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
95 [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, 93 [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
96 [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, 94 [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
97 [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, 95 [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
98 [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp, 96 [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
99 [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, 97 [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
100 [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, 98 [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
101 [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, 99 [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
102 [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, 100 [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
103 [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, 101 [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
104 [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, 102 [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
105 [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, 103 [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
106 [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, 104 [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
107 [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, 105 [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
108 [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, 106 [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
109 [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, 107 [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
110 [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, 108 [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
111 [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, 109 [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
112}; 110};
113 111
114static struct vfsmount *uverbs_event_mnt; 112static struct vfsmount *uverbs_event_mnt;
@@ -370,7 +368,7 @@ static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
370 368
371static const struct file_operations uverbs_event_fops = { 369static const struct file_operations uverbs_event_fops = {
372 .owner = THIS_MODULE, 370 .owner = THIS_MODULE,
373 .read = ib_uverbs_event_read, 371 .read = ib_uverbs_event_read,
374 .poll = ib_uverbs_event_poll, 372 .poll = ib_uverbs_event_poll,
375 .release = ib_uverbs_event_close, 373 .release = ib_uverbs_event_close,
376 .fasync = ib_uverbs_event_fasync 374 .fasync = ib_uverbs_event_fasync
@@ -617,14 +615,12 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
617/* 615/*
618 * ib_uverbs_open() does not need the BKL: 616 * ib_uverbs_open() does not need the BKL:
619 * 617 *
620 * - dev_table[] accesses are protected by map_lock, the 618 * - the ib_uverbs_device structures are properly reference counted and
621 * ib_uverbs_device structures are properly reference counted, and
622 * everything else is purely local to the file being created, so 619 * everything else is purely local to the file being created, so
623 * races against other open calls are not a problem; 620 * races against other open calls are not a problem;
624 * - there is no ioctl method to race against; 621 * - there is no ioctl method to race against;
625 * - the device is added to dev_table[] as the last part of module 622 * - the open method will either immediately run -ENXIO, or all
626 * initialization, the open method will either immediately run 623 * required initialization will be done.
627 * -ENXIO, or all required initialization will be done.
628 */ 624 */
629static int ib_uverbs_open(struct inode *inode, struct file *filp) 625static int ib_uverbs_open(struct inode *inode, struct file *filp)
630{ 626{
@@ -632,13 +628,10 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
632 struct ib_uverbs_file *file; 628 struct ib_uverbs_file *file;
633 int ret; 629 int ret;
634 630
635 spin_lock(&map_lock); 631 dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
636 dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR];
637 if (dev) 632 if (dev)
638 kref_get(&dev->ref); 633 kref_get(&dev->ref);
639 spin_unlock(&map_lock); 634 else
640
641 if (!dev)
642 return -ENXIO; 635 return -ENXIO;
643 636
644 if (!try_module_get(dev->ib_dev->owner)) { 637 if (!try_module_get(dev->ib_dev->owner)) {
@@ -685,17 +678,17 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
685} 678}
686 679
687static const struct file_operations uverbs_fops = { 680static const struct file_operations uverbs_fops = {
688 .owner = THIS_MODULE, 681 .owner = THIS_MODULE,
689 .write = ib_uverbs_write, 682 .write = ib_uverbs_write,
690 .open = ib_uverbs_open, 683 .open = ib_uverbs_open,
691 .release = ib_uverbs_close 684 .release = ib_uverbs_close
692}; 685};
693 686
694static const struct file_operations uverbs_mmap_fops = { 687static const struct file_operations uverbs_mmap_fops = {
695 .owner = THIS_MODULE, 688 .owner = THIS_MODULE,
696 .write = ib_uverbs_write, 689 .write = ib_uverbs_write,
697 .mmap = ib_uverbs_mmap, 690 .mmap = ib_uverbs_mmap,
698 .open = ib_uverbs_open, 691 .open = ib_uverbs_open,
699 .release = ib_uverbs_close 692 .release = ib_uverbs_close
700}; 693};
701 694
@@ -735,8 +728,38 @@ static ssize_t show_abi_version(struct class *class, char *buf)
735} 728}
736static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); 729static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
737 730
731static dev_t overflow_maj;
732static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);
733
734/*
735 * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by
736 * requesting a new major number and doubling the number of max devices we
737 * support. It's stupid, but simple.
738 */
739static int find_overflow_devnum(void)
740{
741 int ret;
742
743 if (!overflow_maj) {
744 ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
745 "infiniband_verbs");
746 if (ret) {
747 printk(KERN_ERR "user_verbs: couldn't register dynamic device number\n");
748 return ret;
749 }
750 }
751
752 ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
753 if (ret >= IB_UVERBS_MAX_DEVICES)
754 return -1;
755
756 return ret;
757}
758
738static void ib_uverbs_add_one(struct ib_device *device) 759static void ib_uverbs_add_one(struct ib_device *device)
739{ 760{
761 int devnum;
762 dev_t base;
740 struct ib_uverbs_device *uverbs_dev; 763 struct ib_uverbs_device *uverbs_dev;
741 764
742 if (!device->alloc_ucontext) 765 if (!device->alloc_ucontext)
@@ -750,28 +773,36 @@ static void ib_uverbs_add_one(struct ib_device *device)
750 init_completion(&uverbs_dev->comp); 773 init_completion(&uverbs_dev->comp);
751 774
752 spin_lock(&map_lock); 775 spin_lock(&map_lock);
753 uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); 776 devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
754 if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) { 777 if (devnum >= IB_UVERBS_MAX_DEVICES) {
755 spin_unlock(&map_lock); 778 spin_unlock(&map_lock);
756 goto err; 779 devnum = find_overflow_devnum();
780 if (devnum < 0)
781 goto err;
782
783 spin_lock(&map_lock);
784 uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
785 base = devnum + overflow_maj;
786 set_bit(devnum, overflow_map);
787 } else {
788 uverbs_dev->devnum = devnum;
789 base = devnum + IB_UVERBS_BASE_DEV;
790 set_bit(devnum, dev_map);
757 } 791 }
758 set_bit(uverbs_dev->devnum, dev_map);
759 spin_unlock(&map_lock); 792 spin_unlock(&map_lock);
760 793
761 uverbs_dev->ib_dev = device; 794 uverbs_dev->ib_dev = device;
762 uverbs_dev->num_comp_vectors = device->num_comp_vectors; 795 uverbs_dev->num_comp_vectors = device->num_comp_vectors;
763 796
764 uverbs_dev->cdev = cdev_alloc(); 797 cdev_init(&uverbs_dev->cdev, NULL);
765 if (!uverbs_dev->cdev) 798 uverbs_dev->cdev.owner = THIS_MODULE;
766 goto err; 799 uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
767 uverbs_dev->cdev->owner = THIS_MODULE; 800 kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
768 uverbs_dev->cdev->ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; 801 if (cdev_add(&uverbs_dev->cdev, base, 1))
769 kobject_set_name(&uverbs_dev->cdev->kobj, "uverbs%d", uverbs_dev->devnum);
770 if (cdev_add(uverbs_dev->cdev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
771 goto err_cdev; 802 goto err_cdev;
772 803
773 uverbs_dev->dev = device_create(uverbs_class, device->dma_device, 804 uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
774 uverbs_dev->cdev->dev, uverbs_dev, 805 uverbs_dev->cdev.dev, uverbs_dev,
775 "uverbs%d", uverbs_dev->devnum); 806 "uverbs%d", uverbs_dev->devnum);
776 if (IS_ERR(uverbs_dev->dev)) 807 if (IS_ERR(uverbs_dev->dev))
777 goto err_cdev; 808 goto err_cdev;
@@ -781,20 +812,19 @@ static void ib_uverbs_add_one(struct ib_device *device)
781 if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) 812 if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
782 goto err_class; 813 goto err_class;
783 814
784 spin_lock(&map_lock);
785 dev_table[uverbs_dev->devnum] = uverbs_dev;
786 spin_unlock(&map_lock);
787
788 ib_set_client_data(device, &uverbs_client, uverbs_dev); 815 ib_set_client_data(device, &uverbs_client, uverbs_dev);
789 816
790 return; 817 return;
791 818
792err_class: 819err_class:
793 device_destroy(uverbs_class, uverbs_dev->cdev->dev); 820 device_destroy(uverbs_class, uverbs_dev->cdev.dev);
794 821
795err_cdev: 822err_cdev:
796 cdev_del(uverbs_dev->cdev); 823 cdev_del(&uverbs_dev->cdev);
797 clear_bit(uverbs_dev->devnum, dev_map); 824 if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
825 clear_bit(devnum, dev_map);
826 else
827 clear_bit(devnum, overflow_map);
798 828
799err: 829err:
800 kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); 830 kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
@@ -811,14 +841,13 @@ static void ib_uverbs_remove_one(struct ib_device *device)
811 return; 841 return;
812 842
813 dev_set_drvdata(uverbs_dev->dev, NULL); 843 dev_set_drvdata(uverbs_dev->dev, NULL);
814 device_destroy(uverbs_class, uverbs_dev->cdev->dev); 844 device_destroy(uverbs_class, uverbs_dev->cdev.dev);
815 cdev_del(uverbs_dev->cdev); 845 cdev_del(&uverbs_dev->cdev);
816 846
817 spin_lock(&map_lock); 847 if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
818 dev_table[uverbs_dev->devnum] = NULL; 848 clear_bit(uverbs_dev->devnum, dev_map);
819 spin_unlock(&map_lock); 849 else
820 850 clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
821 clear_bit(uverbs_dev->devnum, dev_map);
822 851
823 kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); 852 kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
824 wait_for_completion(&uverbs_dev->comp); 853 wait_for_completion(&uverbs_dev->comp);
@@ -908,6 +937,8 @@ static void __exit ib_uverbs_cleanup(void)
908 unregister_filesystem(&uverbs_event_fs); 937 unregister_filesystem(&uverbs_event_fs);
909 class_destroy(uverbs_class); 938 class_destroy(uverbs_class);
910 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); 939 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
940 if (overflow_maj)
941 unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
911 idr_destroy(&ib_uverbs_pd_idr); 942 idr_destroy(&ib_uverbs_pd_idr);
912 idr_destroy(&ib_uverbs_mr_idr); 943 idr_destroy(&ib_uverbs_mr_idr);
913 idr_destroy(&ib_uverbs_mw_idr); 944 idr_destroy(&ib_uverbs_mw_idr);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 0677fc7dfd51..a28e862f2d68 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -109,7 +109,6 @@ int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
109 while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) { 109 while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
110 udelay(1); 110 udelay(1);
111 if (i++ > 1000000) { 111 if (i++ > 1000000) {
112 BUG_ON(1);
113 printk(KERN_ERR "%s: stalled rnic\n", 112 printk(KERN_ERR "%s: stalled rnic\n",
114 rdev_p->dev_name); 113 rdev_p->dev_name);
115 return -EIO; 114 return -EIO;
@@ -155,7 +154,7 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
155 return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); 154 return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
156} 155}
157 156
158int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) 157int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel)
159{ 158{
160 struct rdma_cq_setup setup; 159 struct rdma_cq_setup setup;
161 int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe); 160 int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe);
@@ -163,12 +162,12 @@ int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
163 cq->cqid = cxio_hal_get_cqid(rdev_p->rscp); 162 cq->cqid = cxio_hal_get_cqid(rdev_p->rscp);
164 if (!cq->cqid) 163 if (!cq->cqid)
165 return -ENOMEM; 164 return -ENOMEM;
166 cq->sw_queue = kzalloc(size, GFP_KERNEL); 165 if (kernel) {
167 if (!cq->sw_queue) 166 cq->sw_queue = kzalloc(size, GFP_KERNEL);
168 return -ENOMEM; 167 if (!cq->sw_queue)
169 cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), 168 return -ENOMEM;
170 (1UL << (cq->size_log2)) * 169 }
171 sizeof(struct t3_cqe), 170 cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), size,
172 &(cq->dma_addr), GFP_KERNEL); 171 &(cq->dma_addr), GFP_KERNEL);
173 if (!cq->queue) { 172 if (!cq->queue) {
174 kfree(cq->sw_queue); 173 kfree(cq->sw_queue);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index f3d440cc68f2..073373c2c560 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -53,7 +53,7 @@
53#define T3_MAX_PBL_SIZE 256 53#define T3_MAX_PBL_SIZE 256
54#define T3_MAX_RQ_SIZE 1024 54#define T3_MAX_RQ_SIZE 1024
55#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1) 55#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1)
56#define T3_MAX_CQ_DEPTH 8192 56#define T3_MAX_CQ_DEPTH 262144
57#define T3_MAX_NUM_STAG (1<<15) 57#define T3_MAX_NUM_STAG (1<<15)
58#define T3_MAX_MR_SIZE 0x100000000ULL 58#define T3_MAX_MR_SIZE 0x100000000ULL
59#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */ 59#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */
@@ -157,7 +157,7 @@ int cxio_rdev_open(struct cxio_rdev *rdev);
157void cxio_rdev_close(struct cxio_rdev *rdev); 157void cxio_rdev_close(struct cxio_rdev *rdev);
158int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq, 158int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
159 enum t3_cq_opcode op, u32 credit); 159 enum t3_cq_opcode op, u32 credit);
160int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq); 160int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel);
161int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq); 161int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
162int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq); 162int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
163void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx); 163void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index a197a5b7ac7f..15073b2da1c5 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -730,7 +730,22 @@ struct t3_cq {
730 730
731static inline void cxio_set_wq_in_error(struct t3_wq *wq) 731static inline void cxio_set_wq_in_error(struct t3_wq *wq)
732{ 732{
733 wq->queue->wq_in_err.err = 1; 733 wq->queue->wq_in_err.err |= 1;
734}
735
736static inline void cxio_disable_wq_db(struct t3_wq *wq)
737{
738 wq->queue->wq_in_err.err |= 2;
739}
740
741static inline void cxio_enable_wq_db(struct t3_wq *wq)
742{
743 wq->queue->wq_in_err.err &= ~2;
744}
745
746static inline int cxio_wq_db_enabled(struct t3_wq *wq)
747{
748 return !(wq->queue->wq_in_err.err & 2);
734} 749}
735 750
736static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq) 751static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq)
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index b0ea0105ddf6..ee1d8b4d4541 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -65,6 +65,46 @@ struct cxgb3_client t3c_client = {
65static LIST_HEAD(dev_list); 65static LIST_HEAD(dev_list);
66static DEFINE_MUTEX(dev_mutex); 66static DEFINE_MUTEX(dev_mutex);
67 67
68static int disable_qp_db(int id, void *p, void *data)
69{
70 struct iwch_qp *qhp = p;
71
72 cxio_disable_wq_db(&qhp->wq);
73 return 0;
74}
75
76static int enable_qp_db(int id, void *p, void *data)
77{
78 struct iwch_qp *qhp = p;
79
80 if (data)
81 ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell, qhp->wq.qpid);
82 cxio_enable_wq_db(&qhp->wq);
83 return 0;
84}
85
86static void disable_dbs(struct iwch_dev *rnicp)
87{
88 spin_lock_irq(&rnicp->lock);
89 idr_for_each(&rnicp->qpidr, disable_qp_db, NULL);
90 spin_unlock_irq(&rnicp->lock);
91}
92
93static void enable_dbs(struct iwch_dev *rnicp, int ring_db)
94{
95 spin_lock_irq(&rnicp->lock);
96 idr_for_each(&rnicp->qpidr, enable_qp_db,
97 (void *)(unsigned long)ring_db);
98 spin_unlock_irq(&rnicp->lock);
99}
100
101static void iwch_db_drop_task(struct work_struct *work)
102{
103 struct iwch_dev *rnicp = container_of(work, struct iwch_dev,
104 db_drop_task.work);
105 enable_dbs(rnicp, 1);
106}
107
68static void rnic_init(struct iwch_dev *rnicp) 108static void rnic_init(struct iwch_dev *rnicp)
69{ 109{
70 PDBG("%s iwch_dev %p\n", __func__, rnicp); 110 PDBG("%s iwch_dev %p\n", __func__, rnicp);
@@ -72,6 +112,7 @@ static void rnic_init(struct iwch_dev *rnicp)
72 idr_init(&rnicp->qpidr); 112 idr_init(&rnicp->qpidr);
73 idr_init(&rnicp->mmidr); 113 idr_init(&rnicp->mmidr);
74 spin_lock_init(&rnicp->lock); 114 spin_lock_init(&rnicp->lock);
115 INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task);
75 116
76 rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; 117 rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
77 rnicp->attr.max_wrs = T3_MAX_QP_DEPTH; 118 rnicp->attr.max_wrs = T3_MAX_QP_DEPTH;
@@ -147,6 +188,8 @@ static void close_rnic_dev(struct t3cdev *tdev)
147 mutex_lock(&dev_mutex); 188 mutex_lock(&dev_mutex);
148 list_for_each_entry_safe(dev, tmp, &dev_list, entry) { 189 list_for_each_entry_safe(dev, tmp, &dev_list, entry) {
149 if (dev->rdev.t3cdev_p == tdev) { 190 if (dev->rdev.t3cdev_p == tdev) {
191 dev->rdev.flags = CXIO_ERROR_FATAL;
192 cancel_delayed_work_sync(&dev->db_drop_task);
150 list_del(&dev->entry); 193 list_del(&dev->entry);
151 iwch_unregister_device(dev); 194 iwch_unregister_device(dev);
152 cxio_rdev_close(&dev->rdev); 195 cxio_rdev_close(&dev->rdev);
@@ -165,7 +208,8 @@ static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id)
165 struct cxio_rdev *rdev = tdev->ulp; 208 struct cxio_rdev *rdev = tdev->ulp;
166 struct iwch_dev *rnicp; 209 struct iwch_dev *rnicp;
167 struct ib_event event; 210 struct ib_event event;
168 u32 portnum = port_id + 1; 211 u32 portnum = port_id + 1;
212 int dispatch = 0;
169 213
170 if (!rdev) 214 if (!rdev)
171 return; 215 return;
@@ -174,21 +218,49 @@ static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id)
174 case OFFLOAD_STATUS_DOWN: { 218 case OFFLOAD_STATUS_DOWN: {
175 rdev->flags = CXIO_ERROR_FATAL; 219 rdev->flags = CXIO_ERROR_FATAL;
176 event.event = IB_EVENT_DEVICE_FATAL; 220 event.event = IB_EVENT_DEVICE_FATAL;
221 dispatch = 1;
177 break; 222 break;
178 } 223 }
179 case OFFLOAD_PORT_DOWN: { 224 case OFFLOAD_PORT_DOWN: {
180 event.event = IB_EVENT_PORT_ERR; 225 event.event = IB_EVENT_PORT_ERR;
226 dispatch = 1;
181 break; 227 break;
182 } 228 }
183 case OFFLOAD_PORT_UP: { 229 case OFFLOAD_PORT_UP: {
184 event.event = IB_EVENT_PORT_ACTIVE; 230 event.event = IB_EVENT_PORT_ACTIVE;
231 dispatch = 1;
232 break;
233 }
234 case OFFLOAD_DB_FULL: {
235 disable_dbs(rnicp);
236 break;
237 }
238 case OFFLOAD_DB_EMPTY: {
239 enable_dbs(rnicp, 1);
240 break;
241 }
242 case OFFLOAD_DB_DROP: {
243 unsigned long delay = 1000;
244 unsigned short r;
245
246 disable_dbs(rnicp);
247 get_random_bytes(&r, 2);
248 delay += r & 1023;
249
250 /*
251 * delay is between 1000-2023 usecs.
252 */
253 schedule_delayed_work(&rnicp->db_drop_task,
254 usecs_to_jiffies(delay));
185 break; 255 break;
186 } 256 }
187 } 257 }
188 258
189 event.device = &rnicp->ibdev; 259 if (dispatch) {
190 event.element.port_num = portnum; 260 event.device = &rnicp->ibdev;
191 ib_dispatch_event(&event); 261 event.element.port_num = portnum;
262 ib_dispatch_event(&event);
263 }
192 264
193 return; 265 return;
194} 266}
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
index 84735506333f..a1c44578e039 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ b/drivers/infiniband/hw/cxgb3/iwch.h
@@ -36,6 +36,7 @@
36#include <linux/list.h> 36#include <linux/list.h>
37#include <linux/spinlock.h> 37#include <linux/spinlock.h>
38#include <linux/idr.h> 38#include <linux/idr.h>
39#include <linux/workqueue.h>
39 40
40#include <rdma/ib_verbs.h> 41#include <rdma/ib_verbs.h>
41 42
@@ -110,6 +111,7 @@ struct iwch_dev {
110 struct idr mmidr; 111 struct idr mmidr;
111 spinlock_t lock; 112 spinlock_t lock;
112 struct list_head entry; 113 struct list_head entry;
114 struct delayed_work db_drop_task;
113}; 115};
114 116
115static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev) 117static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index ed7175549ebd..47b35c6608d2 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -187,7 +187,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve
187 entries = roundup_pow_of_two(entries); 187 entries = roundup_pow_of_two(entries);
188 chp->cq.size_log2 = ilog2(entries); 188 chp->cq.size_log2 = ilog2(entries);
189 189
190 if (cxio_create_cq(&rhp->rdev, &chp->cq)) { 190 if (cxio_create_cq(&rhp->rdev, &chp->cq, !ucontext)) {
191 kfree(chp); 191 kfree(chp);
192 return ERR_PTR(-ENOMEM); 192 return ERR_PTR(-ENOMEM);
193 } 193 }
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 3eb8cecf81d7..b4d893de3650 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -452,7 +452,8 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
452 ++(qhp->wq.sq_wptr); 452 ++(qhp->wq.sq_wptr);
453 } 453 }
454 spin_unlock_irqrestore(&qhp->lock, flag); 454 spin_unlock_irqrestore(&qhp->lock, flag);
455 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); 455 if (cxio_wq_db_enabled(&qhp->wq))
456 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
456 457
457out: 458out:
458 if (err) 459 if (err)
@@ -514,7 +515,8 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
514 num_wrs--; 515 num_wrs--;
515 } 516 }
516 spin_unlock_irqrestore(&qhp->lock, flag); 517 spin_unlock_irqrestore(&qhp->lock, flag);
517 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); 518 if (cxio_wq_db_enabled(&qhp->wq))
519 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
518 520
519out: 521out:
520 if (err) 522 if (err)
@@ -597,7 +599,8 @@ int iwch_bind_mw(struct ib_qp *qp,
597 ++(qhp->wq.sq_wptr); 599 ++(qhp->wq.sq_wptr);
598 spin_unlock_irqrestore(&qhp->lock, flag); 600 spin_unlock_irqrestore(&qhp->lock, flag);
599 601
600 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); 602 if (cxio_wq_db_enabled(&qhp->wq))
603 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
601 604
602 return err; 605 return err;
603} 606}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 42be0b15084b..b2b6fea2b141 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -548,11 +548,10 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
548 struct ehca_eq *eq = &shca->eq; 548 struct ehca_eq *eq = &shca->eq;
549 struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; 549 struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
550 u64 eqe_value, ret; 550 u64 eqe_value, ret;
551 unsigned long flags;
552 int eqe_cnt, i; 551 int eqe_cnt, i;
553 int eq_empty = 0; 552 int eq_empty = 0;
554 553
555 spin_lock_irqsave(&eq->irq_spinlock, flags); 554 spin_lock(&eq->irq_spinlock);
556 if (is_irq) { 555 if (is_irq) {
557 const int max_query_cnt = 100; 556 const int max_query_cnt = 100;
558 int query_cnt = 0; 557 int query_cnt = 0;
@@ -643,7 +642,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
643 } while (1); 642 } while (1);
644 643
645unlock_irq_spinlock: 644unlock_irq_spinlock:
646 spin_unlock_irqrestore(&eq->irq_spinlock, flags); 645 spin_unlock(&eq->irq_spinlock);
647} 646}
648 647
649void ehca_tasklet_eq(unsigned long data) 648void ehca_tasklet_eq(unsigned long data)
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 0338f1fabe8a..b105f664d3ef 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -55,9 +55,7 @@ static struct kmem_cache *qp_cache;
55/* 55/*
56 * attributes not supported by query qp 56 * attributes not supported by query qp
57 */ 57 */
58#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_MAX_DEST_RD_ATOMIC | \ 58#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS | \
59 IB_QP_MAX_QP_RD_ATOMIC | \
60 IB_QP_ACCESS_FLAGS | \
61 IB_QP_EN_SQD_ASYNC_NOTIFY) 59 IB_QP_EN_SQD_ASYNC_NOTIFY)
62 60
63/* 61/*
diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c
index 8c1213f8916a..dba8f9f8b996 100644
--- a/drivers/infiniband/hw/ehca/ehca_sqp.c
+++ b/drivers/infiniband/hw/ehca/ehca_sqp.c
@@ -222,7 +222,7 @@ int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
222{ 222{
223 int ret; 223 int ret;
224 224
225 if (!port_num || port_num > ibdev->phys_port_cnt) 225 if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc)
226 return IB_MAD_RESULT_FAILURE; 226 return IB_MAD_RESULT_FAILURE;
227 227
228 /* accept only pma request */ 228 /* accept only pma request */
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 2a97c964b9ef..b377671264e9 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1214,7 +1214,7 @@ out:
1214static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, 1214static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1215 void *wqe, unsigned *mlx_seg_len) 1215 void *wqe, unsigned *mlx_seg_len)
1216{ 1216{
1217 struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev; 1217 struct ib_device *ib_dev = sqp->qp.ibqp.device;
1218 struct mlx4_wqe_mlx_seg *mlx = wqe; 1218 struct mlx4_wqe_mlx_seg *mlx = wqe;
1219 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; 1219 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
1220 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); 1220 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index b9d09bafd6c1..4272c52e38a4 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -110,6 +110,7 @@ static unsigned int sysfs_idx_addr;
110 110
111static struct pci_device_id nes_pci_table[] = { 111static struct pci_device_id nes_pci_table[] = {
112 {PCI_VENDOR_ID_NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020, PCI_ANY_ID, PCI_ANY_ID}, 112 {PCI_VENDOR_ID_NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020, PCI_ANY_ID, PCI_ANY_ID},
113 {PCI_VENDOR_ID_NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020_KR, PCI_ANY_ID, PCI_ANY_ID},
113 {0} 114 {0}
114}; 115};
115 116
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 98840564bb2f..cc78fee1dd51 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -64,8 +64,9 @@
64 * NetEffect PCI vendor id and NE010 PCI device id. 64 * NetEffect PCI vendor id and NE010 PCI device id.
65 */ 65 */
66#ifndef PCI_VENDOR_ID_NETEFFECT /* not in pci.ids yet */ 66#ifndef PCI_VENDOR_ID_NETEFFECT /* not in pci.ids yet */
67#define PCI_VENDOR_ID_NETEFFECT 0x1678 67#define PCI_VENDOR_ID_NETEFFECT 0x1678
68#define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100 68#define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100
69#define PCI_DEVICE_ID_NETEFFECT_NE020_KR 0x0110
69#endif 70#endif
70 71
71#define NE020_REV 4 72#define NE020_REV 4
@@ -193,8 +194,8 @@ extern u32 cm_packets_created;
193extern u32 cm_packets_received; 194extern u32 cm_packets_received;
194extern u32 cm_packets_dropped; 195extern u32 cm_packets_dropped;
195extern u32 cm_packets_retrans; 196extern u32 cm_packets_retrans;
196extern u32 cm_listens_created; 197extern atomic_t cm_listens_created;
197extern u32 cm_listens_destroyed; 198extern atomic_t cm_listens_destroyed;
198extern u32 cm_backlog_drops; 199extern u32 cm_backlog_drops;
199extern atomic_t cm_loopbacks; 200extern atomic_t cm_loopbacks;
200extern atomic_t cm_nodes_created; 201extern atomic_t cm_nodes_created;
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 39468c277036..2a49ee40b520 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -67,8 +67,8 @@ u32 cm_packets_dropped;
67u32 cm_packets_retrans; 67u32 cm_packets_retrans;
68u32 cm_packets_created; 68u32 cm_packets_created;
69u32 cm_packets_received; 69u32 cm_packets_received;
70u32 cm_listens_created; 70atomic_t cm_listens_created;
71u32 cm_listens_destroyed; 71atomic_t cm_listens_destroyed;
72u32 cm_backlog_drops; 72u32 cm_backlog_drops;
73atomic_t cm_loopbacks; 73atomic_t cm_loopbacks;
74atomic_t cm_nodes_created; 74atomic_t cm_nodes_created;
@@ -1011,9 +1011,10 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
1011 event.cm_info.loc_port = 1011 event.cm_info.loc_port =
1012 loopback->loc_port; 1012 loopback->loc_port;
1013 event.cm_info.cm_id = loopback->cm_id; 1013 event.cm_info.cm_id = loopback->cm_id;
1014 add_ref_cm_node(loopback);
1015 loopback->state = NES_CM_STATE_CLOSED;
1014 cm_event_connect_error(&event); 1016 cm_event_connect_error(&event);
1015 cm_node->state = NES_CM_STATE_LISTENER_DESTROYED; 1017 cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
1016 loopback->state = NES_CM_STATE_CLOSED;
1017 1018
1018 rem_ref_cm_node(cm_node->cm_core, 1019 rem_ref_cm_node(cm_node->cm_core,
1019 cm_node); 1020 cm_node);
@@ -1042,7 +1043,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
1042 kfree(listener); 1043 kfree(listener);
1043 listener = NULL; 1044 listener = NULL;
1044 ret = 0; 1045 ret = 0;
1045 cm_listens_destroyed++; 1046 atomic_inc(&cm_listens_destroyed);
1046 } else { 1047 } else {
1047 spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); 1048 spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
1048 } 1049 }
@@ -3172,7 +3173,7 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
3172 g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node); 3173 g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node);
3173 return err; 3174 return err;
3174 } 3175 }
3175 cm_listens_created++; 3176 atomic_inc(&cm_listens_created);
3176 } 3177 }
3177 3178
3178 cm_id->add_ref(cm_id); 3179 cm_id->add_ref(cm_id);
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index b1c2cbb88f09..ce7f53833577 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -748,16 +748,28 @@ static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count,
748 748
749 if (hw_rev != NE020_REV) { 749 if (hw_rev != NE020_REV) {
750 /* init serdes 0 */ 750 /* init serdes 0 */
751 if (wide_ppm_offset && (nesadapter->phy_type[0] == NES_PHY_TYPE_CX4)) 751 switch (nesadapter->phy_type[0]) {
752 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000FFFAA); 752 case NES_PHY_TYPE_CX4:
753 else 753 if (wide_ppm_offset)
754 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000FFFAA);
755 else
756 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
757 break;
758 case NES_PHY_TYPE_KR:
759 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
760 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x00000000);
761 break;
762 case NES_PHY_TYPE_PUMA_1G:
754 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); 763 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
755
756 if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) {
757 sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0); 764 sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0);
758 sds |= 0x00000100; 765 sds |= 0x00000100;
759 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds); 766 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds);
767 break;
768 default:
769 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
770 break;
760 } 771 }
772
761 if (!OneG_Mode) 773 if (!OneG_Mode)
762 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0, 0x11110000); 774 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0, 0x11110000);
763 775
@@ -778,6 +790,9 @@ static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count,
778 if (wide_ppm_offset) 790 if (wide_ppm_offset)
779 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000FFFAA); 791 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000FFFAA);
780 break; 792 break;
793 case NES_PHY_TYPE_KR:
794 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x00000000);
795 break;
781 case NES_PHY_TYPE_PUMA_1G: 796 case NES_PHY_TYPE_PUMA_1G:
782 sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1); 797 sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1);
783 sds |= 0x000000100; 798 sds |= 0x000000100;
@@ -1279,115 +1294,115 @@ int nes_destroy_cqp(struct nes_device *nesdev)
1279 1294
1280 1295
1281/** 1296/**
1282 * nes_init_phy 1297 * nes_init_1g_phy
1283 */ 1298 */
1284int nes_init_phy(struct nes_device *nesdev) 1299int nes_init_1g_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index)
1285{ 1300{
1286 struct nes_adapter *nesadapter = nesdev->nesadapter;
1287 u32 counter = 0; 1301 u32 counter = 0;
1288 u32 sds;
1289 u32 mac_index = nesdev->mac_index;
1290 u32 tx_config = 0;
1291 u16 phy_data; 1302 u16 phy_data;
1292 u32 temp_phy_data = 0; 1303 int ret = 0;
1293 u32 temp_phy_data2 = 0;
1294 u8 phy_type = nesadapter->phy_type[mac_index];
1295 u8 phy_index = nesadapter->phy_index[mac_index];
1296
1297 if ((nesadapter->OneG_Mode) &&
1298 (phy_type != NES_PHY_TYPE_PUMA_1G)) {
1299 nes_debug(NES_DBG_PHY, "1G PHY, mac_index = %d.\n", mac_index);
1300 if (phy_type == NES_PHY_TYPE_1G) {
1301 tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
1302 tx_config &= 0xFFFFFFE3;
1303 tx_config |= 0x04;
1304 nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
1305 }
1306 1304
1307 nes_read_1G_phy_reg(nesdev, 1, phy_index, &phy_data); 1305 nes_read_1G_phy_reg(nesdev, 1, phy_index, &phy_data);
1308 nes_write_1G_phy_reg(nesdev, 23, phy_index, 0xb000); 1306 nes_write_1G_phy_reg(nesdev, 23, phy_index, 0xb000);
1309 1307
1310 /* Reset the PHY */ 1308 /* Reset the PHY */
1311 nes_write_1G_phy_reg(nesdev, 0, phy_index, 0x8000); 1309 nes_write_1G_phy_reg(nesdev, 0, phy_index, 0x8000);
1312 udelay(100); 1310 udelay(100);
1313 counter = 0; 1311 counter = 0;
1314 do { 1312 do {
1315 nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
1316 if (counter++ > 100)
1317 break;
1318 } while (phy_data & 0x8000);
1319
1320 /* Setting no phy loopback */
1321 phy_data &= 0xbfff;
1322 phy_data |= 0x1140;
1323 nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data);
1324 nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); 1313 nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
1325 nes_read_1G_phy_reg(nesdev, 0x17, phy_index, &phy_data); 1314 if (counter++ > 100) {
1326 nes_read_1G_phy_reg(nesdev, 0x1e, phy_index, &phy_data); 1315 ret = -1;
1327 1316 break;
1328 /* Setting the interrupt mask */ 1317 }
1329 nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data); 1318 } while (phy_data & 0x8000);
1330 nes_write_1G_phy_reg(nesdev, 0x19, phy_index, 0xffee); 1319
1331 nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data); 1320 /* Setting no phy loopback */
1321 phy_data &= 0xbfff;
1322 phy_data |= 0x1140;
1323 nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data);
1324 nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
1325 nes_read_1G_phy_reg(nesdev, 0x17, phy_index, &phy_data);
1326 nes_read_1G_phy_reg(nesdev, 0x1e, phy_index, &phy_data);
1327
1328 /* Setting the interrupt mask */
1329 nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data);
1330 nes_write_1G_phy_reg(nesdev, 0x19, phy_index, 0xffee);
1331 nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data);
1332
1333 /* turning on flow control */
1334 nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data);
1335 nes_write_1G_phy_reg(nesdev, 4, phy_index, (phy_data & ~(0x03E0)) | 0xc00);
1336 nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data);
1337
1338 /* Clear Half duplex */
1339 nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data);
1340 nes_write_1G_phy_reg(nesdev, 9, phy_index, phy_data & ~(0x0100));
1341 nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data);
1342
1343 nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
1344 nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data | 0x0300);
1345
1346 return ret;
1347}
1332 1348
1333 /* turning on flow control */
1334 nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data);
1335 nes_write_1G_phy_reg(nesdev, 4, phy_index, (phy_data & ~(0x03E0)) | 0xc00);
1336 nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data);
1337 1349
1338 /* Clear Half duplex */ 1350/**
1339 nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data); 1351 * nes_init_2025_phy
1340 nes_write_1G_phy_reg(nesdev, 9, phy_index, phy_data & ~(0x0100)); 1352 */
1341 nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data); 1353int nes_init_2025_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index)
1354{
1355 u32 temp_phy_data = 0;
1356 u32 temp_phy_data2 = 0;
1357 u32 counter = 0;
1358 u32 sds;
1359 u32 mac_index = nesdev->mac_index;
1360 int ret = 0;
1361 unsigned int first_attempt = 1;
1342 1362
1343 nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); 1363 /* Check firmware heartbeat */
1344 nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data | 0x0300); 1364 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
1365 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
1366 udelay(1500);
1367 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
1368 temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
1345 1369
1346 return 0; 1370 if (temp_phy_data != temp_phy_data2) {
1371 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
1372 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
1373 if ((temp_phy_data & 0xff) > 0x20)
1374 return 0;
1375 printk(PFX "Reinitialize external PHY\n");
1347 } 1376 }
1348 1377
1349 if ((phy_type == NES_PHY_TYPE_IRIS) || 1378 /* no heartbeat, configure the PHY */
1350 (phy_type == NES_PHY_TYPE_ARGUS) || 1379 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000);
1351 (phy_type == NES_PHY_TYPE_SFP_D)) { 1380 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0000);
1352 /* setup 10G MDIO operation */ 1381 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
1353 tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); 1382 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
1354 tx_config &= 0xFFFFFFE3;
1355 tx_config |= 0x15;
1356 nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
1357 }
1358 if ((phy_type == NES_PHY_TYPE_ARGUS) ||
1359 (phy_type == NES_PHY_TYPE_SFP_D)) {
1360 u32 first_time = 1;
1361 1383
1362 /* Check firmware heartbeat */ 1384 switch (phy_type) {
1363 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); 1385 case NES_PHY_TYPE_ARGUS:
1364 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); 1386 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
1365 udelay(1500); 1387 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
1366 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); 1388 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C);
1367 temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); 1389 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0008);
1390 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0001);
1391 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098);
1392 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00);
1368 1393
1369 if (temp_phy_data != temp_phy_data2) { 1394 /* setup LEDs */
1370 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd); 1395 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007);
1371 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); 1396 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A);
1372 if ((temp_phy_data & 0xff) > 0x20) 1397 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009);
1373 return 0; 1398 break;
1374 printk(PFX "Reinitializing PHY\n");
1375 }
1376 1399
1377 /* no heartbeat, configure the PHY */ 1400 case NES_PHY_TYPE_SFP_D:
1378 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000);
1379 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0000);
1380 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A); 1401 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
1381 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052); 1402 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
1382 if (phy_type == NES_PHY_TYPE_ARGUS) { 1403 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x0004);
1383 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C); 1404 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0038);
1384 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0008); 1405 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013);
1385 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0001);
1386 } else {
1387 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x0004);
1388 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0038);
1389 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013);
1390 }
1391 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098); 1406 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098);
1392 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00); 1407 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00);
1393 1408
@@ -1395,71 +1410,136 @@ int nes_init_phy(struct nes_device *nesdev)
1395 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007); 1410 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007);
1396 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A); 1411 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A);
1397 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009); 1412 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009);
1413 break;
1414
1415 case NES_PHY_TYPE_KR:
1416 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
1417 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
1418 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C);
1419 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0010);
1420 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013);
1421 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0080);
1422 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00);
1423
1424 /* setup LEDs */
1425 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x000B);
1426 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x0003);
1427 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0004);
1398 1428
1399 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0028, 0xA528); 1429 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0022, 0x406D);
1430 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0023, 0x0020);
1431 break;
1432 }
1433
1434 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0028, 0xA528);
1400 1435
1401 /* Bring PHY out of reset */ 1436 /* Bring PHY out of reset */
1402 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0002); 1437 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0002);
1403 1438
1404 /* Check for heartbeat */ 1439 /* Check for heartbeat */
1405 counter = 0; 1440 counter = 0;
1406 mdelay(690); 1441 mdelay(690);
1442 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
1443 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
1444 do {
1445 if (counter++ > 150) {
1446 printk(PFX "No PHY heartbeat\n");
1447 break;
1448 }
1449 mdelay(1);
1407 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); 1450 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
1451 temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
1452 } while ((temp_phy_data2 == temp_phy_data));
1453
1454 /* wait for tracking */
1455 counter = 0;
1456 do {
1457 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
1408 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); 1458 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
1409 do { 1459 if (counter++ > 300) {
1410 if (counter++ > 150) { 1460 if (((temp_phy_data & 0xff) == 0x0) && first_attempt) {
1411 printk(PFX "No PHY heartbeat\n"); 1461 first_attempt = 0;
1462 counter = 0;
1463 /* reset AMCC PHY and try again */
1464 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x00c0);
1465 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x0040);
1466 continue;
1467 } else {
1468 ret = 1;
1412 break; 1469 break;
1413 } 1470 }
1414 mdelay(1); 1471 }
1415 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); 1472 mdelay(10);
1416 temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); 1473 } while ((temp_phy_data & 0xff) < 0x30);
1417 } while ((temp_phy_data2 == temp_phy_data)); 1474
1418 1475 /* setup signal integrity */
1419 /* wait for tracking */ 1476 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000);
1420 counter = 0; 1477 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00D, 0x00FE);
1421 do { 1478 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00E, 0x0032);
1422 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd); 1479 if (phy_type == NES_PHY_TYPE_KR) {
1423 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); 1480 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x000C);
1424 if (counter++ > 300) { 1481 } else {
1425 if (((temp_phy_data & 0xff) == 0x0) && first_time) {
1426 first_time = 0;
1427 counter = 0;
1428 /* reset AMCC PHY and try again */
1429 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x00c0);
1430 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x0040);
1431 continue;
1432 } else {
1433 printk(PFX "PHY did not track\n");
1434 break;
1435 }
1436 }
1437 mdelay(10);
1438 } while ((temp_phy_data & 0xff) < 0x30);
1439
1440 /* setup signal integrity */
1441 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000);
1442 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00D, 0x00FE);
1443 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00E, 0x0032);
1444 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x0002); 1482 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x0002);
1445 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc314, 0x0063); 1483 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc314, 0x0063);
1484 }
1485
1486 /* reset serdes */
1487 sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200);
1488 sds |= 0x1;
1489 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200, sds);
1490 sds &= 0xfffffffe;
1491 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200, sds);
1492
1493 counter = 0;
1494 while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040)
1495 && (counter++ < 5000))
1496 ;
1497
1498 return ret;
1499}
1500
1446 1501
1447 /* reset serdes */ 1502/**
1448 sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + 1503 * nes_init_phy
1449 mac_index * 0x200); 1504 */
1450 sds |= 0x1; 1505int nes_init_phy(struct nes_device *nesdev)
1451 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + 1506{
1452 mac_index * 0x200, sds); 1507 struct nes_adapter *nesadapter = nesdev->nesadapter;
1453 sds &= 0xfffffffe; 1508 u32 mac_index = nesdev->mac_index;
1454 nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + 1509 u32 tx_config = 0;
1455 mac_index * 0x200, sds); 1510 unsigned long flags;
1456 1511 u8 phy_type = nesadapter->phy_type[mac_index];
1457 counter = 0; 1512 u8 phy_index = nesadapter->phy_index[mac_index];
1458 while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040) 1513 int ret = 0;
1459 && (counter++ < 5000)) 1514
1460 ; 1515 tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
1516 if (phy_type == NES_PHY_TYPE_1G) {
1517 /* setup 1G MDIO operation */
1518 tx_config &= 0xFFFFFFE3;
1519 tx_config |= 0x04;
1520 } else {
1521 /* setup 10G MDIO operation */
1522 tx_config &= 0xFFFFFFE3;
1523 tx_config |= 0x15;
1461 } 1524 }
1462 return 0; 1525 nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
1526
1527 spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags);
1528
1529 switch (phy_type) {
1530 case NES_PHY_TYPE_1G:
1531 ret = nes_init_1g_phy(nesdev, phy_type, phy_index);
1532 break;
1533 case NES_PHY_TYPE_ARGUS:
1534 case NES_PHY_TYPE_SFP_D:
1535 case NES_PHY_TYPE_KR:
1536 ret = nes_init_2025_phy(nesdev, phy_type, phy_index);
1537 break;
1538 }
1539
1540 spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
1541
1542 return ret;
1463} 1543}
1464 1544
1465 1545
@@ -2460,23 +2540,9 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
2460 } 2540 }
2461 } else { 2541 } else {
2462 switch (nesadapter->phy_type[mac_index]) { 2542 switch (nesadapter->phy_type[mac_index]) {
2463 case NES_PHY_TYPE_IRIS:
2464 nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
2465 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
2466 u32temp = 20;
2467 do {
2468 nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
2469 phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
2470 if ((phy_data == temp_phy_data) || (!(--u32temp)))
2471 break;
2472 temp_phy_data = phy_data;
2473 } while (1);
2474 nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
2475 __func__, phy_data, nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP");
2476 break;
2477
2478 case NES_PHY_TYPE_ARGUS: 2543 case NES_PHY_TYPE_ARGUS:
2479 case NES_PHY_TYPE_SFP_D: 2544 case NES_PHY_TYPE_SFP_D:
2545 case NES_PHY_TYPE_KR:
2480 /* clear the alarms */ 2546 /* clear the alarms */
2481 nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0x0008); 2547 nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0x0008);
2482 nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc001); 2548 nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc001);
@@ -3352,8 +3418,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3352 u16 async_event_id; 3418 u16 async_event_id;
3353 u8 tcp_state; 3419 u8 tcp_state;
3354 u8 iwarp_state; 3420 u8 iwarp_state;
3355 int must_disconn = 1;
3356 int must_terminate = 0;
3357 struct ib_event ibevent; 3421 struct ib_event ibevent;
3358 3422
3359 nes_debug(NES_DBG_AEQ, "\n"); 3423 nes_debug(NES_DBG_AEQ, "\n");
@@ -3367,6 +3431,8 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3367 BUG_ON(!context); 3431 BUG_ON(!context);
3368 } 3432 }
3369 3433
3434 /* context is nesqp unless async_event_id == CQ ERROR */
3435 nesqp = (struct nes_qp *)(unsigned long)context;
3370 async_event_id = (u16)aeq_info; 3436 async_event_id = (u16)aeq_info;
3371 tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT; 3437 tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
3372 iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT; 3438 iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
@@ -3378,8 +3444,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3378 3444
3379 switch (async_event_id) { 3445 switch (async_event_id) {
3380 case NES_AEQE_AEID_LLP_FIN_RECEIVED: 3446 case NES_AEQE_AEID_LLP_FIN_RECEIVED:
3381 nesqp = (struct nes_qp *)(unsigned long)context;
3382
3383 if (nesqp->term_flags) 3447 if (nesqp->term_flags)
3384 return; /* Ignore it, wait for close complete */ 3448 return; /* Ignore it, wait for close complete */
3385 3449
@@ -3394,79 +3458,48 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3394 async_event_id, nesqp->last_aeq, tcp_state); 3458 async_event_id, nesqp->last_aeq, tcp_state);
3395 } 3459 }
3396 3460
3397 if ((tcp_state != NES_AEQE_TCP_STATE_CLOSE_WAIT) || 3461 break;
3398 (nesqp->ibqp_state != IB_QPS_RTS)) {
3399 /* FIN Received but tcp state or IB state moved on,
3400 should expect a close complete */
3401 return;
3402 }
3403
3404 case NES_AEQE_AEID_LLP_CLOSE_COMPLETE: 3462 case NES_AEQE_AEID_LLP_CLOSE_COMPLETE:
3405 nesqp = (struct nes_qp *)(unsigned long)context;
3406 if (nesqp->term_flags) { 3463 if (nesqp->term_flags) {
3407 nes_terminate_done(nesqp, 0); 3464 nes_terminate_done(nesqp, 0);
3408 return; 3465 return;
3409 } 3466 }
3467 spin_lock_irqsave(&nesqp->lock, flags);
3468 nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
3469 spin_unlock_irqrestore(&nesqp->lock, flags);
3470 nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_CLOSING, 0, 0);
3471 nes_cm_disconn(nesqp);
3472 break;
3410 3473
3411 case NES_AEQE_AEID_LLP_CONNECTION_RESET:
3412 case NES_AEQE_AEID_RESET_SENT: 3474 case NES_AEQE_AEID_RESET_SENT:
3413 nesqp = (struct nes_qp *)(unsigned long)context; 3475 tcp_state = NES_AEQE_TCP_STATE_CLOSED;
3414 if (async_event_id == NES_AEQE_AEID_RESET_SENT) {
3415 tcp_state = NES_AEQE_TCP_STATE_CLOSED;
3416 }
3417 spin_lock_irqsave(&nesqp->lock, flags); 3476 spin_lock_irqsave(&nesqp->lock, flags);
3418 nesqp->hw_iwarp_state = iwarp_state; 3477 nesqp->hw_iwarp_state = iwarp_state;
3419 nesqp->hw_tcp_state = tcp_state; 3478 nesqp->hw_tcp_state = tcp_state;
3420 nesqp->last_aeq = async_event_id; 3479 nesqp->last_aeq = async_event_id;
3421 3480 nesqp->hte_added = 0;
3422 if ((tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
3423 (tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT)) {
3424 nesqp->hte_added = 0;
3425 next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE;
3426 }
3427
3428 if ((nesqp->ibqp_state == IB_QPS_RTS) &&
3429 ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
3430 (async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
3431 switch (nesqp->hw_iwarp_state) {
3432 case NES_AEQE_IWARP_STATE_RTS:
3433 next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
3434 nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
3435 break;
3436 case NES_AEQE_IWARP_STATE_TERMINATE:
3437 must_disconn = 0; /* terminate path takes care of disconn */
3438 if (nesqp->term_flags == 0)
3439 must_terminate = 1;
3440 break;
3441 }
3442 } else {
3443 if (async_event_id == NES_AEQE_AEID_LLP_FIN_RECEIVED) {
3444 /* FIN Received but ib state not RTS,
3445 close complete will be on its way */
3446 must_disconn = 0;
3447 }
3448 }
3449 spin_unlock_irqrestore(&nesqp->lock, flags); 3481 spin_unlock_irqrestore(&nesqp->lock, flags);
3482 next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE;
3483 nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
3484 nes_cm_disconn(nesqp);
3485 break;
3450 3486
3451 if (must_terminate) 3487 case NES_AEQE_AEID_LLP_CONNECTION_RESET:
3452 nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL); 3488 if (atomic_read(&nesqp->close_timer_started))
3453 else if (must_disconn) { 3489 return;
3454 if (next_iwarp_state) { 3490 spin_lock_irqsave(&nesqp->lock, flags);
3455 nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X\n", 3491 nesqp->hw_iwarp_state = iwarp_state;
3456 nesqp->hwqp.qp_id, next_iwarp_state); 3492 nesqp->hw_tcp_state = tcp_state;
3457 nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0); 3493 nesqp->last_aeq = async_event_id;
3458 } 3494 spin_unlock_irqrestore(&nesqp->lock, flags);
3459 nes_cm_disconn(nesqp); 3495 nes_cm_disconn(nesqp);
3460 }
3461 break; 3496 break;
3462 3497
3463 case NES_AEQE_AEID_TERMINATE_SENT: 3498 case NES_AEQE_AEID_TERMINATE_SENT:
3464 nesqp = (struct nes_qp *)(unsigned long)context;
3465 nes_terminate_send_fin(nesdev, nesqp, aeqe); 3499 nes_terminate_send_fin(nesdev, nesqp, aeqe);
3466 break; 3500 break;
3467 3501
3468 case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED: 3502 case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED:
3469 nesqp = (struct nes_qp *)(unsigned long)context;
3470 nes_terminate_received(nesdev, nesqp, aeqe); 3503 nes_terminate_received(nesdev, nesqp, aeqe);
3471 break; 3504 break;
3472 3505
@@ -3480,7 +3513,8 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3480 case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: 3513 case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
3481 case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION: 3514 case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
3482 case NES_AEQE_AEID_AMP_TO_WRAP: 3515 case NES_AEQE_AEID_AMP_TO_WRAP:
3483 nesqp = (struct nes_qp *)(unsigned long)context; 3516 printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_ACCESS_ERR\n",
3517 nesqp->hwqp.qp_id, async_event_id);
3484 nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_ACCESS_ERR); 3518 nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_ACCESS_ERR);
3485 break; 3519 break;
3486 3520
@@ -3488,7 +3522,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3488 case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL: 3522 case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL:
3489 case NES_AEQE_AEID_DDP_UBE_INVALID_MO: 3523 case NES_AEQE_AEID_DDP_UBE_INVALID_MO:
3490 case NES_AEQE_AEID_DDP_UBE_INVALID_QN: 3524 case NES_AEQE_AEID_DDP_UBE_INVALID_QN:
3491 nesqp = (struct nes_qp *)(unsigned long)context;
3492 if (iwarp_opcode(nesqp, aeq_info) > IWARP_OPCODE_TERM) { 3525 if (iwarp_opcode(nesqp, aeq_info) > IWARP_OPCODE_TERM) {
3493 aeq_info &= 0xffff0000; 3526 aeq_info &= 0xffff0000;
3494 aeq_info |= NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE; 3527 aeq_info |= NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE;
@@ -3530,7 +3563,8 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3530 case NES_AEQE_AEID_STAG_ZERO_INVALID: 3563 case NES_AEQE_AEID_STAG_ZERO_INVALID:
3531 case NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST: 3564 case NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST:
3532 case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP: 3565 case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
3533 nesqp = (struct nes_qp *)(unsigned long)context; 3566 printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_FATAL\n",
3567 nesqp->hwqp.qp_id, async_event_id);
3534 nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL); 3568 nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
3535 break; 3569 break;
3536 3570
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index 084be0ee689b..9b1e7f869d83 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -37,12 +37,12 @@
37 37
38#define NES_PHY_TYPE_CX4 1 38#define NES_PHY_TYPE_CX4 1
39#define NES_PHY_TYPE_1G 2 39#define NES_PHY_TYPE_1G 2
40#define NES_PHY_TYPE_IRIS 3
41#define NES_PHY_TYPE_ARGUS 4 40#define NES_PHY_TYPE_ARGUS 4
42#define NES_PHY_TYPE_PUMA_1G 5 41#define NES_PHY_TYPE_PUMA_1G 5
43#define NES_PHY_TYPE_PUMA_10G 6 42#define NES_PHY_TYPE_PUMA_10G 6
44#define NES_PHY_TYPE_GLADIUS 7 43#define NES_PHY_TYPE_GLADIUS 7
45#define NES_PHY_TYPE_SFP_D 8 44#define NES_PHY_TYPE_SFP_D 8
45#define NES_PHY_TYPE_KR 9
46 46
47#define NES_MULTICAST_PF_MAX 8 47#define NES_MULTICAST_PF_MAX 8
48 48
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index ab1102780186..7dd6ce6e7b99 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -1230,8 +1230,8 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev,
1230 target_stat_values[++index] = cm_packets_received; 1230 target_stat_values[++index] = cm_packets_received;
1231 target_stat_values[++index] = cm_packets_dropped; 1231 target_stat_values[++index] = cm_packets_dropped;
1232 target_stat_values[++index] = cm_packets_retrans; 1232 target_stat_values[++index] = cm_packets_retrans;
1233 target_stat_values[++index] = cm_listens_created; 1233 target_stat_values[++index] = atomic_read(&cm_listens_created);
1234 target_stat_values[++index] = cm_listens_destroyed; 1234 target_stat_values[++index] = atomic_read(&cm_listens_destroyed);
1235 target_stat_values[++index] = cm_backlog_drops; 1235 target_stat_values[++index] = cm_backlog_drops;
1236 target_stat_values[++index] = atomic_read(&cm_loopbacks); 1236 target_stat_values[++index] = atomic_read(&cm_loopbacks);
1237 target_stat_values[++index] = atomic_read(&cm_nodes_created); 1237 target_stat_values[++index] = atomic_read(&cm_nodes_created);
@@ -1461,9 +1461,9 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd
1461 } 1461 }
1462 return 0; 1462 return 0;
1463 } 1463 }
1464 if ((phy_type == NES_PHY_TYPE_IRIS) || 1464 if ((phy_type == NES_PHY_TYPE_ARGUS) ||
1465 (phy_type == NES_PHY_TYPE_ARGUS) || 1465 (phy_type == NES_PHY_TYPE_SFP_D) ||
1466 (phy_type == NES_PHY_TYPE_SFP_D)) { 1466 (phy_type == NES_PHY_TYPE_KR)) {
1467 et_cmd->transceiver = XCVR_EXTERNAL; 1467 et_cmd->transceiver = XCVR_EXTERNAL;
1468 et_cmd->port = PORT_FIBRE; 1468 et_cmd->port = PORT_FIBRE;
1469 et_cmd->supported = SUPPORTED_FIBRE; 1469 et_cmd->supported = SUPPORTED_FIBRE;
@@ -1583,8 +1583,7 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
1583 struct net_device *netdev; 1583 struct net_device *netdev;
1584 struct nic_qp_map *curr_qp_map; 1584 struct nic_qp_map *curr_qp_map;
1585 u32 u32temp; 1585 u32 u32temp;
1586 u16 phy_data; 1586 u8 phy_type = nesdev->nesadapter->phy_type[nesdev->mac_index];
1587 u16 temp_phy_data;
1588 1587
1589 netdev = alloc_etherdev(sizeof(struct nes_vnic)); 1588 netdev = alloc_etherdev(sizeof(struct nes_vnic));
1590 if (!netdev) { 1589 if (!netdev) {
@@ -1692,65 +1691,23 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
1692 1691
1693 if ((nesdev->netdev_count == 0) && 1692 if ((nesdev->netdev_count == 0) &&
1694 ((PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index) || 1693 ((PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index) ||
1695 ((nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) && 1694 ((phy_type == NES_PHY_TYPE_PUMA_1G) &&
1696 (((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) || 1695 (((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) ||
1697 ((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) { 1696 ((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) {
1698 /*
1699 * nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n",
1700 * NES_IDX_PHY_PCS_CONTROL_STATUS0 + (0x200 * (nesvnic->logical_port & 1)));
1701 */
1702 u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + 1697 u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
1703 (0x200 * (nesdev->mac_index & 1))); 1698 (0x200 * (nesdev->mac_index & 1)));
1704 if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G) { 1699 if (phy_type != NES_PHY_TYPE_PUMA_1G) {
1705 u32temp |= 0x00200000; 1700 u32temp |= 0x00200000;
1706 nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + 1701 nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
1707 (0x200 * (nesdev->mac_index & 1)), u32temp); 1702 (0x200 * (nesdev->mac_index & 1)), u32temp);
1708 } 1703 }
1709 1704
1710 u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
1711 (0x200 * (nesdev->mac_index & 1)));
1712
1713 if ((u32temp&0x0f1f0000) == 0x0f0f0000) {
1714 if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) {
1715 nes_init_phy(nesdev);
1716 nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1);
1717 temp_phy_data = (u16)nes_read_indexed(nesdev,
1718 NES_IDX_MAC_MDIO_CONTROL);
1719 u32temp = 20;
1720 do {
1721 nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1);
1722 phy_data = (u16)nes_read_indexed(nesdev,
1723 NES_IDX_MAC_MDIO_CONTROL);
1724 if ((phy_data == temp_phy_data) || (!(--u32temp)))
1725 break;
1726 temp_phy_data = phy_data;
1727 } while (1);
1728 if (phy_data & 4) {
1729 nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
1730 nesvnic->linkup = 1;
1731 } else {
1732 nes_debug(NES_DBG_INIT, "The Link is DOWN!!.\n");
1733 }
1734 } else {
1735 nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
1736 nesvnic->linkup = 1;
1737 }
1738 } else if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) {
1739 nes_debug(NES_DBG_INIT, "mac_index=%d, logical_port=%d, u32temp=0x%04X, PCI_FUNC=%d\n",
1740 nesdev->mac_index, nesvnic->logical_port, u32temp, PCI_FUNC(nesdev->pcidev->devfn));
1741 if (((nesdev->mac_index < 2) && ((u32temp&0x01010000) == 0x01010000)) ||
1742 ((nesdev->mac_index > 1) && ((u32temp&0x02020000) == 0x02020000))) {
1743 nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
1744 nesvnic->linkup = 1;
1745 }
1746 }
1747 /* clear the MAC interrupt status, assumes direct logical to physical mapping */ 1705 /* clear the MAC interrupt status, assumes direct logical to physical mapping */
1748 u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index)); 1706 u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index));
1749 nes_debug(NES_DBG_INIT, "Phy interrupt status = 0x%X.\n", u32temp); 1707 nes_debug(NES_DBG_INIT, "Phy interrupt status = 0x%X.\n", u32temp);
1750 nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index), u32temp); 1708 nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index), u32temp);
1751 1709
1752 if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_IRIS) 1710 nes_init_phy(nesdev);
1753 nes_init_phy(nesdev);
1754 1711
1755 } 1712 }
1756 1713
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 64d3136e3747..815725f886c4 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -228,7 +228,7 @@ static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw,
228 /* Check for SQ overflow */ 228 /* Check for SQ overflow */
229 if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) { 229 if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
230 spin_unlock_irqrestore(&nesqp->lock, flags); 230 spin_unlock_irqrestore(&nesqp->lock, flags);
231 return -EINVAL; 231 return -ENOMEM;
232 } 232 }
233 233
234 wqe = &nesqp->hwqp.sq_vbase[head]; 234 wqe = &nesqp->hwqp.sq_vbase[head];
@@ -3294,7 +3294,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
3294 3294
3295 /* Check for SQ overflow */ 3295 /* Check for SQ overflow */
3296 if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) { 3296 if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
3297 err = -EINVAL; 3297 err = -ENOMEM;
3298 break; 3298 break;
3299 } 3299 }
3300 3300
@@ -3577,7 +3577,7 @@ static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
3577 } 3577 }
3578 /* Check for RQ overflow */ 3578 /* Check for RQ overflow */
3579 if (((head + (2 * qsize) - nesqp->hwqp.rq_tail) % qsize) == (qsize - 1)) { 3579 if (((head + (2 * qsize) - nesqp->hwqp.rq_tail) % qsize) == (qsize - 1)) {
3580 err = -EINVAL; 3580 err = -ENOMEM;
3581 break; 3581 break;
3582 } 3582 }
3583 3583
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index e9795f60e5d6..d10b4ec68d28 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -55,9 +55,7 @@ static int ipoib_get_coalesce(struct net_device *dev,
55 struct ipoib_dev_priv *priv = netdev_priv(dev); 55 struct ipoib_dev_priv *priv = netdev_priv(dev);
56 56
57 coal->rx_coalesce_usecs = priv->ethtool.coalesce_usecs; 57 coal->rx_coalesce_usecs = priv->ethtool.coalesce_usecs;
58 coal->tx_coalesce_usecs = priv->ethtool.coalesce_usecs;
59 coal->rx_max_coalesced_frames = priv->ethtool.max_coalesced_frames; 58 coal->rx_max_coalesced_frames = priv->ethtool.max_coalesced_frames;
60 coal->tx_max_coalesced_frames = priv->ethtool.max_coalesced_frames;
61 59
62 return 0; 60 return 0;
63} 61}
@@ -69,10 +67,8 @@ static int ipoib_set_coalesce(struct net_device *dev,
69 int ret; 67 int ret;
70 68
71 /* 69 /*
72 * Since IPoIB uses a single CQ for both rx and tx, we assume 70 * These values are saved in the private data and returned
73 * that rx params dictate the configuration. These values are 71 * when ipoib_get_coalesce() is called
74 * saved in the private data and returned when ipoib_get_coalesce()
75 * is called.
76 */ 72 */
77 if (coal->rx_coalesce_usecs > 0xffff || 73 if (coal->rx_coalesce_usecs > 0xffff ||
78 coal->rx_max_coalesced_frames > 0xffff) 74 coal->rx_max_coalesced_frames > 0xffff)
@@ -85,8 +81,6 @@ static int ipoib_set_coalesce(struct net_device *dev,
85 return ret; 81 return ret;
86 } 82 }
87 83
88 coal->tx_coalesce_usecs = coal->rx_coalesce_usecs;
89 coal->tx_max_coalesced_frames = coal->rx_max_coalesced_frames;
90 priv->ethtool.coalesce_usecs = coal->rx_coalesce_usecs; 84 priv->ethtool.coalesce_usecs = coal->rx_coalesce_usecs;
91 priv->ethtool.max_coalesced_frames = coal->rx_max_coalesced_frames; 85 priv->ethtool.max_coalesced_frames = coal->rx_max_coalesced_frames;
92 86
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 5f7a6fca0a4d..71237f8f78f7 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -128,6 +128,28 @@ static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
128 return 0; 128 return 0;
129} 129}
130 130
131int iser_initialize_task_headers(struct iscsi_task *task,
132 struct iser_tx_desc *tx_desc)
133{
134 struct iscsi_iser_conn *iser_conn = task->conn->dd_data;
135 struct iser_device *device = iser_conn->ib_conn->device;
136 struct iscsi_iser_task *iser_task = task->dd_data;
137 u64 dma_addr;
138
139 dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
140 ISER_HEADERS_LEN, DMA_TO_DEVICE);
141 if (ib_dma_mapping_error(device->ib_device, dma_addr))
142 return -ENOMEM;
143
144 tx_desc->dma_addr = dma_addr;
145 tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
146 tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
147 tx_desc->tx_sg[0].lkey = device->mr->lkey;
148
149 iser_task->headers_initialized = 1;
150 iser_task->iser_conn = iser_conn;
151 return 0;
152}
131/** 153/**
132 * iscsi_iser_task_init - Initialize task 154 * iscsi_iser_task_init - Initialize task
133 * @task: iscsi task 155 * @task: iscsi task
@@ -137,17 +159,17 @@ static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
137static int 159static int
138iscsi_iser_task_init(struct iscsi_task *task) 160iscsi_iser_task_init(struct iscsi_task *task)
139{ 161{
140 struct iscsi_iser_conn *iser_conn = task->conn->dd_data;
141 struct iscsi_iser_task *iser_task = task->dd_data; 162 struct iscsi_iser_task *iser_task = task->dd_data;
142 163
164 if (!iser_task->headers_initialized)
165 if (iser_initialize_task_headers(task, &iser_task->desc))
166 return -ENOMEM;
167
143 /* mgmt task */ 168 /* mgmt task */
144 if (!task->sc) { 169 if (!task->sc)
145 iser_task->desc.data = task->data;
146 return 0; 170 return 0;
147 }
148 171
149 iser_task->command_sent = 0; 172 iser_task->command_sent = 0;
150 iser_task->iser_conn = iser_conn;
151 iser_task_rdma_init(iser_task); 173 iser_task_rdma_init(iser_task);
152 return 0; 174 return 0;
153} 175}
@@ -168,7 +190,7 @@ iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_task *task)
168{ 190{
169 int error = 0; 191 int error = 0;
170 192
171 iser_dbg("task deq [cid %d itt 0x%x]\n", conn->id, task->itt); 193 iser_dbg("mtask xmit [cid %d itt 0x%x]\n", conn->id, task->itt);
172 194
173 error = iser_send_control(conn, task); 195 error = iser_send_control(conn, task);
174 196
@@ -178,9 +200,6 @@ iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_task *task)
178 * - if yes, the task is recycled at iscsi_complete_pdu 200 * - if yes, the task is recycled at iscsi_complete_pdu
179 * - if no, the task is recycled at iser_snd_completion 201 * - if no, the task is recycled at iser_snd_completion
180 */ 202 */
181 if (error && error != -ENOBUFS)
182 iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
183
184 return error; 203 return error;
185} 204}
186 205
@@ -232,7 +251,7 @@ iscsi_iser_task_xmit(struct iscsi_task *task)
232 task->imm_count, task->unsol_r2t.data_length); 251 task->imm_count, task->unsol_r2t.data_length);
233 } 252 }
234 253
235 iser_dbg("task deq [cid %d itt 0x%x]\n", 254 iser_dbg("ctask xmit [cid %d itt 0x%x]\n",
236 conn->id, task->itt); 255 conn->id, task->itt);
237 256
238 /* Send the cmd PDU */ 257 /* Send the cmd PDU */
@@ -248,8 +267,6 @@ iscsi_iser_task_xmit(struct iscsi_task *task)
248 error = iscsi_iser_task_xmit_unsol_data(conn, task); 267 error = iscsi_iser_task_xmit_unsol_data(conn, task);
249 268
250 iscsi_iser_task_xmit_exit: 269 iscsi_iser_task_xmit_exit:
251 if (error && error != -ENOBUFS)
252 iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
253 return error; 270 return error;
254} 271}
255 272
@@ -283,7 +300,7 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
283 * due to issues with the login code re iser sematics 300 * due to issues with the login code re iser sematics
284 * this not set in iscsi_conn_setup - FIXME 301 * this not set in iscsi_conn_setup - FIXME
285 */ 302 */
286 conn->max_recv_dlength = 128; 303 conn->max_recv_dlength = ISER_RECV_DATA_SEG_LEN;
287 304
288 iser_conn = conn->dd_data; 305 iser_conn = conn->dd_data;
289 conn->dd_data = iser_conn; 306 conn->dd_data = iser_conn;
@@ -401,7 +418,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
401 struct Scsi_Host *shost; 418 struct Scsi_Host *shost;
402 struct iser_conn *ib_conn; 419 struct iser_conn *ib_conn;
403 420
404 shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 1); 421 shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
405 if (!shost) 422 if (!shost)
406 return NULL; 423 return NULL;
407 shost->transportt = iscsi_iser_scsi_transport; 424 shost->transportt = iscsi_iser_scsi_transport;
@@ -675,7 +692,7 @@ static int __init iser_init(void)
675 memset(&ig, 0, sizeof(struct iser_global)); 692 memset(&ig, 0, sizeof(struct iser_global));
676 693
677 ig.desc_cache = kmem_cache_create("iser_descriptors", 694 ig.desc_cache = kmem_cache_create("iser_descriptors",
678 sizeof (struct iser_desc), 695 sizeof(struct iser_tx_desc),
679 0, SLAB_HWCACHE_ALIGN, 696 0, SLAB_HWCACHE_ALIGN,
680 NULL); 697 NULL);
681 if (ig.desc_cache == NULL) 698 if (ig.desc_cache == NULL)
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 9d529cae1f0d..036934cdcb92 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -102,9 +102,9 @@
102#define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), * 102#define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), *
103 * SCSI_TMFUNC(2), LOGOUT(1) */ 103 * SCSI_TMFUNC(2), LOGOUT(1) */
104 104
105#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX + \ 105#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
106 ISER_MAX_RX_MISC_PDUS + \ 106
107 ISER_MAX_TX_MISC_PDUS) 107#define ISER_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
108 108
109/* the max TX (send) WR supported by the iSER QP is defined by * 109/* the max TX (send) WR supported by the iSER QP is defined by *
110 * max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect * 110 * max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect *
@@ -132,6 +132,12 @@ struct iser_hdr {
132 __be64 read_va; 132 __be64 read_va;
133} __attribute__((packed)); 133} __attribute__((packed));
134 134
135/* Constant PDU lengths calculations */
136#define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
137
138#define ISER_RECV_DATA_SEG_LEN 128
139#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
140#define ISER_RX_LOGIN_SIZE (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
135 141
136/* Length of an object name string */ 142/* Length of an object name string */
137#define ISER_OBJECT_NAME_SIZE 64 143#define ISER_OBJECT_NAME_SIZE 64
@@ -187,51 +193,43 @@ struct iser_regd_buf {
187 struct iser_mem_reg reg; /* memory registration info */ 193 struct iser_mem_reg reg; /* memory registration info */
188 void *virt_addr; 194 void *virt_addr;
189 struct iser_device *device; /* device->device for dma_unmap */ 195 struct iser_device *device; /* device->device for dma_unmap */
190 u64 dma_addr; /* if non zero, addr for dma_unmap */
191 enum dma_data_direction direction; /* direction for dma_unmap */ 196 enum dma_data_direction direction; /* direction for dma_unmap */
192 unsigned int data_size; 197 unsigned int data_size;
193 atomic_t ref_count; /* refcount, freed when dec to 0 */
194};
195
196#define MAX_REGD_BUF_VECTOR_LEN 2
197
198struct iser_dto {
199 struct iscsi_iser_task *task;
200 struct iser_conn *ib_conn;
201 int notify_enable;
202
203 /* vector of registered buffers */
204 unsigned int regd_vector_len;
205 struct iser_regd_buf *regd[MAX_REGD_BUF_VECTOR_LEN];
206
207 /* offset into the registered buffer may be specified */
208 unsigned int offset[MAX_REGD_BUF_VECTOR_LEN];
209
210 /* a smaller size may be specified, if 0, then full size is used */
211 unsigned int used_sz[MAX_REGD_BUF_VECTOR_LEN];
212}; 198};
213 199
214enum iser_desc_type { 200enum iser_desc_type {
215 ISCSI_RX,
216 ISCSI_TX_CONTROL , 201 ISCSI_TX_CONTROL ,
217 ISCSI_TX_SCSI_COMMAND, 202 ISCSI_TX_SCSI_COMMAND,
218 ISCSI_TX_DATAOUT 203 ISCSI_TX_DATAOUT
219}; 204};
220 205
221struct iser_desc { 206struct iser_tx_desc {
222 struct iser_hdr iser_header; 207 struct iser_hdr iser_header;
223 struct iscsi_hdr iscsi_header; 208 struct iscsi_hdr iscsi_header;
224 struct iser_regd_buf hdr_regd_buf;
225 void *data; /* used by RX & TX_CONTROL */
226 struct iser_regd_buf data_regd_buf; /* used by RX & TX_CONTROL */
227 enum iser_desc_type type; 209 enum iser_desc_type type;
228 struct iser_dto dto; 210 u64 dma_addr;
211 /* sg[0] points to iser/iscsi headers, sg[1] optionally points to either
212 of immediate data, unsolicited data-out or control (login,text) */
213 struct ib_sge tx_sg[2];
214 int num_sge;
229}; 215};
230 216
217#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \
218 sizeof(u64) + sizeof(struct ib_sge)))
219struct iser_rx_desc {
220 struct iser_hdr iser_header;
221 struct iscsi_hdr iscsi_header;
222 char data[ISER_RECV_DATA_SEG_LEN];
223 u64 dma_addr;
224 struct ib_sge rx_sg;
225 char pad[ISER_RX_PAD_SIZE];
226} __attribute__((packed));
227
231struct iser_device { 228struct iser_device {
232 struct ib_device *ib_device; 229 struct ib_device *ib_device;
233 struct ib_pd *pd; 230 struct ib_pd *pd;
234 struct ib_cq *cq; 231 struct ib_cq *rx_cq;
232 struct ib_cq *tx_cq;
235 struct ib_mr *mr; 233 struct ib_mr *mr;
236 struct tasklet_struct cq_tasklet; 234 struct tasklet_struct cq_tasklet;
237 struct list_head ig_list; /* entry in ig devices list */ 235 struct list_head ig_list; /* entry in ig devices list */
@@ -250,15 +248,18 @@ struct iser_conn {
250 struct ib_fmr_pool *fmr_pool; /* pool of IB FMRs */ 248 struct ib_fmr_pool *fmr_pool; /* pool of IB FMRs */
251 int disc_evt_flag; /* disconn event delivered */ 249 int disc_evt_flag; /* disconn event delivered */
252 wait_queue_head_t wait; /* waitq for conn/disconn */ 250 wait_queue_head_t wait; /* waitq for conn/disconn */
253 atomic_t post_recv_buf_count; /* posted rx count */ 251 int post_recv_buf_count; /* posted rx count */
254 atomic_t post_send_buf_count; /* posted tx count */ 252 atomic_t post_send_buf_count; /* posted tx count */
255 atomic_t unexpected_pdu_count;/* count of received *
256 * unexpected pdus *
257 * not yet retired */
258 char name[ISER_OBJECT_NAME_SIZE]; 253 char name[ISER_OBJECT_NAME_SIZE];
259 struct iser_page_vec *page_vec; /* represents SG to fmr maps* 254 struct iser_page_vec *page_vec; /* represents SG to fmr maps*
260 * maps serialized as tx is*/ 255 * maps serialized as tx is*/
261 struct list_head conn_list; /* entry in ig conn list */ 256 struct list_head conn_list; /* entry in ig conn list */
257
258 char *login_buf;
259 u64 login_dma;
260 unsigned int rx_desc_head;
261 struct iser_rx_desc *rx_descs;
262 struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
262}; 263};
263 264
264struct iscsi_iser_conn { 265struct iscsi_iser_conn {
@@ -267,7 +268,7 @@ struct iscsi_iser_conn {
267}; 268};
268 269
269struct iscsi_iser_task { 270struct iscsi_iser_task {
270 struct iser_desc desc; 271 struct iser_tx_desc desc;
271 struct iscsi_iser_conn *iser_conn; 272 struct iscsi_iser_conn *iser_conn;
272 enum iser_task_status status; 273 enum iser_task_status status;
273 int command_sent; /* set if command sent */ 274 int command_sent; /* set if command sent */
@@ -275,6 +276,7 @@ struct iscsi_iser_task {
275 struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */ 276 struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */
276 struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/ 277 struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/
277 struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */ 278 struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */
279 int headers_initialized;
278}; 280};
279 281
280struct iser_page_vec { 282struct iser_page_vec {
@@ -322,22 +324,17 @@ void iser_conn_put(struct iser_conn *ib_conn);
322 324
323void iser_conn_terminate(struct iser_conn *ib_conn); 325void iser_conn_terminate(struct iser_conn *ib_conn);
324 326
325void iser_rcv_completion(struct iser_desc *desc, 327void iser_rcv_completion(struct iser_rx_desc *desc,
326 unsigned long dto_xfer_len); 328 unsigned long dto_xfer_len,
329 struct iser_conn *ib_conn);
327 330
328void iser_snd_completion(struct iser_desc *desc); 331void iser_snd_completion(struct iser_tx_desc *desc, struct iser_conn *ib_conn);
329 332
330void iser_task_rdma_init(struct iscsi_iser_task *task); 333void iser_task_rdma_init(struct iscsi_iser_task *task);
331 334
332void iser_task_rdma_finalize(struct iscsi_iser_task *task); 335void iser_task_rdma_finalize(struct iscsi_iser_task *task);
333 336
334void iser_dto_buffs_release(struct iser_dto *dto); 337void iser_free_rx_descriptors(struct iser_conn *ib_conn);
335
336int iser_regd_buff_release(struct iser_regd_buf *regd_buf);
337
338void iser_reg_single(struct iser_device *device,
339 struct iser_regd_buf *regd_buf,
340 enum dma_data_direction direction);
341 338
342void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, 339void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task,
343 enum iser_data_dir cmd_dir); 340 enum iser_data_dir cmd_dir);
@@ -356,11 +353,9 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
356 353
357void iser_unreg_mem(struct iser_mem_reg *mem_reg); 354void iser_unreg_mem(struct iser_mem_reg *mem_reg);
358 355
359int iser_post_recv(struct iser_desc *rx_desc); 356int iser_post_recvl(struct iser_conn *ib_conn);
360int iser_post_send(struct iser_desc *tx_desc); 357int iser_post_recvm(struct iser_conn *ib_conn, int count);
361 358int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc);
362int iser_conn_state_comp(struct iser_conn *ib_conn,
363 enum iser_ib_conn_state comp);
364 359
365int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, 360int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
366 struct iser_data_buf *data, 361 struct iser_data_buf *data,
@@ -368,4 +363,6 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
368 enum dma_data_direction dma_dir); 363 enum dma_data_direction dma_dir);
369 364
370void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task); 365void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task);
366int iser_initialize_task_headers(struct iscsi_task *task,
367 struct iser_tx_desc *tx_desc);
371#endif 368#endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 9de640200ad3..0b9ef0716588 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -39,29 +39,6 @@
39 39
40#include "iscsi_iser.h" 40#include "iscsi_iser.h"
41 41
42/* Constant PDU lengths calculations */
43#define ISER_TOTAL_HEADERS_LEN (sizeof (struct iser_hdr) + \
44 sizeof (struct iscsi_hdr))
45
46/* iser_dto_add_regd_buff - increments the reference count for *
47 * the registered buffer & adds it to the DTO object */
48static void iser_dto_add_regd_buff(struct iser_dto *dto,
49 struct iser_regd_buf *regd_buf,
50 unsigned long use_offset,
51 unsigned long use_size)
52{
53 int add_idx;
54
55 atomic_inc(&regd_buf->ref_count);
56
57 add_idx = dto->regd_vector_len;
58 dto->regd[add_idx] = regd_buf;
59 dto->used_sz[add_idx] = use_size;
60 dto->offset[add_idx] = use_offset;
61
62 dto->regd_vector_len++;
63}
64
65/* Register user buffer memory and initialize passive rdma 42/* Register user buffer memory and initialize passive rdma
66 * dto descriptor. Total data size is stored in 43 * dto descriptor. Total data size is stored in
67 * iser_task->data[ISER_DIR_IN].data_len 44 * iser_task->data[ISER_DIR_IN].data_len
@@ -122,9 +99,9 @@ iser_prepare_write_cmd(struct iscsi_task *task,
122 struct iscsi_iser_task *iser_task = task->dd_data; 99 struct iscsi_iser_task *iser_task = task->dd_data;
123 struct iser_regd_buf *regd_buf; 100 struct iser_regd_buf *regd_buf;
124 int err; 101 int err;
125 struct iser_dto *send_dto = &iser_task->desc.dto;
126 struct iser_hdr *hdr = &iser_task->desc.iser_header; 102 struct iser_hdr *hdr = &iser_task->desc.iser_header;
127 struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT]; 103 struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
104 struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1];
128 105
129 err = iser_dma_map_task_data(iser_task, 106 err = iser_dma_map_task_data(iser_task,
130 buf_out, 107 buf_out,
@@ -163,135 +140,100 @@ iser_prepare_write_cmd(struct iscsi_task *task,
163 if (imm_sz > 0) { 140 if (imm_sz > 0) {
164 iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n", 141 iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
165 task->itt, imm_sz); 142 task->itt, imm_sz);
166 iser_dto_add_regd_buff(send_dto, 143 tx_dsg->addr = regd_buf->reg.va;
167 regd_buf, 144 tx_dsg->length = imm_sz;
168 0, 145 tx_dsg->lkey = regd_buf->reg.lkey;
169 imm_sz); 146 iser_task->desc.num_sge = 2;
170 } 147 }
171 148
172 return 0; 149 return 0;
173} 150}
174 151
175/** 152/* creates a new tx descriptor and adds header regd buffer */
176 * iser_post_receive_control - allocates, initializes and posts receive DTO. 153static void iser_create_send_desc(struct iser_conn *ib_conn,
177 */ 154 struct iser_tx_desc *tx_desc)
178static int iser_post_receive_control(struct iscsi_conn *conn)
179{ 155{
180 struct iscsi_iser_conn *iser_conn = conn->dd_data; 156 struct iser_device *device = ib_conn->device;
181 struct iser_desc *rx_desc;
182 struct iser_regd_buf *regd_hdr;
183 struct iser_regd_buf *regd_data;
184 struct iser_dto *recv_dto = NULL;
185 struct iser_device *device = iser_conn->ib_conn->device;
186 int rx_data_size, err;
187 int posts, outstanding_unexp_pdus;
188
189 /* for the login sequence we must support rx of upto 8K; login is done
190 * after conn create/bind (connect) and conn stop/bind (reconnect),
191 * what's common for both schemes is that the connection is not started
192 */
193 if (conn->c_stage != ISCSI_CONN_STARTED)
194 rx_data_size = ISCSI_DEF_MAX_RECV_SEG_LEN;
195 else /* FIXME till user space sets conn->max_recv_dlength correctly */
196 rx_data_size = 128;
197
198 outstanding_unexp_pdus =
199 atomic_xchg(&iser_conn->ib_conn->unexpected_pdu_count, 0);
200
201 /*
202 * in addition to the response buffer, replace those consumed by
203 * unexpected pdus.
204 */
205 for (posts = 0; posts < 1 + outstanding_unexp_pdus; posts++) {
206 rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO);
207 if (rx_desc == NULL) {
208 iser_err("Failed to alloc desc for post recv %d\n",
209 posts);
210 err = -ENOMEM;
211 goto post_rx_cache_alloc_failure;
212 }
213 rx_desc->type = ISCSI_RX;
214 rx_desc->data = kmalloc(rx_data_size, GFP_NOIO);
215 if (rx_desc->data == NULL) {
216 iser_err("Failed to alloc data buf for post recv %d\n",
217 posts);
218 err = -ENOMEM;
219 goto post_rx_kmalloc_failure;
220 }
221
222 recv_dto = &rx_desc->dto;
223 recv_dto->ib_conn = iser_conn->ib_conn;
224 recv_dto->regd_vector_len = 0;
225 157
226 regd_hdr = &rx_desc->hdr_regd_buf; 158 ib_dma_sync_single_for_cpu(device->ib_device,
227 memset(regd_hdr, 0, sizeof(struct iser_regd_buf)); 159 tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
228 regd_hdr->device = device;
229 regd_hdr->virt_addr = rx_desc; /* == &rx_desc->iser_header */
230 regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN;
231 160
232 iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE); 161 memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
233 162 tx_desc->iser_header.flags = ISER_VER;
234 iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0);
235 163
236 regd_data = &rx_desc->data_regd_buf; 164 tx_desc->num_sge = 1;
237 memset(regd_data, 0, sizeof(struct iser_regd_buf));
238 regd_data->device = device;
239 regd_data->virt_addr = rx_desc->data;
240 regd_data->data_size = rx_data_size;
241 165
242 iser_reg_single(device, regd_data, DMA_FROM_DEVICE); 166 if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
167 tx_desc->tx_sg[0].lkey = device->mr->lkey;
168 iser_dbg("sdesc %p lkey mismatch, fixing\n", tx_desc);
169 }
170}
243 171
244 iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0);
245 172
246 err = iser_post_recv(rx_desc); 173int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
247 if (err) { 174{
248 iser_err("Failed iser_post_recv for post %d\n", posts); 175 int i, j;
249 goto post_rx_post_recv_failure; 176 u64 dma_addr;
250 } 177 struct iser_rx_desc *rx_desc;
178 struct ib_sge *rx_sg;
179 struct iser_device *device = ib_conn->device;
180
181 ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS *
182 sizeof(struct iser_rx_desc), GFP_KERNEL);
183 if (!ib_conn->rx_descs)
184 goto rx_desc_alloc_fail;
185
186 rx_desc = ib_conn->rx_descs;
187
188 for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) {
189 dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
190 ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
191 if (ib_dma_mapping_error(device->ib_device, dma_addr))
192 goto rx_desc_dma_map_failed;
193
194 rx_desc->dma_addr = dma_addr;
195
196 rx_sg = &rx_desc->rx_sg;
197 rx_sg->addr = rx_desc->dma_addr;
198 rx_sg->length = ISER_RX_PAYLOAD_SIZE;
199 rx_sg->lkey = device->mr->lkey;
251 } 200 }
252 /* all posts successful */
253 return 0;
254 201
255post_rx_post_recv_failure: 202 ib_conn->rx_desc_head = 0;
256 iser_dto_buffs_release(recv_dto); 203 return 0;
257 kfree(rx_desc->data);
258post_rx_kmalloc_failure:
259 kmem_cache_free(ig.desc_cache, rx_desc);
260post_rx_cache_alloc_failure:
261 if (posts > 0) {
262 /*
263 * response buffer posted, but did not replace all unexpected
264 * pdu recv bufs. Ignore error, retry occurs next send
265 */
266 outstanding_unexp_pdus -= (posts - 1);
267 err = 0;
268 }
269 atomic_add(outstanding_unexp_pdus,
270 &iser_conn->ib_conn->unexpected_pdu_count);
271 204
272 return err; 205rx_desc_dma_map_failed:
206 rx_desc = ib_conn->rx_descs;
207 for (j = 0; j < i; j++, rx_desc++)
208 ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
209 ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
210 kfree(ib_conn->rx_descs);
211 ib_conn->rx_descs = NULL;
212rx_desc_alloc_fail:
213 iser_err("failed allocating rx descriptors / data buffers\n");
214 return -ENOMEM;
273} 215}
274 216
275/* creates a new tx descriptor and adds header regd buffer */ 217void iser_free_rx_descriptors(struct iser_conn *ib_conn)
276static void iser_create_send_desc(struct iscsi_iser_conn *iser_conn,
277 struct iser_desc *tx_desc)
278{ 218{
279 struct iser_regd_buf *regd_hdr = &tx_desc->hdr_regd_buf; 219 int i;
280 struct iser_dto *send_dto = &tx_desc->dto; 220 struct iser_rx_desc *rx_desc;
221 struct iser_device *device = ib_conn->device;
281 222
282 memset(regd_hdr, 0, sizeof(struct iser_regd_buf)); 223 if (ib_conn->login_buf) {
283 regd_hdr->device = iser_conn->ib_conn->device; 224 ib_dma_unmap_single(device->ib_device, ib_conn->login_dma,
284 regd_hdr->virt_addr = tx_desc; /* == &tx_desc->iser_header */ 225 ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
285 regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN; 226 kfree(ib_conn->login_buf);
227 }
286 228
287 send_dto->ib_conn = iser_conn->ib_conn; 229 if (!ib_conn->rx_descs)
288 send_dto->notify_enable = 1; 230 return;
289 send_dto->regd_vector_len = 0;
290 231
291 memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr)); 232 rx_desc = ib_conn->rx_descs;
292 tx_desc->iser_header.flags = ISER_VER; 233 for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++)
293 234 ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
294 iser_dto_add_regd_buff(send_dto, regd_hdr, 0, 0); 235 ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
236 kfree(ib_conn->rx_descs);
295} 237}
296 238
297/** 239/**
@@ -301,46 +243,23 @@ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn)
301{ 243{
302 struct iscsi_iser_conn *iser_conn = conn->dd_data; 244 struct iscsi_iser_conn *iser_conn = conn->dd_data;
303 245
304 int i; 246 iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX);
305 /*
306 * FIXME this value should be declared to the target during login with
307 * the MaxOutstandingUnexpectedPDUs key when supported
308 */
309 int initial_post_recv_bufs_num = ISER_MAX_RX_MISC_PDUS;
310
311 iser_dbg("Initially post: %d\n", initial_post_recv_bufs_num);
312 247
313 /* Check that there is no posted recv or send buffers left - */ 248 /* Check that there is no posted recv or send buffers left - */
314 /* they must be consumed during the login phase */ 249 /* they must be consumed during the login phase */
315 BUG_ON(atomic_read(&iser_conn->ib_conn->post_recv_buf_count) != 0); 250 BUG_ON(iser_conn->ib_conn->post_recv_buf_count != 0);
316 BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0); 251 BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);
317 252
318 /* Initial post receive buffers */ 253 if (iser_alloc_rx_descriptors(iser_conn->ib_conn))
319 for (i = 0; i < initial_post_recv_bufs_num; i++) { 254 return -ENOMEM;
320 if (iser_post_receive_control(conn) != 0) {
321 iser_err("Failed to post recv bufs at:%d conn:0x%p\n",
322 i, conn);
323 return -ENOMEM;
324 }
325 }
326 iser_dbg("Posted %d post recv bufs, conn:0x%p\n", i, conn);
327 return 0;
328}
329 255
330static int 256 /* Initial post receive buffers */
331iser_check_xmit(struct iscsi_conn *conn, void *task) 257 if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX))
332{ 258 return -ENOMEM;
333 struct iscsi_iser_conn *iser_conn = conn->dd_data;
334 259
335 if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) ==
336 ISER_QP_MAX_REQ_DTOS) {
337 iser_dbg("%ld can't xmit task %p\n",jiffies,task);
338 return -ENOBUFS;
339 }
340 return 0; 260 return 0;
341} 261}
342 262
343
344/** 263/**
345 * iser_send_command - send command PDU 264 * iser_send_command - send command PDU
346 */ 265 */
@@ -349,27 +268,18 @@ int iser_send_command(struct iscsi_conn *conn,
349{ 268{
350 struct iscsi_iser_conn *iser_conn = conn->dd_data; 269 struct iscsi_iser_conn *iser_conn = conn->dd_data;
351 struct iscsi_iser_task *iser_task = task->dd_data; 270 struct iscsi_iser_task *iser_task = task->dd_data;
352 struct iser_dto *send_dto = NULL;
353 unsigned long edtl; 271 unsigned long edtl;
354 int err = 0; 272 int err;
355 struct iser_data_buf *data_buf; 273 struct iser_data_buf *data_buf;
356 struct iscsi_cmd *hdr = (struct iscsi_cmd *)task->hdr; 274 struct iscsi_cmd *hdr = (struct iscsi_cmd *)task->hdr;
357 struct scsi_cmnd *sc = task->sc; 275 struct scsi_cmnd *sc = task->sc;
358 276 struct iser_tx_desc *tx_desc = &iser_task->desc;
359 if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
360 iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
361 return -EPERM;
362 }
363 if (iser_check_xmit(conn, task))
364 return -ENOBUFS;
365 277
366 edtl = ntohl(hdr->data_length); 278 edtl = ntohl(hdr->data_length);
367 279
368 /* build the tx desc regd header and add it to the tx desc dto */ 280 /* build the tx desc regd header and add it to the tx desc dto */
369 iser_task->desc.type = ISCSI_TX_SCSI_COMMAND; 281 tx_desc->type = ISCSI_TX_SCSI_COMMAND;
370 send_dto = &iser_task->desc.dto; 282 iser_create_send_desc(iser_conn->ib_conn, tx_desc);
371 send_dto->task = iser_task;
372 iser_create_send_desc(iser_conn, &iser_task->desc);
373 283
374 if (hdr->flags & ISCSI_FLAG_CMD_READ) 284 if (hdr->flags & ISCSI_FLAG_CMD_READ)
375 data_buf = &iser_task->data[ISER_DIR_IN]; 285 data_buf = &iser_task->data[ISER_DIR_IN];
@@ -398,23 +308,13 @@ int iser_send_command(struct iscsi_conn *conn,
398 goto send_command_error; 308 goto send_command_error;
399 } 309 }
400 310
401 iser_reg_single(iser_conn->ib_conn->device,
402 send_dto->regd[0], DMA_TO_DEVICE);
403
404 if (iser_post_receive_control(conn) != 0) {
405 iser_err("post_recv failed!\n");
406 err = -ENOMEM;
407 goto send_command_error;
408 }
409
410 iser_task->status = ISER_TASK_STATUS_STARTED; 311 iser_task->status = ISER_TASK_STATUS_STARTED;
411 312
412 err = iser_post_send(&iser_task->desc); 313 err = iser_post_send(iser_conn->ib_conn, tx_desc);
413 if (!err) 314 if (!err)
414 return 0; 315 return 0;
415 316
416send_command_error: 317send_command_error:
417 iser_dto_buffs_release(send_dto);
418 iser_err("conn %p failed task->itt %d err %d\n",conn, task->itt, err); 318 iser_err("conn %p failed task->itt %d err %d\n",conn, task->itt, err);
419 return err; 319 return err;
420} 320}
@@ -428,20 +328,13 @@ int iser_send_data_out(struct iscsi_conn *conn,
428{ 328{
429 struct iscsi_iser_conn *iser_conn = conn->dd_data; 329 struct iscsi_iser_conn *iser_conn = conn->dd_data;
430 struct iscsi_iser_task *iser_task = task->dd_data; 330 struct iscsi_iser_task *iser_task = task->dd_data;
431 struct iser_desc *tx_desc = NULL; 331 struct iser_tx_desc *tx_desc = NULL;
432 struct iser_dto *send_dto = NULL; 332 struct iser_regd_buf *regd_buf;
433 unsigned long buf_offset; 333 unsigned long buf_offset;
434 unsigned long data_seg_len; 334 unsigned long data_seg_len;
435 uint32_t itt; 335 uint32_t itt;
436 int err = 0; 336 int err = 0;
437 337 struct ib_sge *tx_dsg;
438 if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
439 iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
440 return -EPERM;
441 }
442
443 if (iser_check_xmit(conn, task))
444 return -ENOBUFS;
445 338
446 itt = (__force uint32_t)hdr->itt; 339 itt = (__force uint32_t)hdr->itt;
447 data_seg_len = ntoh24(hdr->dlength); 340 data_seg_len = ntoh24(hdr->dlength);
@@ -450,28 +343,25 @@ int iser_send_data_out(struct iscsi_conn *conn,
450 iser_dbg("%s itt %d dseg_len %d offset %d\n", 343 iser_dbg("%s itt %d dseg_len %d offset %d\n",
451 __func__,(int)itt,(int)data_seg_len,(int)buf_offset); 344 __func__,(int)itt,(int)data_seg_len,(int)buf_offset);
452 345
453 tx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO); 346 tx_desc = kmem_cache_zalloc(ig.desc_cache, GFP_ATOMIC);
454 if (tx_desc == NULL) { 347 if (tx_desc == NULL) {
455 iser_err("Failed to alloc desc for post dataout\n"); 348 iser_err("Failed to alloc desc for post dataout\n");
456 return -ENOMEM; 349 return -ENOMEM;
457 } 350 }
458 351
459 tx_desc->type = ISCSI_TX_DATAOUT; 352 tx_desc->type = ISCSI_TX_DATAOUT;
353 tx_desc->iser_header.flags = ISER_VER;
460 memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr)); 354 memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));
461 355
462 /* build the tx desc regd header and add it to the tx desc dto */ 356 /* build the tx desc */
463 send_dto = &tx_desc->dto; 357 iser_initialize_task_headers(task, tx_desc);
464 send_dto->task = iser_task;
465 iser_create_send_desc(iser_conn, tx_desc);
466
467 iser_reg_single(iser_conn->ib_conn->device,
468 send_dto->regd[0], DMA_TO_DEVICE);
469 358
470 /* all data was registered for RDMA, we can use the lkey */ 359 regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];
471 iser_dto_add_regd_buff(send_dto, 360 tx_dsg = &tx_desc->tx_sg[1];
472 &iser_task->rdma_regd[ISER_DIR_OUT], 361 tx_dsg->addr = regd_buf->reg.va + buf_offset;
473 buf_offset, 362 tx_dsg->length = data_seg_len;
474 data_seg_len); 363 tx_dsg->lkey = regd_buf->reg.lkey;
364 tx_desc->num_sge = 2;
475 365
476 if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) { 366 if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
477 iser_err("Offset:%ld & DSL:%ld in Data-Out " 367 iser_err("Offset:%ld & DSL:%ld in Data-Out "
@@ -485,12 +375,11 @@ int iser_send_data_out(struct iscsi_conn *conn,
485 itt, buf_offset, data_seg_len); 375 itt, buf_offset, data_seg_len);
486 376
487 377
488 err = iser_post_send(tx_desc); 378 err = iser_post_send(iser_conn->ib_conn, tx_desc);
489 if (!err) 379 if (!err)
490 return 0; 380 return 0;
491 381
492send_data_out_error: 382send_data_out_error:
493 iser_dto_buffs_release(send_dto);
494 kmem_cache_free(ig.desc_cache, tx_desc); 383 kmem_cache_free(ig.desc_cache, tx_desc);
495 iser_err("conn %p failed err %d\n",conn, err); 384 iser_err("conn %p failed err %d\n",conn, err);
496 return err; 385 return err;
@@ -501,64 +390,44 @@ int iser_send_control(struct iscsi_conn *conn,
501{ 390{
502 struct iscsi_iser_conn *iser_conn = conn->dd_data; 391 struct iscsi_iser_conn *iser_conn = conn->dd_data;
503 struct iscsi_iser_task *iser_task = task->dd_data; 392 struct iscsi_iser_task *iser_task = task->dd_data;
504 struct iser_desc *mdesc = &iser_task->desc; 393 struct iser_tx_desc *mdesc = &iser_task->desc;
505 struct iser_dto *send_dto = NULL;
506 unsigned long data_seg_len; 394 unsigned long data_seg_len;
507 int err = 0; 395 int err = 0;
508 struct iser_regd_buf *regd_buf;
509 struct iser_device *device; 396 struct iser_device *device;
510 unsigned char opcode;
511
512 if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
513 iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
514 return -EPERM;
515 }
516
517 if (iser_check_xmit(conn, task))
518 return -ENOBUFS;
519 397
520 /* build the tx desc regd header and add it to the tx desc dto */ 398 /* build the tx desc regd header and add it to the tx desc dto */
521 mdesc->type = ISCSI_TX_CONTROL; 399 mdesc->type = ISCSI_TX_CONTROL;
522 send_dto = &mdesc->dto; 400 iser_create_send_desc(iser_conn->ib_conn, mdesc);
523 send_dto->task = NULL;
524 iser_create_send_desc(iser_conn, mdesc);
525 401
526 device = iser_conn->ib_conn->device; 402 device = iser_conn->ib_conn->device;
527 403
528 iser_reg_single(device, send_dto->regd[0], DMA_TO_DEVICE);
529
530 data_seg_len = ntoh24(task->hdr->dlength); 404 data_seg_len = ntoh24(task->hdr->dlength);
531 405
532 if (data_seg_len > 0) { 406 if (data_seg_len > 0) {
533 regd_buf = &mdesc->data_regd_buf; 407 struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
534 memset(regd_buf, 0, sizeof(struct iser_regd_buf)); 408 if (task != conn->login_task) {
535 regd_buf->device = device; 409 iser_err("data present on non login task!!!\n");
536 regd_buf->virt_addr = task->data; 410 goto send_control_error;
537 regd_buf->data_size = task->data_count; 411 }
538 iser_reg_single(device, regd_buf, 412 memcpy(iser_conn->ib_conn->login_buf, task->data,
539 DMA_TO_DEVICE); 413 task->data_count);
540 iser_dto_add_regd_buff(send_dto, regd_buf, 414 tx_dsg->addr = iser_conn->ib_conn->login_dma;
541 0, 415 tx_dsg->length = data_seg_len;
542 data_seg_len); 416 tx_dsg->lkey = device->mr->lkey;
417 mdesc->num_sge = 2;
543 } 418 }
544 419
545 opcode = task->hdr->opcode & ISCSI_OPCODE_MASK; 420 if (task == conn->login_task) {
546 421 err = iser_post_recvl(iser_conn->ib_conn);
547 /* post recv buffer for response if one is expected */ 422 if (err)
548 if (!(opcode == ISCSI_OP_NOOP_OUT && task->hdr->itt == RESERVED_ITT)) {
549 if (iser_post_receive_control(conn) != 0) {
550 iser_err("post_rcv_buff failed!\n");
551 err = -ENOMEM;
552 goto send_control_error; 423 goto send_control_error;
553 }
554 } 424 }
555 425
556 err = iser_post_send(mdesc); 426 err = iser_post_send(iser_conn->ib_conn, mdesc);
557 if (!err) 427 if (!err)
558 return 0; 428 return 0;
559 429
560send_control_error: 430send_control_error:
561 iser_dto_buffs_release(send_dto);
562 iser_err("conn %p failed err %d\n",conn, err); 431 iser_err("conn %p failed err %d\n",conn, err);
563 return err; 432 return err;
564} 433}
@@ -566,104 +435,71 @@ send_control_error:
566/** 435/**
567 * iser_rcv_dto_completion - recv DTO completion 436 * iser_rcv_dto_completion - recv DTO completion
568 */ 437 */
569void iser_rcv_completion(struct iser_desc *rx_desc, 438void iser_rcv_completion(struct iser_rx_desc *rx_desc,
570 unsigned long dto_xfer_len) 439 unsigned long rx_xfer_len,
440 struct iser_conn *ib_conn)
571{ 441{
572 struct iser_dto *dto = &rx_desc->dto; 442 struct iscsi_iser_conn *conn = ib_conn->iser_conn;
573 struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn;
574 struct iscsi_task *task;
575 struct iscsi_iser_task *iser_task;
576 struct iscsi_hdr *hdr; 443 struct iscsi_hdr *hdr;
577 char *rx_data = NULL; 444 u64 rx_dma;
578 int rx_data_len = 0; 445 int rx_buflen, outstanding, count, err;
579 unsigned char opcode; 446
580 447 /* differentiate between login to all other PDUs */
581 hdr = &rx_desc->iscsi_header; 448 if ((char *)rx_desc == ib_conn->login_buf) {
449 rx_dma = ib_conn->login_dma;
450 rx_buflen = ISER_RX_LOGIN_SIZE;
451 } else {
452 rx_dma = rx_desc->dma_addr;
453 rx_buflen = ISER_RX_PAYLOAD_SIZE;
454 }
582 455
583 iser_dbg("op 0x%x itt 0x%x\n", hdr->opcode,hdr->itt); 456 ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
457 rx_buflen, DMA_FROM_DEVICE);
584 458
585 if (dto_xfer_len > ISER_TOTAL_HEADERS_LEN) { /* we have data */ 459 hdr = &rx_desc->iscsi_header;
586 rx_data_len = dto_xfer_len - ISER_TOTAL_HEADERS_LEN;
587 rx_data = dto->regd[1]->virt_addr;
588 rx_data += dto->offset[1];
589 }
590 460
591 opcode = hdr->opcode & ISCSI_OPCODE_MASK; 461 iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
592 462 hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
593 if (opcode == ISCSI_OP_SCSI_CMD_RSP) {
594 spin_lock(&conn->iscsi_conn->session->lock);
595 task = iscsi_itt_to_ctask(conn->iscsi_conn, hdr->itt);
596 if (task)
597 __iscsi_get_task(task);
598 spin_unlock(&conn->iscsi_conn->session->lock);
599
600 if (!task)
601 iser_err("itt can't be matched to task!!! "
602 "conn %p opcode %d itt %d\n",
603 conn->iscsi_conn, opcode, hdr->itt);
604 else {
605 iser_task = task->dd_data;
606 iser_dbg("itt %d task %p\n",hdr->itt, task);
607 iser_task->status = ISER_TASK_STATUS_COMPLETED;
608 iser_task_rdma_finalize(iser_task);
609 iscsi_put_task(task);
610 }
611 }
612 iser_dto_buffs_release(dto);
613 463
614 iscsi_iser_recv(conn->iscsi_conn, hdr, rx_data, rx_data_len); 464 iscsi_iser_recv(conn->iscsi_conn, hdr,
465 rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN);
615 466
616 kfree(rx_desc->data); 467 ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
617 kmem_cache_free(ig.desc_cache, rx_desc); 468 rx_buflen, DMA_FROM_DEVICE);
618 469
619 /* decrementing conn->post_recv_buf_count only --after-- freeing the * 470 /* decrementing conn->post_recv_buf_count only --after-- freeing the *
620 * task eliminates the need to worry on tasks which are completed in * 471 * task eliminates the need to worry on tasks which are completed in *
621 * parallel to the execution of iser_conn_term. So the code that waits * 472 * parallel to the execution of iser_conn_term. So the code that waits *
622 * for the posted rx bufs refcount to become zero handles everything */ 473 * for the posted rx bufs refcount to become zero handles everything */
623 atomic_dec(&conn->ib_conn->post_recv_buf_count); 474 conn->ib_conn->post_recv_buf_count--;
624 475
625 /* 476 if (rx_dma == ib_conn->login_dma)
626 * if an unexpected PDU was received then the recv wr consumed must 477 return;
627 * be replaced, this is done in the next send of a control-type PDU 478
628 */ 479 outstanding = ib_conn->post_recv_buf_count;
629 if (opcode == ISCSI_OP_NOOP_IN && hdr->itt == RESERVED_ITT) { 480 if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) {
630 /* nop-in with itt = 0xffffffff */ 481 count = min(ISER_QP_MAX_RECV_DTOS - outstanding,
631 atomic_inc(&conn->ib_conn->unexpected_pdu_count); 482 ISER_MIN_POSTED_RX);
632 } 483 err = iser_post_recvm(ib_conn, count);
633 else if (opcode == ISCSI_OP_ASYNC_EVENT) { 484 if (err)
634 /* asyncronous message */ 485 iser_err("posting %d rx bufs err %d\n", count, err);
635 atomic_inc(&conn->ib_conn->unexpected_pdu_count);
636 } 486 }
637 /* a reject PDU consumes the recv buf posted for the response */
638} 487}
639 488
640void iser_snd_completion(struct iser_desc *tx_desc) 489void iser_snd_completion(struct iser_tx_desc *tx_desc,
490 struct iser_conn *ib_conn)
641{ 491{
642 struct iser_dto *dto = &tx_desc->dto;
643 struct iser_conn *ib_conn = dto->ib_conn;
644 struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn;
645 struct iscsi_conn *conn = iser_conn->iscsi_conn;
646 struct iscsi_task *task; 492 struct iscsi_task *task;
647 int resume_tx = 0; 493 struct iser_device *device = ib_conn->device;
648
649 iser_dbg("Initiator, Data sent dto=0x%p\n", dto);
650
651 iser_dto_buffs_release(dto);
652 494
653 if (tx_desc->type == ISCSI_TX_DATAOUT) 495 if (tx_desc->type == ISCSI_TX_DATAOUT) {
496 ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
497 ISER_HEADERS_LEN, DMA_TO_DEVICE);
654 kmem_cache_free(ig.desc_cache, tx_desc); 498 kmem_cache_free(ig.desc_cache, tx_desc);
655 499 }
656 if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) ==
657 ISER_QP_MAX_REQ_DTOS)
658 resume_tx = 1;
659 500
660 atomic_dec(&ib_conn->post_send_buf_count); 501 atomic_dec(&ib_conn->post_send_buf_count);
661 502
662 if (resume_tx) {
663 iser_dbg("%ld resuming tx\n",jiffies);
664 iscsi_conn_queue_work(conn);
665 }
666
667 if (tx_desc->type == ISCSI_TX_CONTROL) { 503 if (tx_desc->type == ISCSI_TX_CONTROL) {
668 /* this arithmetic is legal by libiscsi dd_data allocation */ 504 /* this arithmetic is legal by libiscsi dd_data allocation */
669 task = (void *) ((long)(void *)tx_desc - 505 task = (void *) ((long)(void *)tx_desc -
@@ -692,7 +528,6 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
692 528
693void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) 529void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
694{ 530{
695 int deferred;
696 int is_rdma_aligned = 1; 531 int is_rdma_aligned = 1;
697 struct iser_regd_buf *regd; 532 struct iser_regd_buf *regd;
698 533
@@ -710,32 +545,17 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
710 545
711 if (iser_task->dir[ISER_DIR_IN]) { 546 if (iser_task->dir[ISER_DIR_IN]) {
712 regd = &iser_task->rdma_regd[ISER_DIR_IN]; 547 regd = &iser_task->rdma_regd[ISER_DIR_IN];
713 deferred = iser_regd_buff_release(regd); 548 if (regd->reg.is_fmr)
714 if (deferred) { 549 iser_unreg_mem(&regd->reg);
715 iser_err("%d references remain for BUF-IN rdma reg\n",
716 atomic_read(&regd->ref_count));
717 }
718 } 550 }
719 551
720 if (iser_task->dir[ISER_DIR_OUT]) { 552 if (iser_task->dir[ISER_DIR_OUT]) {
721 regd = &iser_task->rdma_regd[ISER_DIR_OUT]; 553 regd = &iser_task->rdma_regd[ISER_DIR_OUT];
722 deferred = iser_regd_buff_release(regd); 554 if (regd->reg.is_fmr)
723 if (deferred) { 555 iser_unreg_mem(&regd->reg);
724 iser_err("%d references remain for BUF-OUT rdma reg\n",
725 atomic_read(&regd->ref_count));
726 }
727 } 556 }
728 557
729 /* if the data was unaligned, it was already unmapped and then copied */ 558 /* if the data was unaligned, it was already unmapped and then copied */
730 if (is_rdma_aligned) 559 if (is_rdma_aligned)
731 iser_dma_unmap_task_data(iser_task); 560 iser_dma_unmap_task_data(iser_task);
732} 561}
733
734void iser_dto_buffs_release(struct iser_dto *dto)
735{
736 int i;
737
738 for (i = 0; i < dto->regd_vector_len; i++)
739 iser_regd_buff_release(dto->regd[i]);
740}
741
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 274c883ef3ea..fb88d6896b67 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -41,62 +41,6 @@
41#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */ 41#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */
42 42
43/** 43/**
44 * Decrements the reference count for the
45 * registered buffer & releases it
46 *
47 * returns 0 if released, 1 if deferred
48 */
49int iser_regd_buff_release(struct iser_regd_buf *regd_buf)
50{
51 struct ib_device *dev;
52
53 if ((atomic_read(&regd_buf->ref_count) == 0) ||
54 atomic_dec_and_test(&regd_buf->ref_count)) {
55 /* if we used the dma mr, unreg is just NOP */
56 if (regd_buf->reg.is_fmr)
57 iser_unreg_mem(&regd_buf->reg);
58
59 if (regd_buf->dma_addr) {
60 dev = regd_buf->device->ib_device;
61 ib_dma_unmap_single(dev,
62 regd_buf->dma_addr,
63 regd_buf->data_size,
64 regd_buf->direction);
65 }
66 /* else this regd buf is associated with task which we */
67 /* dma_unmap_single/sg later */
68 return 0;
69 } else {
70 iser_dbg("Release deferred, regd.buff: 0x%p\n", regd_buf);
71 return 1;
72 }
73}
74
75/**
76 * iser_reg_single - fills registered buffer descriptor with
77 * registration information
78 */
79void iser_reg_single(struct iser_device *device,
80 struct iser_regd_buf *regd_buf,
81 enum dma_data_direction direction)
82{
83 u64 dma_addr;
84
85 dma_addr = ib_dma_map_single(device->ib_device,
86 regd_buf->virt_addr,
87 regd_buf->data_size, direction);
88 BUG_ON(ib_dma_mapping_error(device->ib_device, dma_addr));
89
90 regd_buf->reg.lkey = device->mr->lkey;
91 regd_buf->reg.len = regd_buf->data_size;
92 regd_buf->reg.va = dma_addr;
93 regd_buf->reg.is_fmr = 0;
94
95 regd_buf->dma_addr = dma_addr;
96 regd_buf->direction = direction;
97}
98
99/**
100 * iser_start_rdma_unaligned_sg 44 * iser_start_rdma_unaligned_sg
101 */ 45 */
102static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, 46static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
@@ -109,10 +53,10 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
109 unsigned long cmd_data_len = data->data_len; 53 unsigned long cmd_data_len = data->data_len;
110 54
111 if (cmd_data_len > ISER_KMALLOC_THRESHOLD) 55 if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
112 mem = (void *)__get_free_pages(GFP_NOIO, 56 mem = (void *)__get_free_pages(GFP_ATOMIC,
113 ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT); 57 ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
114 else 58 else
115 mem = kmalloc(cmd_data_len, GFP_NOIO); 59 mem = kmalloc(cmd_data_len, GFP_ATOMIC);
116 60
117 if (mem == NULL) { 61 if (mem == NULL) {
118 iser_err("Failed to allocate mem size %d %d for copying sglist\n", 62 iser_err("Failed to allocate mem size %d %d for copying sglist\n",
@@ -474,9 +418,5 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
474 return err; 418 return err;
475 } 419 }
476 } 420 }
477
478 /* take a reference on this regd buf such that it will not be released *
479 * (eg in send dto completion) before we get the scsi response */
480 atomic_inc(&regd_buf->ref_count);
481 return 0; 421 return 0;
482} 422}
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 8579f32ce38e..308d17bb5146 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -37,9 +37,8 @@
37#include "iscsi_iser.h" 37#include "iscsi_iser.h"
38 38
39#define ISCSI_ISER_MAX_CONN 8 39#define ISCSI_ISER_MAX_CONN 8
40#define ISER_MAX_CQ_LEN ((ISER_QP_MAX_RECV_DTOS + \ 40#define ISER_MAX_RX_CQ_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
41 ISER_QP_MAX_REQ_DTOS) * \ 41#define ISER_MAX_TX_CQ_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)
42 ISCSI_ISER_MAX_CONN)
43 42
44static void iser_cq_tasklet_fn(unsigned long data); 43static void iser_cq_tasklet_fn(unsigned long data);
45static void iser_cq_callback(struct ib_cq *cq, void *cq_context); 44static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
@@ -67,15 +66,23 @@ static int iser_create_device_ib_res(struct iser_device *device)
67 if (IS_ERR(device->pd)) 66 if (IS_ERR(device->pd))
68 goto pd_err; 67 goto pd_err;
69 68
70 device->cq = ib_create_cq(device->ib_device, 69 device->rx_cq = ib_create_cq(device->ib_device,
71 iser_cq_callback, 70 iser_cq_callback,
72 iser_cq_event_callback, 71 iser_cq_event_callback,
73 (void *)device, 72 (void *)device,
74 ISER_MAX_CQ_LEN, 0); 73 ISER_MAX_RX_CQ_LEN, 0);
75 if (IS_ERR(device->cq)) 74 if (IS_ERR(device->rx_cq))
76 goto cq_err; 75 goto rx_cq_err;
77 76
78 if (ib_req_notify_cq(device->cq, IB_CQ_NEXT_COMP)) 77 device->tx_cq = ib_create_cq(device->ib_device,
78 NULL, iser_cq_event_callback,
79 (void *)device,
80 ISER_MAX_TX_CQ_LEN, 0);
81
82 if (IS_ERR(device->tx_cq))
83 goto tx_cq_err;
84
85 if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP))
79 goto cq_arm_err; 86 goto cq_arm_err;
80 87
81 tasklet_init(&device->cq_tasklet, 88 tasklet_init(&device->cq_tasklet,
@@ -93,8 +100,10 @@ static int iser_create_device_ib_res(struct iser_device *device)
93dma_mr_err: 100dma_mr_err:
94 tasklet_kill(&device->cq_tasklet); 101 tasklet_kill(&device->cq_tasklet);
95cq_arm_err: 102cq_arm_err:
96 ib_destroy_cq(device->cq); 103 ib_destroy_cq(device->tx_cq);
97cq_err: 104tx_cq_err:
105 ib_destroy_cq(device->rx_cq);
106rx_cq_err:
98 ib_dealloc_pd(device->pd); 107 ib_dealloc_pd(device->pd);
99pd_err: 108pd_err:
100 iser_err("failed to allocate an IB resource\n"); 109 iser_err("failed to allocate an IB resource\n");
@@ -112,11 +121,13 @@ static void iser_free_device_ib_res(struct iser_device *device)
112 tasklet_kill(&device->cq_tasklet); 121 tasklet_kill(&device->cq_tasklet);
113 122
114 (void)ib_dereg_mr(device->mr); 123 (void)ib_dereg_mr(device->mr);
115 (void)ib_destroy_cq(device->cq); 124 (void)ib_destroy_cq(device->tx_cq);
125 (void)ib_destroy_cq(device->rx_cq);
116 (void)ib_dealloc_pd(device->pd); 126 (void)ib_dealloc_pd(device->pd);
117 127
118 device->mr = NULL; 128 device->mr = NULL;
119 device->cq = NULL; 129 device->tx_cq = NULL;
130 device->rx_cq = NULL;
120 device->pd = NULL; 131 device->pd = NULL;
121} 132}
122 133
@@ -129,13 +140,23 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
129{ 140{
130 struct iser_device *device; 141 struct iser_device *device;
131 struct ib_qp_init_attr init_attr; 142 struct ib_qp_init_attr init_attr;
132 int ret; 143 int ret = -ENOMEM;
133 struct ib_fmr_pool_param params; 144 struct ib_fmr_pool_param params;
134 145
135 BUG_ON(ib_conn->device == NULL); 146 BUG_ON(ib_conn->device == NULL);
136 147
137 device = ib_conn->device; 148 device = ib_conn->device;
138 149
150 ib_conn->login_buf = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
151 if (!ib_conn->login_buf) {
152 goto alloc_err;
153 ret = -ENOMEM;
154 }
155
156 ib_conn->login_dma = ib_dma_map_single(ib_conn->device->ib_device,
157 (void *)ib_conn->login_buf, ISER_RX_LOGIN_SIZE,
158 DMA_FROM_DEVICE);
159
139 ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) + 160 ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
140 (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)), 161 (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
141 GFP_KERNEL); 162 GFP_KERNEL);
@@ -169,12 +190,12 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
169 190
170 init_attr.event_handler = iser_qp_event_callback; 191 init_attr.event_handler = iser_qp_event_callback;
171 init_attr.qp_context = (void *)ib_conn; 192 init_attr.qp_context = (void *)ib_conn;
172 init_attr.send_cq = device->cq; 193 init_attr.send_cq = device->tx_cq;
173 init_attr.recv_cq = device->cq; 194 init_attr.recv_cq = device->rx_cq;
174 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; 195 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
175 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; 196 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
176 init_attr.cap.max_send_sge = MAX_REGD_BUF_VECTOR_LEN; 197 init_attr.cap.max_send_sge = 2;
177 init_attr.cap.max_recv_sge = 2; 198 init_attr.cap.max_recv_sge = 1;
178 init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 199 init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
179 init_attr.qp_type = IB_QPT_RC; 200 init_attr.qp_type = IB_QPT_RC;
180 201
@@ -192,6 +213,7 @@ qp_err:
192 (void)ib_destroy_fmr_pool(ib_conn->fmr_pool); 213 (void)ib_destroy_fmr_pool(ib_conn->fmr_pool);
193fmr_pool_err: 214fmr_pool_err:
194 kfree(ib_conn->page_vec); 215 kfree(ib_conn->page_vec);
216 kfree(ib_conn->login_buf);
195alloc_err: 217alloc_err:
196 iser_err("unable to alloc mem or create resource, err %d\n", ret); 218 iser_err("unable to alloc mem or create resource, err %d\n", ret);
197 return ret; 219 return ret;
@@ -278,17 +300,6 @@ static void iser_device_try_release(struct iser_device *device)
278 mutex_unlock(&ig.device_list_mutex); 300 mutex_unlock(&ig.device_list_mutex);
279} 301}
280 302
281int iser_conn_state_comp(struct iser_conn *ib_conn,
282 enum iser_ib_conn_state comp)
283{
284 int ret;
285
286 spin_lock_bh(&ib_conn->lock);
287 ret = (ib_conn->state == comp);
288 spin_unlock_bh(&ib_conn->lock);
289 return ret;
290}
291
292static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, 303static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
293 enum iser_ib_conn_state comp, 304 enum iser_ib_conn_state comp,
294 enum iser_ib_conn_state exch) 305 enum iser_ib_conn_state exch)
@@ -314,7 +325,7 @@ static void iser_conn_release(struct iser_conn *ib_conn)
314 mutex_lock(&ig.connlist_mutex); 325 mutex_lock(&ig.connlist_mutex);
315 list_del(&ib_conn->conn_list); 326 list_del(&ib_conn->conn_list);
316 mutex_unlock(&ig.connlist_mutex); 327 mutex_unlock(&ig.connlist_mutex);
317 328 iser_free_rx_descriptors(ib_conn);
318 iser_free_ib_conn_res(ib_conn); 329 iser_free_ib_conn_res(ib_conn);
319 ib_conn->device = NULL; 330 ib_conn->device = NULL;
320 /* on EVENT_ADDR_ERROR there's no device yet for this conn */ 331 /* on EVENT_ADDR_ERROR there's no device yet for this conn */
@@ -442,7 +453,7 @@ static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
442 ISCSI_ERR_CONN_FAILED); 453 ISCSI_ERR_CONN_FAILED);
443 454
444 /* Complete the termination process if no posts are pending */ 455 /* Complete the termination process if no posts are pending */
445 if ((atomic_read(&ib_conn->post_recv_buf_count) == 0) && 456 if (ib_conn->post_recv_buf_count == 0 &&
446 (atomic_read(&ib_conn->post_send_buf_count) == 0)) { 457 (atomic_read(&ib_conn->post_send_buf_count) == 0)) {
447 ib_conn->state = ISER_CONN_DOWN; 458 ib_conn->state = ISER_CONN_DOWN;
448 wake_up_interruptible(&ib_conn->wait); 459 wake_up_interruptible(&ib_conn->wait);
@@ -489,9 +500,8 @@ void iser_conn_init(struct iser_conn *ib_conn)
489{ 500{
490 ib_conn->state = ISER_CONN_INIT; 501 ib_conn->state = ISER_CONN_INIT;
491 init_waitqueue_head(&ib_conn->wait); 502 init_waitqueue_head(&ib_conn->wait);
492 atomic_set(&ib_conn->post_recv_buf_count, 0); 503 ib_conn->post_recv_buf_count = 0;
493 atomic_set(&ib_conn->post_send_buf_count, 0); 504 atomic_set(&ib_conn->post_send_buf_count, 0);
494 atomic_set(&ib_conn->unexpected_pdu_count, 0);
495 atomic_set(&ib_conn->refcount, 1); 505 atomic_set(&ib_conn->refcount, 1);
496 INIT_LIST_HEAD(&ib_conn->conn_list); 506 INIT_LIST_HEAD(&ib_conn->conn_list);
497 spin_lock_init(&ib_conn->lock); 507 spin_lock_init(&ib_conn->lock);
@@ -626,136 +636,97 @@ void iser_unreg_mem(struct iser_mem_reg *reg)
626 reg->mem_h = NULL; 636 reg->mem_h = NULL;
627} 637}
628 638
629/** 639int iser_post_recvl(struct iser_conn *ib_conn)
630 * iser_dto_to_iov - builds IOV from a dto descriptor
631 */
632static void iser_dto_to_iov(struct iser_dto *dto, struct ib_sge *iov, int iov_len)
633{ 640{
634 int i; 641 struct ib_recv_wr rx_wr, *rx_wr_failed;
635 struct ib_sge *sge; 642 struct ib_sge sge;
636 struct iser_regd_buf *regd_buf; 643 int ib_ret;
637
638 if (dto->regd_vector_len > iov_len) {
639 iser_err("iov size %d too small for posting dto of len %d\n",
640 iov_len, dto->regd_vector_len);
641 BUG();
642 }
643 644
644 for (i = 0; i < dto->regd_vector_len; i++) { 645 sge.addr = ib_conn->login_dma;
645 sge = &iov[i]; 646 sge.length = ISER_RX_LOGIN_SIZE;
646 regd_buf = dto->regd[i]; 647 sge.lkey = ib_conn->device->mr->lkey;
647
648 sge->addr = regd_buf->reg.va;
649 sge->length = regd_buf->reg.len;
650 sge->lkey = regd_buf->reg.lkey;
651
652 if (dto->used_sz[i] > 0) /* Adjust size */
653 sge->length = dto->used_sz[i];
654
655 /* offset and length should not exceed the regd buf length */
656 if (sge->length + dto->offset[i] > regd_buf->reg.len) {
657 iser_err("Used len:%ld + offset:%d, exceed reg.buf.len:"
658 "%ld in dto:0x%p [%d], va:0x%08lX\n",
659 (unsigned long)sge->length, dto->offset[i],
660 (unsigned long)regd_buf->reg.len, dto, i,
661 (unsigned long)sge->addr);
662 BUG();
663 }
664 648
665 sge->addr += dto->offset[i]; /* Adjust offset */ 649 rx_wr.wr_id = (unsigned long)ib_conn->login_buf;
650 rx_wr.sg_list = &sge;
651 rx_wr.num_sge = 1;
652 rx_wr.next = NULL;
653
654 ib_conn->post_recv_buf_count++;
655 ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
656 if (ib_ret) {
657 iser_err("ib_post_recv failed ret=%d\n", ib_ret);
658 ib_conn->post_recv_buf_count--;
666 } 659 }
660 return ib_ret;
667} 661}
668 662
669/** 663int iser_post_recvm(struct iser_conn *ib_conn, int count)
670 * iser_post_recv - Posts a receive buffer.
671 *
672 * returns 0 on success, -1 on failure
673 */
674int iser_post_recv(struct iser_desc *rx_desc)
675{ 664{
676 int ib_ret, ret_val = 0; 665 struct ib_recv_wr *rx_wr, *rx_wr_failed;
677 struct ib_recv_wr recv_wr, *recv_wr_failed; 666 int i, ib_ret;
678 struct ib_sge iov[2]; 667 unsigned int my_rx_head = ib_conn->rx_desc_head;
679 struct iser_conn *ib_conn; 668 struct iser_rx_desc *rx_desc;
680 struct iser_dto *recv_dto = &rx_desc->dto; 669
681 670 for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
682 /* Retrieve conn */ 671 rx_desc = &ib_conn->rx_descs[my_rx_head];
683 ib_conn = recv_dto->ib_conn; 672 rx_wr->wr_id = (unsigned long)rx_desc;
684 673 rx_wr->sg_list = &rx_desc->rx_sg;
685 iser_dto_to_iov(recv_dto, iov, 2); 674 rx_wr->num_sge = 1;
675 rx_wr->next = rx_wr + 1;
676 my_rx_head = (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1);
677 }
686 678
687 recv_wr.next = NULL; 679 rx_wr--;
688 recv_wr.sg_list = iov; 680 rx_wr->next = NULL; /* mark end of work requests list */
689 recv_wr.num_sge = recv_dto->regd_vector_len;
690 recv_wr.wr_id = (unsigned long)rx_desc;
691 681
692 atomic_inc(&ib_conn->post_recv_buf_count); 682 ib_conn->post_recv_buf_count += count;
693 ib_ret = ib_post_recv(ib_conn->qp, &recv_wr, &recv_wr_failed); 683 ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
694 if (ib_ret) { 684 if (ib_ret) {
695 iser_err("ib_post_recv failed ret=%d\n", ib_ret); 685 iser_err("ib_post_recv failed ret=%d\n", ib_ret);
696 atomic_dec(&ib_conn->post_recv_buf_count); 686 ib_conn->post_recv_buf_count -= count;
697 ret_val = -1; 687 } else
698 } 688 ib_conn->rx_desc_head = my_rx_head;
699 689 return ib_ret;
700 return ret_val;
701} 690}
702 691
692
703/** 693/**
704 * iser_start_send - Initiate a Send DTO operation 694 * iser_start_send - Initiate a Send DTO operation
705 * 695 *
706 * returns 0 on success, -1 on failure 696 * returns 0 on success, -1 on failure
707 */ 697 */
708int iser_post_send(struct iser_desc *tx_desc) 698int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc)
709{ 699{
710 int ib_ret, ret_val = 0; 700 int ib_ret;
711 struct ib_send_wr send_wr, *send_wr_failed; 701 struct ib_send_wr send_wr, *send_wr_failed;
712 struct ib_sge iov[MAX_REGD_BUF_VECTOR_LEN];
713 struct iser_conn *ib_conn;
714 struct iser_dto *dto = &tx_desc->dto;
715 702
716 ib_conn = dto->ib_conn; 703 ib_dma_sync_single_for_device(ib_conn->device->ib_device,
717 704 tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
718 iser_dto_to_iov(dto, iov, MAX_REGD_BUF_VECTOR_LEN);
719 705
720 send_wr.next = NULL; 706 send_wr.next = NULL;
721 send_wr.wr_id = (unsigned long)tx_desc; 707 send_wr.wr_id = (unsigned long)tx_desc;
722 send_wr.sg_list = iov; 708 send_wr.sg_list = tx_desc->tx_sg;
723 send_wr.num_sge = dto->regd_vector_len; 709 send_wr.num_sge = tx_desc->num_sge;
724 send_wr.opcode = IB_WR_SEND; 710 send_wr.opcode = IB_WR_SEND;
725 send_wr.send_flags = dto->notify_enable ? IB_SEND_SIGNALED : 0; 711 send_wr.send_flags = IB_SEND_SIGNALED;
726 712
727 atomic_inc(&ib_conn->post_send_buf_count); 713 atomic_inc(&ib_conn->post_send_buf_count);
728 714
729 ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed); 715 ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
730 if (ib_ret) { 716 if (ib_ret) {
731 iser_err("Failed to start SEND DTO, dto: 0x%p, IOV len: %d\n",
732 dto, dto->regd_vector_len);
733 iser_err("ib_post_send failed, ret:%d\n", ib_ret); 717 iser_err("ib_post_send failed, ret:%d\n", ib_ret);
734 atomic_dec(&ib_conn->post_send_buf_count); 718 atomic_dec(&ib_conn->post_send_buf_count);
735 ret_val = -1;
736 } 719 }
737 720 return ib_ret;
738 return ret_val;
739} 721}
740 722
741static void iser_handle_comp_error(struct iser_desc *desc) 723static void iser_handle_comp_error(struct iser_tx_desc *desc,
724 struct iser_conn *ib_conn)
742{ 725{
743 struct iser_dto *dto = &desc->dto; 726 if (desc && desc->type == ISCSI_TX_DATAOUT)
744 struct iser_conn *ib_conn = dto->ib_conn;
745
746 iser_dto_buffs_release(dto);
747
748 if (desc->type == ISCSI_RX) {
749 kfree(desc->data);
750 kmem_cache_free(ig.desc_cache, desc); 727 kmem_cache_free(ig.desc_cache, desc);
751 atomic_dec(&ib_conn->post_recv_buf_count);
752 } else { /* type is TX control/command/dataout */
753 if (desc->type == ISCSI_TX_DATAOUT)
754 kmem_cache_free(ig.desc_cache, desc);
755 atomic_dec(&ib_conn->post_send_buf_count);
756 }
757 728
758 if (atomic_read(&ib_conn->post_recv_buf_count) == 0 && 729 if (ib_conn->post_recv_buf_count == 0 &&
759 atomic_read(&ib_conn->post_send_buf_count) == 0) { 730 atomic_read(&ib_conn->post_send_buf_count) == 0) {
760 /* getting here when the state is UP means that the conn is * 731 /* getting here when the state is UP means that the conn is *
761 * being terminated asynchronously from the iSCSI layer's * 732 * being terminated asynchronously from the iSCSI layer's *
@@ -774,32 +745,74 @@ static void iser_handle_comp_error(struct iser_desc *desc)
774 } 745 }
775} 746}
776 747
748static int iser_drain_tx_cq(struct iser_device *device)
749{
750 struct ib_cq *cq = device->tx_cq;
751 struct ib_wc wc;
752 struct iser_tx_desc *tx_desc;
753 struct iser_conn *ib_conn;
754 int completed_tx = 0;
755
756 while (ib_poll_cq(cq, 1, &wc) == 1) {
757 tx_desc = (struct iser_tx_desc *) (unsigned long) wc.wr_id;
758 ib_conn = wc.qp->qp_context;
759 if (wc.status == IB_WC_SUCCESS) {
760 if (wc.opcode == IB_WC_SEND)
761 iser_snd_completion(tx_desc, ib_conn);
762 else
763 iser_err("expected opcode %d got %d\n",
764 IB_WC_SEND, wc.opcode);
765 } else {
766 iser_err("tx id %llx status %d vend_err %x\n",
767 wc.wr_id, wc.status, wc.vendor_err);
768 atomic_dec(&ib_conn->post_send_buf_count);
769 iser_handle_comp_error(tx_desc, ib_conn);
770 }
771 completed_tx++;
772 }
773 return completed_tx;
774}
775
776
777static void iser_cq_tasklet_fn(unsigned long data) 777static void iser_cq_tasklet_fn(unsigned long data)
778{ 778{
779 struct iser_device *device = (struct iser_device *)data; 779 struct iser_device *device = (struct iser_device *)data;
780 struct ib_cq *cq = device->cq; 780 struct ib_cq *cq = device->rx_cq;
781 struct ib_wc wc; 781 struct ib_wc wc;
782 struct iser_desc *desc; 782 struct iser_rx_desc *desc;
783 unsigned long xfer_len; 783 unsigned long xfer_len;
784 struct iser_conn *ib_conn;
785 int completed_tx, completed_rx;
786 completed_tx = completed_rx = 0;
784 787
785 while (ib_poll_cq(cq, 1, &wc) == 1) { 788 while (ib_poll_cq(cq, 1, &wc) == 1) {
786 desc = (struct iser_desc *) (unsigned long) wc.wr_id; 789 desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id;
787 BUG_ON(desc == NULL); 790 BUG_ON(desc == NULL);
788 791 ib_conn = wc.qp->qp_context;
789 if (wc.status == IB_WC_SUCCESS) { 792 if (wc.status == IB_WC_SUCCESS) {
790 if (desc->type == ISCSI_RX) { 793 if (wc.opcode == IB_WC_RECV) {
791 xfer_len = (unsigned long)wc.byte_len; 794 xfer_len = (unsigned long)wc.byte_len;
792 iser_rcv_completion(desc, xfer_len); 795 iser_rcv_completion(desc, xfer_len, ib_conn);
793 } else /* type == ISCSI_TX_CONTROL/SCSI_CMD/DOUT */ 796 } else
794 iser_snd_completion(desc); 797 iser_err("expected opcode %d got %d\n",
798 IB_WC_RECV, wc.opcode);
795 } else { 799 } else {
796 iser_err("comp w. error op %d status %d\n",desc->type,wc.status); 800 if (wc.status != IB_WC_WR_FLUSH_ERR)
797 iser_handle_comp_error(desc); 801 iser_err("rx id %llx status %d vend_err %x\n",
802 wc.wr_id, wc.status, wc.vendor_err);
803 ib_conn->post_recv_buf_count--;
804 iser_handle_comp_error(NULL, ib_conn);
798 } 805 }
806 completed_rx++;
807 if (!(completed_rx & 63))
808 completed_tx += iser_drain_tx_cq(device);
799 } 809 }
800 /* #warning "it is assumed here that arming CQ only once its empty" * 810 /* #warning "it is assumed here that arming CQ only once its empty" *
801 * " would not cause interrupts to be missed" */ 811 * " would not cause interrupts to be missed" */
802 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 812 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
813
814 completed_tx += iser_drain_tx_cq(device);
815 iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
803} 816}
804 817
805static void iser_cq_callback(struct ib_cq *cq, void *cq_context) 818static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h
index 3e8618b4efbc..4cd7f420766a 100644
--- a/drivers/net/cxgb3/adapter.h
+++ b/drivers/net/cxgb3/adapter.h
@@ -264,6 +264,10 @@ struct adapter {
264 struct work_struct fatal_error_handler_task; 264 struct work_struct fatal_error_handler_task;
265 struct work_struct link_fault_handler_task; 265 struct work_struct link_fault_handler_task;
266 266
267 struct work_struct db_full_task;
268 struct work_struct db_empty_task;
269 struct work_struct db_drop_task;
270
267 struct dentry *debugfs_root; 271 struct dentry *debugfs_root;
268 272
269 struct mutex mdio_lock; 273 struct mutex mdio_lock;
@@ -335,6 +339,7 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
335int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, 339int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
336 unsigned char *data); 340 unsigned char *data);
337irqreturn_t t3_sge_intr_msix(int irq, void *cookie); 341irqreturn_t t3_sge_intr_msix(int irq, void *cookie);
342extern struct workqueue_struct *cxgb3_wq;
338 343
339int t3_get_edc_fw(struct cphy *phy, int edc_idx, int size); 344int t3_get_edc_fw(struct cphy *phy, int edc_idx, int size);
340 345
diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
index 89bec9c3c141..37945fce7fa5 100644
--- a/drivers/net/cxgb3/cxgb3_main.c
+++ b/drivers/net/cxgb3/cxgb3_main.c
@@ -45,6 +45,7 @@
45#include <linux/firmware.h> 45#include <linux/firmware.h>
46#include <linux/log2.h> 46#include <linux/log2.h>
47#include <linux/stringify.h> 47#include <linux/stringify.h>
48#include <linux/sched.h>
48#include <asm/uaccess.h> 49#include <asm/uaccess.h>
49 50
50#include "common.h" 51#include "common.h"
@@ -140,7 +141,7 @@ MODULE_PARM_DESC(ofld_disable, "whether to enable offload at init time or not");
140 * will block keventd as it needs the rtnl lock, and we'll deadlock waiting 141 * will block keventd as it needs the rtnl lock, and we'll deadlock waiting
141 * for our work to complete. Get our own work queue to solve this. 142 * for our work to complete. Get our own work queue to solve this.
142 */ 143 */
143static struct workqueue_struct *cxgb3_wq; 144struct workqueue_struct *cxgb3_wq;
144 145
145/** 146/**
146 * link_report - show link status and link speed/duplex 147 * link_report - show link status and link speed/duplex
@@ -590,6 +591,19 @@ static void setup_rss(struct adapter *adap)
590 V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ, cpus, rspq_map); 591 V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ, cpus, rspq_map);
591} 592}
592 593
594static void ring_dbs(struct adapter *adap)
595{
596 int i, j;
597
598 for (i = 0; i < SGE_QSETS; i++) {
599 struct sge_qset *qs = &adap->sge.qs[i];
600
601 if (qs->adap)
602 for (j = 0; j < SGE_TXQ_PER_SET; j++)
603 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(qs->txq[j].cntxt_id));
604 }
605}
606
593static void init_napi(struct adapter *adap) 607static void init_napi(struct adapter *adap)
594{ 608{
595 int i; 609 int i;
@@ -2754,6 +2768,42 @@ static void t3_adap_check_task(struct work_struct *work)
2754 spin_unlock_irq(&adapter->work_lock); 2768 spin_unlock_irq(&adapter->work_lock);
2755} 2769}
2756 2770
2771static void db_full_task(struct work_struct *work)
2772{
2773 struct adapter *adapter = container_of(work, struct adapter,
2774 db_full_task);
2775
2776 cxgb3_event_notify(&adapter->tdev, OFFLOAD_DB_FULL, 0);
2777}
2778
2779static void db_empty_task(struct work_struct *work)
2780{
2781 struct adapter *adapter = container_of(work, struct adapter,
2782 db_empty_task);
2783
2784 cxgb3_event_notify(&adapter->tdev, OFFLOAD_DB_EMPTY, 0);
2785}
2786
2787static void db_drop_task(struct work_struct *work)
2788{
2789 struct adapter *adapter = container_of(work, struct adapter,
2790 db_drop_task);
2791 unsigned long delay = 1000;
2792 unsigned short r;
2793
2794 cxgb3_event_notify(&adapter->tdev, OFFLOAD_DB_DROP, 0);
2795
2796 /*
2797 * Sleep a while before ringing the driver qset dbs.
2798 * The delay is between 1000-2023 usecs.
2799 */
2800 get_random_bytes(&r, 2);
2801 delay += r & 1023;
2802 set_current_state(TASK_UNINTERRUPTIBLE);
2803 schedule_timeout(usecs_to_jiffies(delay));
2804 ring_dbs(adapter);
2805}
2806
2757/* 2807/*
2758 * Processes external (PHY) interrupts in process context. 2808 * Processes external (PHY) interrupts in process context.
2759 */ 2809 */
@@ -3222,6 +3272,11 @@ static int __devinit init_one(struct pci_dev *pdev,
3222 INIT_LIST_HEAD(&adapter->adapter_list); 3272 INIT_LIST_HEAD(&adapter->adapter_list);
3223 INIT_WORK(&adapter->ext_intr_handler_task, ext_intr_task); 3273 INIT_WORK(&adapter->ext_intr_handler_task, ext_intr_task);
3224 INIT_WORK(&adapter->fatal_error_handler_task, fatal_error_task); 3274 INIT_WORK(&adapter->fatal_error_handler_task, fatal_error_task);
3275
3276 INIT_WORK(&adapter->db_full_task, db_full_task);
3277 INIT_WORK(&adapter->db_empty_task, db_empty_task);
3278 INIT_WORK(&adapter->db_drop_task, db_drop_task);
3279
3225 INIT_DELAYED_WORK(&adapter->adap_check_task, t3_adap_check_task); 3280 INIT_DELAYED_WORK(&adapter->adap_check_task, t3_adap_check_task);
3226 3281
3227 for (i = 0; i < ai->nports0 + ai->nports1; ++i) { 3282 for (i = 0; i < ai->nports0 + ai->nports1; ++i) {
diff --git a/drivers/net/cxgb3/cxgb3_offload.h b/drivers/net/cxgb3/cxgb3_offload.h
index 670aa62042da..929c298115ca 100644
--- a/drivers/net/cxgb3/cxgb3_offload.h
+++ b/drivers/net/cxgb3/cxgb3_offload.h
@@ -73,7 +73,10 @@ enum {
73 OFFLOAD_STATUS_UP, 73 OFFLOAD_STATUS_UP,
74 OFFLOAD_STATUS_DOWN, 74 OFFLOAD_STATUS_DOWN,
75 OFFLOAD_PORT_DOWN, 75 OFFLOAD_PORT_DOWN,
76 OFFLOAD_PORT_UP 76 OFFLOAD_PORT_UP,
77 OFFLOAD_DB_FULL,
78 OFFLOAD_DB_EMPTY,
79 OFFLOAD_DB_DROP
77}; 80};
78 81
79struct cxgb3_client { 82struct cxgb3_client {
diff --git a/drivers/net/cxgb3/regs.h b/drivers/net/cxgb3/regs.h
index 1b5327b5a965..cb42353c9fdd 100644
--- a/drivers/net/cxgb3/regs.h
+++ b/drivers/net/cxgb3/regs.h
@@ -254,6 +254,22 @@
254#define V_LOPIODRBDROPERR(x) ((x) << S_LOPIODRBDROPERR) 254#define V_LOPIODRBDROPERR(x) ((x) << S_LOPIODRBDROPERR)
255#define F_LOPIODRBDROPERR V_LOPIODRBDROPERR(1U) 255#define F_LOPIODRBDROPERR V_LOPIODRBDROPERR(1U)
256 256
257#define S_HIPRIORITYDBFULL 7
258#define V_HIPRIORITYDBFULL(x) ((x) << S_HIPRIORITYDBFULL)
259#define F_HIPRIORITYDBFULL V_HIPRIORITYDBFULL(1U)
260
261#define S_HIPRIORITYDBEMPTY 6
262#define V_HIPRIORITYDBEMPTY(x) ((x) << S_HIPRIORITYDBEMPTY)
263#define F_HIPRIORITYDBEMPTY V_HIPRIORITYDBEMPTY(1U)
264
265#define S_LOPRIORITYDBFULL 5
266#define V_LOPRIORITYDBFULL(x) ((x) << S_LOPRIORITYDBFULL)
267#define F_LOPRIORITYDBFULL V_LOPRIORITYDBFULL(1U)
268
269#define S_LOPRIORITYDBEMPTY 4
270#define V_LOPRIORITYDBEMPTY(x) ((x) << S_LOPRIORITYDBEMPTY)
271#define F_LOPRIORITYDBEMPTY V_LOPRIORITYDBEMPTY(1U)
272
257#define S_RSPQDISABLED 3 273#define S_RSPQDISABLED 3
258#define V_RSPQDISABLED(x) ((x) << S_RSPQDISABLED) 274#define V_RSPQDISABLED(x) ((x) << S_RSPQDISABLED)
259#define F_RSPQDISABLED V_RSPQDISABLED(1U) 275#define F_RSPQDISABLED V_RSPQDISABLED(1U)
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 318a018ca7c5..9b434461c4f1 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -42,6 +42,7 @@
42#include "sge_defs.h" 42#include "sge_defs.h"
43#include "t3_cpl.h" 43#include "t3_cpl.h"
44#include "firmware_exports.h" 44#include "firmware_exports.h"
45#include "cxgb3_offload.h"
45 46
46#define USE_GTS 0 47#define USE_GTS 0
47 48
@@ -2833,8 +2834,13 @@ void t3_sge_err_intr_handler(struct adapter *adapter)
2833 } 2834 }
2834 2835
2835 if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR)) 2836 if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
2836 CH_ALERT(adapter, "SGE dropped %s priority doorbell\n", 2837 queue_work(cxgb3_wq, &adapter->db_drop_task);
2837 status & F_HIPIODRBDROPERR ? "high" : "lo"); 2838
2839 if (status & (F_HIPRIORITYDBFULL | F_LOPRIORITYDBFULL))
2840 queue_work(cxgb3_wq, &adapter->db_full_task);
2841
2842 if (status & (F_HIPRIORITYDBEMPTY | F_LOPRIORITYDBEMPTY))
2843 queue_work(cxgb3_wq, &adapter->db_empty_task);
2838 2844
2839 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 2845 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2840 if (status & SGE_FATALERR) 2846 if (status & SGE_FATALERR)
diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c
index 032cfe065570..c38fc717a0d1 100644
--- a/drivers/net/cxgb3/t3_hw.c
+++ b/drivers/net/cxgb3/t3_hw.c
@@ -1432,7 +1432,10 @@ static int t3_handle_intr_status(struct adapter *adapter, unsigned int reg,
1432 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \ 1432 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
1433 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \ 1433 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
1434 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \ 1434 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
1435 F_HIRCQPARITYERROR) 1435 F_HIRCQPARITYERROR | F_LOPRIORITYDBFULL | \
1436 F_HIPRIORITYDBFULL | F_LOPRIORITYDBEMPTY | \
1437 F_HIPRIORITYDBEMPTY | F_HIPIODRBDROPERR | \
1438 F_LOPIODRBDROPERR)
1436#define MC5_INTR_MASK (F_PARITYERR | F_ACTRGNFULL | F_UNKNOWNCMD | \ 1439#define MC5_INTR_MASK (F_PARITYERR | F_ACTRGNFULL | F_UNKNOWNCMD | \
1437 F_REQQPARERR | F_DISPQPARERR | F_DELACTEMPTY | \ 1440 F_REQQPARERR | F_DISPQPARERR | F_DELACTEMPTY | \
1438 F_NFASRCHFAIL) 1441 F_NFASRCHFAIL)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 09509edb1c5f..a585e0f92bc3 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -984,9 +984,9 @@ struct ib_device {
984 struct list_head event_handler_list; 984 struct list_head event_handler_list;
985 spinlock_t event_handler_lock; 985 spinlock_t event_handler_lock;
986 986
987 spinlock_t client_data_lock;
987 struct list_head core_list; 988 struct list_head core_list;
988 struct list_head client_data_list; 989 struct list_head client_data_list;
989 spinlock_t client_data_lock;
990 990
991 struct ib_cache cache; 991 struct ib_cache cache;
992 int *pkey_tbl_len; 992 int *pkey_tbl_len;
@@ -1144,8 +1144,8 @@ struct ib_device {
1144 IB_DEV_UNREGISTERED 1144 IB_DEV_UNREGISTERED
1145 } reg_state; 1145 } reg_state;
1146 1146
1147 u64 uverbs_cmd_mask;
1148 int uverbs_abi_ver; 1147 int uverbs_abi_ver;
1148 u64 uverbs_cmd_mask;
1149 1149
1150 char node_desc[64]; 1150 char node_desc[64];
1151 __be64 node_guid; 1151 __be64 node_guid;
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index c6b2962315b3..4fae90304648 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -67,7 +67,6 @@ enum rdma_port_space {
67 RDMA_PS_IPOIB = 0x0002, 67 RDMA_PS_IPOIB = 0x0002,
68 RDMA_PS_TCP = 0x0106, 68 RDMA_PS_TCP = 0x0106,
69 RDMA_PS_UDP = 0x0111, 69 RDMA_PS_UDP = 0x0111,
70 RDMA_PS_SCTP = 0x0183
71}; 70};
72 71
73struct rdma_addr { 72struct rdma_addr {