aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-10-11 22:43:13 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-10-11 22:43:13 -0400
commit ce9d3c9a6a9aef61525be07fe6ba27d937236aa2 (patch)
tree 1b29bcb8f60fc6b59fa0d7b833cc733b8ebe17c9
parent 038a5008b2f395c85e6e71d6ddf3c684e7c405b0 (diff)
parent 3d73c2884f45f9a297cbc956cea101405a9703f2 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (87 commits)
  mlx4_core: Fix section mismatches
  IPoIB: Allow setting policy to ignore multicast groups
  IB/mthca: Mark error paths as unlikely() in post_srq_recv functions
  IB/ipath: Minor fix to ordering of freeing and zeroing of tid pages.
  IB/ipath: Remove redundant link state checks
  IB/ipath: Fix IB_EVENT_PORT_ERR event
  IB/ipath: Better handling of unexpected GPIO interrupts
  IB/ipath: Maintain active time on all chips
  IB/ipath: Fix QHT7040 serial number check
  IB/ipath: Indicate a couple of chip bugs to userspace
  IB/ipath: iba6110 rev4 no longer needs recv header overrun workaround
  IB/ipath: Use counters in ipath_poll and cleanup interrupts in ipath_close
  IB/ipath: Remove duplicate copy of LMC
  IB/ipath: Add ability to set the LMC via the sysfs debugging interface
  IB/ipath: Optimize completion queue entry insertion and polling
  IB/ipath: Implement IB_EVENT_QP_LAST_WQE_REACHED
  IB/ipath: Generate flush CQE when QP is in error state
  IB/ipath: Remove redundant code
  IB/ipath: Future proof eeprom checksum code (contents reading)
  IB/ipath: UC RDMA WRITE with IMMEDIATE doesn't send the immediate
  ...
-rw-r--r--Documentation/infiniband/user_mad.txt14
-rw-r--r--drivers/infiniband/core/addr.c3
-rw-r--r--drivers/infiniband/core/cm.c51
-rw-r--r--drivers/infiniband/core/cma.c46
-rw-r--r--drivers/infiniband/core/device.c4
-rw-r--r--drivers/infiniband/core/fmr_pool.c22
-rw-r--r--drivers/infiniband/core/multicast.c2
-rw-r--r--drivers/infiniband/core/sa_query.c12
-rw-r--r--drivers/infiniband/core/ucma.c74
-rw-r--r--drivers/infiniband/core/umem.c20
-rw-r--r--drivers/infiniband/core/user_mad.c151
-rw-r--r--drivers/infiniband/core/uverbs.h1
-rw-r--r--drivers/infiniband/core/uverbs_main.c16
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c16
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes.h14
-rw-r--r--drivers/infiniband/hw/ehca/ehca_cq.c23
-rw-r--r--drivers/infiniband/hw/ehca/ehca_hca.c34
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.c33
-rw-r--r--drivers/infiniband/hw/ehca/ehca_main.c52
-rw-r--r--drivers/infiniband/hw/ehca/ehca_mcast.c4
-rw-r--r--drivers/infiniband/hw/ehca/ehca_mrmw.c102
-rw-r--r--drivers/infiniband/hw/ehca/ehca_qp.c169
-rw-r--r--drivers/infiniband/hw/ehca/ehca_reqs.c2
-rw-r--r--drivers/infiniband/hw/ehca/ehca_sqp.c2
-rw-r--r--drivers/infiniband/hw/ehca/ehca_tools.h19
-rw-r--r--drivers/infiniband/hw/ehca/ehca_uverbs.c46
-rw-r--r--drivers/infiniband/hw/ehca/hcp_if.c105
-rw-r--r--drivers/infiniband/hw/ehca/ipz_pt_fn.c1
-rw-r--r--drivers/infiniband/hw/ipath/ipath_common.h4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_cq.c94
-rw-r--r--drivers/infiniband/hw/ipath/ipath_diag.c22
-rw-r--r--drivers/infiniband/hw/ipath/ipath_driver.c93
-rw-r--r--drivers/infiniband/hw/ipath/ipath_eeprom.c10
-rw-r--r--drivers/infiniband/hw/ipath/ipath_file_ops.c74
-rw-r--r--drivers/infiniband/hw/ipath/ipath_fs.c187
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6110.c57
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6120.c18
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c64
-rw-r--r--drivers/infiniband/hw/ipath/ipath_kernel.h12
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mad.c53
-rw-r--r--drivers/infiniband/hw/ipath/ipath_qp.c31
-rw-r--r--drivers/infiniband/hw/ipath/ipath_rc.c73
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ruc.c308
-rw-r--r--drivers/infiniband/hw/ipath/ipath_stats.c17
-rw-r--r--drivers/infiniband/hw/ipath/ipath_sysfs.c40
-rw-r--r--drivers/infiniband/hw/ipath/ipath_uc.c98
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ud.c382
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.c329
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.h45
-rw-r--r--drivers/infiniband/hw/mlx4/main.c50
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h16
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c100
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c14
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.c6
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c110
-rw-r--r--drivers/infiniband/hw/mthca/mthca_srq.c8
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h24
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c18
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c8
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c45
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c31
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c2
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c2
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c2
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c1
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c2
-rw-r--r--drivers/net/mlx4/cmd.c11
-rw-r--r--drivers/net/mlx4/cq.c2
-rw-r--r--drivers/net/mlx4/eq.c13
-rw-r--r--drivers/net/mlx4/fw.c2
-rw-r--r--drivers/net/mlx4/icm.c134
-rw-r--r--drivers/net/mlx4/icm.h9
-rw-r--r--drivers/net/mlx4/main.c130
-rw-r--r--drivers/net/mlx4/mcg.c2
-rw-r--r--drivers/net/mlx4/mlx4.h10
-rw-r--r--drivers/net/mlx4/mr.c242
-rw-r--r--drivers/net/mlx4/pd.c2
-rw-r--r--drivers/net/mlx4/qp.c5
-rw-r--r--drivers/net/mlx4/srq.c4
-rw-r--r--include/linux/mlx4/device.h27
-rw-r--r--include/rdma/ib_cm.h7
-rw-r--r--include/rdma/ib_sa.h11
-rw-r--r--include/rdma/ib_umem.h1
-rw-r--r--include/rdma/ib_user_mad.h70
-rw-r--r--include/rdma/rdma_cm.h14
-rw-r--r--include/rdma/rdma_user_cm.h18
89 files changed, 2498 insertions, 1710 deletions
diff --git a/Documentation/infiniband/user_mad.txt b/Documentation/infiniband/user_mad.txt
index 8ec54b974b67..744687dd195b 100644
--- a/Documentation/infiniband/user_mad.txt
+++ b/Documentation/infiniband/user_mad.txt
@@ -99,6 +99,20 @@ Transaction IDs
99 request/response pairs. The upper 32 bits are reserved for use by 99 request/response pairs. The upper 32 bits are reserved for use by
100 the kernel and will be overwritten before a MAD is sent. 100 the kernel and will be overwritten before a MAD is sent.
101 101
102P_Key Index Handling
103
104 The old ib_umad interface did not allow setting the P_Key index for
105 MADs that are sent and did not provide a way for obtaining the P_Key
106 index of received MADs. A new layout for struct ib_user_mad_hdr
107 with a pkey_index member has been defined; however, to preserve
108 binary compatibility with older applications, this new layout will
109 not be used unless the IB_USER_MAD_ENABLE_PKEY ioctl is called
110 before a file descriptor is used for anything else.
111
112 In September 2008, the IB_USER_MAD_ABI_VERSION will be incremented
113 to 6, the new layout of struct ib_user_mad_hdr will be used by
114 default, and the IB_USER_MAD_ENABLE_PKEY ioctl will be removed.
115
102Setting IsSM Capability Bit 116Setting IsSM Capability Bit
103 117
104 To set the IsSM capability bit for a port, simply open the 118 To set the IsSM capability bit for a port, simply open the
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index c5c33d35f87d..5381c80de10a 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -161,8 +161,7 @@ static void addr_send_arp(struct sockaddr_in *dst_in)
161 if (ip_route_output_key(&rt, &fl)) 161 if (ip_route_output_key(&rt, &fl))
162 return; 162 return;
163 163
164 arp_send(ARPOP_REQUEST, ETH_P_ARP, rt->rt_gateway, rt->idev->dev, 164 neigh_event_send(rt->u.dst.neighbour, NULL);
165 rt->rt_src, NULL, rt->idev->dev->dev_addr, NULL);
166 ip_rt_put(rt); 165 ip_rt_put(rt);
167} 166}
168 167
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 4df269f5d9ac..2e39236d189f 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -2219,6 +2219,9 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
2219{ 2219{
2220 struct cm_id_private *cm_id_priv; 2220 struct cm_id_private *cm_id_priv;
2221 struct ib_mad_send_buf *msg; 2221 struct ib_mad_send_buf *msg;
2222 enum ib_cm_state cm_state;
2223 enum ib_cm_lap_state lap_state;
2224 enum cm_msg_response msg_response;
2222 void *data; 2225 void *data;
2223 unsigned long flags; 2226 unsigned long flags;
2224 int ret; 2227 int ret;
@@ -2235,48 +2238,40 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
2235 spin_lock_irqsave(&cm_id_priv->lock, flags); 2238 spin_lock_irqsave(&cm_id_priv->lock, flags);
2236 switch(cm_id_priv->id.state) { 2239 switch(cm_id_priv->id.state) {
2237 case IB_CM_REQ_RCVD: 2240 case IB_CM_REQ_RCVD:
2238 ret = cm_alloc_msg(cm_id_priv, &msg); 2241 cm_state = IB_CM_MRA_REQ_SENT;
2239 if (ret) 2242 lap_state = cm_id->lap_state;
2240 goto error1; 2243 msg_response = CM_MSG_RESPONSE_REQ;
2241
2242 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2243 CM_MSG_RESPONSE_REQ, service_timeout,
2244 private_data, private_data_len);
2245 ret = ib_post_send_mad(msg, NULL);
2246 if (ret)
2247 goto error2;
2248 cm_id->state = IB_CM_MRA_REQ_SENT;
2249 break; 2244 break;
2250 case IB_CM_REP_RCVD: 2245 case IB_CM_REP_RCVD:
2251 ret = cm_alloc_msg(cm_id_priv, &msg); 2246 cm_state = IB_CM_MRA_REP_SENT;
2252 if (ret) 2247 lap_state = cm_id->lap_state;
2253 goto error1; 2248 msg_response = CM_MSG_RESPONSE_REP;
2254
2255 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2256 CM_MSG_RESPONSE_REP, service_timeout,
2257 private_data, private_data_len);
2258 ret = ib_post_send_mad(msg, NULL);
2259 if (ret)
2260 goto error2;
2261 cm_id->state = IB_CM_MRA_REP_SENT;
2262 break; 2249 break;
2263 case IB_CM_ESTABLISHED: 2250 case IB_CM_ESTABLISHED:
2251 cm_state = cm_id->state;
2252 lap_state = IB_CM_MRA_LAP_SENT;
2253 msg_response = CM_MSG_RESPONSE_OTHER;
2254 break;
2255 default:
2256 ret = -EINVAL;
2257 goto error1;
2258 }
2259
2260 if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
2264 ret = cm_alloc_msg(cm_id_priv, &msg); 2261 ret = cm_alloc_msg(cm_id_priv, &msg);
2265 if (ret) 2262 if (ret)
2266 goto error1; 2263 goto error1;
2267 2264
2268 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, 2265 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2269 CM_MSG_RESPONSE_OTHER, service_timeout, 2266 msg_response, service_timeout,
2270 private_data, private_data_len); 2267 private_data, private_data_len);
2271 ret = ib_post_send_mad(msg, NULL); 2268 ret = ib_post_send_mad(msg, NULL);
2272 if (ret) 2269 if (ret)
2273 goto error2; 2270 goto error2;
2274 cm_id->lap_state = IB_CM_MRA_LAP_SENT;
2275 break;
2276 default:
2277 ret = -EINVAL;
2278 goto error1;
2279 } 2271 }
2272
2273 cm_id->state = cm_state;
2274 cm_id->lap_state = lap_state;
2280 cm_id_priv->service_timeout = service_timeout; 2275 cm_id_priv->service_timeout = service_timeout;
2281 cm_set_private_data(cm_id_priv, data, private_data_len); 2276 cm_set_private_data(cm_id_priv, data, private_data_len);
2282 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2277 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 2e641b255db4..93644f82592c 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -52,6 +52,7 @@ MODULE_LICENSE("Dual BSD/GPL");
52 52
53#define CMA_CM_RESPONSE_TIMEOUT 20 53#define CMA_CM_RESPONSE_TIMEOUT 20
54#define CMA_MAX_CM_RETRIES 15 54#define CMA_MAX_CM_RETRIES 15
55#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
55 56
56static void cma_add_one(struct ib_device *device); 57static void cma_add_one(struct ib_device *device);
57static void cma_remove_one(struct ib_device *device); 58static void cma_remove_one(struct ib_device *device);
@@ -138,6 +139,7 @@ struct rdma_id_private {
138 u32 qkey; 139 u32 qkey;
139 u32 qp_num; 140 u32 qp_num;
140 u8 srq; 141 u8 srq;
142 u8 tos;
141}; 143};
142 144
143struct cma_multicast { 145struct cma_multicast {
@@ -1089,6 +1091,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1089 event.param.ud.private_data_len = 1091 event.param.ud.private_data_len =
1090 IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; 1092 IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
1091 } else { 1093 } else {
1094 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
1092 conn_id = cma_new_conn_id(&listen_id->id, ib_event); 1095 conn_id = cma_new_conn_id(&listen_id->id, ib_event);
1093 cma_set_req_event_data(&event, &ib_event->param.req_rcvd, 1096 cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
1094 ib_event->private_data, offset); 1097 ib_event->private_data, offset);
@@ -1474,6 +1477,15 @@ err:
1474} 1477}
1475EXPORT_SYMBOL(rdma_listen); 1478EXPORT_SYMBOL(rdma_listen);
1476 1479
1480void rdma_set_service_type(struct rdma_cm_id *id, int tos)
1481{
1482 struct rdma_id_private *id_priv;
1483
1484 id_priv = container_of(id, struct rdma_id_private, id);
1485 id_priv->tos = (u8) tos;
1486}
1487EXPORT_SYMBOL(rdma_set_service_type);
1488
1477static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, 1489static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
1478 void *context) 1490 void *context)
1479{ 1491{
@@ -1498,23 +1510,37 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
1498static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, 1510static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
1499 struct cma_work *work) 1511 struct cma_work *work)
1500{ 1512{
1501 struct rdma_dev_addr *addr = &id_priv->id.route.addr.dev_addr; 1513 struct rdma_addr *addr = &id_priv->id.route.addr;
1502 struct ib_sa_path_rec path_rec; 1514 struct ib_sa_path_rec path_rec;
1515 ib_sa_comp_mask comp_mask;
1516 struct sockaddr_in6 *sin6;
1503 1517
1504 memset(&path_rec, 0, sizeof path_rec); 1518 memset(&path_rec, 0, sizeof path_rec);
1505 ib_addr_get_sgid(addr, &path_rec.sgid); 1519 ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
1506 ib_addr_get_dgid(addr, &path_rec.dgid); 1520 ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
1507 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr)); 1521 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
1508 path_rec.numb_path = 1; 1522 path_rec.numb_path = 1;
1509 path_rec.reversible = 1; 1523 path_rec.reversible = 1;
1524 path_rec.service_id = cma_get_service_id(id_priv->id.ps, &addr->dst_addr);
1525
1526 comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
1527 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
1528 IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
1529
1530 if (addr->src_addr.sa_family == AF_INET) {
1531 path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
1532 comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
1533 } else {
1534 sin6 = (struct sockaddr_in6 *) &addr->src_addr;
1535 path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
1536 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
1537 }
1510 1538
1511 id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, 1539 id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
1512 id_priv->id.port_num, &path_rec, 1540 id_priv->id.port_num, &path_rec,
1513 IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | 1541 comp_mask, timeout_ms,
1514 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | 1542 GFP_KERNEL, cma_query_handler,
1515 IB_SA_PATH_REC_REVERSIBLE, 1543 work, &id_priv->query);
1516 timeout_ms, GFP_KERNEL,
1517 cma_query_handler, work, &id_priv->query);
1518 1544
1519 return (id_priv->query_id < 0) ? id_priv->query_id : 0; 1545 return (id_priv->query_id < 0) ? id_priv->query_id : 0;
1520} 1546}
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 2506c43ba041..5ac5ffee05cb 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -120,12 +120,12 @@ static struct ib_device *__ib_device_get_by_name(const char *name)
120 120
121static int alloc_name(char *name) 121static int alloc_name(char *name)
122{ 122{
123 long *inuse; 123 unsigned long *inuse;
124 char buf[IB_DEVICE_NAME_MAX]; 124 char buf[IB_DEVICE_NAME_MAX];
125 struct ib_device *device; 125 struct ib_device *device;
126 int i; 126 int i;
127 127
128 inuse = (long *) get_zeroed_page(GFP_KERNEL); 128 inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL);
129 if (!inuse) 129 if (!inuse)
130 return -ENOMEM; 130 return -ENOMEM;
131 131
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index a06bcc65a871..d7f64525469b 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -152,7 +152,7 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
152 152
153#ifdef DEBUG 153#ifdef DEBUG
154 if (fmr->ref_count !=0) { 154 if (fmr->ref_count !=0) {
155 printk(KERN_WARNING PFX "Unmapping FMR 0x%08x with ref count %d", 155 printk(KERN_WARNING PFX "Unmapping FMR 0x%08x with ref count %d\n",
156 fmr, fmr->ref_count); 156 fmr, fmr->ref_count);
157 } 157 }
158#endif 158#endif
@@ -170,7 +170,7 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
170 170
171 ret = ib_unmap_fmr(&fmr_list); 171 ret = ib_unmap_fmr(&fmr_list);
172 if (ret) 172 if (ret)
173 printk(KERN_WARNING PFX "ib_unmap_fmr returned %d", ret); 173 printk(KERN_WARNING PFX "ib_unmap_fmr returned %d\n", ret);
174 174
175 spin_lock_irq(&pool->pool_lock); 175 spin_lock_irq(&pool->pool_lock);
176 list_splice(&unmap_list, &pool->free_list); 176 list_splice(&unmap_list, &pool->free_list);
@@ -235,13 +235,13 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
235 235
236 attr = kmalloc(sizeof *attr, GFP_KERNEL); 236 attr = kmalloc(sizeof *attr, GFP_KERNEL);
237 if (!attr) { 237 if (!attr) {
238 printk(KERN_WARNING PFX "couldn't allocate device attr struct"); 238 printk(KERN_WARNING PFX "couldn't allocate device attr struct\n");
239 return ERR_PTR(-ENOMEM); 239 return ERR_PTR(-ENOMEM);
240 } 240 }
241 241
242 ret = ib_query_device(device, attr); 242 ret = ib_query_device(device, attr);
243 if (ret) { 243 if (ret) {
244 printk(KERN_WARNING PFX "couldn't query device: %d", ret); 244 printk(KERN_WARNING PFX "couldn't query device: %d\n", ret);
245 kfree(attr); 245 kfree(attr);
246 return ERR_PTR(ret); 246 return ERR_PTR(ret);
247 } 247 }
@@ -255,7 +255,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
255 255
256 pool = kmalloc(sizeof *pool, GFP_KERNEL); 256 pool = kmalloc(sizeof *pool, GFP_KERNEL);
257 if (!pool) { 257 if (!pool) {
258 printk(KERN_WARNING PFX "couldn't allocate pool struct"); 258 printk(KERN_WARNING PFX "couldn't allocate pool struct\n");
259 return ERR_PTR(-ENOMEM); 259 return ERR_PTR(-ENOMEM);
260 } 260 }
261 261
@@ -272,7 +272,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
272 kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket, 272 kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
273 GFP_KERNEL); 273 GFP_KERNEL);
274 if (!pool->cache_bucket) { 274 if (!pool->cache_bucket) {
275 printk(KERN_WARNING PFX "Failed to allocate cache in pool"); 275 printk(KERN_WARNING PFX "Failed to allocate cache in pool\n");
276 ret = -ENOMEM; 276 ret = -ENOMEM;
277 goto out_free_pool; 277 goto out_free_pool;
278 } 278 }
@@ -296,7 +296,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
296 "ib_fmr(%s)", 296 "ib_fmr(%s)",
297 device->name); 297 device->name);
298 if (IS_ERR(pool->thread)) { 298 if (IS_ERR(pool->thread)) {
299 printk(KERN_WARNING PFX "couldn't start cleanup thread"); 299 printk(KERN_WARNING PFX "couldn't start cleanup thread\n");
300 ret = PTR_ERR(pool->thread); 300 ret = PTR_ERR(pool->thread);
301 goto out_free_pool; 301 goto out_free_pool;
302 } 302 }
@@ -314,7 +314,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
314 GFP_KERNEL); 314 GFP_KERNEL);
315 if (!fmr) { 315 if (!fmr) {
316 printk(KERN_WARNING PFX "failed to allocate fmr " 316 printk(KERN_WARNING PFX "failed to allocate fmr "
317 "struct for FMR %d", i); 317 "struct for FMR %d\n", i);
318 goto out_fail; 318 goto out_fail;
319 } 319 }
320 320
@@ -326,7 +326,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
326 fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr); 326 fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
327 if (IS_ERR(fmr->fmr)) { 327 if (IS_ERR(fmr->fmr)) {
328 printk(KERN_WARNING PFX "fmr_create failed " 328 printk(KERN_WARNING PFX "fmr_create failed "
329 "for FMR %d", i); 329 "for FMR %d\n", i);
330 kfree(fmr); 330 kfree(fmr);
331 goto out_fail; 331 goto out_fail;
332 } 332 }
@@ -381,7 +381,7 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
381 } 381 }
382 382
383 if (i < pool->pool_size) 383 if (i < pool->pool_size)
384 printk(KERN_WARNING PFX "pool still has %d regions registered", 384 printk(KERN_WARNING PFX "pool still has %d regions registered\n",
385 pool->pool_size - i); 385 pool->pool_size - i);
386 386
387 kfree(pool->cache_bucket); 387 kfree(pool->cache_bucket);
@@ -518,7 +518,7 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
518 518
519#ifdef DEBUG 519#ifdef DEBUG
520 if (fmr->ref_count < 0) 520 if (fmr->ref_count < 0)
521 printk(KERN_WARNING PFX "FMR %p has ref count %d < 0", 521 printk(KERN_WARNING PFX "FMR %p has ref count %d < 0\n",
522 fmr, fmr->ref_count); 522 fmr, fmr->ref_count);
523#endif 523#endif
524 524
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 15b4c4d3606d..1bc1fe605282 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -196,7 +196,7 @@ static void queue_join(struct mcast_member *member)
196 unsigned long flags; 196 unsigned long flags;
197 197
198 spin_lock_irqsave(&group->lock, flags); 198 spin_lock_irqsave(&group->lock, flags);
199 list_add(&member->list, &group->pending_list); 199 list_add_tail(&member->list, &group->pending_list);
200 if (group->state == MCAST_IDLE) { 200 if (group->state == MCAST_IDLE) {
201 group->state = MCAST_BUSY; 201 group->state = MCAST_BUSY;
202 atomic_inc(&group->refcount); 202 atomic_inc(&group->refcount);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index d271bd715c12..cf474ec27070 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -123,14 +123,10 @@ static u32 tid;
123 .field_name = "sa_path_rec:" #field 123 .field_name = "sa_path_rec:" #field
124 124
125static const struct ib_field path_rec_table[] = { 125static const struct ib_field path_rec_table[] = {
126 { RESERVED, 126 { PATH_REC_FIELD(service_id),
127 .offset_words = 0, 127 .offset_words = 0,
128 .offset_bits = 0, 128 .offset_bits = 0,
129 .size_bits = 32 }, 129 .size_bits = 64 },
130 { RESERVED,
131 .offset_words = 1,
132 .offset_bits = 0,
133 .size_bits = 32 },
134 { PATH_REC_FIELD(dgid), 130 { PATH_REC_FIELD(dgid),
135 .offset_words = 2, 131 .offset_words = 2,
136 .offset_bits = 0, 132 .offset_bits = 0,
@@ -179,7 +175,7 @@ static const struct ib_field path_rec_table[] = {
179 .offset_words = 12, 175 .offset_words = 12,
180 .offset_bits = 16, 176 .offset_bits = 16,
181 .size_bits = 16 }, 177 .size_bits = 16 },
182 { RESERVED, 178 { PATH_REC_FIELD(qos_class),
183 .offset_words = 13, 179 .offset_words = 13,
184 .offset_bits = 0, 180 .offset_bits = 0,
185 .size_bits = 12 }, 181 .size_bits = 12 },
@@ -531,7 +527,7 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
531 query->sm_ah->pkey_index, 527 query->sm_ah->pkey_index,
532 0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA, 528 0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
533 gfp_mask); 529 gfp_mask);
534 if (!query->mad_buf) { 530 if (IS_ERR(query->mad_buf)) {
535 kref_put(&query->sm_ah->ref, free_sm_ah); 531 kref_put(&query->sm_ah->ref, free_sm_ah);
536 return -ENOMEM; 532 return -ENOMEM;
537 } 533 }
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 53b4c94a7eb5..90d675ad9ec8 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -792,6 +792,78 @@ out:
792 return ret; 792 return ret;
793} 793}
794 794
795static int ucma_set_option_id(struct ucma_context *ctx, int optname,
796 void *optval, size_t optlen)
797{
798 int ret = 0;
799
800 switch (optname) {
801 case RDMA_OPTION_ID_TOS:
802 if (optlen != sizeof(u8)) {
803 ret = -EINVAL;
804 break;
805 }
806 rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
807 break;
808 default:
809 ret = -ENOSYS;
810 }
811
812 return ret;
813}
814
815static int ucma_set_option_level(struct ucma_context *ctx, int level,
816 int optname, void *optval, size_t optlen)
817{
818 int ret;
819
820 switch (level) {
821 case RDMA_OPTION_ID:
822 ret = ucma_set_option_id(ctx, optname, optval, optlen);
823 break;
824 default:
825 ret = -ENOSYS;
826 }
827
828 return ret;
829}
830
831static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
832 int in_len, int out_len)
833{
834 struct rdma_ucm_set_option cmd;
835 struct ucma_context *ctx;
836 void *optval;
837 int ret;
838
839 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
840 return -EFAULT;
841
842 ctx = ucma_get_ctx(file, cmd.id);
843 if (IS_ERR(ctx))
844 return PTR_ERR(ctx);
845
846 optval = kmalloc(cmd.optlen, GFP_KERNEL);
847 if (!optval) {
848 ret = -ENOMEM;
849 goto out1;
850 }
851
852 if (copy_from_user(optval, (void __user *) (unsigned long) cmd.optval,
853 cmd.optlen)) {
854 ret = -EFAULT;
855 goto out2;
856 }
857
858 ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
859 cmd.optlen);
860out2:
861 kfree(optval);
862out1:
863 ucma_put_ctx(ctx);
864 return ret;
865}
866
795static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf, 867static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
796 int in_len, int out_len) 868 int in_len, int out_len)
797{ 869{
@@ -936,7 +1008,7 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
936 [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr, 1008 [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr,
937 [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event, 1009 [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event,
938 [RDMA_USER_CM_CMD_GET_OPTION] = NULL, 1010 [RDMA_USER_CM_CMD_GET_OPTION] = NULL,
939 [RDMA_USER_CM_CMD_SET_OPTION] = NULL, 1011 [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option,
940 [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, 1012 [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify,
941 [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast, 1013 [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast,
942 [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, 1014 [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast,
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 664d2faa9e74..2f54e29dc7a6 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -37,6 +37,7 @@
37#include <linux/mm.h> 37#include <linux/mm.h>
38#include <linux/dma-mapping.h> 38#include <linux/dma-mapping.h>
39#include <linux/sched.h> 39#include <linux/sched.h>
40#include <linux/hugetlb.h>
40 41
41#include "uverbs.h" 42#include "uverbs.h"
42 43
@@ -75,6 +76,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
75{ 76{
76 struct ib_umem *umem; 77 struct ib_umem *umem;
77 struct page **page_list; 78 struct page **page_list;
79 struct vm_area_struct **vma_list;
78 struct ib_umem_chunk *chunk; 80 struct ib_umem_chunk *chunk;
79 unsigned long locked; 81 unsigned long locked;
80 unsigned long lock_limit; 82 unsigned long lock_limit;
@@ -104,6 +106,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
104 */ 106 */
105 umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ); 107 umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
106 108
109 /* We assume the memory is from hugetlb until proved otherwise */
110 umem->hugetlb = 1;
111
107 INIT_LIST_HEAD(&umem->chunk_list); 112 INIT_LIST_HEAD(&umem->chunk_list);
108 113
109 page_list = (struct page **) __get_free_page(GFP_KERNEL); 114 page_list = (struct page **) __get_free_page(GFP_KERNEL);
@@ -112,6 +117,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
112 return ERR_PTR(-ENOMEM); 117 return ERR_PTR(-ENOMEM);
113 } 118 }
114 119
120 /*
121 * if we can't alloc the vma_list, it's not so bad;
122 * just assume the memory is not hugetlb memory
123 */
124 vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL);
125 if (!vma_list)
126 umem->hugetlb = 0;
127
115 npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT; 128 npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
116 129
117 down_write(&current->mm->mmap_sem); 130 down_write(&current->mm->mmap_sem);
@@ -131,7 +144,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
131 ret = get_user_pages(current, current->mm, cur_base, 144 ret = get_user_pages(current, current->mm, cur_base,
132 min_t(int, npages, 145 min_t(int, npages,
133 PAGE_SIZE / sizeof (struct page *)), 146 PAGE_SIZE / sizeof (struct page *)),
134 1, !umem->writable, page_list, NULL); 147 1, !umem->writable, page_list, vma_list);
135 148
136 if (ret < 0) 149 if (ret < 0)
137 goto out; 150 goto out;
@@ -152,6 +165,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
152 165
153 chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK); 166 chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
154 for (i = 0; i < chunk->nents; ++i) { 167 for (i = 0; i < chunk->nents; ++i) {
168 if (vma_list &&
169 !is_vm_hugetlb_page(vma_list[i + off]))
170 umem->hugetlb = 0;
155 chunk->page_list[i].page = page_list[i + off]; 171 chunk->page_list[i].page = page_list[i + off];
156 chunk->page_list[i].offset = 0; 172 chunk->page_list[i].offset = 0;
157 chunk->page_list[i].length = PAGE_SIZE; 173 chunk->page_list[i].length = PAGE_SIZE;
@@ -186,6 +202,8 @@ out:
186 current->mm->locked_vm = locked; 202 current->mm->locked_vm = locked;
187 203
188 up_write(&current->mm->mmap_sem); 204 up_write(&current->mm->mmap_sem);
205 if (vma_list)
206 free_page((unsigned long) vma_list);
189 free_page((unsigned long) page_list); 207 free_page((unsigned long) page_list);
190 208
191 return ret < 0 ? ERR_PTR(ret) : umem; 209 return ret < 0 ? ERR_PTR(ret) : umem;
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index d97ded25c4ff..b53eac4611de 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -44,6 +44,7 @@
44#include <linux/poll.h> 44#include <linux/poll.h>
45#include <linux/rwsem.h> 45#include <linux/rwsem.h>
46#include <linux/kref.h> 46#include <linux/kref.h>
47#include <linux/compat.h>
47 48
48#include <asm/uaccess.h> 49#include <asm/uaccess.h>
49#include <asm/semaphore.h> 50#include <asm/semaphore.h>
@@ -118,6 +119,8 @@ struct ib_umad_file {
118 wait_queue_head_t recv_wait; 119 wait_queue_head_t recv_wait;
119 struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; 120 struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS];
120 int agents_dead; 121 int agents_dead;
122 u8 use_pkey_index;
123 u8 already_used;
121}; 124};
122 125
123struct ib_umad_packet { 126struct ib_umad_packet {
@@ -147,6 +150,12 @@ static void ib_umad_release_dev(struct kref *ref)
147 kfree(dev); 150 kfree(dev);
148} 151}
149 152
153static int hdr_size(struct ib_umad_file *file)
154{
155 return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) :
156 sizeof (struct ib_user_mad_hdr_old);
157}
158
150/* caller must hold port->mutex at least for reading */ 159/* caller must hold port->mutex at least for reading */
151static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id) 160static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id)
152{ 161{
@@ -221,13 +230,13 @@ static void recv_handler(struct ib_mad_agent *agent,
221 packet->length = mad_recv_wc->mad_len; 230 packet->length = mad_recv_wc->mad_len;
222 packet->recv_wc = mad_recv_wc; 231 packet->recv_wc = mad_recv_wc;
223 232
224 packet->mad.hdr.status = 0; 233 packet->mad.hdr.status = 0;
225 packet->mad.hdr.length = sizeof (struct ib_user_mad) + 234 packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len;
226 mad_recv_wc->mad_len; 235 packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp);
227 packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); 236 packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid);
228 packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); 237 packet->mad.hdr.sl = mad_recv_wc->wc->sl;
229 packet->mad.hdr.sl = mad_recv_wc->wc->sl; 238 packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits;
230 packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; 239 packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index;
231 packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); 240 packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
232 if (packet->mad.hdr.grh_present) { 241 if (packet->mad.hdr.grh_present) {
233 struct ib_ah_attr ah_attr; 242 struct ib_ah_attr ah_attr;
@@ -253,8 +262,8 @@ err1:
253 ib_free_recv_mad(mad_recv_wc); 262 ib_free_recv_mad(mad_recv_wc);
254} 263}
255 264
256static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet, 265static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf,
257 size_t count) 266 struct ib_umad_packet *packet, size_t count)
258{ 267{
259 struct ib_mad_recv_buf *recv_buf; 268 struct ib_mad_recv_buf *recv_buf;
260 int left, seg_payload, offset, max_seg_payload; 269 int left, seg_payload, offset, max_seg_payload;
@@ -262,15 +271,15 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
262 /* We need enough room to copy the first (or only) MAD segment. */ 271 /* We need enough room to copy the first (or only) MAD segment. */
263 recv_buf = &packet->recv_wc->recv_buf; 272 recv_buf = &packet->recv_wc->recv_buf;
264 if ((packet->length <= sizeof (*recv_buf->mad) && 273 if ((packet->length <= sizeof (*recv_buf->mad) &&
265 count < sizeof (packet->mad) + packet->length) || 274 count < hdr_size(file) + packet->length) ||
266 (packet->length > sizeof (*recv_buf->mad) && 275 (packet->length > sizeof (*recv_buf->mad) &&
267 count < sizeof (packet->mad) + sizeof (*recv_buf->mad))) 276 count < hdr_size(file) + sizeof (*recv_buf->mad)))
268 return -EINVAL; 277 return -EINVAL;
269 278
270 if (copy_to_user(buf, &packet->mad, sizeof (packet->mad))) 279 if (copy_to_user(buf, &packet->mad, hdr_size(file)))
271 return -EFAULT; 280 return -EFAULT;
272 281
273 buf += sizeof (packet->mad); 282 buf += hdr_size(file);
274 seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad)); 283 seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad));
275 if (copy_to_user(buf, recv_buf->mad, seg_payload)) 284 if (copy_to_user(buf, recv_buf->mad, seg_payload))
276 return -EFAULT; 285 return -EFAULT;
@@ -280,7 +289,7 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
280 * Multipacket RMPP MAD message. Copy remainder of message. 289 * Multipacket RMPP MAD message. Copy remainder of message.
281 * Note that last segment may have a shorter payload. 290 * Note that last segment may have a shorter payload.
282 */ 291 */
283 if (count < sizeof (packet->mad) + packet->length) { 292 if (count < hdr_size(file) + packet->length) {
284 /* 293 /*
285 * The buffer is too small, return the first RMPP segment, 294 * The buffer is too small, return the first RMPP segment,
286 * which includes the RMPP message length. 295 * which includes the RMPP message length.
@@ -300,18 +309,23 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
300 return -EFAULT; 309 return -EFAULT;
301 } 310 }
302 } 311 }
303 return sizeof (packet->mad) + packet->length; 312 return hdr_size(file) + packet->length;
304} 313}
305 314
306static ssize_t copy_send_mad(char __user *buf, struct ib_umad_packet *packet, 315static ssize_t copy_send_mad(struct ib_umad_file *file, char __user *buf,
307 size_t count) 316 struct ib_umad_packet *packet, size_t count)
308{ 317{
309 ssize_t size = sizeof (packet->mad) + packet->length; 318 ssize_t size = hdr_size(file) + packet->length;
310 319
311 if (count < size) 320 if (count < size)
312 return -EINVAL; 321 return -EINVAL;
313 322
314 if (copy_to_user(buf, &packet->mad, size)) 323 if (copy_to_user(buf, &packet->mad, hdr_size(file)))
324 return -EFAULT;
325
326 buf += hdr_size(file);
327
328 if (copy_to_user(buf, packet->mad.data, packet->length))
315 return -EFAULT; 329 return -EFAULT;
316 330
317 return size; 331 return size;
@@ -324,7 +338,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
324 struct ib_umad_packet *packet; 338 struct ib_umad_packet *packet;
325 ssize_t ret; 339 ssize_t ret;
326 340
327 if (count < sizeof (struct ib_user_mad)) 341 if (count < hdr_size(file))
328 return -EINVAL; 342 return -EINVAL;
329 343
330 spin_lock_irq(&file->recv_lock); 344 spin_lock_irq(&file->recv_lock);
@@ -348,9 +362,9 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
348 spin_unlock_irq(&file->recv_lock); 362 spin_unlock_irq(&file->recv_lock);
349 363
350 if (packet->recv_wc) 364 if (packet->recv_wc)
351 ret = copy_recv_mad(buf, packet, count); 365 ret = copy_recv_mad(file, buf, packet, count);
352 else 366 else
353 ret = copy_send_mad(buf, packet, count); 367 ret = copy_send_mad(file, buf, packet, count);
354 368
355 if (ret < 0) { 369 if (ret < 0) {
356 /* Requeue packet */ 370 /* Requeue packet */
@@ -442,15 +456,14 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
442 __be64 *tid; 456 __be64 *tid;
443 int ret, data_len, hdr_len, copy_offset, rmpp_active; 457 int ret, data_len, hdr_len, copy_offset, rmpp_active;
444 458
445 if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR) 459 if (count < hdr_size(file) + IB_MGMT_RMPP_HDR)
446 return -EINVAL; 460 return -EINVAL;
447 461
448 packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); 462 packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
449 if (!packet) 463 if (!packet)
450 return -ENOMEM; 464 return -ENOMEM;
451 465
452 if (copy_from_user(&packet->mad, buf, 466 if (copy_from_user(&packet->mad, buf, hdr_size(file))) {
453 sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)) {
454 ret = -EFAULT; 467 ret = -EFAULT;
455 goto err; 468 goto err;
456 } 469 }
@@ -461,6 +474,13 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
461 goto err; 474 goto err;
462 } 475 }
463 476
477 buf += hdr_size(file);
478
479 if (copy_from_user(packet->mad.data, buf, IB_MGMT_RMPP_HDR)) {
480 ret = -EFAULT;
481 goto err;
482 }
483
464 down_read(&file->port->mutex); 484 down_read(&file->port->mutex);
465 485
466 agent = __get_agent(file, packet->mad.hdr.id); 486 agent = __get_agent(file, packet->mad.hdr.id);
@@ -500,11 +520,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
500 IB_MGMT_RMPP_FLAG_ACTIVE; 520 IB_MGMT_RMPP_FLAG_ACTIVE;
501 } 521 }
502 522
503 data_len = count - sizeof (struct ib_user_mad) - hdr_len; 523 data_len = count - hdr_size(file) - hdr_len;
504 packet->msg = ib_create_send_mad(agent, 524 packet->msg = ib_create_send_mad(agent,
505 be32_to_cpu(packet->mad.hdr.qpn), 525 be32_to_cpu(packet->mad.hdr.qpn),
506 0, rmpp_active, hdr_len, 526 packet->mad.hdr.pkey_index, rmpp_active,
507 data_len, GFP_KERNEL); 527 hdr_len, data_len, GFP_KERNEL);
508 if (IS_ERR(packet->msg)) { 528 if (IS_ERR(packet->msg)) {
509 ret = PTR_ERR(packet->msg); 529 ret = PTR_ERR(packet->msg);
510 goto err_ah; 530 goto err_ah;
@@ -517,7 +537,6 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
517 537
518 /* Copy MAD header. Any RMPP header is already in place. */ 538 /* Copy MAD header. Any RMPP header is already in place. */
519 memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR); 539 memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR);
520 buf += sizeof (struct ib_user_mad);
521 540
522 if (!rmpp_active) { 541 if (!rmpp_active) {
523 if (copy_from_user(packet->msg->mad + copy_offset, 542 if (copy_from_user(packet->msg->mad + copy_offset,
@@ -589,7 +608,8 @@ static unsigned int ib_umad_poll(struct file *filp, struct poll_table_struct *wa
589 return mask; 608 return mask;
590} 609}
591 610
592static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg) 611static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
612 int compat_method_mask)
593{ 613{
594 struct ib_user_mad_reg_req ureq; 614 struct ib_user_mad_reg_req ureq;
595 struct ib_mad_reg_req req; 615 struct ib_mad_reg_req req;
@@ -604,7 +624,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg)
604 goto out; 624 goto out;
605 } 625 }
606 626
607 if (copy_from_user(&ureq, (void __user *) arg, sizeof ureq)) { 627 if (copy_from_user(&ureq, arg, sizeof ureq)) {
608 ret = -EFAULT; 628 ret = -EFAULT;
609 goto out; 629 goto out;
610 } 630 }
@@ -625,8 +645,18 @@ found:
625 if (ureq.mgmt_class) { 645 if (ureq.mgmt_class) {
626 req.mgmt_class = ureq.mgmt_class; 646 req.mgmt_class = ureq.mgmt_class;
627 req.mgmt_class_version = ureq.mgmt_class_version; 647 req.mgmt_class_version = ureq.mgmt_class_version;
628 memcpy(req.method_mask, ureq.method_mask, sizeof req.method_mask); 648 memcpy(req.oui, ureq.oui, sizeof req.oui);
629 memcpy(req.oui, ureq.oui, sizeof req.oui); 649
650 if (compat_method_mask) {
651 u32 *umm = (u32 *) ureq.method_mask;
652 int i;
653
654 for (i = 0; i < BITS_TO_LONGS(IB_MGMT_MAX_METHODS); ++i)
655 req.method_mask[i] =
656 umm[i * 2] | ((u64) umm[i * 2 + 1] << 32);
657 } else
658 memcpy(req.method_mask, ureq.method_mask,
659 sizeof req.method_mask);
630 } 660 }
631 661
632 agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num, 662 agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num,
@@ -646,6 +676,16 @@ found:
646 goto out; 676 goto out;
647 } 677 }
648 678
679 if (!file->already_used) {
680 file->already_used = 1;
681 if (!file->use_pkey_index) {
682 printk(KERN_WARNING "user_mad: process %s did not enable "
683 "P_Key index support.\n", current->comm);
684 printk(KERN_WARNING "user_mad: Documentation/infiniband/user_mad.txt "
685 "has info on the new ABI.\n");
686 }
687 }
688
649 file->agent[agent_id] = agent; 689 file->agent[agent_id] = agent;
650 ret = 0; 690 ret = 0;
651 691
@@ -654,13 +694,13 @@ out:
654 return ret; 694 return ret;
655} 695}
656 696
657static int ib_umad_unreg_agent(struct ib_umad_file *file, unsigned long arg) 697static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
658{ 698{
659 struct ib_mad_agent *agent = NULL; 699 struct ib_mad_agent *agent = NULL;
660 u32 id; 700 u32 id;
661 int ret = 0; 701 int ret = 0;
662 702
663 if (get_user(id, (u32 __user *) arg)) 703 if (get_user(id, arg))
664 return -EFAULT; 704 return -EFAULT;
665 705
666 down_write(&file->port->mutex); 706 down_write(&file->port->mutex);
@@ -682,18 +722,51 @@ out:
682 return ret; 722 return ret;
683} 723}
684 724
725static long ib_umad_enable_pkey(struct ib_umad_file *file)
726{
727 int ret = 0;
728
729 down_write(&file->port->mutex);
730 if (file->already_used)
731 ret = -EINVAL;
732 else
733 file->use_pkey_index = 1;
734 up_write(&file->port->mutex);
735
736 return ret;
737}
738
685static long ib_umad_ioctl(struct file *filp, unsigned int cmd, 739static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
686 unsigned long arg) 740 unsigned long arg)
687{ 741{
688 switch (cmd) { 742 switch (cmd) {
689 case IB_USER_MAD_REGISTER_AGENT: 743 case IB_USER_MAD_REGISTER_AGENT:
690 return ib_umad_reg_agent(filp->private_data, arg); 744 return ib_umad_reg_agent(filp->private_data, (void __user *) arg, 0);
745 case IB_USER_MAD_UNREGISTER_AGENT:
746 return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg);
747 case IB_USER_MAD_ENABLE_PKEY:
748 return ib_umad_enable_pkey(filp->private_data);
749 default:
750 return -ENOIOCTLCMD;
751 }
752}
753
754#ifdef CONFIG_COMPAT
755static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd,
756 unsigned long arg)
757{
758 switch (cmd) {
759 case IB_USER_MAD_REGISTER_AGENT:
760 return ib_umad_reg_agent(filp->private_data, compat_ptr(arg), 1);
691 case IB_USER_MAD_UNREGISTER_AGENT: 761 case IB_USER_MAD_UNREGISTER_AGENT:
692 return ib_umad_unreg_agent(filp->private_data, arg); 762 return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg));
763 case IB_USER_MAD_ENABLE_PKEY:
764 return ib_umad_enable_pkey(filp->private_data);
693 default: 765 default:
694 return -ENOIOCTLCMD; 766 return -ENOIOCTLCMD;
695 } 767 }
696} 768}
769#endif
697 770
698static int ib_umad_open(struct inode *inode, struct file *filp) 771static int ib_umad_open(struct inode *inode, struct file *filp)
699{ 772{
@@ -782,7 +855,9 @@ static const struct file_operations umad_fops = {
782 .write = ib_umad_write, 855 .write = ib_umad_write,
783 .poll = ib_umad_poll, 856 .poll = ib_umad_poll,
784 .unlocked_ioctl = ib_umad_ioctl, 857 .unlocked_ioctl = ib_umad_ioctl,
785 .compat_ioctl = ib_umad_ioctl, 858#ifdef CONFIG_COMPAT
859 .compat_ioctl = ib_umad_compat_ioctl,
860#endif
786 .open = ib_umad_open, 861 .open = ib_umad_open,
787 .release = ib_umad_close 862 .release = ib_umad_close
788}; 863};
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index c33546f9e961..c75eb6c9bd49 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -148,7 +148,6 @@ void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
148 148
149struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, 149struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
150 int is_async, int *fd); 150 int is_async, int *fd);
151void ib_uverbs_release_event_file(struct kref *ref);
152struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd); 151struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
153 152
154void ib_uverbs_release_ucq(struct ib_uverbs_file *file, 153void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 14d7ccd89195..7c2ac3905582 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -125,6 +125,14 @@ static void ib_uverbs_release_dev(struct kref *ref)
125 complete(&dev->comp); 125 complete(&dev->comp);
126} 126}
127 127
128static void ib_uverbs_release_event_file(struct kref *ref)
129{
130 struct ib_uverbs_event_file *file =
131 container_of(ref, struct ib_uverbs_event_file, ref);
132
133 kfree(file);
134}
135
128void ib_uverbs_release_ucq(struct ib_uverbs_file *file, 136void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
129 struct ib_uverbs_event_file *ev_file, 137 struct ib_uverbs_event_file *ev_file,
130 struct ib_ucq_object *uobj) 138 struct ib_ucq_object *uobj)
@@ -331,14 +339,6 @@ static unsigned int ib_uverbs_event_poll(struct file *filp,
331 return pollflags; 339 return pollflags;
332} 340}
333 341
334void ib_uverbs_release_event_file(struct kref *ref)
335{
336 struct ib_uverbs_event_file *file =
337 container_of(ref, struct ib_uverbs_event_file, ref);
338
339 kfree(file);
340}
341
342static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) 342static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
343{ 343{
344 struct ib_uverbs_event_file *file = filp->private_data; 344 struct ib_uverbs_event_file *file = filp->private_data;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 1cdfcd43b0bc..20ba372dd182 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -63,37 +63,37 @@ static char *states[] = {
63}; 63};
64 64
65static int ep_timeout_secs = 10; 65static int ep_timeout_secs = 10;
66module_param(ep_timeout_secs, int, 0444); 66module_param(ep_timeout_secs, int, 0644);
67MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " 67MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
68 "in seconds (default=10)"); 68 "in seconds (default=10)");
69 69
70static int mpa_rev = 1; 70static int mpa_rev = 1;
71module_param(mpa_rev, int, 0444); 71module_param(mpa_rev, int, 0644);
72MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, " 72MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
73 "1 is spec compliant. (default=1)"); 73 "1 is spec compliant. (default=1)");
74 74
75static int markers_enabled = 0; 75static int markers_enabled = 0;
76module_param(markers_enabled, int, 0444); 76module_param(markers_enabled, int, 0644);
77MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)"); 77MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
78 78
79static int crc_enabled = 1; 79static int crc_enabled = 1;
80module_param(crc_enabled, int, 0444); 80module_param(crc_enabled, int, 0644);
81MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)"); 81MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
82 82
83static int rcv_win = 256 * 1024; 83static int rcv_win = 256 * 1024;
84module_param(rcv_win, int, 0444); 84module_param(rcv_win, int, 0644);
85MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256)"); 85MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256)");
86 86
87static int snd_win = 32 * 1024; 87static int snd_win = 32 * 1024;
88module_param(snd_win, int, 0444); 88module_param(snd_win, int, 0644);
89MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)"); 89MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)");
90 90
91static unsigned int nocong = 0; 91static unsigned int nocong = 0;
92module_param(nocong, uint, 0444); 92module_param(nocong, uint, 0644);
93MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)"); 93MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
94 94
95static unsigned int cong_flavor = 1; 95static unsigned int cong_flavor = 1;
96module_param(cong_flavor, uint, 0444); 96module_param(cong_flavor, uint, 0644);
97MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)"); 97MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)");
98 98
99static void process_work(struct work_struct *work); 99static void process_work(struct work_struct *work);
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index b5e960305316..0f7a55d35ea7 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -53,6 +53,7 @@ struct ehca_pd;
53struct ehca_av; 53struct ehca_av;
54 54
55#include <linux/wait.h> 55#include <linux/wait.h>
56#include <linux/mutex.h>
56 57
57#include <rdma/ib_verbs.h> 58#include <rdma/ib_verbs.h>
58#include <rdma/ib_user_verbs.h> 59#include <rdma/ib_user_verbs.h>
@@ -99,10 +100,10 @@ struct ehca_sport {
99 struct ehca_sma_attr saved_attr; 100 struct ehca_sma_attr saved_attr;
100}; 101};
101 102
102#define HCA_CAP_MR_PGSIZE_4K 1 103#define HCA_CAP_MR_PGSIZE_4K 0x80000000
103#define HCA_CAP_MR_PGSIZE_64K 2 104#define HCA_CAP_MR_PGSIZE_64K 0x40000000
104#define HCA_CAP_MR_PGSIZE_1M 4 105#define HCA_CAP_MR_PGSIZE_1M 0x20000000
105#define HCA_CAP_MR_PGSIZE_16M 8 106#define HCA_CAP_MR_PGSIZE_16M 0x10000000
106 107
107struct ehca_shca { 108struct ehca_shca {
108 struct ib_device ib_device; 109 struct ib_device ib_device;
@@ -337,6 +338,8 @@ struct ehca_create_cq_resp {
337 u32 cq_number; 338 u32 cq_number;
338 u32 token; 339 u32 token;
339 struct ipzu_queue_resp ipz_queue; 340 struct ipzu_queue_resp ipz_queue;
341 u32 fw_handle_ofs;
342 u32 dummy;
340}; 343};
341 344
342struct ehca_create_qp_resp { 345struct ehca_create_qp_resp {
@@ -347,7 +350,8 @@ struct ehca_create_qp_resp {
347 u32 qkey; 350 u32 qkey;
348 /* qp_num assigned by ehca: sqp0/1 may have got different numbers */ 351 /* qp_num assigned by ehca: sqp0/1 may have got different numbers */
349 u32 real_qp_num; 352 u32 real_qp_num;
350 u32 dummy; /* padding for 8 byte alignment */ 353 u32 fw_handle_ofs;
354 u32 dummy;
351 struct ipzu_queue_resp ipz_squeue; 355 struct ipzu_queue_resp ipz_squeue;
352 struct ipzu_queue_resp ipz_rqueue; 356 struct ipzu_queue_resp ipz_rqueue;
353}; 357};
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 81aff36101ba..79c25f51c21e 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -166,7 +166,6 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
166 write_lock_irqsave(&ehca_cq_idr_lock, flags); 166 write_lock_irqsave(&ehca_cq_idr_lock, flags);
167 ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token); 167 ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token);
168 write_unlock_irqrestore(&ehca_cq_idr_lock, flags); 168 write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
169
170 } while (ret == -EAGAIN); 169 } while (ret == -EAGAIN);
171 170
172 if (ret) { 171 if (ret) {
@@ -176,6 +175,12 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
176 goto create_cq_exit1; 175 goto create_cq_exit1;
177 } 176 }
178 177
178 if (my_cq->token > 0x1FFFFFF) {
179 cq = ERR_PTR(-ENOMEM);
180 ehca_err(device, "Invalid number of cq. device=%p", device);
181 goto create_cq_exit2;
182 }
183
179 /* 184 /*
180 * CQs maximum depth is 4GB-64, but we need additional 20 as buffer 185 * CQs maximum depth is 4GB-64, but we need additional 20 as buffer
181 * for receiving errors CQEs. 186 * for receiving errors CQEs.
@@ -185,7 +190,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
185 190
186 if (h_ret != H_SUCCESS) { 191 if (h_ret != H_SUCCESS) {
187 ehca_err(device, "hipz_h_alloc_resource_cq() failed " 192 ehca_err(device, "hipz_h_alloc_resource_cq() failed "
188 "h_ret=%lx device=%p", h_ret, device); 193 "h_ret=%li device=%p", h_ret, device);
189 cq = ERR_PTR(ehca2ib_return_code(h_ret)); 194 cq = ERR_PTR(ehca2ib_return_code(h_ret));
190 goto create_cq_exit2; 195 goto create_cq_exit2;
191 } 196 }
@@ -193,7 +198,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
193 ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages, 198 ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages,
194 EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0); 199 EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0);
195 if (!ipz_rc) { 200 if (!ipz_rc) {
196 ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p", 201 ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%i device=%p",
197 ipz_rc, device); 202 ipz_rc, device);
198 cq = ERR_PTR(-EINVAL); 203 cq = ERR_PTR(-EINVAL);
199 goto create_cq_exit3; 204 goto create_cq_exit3;
@@ -221,7 +226,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
221 226
222 if (h_ret < H_SUCCESS) { 227 if (h_ret < H_SUCCESS) {
223 ehca_err(device, "hipz_h_register_rpage_cq() failed " 228 ehca_err(device, "hipz_h_register_rpage_cq() failed "
224 "ehca_cq=%p cq_num=%x h_ret=%lx counter=%i " 229 "ehca_cq=%p cq_num=%x h_ret=%li counter=%i "
225 "act_pages=%i", my_cq, my_cq->cq_number, 230 "act_pages=%i", my_cq, my_cq->cq_number,
226 h_ret, counter, param.act_pages); 231 h_ret, counter, param.act_pages);
227 cq = ERR_PTR(-EINVAL); 232 cq = ERR_PTR(-EINVAL);
@@ -233,7 +238,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
233 if ((h_ret != H_SUCCESS) || vpage) { 238 if ((h_ret != H_SUCCESS) || vpage) {
234 ehca_err(device, "Registration of pages not " 239 ehca_err(device, "Registration of pages not "
235 "complete ehca_cq=%p cq_num=%x " 240 "complete ehca_cq=%p cq_num=%x "
236 "h_ret=%lx", my_cq, my_cq->cq_number, 241 "h_ret=%li", my_cq, my_cq->cq_number,
237 h_ret); 242 h_ret);
238 cq = ERR_PTR(-EAGAIN); 243 cq = ERR_PTR(-EAGAIN);
239 goto create_cq_exit4; 244 goto create_cq_exit4;
@@ -241,7 +246,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
241 } else { 246 } else {
242 if (h_ret != H_PAGE_REGISTERED) { 247 if (h_ret != H_PAGE_REGISTERED) {
243 ehca_err(device, "Registration of page failed " 248 ehca_err(device, "Registration of page failed "
244 "ehca_cq=%p cq_num=%x h_ret=%lx" 249 "ehca_cq=%p cq_num=%x h_ret=%li"
245 "counter=%i act_pages=%i", 250 "counter=%i act_pages=%i",
246 my_cq, my_cq->cq_number, 251 my_cq, my_cq->cq_number,
247 h_ret, counter, param.act_pages); 252 h_ret, counter, param.act_pages);
@@ -276,6 +281,8 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
276 resp.ipz_queue.queue_length = ipz_queue->queue_length; 281 resp.ipz_queue.queue_length = ipz_queue->queue_length;
277 resp.ipz_queue.pagesize = ipz_queue->pagesize; 282 resp.ipz_queue.pagesize = ipz_queue->pagesize;
278 resp.ipz_queue.toggle_state = ipz_queue->toggle_state; 283 resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
284 resp.fw_handle_ofs = (u32)
285 (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1));
279 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { 286 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
280 ehca_err(device, "Copy to udata failed."); 287 ehca_err(device, "Copy to udata failed.");
281 goto create_cq_exit4; 288 goto create_cq_exit4;
@@ -291,7 +298,7 @@ create_cq_exit3:
291 h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); 298 h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
292 if (h_ret != H_SUCCESS) 299 if (h_ret != H_SUCCESS)
293 ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p " 300 ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p "
294 "cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret); 301 "cq_num=%x h_ret=%li", my_cq, my_cq->cq_number, h_ret);
295 302
296create_cq_exit2: 303create_cq_exit2:
297 write_lock_irqsave(&ehca_cq_idr_lock, flags); 304 write_lock_irqsave(&ehca_cq_idr_lock, flags);
@@ -355,7 +362,7 @@ int ehca_destroy_cq(struct ib_cq *cq)
355 cq_num); 362 cq_num);
356 } 363 }
357 if (h_ret != H_SUCCESS) { 364 if (h_ret != H_SUCCESS) {
358 ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lx " 365 ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%li "
359 "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num); 366 "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
360 return ehca2ib_return_code(h_ret); 367 return ehca2ib_return_code(h_ret);
361 } 368 }
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index cf22472d9414..4aa3ffa6a19f 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -82,17 +82,17 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
82 props->vendor_id = rblock->vendor_id >> 8; 82 props->vendor_id = rblock->vendor_id >> 8;
83 props->vendor_part_id = rblock->vendor_part_id >> 16; 83 props->vendor_part_id = rblock->vendor_part_id >> 16;
84 props->hw_ver = rblock->hw_ver; 84 props->hw_ver = rblock->hw_ver;
85 props->max_qp = min_t(int, rblock->max_qp, INT_MAX); 85 props->max_qp = min_t(unsigned, rblock->max_qp, INT_MAX);
86 props->max_qp_wr = min_t(int, rblock->max_wqes_wq, INT_MAX); 86 props->max_qp_wr = min_t(unsigned, rblock->max_wqes_wq, INT_MAX);
87 props->max_sge = min_t(int, rblock->max_sge, INT_MAX); 87 props->max_sge = min_t(unsigned, rblock->max_sge, INT_MAX);
88 props->max_sge_rd = min_t(int, rblock->max_sge_rd, INT_MAX); 88 props->max_sge_rd = min_t(unsigned, rblock->max_sge_rd, INT_MAX);
89 props->max_cq = min_t(int, rblock->max_cq, INT_MAX); 89 props->max_cq = min_t(unsigned, rblock->max_cq, INT_MAX);
90 props->max_cqe = min_t(int, rblock->max_cqe, INT_MAX); 90 props->max_cqe = min_t(unsigned, rblock->max_cqe, INT_MAX);
91 props->max_mr = min_t(int, rblock->max_mr, INT_MAX); 91 props->max_mr = min_t(unsigned, rblock->max_mr, INT_MAX);
92 props->max_mw = min_t(int, rblock->max_mw, INT_MAX); 92 props->max_mw = min_t(unsigned, rblock->max_mw, INT_MAX);
93 props->max_pd = min_t(int, rblock->max_pd, INT_MAX); 93 props->max_pd = min_t(unsigned, rblock->max_pd, INT_MAX);
94 props->max_ah = min_t(int, rblock->max_ah, INT_MAX); 94 props->max_ah = min_t(unsigned, rblock->max_ah, INT_MAX);
95 props->max_fmr = min_t(int, rblock->max_mr, INT_MAX); 95 props->max_fmr = min_t(unsigned, rblock->max_mr, INT_MAX);
96 96
97 if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { 97 if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
98 props->max_srq = props->max_qp; 98 props->max_srq = props->max_qp;
@@ -104,15 +104,15 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
104 props->local_ca_ack_delay 104 props->local_ca_ack_delay
105 = rblock->local_ca_ack_delay; 105 = rblock->local_ca_ack_delay;
106 props->max_raw_ipv6_qp 106 props->max_raw_ipv6_qp
107 = min_t(int, rblock->max_raw_ipv6_qp, INT_MAX); 107 = min_t(unsigned, rblock->max_raw_ipv6_qp, INT_MAX);
108 props->max_raw_ethy_qp 108 props->max_raw_ethy_qp
109 = min_t(int, rblock->max_raw_ethy_qp, INT_MAX); 109 = min_t(unsigned, rblock->max_raw_ethy_qp, INT_MAX);
110 props->max_mcast_grp 110 props->max_mcast_grp
111 = min_t(int, rblock->max_mcast_grp, INT_MAX); 111 = min_t(unsigned, rblock->max_mcast_grp, INT_MAX);
112 props->max_mcast_qp_attach 112 props->max_mcast_qp_attach
113 = min_t(int, rblock->max_mcast_qp_attach, INT_MAX); 113 = min_t(unsigned, rblock->max_mcast_qp_attach, INT_MAX);
114 props->max_total_mcast_qp_attach 114 props->max_total_mcast_qp_attach
115 = min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX); 115 = min_t(unsigned, rblock->max_total_mcast_qp_attach, INT_MAX);
116 116
117 /* translate device capabilities */ 117 /* translate device capabilities */
118 props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID | 118 props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID |
@@ -352,7 +352,7 @@ int ehca_modify_port(struct ib_device *ibdev,
352 hret = hipz_h_modify_port(shca->ipz_hca_handle, port, 352 hret = hipz_h_modify_port(shca->ipz_hca_handle, port,
353 cap, props->init_type, port_modify_mask); 353 cap, props->init_type, port_modify_mask);
354 if (hret != H_SUCCESS) { 354 if (hret != H_SUCCESS) {
355 ehca_err(&shca->ib_device, "Modify port failed hret=%lx", 355 ehca_err(&shca->ib_device, "Modify port failed h_ret=%li",
356 hret); 356 hret);
357 ret = -EINVAL; 357 ret = -EINVAL;
358 } 358 }
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index a925ea52443f..3f617b27b954 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -69,9 +69,6 @@
69static void queue_comp_task(struct ehca_cq *__cq); 69static void queue_comp_task(struct ehca_cq *__cq);
70 70
71static struct ehca_comp_pool *pool; 71static struct ehca_comp_pool *pool;
72#ifdef CONFIG_HOTPLUG_CPU
73static struct notifier_block comp_pool_callback_nb;
74#endif
75 72
76static inline void comp_event_callback(struct ehca_cq *cq) 73static inline void comp_event_callback(struct ehca_cq *cq)
77{ 74{
@@ -294,8 +291,8 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe)
294 case 0x11: /* unaffiliated access error */ 291 case 0x11: /* unaffiliated access error */
295 ehca_err(&shca->ib_device, "Unaffiliated access error."); 292 ehca_err(&shca->ib_device, "Unaffiliated access error.");
296 break; 293 break;
297 case 0x12: /* path migrating error */ 294 case 0x12: /* path migrating */
298 ehca_err(&shca->ib_device, "Path migration error."); 295 ehca_err(&shca->ib_device, "Path migrating.");
299 break; 296 break;
300 case 0x13: /* interface trace stopped */ 297 case 0x13: /* interface trace stopped */
301 ehca_err(&shca->ib_device, "Interface trace stopped."); 298 ehca_err(&shca->ib_device, "Interface trace stopped.");
@@ -760,9 +757,7 @@ static void destroy_comp_task(struct ehca_comp_pool *pool,
760 kthread_stop(task); 757 kthread_stop(task);
761} 758}
762 759
763#ifdef CONFIG_HOTPLUG_CPU 760static void __cpuinit take_over_work(struct ehca_comp_pool *pool, int cpu)
764static void take_over_work(struct ehca_comp_pool *pool,
765 int cpu)
766{ 761{
767 struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); 762 struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
768 LIST_HEAD(list); 763 LIST_HEAD(list);
@@ -785,9 +780,9 @@ static void take_over_work(struct ehca_comp_pool *pool,
785 780
786} 781}
787 782
788static int comp_pool_callback(struct notifier_block *nfb, 783static int __cpuinit comp_pool_callback(struct notifier_block *nfb,
789 unsigned long action, 784 unsigned long action,
790 void *hcpu) 785 void *hcpu)
791{ 786{
792 unsigned int cpu = (unsigned long)hcpu; 787 unsigned int cpu = (unsigned long)hcpu;
793 struct ehca_cpu_comp_task *cct; 788 struct ehca_cpu_comp_task *cct;
@@ -833,7 +828,11 @@ static int comp_pool_callback(struct notifier_block *nfb,
833 828
834 return NOTIFY_OK; 829 return NOTIFY_OK;
835} 830}
836#endif 831
832static struct notifier_block comp_pool_callback_nb __cpuinitdata = {
833 .notifier_call = comp_pool_callback,
834 .priority = 0,
835};
837 836
838int ehca_create_comp_pool(void) 837int ehca_create_comp_pool(void)
839{ 838{
@@ -864,11 +863,7 @@ int ehca_create_comp_pool(void)
864 } 863 }
865 } 864 }
866 865
867#ifdef CONFIG_HOTPLUG_CPU 866 register_hotcpu_notifier(&comp_pool_callback_nb);
868 comp_pool_callback_nb.notifier_call = comp_pool_callback;
869 comp_pool_callback_nb.priority = 0;
870 register_cpu_notifier(&comp_pool_callback_nb);
871#endif
872 867
873 printk(KERN_INFO "eHCA scaling code enabled\n"); 868 printk(KERN_INFO "eHCA scaling code enabled\n");
874 869
@@ -882,9 +877,7 @@ void ehca_destroy_comp_pool(void)
882 if (!ehca_scaling_code) 877 if (!ehca_scaling_code)
883 return; 878 return;
884 879
885#ifdef CONFIG_HOTPLUG_CPU 880 unregister_hotcpu_notifier(&comp_pool_callback_nb);
886 unregister_cpu_notifier(&comp_pool_callback_nb);
887#endif
888 881
889 for (i = 0; i < NR_CPUS; i++) { 882 for (i = 0; i < NR_CPUS; i++) {
890 if (cpu_online(i)) 883 if (cpu_online(i))
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 99036b65bb84..403467f66fe6 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -49,10 +49,12 @@
49#include "ehca_tools.h" 49#include "ehca_tools.h"
50#include "hcp_if.h" 50#include "hcp_if.h"
51 51
52#define HCAD_VERSION "0024"
53
52MODULE_LICENSE("Dual BSD/GPL"); 54MODULE_LICENSE("Dual BSD/GPL");
53MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); 55MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
54MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); 56MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
55MODULE_VERSION("SVNEHCA_0023"); 57MODULE_VERSION(HCAD_VERSION);
56 58
57int ehca_open_aqp1 = 0; 59int ehca_open_aqp1 = 0;
58int ehca_debug_level = 0; 60int ehca_debug_level = 0;
@@ -65,16 +67,16 @@ int ehca_static_rate = -1;
65int ehca_scaling_code = 0; 67int ehca_scaling_code = 0;
66int ehca_mr_largepage = 0; 68int ehca_mr_largepage = 0;
67 69
68module_param_named(open_aqp1, ehca_open_aqp1, int, 0); 70module_param_named(open_aqp1, ehca_open_aqp1, int, S_IRUGO);
69module_param_named(debug_level, ehca_debug_level, int, 0); 71module_param_named(debug_level, ehca_debug_level, int, S_IRUGO);
70module_param_named(hw_level, ehca_hw_level, int, 0); 72module_param_named(hw_level, ehca_hw_level, int, S_IRUGO);
71module_param_named(nr_ports, ehca_nr_ports, int, 0); 73module_param_named(nr_ports, ehca_nr_ports, int, S_IRUGO);
72module_param_named(use_hp_mr, ehca_use_hp_mr, int, 0); 74module_param_named(use_hp_mr, ehca_use_hp_mr, int, S_IRUGO);
73module_param_named(port_act_time, ehca_port_act_time, int, 0); 75module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO);
74module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, 0); 76module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, S_IRUGO);
75module_param_named(static_rate, ehca_static_rate, int, 0); 77module_param_named(static_rate, ehca_static_rate, int, S_IRUGO);
76module_param_named(scaling_code, ehca_scaling_code, int, 0); 78module_param_named(scaling_code, ehca_scaling_code, int, S_IRUGO);
77module_param_named(mr_largepage, ehca_mr_largepage, int, 0); 79module_param_named(mr_largepage, ehca_mr_largepage, int, S_IRUGO);
78 80
79MODULE_PARM_DESC(open_aqp1, 81MODULE_PARM_DESC(open_aqp1,
80 "AQP1 on startup (0: no (default), 1: yes)"); 82 "AQP1 on startup (0: no (default), 1: yes)");
@@ -273,7 +275,7 @@ int ehca_sense_attributes(struct ehca_shca *shca)
273 275
274 h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock); 276 h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock);
275 if (h_ret != H_SUCCESS) { 277 if (h_ret != H_SUCCESS) {
276 ehca_gen_err("Cannot query device properties. h_ret=%lx", 278 ehca_gen_err("Cannot query device properties. h_ret=%li",
277 h_ret); 279 h_ret);
278 ret = -EPERM; 280 ret = -EPERM;
279 goto sense_attributes1; 281 goto sense_attributes1;
@@ -332,7 +334,7 @@ int ehca_sense_attributes(struct ehca_shca *shca)
332 port = (struct hipz_query_port *)rblock; 334 port = (struct hipz_query_port *)rblock;
333 h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); 335 h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
334 if (h_ret != H_SUCCESS) { 336 if (h_ret != H_SUCCESS) {
335 ehca_gen_err("Cannot query port properties. h_ret=%lx", 337 ehca_gen_err("Cannot query port properties. h_ret=%li",
336 h_ret); 338 h_ret);
337 ret = -EPERM; 339 ret = -EPERM;
338 goto sense_attributes1; 340 goto sense_attributes1;
@@ -380,7 +382,7 @@ int ehca_init_device(struct ehca_shca *shca)
380 strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); 382 strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
381 shca->ib_device.owner = THIS_MODULE; 383 shca->ib_device.owner = THIS_MODULE;
382 384
383 shca->ib_device.uverbs_abi_ver = 7; 385 shca->ib_device.uverbs_abi_ver = 8;
384 shca->ib_device.uverbs_cmd_mask = 386 shca->ib_device.uverbs_cmd_mask =
385 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 387 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
386 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 388 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
@@ -526,13 +528,13 @@ static int ehca_destroy_aqp1(struct ehca_sport *sport)
526 528
527 ret = ib_destroy_qp(sport->ibqp_aqp1); 529 ret = ib_destroy_qp(sport->ibqp_aqp1);
528 if (ret) { 530 if (ret) {
529 ehca_gen_err("Cannot destroy AQP1 QP. ret=%x", ret); 531 ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret);
530 return ret; 532 return ret;
531 } 533 }
532 534
533 ret = ib_destroy_cq(sport->ibcq_aqp1); 535 ret = ib_destroy_cq(sport->ibcq_aqp1);
534 if (ret) 536 if (ret)
535 ehca_gen_err("Cannot destroy AQP1 CQ. ret=%x", ret); 537 ehca_gen_err("Cannot destroy AQP1 CQ. ret=%i", ret);
536 538
537 return ret; 539 return ret;
538} 540}
@@ -728,7 +730,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
728 ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr); 730 ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr);
729 731
730 if (ret) { 732 if (ret) {
731 ehca_err(&shca->ib_device, "Cannot create internal MR ret=%x", 733 ehca_err(&shca->ib_device, "Cannot create internal MR ret=%i",
732 ret); 734 ret);
733 goto probe5; 735 goto probe5;
734 } 736 }
@@ -736,7 +738,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
736 ret = ib_register_device(&shca->ib_device); 738 ret = ib_register_device(&shca->ib_device);
737 if (ret) { 739 if (ret) {
738 ehca_err(&shca->ib_device, 740 ehca_err(&shca->ib_device,
739 "ib_register_device() failed ret=%x", ret); 741 "ib_register_device() failed ret=%i", ret);
740 goto probe6; 742 goto probe6;
741 } 743 }
742 744
@@ -777,7 +779,7 @@ probe8:
777 ret = ehca_destroy_aqp1(&shca->sport[0]); 779 ret = ehca_destroy_aqp1(&shca->sport[0]);
778 if (ret) 780 if (ret)
779 ehca_err(&shca->ib_device, 781 ehca_err(&shca->ib_device,
780 "Cannot destroy AQP1 for port 1. ret=%x", ret); 782 "Cannot destroy AQP1 for port 1. ret=%i", ret);
781 783
782probe7: 784probe7:
783 ib_unregister_device(&shca->ib_device); 785 ib_unregister_device(&shca->ib_device);
@@ -826,7 +828,7 @@ static int __devexit ehca_remove(struct ibmebus_dev *dev)
826 if (ret) 828 if (ret)
827 ehca_err(&shca->ib_device, 829 ehca_err(&shca->ib_device,
828 "Cannot destroy AQP1 for port %x " 830 "Cannot destroy AQP1 for port %x "
829 "ret=%x", ret, i); 831 "ret=%i", ret, i);
830 } 832 }
831 } 833 }
832 834
@@ -835,20 +837,20 @@ static int __devexit ehca_remove(struct ibmebus_dev *dev)
835 ret = ehca_dereg_internal_maxmr(shca); 837 ret = ehca_dereg_internal_maxmr(shca);
836 if (ret) 838 if (ret)
837 ehca_err(&shca->ib_device, 839 ehca_err(&shca->ib_device,
838 "Cannot destroy internal MR. ret=%x", ret); 840 "Cannot destroy internal MR. ret=%i", ret);
839 841
840 ret = ehca_dealloc_pd(&shca->pd->ib_pd); 842 ret = ehca_dealloc_pd(&shca->pd->ib_pd);
841 if (ret) 843 if (ret)
842 ehca_err(&shca->ib_device, 844 ehca_err(&shca->ib_device,
843 "Cannot destroy internal PD. ret=%x", ret); 845 "Cannot destroy internal PD. ret=%i", ret);
844 846
845 ret = ehca_destroy_eq(shca, &shca->eq); 847 ret = ehca_destroy_eq(shca, &shca->eq);
846 if (ret) 848 if (ret)
847 ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%x", ret); 849 ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%i", ret);
848 850
849 ret = ehca_destroy_eq(shca, &shca->neq); 851 ret = ehca_destroy_eq(shca, &shca->neq);
850 if (ret) 852 if (ret)
851 ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%x", ret); 853 ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%i", ret);
852 854
853 ib_dealloc_device(&shca->ib_device); 855 ib_dealloc_device(&shca->ib_device);
854 856
@@ -909,7 +911,7 @@ int __init ehca_module_init(void)
909 int ret; 911 int ret;
910 912
911 printk(KERN_INFO "eHCA Infiniband Device Driver " 913 printk(KERN_INFO "eHCA Infiniband Device Driver "
912 "(Rel.: SVNEHCA_0023)\n"); 914 "(Version " HCAD_VERSION ")\n");
913 915
914 ret = ehca_create_comp_pool(); 916 ret = ehca_create_comp_pool();
915 if (ret) { 917 if (ret) {
diff --git a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/infiniband/hw/ehca/ehca_mcast.c
index 32a870660bfe..e3ef0264ccc6 100644
--- a/drivers/infiniband/hw/ehca/ehca_mcast.c
+++ b/drivers/infiniband/hw/ehca/ehca_mcast.c
@@ -88,7 +88,7 @@ int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
88 if (h_ret != H_SUCCESS) 88 if (h_ret != H_SUCCESS)
89 ehca_err(ibqp->device, 89 ehca_err(ibqp->device,
90 "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed " 90 "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed "
91 "h_ret=%lx", my_qp, ibqp->qp_num, h_ret); 91 "h_ret=%li", my_qp, ibqp->qp_num, h_ret);
92 92
93 return ehca2ib_return_code(h_ret); 93 return ehca2ib_return_code(h_ret);
94} 94}
@@ -125,7 +125,7 @@ int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
125 if (h_ret != H_SUCCESS) 125 if (h_ret != H_SUCCESS)
126 ehca_err(ibqp->device, 126 ehca_err(ibqp->device,
127 "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed " 127 "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed "
128 "h_ret=%lx", my_qp, ibqp->qp_num, h_ret); 128 "h_ret=%li", my_qp, ibqp->qp_num, h_ret);
129 129
130 return ehca2ib_return_code(h_ret); 130 return ehca2ib_return_code(h_ret);
131} 131}
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index d97eda3e1da0..da88738265ed 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -51,6 +51,7 @@
51 51
52#define NUM_CHUNKS(length, chunk_size) \ 52#define NUM_CHUNKS(length, chunk_size) \
53 (((length) + (chunk_size - 1)) / (chunk_size)) 53 (((length) + (chunk_size - 1)) / (chunk_size))
54
54/* max number of rpages (per hcall register_rpages) */ 55/* max number of rpages (per hcall register_rpages) */
55#define MAX_RPAGES 512 56#define MAX_RPAGES 512
56 57
@@ -64,6 +65,11 @@ enum ehca_mr_pgsize {
64 EHCA_MR_PGSIZE16M = 0x1000000L 65 EHCA_MR_PGSIZE16M = 0x1000000L
65}; 66};
66 67
68#define EHCA_MR_PGSHIFT4K 12
69#define EHCA_MR_PGSHIFT64K 16
70#define EHCA_MR_PGSHIFT1M 20
71#define EHCA_MR_PGSHIFT16M 24
72
67static u32 ehca_encode_hwpage_size(u32 pgsize) 73static u32 ehca_encode_hwpage_size(u32 pgsize)
68{ 74{
69 u32 idx = 0; 75 u32 idx = 0;
@@ -159,7 +165,7 @@ struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
159 165
160get_dma_mr_exit0: 166get_dma_mr_exit0:
161 if (IS_ERR(ib_mr)) 167 if (IS_ERR(ib_mr))
162 ehca_err(&shca->ib_device, "rc=%lx pd=%p mr_access_flags=%x ", 168 ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x",
163 PTR_ERR(ib_mr), pd, mr_access_flags); 169 PTR_ERR(ib_mr), pd, mr_access_flags);
164 return ib_mr; 170 return ib_mr;
165} /* end ehca_get_dma_mr() */ 171} /* end ehca_get_dma_mr() */
@@ -271,7 +277,7 @@ reg_phys_mr_exit1:
271 ehca_mr_delete(e_mr); 277 ehca_mr_delete(e_mr);
272reg_phys_mr_exit0: 278reg_phys_mr_exit0:
273 if (IS_ERR(ib_mr)) 279 if (IS_ERR(ib_mr))
274 ehca_err(pd->device, "rc=%lx pd=%p phys_buf_array=%p " 280 ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p "
275 "num_phys_buf=%x mr_access_flags=%x iova_start=%p", 281 "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
276 PTR_ERR(ib_mr), pd, phys_buf_array, 282 PTR_ERR(ib_mr), pd, phys_buf_array,
277 num_phys_buf, mr_access_flags, iova_start); 283 num_phys_buf, mr_access_flags, iova_start);
@@ -347,17 +353,16 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
347 /* select proper hw_pgsize */ 353 /* select proper hw_pgsize */
348 if (ehca_mr_largepage && 354 if (ehca_mr_largepage &&
349 (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)) { 355 (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)) {
350 if (length <= EHCA_MR_PGSIZE4K 356 int page_shift = PAGE_SHIFT;
351 && PAGE_SIZE == EHCA_MR_PGSIZE4K) 357 if (e_mr->umem->hugetlb) {
352 hwpage_size = EHCA_MR_PGSIZE4K; 358 /* determine page_shift, clamp between 4K and 16M */
353 else if (length <= EHCA_MR_PGSIZE64K) 359 page_shift = (fls64(length - 1) + 3) & ~3;
354 hwpage_size = EHCA_MR_PGSIZE64K; 360 page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
355 else if (length <= EHCA_MR_PGSIZE1M) 361 EHCA_MR_PGSHIFT16M);
356 hwpage_size = EHCA_MR_PGSIZE1M; 362 }
357 else 363 hwpage_size = 1UL << page_shift;
358 hwpage_size = EHCA_MR_PGSIZE16M;
359 } else 364 } else
360 hwpage_size = EHCA_MR_PGSIZE4K; 365 hwpage_size = EHCA_MR_PGSIZE4K; /* ehca1 only supports 4k */
361 ehca_dbg(pd->device, "hwpage_size=%lx", hwpage_size); 366 ehca_dbg(pd->device, "hwpage_size=%lx", hwpage_size);
362 367
363reg_user_mr_fallback: 368reg_user_mr_fallback:
@@ -403,8 +408,7 @@ reg_user_mr_exit1:
403 ehca_mr_delete(e_mr); 408 ehca_mr_delete(e_mr);
404reg_user_mr_exit0: 409reg_user_mr_exit0:
405 if (IS_ERR(ib_mr)) 410 if (IS_ERR(ib_mr))
406 ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x" 411 ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p",
407 " udata=%p",
408 PTR_ERR(ib_mr), pd, mr_access_flags, udata); 412 PTR_ERR(ib_mr), pd, mr_access_flags, udata);
409 return ib_mr; 413 return ib_mr;
410} /* end ehca_reg_user_mr() */ 414} /* end ehca_reg_user_mr() */
@@ -565,7 +569,7 @@ rereg_phys_mr_exit1:
565 spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); 569 spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
566rereg_phys_mr_exit0: 570rereg_phys_mr_exit0:
567 if (ret) 571 if (ret)
568 ehca_err(mr->device, "ret=%x mr=%p mr_rereg_mask=%x pd=%p " 572 ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p "
569 "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x " 573 "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
570 "iova_start=%p", 574 "iova_start=%p",
571 ret, mr, mr_rereg_mask, pd, phys_buf_array, 575 ret, mr, mr_rereg_mask, pd, phys_buf_array,
@@ -607,7 +611,7 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
607 611
608 h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout); 612 h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
609 if (h_ret != H_SUCCESS) { 613 if (h_ret != H_SUCCESS) {
610 ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lx mr=%p " 614 ehca_err(mr->device, "hipz_mr_query failed, h_ret=%li mr=%p "
611 "hca_hndl=%lx mr_hndl=%lx lkey=%x", 615 "hca_hndl=%lx mr_hndl=%lx lkey=%x",
612 h_ret, mr, shca->ipz_hca_handle.handle, 616 h_ret, mr, shca->ipz_hca_handle.handle,
613 e_mr->ipz_mr_handle.handle, mr->lkey); 617 e_mr->ipz_mr_handle.handle, mr->lkey);
@@ -625,7 +629,7 @@ query_mr_exit1:
625 spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); 629 spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
626query_mr_exit0: 630query_mr_exit0:
627 if (ret) 631 if (ret)
628 ehca_err(mr->device, "ret=%x mr=%p mr_attr=%p", 632 ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p",
629 ret, mr, mr_attr); 633 ret, mr, mr_attr);
630 return ret; 634 return ret;
631} /* end ehca_query_mr() */ 635} /* end ehca_query_mr() */
@@ -667,7 +671,7 @@ int ehca_dereg_mr(struct ib_mr *mr)
667 /* TODO: BUSY: MR still has bound window(s) */ 671 /* TODO: BUSY: MR still has bound window(s) */
668 h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); 672 h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
669 if (h_ret != H_SUCCESS) { 673 if (h_ret != H_SUCCESS) {
670 ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lx shca=%p " 674 ehca_err(mr->device, "hipz_free_mr failed, h_ret=%li shca=%p "
671 "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x", 675 "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x",
672 h_ret, shca, e_mr, shca->ipz_hca_handle.handle, 676 h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
673 e_mr->ipz_mr_handle.handle, mr->lkey); 677 e_mr->ipz_mr_handle.handle, mr->lkey);
@@ -683,7 +687,7 @@ int ehca_dereg_mr(struct ib_mr *mr)
683 687
684dereg_mr_exit0: 688dereg_mr_exit0:
685 if (ret) 689 if (ret)
686 ehca_err(mr->device, "ret=%x mr=%p", ret, mr); 690 ehca_err(mr->device, "ret=%i mr=%p", ret, mr);
687 return ret; 691 return ret;
688} /* end ehca_dereg_mr() */ 692} /* end ehca_dereg_mr() */
689 693
@@ -708,7 +712,7 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
708 h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw, 712 h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
709 e_pd->fw_pd, &hipzout); 713 e_pd->fw_pd, &hipzout);
710 if (h_ret != H_SUCCESS) { 714 if (h_ret != H_SUCCESS) {
711 ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lx " 715 ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%li "
712 "shca=%p hca_hndl=%lx mw=%p", 716 "shca=%p hca_hndl=%lx mw=%p",
713 h_ret, shca, shca->ipz_hca_handle.handle, e_mw); 717 h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
714 ib_mw = ERR_PTR(ehca2ib_return_code(h_ret)); 718 ib_mw = ERR_PTR(ehca2ib_return_code(h_ret));
@@ -723,7 +727,7 @@ alloc_mw_exit1:
723 ehca_mw_delete(e_mw); 727 ehca_mw_delete(e_mw);
724alloc_mw_exit0: 728alloc_mw_exit0:
725 if (IS_ERR(ib_mw)) 729 if (IS_ERR(ib_mw))
726 ehca_err(pd->device, "rc=%lx pd=%p", PTR_ERR(ib_mw), pd); 730 ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd);
727 return ib_mw; 731 return ib_mw;
728} /* end ehca_alloc_mw() */ 732} /* end ehca_alloc_mw() */
729 733
@@ -750,7 +754,7 @@ int ehca_dealloc_mw(struct ib_mw *mw)
750 754
751 h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw); 755 h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
752 if (h_ret != H_SUCCESS) { 756 if (h_ret != H_SUCCESS) {
753 ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lx shca=%p " 757 ehca_err(mw->device, "hipz_free_mw failed, h_ret=%li shca=%p "
754 "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx", 758 "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx",
755 h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle, 759 h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
756 e_mw->ipz_mw_handle.handle); 760 e_mw->ipz_mw_handle.handle);
@@ -846,10 +850,6 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
846alloc_fmr_exit1: 850alloc_fmr_exit1:
847 ehca_mr_delete(e_fmr); 851 ehca_mr_delete(e_fmr);
848alloc_fmr_exit0: 852alloc_fmr_exit0:
849 if (IS_ERR(ib_fmr))
850 ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x "
851 "fmr_attr=%p", PTR_ERR(ib_fmr), pd,
852 mr_access_flags, fmr_attr);
853 return ib_fmr; 853 return ib_fmr;
854} /* end ehca_alloc_fmr() */ 854} /* end ehca_alloc_fmr() */
855 855
@@ -916,7 +916,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr,
916 916
917map_phys_fmr_exit0: 917map_phys_fmr_exit0:
918 if (ret) 918 if (ret)
919 ehca_err(fmr->device, "ret=%x fmr=%p page_list=%p list_len=%x " 919 ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x "
920 "iova=%lx", ret, fmr, page_list, list_len, iova); 920 "iova=%lx", ret, fmr, page_list, list_len, iova);
921 return ret; 921 return ret;
922} /* end ehca_map_phys_fmr() */ 922} /* end ehca_map_phys_fmr() */
@@ -979,7 +979,7 @@ int ehca_unmap_fmr(struct list_head *fmr_list)
979 979
980unmap_fmr_exit0: 980unmap_fmr_exit0:
981 if (ret) 981 if (ret)
982 ehca_gen_err("ret=%x fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x", 982 ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
983 ret, fmr_list, num_fmr, unmap_fmr_cnt); 983 ret, fmr_list, num_fmr, unmap_fmr_cnt);
984 return ret; 984 return ret;
985} /* end ehca_unmap_fmr() */ 985} /* end ehca_unmap_fmr() */
@@ -1003,7 +1003,7 @@ int ehca_dealloc_fmr(struct ib_fmr *fmr)
1003 1003
1004 h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); 1004 h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
1005 if (h_ret != H_SUCCESS) { 1005 if (h_ret != H_SUCCESS) {
1006 ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lx e_fmr=%p " 1006 ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%li e_fmr=%p "
1007 "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x", 1007 "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x",
1008 h_ret, e_fmr, shca->ipz_hca_handle.handle, 1008 h_ret, e_fmr, shca->ipz_hca_handle.handle,
1009 e_fmr->ipz_mr_handle.handle, fmr->lkey); 1009 e_fmr->ipz_mr_handle.handle, fmr->lkey);
@@ -1016,7 +1016,7 @@ int ehca_dealloc_fmr(struct ib_fmr *fmr)
1016 1016
1017free_fmr_exit0: 1017free_fmr_exit0:
1018 if (ret) 1018 if (ret)
1019 ehca_err(&shca->ib_device, "ret=%x fmr=%p", ret, fmr); 1019 ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr);
1020 return ret; 1020 return ret;
1021} /* end ehca_dealloc_fmr() */ 1021} /* end ehca_dealloc_fmr() */
1022 1022
@@ -1046,7 +1046,7 @@ int ehca_reg_mr(struct ehca_shca *shca,
1046 (u64)iova_start, size, hipz_acl, 1046 (u64)iova_start, size, hipz_acl,
1047 e_pd->fw_pd, &hipzout); 1047 e_pd->fw_pd, &hipzout);
1048 if (h_ret != H_SUCCESS) { 1048 if (h_ret != H_SUCCESS) {
1049 ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lx " 1049 ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%li "
1050 "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle); 1050 "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle);
1051 ret = ehca2ib_return_code(h_ret); 1051 ret = ehca2ib_return_code(h_ret);
1052 goto ehca_reg_mr_exit0; 1052 goto ehca_reg_mr_exit0;
@@ -1072,9 +1072,9 @@ int ehca_reg_mr(struct ehca_shca *shca,
1072ehca_reg_mr_exit1: 1072ehca_reg_mr_exit1:
1073 h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); 1073 h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
1074 if (h_ret != H_SUCCESS) { 1074 if (h_ret != H_SUCCESS) {
1075 ehca_err(&shca->ib_device, "h_ret=%lx shca=%p e_mr=%p " 1075 ehca_err(&shca->ib_device, "h_ret=%li shca=%p e_mr=%p "
1076 "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x " 1076 "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x "
1077 "pginfo=%p num_kpages=%lx num_hwpages=%lx ret=%x", 1077 "pginfo=%p num_kpages=%lx num_hwpages=%lx ret=%i",
1078 h_ret, shca, e_mr, iova_start, size, acl, e_pd, 1078 h_ret, shca, e_mr, iova_start, size, acl, e_pd,
1079 hipzout.lkey, pginfo, pginfo->num_kpages, 1079 hipzout.lkey, pginfo, pginfo->num_kpages,
1080 pginfo->num_hwpages, ret); 1080 pginfo->num_hwpages, ret);
@@ -1083,7 +1083,7 @@ ehca_reg_mr_exit1:
1083 } 1083 }
1084ehca_reg_mr_exit0: 1084ehca_reg_mr_exit0:
1085 if (ret) 1085 if (ret)
1086 ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p " 1086 ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
1087 "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p " 1087 "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
1088 "num_kpages=%lx num_hwpages=%lx", 1088 "num_kpages=%lx num_hwpages=%lx",
1089 ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, 1089 ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
@@ -1127,7 +1127,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
1127 ret = ehca_set_pagebuf(pginfo, rnum, kpage); 1127 ret = ehca_set_pagebuf(pginfo, rnum, kpage);
1128 if (ret) { 1128 if (ret) {
1129 ehca_err(&shca->ib_device, "ehca_set_pagebuf " 1129 ehca_err(&shca->ib_device, "ehca_set_pagebuf "
1130 "bad rc, ret=%x rnum=%x kpage=%p", 1130 "bad rc, ret=%i rnum=%x kpage=%p",
1131 ret, rnum, kpage); 1131 ret, rnum, kpage);
1132 goto ehca_reg_mr_rpages_exit1; 1132 goto ehca_reg_mr_rpages_exit1;
1133 } 1133 }
@@ -1155,7 +1155,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
1155 */ 1155 */
1156 if (h_ret != H_SUCCESS) { 1156 if (h_ret != H_SUCCESS) {
1157 ehca_err(&shca->ib_device, "last " 1157 ehca_err(&shca->ib_device, "last "
1158 "hipz_reg_rpage_mr failed, h_ret=%lx " 1158 "hipz_reg_rpage_mr failed, h_ret=%li "
1159 "e_mr=%p i=%x hca_hndl=%lx mr_hndl=%lx" 1159 "e_mr=%p i=%x hca_hndl=%lx mr_hndl=%lx"
1160 " lkey=%x", h_ret, e_mr, i, 1160 " lkey=%x", h_ret, e_mr, i,
1161 shca->ipz_hca_handle.handle, 1161 shca->ipz_hca_handle.handle,
@@ -1167,7 +1167,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
1167 ret = 0; 1167 ret = 0;
1168 } else if (h_ret != H_PAGE_REGISTERED) { 1168 } else if (h_ret != H_PAGE_REGISTERED) {
1169 ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, " 1169 ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
1170 "h_ret=%lx e_mr=%p i=%x lkey=%x hca_hndl=%lx " 1170 "h_ret=%li e_mr=%p i=%x lkey=%x hca_hndl=%lx "
1171 "mr_hndl=%lx", h_ret, e_mr, i, 1171 "mr_hndl=%lx", h_ret, e_mr, i,
1172 e_mr->ib.ib_mr.lkey, 1172 e_mr->ib.ib_mr.lkey,
1173 shca->ipz_hca_handle.handle, 1173 shca->ipz_hca_handle.handle,
@@ -1183,7 +1183,7 @@ ehca_reg_mr_rpages_exit1:
1183 ehca_free_fw_ctrlblock(kpage); 1183 ehca_free_fw_ctrlblock(kpage);
1184ehca_reg_mr_rpages_exit0: 1184ehca_reg_mr_rpages_exit0:
1185 if (ret) 1185 if (ret)
1186 ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p pginfo=%p " 1186 ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p "
1187 "num_kpages=%lx num_hwpages=%lx", ret, shca, e_mr, 1187 "num_kpages=%lx num_hwpages=%lx", ret, shca, e_mr,
1188 pginfo, pginfo->num_kpages, pginfo->num_hwpages); 1188 pginfo, pginfo->num_kpages, pginfo->num_hwpages);
1189 return ret; 1189 return ret;
@@ -1244,7 +1244,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
1244 * (MW bound or MR is shared) 1244 * (MW bound or MR is shared)
1245 */ 1245 */
1246 ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed " 1246 ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
1247 "(Rereg1), h_ret=%lx e_mr=%p", h_ret, e_mr); 1247 "(Rereg1), h_ret=%li e_mr=%p", h_ret, e_mr);
1248 *pginfo = pginfo_save; 1248 *pginfo = pginfo_save;
1249 ret = -EAGAIN; 1249 ret = -EAGAIN;
1250 } else if ((u64 *)hipzout.vaddr != iova_start) { 1250 } else if ((u64 *)hipzout.vaddr != iova_start) {
@@ -1273,7 +1273,7 @@ ehca_rereg_mr_rereg1_exit1:
1273 ehca_free_fw_ctrlblock(kpage); 1273 ehca_free_fw_ctrlblock(kpage);
1274ehca_rereg_mr_rereg1_exit0: 1274ehca_rereg_mr_rereg1_exit0:
1275 if ( ret && (ret != -EAGAIN) ) 1275 if ( ret && (ret != -EAGAIN) )
1276 ehca_err(&shca->ib_device, "ret=%x lkey=%x rkey=%x " 1276 ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x "
1277 "pginfo=%p num_kpages=%lx num_hwpages=%lx", 1277 "pginfo=%p num_kpages=%lx num_hwpages=%lx",
1278 ret, *lkey, *rkey, pginfo, pginfo->num_kpages, 1278 ret, *lkey, *rkey, pginfo, pginfo->num_kpages,
1279 pginfo->num_hwpages); 1279 pginfo->num_hwpages);
@@ -1334,7 +1334,7 @@ int ehca_rereg_mr(struct ehca_shca *shca,
1334 h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); 1334 h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
1335 if (h_ret != H_SUCCESS) { 1335 if (h_ret != H_SUCCESS) {
1336 ehca_err(&shca->ib_device, "hipz_free_mr failed, " 1336 ehca_err(&shca->ib_device, "hipz_free_mr failed, "
1337 "h_ret=%lx e_mr=%p hca_hndl=%lx mr_hndl=%lx " 1337 "h_ret=%li e_mr=%p hca_hndl=%lx mr_hndl=%lx "
1338 "mr->lkey=%x", 1338 "mr->lkey=%x",
1339 h_ret, e_mr, shca->ipz_hca_handle.handle, 1339 h_ret, e_mr, shca->ipz_hca_handle.handle,
1340 e_mr->ipz_mr_handle.handle, 1340 e_mr->ipz_mr_handle.handle,
@@ -1366,7 +1366,7 @@ int ehca_rereg_mr(struct ehca_shca *shca,
1366 1366
1367ehca_rereg_mr_exit0: 1367ehca_rereg_mr_exit0:
1368 if (ret) 1368 if (ret)
1369 ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p " 1369 ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
1370 "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p " 1370 "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
1371 "num_kpages=%lx lkey=%x rkey=%x rereg_1_hcall=%x " 1371 "num_kpages=%lx lkey=%x rkey=%x rereg_1_hcall=%x "
1372 "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size, 1372 "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
@@ -1410,7 +1410,7 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca,
1410 * FMRs are not shared and no MW bound to FMRs 1410 * FMRs are not shared and no MW bound to FMRs
1411 */ 1411 */
1412 ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " 1412 ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
1413 "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx " 1413 "(Rereg1), h_ret=%li e_fmr=%p hca_hndl=%lx "
1414 "mr_hndl=%lx lkey=%x lkey_out=%x", 1414 "mr_hndl=%lx lkey=%x lkey_out=%x",
1415 h_ret, e_fmr, shca->ipz_hca_handle.handle, 1415 h_ret, e_fmr, shca->ipz_hca_handle.handle,
1416 e_fmr->ipz_mr_handle.handle, 1416 e_fmr->ipz_mr_handle.handle,
@@ -1422,7 +1422,7 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca,
1422 h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); 1422 h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
1423 if (h_ret != H_SUCCESS) { 1423 if (h_ret != H_SUCCESS) {
1424 ehca_err(&shca->ib_device, "hipz_free_mr failed, " 1424 ehca_err(&shca->ib_device, "hipz_free_mr failed, "
1425 "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx " 1425 "h_ret=%li e_fmr=%p hca_hndl=%lx mr_hndl=%lx "
1426 "lkey=%x", 1426 "lkey=%x",
1427 h_ret, e_fmr, shca->ipz_hca_handle.handle, 1427 h_ret, e_fmr, shca->ipz_hca_handle.handle,
1428 e_fmr->ipz_mr_handle.handle, 1428 e_fmr->ipz_mr_handle.handle,
@@ -1457,7 +1457,7 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca,
1457 1457
1458ehca_unmap_one_fmr_exit0: 1458ehca_unmap_one_fmr_exit0:
1459 if (ret) 1459 if (ret)
1460 ehca_err(&shca->ib_device, "ret=%x tmp_lkey=%x tmp_rkey=%x " 1460 ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x "
1461 "fmr_max_pages=%x", 1461 "fmr_max_pages=%x",
1462 ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages); 1462 ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages);
1463 return ret; 1463 return ret;
@@ -1486,7 +1486,7 @@ int ehca_reg_smr(struct ehca_shca *shca,
1486 (u64)iova_start, hipz_acl, e_pd->fw_pd, 1486 (u64)iova_start, hipz_acl, e_pd->fw_pd,
1487 &hipzout); 1487 &hipzout);
1488 if (h_ret != H_SUCCESS) { 1488 if (h_ret != H_SUCCESS) {
1489 ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx " 1489 ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%li "
1490 "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x " 1490 "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
1491 "e_pd=%p hca_hndl=%lx mr_hndl=%lx lkey=%x", 1491 "e_pd=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
1492 h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd, 1492 h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
@@ -1510,7 +1510,7 @@ int ehca_reg_smr(struct ehca_shca *shca,
1510 1510
1511ehca_reg_smr_exit0: 1511ehca_reg_smr_exit0:
1512 if (ret) 1512 if (ret)
1513 ehca_err(&shca->ib_device, "ret=%x shca=%p e_origmr=%p " 1513 ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p "
1514 "e_newmr=%p iova_start=%p acl=%x e_pd=%p", 1514 "e_newmr=%p iova_start=%p acl=%x e_pd=%p",
1515 ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd); 1515 ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
1516 return ret; 1516 return ret;
@@ -1585,7 +1585,7 @@ ehca_reg_internal_maxmr_exit1:
1585 ehca_mr_delete(e_mr); 1585 ehca_mr_delete(e_mr);
1586ehca_reg_internal_maxmr_exit0: 1586ehca_reg_internal_maxmr_exit0:
1587 if (ret) 1587 if (ret)
1588 ehca_err(&shca->ib_device, "ret=%x shca=%p e_pd=%p e_maxmr=%p", 1588 ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p",
1589 ret, shca, e_pd, e_maxmr); 1589 ret, shca, e_pd, e_maxmr);
1590 return ret; 1590 return ret;
1591} /* end ehca_reg_internal_maxmr() */ 1591} /* end ehca_reg_internal_maxmr() */
@@ -1612,7 +1612,7 @@ int ehca_reg_maxmr(struct ehca_shca *shca,
1612 (u64)iova_start, hipz_acl, e_pd->fw_pd, 1612 (u64)iova_start, hipz_acl, e_pd->fw_pd,
1613 &hipzout); 1613 &hipzout);
1614 if (h_ret != H_SUCCESS) { 1614 if (h_ret != H_SUCCESS) {
1615 ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx " 1615 ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%li "
1616 "e_origmr=%p hca_hndl=%lx mr_hndl=%lx lkey=%x", 1616 "e_origmr=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
1617 h_ret, e_origmr, shca->ipz_hca_handle.handle, 1617 h_ret, e_origmr, shca->ipz_hca_handle.handle,
1618 e_origmr->ipz_mr_handle.handle, 1618 e_origmr->ipz_mr_handle.handle,
@@ -1653,7 +1653,7 @@ int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
1653 ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr); 1653 ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr);
1654 if (ret) { 1654 if (ret) {
1655 ehca_err(&shca->ib_device, "dereg internal max-MR failed, " 1655 ehca_err(&shca->ib_device, "dereg internal max-MR failed, "
1656 "ret=%x e_maxmr=%p shca=%p lkey=%x", 1656 "ret=%i e_maxmr=%p shca=%p lkey=%x",
1657 ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey); 1657 ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey);
1658 shca->maxmr = e_maxmr; 1658 shca->maxmr = e_maxmr;
1659 goto ehca_dereg_internal_maxmr_exit0; 1659 goto ehca_dereg_internal_maxmr_exit0;
@@ -1663,7 +1663,7 @@ int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
1663 1663
1664ehca_dereg_internal_maxmr_exit0: 1664ehca_dereg_internal_maxmr_exit0:
1665 if (ret) 1665 if (ret)
1666 ehca_err(&shca->ib_device, "ret=%x shca=%p shca->maxmr=%p", 1666 ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p",
1667 ret, shca, shca->maxmr); 1667 ret, shca, shca->maxmr);
1668 return ret; 1668 return ret;
1669} /* end ehca_dereg_internal_maxmr() */ 1669} /* end ehca_dereg_internal_maxmr() */
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 84d435a5ee11..e2bd62be11e7 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -273,6 +273,7 @@ static inline void queue2resp(struct ipzu_queue_resp *resp,
273 resp->queue_length = queue->queue_length; 273 resp->queue_length = queue->queue_length;
274 resp->pagesize = queue->pagesize; 274 resp->pagesize = queue->pagesize;
275 resp->toggle_state = queue->toggle_state; 275 resp->toggle_state = queue->toggle_state;
276 resp->offset = queue->offset;
276} 277}
277 278
278/* 279/*
@@ -309,7 +310,7 @@ static inline int init_qp_queue(struct ehca_shca *shca,
309 } 310 }
310 311
311 if (!ipz_rc) { 312 if (!ipz_rc) {
312 ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%x", 313 ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%i",
313 ipz_rc); 314 ipz_rc);
314 return -EBUSY; 315 return -EBUSY;
315 } 316 }
@@ -333,7 +334,7 @@ static inline int init_qp_queue(struct ehca_shca *shca,
333 if (cnt == (nr_q_pages - 1)) { /* last page! */ 334 if (cnt == (nr_q_pages - 1)) { /* last page! */
334 if (h_ret != expected_hret) { 335 if (h_ret != expected_hret) {
335 ehca_err(ib_dev, "hipz_qp_register_rpage() " 336 ehca_err(ib_dev, "hipz_qp_register_rpage() "
336 "h_ret= %lx ", h_ret); 337 "h_ret=%li", h_ret);
337 ret = ehca2ib_return_code(h_ret); 338 ret = ehca2ib_return_code(h_ret);
338 goto init_qp_queue1; 339 goto init_qp_queue1;
339 } 340 }
@@ -347,7 +348,7 @@ static inline int init_qp_queue(struct ehca_shca *shca,
347 } else { 348 } else {
348 if (h_ret != H_PAGE_REGISTERED) { 349 if (h_ret != H_PAGE_REGISTERED) {
349 ehca_err(ib_dev, "hipz_qp_register_rpage() " 350 ehca_err(ib_dev, "hipz_qp_register_rpage() "
350 "h_ret= %lx ", h_ret); 351 "h_ret=%li", h_ret);
351 ret = ehca2ib_return_code(h_ret); 352 ret = ehca2ib_return_code(h_ret);
352 goto init_qp_queue1; 353 goto init_qp_queue1;
353 } 354 }
@@ -512,7 +513,7 @@ static struct ehca_qp *internal_create_qp(
512 } else if (init_attr->cap.max_send_wr > 255) { 513 } else if (init_attr->cap.max_send_wr > 255) {
513 ehca_err(pd->device, 514 ehca_err(pd->device,
514 "Invalid Number of " 515 "Invalid Number of "
515 "ax_send_wr=%x for UD QP_TYPE=%x", 516 "max_send_wr=%x for UD QP_TYPE=%x",
516 init_attr->cap.max_send_wr, qp_type); 517 init_attr->cap.max_send_wr, qp_type);
517 return ERR_PTR(-EINVAL); 518 return ERR_PTR(-EINVAL);
518 } 519 }
@@ -523,6 +524,18 @@ static struct ehca_qp *internal_create_qp(
523 return ERR_PTR(-EINVAL); 524 return ERR_PTR(-EINVAL);
524 break; 525 break;
525 } 526 }
527 } else {
528 int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI
529 || qp_type == IB_QPT_GSI) ? 250 : 252;
530
531 if (init_attr->cap.max_send_sge > max_sge
532 || init_attr->cap.max_recv_sge > max_sge) {
533 ehca_err(pd->device, "Invalid number of SGEs requested "
534 "send_sge=%x recv_sge=%x max_sge=%x",
535 init_attr->cap.max_send_sge,
536 init_attr->cap.max_recv_sge, max_sge);
537 return ERR_PTR(-EINVAL);
538 }
526 } 539 }
527 540
528 if (pd->uobject && udata) 541 if (pd->uobject && udata)
@@ -556,7 +569,6 @@ static struct ehca_qp *internal_create_qp(
556 write_lock_irqsave(&ehca_qp_idr_lock, flags); 569 write_lock_irqsave(&ehca_qp_idr_lock, flags);
557 ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token); 570 ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token);
558 write_unlock_irqrestore(&ehca_qp_idr_lock, flags); 571 write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
559
560 } while (ret == -EAGAIN); 572 } while (ret == -EAGAIN);
561 573
562 if (ret) { 574 if (ret) {
@@ -565,11 +577,17 @@ static struct ehca_qp *internal_create_qp(
565 goto create_qp_exit0; 577 goto create_qp_exit0;
566 } 578 }
567 579
580 if (my_qp->token > 0x1FFFFFF) {
581 ret = -EINVAL;
582 ehca_err(pd->device, "Invalid number of qp");
583 goto create_qp_exit1;
584 }
585
568 parms.servicetype = ibqptype2servicetype(qp_type); 586 parms.servicetype = ibqptype2servicetype(qp_type);
569 if (parms.servicetype < 0) { 587 if (parms.servicetype < 0) {
570 ret = -EINVAL; 588 ret = -EINVAL;
571 ehca_err(pd->device, "Invalid qp_type=%x", qp_type); 589 ehca_err(pd->device, "Invalid qp_type=%x", qp_type);
572 goto create_qp_exit0; 590 goto create_qp_exit1;
573 } 591 }
574 592
575 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) 593 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
@@ -598,8 +616,7 @@ static struct ehca_qp *internal_create_qp(
598 parms.squeue.max_sge = max_send_sge; 616 parms.squeue.max_sge = max_send_sge;
599 parms.rqueue.max_sge = max_recv_sge; 617 parms.rqueue.max_sge = max_recv_sge;
600 618
601 if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap) 619 if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) {
602 && !(context && udata)) { /* no small QP support in userspace ATM */
603 if (HAS_SQ(my_qp)) 620 if (HAS_SQ(my_qp))
604 ehca_determine_small_queue( 621 ehca_determine_small_queue(
605 &parms.squeue, max_send_sge, is_llqp); 622 &parms.squeue, max_send_sge, is_llqp);
@@ -612,7 +629,7 @@ static struct ehca_qp *internal_create_qp(
612 629
613 h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms); 630 h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms);
614 if (h_ret != H_SUCCESS) { 631 if (h_ret != H_SUCCESS) {
615 ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lx", 632 ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%li",
616 h_ret); 633 h_ret);
617 ret = ehca2ib_return_code(h_ret); 634 ret = ehca2ib_return_code(h_ret);
618 goto create_qp_exit1; 635 goto create_qp_exit1;
@@ -666,7 +683,7 @@ static struct ehca_qp *internal_create_qp(
666 &parms.squeue, swqe_size); 683 &parms.squeue, swqe_size);
667 if (ret) { 684 if (ret) {
668 ehca_err(pd->device, "Couldn't initialize squeue " 685 ehca_err(pd->device, "Couldn't initialize squeue "
669 "and pages ret=%x", ret); 686 "and pages ret=%i", ret);
670 goto create_qp_exit2; 687 goto create_qp_exit2;
671 } 688 }
672 } 689 }
@@ -677,7 +694,7 @@ static struct ehca_qp *internal_create_qp(
677 H_SUCCESS, &parms.rqueue, rwqe_size); 694 H_SUCCESS, &parms.rqueue, rwqe_size);
678 if (ret) { 695 if (ret) {
679 ehca_err(pd->device, "Couldn't initialize rqueue " 696 ehca_err(pd->device, "Couldn't initialize rqueue "
680 "and pages ret=%x", ret); 697 "and pages ret=%i", ret);
681 goto create_qp_exit3; 698 goto create_qp_exit3;
682 } 699 }
683 } 700 }
@@ -714,8 +731,6 @@ static struct ehca_qp *internal_create_qp(
714 if (qp_type == IB_QPT_GSI) { 731 if (qp_type == IB_QPT_GSI) {
715 h_ret = ehca_define_sqp(shca, my_qp, init_attr); 732 h_ret = ehca_define_sqp(shca, my_qp, init_attr);
716 if (h_ret != H_SUCCESS) { 733 if (h_ret != H_SUCCESS) {
717 ehca_err(pd->device, "ehca_define_sqp() failed rc=%lx",
718 h_ret);
719 ret = ehca2ib_return_code(h_ret); 734 ret = ehca2ib_return_code(h_ret);
720 goto create_qp_exit4; 735 goto create_qp_exit4;
721 } 736 }
@@ -725,7 +740,7 @@ static struct ehca_qp *internal_create_qp(
725 ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp); 740 ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp);
726 if (ret) { 741 if (ret) {
727 ehca_err(pd->device, 742 ehca_err(pd->device,
728 "Couldn't assign qp to send_cq ret=%x", ret); 743 "Couldn't assign qp to send_cq ret=%i", ret);
729 goto create_qp_exit4; 744 goto create_qp_exit4;
730 } 745 }
731 } 746 }
@@ -741,12 +756,13 @@ static struct ehca_qp *internal_create_qp(
741 resp.ext_type = my_qp->ext_type; 756 resp.ext_type = my_qp->ext_type;
742 resp.qkey = my_qp->qkey; 757 resp.qkey = my_qp->qkey;
743 resp.real_qp_num = my_qp->real_qp_num; 758 resp.real_qp_num = my_qp->real_qp_num;
744 resp.ipz_rqueue.offset = my_qp->ipz_rqueue.offset; 759
745 resp.ipz_squeue.offset = my_qp->ipz_squeue.offset;
746 if (HAS_SQ(my_qp)) 760 if (HAS_SQ(my_qp))
747 queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue); 761 queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue);
748 if (HAS_RQ(my_qp)) 762 if (HAS_RQ(my_qp))
749 queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue); 763 queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue);
764 resp.fw_handle_ofs = (u32)
765 (my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1));
750 766
751 if (ib_copy_to_udata(udata, &resp, sizeof resp)) { 767 if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
752 ehca_err(pd->device, "Copy to udata failed"); 768 ehca_err(pd->device, "Copy to udata failed");
@@ -841,7 +857,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
841 mqpcb, my_qp->galpas.kernel); 857 mqpcb, my_qp->galpas.kernel);
842 if (hret != H_SUCCESS) { 858 if (hret != H_SUCCESS) {
843 ehca_err(pd->device, "Could not modify SRQ to INIT" 859 ehca_err(pd->device, "Could not modify SRQ to INIT"
844 "ehca_qp=%p qp_num=%x hret=%lx", 860 "ehca_qp=%p qp_num=%x h_ret=%li",
845 my_qp, my_qp->real_qp_num, hret); 861 my_qp, my_qp->real_qp_num, hret);
846 goto create_srq2; 862 goto create_srq2;
847 } 863 }
@@ -855,7 +871,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
855 mqpcb, my_qp->galpas.kernel); 871 mqpcb, my_qp->galpas.kernel);
856 if (hret != H_SUCCESS) { 872 if (hret != H_SUCCESS) {
857 ehca_err(pd->device, "Could not enable SRQ" 873 ehca_err(pd->device, "Could not enable SRQ"
858 "ehca_qp=%p qp_num=%x hret=%lx", 874 "ehca_qp=%p qp_num=%x h_ret=%li",
859 my_qp, my_qp->real_qp_num, hret); 875 my_qp, my_qp->real_qp_num, hret);
860 goto create_srq2; 876 goto create_srq2;
861 } 877 }
@@ -869,11 +885,13 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
869 mqpcb, my_qp->galpas.kernel); 885 mqpcb, my_qp->galpas.kernel);
870 if (hret != H_SUCCESS) { 886 if (hret != H_SUCCESS) {
871 ehca_err(pd->device, "Could not modify SRQ to RTR" 887 ehca_err(pd->device, "Could not modify SRQ to RTR"
872 "ehca_qp=%p qp_num=%x hret=%lx", 888 "ehca_qp=%p qp_num=%x h_ret=%li",
873 my_qp, my_qp->real_qp_num, hret); 889 my_qp, my_qp->real_qp_num, hret);
874 goto create_srq2; 890 goto create_srq2;
875 } 891 }
876 892
893 ehca_free_fw_ctrlblock(mqpcb);
894
877 return &my_qp->ib_srq; 895 return &my_qp->ib_srq;
878 896
879create_srq2: 897create_srq2:
@@ -907,7 +925,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
907 &bad_send_wqe_p, NULL, 2); 925 &bad_send_wqe_p, NULL, 2);
908 if (h_ret != H_SUCCESS) { 926 if (h_ret != H_SUCCESS) {
909 ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed" 927 ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed"
910 " ehca_qp=%p qp_num=%x h_ret=%lx", 928 " ehca_qp=%p qp_num=%x h_ret=%li",
911 my_qp, qp_num, h_ret); 929 my_qp, qp_num, h_ret);
912 return ehca2ib_return_code(h_ret); 930 return ehca2ib_return_code(h_ret);
913 } 931 }
@@ -985,7 +1003,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
985 mqpcb, my_qp->galpas.kernel); 1003 mqpcb, my_qp->galpas.kernel);
986 if (h_ret != H_SUCCESS) { 1004 if (h_ret != H_SUCCESS) {
987 ehca_err(ibqp->device, "hipz_h_query_qp() failed " 1005 ehca_err(ibqp->device, "hipz_h_query_qp() failed "
988 "ehca_qp=%p qp_num=%x h_ret=%lx", 1006 "ehca_qp=%p qp_num=%x h_ret=%li",
989 my_qp, ibqp->qp_num, h_ret); 1007 my_qp, ibqp->qp_num, h_ret);
990 ret = ehca2ib_return_code(h_ret); 1008 ret = ehca2ib_return_code(h_ret);
991 goto modify_qp_exit1; 1009 goto modify_qp_exit1;
@@ -1021,7 +1039,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
1021 ibqp, &smiqp_attr, smiqp_attr_mask, 1); 1039 ibqp, &smiqp_attr, smiqp_attr_mask, 1);
1022 if (smirc) { 1040 if (smirc) {
1023 ehca_err(ibqp->device, "SMI RESET -> INIT failed. " 1041 ehca_err(ibqp->device, "SMI RESET -> INIT failed. "
1024 "ehca_modify_qp() rc=%x", smirc); 1042 "ehca_modify_qp() rc=%i", smirc);
1025 ret = H_PARAMETER; 1043 ret = H_PARAMETER;
1026 goto modify_qp_exit1; 1044 goto modify_qp_exit1;
1027 } 1045 }
@@ -1123,7 +1141,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
1123 ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt); 1141 ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt);
1124 if (ret) { 1142 if (ret) {
1125 ehca_err(ibqp->device, "prepare_sqe_rts() failed " 1143 ehca_err(ibqp->device, "prepare_sqe_rts() failed "
1126 "ehca_qp=%p qp_num=%x ret=%x", 1144 "ehca_qp=%p qp_num=%x ret=%i",
1127 my_qp, ibqp->qp_num, ret); 1145 my_qp, ibqp->qp_num, ret);
1128 goto modify_qp_exit2; 1146 goto modify_qp_exit2;
1129 } 1147 }
@@ -1149,6 +1167,13 @@ static int internal_modify_qp(struct ib_qp *ibqp,
1149 } 1167 }
1150 1168
1151 if (attr_mask & IB_QP_PKEY_INDEX) { 1169 if (attr_mask & IB_QP_PKEY_INDEX) {
1170 if (attr->pkey_index >= 16) {
1171 ret = -EINVAL;
1172 ehca_err(ibqp->device, "Invalid pkey_index=%x. "
1173 "ehca_qp=%p qp_num=%x max_pkey_index=f",
1174 attr->pkey_index, my_qp, ibqp->qp_num);
1175 goto modify_qp_exit2;
1176 }
1152 mqpcb->prim_p_key_idx = attr->pkey_index; 1177 mqpcb->prim_p_key_idx = attr->pkey_index;
1153 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1); 1178 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
1154 } 1179 }
@@ -1257,50 +1282,78 @@ static int internal_modify_qp(struct ib_qp *ibqp,
1257 int ehca_mult = ib_rate_to_mult( 1282 int ehca_mult = ib_rate_to_mult(
1258 shca->sport[my_qp->init_attr.port_num].rate); 1283 shca->sport[my_qp->init_attr.port_num].rate);
1259 1284
1285 if (attr->alt_port_num < 1
1286 || attr->alt_port_num > shca->num_ports) {
1287 ret = -EINVAL;
1288 ehca_err(ibqp->device, "Invalid alt_port=%x. "
1289 "ehca_qp=%p qp_num=%x num_ports=%x",
1290 attr->alt_port_num, my_qp, ibqp->qp_num,
1291 shca->num_ports);
1292 goto modify_qp_exit2;
1293 }
1294 mqpcb->alt_phys_port = attr->alt_port_num;
1295
1296 if (attr->alt_pkey_index >= 16) {
1297 ret = -EINVAL;
1298 ehca_err(ibqp->device, "Invalid alt_pkey_index=%x. "
1299 "ehca_qp=%p qp_num=%x max_pkey_index=f",
1300 attr->pkey_index, my_qp, ibqp->qp_num);
1301 goto modify_qp_exit2;
1302 }
1303 mqpcb->alt_p_key_idx = attr->alt_pkey_index;
1304
1305 mqpcb->timeout_al = attr->alt_timeout;
1260 mqpcb->dlid_al = attr->alt_ah_attr.dlid; 1306 mqpcb->dlid_al = attr->alt_ah_attr.dlid;
1261 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1);
1262 mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits; 1307 mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits;
1263 update_mask |=
1264 EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1);
1265 mqpcb->service_level_al = attr->alt_ah_attr.sl; 1308 mqpcb->service_level_al = attr->alt_ah_attr.sl;
1266 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1);
1267 1309
1268 if (ah_mult < ehca_mult) 1310 if (ah_mult > 0 && ah_mult < ehca_mult)
1269 mqpcb->max_static_rate = (ah_mult > 0) ? 1311 mqpcb->max_static_rate_al = (ehca_mult - 1) / ah_mult;
1270 ((ehca_mult - 1) / ah_mult) : 0;
1271 else 1312 else
1272 mqpcb->max_static_rate_al = 0; 1313 mqpcb->max_static_rate_al = 0;
1273 1314
1274 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1); 1315 /* OpenIB doesn't support alternate retry counts - copy them */
1316 mqpcb->retry_count_al = mqpcb->retry_count;
1317 mqpcb->rnr_retry_count_al = mqpcb->rnr_retry_count;
1318
1319 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_ALT_PHYS_PORT, 1)
1320 | EHCA_BMASK_SET(MQPCB_MASK_ALT_P_KEY_IDX, 1)
1321 | EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT_AL, 1)
1322 | EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1)
1323 | EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1)
1324 | EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1)
1325 | EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1)
1326 | EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT_AL, 1)
1327 | EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT_AL, 1);
1328
1329 /*
1330 * Always supply the GRH flag, even if it's zero, to give the
1331 * hypervisor a clear "yes" or "no" instead of a "perhaps"
1332 */
1333 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1);
1275 1334
1276 /* 1335 /*
1277 * only if GRH is TRUE we might consider SOURCE_GID_IDX 1336 * only if GRH is TRUE we might consider SOURCE_GID_IDX
1278 * and DEST_GID otherwise phype will return H_ATTR_PARM!!! 1337 * and DEST_GID otherwise phype will return H_ATTR_PARM!!!
1279 */ 1338 */
1280 if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) { 1339 if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) {
1281 mqpcb->send_grh_flag_al = 1 << 31; 1340 mqpcb->send_grh_flag_al = 1;
1282 update_mask |=
1283 EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1);
1284 mqpcb->source_gid_idx_al =
1285 attr->alt_ah_attr.grh.sgid_index;
1286 update_mask |=
1287 EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1);
1288 1341
1289 for (cnt = 0; cnt < 16; cnt++) 1342 for (cnt = 0; cnt < 16; cnt++)
1290 mqpcb->dest_gid_al.byte[cnt] = 1343 mqpcb->dest_gid_al.byte[cnt] =
1291 attr->alt_ah_attr.grh.dgid.raw[cnt]; 1344 attr->alt_ah_attr.grh.dgid.raw[cnt];
1292 1345 mqpcb->source_gid_idx_al =
1293 update_mask |= 1346 attr->alt_ah_attr.grh.sgid_index;
1294 EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1);
1295 mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label; 1347 mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label;
1296 update_mask |=
1297 EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1);
1298 mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit; 1348 mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit;
1299 update_mask |=
1300 EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1);
1301 mqpcb->traffic_class_al = 1349 mqpcb->traffic_class_al =
1302 attr->alt_ah_attr.grh.traffic_class; 1350 attr->alt_ah_attr.grh.traffic_class;
1351
1303 update_mask |= 1352 update_mask |=
1353 EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1)
1354 | EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1)
1355 | EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1)
1356 | EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1) |
1304 EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1); 1357 EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1);
1305 } 1358 }
1306 } 1359 }
@@ -1322,7 +1375,14 @@ static int internal_modify_qp(struct ib_qp *ibqp,
1322 } 1375 }
1323 1376
1324 if (attr_mask & IB_QP_PATH_MIG_STATE) { 1377 if (attr_mask & IB_QP_PATH_MIG_STATE) {
1325 mqpcb->path_migration_state = attr->path_mig_state; 1378 if (attr->path_mig_state != IB_MIG_REARM
1379 && attr->path_mig_state != IB_MIG_MIGRATED) {
1380 ret = -EINVAL;
1381 ehca_err(ibqp->device, "Invalid mig_state=%x",
1382 attr->path_mig_state);
1383 goto modify_qp_exit2;
1384 }
1385 mqpcb->path_migration_state = attr->path_mig_state + 1;
1326 update_mask |= 1386 update_mask |=
1327 EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1); 1387 EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1);
1328 } 1388 }
@@ -1348,7 +1408,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
1348 1408
1349 if (h_ret != H_SUCCESS) { 1409 if (h_ret != H_SUCCESS) {
1350 ret = ehca2ib_return_code(h_ret); 1410 ret = ehca2ib_return_code(h_ret);
1351 ehca_err(ibqp->device, "hipz_h_modify_qp() failed rc=%lx " 1411 ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%li "
1352 "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num); 1412 "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num);
1353 goto modify_qp_exit2; 1413 goto modify_qp_exit2;
1354 } 1414 }
@@ -1381,7 +1441,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
1381 ret = ehca2ib_return_code(h_ret); 1441 ret = ehca2ib_return_code(h_ret);
1382 ehca_err(ibqp->device, "ENABLE in context of " 1442 ehca_err(ibqp->device, "ENABLE in context of "
1383 "RESET_2_INIT failed! Maybe you didn't get " 1443 "RESET_2_INIT failed! Maybe you didn't get "
1384 "a LID h_ret=%lx ehca_qp=%p qp_num=%x", 1444 "a LID h_ret=%li ehca_qp=%p qp_num=%x",
1385 h_ret, my_qp, ibqp->qp_num); 1445 h_ret, my_qp, ibqp->qp_num);
1386 goto modify_qp_exit2; 1446 goto modify_qp_exit2;
1387 } 1447 }
@@ -1469,7 +1529,7 @@ int ehca_query_qp(struct ib_qp *qp,
1469 if (h_ret != H_SUCCESS) { 1529 if (h_ret != H_SUCCESS) {
1470 ret = ehca2ib_return_code(h_ret); 1530 ret = ehca2ib_return_code(h_ret);
1471 ehca_err(qp->device, "hipz_h_query_qp() failed " 1531 ehca_err(qp->device, "hipz_h_query_qp() failed "
1472 "ehca_qp=%p qp_num=%x h_ret=%lx", 1532 "ehca_qp=%p qp_num=%x h_ret=%li",
1473 my_qp, qp->qp_num, h_ret); 1533 my_qp, qp->qp_num, h_ret);
1474 goto query_qp_exit1; 1534 goto query_qp_exit1;
1475 } 1535 }
@@ -1490,7 +1550,7 @@ int ehca_query_qp(struct ib_qp *qp,
1490 1550
1491 qp_attr->qkey = qpcb->qkey; 1551 qp_attr->qkey = qpcb->qkey;
1492 qp_attr->path_mtu = qpcb->path_mtu; 1552 qp_attr->path_mtu = qpcb->path_mtu;
1493 qp_attr->path_mig_state = qpcb->path_migration_state; 1553 qp_attr->path_mig_state = qpcb->path_migration_state - 1;
1494 qp_attr->rq_psn = qpcb->receive_psn; 1554 qp_attr->rq_psn = qpcb->receive_psn;
1495 qp_attr->sq_psn = qpcb->send_psn; 1555 qp_attr->sq_psn = qpcb->send_psn;
1496 qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field; 1556 qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field;
@@ -1644,7 +1704,7 @@ int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
1644 1704
1645 if (h_ret != H_SUCCESS) { 1705 if (h_ret != H_SUCCESS) {
1646 ret = ehca2ib_return_code(h_ret); 1706 ret = ehca2ib_return_code(h_ret);
1647 ehca_err(ibsrq->device, "hipz_h_modify_qp() failed rc=%lx " 1707 ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%li "
1648 "ehca_qp=%p qp_num=%x", 1708 "ehca_qp=%p qp_num=%x",
1649 h_ret, my_qp, my_qp->real_qp_num); 1709 h_ret, my_qp, my_qp->real_qp_num);
1650 } 1710 }
@@ -1687,12 +1747,13 @@ int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr)
1687 if (h_ret != H_SUCCESS) { 1747 if (h_ret != H_SUCCESS) {
1688 ret = ehca2ib_return_code(h_ret); 1748 ret = ehca2ib_return_code(h_ret);
1689 ehca_err(srq->device, "hipz_h_query_qp() failed " 1749 ehca_err(srq->device, "hipz_h_query_qp() failed "
1690 "ehca_qp=%p qp_num=%x h_ret=%lx", 1750 "ehca_qp=%p qp_num=%x h_ret=%li",
1691 my_qp, my_qp->real_qp_num, h_ret); 1751 my_qp, my_qp->real_qp_num, h_ret);
1692 goto query_srq_exit1; 1752 goto query_srq_exit1;
1693 } 1753 }
1694 1754
1695 srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1; 1755 srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1;
1756 srq_attr->max_sge = qpcb->actual_nr_sges_in_rq_wqe;
1696 srq_attr->srq_limit = EHCA_BMASK_GET( 1757 srq_attr->srq_limit = EHCA_BMASK_GET(
1697 MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit); 1758 MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit);
1698 1759
@@ -1737,7 +1798,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
1737 ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num); 1798 ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num);
1738 if (ret) { 1799 if (ret) {
1739 ehca_err(dev, "Couldn't unassign qp from " 1800 ehca_err(dev, "Couldn't unassign qp from "
1740 "send_cq ret=%x qp_num=%x cq_num=%x", ret, 1801 "send_cq ret=%i qp_num=%x cq_num=%x", ret,
1741 qp_num, my_qp->send_cq->cq_number); 1802 qp_num, my_qp->send_cq->cq_number);
1742 return ret; 1803 return ret;
1743 } 1804 }
@@ -1749,7 +1810,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
1749 1810
1750 h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); 1811 h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
1751 if (h_ret != H_SUCCESS) { 1812 if (h_ret != H_SUCCESS) {
1752 ehca_err(dev, "hipz_h_destroy_qp() failed rc=%lx " 1813 ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%li "
1753 "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num); 1814 "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num);
1754 return ehca2ib_return_code(h_ret); 1815 return ehca2ib_return_code(h_ret);
1755 } 1816 }
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index 94eed70fedf5..ea91360835d3 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -526,7 +526,7 @@ poll_cq_one_read_cqe:
526 if (!cqe) { 526 if (!cqe) {
527 ret = -EAGAIN; 527 ret = -EAGAIN;
528 ehca_dbg(cq->device, "Completion queue is empty ehca_cq=%p " 528 ehca_dbg(cq->device, "Completion queue is empty ehca_cq=%p "
529 "cq_num=%x ret=%x", my_cq, my_cq->cq_number, ret); 529 "cq_num=%x ret=%i", my_cq, my_cq->cq_number, ret);
530 goto poll_cq_one_exit0; 530 goto poll_cq_one_exit0;
531 } 531 }
532 532
diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c
index 9f16e9c79394..f0792e5fbd02 100644
--- a/drivers/infiniband/hw/ehca/ehca_sqp.c
+++ b/drivers/infiniband/hw/ehca/ehca_sqp.c
@@ -82,7 +82,7 @@ u64 ehca_define_sqp(struct ehca_shca *shca,
82 82
83 if (ret != H_SUCCESS) { 83 if (ret != H_SUCCESS) {
84 ehca_err(&shca->ib_device, 84 ehca_err(&shca->ib_device,
85 "Can't define AQP1 for port %x. rc=%lx", 85 "Can't define AQP1 for port %x. h_ret=%li",
86 port, ret); 86 port, ret);
87 return ret; 87 return ret;
88 } 88 }
diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h
index 57c77a715f46..4a8346a2bc9e 100644
--- a/drivers/infiniband/hw/ehca/ehca_tools.h
+++ b/drivers/infiniband/hw/ehca/ehca_tools.h
@@ -73,40 +73,37 @@ extern int ehca_debug_level;
73 if (unlikely(ehca_debug_level)) \ 73 if (unlikely(ehca_debug_level)) \
74 dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \ 74 dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \
75 "PU%04x EHCA_DBG:%s " format "\n", \ 75 "PU%04x EHCA_DBG:%s " format "\n", \
76 get_paca()->paca_index, __FUNCTION__, \ 76 raw_smp_processor_id(), __FUNCTION__, \
77 ## arg); \ 77 ## arg); \
78 } while (0) 78 } while (0)
79 79
80#define ehca_info(ib_dev, format, arg...) \ 80#define ehca_info(ib_dev, format, arg...) \
81 dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \ 81 dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \
82 get_paca()->paca_index, __FUNCTION__, ## arg) 82 raw_smp_processor_id(), __FUNCTION__, ## arg)
83 83
84#define ehca_warn(ib_dev, format, arg...) \ 84#define ehca_warn(ib_dev, format, arg...) \
85 dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \ 85 dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \
86 get_paca()->paca_index, __FUNCTION__, ## arg) 86 raw_smp_processor_id(), __FUNCTION__, ## arg)
87 87
88#define ehca_err(ib_dev, format, arg...) \ 88#define ehca_err(ib_dev, format, arg...) \
89 dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \ 89 dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \
90 get_paca()->paca_index, __FUNCTION__, ## arg) 90 raw_smp_processor_id(), __FUNCTION__, ## arg)
91 91
92/* use this one only if no ib_dev available */ 92/* use this one only if no ib_dev available */
93#define ehca_gen_dbg(format, arg...) \ 93#define ehca_gen_dbg(format, arg...) \
94 do { \ 94 do { \
95 if (unlikely(ehca_debug_level)) \ 95 if (unlikely(ehca_debug_level)) \
96 printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \ 96 printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \
97 get_paca()->paca_index, __FUNCTION__, ## arg); \ 97 raw_smp_processor_id(), __FUNCTION__, ## arg); \
98 } while (0) 98 } while (0)
99 99
100#define ehca_gen_warn(format, arg...) \ 100#define ehca_gen_warn(format, arg...) \
101 do { \ 101 printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \
102 if (unlikely(ehca_debug_level)) \ 102 raw_smp_processor_id(), __FUNCTION__, ## arg)
103 printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \
104 get_paca()->paca_index, __FUNCTION__, ## arg); \
105 } while (0)
106 103
107#define ehca_gen_err(format, arg...) \ 104#define ehca_gen_err(format, arg...) \
108 printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \ 105 printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \
109 get_paca()->paca_index, __FUNCTION__, ## arg) 106 raw_smp_processor_id(), __FUNCTION__, ## arg)
110 107
111/** 108/**
112 * ehca_dmp - printk a memory block, whose length is n*8 bytes. 109 * ehca_dmp - printk a memory block, whose length is n*8 bytes.
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c
index 4bc687fdf531..5234d6c15c49 100644
--- a/drivers/infiniband/hw/ehca/ehca_uverbs.c
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -109,7 +109,7 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
109 u64 vsize, physical; 109 u64 vsize, physical;
110 110
111 vsize = vma->vm_end - vma->vm_start; 111 vsize = vma->vm_end - vma->vm_start;
112 if (vsize != EHCA_PAGESIZE) { 112 if (vsize < EHCA_PAGESIZE) {
113 ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start); 113 ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start);
114 return -EINVAL; 114 return -EINVAL;
115 } 115 }
@@ -118,10 +118,10 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
118 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 118 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
119 ehca_gen_dbg("vsize=%lx physical=%lx", vsize, physical); 119 ehca_gen_dbg("vsize=%lx physical=%lx", vsize, physical);
120 /* VM_IO | VM_RESERVED are set by remap_pfn_range() */ 120 /* VM_IO | VM_RESERVED are set by remap_pfn_range() */
121 ret = remap_pfn_range(vma, vma->vm_start, physical >> PAGE_SHIFT, 121 ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT,
122 vsize, vma->vm_page_prot); 122 vma->vm_page_prot);
123 if (unlikely(ret)) { 123 if (unlikely(ret)) {
124 ehca_gen_err("remap_pfn_range() failed ret=%x", ret); 124 ehca_gen_err("remap_pfn_range() failed ret=%i", ret);
125 return -ENOMEM; 125 return -ENOMEM;
126 } 126 }
127 127
@@ -146,7 +146,7 @@ static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
146 page = virt_to_page(virt_addr); 146 page = virt_to_page(virt_addr);
147 ret = vm_insert_page(vma, start, page); 147 ret = vm_insert_page(vma, start, page);
148 if (unlikely(ret)) { 148 if (unlikely(ret)) {
149 ehca_gen_err("vm_insert_page() failed rc=%x", ret); 149 ehca_gen_err("vm_insert_page() failed rc=%i", ret);
150 return ret; 150 return ret;
151 } 151 }
152 start += PAGE_SIZE; 152 start += PAGE_SIZE;
@@ -164,23 +164,23 @@ static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq,
164 int ret; 164 int ret;
165 165
166 switch (rsrc_type) { 166 switch (rsrc_type) {
167 case 1: /* galpa fw handle */ 167 case 0: /* galpa fw handle */
168 ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number); 168 ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number);
169 ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa); 169 ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa);
170 if (unlikely(ret)) { 170 if (unlikely(ret)) {
171 ehca_err(cq->ib_cq.device, 171 ehca_err(cq->ib_cq.device,
172 "ehca_mmap_fw() failed rc=%x cq_num=%x", 172 "ehca_mmap_fw() failed rc=%i cq_num=%x",
173 ret, cq->cq_number); 173 ret, cq->cq_number);
174 return ret; 174 return ret;
175 } 175 }
176 break; 176 break;
177 177
178 case 2: /* cq queue_addr */ 178 case 1: /* cq queue_addr */
179 ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number); 179 ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number);
180 ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue); 180 ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue);
181 if (unlikely(ret)) { 181 if (unlikely(ret)) {
182 ehca_err(cq->ib_cq.device, 182 ehca_err(cq->ib_cq.device,
183 "ehca_mmap_queue() failed rc=%x cq_num=%x", 183 "ehca_mmap_queue() failed rc=%i cq_num=%x",
184 ret, cq->cq_number); 184 ret, cq->cq_number);
185 return ret; 185 return ret;
186 } 186 }
@@ -201,38 +201,38 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
201 int ret; 201 int ret;
202 202
203 switch (rsrc_type) { 203 switch (rsrc_type) {
204 case 1: /* galpa fw handle */ 204 case 0: /* galpa fw handle */
205 ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num); 205 ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num);
206 ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa); 206 ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa);
207 if (unlikely(ret)) { 207 if (unlikely(ret)) {
208 ehca_err(qp->ib_qp.device, 208 ehca_err(qp->ib_qp.device,
209 "remap_pfn_range() failed ret=%x qp_num=%x", 209 "remap_pfn_range() failed ret=%i qp_num=%x",
210 ret, qp->ib_qp.qp_num); 210 ret, qp->ib_qp.qp_num);
211 return -ENOMEM; 211 return -ENOMEM;
212 } 212 }
213 break; 213 break;
214 214
215 case 2: /* qp rqueue_addr */ 215 case 1: /* qp rqueue_addr */
216 ehca_dbg(qp->ib_qp.device, "qp_num=%x rqueue", 216 ehca_dbg(qp->ib_qp.device, "qp_num=%x rqueue",
217 qp->ib_qp.qp_num); 217 qp->ib_qp.qp_num);
218 ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, 218 ret = ehca_mmap_queue(vma, &qp->ipz_rqueue,
219 &qp->mm_count_rqueue); 219 &qp->mm_count_rqueue);
220 if (unlikely(ret)) { 220 if (unlikely(ret)) {
221 ehca_err(qp->ib_qp.device, 221 ehca_err(qp->ib_qp.device,
222 "ehca_mmap_queue(rq) failed rc=%x qp_num=%x", 222 "ehca_mmap_queue(rq) failed rc=%i qp_num=%x",
223 ret, qp->ib_qp.qp_num); 223 ret, qp->ib_qp.qp_num);
224 return ret; 224 return ret;
225 } 225 }
226 break; 226 break;
227 227
228 case 3: /* qp squeue_addr */ 228 case 2: /* qp squeue_addr */
229 ehca_dbg(qp->ib_qp.device, "qp_num=%x squeue", 229 ehca_dbg(qp->ib_qp.device, "qp_num=%x squeue",
230 qp->ib_qp.qp_num); 230 qp->ib_qp.qp_num);
231 ret = ehca_mmap_queue(vma, &qp->ipz_squeue, 231 ret = ehca_mmap_queue(vma, &qp->ipz_squeue,
232 &qp->mm_count_squeue); 232 &qp->mm_count_squeue);
233 if (unlikely(ret)) { 233 if (unlikely(ret)) {
234 ehca_err(qp->ib_qp.device, 234 ehca_err(qp->ib_qp.device,
235 "ehca_mmap_queue(sq) failed rc=%x qp_num=%x", 235 "ehca_mmap_queue(sq) failed rc=%i qp_num=%x",
236 ret, qp->ib_qp.qp_num); 236 ret, qp->ib_qp.qp_num);
237 return ret; 237 return ret;
238 } 238 }
@@ -249,10 +249,10 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
249 249
250int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) 250int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
251{ 251{
252 u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT; 252 u64 fileoffset = vma->vm_pgoff;
253 u32 idr_handle = fileoffset >> 32; 253 u32 idr_handle = fileoffset & 0x1FFFFFF;
254 u32 q_type = (fileoffset >> 28) & 0xF; /* CQ, QP,... */ 254 u32 q_type = (fileoffset >> 27) & 0x1; /* CQ, QP,... */
255 u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */ 255 u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */
256 u32 cur_pid = current->tgid; 256 u32 cur_pid = current->tgid;
257 u32 ret; 257 u32 ret;
258 struct ehca_cq *cq; 258 struct ehca_cq *cq;
@@ -261,7 +261,7 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
261 struct ib_uobject *uobject; 261 struct ib_uobject *uobject;
262 262
263 switch (q_type) { 263 switch (q_type) {
264 case 1: /* CQ */ 264 case 0: /* CQ */
265 read_lock(&ehca_cq_idr_lock); 265 read_lock(&ehca_cq_idr_lock);
266 cq = idr_find(&ehca_cq_idr, idr_handle); 266 cq = idr_find(&ehca_cq_idr, idr_handle);
267 read_unlock(&ehca_cq_idr_lock); 267 read_unlock(&ehca_cq_idr_lock);
@@ -283,13 +283,13 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
283 ret = ehca_mmap_cq(vma, cq, rsrc_type); 283 ret = ehca_mmap_cq(vma, cq, rsrc_type);
284 if (unlikely(ret)) { 284 if (unlikely(ret)) {
285 ehca_err(cq->ib_cq.device, 285 ehca_err(cq->ib_cq.device,
286 "ehca_mmap_cq() failed rc=%x cq_num=%x", 286 "ehca_mmap_cq() failed rc=%i cq_num=%x",
287 ret, cq->cq_number); 287 ret, cq->cq_number);
288 return ret; 288 return ret;
289 } 289 }
290 break; 290 break;
291 291
292 case 2: /* QP */ 292 case 1: /* QP */
293 read_lock(&ehca_qp_idr_lock); 293 read_lock(&ehca_qp_idr_lock);
294 qp = idr_find(&ehca_qp_idr, idr_handle); 294 qp = idr_find(&ehca_qp_idr, idr_handle);
295 read_unlock(&ehca_qp_idr_lock); 295 read_unlock(&ehca_qp_idr_lock);
@@ -313,7 +313,7 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
313 ret = ehca_mmap_qp(vma, qp, rsrc_type); 313 ret = ehca_mmap_qp(vma, qp, rsrc_type);
314 if (unlikely(ret)) { 314 if (unlikely(ret)) {
315 ehca_err(qp->ib_qp.device, 315 ehca_err(qp->ib_qp.device,
316 "ehca_mmap_qp() failed rc=%x qp_num=%x", 316 "ehca_mmap_qp() failed rc=%i qp_num=%x",
317 ret, qp->ib_qp.qp_num); 317 ret, qp->ib_qp.qp_num);
318 return ret; 318 return ret;
319 } 319 }
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index 24f454162f24..c16a21374bb5 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -84,6 +84,10 @@
84#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48) 84#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48)
85#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49) 85#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49)
86 86
87#define HCALL4_REGS_FORMAT "r4=%lx r5=%lx r6=%lx r7=%lx"
88#define HCALL7_REGS_FORMAT HCALL4_REGS_FORMAT " r8=%lx r9=%lx r10=%lx"
89#define HCALL9_REGS_FORMAT HCALL7_REGS_FORMAT " r11=%lx r12=%lx"
90
87static DEFINE_SPINLOCK(hcall_lock); 91static DEFINE_SPINLOCK(hcall_lock);
88 92
89static u32 get_longbusy_msecs(int longbusy_rc) 93static u32 get_longbusy_msecs(int longbusy_rc)
@@ -116,16 +120,28 @@ static long ehca_plpar_hcall_norets(unsigned long opcode,
116 unsigned long arg7) 120 unsigned long arg7)
117{ 121{
118 long ret; 122 long ret;
119 int i, sleep_msecs; 123 int i, sleep_msecs, do_lock;
124 unsigned long flags;
120 125
121 ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx " 126 ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT,
122 "arg5=%lx arg6=%lx arg7=%lx",
123 opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7); 127 opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
124 128
129 /* lock H_FREE_RESOURCE(MR) against itself and H_ALLOC_RESOURCE(MR) */
130 if ((opcode == H_FREE_RESOURCE) && (arg7 == 5)) {
131 arg7 = 0; /* better not upset firmware */
132 do_lock = 1;
133 }
134
125 for (i = 0; i < 5; i++) { 135 for (i = 0; i < 5; i++) {
136 if (do_lock)
137 spin_lock_irqsave(&hcall_lock, flags);
138
126 ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4, 139 ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4,
127 arg5, arg6, arg7); 140 arg5, arg6, arg7);
128 141
142 if (do_lock)
143 spin_unlock_irqrestore(&hcall_lock, flags);
144
129 if (H_IS_LONG_BUSY(ret)) { 145 if (H_IS_LONG_BUSY(ret)) {
130 sleep_msecs = get_longbusy_msecs(ret); 146 sleep_msecs = get_longbusy_msecs(ret);
131 msleep_interruptible(sleep_msecs); 147 msleep_interruptible(sleep_msecs);
@@ -133,16 +149,13 @@ static long ehca_plpar_hcall_norets(unsigned long opcode,
133 } 149 }
134 150
135 if (ret < H_SUCCESS) 151 if (ret < H_SUCCESS)
136 ehca_gen_err("opcode=%lx ret=%lx" 152 ehca_gen_err("opcode=%lx ret=%li " HCALL7_REGS_FORMAT,
137 " arg1=%lx arg2=%lx arg3=%lx arg4=%lx" 153 opcode, ret, arg1, arg2, arg3,
138 " arg5=%lx arg6=%lx arg7=%lx ", 154 arg4, arg5, arg6, arg7);
139 opcode, ret, 155 else
140 arg1, arg2, arg3, arg4, arg5, 156 ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret);
141 arg6, arg7);
142
143 ehca_gen_dbg("opcode=%lx ret=%lx", opcode, ret);
144 return ret;
145 157
158 return ret;
146 } 159 }
147 160
148 return H_BUSY; 161 return H_BUSY;
@@ -161,25 +174,24 @@ static long ehca_plpar_hcall9(unsigned long opcode,
161 unsigned long arg9) 174 unsigned long arg9)
162{ 175{
163 long ret; 176 long ret;
164 int i, sleep_msecs, lock_is_set = 0; 177 int i, sleep_msecs, do_lock;
165 unsigned long flags = 0; 178 unsigned long flags = 0;
166 179
167 ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx " 180 ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode,
168 "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx", 181 arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9);
169 opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7, 182
170 arg8, arg9); 183 /* lock H_ALLOC_RESOURCE(MR) against itself and H_FREE_RESOURCE(MR) */
184 do_lock = ((opcode == H_ALLOC_RESOURCE) && (arg2 == 5));
171 185
172 for (i = 0; i < 5; i++) { 186 for (i = 0; i < 5; i++) {
173 if ((opcode == H_ALLOC_RESOURCE) && (arg2 == 5)) { 187 if (do_lock)
174 spin_lock_irqsave(&hcall_lock, flags); 188 spin_lock_irqsave(&hcall_lock, flags);
175 lock_is_set = 1;
176 }
177 189
178 ret = plpar_hcall9(opcode, outs, 190 ret = plpar_hcall9(opcode, outs,
179 arg1, arg2, arg3, arg4, arg5, 191 arg1, arg2, arg3, arg4, arg5,
180 arg6, arg7, arg8, arg9); 192 arg6, arg7, arg8, arg9);
181 193
182 if (lock_is_set) 194 if (do_lock)
183 spin_unlock_irqrestore(&hcall_lock, flags); 195 spin_unlock_irqrestore(&hcall_lock, flags);
184 196
185 if (H_IS_LONG_BUSY(ret)) { 197 if (H_IS_LONG_BUSY(ret)) {
@@ -188,26 +200,19 @@ static long ehca_plpar_hcall9(unsigned long opcode,
188 continue; 200 continue;
189 } 201 }
190 202
191 if (ret < H_SUCCESS) 203 if (ret < H_SUCCESS) {
192 ehca_gen_err("opcode=%lx ret=%lx" 204 ehca_gen_err("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT,
193 " arg1=%lx arg2=%lx arg3=%lx arg4=%lx" 205 opcode, arg1, arg2, arg3, arg4, arg5,
194 " arg5=%lx arg6=%lx arg7=%lx arg8=%lx" 206 arg6, arg7, arg8, arg9);
195 " arg9=%lx" 207 ehca_gen_err("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
196 " out1=%lx out2=%lx out3=%lx out4=%lx" 208 ret, outs[0], outs[1], outs[2], outs[3],
197 " out5=%lx out6=%lx out7=%lx out8=%lx" 209 outs[4], outs[5], outs[6], outs[7],
198 " out9=%lx", 210 outs[8]);
199 opcode, ret, 211 } else
200 arg1, arg2, arg3, arg4, arg5, 212 ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
201 arg6, arg7, arg8, arg9, 213 ret, outs[0], outs[1], outs[2], outs[3],
202 outs[0], outs[1], outs[2], outs[3],
203 outs[4], outs[5], outs[6], outs[7], 214 outs[4], outs[5], outs[6], outs[7],
204 outs[8]); 215 outs[8]);
205
206 ehca_gen_dbg("opcode=%lx ret=%lx out1=%lx out2=%lx out3=%lx "
207 "out4=%lx out5=%lx out6=%lx out7=%lx out8=%lx "
208 "out9=%lx",
209 opcode, ret, outs[0], outs[1], outs[2], outs[3],
210 outs[4], outs[5], outs[6], outs[7], outs[8]);
211 return ret; 216 return ret;
212 } 217 }
213 218
@@ -247,7 +252,7 @@ u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
247 *eq_ist = (u32)outs[5]; 252 *eq_ist = (u32)outs[5];
248 253
249 if (ret == H_NOT_ENOUGH_RESOURCES) 254 if (ret == H_NOT_ENOUGH_RESOURCES)
250 ehca_gen_err("Not enough resource - ret=%lx ", ret); 255 ehca_gen_err("Not enough resource - ret=%li ", ret);
251 256
252 return ret; 257 return ret;
253} 258}
@@ -285,7 +290,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
285 hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]); 290 hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]);
286 291
287 if (ret == H_NOT_ENOUGH_RESOURCES) 292 if (ret == H_NOT_ENOUGH_RESOURCES)
288 ehca_gen_err("Not enough resources. ret=%lx", ret); 293 ehca_gen_err("Not enough resources. ret=%li", ret);
289 294
290 return ret; 295 return ret;
291} 296}
@@ -360,7 +365,7 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
360 hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]); 365 hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]);
361 366
362 if (ret == H_NOT_ENOUGH_RESOURCES) 367 if (ret == H_NOT_ENOUGH_RESOURCES)
363 ehca_gen_err("Not enough resources. ret=%lx", ret); 368 ehca_gen_err("Not enough resources. ret=%li", ret);
364 369
365 return ret; 370 return ret;
366} 371}
@@ -555,7 +560,7 @@ u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
555 0, 0, 0, 0, 0); 560 0, 0, 0, 0, 0);
556 561
557 if (ret == H_NOT_ENOUGH_RESOURCES) 562 if (ret == H_NOT_ENOUGH_RESOURCES)
558 ehca_gen_err("Insufficient resources ret=%lx", ret); 563 ehca_gen_err("Insufficient resources ret=%li", ret);
559 564
560 return ret; 565 return ret;
561} 566}
@@ -591,7 +596,7 @@ u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
591 qp->ipz_qp_handle.handle, /* r6 */ 596 qp->ipz_qp_handle.handle, /* r6 */
592 0, 0, 0, 0, 0, 0); 597 0, 0, 0, 0, 0, 0);
593 if (ret == H_HARDWARE) 598 if (ret == H_HARDWARE)
594 ehca_gen_err("HCA not operational. ret=%lx", ret); 599 ehca_gen_err("HCA not operational. ret=%li", ret);
595 600
596 ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, 601 ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
597 adapter_handle.handle, /* r4 */ 602 adapter_handle.handle, /* r4 */
@@ -599,7 +604,7 @@ u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
599 0, 0, 0, 0, 0); 604 0, 0, 0, 0, 0);
600 605
601 if (ret == H_RESOURCE) 606 if (ret == H_RESOURCE)
602 ehca_gen_err("Resource still in use. ret=%lx", ret); 607 ehca_gen_err("Resource still in use. ret=%li", ret);
603 608
604 return ret; 609 return ret;
605} 610}
@@ -634,7 +639,7 @@ u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
634 *bma_qp_nr = (u32)outs[1]; 639 *bma_qp_nr = (u32)outs[1];
635 640
636 if (ret == H_ALIAS_EXIST) 641 if (ret == H_ALIAS_EXIST)
637 ehca_gen_err("AQP1 already exists. ret=%lx", ret); 642 ehca_gen_err("AQP1 already exists. ret=%li", ret);
638 643
639 return ret; 644 return ret;
640} 645}
@@ -656,7 +661,7 @@ u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
656 0, 0); 661 0, 0);
657 662
658 if (ret == H_NOT_ENOUGH_RESOURCES) 663 if (ret == H_NOT_ENOUGH_RESOURCES)
659 ehca_gen_err("Not enough resources. ret=%lx", ret); 664 ehca_gen_err("Not enough resources. ret=%li", ret);
660 665
661 return ret; 666 return ret;
662} 667}
@@ -695,7 +700,7 @@ u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
695 0, 0, 0, 0); 700 0, 0, 0, 0);
696 701
697 if (ret == H_RESOURCE) 702 if (ret == H_RESOURCE)
698 ehca_gen_err("H_FREE_RESOURCE failed ret=%lx ", ret); 703 ehca_gen_err("H_FREE_RESOURCE failed ret=%li ", ret);
699 704
700 return ret; 705 return ret;
701} 706}
@@ -717,7 +722,7 @@ u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
717 0, 0, 0, 0, 0); 722 0, 0, 0, 0, 0);
718 723
719 if (ret == H_RESOURCE) 724 if (ret == H_RESOURCE)
720 ehca_gen_err("Resource in use. ret=%lx ", ret); 725 ehca_gen_err("Resource in use. ret=%li ", ret);
721 726
722 return ret; 727 return ret;
723} 728}
@@ -816,7 +821,7 @@ u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
816 return ehca_plpar_hcall_norets(H_FREE_RESOURCE, 821 return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
817 adapter_handle.handle, /* r4 */ 822 adapter_handle.handle, /* r4 */
818 mr->ipz_mr_handle.handle, /* r5 */ 823 mr->ipz_mr_handle.handle, /* r5 */
819 0, 0, 0, 0, 0); 824 0, 0, 0, 0, 5);
820} 825}
821 826
822u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, 827u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
index 29bd476fbd54..661f8db62706 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -158,6 +158,7 @@ static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
158 158
159 queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9))); 159 queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9)));
160 queue->small_page = page; 160 queue->small_page = page;
161 queue->offset = bit << (order + 9);
161 return 1; 162 return 1;
162 163
163out: 164out:
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 6ad822c35930..851df8a75e79 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -189,6 +189,8 @@ typedef enum _ipath_ureg {
189#define IPATH_RUNTIME_RCVHDR_COPY 0x8 189#define IPATH_RUNTIME_RCVHDR_COPY 0x8
190#define IPATH_RUNTIME_MASTER 0x10 190#define IPATH_RUNTIME_MASTER 0x10
191/* 0x20 and 0x40 are no longer used, but are reserved for ABI compatibility */ 191/* 0x20 and 0x40 are no longer used, but are reserved for ABI compatibility */
192#define IPATH_RUNTIME_FORCE_PIOAVAIL 0x400
193#define IPATH_RUNTIME_PIO_REGSWAPPED 0x800
192 194
193/* 195/*
194 * This structure is returned by ipath_userinit() immediately after 196 * This structure is returned by ipath_userinit() immediately after
@@ -350,7 +352,7 @@ struct ipath_base_info {
350 * may not be implemented; the user code must deal with this if it 352 * may not be implemented; the user code must deal with this if it
351 * cares, or it must abort after initialization reports the difference. 353 * cares, or it must abort after initialization reports the difference.
352 */ 354 */
353#define IPATH_USER_SWMINOR 5 355#define IPATH_USER_SWMINOR 6
354 356
355#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR) 357#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
356 358
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index a6f04d27ec57..645ed71fd797 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -76,22 +76,25 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
76 } 76 }
77 return; 77 return;
78 } 78 }
79 wc->queue[head].wr_id = entry->wr_id; 79 if (cq->ip) {
80 wc->queue[head].status = entry->status; 80 wc->uqueue[head].wr_id = entry->wr_id;
81 wc->queue[head].opcode = entry->opcode; 81 wc->uqueue[head].status = entry->status;
82 wc->queue[head].vendor_err = entry->vendor_err; 82 wc->uqueue[head].opcode = entry->opcode;
83 wc->queue[head].byte_len = entry->byte_len; 83 wc->uqueue[head].vendor_err = entry->vendor_err;
84 wc->queue[head].imm_data = (__u32 __force)entry->imm_data; 84 wc->uqueue[head].byte_len = entry->byte_len;
85 wc->queue[head].qp_num = entry->qp->qp_num; 85 wc->uqueue[head].imm_data = (__u32 __force)entry->imm_data;
86 wc->queue[head].src_qp = entry->src_qp; 86 wc->uqueue[head].qp_num = entry->qp->qp_num;
87 wc->queue[head].wc_flags = entry->wc_flags; 87 wc->uqueue[head].src_qp = entry->src_qp;
88 wc->queue[head].pkey_index = entry->pkey_index; 88 wc->uqueue[head].wc_flags = entry->wc_flags;
89 wc->queue[head].slid = entry->slid; 89 wc->uqueue[head].pkey_index = entry->pkey_index;
90 wc->queue[head].sl = entry->sl; 90 wc->uqueue[head].slid = entry->slid;
91 wc->queue[head].dlid_path_bits = entry->dlid_path_bits; 91 wc->uqueue[head].sl = entry->sl;
92 wc->queue[head].port_num = entry->port_num; 92 wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
93 /* Make sure queue entry is written before the head index. */ 93 wc->uqueue[head].port_num = entry->port_num;
94 smp_wmb(); 94 /* Make sure entry is written before the head index. */
95 smp_wmb();
96 } else
97 wc->kqueue[head] = *entry;
95 wc->head = next; 98 wc->head = next;
96 99
97 if (cq->notify == IB_CQ_NEXT_COMP || 100 if (cq->notify == IB_CQ_NEXT_COMP ||
@@ -130,6 +133,12 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
130 int npolled; 133 int npolled;
131 u32 tail; 134 u32 tail;
132 135
136 /* The kernel can only poll a kernel completion queue */
137 if (cq->ip) {
138 npolled = -EINVAL;
139 goto bail;
140 }
141
133 spin_lock_irqsave(&cq->lock, flags); 142 spin_lock_irqsave(&cq->lock, flags);
134 143
135 wc = cq->queue; 144 wc = cq->queue;
@@ -137,31 +146,10 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
137 if (tail > (u32) cq->ibcq.cqe) 146 if (tail > (u32) cq->ibcq.cqe)
138 tail = (u32) cq->ibcq.cqe; 147 tail = (u32) cq->ibcq.cqe;
139 for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { 148 for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
140 struct ipath_qp *qp;
141
142 if (tail == wc->head) 149 if (tail == wc->head)
143 break; 150 break;
144 /* Make sure entry is read after head index is read. */ 151 /* The kernel doesn't need a RMB since it has the lock. */
145 smp_rmb(); 152 *entry = wc->kqueue[tail];
146 qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table,
147 wc->queue[tail].qp_num);
148 entry->qp = &qp->ibqp;
149 if (atomic_dec_and_test(&qp->refcount))
150 wake_up(&qp->wait);
151
152 entry->wr_id = wc->queue[tail].wr_id;
153 entry->status = wc->queue[tail].status;
154 entry->opcode = wc->queue[tail].opcode;
155 entry->vendor_err = wc->queue[tail].vendor_err;
156 entry->byte_len = wc->queue[tail].byte_len;
157 entry->imm_data = wc->queue[tail].imm_data;
158 entry->src_qp = wc->queue[tail].src_qp;
159 entry->wc_flags = wc->queue[tail].wc_flags;
160 entry->pkey_index = wc->queue[tail].pkey_index;
161 entry->slid = wc->queue[tail].slid;
162 entry->sl = wc->queue[tail].sl;
163 entry->dlid_path_bits = wc->queue[tail].dlid_path_bits;
164 entry->port_num = wc->queue[tail].port_num;
165 if (tail >= cq->ibcq.cqe) 153 if (tail >= cq->ibcq.cqe)
166 tail = 0; 154 tail = 0;
167 else 155 else
@@ -171,6 +159,7 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
171 159
172 spin_unlock_irqrestore(&cq->lock, flags); 160 spin_unlock_irqrestore(&cq->lock, flags);
173 161
162bail:
174 return npolled; 163 return npolled;
175} 164}
176 165
@@ -215,6 +204,7 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vec
215 struct ipath_cq *cq; 204 struct ipath_cq *cq;
216 struct ipath_cq_wc *wc; 205 struct ipath_cq_wc *wc;
217 struct ib_cq *ret; 206 struct ib_cq *ret;
207 u32 sz;
218 208
219 if (entries < 1 || entries > ib_ipath_max_cqes) { 209 if (entries < 1 || entries > ib_ipath_max_cqes) {
220 ret = ERR_PTR(-EINVAL); 210 ret = ERR_PTR(-EINVAL);
@@ -235,7 +225,12 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vec
235 * We need to use vmalloc() in order to support mmap and large 225 * We need to use vmalloc() in order to support mmap and large
236 * numbers of entries. 226 * numbers of entries.
237 */ 227 */
238 wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries); 228 sz = sizeof(*wc);
229 if (udata && udata->outlen >= sizeof(__u64))
230 sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
231 else
232 sz += sizeof(struct ib_wc) * (entries + 1);
233 wc = vmalloc_user(sz);
239 if (!wc) { 234 if (!wc) {
240 ret = ERR_PTR(-ENOMEM); 235 ret = ERR_PTR(-ENOMEM);
241 goto bail_cq; 236 goto bail_cq;
@@ -247,9 +242,8 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vec
247 */ 242 */
248 if (udata && udata->outlen >= sizeof(__u64)) { 243 if (udata && udata->outlen >= sizeof(__u64)) {
249 int err; 244 int err;
250 u32 s = sizeof *wc + sizeof(struct ib_wc) * entries;
251 245
252 cq->ip = ipath_create_mmap_info(dev, s, context, wc); 246 cq->ip = ipath_create_mmap_info(dev, sz, context, wc);
253 if (!cq->ip) { 247 if (!cq->ip) {
254 ret = ERR_PTR(-ENOMEM); 248 ret = ERR_PTR(-ENOMEM);
255 goto bail_wc; 249 goto bail_wc;
@@ -380,6 +374,7 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
380 struct ipath_cq_wc *wc; 374 struct ipath_cq_wc *wc;
381 u32 head, tail, n; 375 u32 head, tail, n;
382 int ret; 376 int ret;
377 u32 sz;
383 378
384 if (cqe < 1 || cqe > ib_ipath_max_cqes) { 379 if (cqe < 1 || cqe > ib_ipath_max_cqes) {
385 ret = -EINVAL; 380 ret = -EINVAL;
@@ -389,7 +384,12 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
389 /* 384 /*
390 * Need to use vmalloc() if we want to support large #s of entries. 385 * Need to use vmalloc() if we want to support large #s of entries.
391 */ 386 */
392 wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe); 387 sz = sizeof(*wc);
388 if (udata && udata->outlen >= sizeof(__u64))
389 sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
390 else
391 sz += sizeof(struct ib_wc) * (cqe + 1);
392 wc = vmalloc_user(sz);
393 if (!wc) { 393 if (!wc) {
394 ret = -ENOMEM; 394 ret = -ENOMEM;
395 goto bail; 395 goto bail;
@@ -430,7 +430,10 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
430 goto bail; 430 goto bail;
431 } 431 }
432 for (n = 0; tail != head; n++) { 432 for (n = 0; tail != head; n++) {
433 wc->queue[n] = old_wc->queue[tail]; 433 if (cq->ip)
434 wc->uqueue[n] = old_wc->uqueue[tail];
435 else
436 wc->kqueue[n] = old_wc->kqueue[tail];
434 if (tail == (u32) cq->ibcq.cqe) 437 if (tail == (u32) cq->ibcq.cqe)
435 tail = 0; 438 tail = 0;
436 else 439 else
@@ -447,9 +450,8 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
447 if (cq->ip) { 450 if (cq->ip) {
448 struct ipath_ibdev *dev = to_idev(ibcq->device); 451 struct ipath_ibdev *dev = to_idev(ibcq->device);
449 struct ipath_mmap_info *ip = cq->ip; 452 struct ipath_mmap_info *ip = cq->ip;
450 u32 s = sizeof *wc + sizeof(struct ib_wc) * cqe;
451 453
452 ipath_update_mmap_info(dev, ip, s, wc); 454 ipath_update_mmap_info(dev, ip, sz, wc);
453 spin_lock_irq(&dev->pending_lock); 455 spin_lock_irq(&dev->pending_lock);
454 if (list_empty(&ip->pending_mmaps)) 456 if (list_empty(&ip->pending_mmaps))
455 list_add(&ip->pending_mmaps, &dev->pending_mmaps); 457 list_add(&ip->pending_mmaps, &dev->pending_mmaps);
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index cf25cdab02f9..4137c7770f1b 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -446,19 +446,21 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
446 dd->ipath_unit, plen - 1, pbufn); 446 dd->ipath_unit, plen - 1, pbufn);
447 447
448 if (dp.pbc_wd == 0) 448 if (dp.pbc_wd == 0)
449 /* Legacy operation, use computed pbc_wd */
450 dp.pbc_wd = plen; 449 dp.pbc_wd = plen;
451
452 /* we have to flush after the PBC for correctness on some cpus
453 * or WC buffer can be written out of order */
454 writeq(dp.pbc_wd, piobuf); 450 writeq(dp.pbc_wd, piobuf);
455 ipath_flush_wc(); 451 /*
456 /* copy all by the trigger word, then flush, so it's written 452 * Copy all by the trigger word, then flush, so it's written
457 * to chip before trigger word, then write trigger word, then 453 * to chip before trigger word, then write trigger word, then
458 * flush again, so packet is sent. */ 454 * flush again, so packet is sent.
459 __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1); 455 */
460 ipath_flush_wc(); 456 if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
461 __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1); 457 ipath_flush_wc();
458 __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
459 ipath_flush_wc();
460 __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
461 } else
462 __iowrite32_copy(piobuf + 2, tmpbuf, clen);
463
462 ipath_flush_wc(); 464 ipath_flush_wc();
463 465
464 ret = sizeof(dp); 466 ret = sizeof(dp);
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 6ccba365a24c..1f152ded1e3c 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -34,6 +34,7 @@
34#include <linux/spinlock.h> 34#include <linux/spinlock.h>
35#include <linux/idr.h> 35#include <linux/idr.h>
36#include <linux/pci.h> 36#include <linux/pci.h>
37#include <linux/io.h>
37#include <linux/delay.h> 38#include <linux/delay.h>
38#include <linux/netdevice.h> 39#include <linux/netdevice.h>
39#include <linux/vmalloc.h> 40#include <linux/vmalloc.h>
@@ -280,6 +281,89 @@ void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd)
280{ 281{
281} 282}
282 283
284/*
285 * Perform a PIO buffer bandwidth write test, to verify proper system
286 * configuration. Even when all the setup calls work, occasionally
287 * BIOS or other issues can prevent write combining from working, or
288 * can cause other bandwidth problems to the chip.
289 *
290 * This test simply writes the same buffer over and over again, and
291 * measures close to the peak bandwidth to the chip (not testing
292 * data bandwidth to the wire). On chips that use an address-based
293 * trigger to send packets to the wire, this is easy. On chips that
294 * use a count to trigger, we want to make sure that the packet doesn't
295 * go out on the wire, or trigger flow control checks.
296 */
297static void ipath_verify_pioperf(struct ipath_devdata *dd)
298{
299 u32 pbnum, cnt, lcnt;
300 u32 __iomem *piobuf;
301 u32 *addr;
302 u64 msecs, emsecs;
303
304 piobuf = ipath_getpiobuf(dd, &pbnum);
305 if (!piobuf) {
306 dev_info(&dd->pcidev->dev,
307 "No PIObufs for checking perf, skipping\n");
308 return;
309 }
310
311 /*
312 * Enough to give us a reasonable test, less than piobuf size, and
313 * likely multiple of store buffer length.
314 */
315 cnt = 1024;
316
317 addr = vmalloc(cnt);
318 if (!addr) {
319 dev_info(&dd->pcidev->dev,
320 "Couldn't get memory for checking PIO perf,"
321 " skipping\n");
322 goto done;
323 }
324
325 preempt_disable(); /* we want reasonably accurate elapsed time */
326 msecs = 1 + jiffies_to_msecs(jiffies);
327 for (lcnt = 0; lcnt < 10000U; lcnt++) {
328 /* wait until we cross msec boundary */
329 if (jiffies_to_msecs(jiffies) >= msecs)
330 break;
331 udelay(1);
332 }
333
334 writeq(0, piobuf); /* length 0, no dwords actually sent */
335 ipath_flush_wc();
336
337 /*
338 * this is only roughly accurate, since even with preempt we
339 * still take interrupts that could take a while. Running for
340 * >= 5 msec seems to get us "close enough" to accurate values
341 */
342 msecs = jiffies_to_msecs(jiffies);
343 for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
344 __iowrite32_copy(piobuf + 64, addr, cnt >> 2);
345 emsecs = jiffies_to_msecs(jiffies) - msecs;
346 }
347
348 /* 1 GiB/sec, slightly over IB SDR line rate */
349 if (lcnt < (emsecs * 1024U))
350 ipath_dev_err(dd,
351 "Performance problem: bandwidth to PIO buffers is "
352 "only %u MiB/sec\n",
353 lcnt / (u32) emsecs);
354 else
355 ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n",
356 lcnt / (u32) emsecs);
357
358 preempt_enable();
359
360 vfree(addr);
361
362done:
363 /* disarm piobuf, so it's available again */
364 ipath_disarm_piobufs(dd, pbnum, 1);
365}
366
283static int __devinit ipath_init_one(struct pci_dev *pdev, 367static int __devinit ipath_init_one(struct pci_dev *pdev,
284 const struct pci_device_id *ent) 368 const struct pci_device_id *ent)
285{ 369{
@@ -298,8 +382,6 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
298 382
299 ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit); 383 ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);
300 384
301 read_bars(dd, pdev, &bar0, &bar1);
302
303 ret = pci_enable_device(pdev); 385 ret = pci_enable_device(pdev);
304 if (ret) { 386 if (ret) {
305 /* This can happen iff: 387 /* This can happen iff:
@@ -445,9 +527,6 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
445 goto bail_regions; 527 goto bail_regions;
446 } 528 }
447 529
448 dd->ipath_deviceid = ent->device; /* save for later use */
449 dd->ipath_vendorid = ent->vendor;
450
451 dd->ipath_pcirev = pdev->revision; 530 dd->ipath_pcirev = pdev->revision;
452 531
453#if defined(__powerpc__) 532#if defined(__powerpc__)
@@ -515,6 +594,8 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
515 ret = 0; 594 ret = 0;
516 } 595 }
517 596
597 ipath_verify_pioperf(dd);
598
518 ipath_device_create_group(&pdev->dev, dd); 599 ipath_device_create_group(&pdev->dev, dd);
519 ipathfs_add_device(dd); 600 ipathfs_add_device(dd);
520 ipath_user_add(dd); 601 ipath_user_add(dd);
@@ -2005,6 +2086,8 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
2005 INFINIPATH_IBCC_LINKINITCMD_SHIFT); 2086 INFINIPATH_IBCC_LINKINITCMD_SHIFT);
2006 ipath_cancel_sends(dd, 0); 2087 ipath_cancel_sends(dd, 0);
2007 2088
2089 signal_ib_event(dd, IB_EVENT_PORT_ERR);
2090
2008 /* disable IBC */ 2091 /* disable IBC */
2009 dd->ipath_control &= ~INFINIPATH_C_LINKENABLE; 2092 dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
2010 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 2093 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
index b4503e9c1e95..bcfa3ccb555f 100644
--- a/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -596,7 +596,11 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
596 goto bail; 596 goto bail;
597 } 597 }
598 598
599 len = offsetof(struct ipath_flash, if_future); 599 /*
600 * read full flash, not just currently used part, since it may have
601 * been written with a newer definition
602 * */
603 len = sizeof(struct ipath_flash);
600 buf = vmalloc(len); 604 buf = vmalloc(len);
601 if (!buf) { 605 if (!buf) {
602 ipath_dev_err(dd, "Couldn't allocate memory to read %u " 606 ipath_dev_err(dd, "Couldn't allocate memory to read %u "
@@ -737,8 +741,10 @@ int ipath_update_eeprom_log(struct ipath_devdata *dd)
737 /* 741 /*
738 * The quick-check above determined that there is something worthy 742 * The quick-check above determined that there is something worthy
739 * of logging, so get current contents and do a more detailed check. 743 * of logging, so get current contents and do a more detailed check.
744 * read full flash, not just currently used part, since it may have
745 * been written with a newer definition
740 */ 746 */
741 len = offsetof(struct ipath_flash, if_future); 747 len = sizeof(struct ipath_flash);
742 buf = vmalloc(len); 748 buf = vmalloc(len);
743 ret = 1; 749 ret = 1;
744 if (!buf) { 750 if (!buf) {
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 33ab0d6b80ff..5de3243a47c3 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -538,6 +538,9 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
538 continue; 538 continue;
539 cnt++; 539 cnt++;
540 if (dd->ipath_pageshadow[porttid + tid]) { 540 if (dd->ipath_pageshadow[porttid + tid]) {
541 struct page *p;
542 p = dd->ipath_pageshadow[porttid + tid];
543 dd->ipath_pageshadow[porttid + tid] = NULL;
541 ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n", 544 ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
542 pd->port_pid, tid); 545 pd->port_pid, tid);
543 dd->ipath_f_put_tid(dd, &tidbase[tid], 546 dd->ipath_f_put_tid(dd, &tidbase[tid],
@@ -546,9 +549,7 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
546 pci_unmap_page(dd->pcidev, 549 pci_unmap_page(dd->pcidev,
547 dd->ipath_physshadow[porttid + tid], 550 dd->ipath_physshadow[porttid + tid],
548 PAGE_SIZE, PCI_DMA_FROMDEVICE); 551 PAGE_SIZE, PCI_DMA_FROMDEVICE);
549 ipath_release_user_pages( 552 ipath_release_user_pages(&p, 1);
550 &dd->ipath_pageshadow[porttid + tid], 1);
551 dd->ipath_pageshadow[porttid + tid] = NULL;
552 ipath_stats.sps_pageunlocks++; 553 ipath_stats.sps_pageunlocks++;
553 } else 554 } else
554 ipath_dbg("Unused tid %u, ignoring\n", tid); 555 ipath_dbg("Unused tid %u, ignoring\n", tid);
@@ -1341,6 +1342,19 @@ bail:
1341 return ret; 1342 return ret;
1342} 1343}
1343 1344
1345static unsigned ipath_poll_hdrqfull(struct ipath_portdata *pd)
1346{
1347 unsigned pollflag = 0;
1348
1349 if ((pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) &&
1350 pd->port_hdrqfull != pd->port_hdrqfull_poll) {
1351 pollflag |= POLLIN | POLLRDNORM;
1352 pd->port_hdrqfull_poll = pd->port_hdrqfull;
1353 }
1354
1355 return pollflag;
1356}
1357
1344static unsigned int ipath_poll_urgent(struct ipath_portdata *pd, 1358static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
1345 struct file *fp, 1359 struct file *fp,
1346 struct poll_table_struct *pt) 1360 struct poll_table_struct *pt)
@@ -1350,22 +1364,20 @@ static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
1350 1364
1351 dd = pd->port_dd; 1365 dd = pd->port_dd;
1352 1366
1353 if (test_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag)) { 1367 /* variable access in ipath_poll_hdrqfull() needs this */
1354 pollflag |= POLLERR; 1368 rmb();
1355 clear_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag); 1369 pollflag = ipath_poll_hdrqfull(pd);
1356 }
1357 1370
1358 if (test_bit(IPATH_PORT_WAITING_URG, &pd->int_flag)) { 1371 if (pd->port_urgent != pd->port_urgent_poll) {
1359 pollflag |= POLLIN | POLLRDNORM; 1372 pollflag |= POLLIN | POLLRDNORM;
1360 clear_bit(IPATH_PORT_WAITING_URG, &pd->int_flag); 1373 pd->port_urgent_poll = pd->port_urgent;
1361 } 1374 }
1362 1375
1363 if (!pollflag) { 1376 if (!pollflag) {
1377 /* this saves a spin_lock/unlock in interrupt handler... */
1364 set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag); 1378 set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag);
1365 if (pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) 1379 /* flush waiting flag so don't miss an event... */
1366 set_bit(IPATH_PORT_WAITING_OVERFLOW, 1380 wmb();
1367 &pd->port_flag);
1368
1369 poll_wait(fp, &pd->port_wait, pt); 1381 poll_wait(fp, &pd->port_wait, pt);
1370 } 1382 }
1371 1383
@@ -1376,31 +1388,27 @@ static unsigned int ipath_poll_next(struct ipath_portdata *pd,
1376 struct file *fp, 1388 struct file *fp,
1377 struct poll_table_struct *pt) 1389 struct poll_table_struct *pt)
1378{ 1390{
1379 u32 head, tail; 1391 u32 head;
1392 u32 tail;
1380 unsigned pollflag = 0; 1393 unsigned pollflag = 0;
1381 struct ipath_devdata *dd; 1394 struct ipath_devdata *dd;
1382 1395
1383 dd = pd->port_dd; 1396 dd = pd->port_dd;
1384 1397
1398 /* variable access in ipath_poll_hdrqfull() needs this */
1399 rmb();
1400 pollflag = ipath_poll_hdrqfull(pd);
1401
1385 head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port); 1402 head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
1386 tail = *(volatile u64 *)pd->port_rcvhdrtail_kvaddr; 1403 tail = *(volatile u64 *)pd->port_rcvhdrtail_kvaddr;
1387 1404
1388 if (test_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag)) { 1405 if (head != tail)
1389 pollflag |= POLLERR;
1390 clear_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag);
1391 }
1392
1393 if (tail != head ||
1394 test_bit(IPATH_PORT_WAITING_RCV, &pd->int_flag)) {
1395 pollflag |= POLLIN | POLLRDNORM; 1406 pollflag |= POLLIN | POLLRDNORM;
1396 clear_bit(IPATH_PORT_WAITING_RCV, &pd->int_flag); 1407 else {
1397 } 1408 /* this saves a spin_lock/unlock in interrupt handler */
1398
1399 if (!pollflag) {
1400 set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); 1409 set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
1401 if (pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) 1410 /* flush waiting flag so we don't miss an event */
1402 set_bit(IPATH_PORT_WAITING_OVERFLOW, 1411 wmb();
1403 &pd->port_flag);
1404 1412
1405 set_bit(pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT, 1413 set_bit(pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT,
1406 &dd->ipath_rcvctrl); 1414 &dd->ipath_rcvctrl);
@@ -1917,6 +1925,12 @@ static int ipath_do_user_init(struct file *fp,
1917 ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n", 1925 ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
1918 pd->port_port, head32); 1926 pd->port_port, head32);
1919 pd->port_tidcursor = 0; /* start at beginning after open */ 1927 pd->port_tidcursor = 0; /* start at beginning after open */
1928
1929 /* initialize poll variables... */
1930 pd->port_urgent = 0;
1931 pd->port_urgent_poll = 0;
1932 pd->port_hdrqfull_poll = pd->port_hdrqfull;
1933
1920 /* 1934 /*
1921 * now enable the port; the tail registers will be written to memory 1935 * now enable the port; the tail registers will be written to memory
1922 * by the chip as soon as it sees the write to 1936 * by the chip as soon as it sees the write to
@@ -2039,9 +2053,11 @@ static int ipath_close(struct inode *in, struct file *fp)
2039 2053
2040 if (dd->ipath_kregbase) { 2054 if (dd->ipath_kregbase) {
2041 int i; 2055 int i;
2042 /* atomically clear receive enable port. */ 2056 /* atomically clear receive enable port and intr avail. */
2043 clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + port, 2057 clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + port,
2044 &dd->ipath_rcvctrl); 2058 &dd->ipath_rcvctrl);
2059 clear_bit(pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT,
2060 &dd->ipath_rcvctrl);
2045 ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl, 2061 ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl,
2046 dd->ipath_rcvctrl); 2062 dd->ipath_rcvctrl);
2047 /* and read back from chip to be sure that nothing 2063 /* and read back from chip to be sure that nothing
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 2e689b974e1f..262c25db05cd 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -130,175 +130,6 @@ static const struct file_operations atomic_counters_ops = {
130 .read = atomic_counters_read, 130 .read = atomic_counters_read,
131}; 131};
132 132
133static ssize_t atomic_node_info_read(struct file *file, char __user *buf,
134 size_t count, loff_t *ppos)
135{
136 u32 nodeinfo[10];
137 struct ipath_devdata *dd;
138 u64 guid;
139
140 dd = file->f_path.dentry->d_inode->i_private;
141
142 guid = be64_to_cpu(dd->ipath_guid);
143
144 nodeinfo[0] = /* BaseVersion is SMA */
145 /* ClassVersion is SMA */
146 (1 << 8) /* NodeType */
147 | (1 << 0); /* NumPorts */
148 nodeinfo[1] = (u32) (guid >> 32);
149 nodeinfo[2] = (u32) (guid & 0xffffffff);
150 /* PortGUID == SystemImageGUID for us */
151 nodeinfo[3] = nodeinfo[1];
152 /* PortGUID == SystemImageGUID for us */
153 nodeinfo[4] = nodeinfo[2];
154 /* PortGUID == NodeGUID for us */
155 nodeinfo[5] = nodeinfo[3];
156 /* PortGUID == NodeGUID for us */
157 nodeinfo[6] = nodeinfo[4];
158 nodeinfo[7] = (4 << 16) /* we support 4 pkeys */
159 | (dd->ipath_deviceid << 0);
160 /* our chip version as 16 bits major, 16 bits minor */
161 nodeinfo[8] = dd->ipath_minrev | (dd->ipath_majrev << 16);
162 nodeinfo[9] = (dd->ipath_unit << 24) | (dd->ipath_vendorid << 0);
163
164 return simple_read_from_buffer(buf, count, ppos, nodeinfo,
165 sizeof nodeinfo);
166}
167
/* File operations for the node info file: only a read handler is supplied. */
168static const struct file_operations atomic_node_info_ops = {
169 .read = atomic_node_info_read,
170};
171
/*
 * Read handler that exposes a 13-word port description to user space.
 *
 * The array is zeroed first and only the non-zero fields are filled in
 * from the device data (LID, unit, last IBC status, IB MTU) and from the
 * cr_errpkey counter register.  The requested window of the array is then
 * copied out through simple_read_from_buffer(), whose result is returned.
 */
172static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
173 size_t count, loff_t *ppos)
174{
175 u32 portinfo[13];
176 u32 tmp, tmp2;
177 struct ipath_devdata *dd;
178
179 dd = file->f_path.dentry->d_inode->i_private;
180
181 /* so we only initialize non-zero fields. */
182 memset(portinfo, 0, sizeof portinfo);
183
184 /*
185 * Notimpl yet M_Key (64)
186 * Notimpl yet GID (64)
187 */
188
189 portinfo[4] = (dd->ipath_lid << 16);
190
191 /*
192 * Notimpl yet SMLID.
193 * CapabilityMask is 0, we don't support any of these
194 * DiagCode is 0; we don't store any diag info for now Notimpl yet
195 * M_KeyLeasePeriod (we don't support M_Key)
196 */
197
198 /* LocalPortNum is whichever port number they ask for */
199 portinfo[7] = (dd->ipath_unit << 24)
200 /* LinkWidthEnabled */
201 | (2 << 16)
202 /* LinkWidthSupported (really 2, but not IB valid) */
203 | (3 << 8)
204 /* LinkWidthActive */
205 | (2 << 0);
/*
 * Translate the masked IBC link state into the PortState code (tmp);
 * tmp2 carries the PortPhysicalState code and stays 5 for the three
 * recognised states.
 */
206 tmp = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
207 tmp2 = 5;
208 if (tmp == IPATH_IBSTATE_INIT)
209 tmp = 2;
210 else if (tmp == IPATH_IBSTATE_ARM)
211 tmp = 3;
212 else if (tmp == IPATH_IBSTATE_ACTIVE)
213 tmp = 4;
214 else {
215 tmp = 0; /* down */
/*
 * NOTE(review): tmp was cleared on the previous line, so tmp & 0xf is
 * always 0 here and PortPhysicalState is reported as 0 -- confirm
 * whether the mask was meant to be taken before tmp was zeroed.
 */
216 tmp2 = tmp & 0xf;
217 }
218
219 portinfo[8] = (1 << 28) /* LinkSpeedSupported */
220 | (tmp << 24) /* PortState */
221 | (tmp2 << 20) /* PortPhysicalState */
222 | (2 << 16)
223
224 /* LinkDownDefaultState */
225 /* M_KeyProtectBits == 0 */
226 /* NotImpl yet LMC == 0 (we can support all values) */
227 | (1 << 4) /* LinkSpeedActive */
228 | (1 << 0); /* LinkSpeedEnabled */
/*
 * Map the MTU in bytes to the small code stored in the top bits of
 * portinfo[9]: 256 -> 1 up to 4096 -> 5; anything else is logged and
 * treated as 2048 (code 4).
 */
229 switch (dd->ipath_ibmtu) {
230 case 4096:
231 tmp = 5;
232 break;
233 case 2048:
234 tmp = 4;
235 break;
236 case 1024:
237 tmp = 3;
238 break;
239 case 512:
240 tmp = 2;
241 break;
242 case 256:
243 tmp = 1;
244 break;
245 default: /* oops, something is wrong */
246 ipath_dbg("Problem, ipath_ibmtu 0x%x not a valid IB MTU, "
247 "treat as 2048\n", dd->ipath_ibmtu);
248 tmp = 4;
249 break;
250 }
251 portinfo[9] = (tmp << 28)
252 /* NeighborMTU */
253 /* Notimpl MasterSMSL */
254 | (1 << 20)
255
256 /* VLCap */
257 /* Notimpl InitType (actually, an SMA decision) */
258 /* VLHighLimit is 0 (only one VL) */
259 ; /* VLArbitrationHighCap is 0 (only one VL) */
260 /*
261 * Note: the chips support a maximum MTU of 4096, but the driver
262 * hasn't implemented this feature yet, so set the maximum
263 * to 2048.
264 */
265 portinfo[10] = /* VLArbitrationLowCap is 0 (only one VL) */
266 /* InitTypeReply is SMA decision */
267 (4 << 16) /* MTUCap 2048 */
268 | (7 << 13) /* VLStallCount */
269 | (0x1f << 8) /* HOQLife */
270 | (1 << 4)
271
272 /* OperationalVLs 0 */
273 /* PartitionEnforcementInbound */
274 /* PartitionEnforcementOutbound not enforced */
275 /* FilterRawinbound not enforced */
276 ; /* FilterRawOutbound not enforced */
277 /* M_KeyViolations are not counted by hardware, SMA can count */
278 tmp = ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
279 /* P_KeyViolations are counted by hardware. */
280 portinfo[11] = ((tmp & 0xffff) << 0);
281 portinfo[12] =
282 /* Q_KeyViolations are not counted by hardware */
283 (1 << 8)
284
285 /* GUIDCap */
286 /* SubnetTimeOut handled by SMA */
287 /* RespTimeValue handled by SMA */
288 ;
289 /* LocalPhyErrors are programmed to max */
290 portinfo[12] |= (0xf << 20)
291 | (0xf << 16) /* OverRunErrors are programmed to max */
292 ;
293
294 return simple_read_from_buffer(buf, count, ppos, portinfo,
295 sizeof portinfo);
296}
297
/* File operations for the port info file: only a read handler is supplied. */
298static const struct file_operations atomic_port_info_ops = {
299 .read = atomic_port_info_read,
300};
301
302static ssize_t flash_read(struct file *file, char __user *buf, 133static ssize_t flash_read(struct file *file, char __user *buf,
303 size_t count, loff_t *ppos) 134 size_t count, loff_t *ppos)
304{ 135{
@@ -427,22 +258,6 @@ static int create_device_files(struct super_block *sb,
427 goto bail; 258 goto bail;
428 } 259 }
429 260
430 ret = create_file("node_info", S_IFREG|S_IRUGO, dir, &tmp,
431 &atomic_node_info_ops, dd);
432 if (ret) {
433 printk(KERN_ERR "create_file(%s/node_info) "
434 "failed: %d\n", unit, ret);
435 goto bail;
436 }
437
438 ret = create_file("port_info", S_IFREG|S_IRUGO, dir, &tmp,
439 &atomic_port_info_ops, dd);
440 if (ret) {
441 printk(KERN_ERR "create_file(%s/port_info) "
442 "failed: %d\n", unit, ret);
443 goto bail;
444 }
445
446 ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp, 261 ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp,
447 &flash_ops, dd); 262 &flash_ops, dd);
448 if (ret) { 263 if (ret) {
@@ -508,8 +323,6 @@ static int remove_device_files(struct super_block *sb,
508 } 323 }
509 324
510 remove_file(dir, "flash"); 325 remove_file(dir, "flash");
511 remove_file(dir, "port_info");
512 remove_file(dir, "node_info");
513 remove_file(dir, "atomic_counters"); 326 remove_file(dir, "atomic_counters");
514 d_delete(dir); 327 d_delete(dir);
515 ret = simple_rmdir(root->d_inode, dir); 328 ret = simple_rmdir(root->d_inode, dir);
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 650745d83fac..ddbebe4bdb27 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -631,56 +631,35 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
631{ 631{
632 char *n = NULL; 632 char *n = NULL;
633 u8 boardrev = dd->ipath_boardrev; 633 u8 boardrev = dd->ipath_boardrev;
634 int ret; 634 int ret = 0;
635 635
636 switch (boardrev) { 636 switch (boardrev) {
637 case 4: /* Ponderosa is one of the bringup boards */
638 n = "Ponderosa";
639 break;
640 case 5: 637 case 5:
641 /* 638 /*
642 * original production board; two production levels, with 639 * original production board; two production levels, with
643 * different serial number ranges. See ipath_ht_early_init() for 640 * different serial number ranges. See ipath_ht_early_init() for
644 * case where we enable IPATH_GPIO_INTR for later serial # range. 641 * case where we enable IPATH_GPIO_INTR for later serial # range.
642 * Original 112* serial number is no longer supported.
645 */ 643 */
646 n = "InfiniPath_QHT7040"; 644 n = "InfiniPath_QHT7040";
647 break; 645 break;
648 case 6:
649 n = "OEM_Board_3";
650 break;
651 case 7: 646 case 7:
652 /* small form factor production board */ 647 /* small form factor production board */
653 n = "InfiniPath_QHT7140"; 648 n = "InfiniPath_QHT7140";
654 break; 649 break;
655 case 8:
656 n = "LS/X-1";
657 break;
658 case 9: /* Comstock bringup test board */
659 n = "Comstock";
660 break;
661 case 10:
662 n = "OEM_Board_2";
663 break;
664 case 11:
665 n = "InfiniPath_HT-470"; /* obsoleted */
666 break;
667 case 12:
668 n = "OEM_Board_4";
669 break;
670 default: /* don't know, just print the number */ 650 default: /* don't know, just print the number */
671 ipath_dev_err(dd, "Don't yet know about board " 651 ipath_dev_err(dd, "Don't yet know about board "
672 "with ID %u\n", boardrev); 652 "with ID %u\n", boardrev);
673 snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u", 653 snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u",
674 boardrev); 654 boardrev);
655 ret = 1;
675 break; 656 break;
676 } 657 }
677 if (n) 658 if (n)
678 snprintf(name, namelen, "%s", n); 659 snprintf(name, namelen, "%s", n);
679 660
680 if (dd->ipath_boardrev != 6 && dd->ipath_boardrev != 7 && 661 if (ret) {
681 dd->ipath_boardrev != 11) {
682 ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name); 662 ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name);
683 ret = 1;
684 goto bail; 663 goto bail;
685 } 664 }
686 if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || 665 if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
@@ -1554,10 +1533,25 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
1554 * can use GPIO interrupts. They have serial #'s starting 1533 * can use GPIO interrupts. They have serial #'s starting
1555 * with 128, rather than 112. 1534 * with 128, rather than 112.
1556 */ 1535 */
1557 dd->ipath_flags |= IPATH_GPIO_INTR; 1536 if (dd->ipath_serial[0] == '1' &&
1558 } else 1537 dd->ipath_serial[1] == '2' &&
1559 ipath_dev_err(dd, "Unsupported InfiniPath serial " 1538 dd->ipath_serial[2] == '8')
1560 "number %.16s!\n", dd->ipath_serial); 1539 dd->ipath_flags |= IPATH_GPIO_INTR;
1540 else {
1541 ipath_dev_err(dd, "Unsupported InfiniPath board "
1542 "(serial number %.16s)!\n",
1543 dd->ipath_serial);
1544 return 1;
1545 }
1546 }
1547
1548 if (dd->ipath_minrev >= 4) {
1549 /* Rev4+ reports extra errors via internal GPIO pins */
1550 dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
1551 dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK;
1552 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1553 dd->ipath_gpio_mask);
1554 }
1561 1555
1562 return 0; 1556 return 0;
1563} 1557}
@@ -1592,7 +1586,10 @@ static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
1592 struct ipath_base_info *kinfo = kbase; 1586 struct ipath_base_info *kinfo = kbase;
1593 1587
1594 kinfo->spi_runtime_flags |= IPATH_RUNTIME_HT | 1588 kinfo->spi_runtime_flags |= IPATH_RUNTIME_HT |
1595 IPATH_RUNTIME_RCVHDR_COPY; 1589 IPATH_RUNTIME_PIO_REGSWAPPED;
1590
1591 if (pd->port_dd->ipath_minrev < 4)
1592 kinfo->spi_runtime_flags |= IPATH_RUNTIME_RCVHDR_COPY;
1596 1593
1597 return 0; 1594 return 0;
1598} 1595}
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 5b6ac9a1a709..0103d6f4847b 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -1143,11 +1143,14 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
1143 pa |= 2 << 29; 1143 pa |= 2 << 29;
1144 } 1144 }
1145 1145
1146 /* workaround chip bug 9437 by writing each TID twice 1146 /*
1147 * and holding a spinlock around the writes, so they don't 1147 * Workaround chip bug 9437 by writing the scratch register
1148 * intermix with other TID (eager or expected) writes 1148 * before and after the TID, and with an io write barrier.
1149 * Unfortunately, this call can be done from interrupt level 1149 * We use a spinlock around the writes, so they can't intermix
1150 * for the port 0 eager TIDs, so we have to use irqsave 1150 * with other TID (eager or expected) writes (the chip bug
1151 * is triggered by back to back TID writes). Unfortunately, this
1152 * call can be done from interrupt level for the port 0 eager TIDs,
1153 * so we have to use irqsave locks.
1151 */ 1154 */
1152 spin_lock_irqsave(&dd->ipath_tid_lock, flags); 1155 spin_lock_irqsave(&dd->ipath_tid_lock, flags);
1153 ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeeddeaf); 1156 ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeeddeaf);
@@ -1273,6 +1276,8 @@ static void ipath_pe_tidtemplate(struct ipath_devdata *dd)
1273static int ipath_pe_early_init(struct ipath_devdata *dd) 1276static int ipath_pe_early_init(struct ipath_devdata *dd)
1274{ 1277{
1275 dd->ipath_flags |= IPATH_4BYTE_TID; 1278 dd->ipath_flags |= IPATH_4BYTE_TID;
1279 if (ipath_unordered_wc())
1280 dd->ipath_flags |= IPATH_PIO_FLUSH_WC;
1276 1281
1277 /* 1282 /*
1278 * For openfabrics, we need to be able to handle an IB header of 1283 * For openfabrics, we need to be able to handle an IB header of
@@ -1343,7 +1348,8 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
1343 dd = pd->port_dd; 1348 dd = pd->port_dd;
1344 1349
1345done: 1350done:
1346 kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE; 1351 kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE |
1352 IPATH_RUNTIME_FORCE_PIOAVAIL | IPATH_RUNTIME_PIO_REGSWAPPED;
1347 return 0; 1353 return 0;
1348} 1354}
1349 1355
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index b29fe7e9b11a..6a5dd5cd773d 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -275,6 +275,16 @@ static char *ib_linkstate(u32 linkstate)
275 return ret; 275 return ret;
276} 276}
277 277
278void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev)
279{
280 struct ib_event event;
281
282 event.device = &dd->verbs_dev->ibdev;
283 event.element.port_num = 1;
284 event.event = ev;
285 ib_dispatch_event(&event);
286}
287
278static void handle_e_ibstatuschanged(struct ipath_devdata *dd, 288static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
279 ipath_err_t errs, int noprint) 289 ipath_err_t errs, int noprint)
280{ 290{
@@ -373,6 +383,8 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
373 dd->ipath_ibpollcnt = 0; /* some state other than 2 or 3 */ 383 dd->ipath_ibpollcnt = 0; /* some state other than 2 or 3 */
374 ipath_stats.sps_iblink++; 384 ipath_stats.sps_iblink++;
375 if (ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) { 385 if (ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
386 if (dd->ipath_flags & IPATH_LINKACTIVE)
387 signal_ib_event(dd, IB_EVENT_PORT_ERR);
376 dd->ipath_flags |= IPATH_LINKDOWN; 388 dd->ipath_flags |= IPATH_LINKDOWN;
377 dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT 389 dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
378 | IPATH_LINKACTIVE | 390 | IPATH_LINKACTIVE |
@@ -405,7 +417,10 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
405 *dd->ipath_statusp |= 417 *dd->ipath_statusp |=
406 IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF; 418 IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
407 dd->ipath_f_setextled(dd, lstate, ltstate); 419 dd->ipath_f_setextled(dd, lstate, ltstate);
420 signal_ib_event(dd, IB_EVENT_PORT_ACTIVE);
408 } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) { 421 } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) {
422 if (dd->ipath_flags & IPATH_LINKACTIVE)
423 signal_ib_event(dd, IB_EVENT_PORT_ERR);
409 /* 424 /*
410 * set INIT and DOWN. Down is checked by most of the other 425 * set INIT and DOWN. Down is checked by most of the other
411 * code, but INIT is useful to know in a few places. 426 * code, but INIT is useful to know in a few places.
@@ -418,6 +433,8 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
418 | IPATH_STATUS_IB_READY); 433 | IPATH_STATUS_IB_READY);
419 dd->ipath_f_setextled(dd, lstate, ltstate); 434 dd->ipath_f_setextled(dd, lstate, ltstate);
420 } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ARM) { 435 } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ARM) {
436 if (dd->ipath_flags & IPATH_LINKACTIVE)
437 signal_ib_event(dd, IB_EVENT_PORT_ERR);
421 dd->ipath_flags |= IPATH_LINKARMED; 438 dd->ipath_flags |= IPATH_LINKARMED;
422 dd->ipath_flags &= 439 dd->ipath_flags &=
423 ~(IPATH_LINKUNK | IPATH_LINKDOWN | IPATH_LINKINIT | 440 ~(IPATH_LINKUNK | IPATH_LINKDOWN | IPATH_LINKINIT |
@@ -688,17 +705,9 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
688 chkerrpkts = 1; 705 chkerrpkts = 1;
689 dd->ipath_lastrcvhdrqtails[i] = tl; 706 dd->ipath_lastrcvhdrqtails[i] = tl;
690 pd->port_hdrqfull++; 707 pd->port_hdrqfull++;
691 if (test_bit(IPATH_PORT_WAITING_OVERFLOW, 708 /* flush hdrqfull so that poll() sees it */
692 &pd->port_flag)) { 709 wmb();
693 clear_bit( 710 wake_up_interruptible(&pd->port_wait);
694 IPATH_PORT_WAITING_OVERFLOW,
695 &pd->port_flag);
696 set_bit(
697 IPATH_PORT_WAITING_OVERFLOW,
698 &pd->int_flag);
699 wake_up_interruptible(
700 &pd->port_wait);
701 }
702 } 711 }
703 } 712 }
704 } 713 }
@@ -960,6 +969,8 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
960 int i; 969 int i;
961 int rcvdint = 0; 970 int rcvdint = 0;
962 971
972 /* test_bit below needs this... */
973 rmb();
963 portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) & 974 portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) &
964 dd->ipath_i_rcvavail_mask) 975 dd->ipath_i_rcvavail_mask)
965 | ((istat >> INFINIPATH_I_RCVURG_SHIFT) & 976 | ((istat >> INFINIPATH_I_RCVURG_SHIFT) &
@@ -967,22 +978,15 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
967 for (i = 1; i < dd->ipath_cfgports; i++) { 978 for (i = 1; i < dd->ipath_cfgports; i++) {
968 struct ipath_portdata *pd = dd->ipath_pd[i]; 979 struct ipath_portdata *pd = dd->ipath_pd[i];
969 if (portr & (1 << i) && pd && pd->port_cnt) { 980 if (portr & (1 << i) && pd && pd->port_cnt) {
970 if (test_bit(IPATH_PORT_WAITING_RCV, 981 if (test_and_clear_bit(IPATH_PORT_WAITING_RCV,
971 &pd->port_flag)) { 982 &pd->port_flag)) {
972 clear_bit(IPATH_PORT_WAITING_RCV,
973 &pd->port_flag);
974 set_bit(IPATH_PORT_WAITING_RCV,
975 &pd->int_flag);
976 clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT, 983 clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT,
977 &dd->ipath_rcvctrl); 984 &dd->ipath_rcvctrl);
978 wake_up_interruptible(&pd->port_wait); 985 wake_up_interruptible(&pd->port_wait);
979 rcvdint = 1; 986 rcvdint = 1;
980 } else if (test_bit(IPATH_PORT_WAITING_URG, 987 } else if (test_and_clear_bit(IPATH_PORT_WAITING_URG,
981 &pd->port_flag)) { 988 &pd->port_flag)) {
982 clear_bit(IPATH_PORT_WAITING_URG, 989 pd->port_urgent++;
983 &pd->port_flag);
984 set_bit(IPATH_PORT_WAITING_URG,
985 &pd->int_flag);
986 wake_up_interruptible(&pd->port_wait); 990 wake_up_interruptible(&pd->port_wait);
987 } 991 }
988 } 992 }
@@ -1085,8 +1089,8 @@ irqreturn_t ipath_intr(int irq, void *data)
1085 * GPIO_2 indicates (on some HT4xx boards) that a packet 1089 * GPIO_2 indicates (on some HT4xx boards) that a packet
1086 * has arrived for Port 0. Checking for this 1090 * has arrived for Port 0. Checking for this
1087 * is controlled by flag IPATH_GPIO_INTR. 1091 * is controlled by flag IPATH_GPIO_INTR.
1088 * GPIO_3..5 on IBA6120 Rev2 chips indicate errors 1092 * GPIO_3..5 on IBA6120 Rev2 and IBA6110 Rev4 chips indicate
1089 * that we need to count. Checking for this 1093 * errors that we need to count. Checking for this
1090 * is controlled by flag IPATH_GPIO_ERRINTRS. 1094 * is controlled by flag IPATH_GPIO_ERRINTRS.
1091 */ 1095 */
1092 u32 gpiostatus; 1096 u32 gpiostatus;
@@ -1137,10 +1141,8 @@ irqreturn_t ipath_intr(int irq, void *data)
1137 /* 1141 /*
1138 * Some unexpected bits remain. If they could have 1142 * Some unexpected bits remain. If they could have
1139 * caused the interrupt, complain and clear. 1143 * caused the interrupt, complain and clear.
1140 * MEA: this is almost certainly non-ideal. 1144 * To avoid repetition of this condition, also clear
1141 * we should look into auto-disable of unexpected 1145 * the mask. It is almost certainly due to error.
1142 * GPIO interrupts, possibly on a "three strikes"
1143 * basis.
1144 */ 1146 */
1145 const u32 mask = (u32) dd->ipath_gpio_mask; 1147 const u32 mask = (u32) dd->ipath_gpio_mask;
1146 1148
@@ -1148,6 +1150,10 @@ irqreturn_t ipath_intr(int irq, void *data)
1148 ipath_dbg("Unexpected GPIO IRQ bits %x\n", 1150 ipath_dbg("Unexpected GPIO IRQ bits %x\n",
1149 gpiostatus & mask); 1151 gpiostatus & mask);
1150 to_clear |= (gpiostatus & mask); 1152 to_clear |= (gpiostatus & mask);
1153 dd->ipath_gpio_mask &= ~(gpiostatus & mask);
1154 ipath_write_kreg(dd,
1155 dd->ipath_kregs->kr_gpio_mask,
1156 dd->ipath_gpio_mask);
1151 } 1157 }
1152 } 1158 }
1153 if (to_clear) { 1159 if (to_clear) {
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 7a7966f7e4ff..8786dd7922e4 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -42,6 +42,7 @@
42#include <linux/pci.h> 42#include <linux/pci.h>
43#include <linux/dma-mapping.h> 43#include <linux/dma-mapping.h>
44#include <asm/io.h> 44#include <asm/io.h>
45#include <rdma/ib_verbs.h>
45 46
46#include "ipath_common.h" 47#include "ipath_common.h"
47#include "ipath_debug.h" 48#include "ipath_debug.h"
@@ -139,6 +140,12 @@ struct ipath_portdata {
139 u32 port_pionowait; 140 u32 port_pionowait;
140 /* total number of rcvhdrqfull errors */ 141 /* total number of rcvhdrqfull errors */
141 u32 port_hdrqfull; 142 u32 port_hdrqfull;
143 /* saved total number of rcvhdrqfull errors for poll edge trigger */
144 u32 port_hdrqfull_poll;
145 /* total number of polled urgent packets */
146 u32 port_urgent;
147 /* saved total number of polled urgent packets for poll edge trigger */
148 u32 port_urgent_poll;
142 /* pid of process using this port */ 149 /* pid of process using this port */
143 pid_t port_pid; 150 pid_t port_pid;
144 /* same size as task_struct .comm[] */ 151 /* same size as task_struct .comm[] */
@@ -724,6 +731,8 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
724#define IPATH_LINKACTIVE 0x200 731#define IPATH_LINKACTIVE 0x200
725 /* link current state is unknown */ 732 /* link current state is unknown */
726#define IPATH_LINKUNK 0x400 733#define IPATH_LINKUNK 0x400
734 /* Write combining flush needed for PIO */
735#define IPATH_PIO_FLUSH_WC 0x1000
727 /* no IB cable, or no device on IB cable */ 736 /* no IB cable, or no device on IB cable */
728#define IPATH_NOCABLE 0x4000 737#define IPATH_NOCABLE 0x4000
729 /* Supports port zero per packet receive interrupts via 738 /* Supports port zero per packet receive interrupts via
@@ -755,8 +764,6 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
755#define IPATH_PORT_MASTER_UNINIT 4 764#define IPATH_PORT_MASTER_UNINIT 4
756 /* waiting for an urgent packet to arrive */ 765 /* waiting for an urgent packet to arrive */
757#define IPATH_PORT_WAITING_URG 5 766#define IPATH_PORT_WAITING_URG 5
758 /* waiting for a header overflow */
759#define IPATH_PORT_WAITING_OVERFLOW 6
760 767
761/* free up any allocated data at closes */ 768/* free up any allocated data at closes */
762void ipath_free_data(struct ipath_portdata *dd); 769void ipath_free_data(struct ipath_portdata *dd);
@@ -769,6 +776,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *);
769int ipath_update_eeprom_log(struct ipath_devdata *dd); 776int ipath_update_eeprom_log(struct ipath_devdata *dd);
770void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr); 777void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
771u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); 778u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
779void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
772 780
773/* 781/*
774 * Set LED override, only the two LSBs have "public" meaning, but 782 * Set LED override, only the two LSBs have "public" meaning, but
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index d61c03044545..3d1432d1e3f4 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -245,7 +245,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
245 245
246 /* Only return the mkey if the protection field allows it. */ 246 /* Only return the mkey if the protection field allows it. */
247 if (smp->method == IB_MGMT_METHOD_SET || dev->mkey == smp->mkey || 247 if (smp->method == IB_MGMT_METHOD_SET || dev->mkey == smp->mkey ||
248 (dev->mkeyprot_resv_lmc >> 6) == 0) 248 dev->mkeyprot == 0)
249 pip->mkey = dev->mkey; 249 pip->mkey = dev->mkey;
250 pip->gid_prefix = dev->gid_prefix; 250 pip->gid_prefix = dev->gid_prefix;
251 lid = dev->dd->ipath_lid; 251 lid = dev->dd->ipath_lid;
@@ -264,7 +264,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
264 pip->portphysstate_linkdown = 264 pip->portphysstate_linkdown =
265 (ipath_cvt_physportstate[ibcstat & 0xf] << 4) | 265 (ipath_cvt_physportstate[ibcstat & 0xf] << 4) |
266 (get_linkdowndefaultstate(dev->dd) ? 1 : 2); 266 (get_linkdowndefaultstate(dev->dd) ? 1 : 2);
267 pip->mkeyprot_resv_lmc = dev->mkeyprot_resv_lmc; 267 pip->mkeyprot_resv_lmc = (dev->mkeyprot << 6) | dev->dd->ipath_lmc;
268 pip->linkspeedactive_enabled = 0x11; /* 2.5Gbps, 2.5Gbps */ 268 pip->linkspeedactive_enabled = 0x11; /* 2.5Gbps, 2.5Gbps */
269 switch (dev->dd->ipath_ibmtu) { 269 switch (dev->dd->ipath_ibmtu) {
270 case 4096: 270 case 4096:
@@ -401,7 +401,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
401 struct ib_port_info *pip = (struct ib_port_info *)smp->data; 401 struct ib_port_info *pip = (struct ib_port_info *)smp->data;
402 struct ib_event event; 402 struct ib_event event;
403 struct ipath_ibdev *dev; 403 struct ipath_ibdev *dev;
404 u32 flags; 404 struct ipath_devdata *dd;
405 char clientrereg = 0; 405 char clientrereg = 0;
406 u16 lid, smlid; 406 u16 lid, smlid;
407 u8 lwe; 407 u8 lwe;
@@ -415,6 +415,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
415 goto err; 415 goto err;
416 416
417 dev = to_idev(ibdev); 417 dev = to_idev(ibdev);
418 dd = dev->dd;
418 event.device = ibdev; 419 event.device = ibdev;
419 event.element.port_num = port; 420 event.element.port_num = port;
420 421
@@ -423,11 +424,12 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
423 dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period); 424 dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
424 425
425 lid = be16_to_cpu(pip->lid); 426 lid = be16_to_cpu(pip->lid);
426 if (lid != dev->dd->ipath_lid) { 427 if (dd->ipath_lid != lid ||
428 dd->ipath_lmc != (pip->mkeyprot_resv_lmc & 7)) {
427 /* Must be a valid unicast LID address. */ 429 /* Must be a valid unicast LID address. */
428 if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE) 430 if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE)
429 goto err; 431 goto err;
430 ipath_set_lid(dev->dd, lid, pip->mkeyprot_resv_lmc & 7); 432 ipath_set_lid(dd, lid, pip->mkeyprot_resv_lmc & 7);
431 event.event = IB_EVENT_LID_CHANGE; 433 event.event = IB_EVENT_LID_CHANGE;
432 ib_dispatch_event(&event); 434 ib_dispatch_event(&event);
433 } 435 }
@@ -461,18 +463,18 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
461 case 0: /* NOP */ 463 case 0: /* NOP */
462 break; 464 break;
463 case 1: /* SLEEP */ 465 case 1: /* SLEEP */
464 if (set_linkdowndefaultstate(dev->dd, 1)) 466 if (set_linkdowndefaultstate(dd, 1))
465 goto err; 467 goto err;
466 break; 468 break;
467 case 2: /* POLL */ 469 case 2: /* POLL */
468 if (set_linkdowndefaultstate(dev->dd, 0)) 470 if (set_linkdowndefaultstate(dd, 0))
469 goto err; 471 goto err;
470 break; 472 break;
471 default: 473 default:
472 goto err; 474 goto err;
473 } 475 }
474 476
475 dev->mkeyprot_resv_lmc = pip->mkeyprot_resv_lmc; 477 dev->mkeyprot = pip->mkeyprot_resv_lmc >> 6;
476 dev->vl_high_limit = pip->vl_high_limit; 478 dev->vl_high_limit = pip->vl_high_limit;
477 479
478 switch ((pip->neighbormtu_mastersmsl >> 4) & 0xF) { 480 switch ((pip->neighbormtu_mastersmsl >> 4) & 0xF) {
@@ -495,7 +497,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
495 /* XXX We have already partially updated our state! */ 497 /* XXX We have already partially updated our state! */
496 goto err; 498 goto err;
497 } 499 }
498 ipath_set_mtu(dev->dd, mtu); 500 ipath_set_mtu(dd, mtu);
499 501
500 dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF; 502 dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;
501 503
@@ -511,16 +513,16 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
511 * later. 513 * later.
512 */ 514 */
513 if (pip->pkey_violations == 0) 515 if (pip->pkey_violations == 0)
514 dev->z_pkey_violations = ipath_get_cr_errpkey(dev->dd); 516 dev->z_pkey_violations = ipath_get_cr_errpkey(dd);
515 517
516 if (pip->qkey_violations == 0) 518 if (pip->qkey_violations == 0)
517 dev->qkey_violations = 0; 519 dev->qkey_violations = 0;
518 520
519 ore = pip->localphyerrors_overrunerrors; 521 ore = pip->localphyerrors_overrunerrors;
520 if (set_phyerrthreshold(dev->dd, (ore >> 4) & 0xF)) 522 if (set_phyerrthreshold(dd, (ore >> 4) & 0xF))
521 goto err; 523 goto err;
522 524
523 if (set_overrunthreshold(dev->dd, (ore & 0xF))) 525 if (set_overrunthreshold(dd, (ore & 0xF)))
524 goto err; 526 goto err;
525 527
526 dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F; 528 dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
@@ -538,7 +540,6 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
538 * is down or is being set to down. 540 * is down or is being set to down.
539 */ 541 */
540 state = pip->linkspeed_portstate & 0xF; 542 state = pip->linkspeed_portstate & 0xF;
541 flags = dev->dd->ipath_flags;
542 lstate = (pip->portphysstate_linkdown >> 4) & 0xF; 543 lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
543 if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP)) 544 if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
544 goto err; 545 goto err;
@@ -554,7 +555,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
554 /* FALLTHROUGH */ 555 /* FALLTHROUGH */
555 case IB_PORT_DOWN: 556 case IB_PORT_DOWN:
556 if (lstate == 0) 557 if (lstate == 0)
557 if (get_linkdowndefaultstate(dev->dd)) 558 if (get_linkdowndefaultstate(dd))
558 lstate = IPATH_IB_LINKDOWN_SLEEP; 559 lstate = IPATH_IB_LINKDOWN_SLEEP;
559 else 560 else
560 lstate = IPATH_IB_LINKDOWN; 561 lstate = IPATH_IB_LINKDOWN;
@@ -566,27 +567,13 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
566 lstate = IPATH_IB_LINKDOWN_DISABLE; 567 lstate = IPATH_IB_LINKDOWN_DISABLE;
567 else 568 else
568 goto err; 569 goto err;
569 ipath_set_linkstate(dev->dd, lstate); 570 ipath_set_linkstate(dd, lstate);
570 if (flags & IPATH_LINKACTIVE) {
571 event.event = IB_EVENT_PORT_ERR;
572 ib_dispatch_event(&event);
573 }
574 break; 571 break;
575 case IB_PORT_ARMED: 572 case IB_PORT_ARMED:
576 if (!(flags & (IPATH_LINKINIT | IPATH_LINKACTIVE))) 573 ipath_set_linkstate(dd, IPATH_IB_LINKARM);
577 break;
578 ipath_set_linkstate(dev->dd, IPATH_IB_LINKARM);
579 if (flags & IPATH_LINKACTIVE) {
580 event.event = IB_EVENT_PORT_ERR;
581 ib_dispatch_event(&event);
582 }
583 break; 574 break;
584 case IB_PORT_ACTIVE: 575 case IB_PORT_ACTIVE:
585 if (!(flags & IPATH_LINKARMED)) 576 ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
586 break;
587 ipath_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
588 event.event = IB_EVENT_PORT_ACTIVE;
589 ib_dispatch_event(&event);
590 break; 577 break;
591 default: 578 default:
592 /* XXX We have already partially updated our state! */ 579 /* XXX We have already partially updated our state! */
@@ -1350,7 +1337,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
1350 if (dev->mkey_lease_timeout && jiffies >= dev->mkey_lease_timeout) { 1337 if (dev->mkey_lease_timeout && jiffies >= dev->mkey_lease_timeout) {
1351 /* Clear timeout and mkey protection field. */ 1338 /* Clear timeout and mkey protection field. */
1352 dev->mkey_lease_timeout = 0; 1339 dev->mkey_lease_timeout = 0;
1353 dev->mkeyprot_resv_lmc &= 0x3F; 1340 dev->mkeyprot = 0;
1354 } 1341 }
1355 1342
1356 /* 1343 /*
@@ -1361,7 +1348,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
1361 dev->mkey != smp->mkey && 1348 dev->mkey != smp->mkey &&
1362 (smp->method == IB_MGMT_METHOD_SET || 1349 (smp->method == IB_MGMT_METHOD_SET ||
1363 (smp->method == IB_MGMT_METHOD_GET && 1350 (smp->method == IB_MGMT_METHOD_GET &&
1364 (dev->mkeyprot_resv_lmc >> 7) != 0))) { 1351 dev->mkeyprot >= 2))) {
1365 if (dev->mkey_violations != 0xFFFF) 1352 if (dev->mkey_violations != 0xFFFF)
1366 ++dev->mkey_violations; 1353 ++dev->mkey_violations;
1367 if (dev->mkey_lease_timeout || 1354 if (dev->mkey_lease_timeout ||
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 1324b35ff1f8..6a41fdbc8e57 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -338,6 +338,7 @@ static void ipath_reset_qp(struct ipath_qp *qp)
338 qp->s_busy = 0; 338 qp->s_busy = 0;
339 qp->s_flags &= IPATH_S_SIGNAL_REQ_WR; 339 qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
340 qp->s_hdrwords = 0; 340 qp->s_hdrwords = 0;
341 qp->s_wqe = NULL;
341 qp->s_psn = 0; 342 qp->s_psn = 0;
342 qp->r_psn = 0; 343 qp->r_psn = 0;
343 qp->r_msn = 0; 344 qp->r_msn = 0;
@@ -376,13 +377,15 @@ static void ipath_reset_qp(struct ipath_qp *qp)
376 * @err: the receive completion error to signal if a RWQE is active 377 * @err: the receive completion error to signal if a RWQE is active
377 * 378 *
378 * Flushes both send and receive work queues. 379 * Flushes both send and receive work queues.
380 * Returns true if last WQE event should be generated.
379 * The QP s_lock should be held and interrupts disabled. 381 * The QP s_lock should be held and interrupts disabled.
380 */ 382 */
381 383
382void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) 384int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
383{ 385{
384 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 386 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
385 struct ib_wc wc; 387 struct ib_wc wc;
388 int ret = 0;
386 389
387 ipath_dbg("QP%d/%d in error state\n", 390 ipath_dbg("QP%d/%d in error state\n",
388 qp->ibqp.qp_num, qp->remote_qpn); 391 qp->ibqp.qp_num, qp->remote_qpn);
@@ -453,7 +456,10 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
453 wq->tail = tail; 456 wq->tail = tail;
454 457
455 spin_unlock(&qp->r_rq.lock); 458 spin_unlock(&qp->r_rq.lock);
456 } 459 } else if (qp->ibqp.event_handler)
460 ret = 1;
461
462 return ret;
457} 463}
458 464
459/** 465/**
@@ -472,6 +478,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
472 struct ipath_qp *qp = to_iqp(ibqp); 478 struct ipath_qp *qp = to_iqp(ibqp);
473 enum ib_qp_state cur_state, new_state; 479 enum ib_qp_state cur_state, new_state;
474 unsigned long flags; 480 unsigned long flags;
481 int lastwqe = 0;
475 int ret; 482 int ret;
476 483
477 spin_lock_irqsave(&qp->s_lock, flags); 484 spin_lock_irqsave(&qp->s_lock, flags);
@@ -531,7 +538,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
531 break; 538 break;
532 539
533 case IB_QPS_ERR: 540 case IB_QPS_ERR:
534 ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); 541 lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
535 break; 542 break;
536 543
537 default: 544 default:
@@ -590,6 +597,14 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
590 qp->state = new_state; 597 qp->state = new_state;
591 spin_unlock_irqrestore(&qp->s_lock, flags); 598 spin_unlock_irqrestore(&qp->s_lock, flags);
592 599
600 if (lastwqe) {
601 struct ib_event ev;
602
603 ev.device = qp->ibqp.device;
604 ev.element.qp = &qp->ibqp;
605 ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
606 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
607 }
593 ret = 0; 608 ret = 0;
594 goto bail; 609 goto bail;
595 610
@@ -751,6 +766,9 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
751 switch (init_attr->qp_type) { 766 switch (init_attr->qp_type) {
752 case IB_QPT_UC: 767 case IB_QPT_UC:
753 case IB_QPT_RC: 768 case IB_QPT_RC:
769 case IB_QPT_UD:
770 case IB_QPT_SMI:
771 case IB_QPT_GSI:
754 sz = sizeof(struct ipath_sge) * 772 sz = sizeof(struct ipath_sge) *
755 init_attr->cap.max_send_sge + 773 init_attr->cap.max_send_sge +
756 sizeof(struct ipath_swqe); 774 sizeof(struct ipath_swqe);
@@ -759,10 +777,6 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
759 ret = ERR_PTR(-ENOMEM); 777 ret = ERR_PTR(-ENOMEM);
760 goto bail; 778 goto bail;
761 } 779 }
762 /* FALLTHROUGH */
763 case IB_QPT_UD:
764 case IB_QPT_SMI:
765 case IB_QPT_GSI:
766 sz = sizeof(*qp); 780 sz = sizeof(*qp);
767 if (init_attr->srq) { 781 if (init_attr->srq) {
768 struct ipath_srq *srq = to_isrq(init_attr->srq); 782 struct ipath_srq *srq = to_isrq(init_attr->srq);
@@ -805,8 +819,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
805 spin_lock_init(&qp->r_rq.lock); 819 spin_lock_init(&qp->r_rq.lock);
806 atomic_set(&qp->refcount, 0); 820 atomic_set(&qp->refcount, 0);
807 init_waitqueue_head(&qp->wait); 821 init_waitqueue_head(&qp->wait);
808 tasklet_init(&qp->s_task, ipath_do_ruc_send, 822 tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
809 (unsigned long)qp);
810 INIT_LIST_HEAD(&qp->piowait); 823 INIT_LIST_HEAD(&qp->piowait);
811 INIT_LIST_HEAD(&qp->timerwait); 824 INIT_LIST_HEAD(&qp->timerwait);
812 qp->state = IB_QPS_RESET; 825 qp->state = IB_QPS_RESET;
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 46744ea2babd..5c29b2bfea17 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -81,9 +81,8 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
81 * Note that we are in the responder's side of the QP context. 81 * Note that we are in the responder's side of the QP context.
82 * Note the QP s_lock must be held. 82 * Note the QP s_lock must be held.
83 */ 83 */
84static int ipath_make_rc_ack(struct ipath_qp *qp, 84static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
85 struct ipath_other_headers *ohdr, 85 struct ipath_other_headers *ohdr, u32 pmtu)
86 u32 pmtu, u32 *bth0p, u32 *bth2p)
87{ 86{
88 struct ipath_ack_entry *e; 87 struct ipath_ack_entry *e;
89 u32 hwords; 88 u32 hwords;
@@ -192,8 +191,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
192 } 191 }
193 qp->s_hdrwords = hwords; 192 qp->s_hdrwords = hwords;
194 qp->s_cur_size = len; 193 qp->s_cur_size = len;
195 *bth0p = bth0 | (1 << 22); /* Set M bit */ 194 ipath_make_ruc_header(dev, qp, ohdr, bth0, bth2);
196 *bth2p = bth2;
197 return 1; 195 return 1;
198 196
199bail: 197bail:
@@ -203,32 +201,39 @@ bail:
203/** 201/**
204 * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) 202 * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
205 * @qp: a pointer to the QP 203 * @qp: a pointer to the QP
206 * @ohdr: a pointer to the IB header being constructed
207 * @pmtu: the path MTU
208 * @bth0p: pointer to the BTH opcode word
209 * @bth2p: pointer to the BTH PSN word
210 * 204 *
211 * Return 1 if constructed; otherwise, return 0. 205 * Return 1 if constructed; otherwise, return 0.
212 * Note the QP s_lock must be held and interrupts disabled.
213 */ 206 */
214int ipath_make_rc_req(struct ipath_qp *qp, 207int ipath_make_rc_req(struct ipath_qp *qp)
215 struct ipath_other_headers *ohdr,
216 u32 pmtu, u32 *bth0p, u32 *bth2p)
217{ 208{
218 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 209 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
210 struct ipath_other_headers *ohdr;
219 struct ipath_sge_state *ss; 211 struct ipath_sge_state *ss;
220 struct ipath_swqe *wqe; 212 struct ipath_swqe *wqe;
221 u32 hwords; 213 u32 hwords;
222 u32 len; 214 u32 len;
223 u32 bth0; 215 u32 bth0;
224 u32 bth2; 216 u32 bth2;
217 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
225 char newreq; 218 char newreq;
219 unsigned long flags;
220 int ret = 0;
221
222 ohdr = &qp->s_hdr.u.oth;
223 if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
224 ohdr = &qp->s_hdr.u.l.oth;
225
226 /*
227 * The lock is needed to synchronize between the sending tasklet,
228 * the receive interrupt handler, and timeout resends.
229 */
230 spin_lock_irqsave(&qp->s_lock, flags);
226 231
227 /* Sending responses has higher priority over sending requests. */ 232 /* Sending responses has higher priority over sending requests. */
228 if ((qp->r_head_ack_queue != qp->s_tail_ack_queue || 233 if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
229 (qp->s_flags & IPATH_S_ACK_PENDING) || 234 (qp->s_flags & IPATH_S_ACK_PENDING) ||
230 qp->s_ack_state != OP(ACKNOWLEDGE)) && 235 qp->s_ack_state != OP(ACKNOWLEDGE)) &&
231 ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p)) 236 ipath_make_rc_ack(dev, qp, ohdr, pmtu))
232 goto done; 237 goto done;
233 238
234 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || 239 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
@@ -560,13 +565,12 @@ int ipath_make_rc_req(struct ipath_qp *qp,
560 qp->s_hdrwords = hwords; 565 qp->s_hdrwords = hwords;
561 qp->s_cur_sge = ss; 566 qp->s_cur_sge = ss;
562 qp->s_cur_size = len; 567 qp->s_cur_size = len;
563 *bth0p = bth0 | (qp->s_state << 24); 568 ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
564 *bth2p = bth2;
565done: 569done:
566 return 1; 570 ret = 1;
567
568bail: 571bail:
569 return 0; 572 spin_unlock_irqrestore(&qp->s_lock, flags);
573 return ret;
570} 574}
571 575
572/** 576/**
@@ -627,7 +631,7 @@ static void send_rc_ack(struct ipath_qp *qp)
627 /* 631 /*
628 * If we can send the ACK, clear the ACK state. 632 * If we can send the ACK, clear the ACK state.
629 */ 633 */
630 if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) { 634 if (ipath_verbs_send(qp, &hdr, hwords, NULL, 0) == 0) {
631 dev->n_unicast_xmit++; 635 dev->n_unicast_xmit++;
632 goto done; 636 goto done;
633 } 637 }
@@ -757,7 +761,9 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
757 wc->vendor_err = 0; 761 wc->vendor_err = 0;
758 wc->byte_len = 0; 762 wc->byte_len = 0;
759 wc->qp = &qp->ibqp; 763 wc->qp = &qp->ibqp;
764 wc->imm_data = 0;
760 wc->src_qp = qp->remote_qpn; 765 wc->src_qp = qp->remote_qpn;
766 wc->wc_flags = 0;
761 wc->pkey_index = 0; 767 wc->pkey_index = 0;
762 wc->slid = qp->remote_ah_attr.dlid; 768 wc->slid = qp->remote_ah_attr.dlid;
763 wc->sl = qp->remote_ah_attr.sl; 769 wc->sl = qp->remote_ah_attr.sl;
@@ -1041,7 +1047,9 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
1041 wc.vendor_err = 0; 1047 wc.vendor_err = 0;
1042 wc.byte_len = 0; 1048 wc.byte_len = 0;
1043 wc.qp = &qp->ibqp; 1049 wc.qp = &qp->ibqp;
1050 wc.imm_data = 0;
1044 wc.src_qp = qp->remote_qpn; 1051 wc.src_qp = qp->remote_qpn;
1052 wc.wc_flags = 0;
1045 wc.pkey_index = 0; 1053 wc.pkey_index = 0;
1046 wc.slid = qp->remote_ah_attr.dlid; 1054 wc.slid = qp->remote_ah_attr.dlid;
1047 wc.sl = qp->remote_ah_attr.sl; 1055 wc.sl = qp->remote_ah_attr.sl;
@@ -1454,6 +1462,19 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1454 goto send_ack; 1462 goto send_ack;
1455 } 1463 }
1456 /* 1464 /*
1465 * Try to send a simple ACK to work around a Mellanox bug
1466 * which doesn't accept a RDMA read response or atomic
1467 * response as an ACK for earlier SENDs or RDMA writes.
1468 */
1469 if (qp->r_head_ack_queue == qp->s_tail_ack_queue &&
1470 !(qp->s_flags & IPATH_S_ACK_PENDING) &&
1471 qp->s_ack_state == OP(ACKNOWLEDGE)) {
1472 spin_unlock_irqrestore(&qp->s_lock, flags);
1473 qp->r_nak_state = 0;
1474 qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
1475 goto send_ack;
1476 }
1477 /*
1457 * Resend the RDMA read or atomic op which 1478 * Resend the RDMA read or atomic op which
1458 * ACKs this duplicate request. 1479 * ACKs this duplicate request.
1459 */ 1480 */
@@ -1476,11 +1497,21 @@ send_ack:
1476static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) 1497static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
1477{ 1498{
1478 unsigned long flags; 1499 unsigned long flags;
1500 int lastwqe;
1479 1501
1480 spin_lock_irqsave(&qp->s_lock, flags); 1502 spin_lock_irqsave(&qp->s_lock, flags);
1481 qp->state = IB_QPS_ERR; 1503 qp->state = IB_QPS_ERR;
1482 ipath_error_qp(qp, err); 1504 lastwqe = ipath_error_qp(qp, err);
1483 spin_unlock_irqrestore(&qp->s_lock, flags); 1505 spin_unlock_irqrestore(&qp->s_lock, flags);
1506
1507 if (lastwqe) {
1508 struct ib_event ev;
1509
1510 ev.device = qp->ibqp.device;
1511 ev.element.qp = &qp->ibqp;
1512 ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
1513 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1514 }
1484} 1515}
1485 1516
1486static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n) 1517static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index c69c25239443..4b6b7ee8e5c1 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -31,6 +31,8 @@
31 * SOFTWARE. 31 * SOFTWARE.
32 */ 32 */
33 33
34#include <linux/spinlock.h>
35
34#include "ipath_verbs.h" 36#include "ipath_verbs.h"
35#include "ipath_kernel.h" 37#include "ipath_kernel.h"
36 38
@@ -106,27 +108,30 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
106 spin_unlock_irqrestore(&dev->pending_lock, flags); 108 spin_unlock_irqrestore(&dev->pending_lock, flags);
107} 109}
108 110
109static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe) 111/**
112 * ipath_init_sge - Validate a RWQE and fill in the SGE state
113 * @qp: the QP
114 *
115 * Return 1 if OK.
116 */
117int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
118 u32 *lengthp, struct ipath_sge_state *ss)
110{ 119{
111 int user = to_ipd(qp->ibqp.pd)->user;
112 int i, j, ret; 120 int i, j, ret;
113 struct ib_wc wc; 121 struct ib_wc wc;
114 122
115 qp->r_len = 0; 123 *lengthp = 0;
116 for (i = j = 0; i < wqe->num_sge; i++) { 124 for (i = j = 0; i < wqe->num_sge; i++) {
117 if (wqe->sg_list[i].length == 0) 125 if (wqe->sg_list[i].length == 0)
118 continue; 126 continue;
119 /* Check LKEY */ 127 /* Check LKEY */
120 if ((user && wqe->sg_list[i].lkey == 0) || 128 if (!ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
121 !ipath_lkey_ok(qp, &qp->r_sg_list[j], &wqe->sg_list[i], 129 &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
122 IB_ACCESS_LOCAL_WRITE))
123 goto bad_lkey; 130 goto bad_lkey;
124 qp->r_len += wqe->sg_list[i].length; 131 *lengthp += wqe->sg_list[i].length;
125 j++; 132 j++;
126 } 133 }
127 qp->r_sge.sge = qp->r_sg_list[0]; 134 ss->num_sge = j;
128 qp->r_sge.sg_list = qp->r_sg_list + 1;
129 qp->r_sge.num_sge = j;
130 ret = 1; 135 ret = 1;
131 goto bail; 136 goto bail;
132 137
@@ -172,6 +177,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
172 u32 tail; 177 u32 tail;
173 int ret; 178 int ret;
174 179
180 qp->r_sge.sg_list = qp->r_sg_list;
181
175 if (qp->ibqp.srq) { 182 if (qp->ibqp.srq) {
176 srq = to_isrq(qp->ibqp.srq); 183 srq = to_isrq(qp->ibqp.srq);
177 handler = srq->ibsrq.event_handler; 184 handler = srq->ibsrq.event_handler;
@@ -199,7 +206,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
199 wqe = get_rwqe_ptr(rq, tail); 206 wqe = get_rwqe_ptr(rq, tail);
200 if (++tail >= rq->size) 207 if (++tail >= rq->size)
201 tail = 0; 208 tail = 0;
202 } while (!wr_id_only && !init_sge(qp, wqe)); 209 } while (!wr_id_only && !ipath_init_sge(qp, wqe, &qp->r_len,
210 &qp->r_sge));
203 qp->r_wr_id = wqe->wr_id; 211 qp->r_wr_id = wqe->wr_id;
204 wq->tail = tail; 212 wq->tail = tail;
205 213
@@ -239,9 +247,9 @@ bail:
239 247
240/** 248/**
241 * ipath_ruc_loopback - handle UC and RC lookback requests 249 * ipath_ruc_loopback - handle UC and RC lookback requests
242 * @sqp: the loopback QP 250 * @sqp: the sending QP
243 * 251 *
244 * This is called from ipath_do_uc_send() or ipath_do_rc_send() to 252 * This is called from ipath_do_send() to
245 * forward a WQE addressed to the same HCA. 253 * forward a WQE addressed to the same HCA.
246 * Note that although we are single threaded due to the tasklet, we still 254 * Note that although we are single threaded due to the tasklet, we still
247 * have to protect against post_send(). We don't have to worry about 255 * have to protect against post_send(). We don't have to worry about
@@ -450,40 +458,18 @@ again:
450 wc.byte_len = wqe->length; 458 wc.byte_len = wqe->length;
451 wc.qp = &qp->ibqp; 459 wc.qp = &qp->ibqp;
452 wc.src_qp = qp->remote_qpn; 460 wc.src_qp = qp->remote_qpn;
453 /* XXX do we know which pkey matched? Only needed for GSI. */
454 wc.pkey_index = 0; 461 wc.pkey_index = 0;
455 wc.slid = qp->remote_ah_attr.dlid; 462 wc.slid = qp->remote_ah_attr.dlid;
456 wc.sl = qp->remote_ah_attr.sl; 463 wc.sl = qp->remote_ah_attr.sl;
457 wc.dlid_path_bits = 0; 464 wc.dlid_path_bits = 0;
465 wc.port_num = 1;
458 /* Signal completion event if the solicited bit is set. */ 466 /* Signal completion event if the solicited bit is set. */
459 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 467 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
460 wqe->wr.send_flags & IB_SEND_SOLICITED); 468 wqe->wr.send_flags & IB_SEND_SOLICITED);
461 469
462send_comp: 470send_comp:
463 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; 471 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
464 472 ipath_send_complete(sqp, wqe, IB_WC_SUCCESS);
465 if (!(sqp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
466 (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
467 wc.wr_id = wqe->wr.wr_id;
468 wc.status = IB_WC_SUCCESS;
469 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
470 wc.vendor_err = 0;
471 wc.byte_len = wqe->length;
472 wc.qp = &sqp->ibqp;
473 wc.src_qp = 0;
474 wc.pkey_index = 0;
475 wc.slid = 0;
476 wc.sl = 0;
477 wc.dlid_path_bits = 0;
478 wc.port_num = 0;
479 ipath_cq_enter(to_icq(sqp->ibqp.send_cq), &wc, 0);
480 }
481
482 /* Update s_last now that we are finished with the SWQE */
483 spin_lock_irqsave(&sqp->s_lock, flags);
484 if (++sqp->s_last >= sqp->s_size)
485 sqp->s_last = 0;
486 spin_unlock_irqrestore(&sqp->s_lock, flags);
487 goto again; 473 goto again;
488 474
489done: 475done:
@@ -491,13 +477,11 @@ done:
491 wake_up(&qp->wait); 477 wake_up(&qp->wait);
492} 478}
493 479
494static int want_buffer(struct ipath_devdata *dd) 480static void want_buffer(struct ipath_devdata *dd)
495{ 481{
496 set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); 482 set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
497 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 483 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
498 dd->ipath_sendctrl); 484 dd->ipath_sendctrl);
499
500 return 0;
501} 485}
502 486
503/** 487/**
@@ -507,14 +491,11 @@ static int want_buffer(struct ipath_devdata *dd)
507 * 491 *
508 * Called when we run out of PIO buffers. 492 * Called when we run out of PIO buffers.
509 */ 493 */
510static void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) 494static void ipath_no_bufs_available(struct ipath_qp *qp,
495 struct ipath_ibdev *dev)
511{ 496{
512 unsigned long flags; 497 unsigned long flags;
513 498
514 spin_lock_irqsave(&dev->pending_lock, flags);
515 if (list_empty(&qp->piowait))
516 list_add_tail(&qp->piowait, &dev->piowait);
517 spin_unlock_irqrestore(&dev->pending_lock, flags);
518 /* 499 /*
519 * Note that as soon as want_buffer() is called and 500 * Note that as soon as want_buffer() is called and
520 * possibly before it returns, ipath_ib_piobufavail() 501 * possibly before it returns, ipath_ib_piobufavail()
@@ -524,101 +505,14 @@ static void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev
524 * We leave the busy flag set so that another post send doesn't 505 * We leave the busy flag set so that another post send doesn't
525 * try to put the same QP on the piowait list again. 506 * try to put the same QP on the piowait list again.
526 */ 507 */
508 spin_lock_irqsave(&dev->pending_lock, flags);
509 list_add_tail(&qp->piowait, &dev->piowait);
510 spin_unlock_irqrestore(&dev->pending_lock, flags);
527 want_buffer(dev->dd); 511 want_buffer(dev->dd);
528 dev->n_piowait++; 512 dev->n_piowait++;
529} 513}
530 514
531/** 515/**
532 * ipath_post_ruc_send - post RC and UC sends
533 * @qp: the QP to post on
534 * @wr: the work request to send
535 */
536int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
537{
538 struct ipath_swqe *wqe;
539 unsigned long flags;
540 u32 next;
541 int i, j;
542 int acc;
543 int ret;
544
545 /*
546 * Don't allow RDMA reads or atomic operations on UC or
547 * undefined operations.
548 * Make sure buffer is large enough to hold the result for atomics.
549 */
550 if (qp->ibqp.qp_type == IB_QPT_UC) {
551 if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) {
552 ret = -EINVAL;
553 goto bail;
554 }
555 } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) {
556 ret = -EINVAL;
557 goto bail;
558 } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
559 (wr->num_sge == 0 ||
560 wr->sg_list[0].length < sizeof(u64) ||
561 wr->sg_list[0].addr & (sizeof(u64) - 1))) {
562 ret = -EINVAL;
563 goto bail;
564 } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) {
565 ret = -EINVAL;
566 goto bail;
567 }
568 /* IB spec says that num_sge == 0 is OK. */
569 if (wr->num_sge > qp->s_max_sge) {
570 ret = -ENOMEM;
571 goto bail;
572 }
573 spin_lock_irqsave(&qp->s_lock, flags);
574 next = qp->s_head + 1;
575 if (next >= qp->s_size)
576 next = 0;
577 if (next == qp->s_last) {
578 spin_unlock_irqrestore(&qp->s_lock, flags);
579 ret = -EINVAL;
580 goto bail;
581 }
582
583 wqe = get_swqe_ptr(qp, qp->s_head);
584 wqe->wr = *wr;
585 wqe->ssn = qp->s_ssn++;
586 wqe->sg_list[0].mr = NULL;
587 wqe->sg_list[0].vaddr = NULL;
588 wqe->sg_list[0].length = 0;
589 wqe->sg_list[0].sge_length = 0;
590 wqe->length = 0;
591 acc = wr->opcode >= IB_WR_RDMA_READ ? IB_ACCESS_LOCAL_WRITE : 0;
592 for (i = 0, j = 0; i < wr->num_sge; i++) {
593 if (to_ipd(qp->ibqp.pd)->user && wr->sg_list[i].lkey == 0) {
594 spin_unlock_irqrestore(&qp->s_lock, flags);
595 ret = -EINVAL;
596 goto bail;
597 }
598 if (wr->sg_list[i].length == 0)
599 continue;
600 if (!ipath_lkey_ok(qp, &wqe->sg_list[j], &wr->sg_list[i],
601 acc)) {
602 spin_unlock_irqrestore(&qp->s_lock, flags);
603 ret = -EINVAL;
604 goto bail;
605 }
606 wqe->length += wr->sg_list[i].length;
607 j++;
608 }
609 wqe->wr.num_sge = j;
610 qp->s_head = next;
611 spin_unlock_irqrestore(&qp->s_lock, flags);
612
613 ipath_do_ruc_send((unsigned long) qp);
614
615 ret = 0;
616
617bail:
618 return ret;
619}
620
621/**
622 * ipath_make_grh - construct a GRH header 516 * ipath_make_grh - construct a GRH header
623 * @dev: a pointer to the ipath device 517 * @dev: a pointer to the ipath device
624 * @hdr: a pointer to the GRH header being constructed 518 * @hdr: a pointer to the GRH header being constructed
@@ -648,39 +542,66 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
648 return sizeof(struct ib_grh) / sizeof(u32); 542 return sizeof(struct ib_grh) / sizeof(u32);
649} 543}
650 544
545void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
546 struct ipath_other_headers *ohdr,
547 u32 bth0, u32 bth2)
548{
549 u16 lrh0;
550 u32 nwords;
551 u32 extra_bytes;
552
553 /* Construct the header. */
554 extra_bytes = -qp->s_cur_size & 3;
555 nwords = (qp->s_cur_size + extra_bytes) >> 2;
556 lrh0 = IPATH_LRH_BTH;
557 if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
558 qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
559 &qp->remote_ah_attr.grh,
560 qp->s_hdrwords, nwords);
561 lrh0 = IPATH_LRH_GRH;
562 }
563 lrh0 |= qp->remote_ah_attr.sl << 4;
564 qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
565 qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
566 qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
567 qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
568 bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
569 bth0 |= extra_bytes << 20;
570 ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22));
571 ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
572 ohdr->bth[2] = cpu_to_be32(bth2);
573}
574
651/** 575/**
652 * ipath_do_ruc_send - perform a send on an RC or UC QP 576 * ipath_do_send - perform a send on a QP
653 * @data: contains a pointer to the QP 577 * @data: contains a pointer to the QP
654 * 578 *
655 * Process entries in the send work queue until credit or queue is 579 * Process entries in the send work queue until credit or queue is
656 * exhausted. Only allow one CPU to send a packet per QP (tasklet). 580 * exhausted. Only allow one CPU to send a packet per QP (tasklet).
657 * Otherwise, after we drop the QP s_lock, two threads could send 581 * Otherwise, two threads could send packets out of order.
658 * packets out of order.
659 */ 582 */
660void ipath_do_ruc_send(unsigned long data) 583void ipath_do_send(unsigned long data)
661{ 584{
662 struct ipath_qp *qp = (struct ipath_qp *)data; 585 struct ipath_qp *qp = (struct ipath_qp *)data;
663 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 586 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
664 unsigned long flags; 587 int (*make_req)(struct ipath_qp *qp);
665 u16 lrh0;
666 u32 nwords;
667 u32 extra_bytes;
668 u32 bth0;
669 u32 bth2;
670 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
671 struct ipath_other_headers *ohdr;
672 588
673 if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy)) 589 if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
674 goto bail; 590 goto bail;
675 591
676 if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) { 592 if ((qp->ibqp.qp_type == IB_QPT_RC ||
593 qp->ibqp.qp_type == IB_QPT_UC) &&
594 qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
677 ipath_ruc_loopback(qp); 595 ipath_ruc_loopback(qp);
678 goto clear; 596 goto clear;
679 } 597 }
680 598
681 ohdr = &qp->s_hdr.u.oth; 599 if (qp->ibqp.qp_type == IB_QPT_RC)
682 if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) 600 make_req = ipath_make_rc_req;
683 ohdr = &qp->s_hdr.u.l.oth; 601 else if (qp->ibqp.qp_type == IB_QPT_UC)
602 make_req = ipath_make_uc_req;
603 else
604 make_req = ipath_make_ud_req;
684 605
685again: 606again:
686 /* Check for a constructed packet to be sent. */ 607 /* Check for a constructed packet to be sent. */
@@ -689,9 +610,8 @@ again:
689 * If no PIO bufs are available, return. An interrupt will 610 * If no PIO bufs are available, return. An interrupt will
690 * call ipath_ib_piobufavail() when one is available. 611 * call ipath_ib_piobufavail() when one is available.
691 */ 612 */
692 if (ipath_verbs_send(dev->dd, qp->s_hdrwords, 613 if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
693 (u32 *) &qp->s_hdr, qp->s_cur_size, 614 qp->s_cur_sge, qp->s_cur_size)) {
694 qp->s_cur_sge)) {
695 ipath_no_bufs_available(qp, dev); 615 ipath_no_bufs_available(qp, dev);
696 goto bail; 616 goto bail;
697 } 617 }
@@ -700,54 +620,42 @@ again:
700 qp->s_hdrwords = 0; 620 qp->s_hdrwords = 0;
701 } 621 }
702 622
703 /* 623 if (make_req(qp))
704 * The lock is needed to synchronize between setting 624 goto again;
705 * qp->s_ack_state, resend timer, and post_send(). 625clear:
706 */ 626 clear_bit(IPATH_S_BUSY, &qp->s_busy);
707 spin_lock_irqsave(&qp->s_lock, flags); 627bail:;
708 628}
709 if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
710 ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
711 ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
712 /*
713 * Clear the busy bit before unlocking to avoid races with
714 * adding new work queue items and then failing to process
715 * them.
716 */
717 clear_bit(IPATH_S_BUSY, &qp->s_busy);
718 spin_unlock_irqrestore(&qp->s_lock, flags);
719 goto bail;
720 }
721 629
722 spin_unlock_irqrestore(&qp->s_lock, flags); 630void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
631 enum ib_wc_status status)
632{
633 u32 last = qp->s_last;
723 634
724 /* Construct the header. */ 635 if (++last == qp->s_size)
725 extra_bytes = (4 - qp->s_cur_size) & 3; 636 last = 0;
726 nwords = (qp->s_cur_size + extra_bytes) >> 2; 637 qp->s_last = last;
727 lrh0 = IPATH_LRH_BTH;
728 if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
729 qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
730 &qp->remote_ah_attr.grh,
731 qp->s_hdrwords, nwords);
732 lrh0 = IPATH_LRH_GRH;
733 }
734 lrh0 |= qp->remote_ah_attr.sl << 4;
735 qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
736 qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
737 qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
738 SIZE_OF_CRC);
739 qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
740 bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
741 bth0 |= extra_bytes << 20;
742 ohdr->bth[0] = cpu_to_be32(bth0);
743 ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
744 ohdr->bth[2] = cpu_to_be32(bth2);
745 638
746 /* Check for more work to do. */ 639 /* See ch. 11.2.4.1 and 10.7.3.1 */
747 goto again; 640 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
641 (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
642 status != IB_WC_SUCCESS) {
643 struct ib_wc wc;
748 644
749clear: 645 wc.wr_id = wqe->wr.wr_id;
750 clear_bit(IPATH_S_BUSY, &qp->s_busy); 646 wc.status = status;
751bail: 647 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
752 return; 648 wc.vendor_err = 0;
649 wc.byte_len = wqe->length;
650 wc.imm_data = 0;
651 wc.qp = &qp->ibqp;
652 wc.src_qp = 0;
653 wc.wc_flags = 0;
654 wc.pkey_index = 0;
655 wc.slid = 0;
656 wc.sl = 0;
657 wc.dlid_path_bits = 0;
658 wc.port_num = 0;
659 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
660 }
753} 661}
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index bae4f56f7271..f0271415cd5b 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -55,7 +55,6 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
55 u64 val64; 55 u64 val64;
56 unsigned long t0, t1; 56 unsigned long t0, t1;
57 u64 ret; 57 u64 ret;
58 unsigned long flags;
59 58
60 t0 = jiffies; 59 t0 = jiffies;
61 /* If fast increment counters are only 32 bits, snapshot them, 60 /* If fast increment counters are only 32 bits, snapshot them,
@@ -92,18 +91,12 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
92 if (creg == dd->ipath_cregs->cr_wordsendcnt) { 91 if (creg == dd->ipath_cregs->cr_wordsendcnt) {
93 if (val != dd->ipath_lastsword) { 92 if (val != dd->ipath_lastsword) {
94 dd->ipath_sword += val - dd->ipath_lastsword; 93 dd->ipath_sword += val - dd->ipath_lastsword;
95 spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
96 dd->ipath_traffic_wds += val - dd->ipath_lastsword;
97 spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
98 dd->ipath_lastsword = val; 94 dd->ipath_lastsword = val;
99 } 95 }
100 val64 = dd->ipath_sword; 96 val64 = dd->ipath_sword;
101 } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) { 97 } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
102 if (val != dd->ipath_lastrword) { 98 if (val != dd->ipath_lastrword) {
103 dd->ipath_rword += val - dd->ipath_lastrword; 99 dd->ipath_rword += val - dd->ipath_lastrword;
104 spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
105 dd->ipath_traffic_wds += val - dd->ipath_lastrword;
106 spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
107 dd->ipath_lastrword = val; 100 dd->ipath_lastrword = val;
108 } 101 }
109 val64 = dd->ipath_rword; 102 val64 = dd->ipath_rword;
@@ -247,6 +240,7 @@ void ipath_get_faststats(unsigned long opaque)
247 u32 val; 240 u32 val;
248 static unsigned cnt; 241 static unsigned cnt;
249 unsigned long flags; 242 unsigned long flags;
243 u64 traffic_wds;
250 244
251 /* 245 /*
252 * don't access the chip while running diags, or memory diags can 246 * don't access the chip while running diags, or memory diags can
@@ -262,12 +256,13 @@ void ipath_get_faststats(unsigned long opaque)
262 * exceeding a threshold, so we need to check the word-counts 256 * exceeding a threshold, so we need to check the word-counts
263 * even if they are 64-bit. 257 * even if they are 64-bit.
264 */ 258 */
265 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); 259 traffic_wds = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt) +
266 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); 260 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
267 spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); 261 spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
268 if (dd->ipath_traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD) 262 traffic_wds -= dd->ipath_traffic_wds;
263 dd->ipath_traffic_wds += traffic_wds;
264 if (traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
269 atomic_add(5, &dd->ipath_active_time); /* S/B #define */ 265 atomic_add(5, &dd->ipath_active_time); /* S/B #define */
270 dd->ipath_traffic_wds = 0;
271 spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); 266 spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
272 267
273 if (dd->ipath_flags & IPATH_32BITCOUNTERS) { 268 if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index 16238cd3a036..e1ad7cfc21fd 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -163,6 +163,42 @@ static ssize_t show_boardversion(struct device *dev,
163 return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_boardversion); 163 return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_boardversion);
164} 164}
165 165
166static ssize_t show_lmc(struct device *dev,
167 struct device_attribute *attr,
168 char *buf)
169{
170 struct ipath_devdata *dd = dev_get_drvdata(dev);
171
172 return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_lmc);
173}
174
175static ssize_t store_lmc(struct device *dev,
176 struct device_attribute *attr,
177 const char *buf,
178 size_t count)
179{
180 struct ipath_devdata *dd = dev_get_drvdata(dev);
181 u16 lmc = 0;
182 int ret;
183
184 ret = ipath_parse_ushort(buf, &lmc);
185 if (ret < 0)
186 goto invalid;
187
188 if (lmc > 7) {
189 ret = -EINVAL;
190 goto invalid;
191 }
192
193 ipath_set_lid(dd, dd->ipath_lid, lmc);
194
195 goto bail;
196invalid:
197 ipath_dev_err(dd, "attempt to set invalid LMC %u\n", lmc);
198bail:
199 return ret;
200}
201
166static ssize_t show_lid(struct device *dev, 202static ssize_t show_lid(struct device *dev,
167 struct device_attribute *attr, 203 struct device_attribute *attr,
168 char *buf) 204 char *buf)
@@ -190,7 +226,7 @@ static ssize_t store_lid(struct device *dev,
190 goto invalid; 226 goto invalid;
191 } 227 }
192 228
193 ipath_set_lid(dd, lid, 0); 229 ipath_set_lid(dd, lid, dd->ipath_lmc);
194 230
195 goto bail; 231 goto bail;
196invalid: 232invalid:
@@ -648,6 +684,7 @@ static struct attribute_group driver_attr_group = {
648}; 684};
649 685
650static DEVICE_ATTR(guid, S_IWUSR | S_IRUGO, show_guid, store_guid); 686static DEVICE_ATTR(guid, S_IWUSR | S_IRUGO, show_guid, store_guid);
687static DEVICE_ATTR(lmc, S_IWUSR | S_IRUGO, show_lmc, store_lmc);
651static DEVICE_ATTR(lid, S_IWUSR | S_IRUGO, show_lid, store_lid); 688static DEVICE_ATTR(lid, S_IWUSR | S_IRUGO, show_lid, store_lid);
652static DEVICE_ATTR(link_state, S_IWUSR, NULL, store_link_state); 689static DEVICE_ATTR(link_state, S_IWUSR, NULL, store_link_state);
653static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid); 690static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid);
@@ -667,6 +704,7 @@ static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
667 704
668static struct attribute *dev_attributes[] = { 705static struct attribute *dev_attributes[] = {
669 &dev_attr_guid.attr, 706 &dev_attr_guid.attr,
707 &dev_attr_lmc.attr,
670 &dev_attr_lid.attr, 708 &dev_attr_lid.attr,
671 &dev_attr_link_state.attr, 709 &dev_attr_link_state.attr,
672 &dev_attr_mlid.attr, 710 &dev_attr_mlid.attr,
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index 8380fbc50d2c..2dd8de20d221 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -37,72 +37,40 @@
37/* cut down ridiculously long IB macro names */ 37/* cut down ridiculously long IB macro names */
38#define OP(x) IB_OPCODE_UC_##x 38#define OP(x) IB_OPCODE_UC_##x
39 39
40static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
41 struct ib_wc *wc)
42{
43 if (++qp->s_last == qp->s_size)
44 qp->s_last = 0;
45 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
46 (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
47 wc->wr_id = wqe->wr.wr_id;
48 wc->status = IB_WC_SUCCESS;
49 wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
50 wc->vendor_err = 0;
51 wc->byte_len = wqe->length;
52 wc->qp = &qp->ibqp;
53 wc->src_qp = qp->remote_qpn;
54 wc->pkey_index = 0;
55 wc->slid = qp->remote_ah_attr.dlid;
56 wc->sl = qp->remote_ah_attr.sl;
57 wc->dlid_path_bits = 0;
58 wc->port_num = 0;
59 ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0);
60 }
61}
62
63/** 40/**
64 * ipath_make_uc_req - construct a request packet (SEND, RDMA write) 41 * ipath_make_uc_req - construct a request packet (SEND, RDMA write)
65 * @qp: a pointer to the QP 42 * @qp: a pointer to the QP
66 * @ohdr: a pointer to the IB header being constructed
67 * @pmtu: the path MTU
68 * @bth0p: pointer to the BTH opcode word
69 * @bth2p: pointer to the BTH PSN word
70 * 43 *
71 * Return 1 if constructed; otherwise, return 0. 44 * Return 1 if constructed; otherwise, return 0.
72 * Note the QP s_lock must be held and interrupts disabled.
73 */ 45 */
74int ipath_make_uc_req(struct ipath_qp *qp, 46int ipath_make_uc_req(struct ipath_qp *qp)
75 struct ipath_other_headers *ohdr,
76 u32 pmtu, u32 *bth0p, u32 *bth2p)
77{ 47{
48 struct ipath_other_headers *ohdr;
78 struct ipath_swqe *wqe; 49 struct ipath_swqe *wqe;
79 u32 hwords; 50 u32 hwords;
80 u32 bth0; 51 u32 bth0;
81 u32 len; 52 u32 len;
82 struct ib_wc wc; 53 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
54 int ret = 0;
83 55
84 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) 56 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))
85 goto done; 57 goto done;
86 58
59 ohdr = &qp->s_hdr.u.oth;
60 if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
61 ohdr = &qp->s_hdr.u.l.oth;
62
87 /* header size in 32-bit words LRH+BTH = (8+12)/4. */ 63 /* header size in 32-bit words LRH+BTH = (8+12)/4. */
88 hwords = 5; 64 hwords = 5;
89 bth0 = 1 << 22; /* Set M bit */ 65 bth0 = 1 << 22; /* Set M bit */
90 66
91 /* Get the next send request. */ 67 /* Get the next send request. */
92 wqe = get_swqe_ptr(qp, qp->s_last); 68 wqe = get_swqe_ptr(qp, qp->s_cur);
69 qp->s_wqe = NULL;
93 switch (qp->s_state) { 70 switch (qp->s_state) {
94 default: 71 default:
95 /*
96 * Signal the completion of the last send
97 * (if there is one).
98 */
99 if (qp->s_last != qp->s_tail) {
100 complete_last_send(qp, wqe, &wc);
101 wqe = get_swqe_ptr(qp, qp->s_last);
102 }
103
104 /* Check if send work queue is empty. */ 72 /* Check if send work queue is empty. */
105 if (qp->s_tail == qp->s_head) 73 if (qp->s_cur == qp->s_head)
106 goto done; 74 goto done;
107 /* 75 /*
108 * Start a new request. 76 * Start a new request.
@@ -131,6 +99,9 @@ int ipath_make_uc_req(struct ipath_qp *qp,
131 } 99 }
132 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 100 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
133 bth0 |= 1 << 23; 101 bth0 |= 1 << 23;
102 qp->s_wqe = wqe;
103 if (++qp->s_cur >= qp->s_size)
104 qp->s_cur = 0;
134 break; 105 break;
135 106
136 case IB_WR_RDMA_WRITE: 107 case IB_WR_RDMA_WRITE:
@@ -157,13 +128,14 @@ int ipath_make_uc_req(struct ipath_qp *qp,
157 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 128 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
158 bth0 |= 1 << 23; 129 bth0 |= 1 << 23;
159 } 130 }
131 qp->s_wqe = wqe;
132 if (++qp->s_cur >= qp->s_size)
133 qp->s_cur = 0;
160 break; 134 break;
161 135
162 default: 136 default:
163 goto done; 137 goto done;
164 } 138 }
165 if (++qp->s_tail >= qp->s_size)
166 qp->s_tail = 0;
167 break; 139 break;
168 140
169 case OP(SEND_FIRST): 141 case OP(SEND_FIRST):
@@ -185,6 +157,9 @@ int ipath_make_uc_req(struct ipath_qp *qp,
185 } 157 }
186 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 158 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
187 bth0 |= 1 << 23; 159 bth0 |= 1 << 23;
160 qp->s_wqe = wqe;
161 if (++qp->s_cur >= qp->s_size)
162 qp->s_cur = 0;
188 break; 163 break;
189 164
190 case OP(RDMA_WRITE_FIRST): 165 case OP(RDMA_WRITE_FIRST):
@@ -207,18 +182,22 @@ int ipath_make_uc_req(struct ipath_qp *qp,
207 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 182 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
208 bth0 |= 1 << 23; 183 bth0 |= 1 << 23;
209 } 184 }
185 qp->s_wqe = wqe;
186 if (++qp->s_cur >= qp->s_size)
187 qp->s_cur = 0;
210 break; 188 break;
211 } 189 }
212 qp->s_len -= len; 190 qp->s_len -= len;
213 qp->s_hdrwords = hwords; 191 qp->s_hdrwords = hwords;
214 qp->s_cur_sge = &qp->s_sge; 192 qp->s_cur_sge = &qp->s_sge;
215 qp->s_cur_size = len; 193 qp->s_cur_size = len;
216 *bth0p = bth0 | (qp->s_state << 24); 194 ipath_make_ruc_header(to_idev(qp->ibqp.device),
217 *bth2p = qp->s_next_psn++ & IPATH_PSN_MASK; 195 qp, ohdr, bth0 | (qp->s_state << 24),
218 return 1; 196 qp->s_next_psn++ & IPATH_PSN_MASK);
197 ret = 1;
219 198
220done: 199done:
221 return 0; 200 return ret;
222} 201}
223 202
224/** 203/**
@@ -485,6 +464,16 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
485 464
486 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): 465 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
487 rdma_last_imm: 466 rdma_last_imm:
467 if (header_in_data) {
468 wc.imm_data = *(__be32 *) data;
469 data += sizeof(__be32);
470 } else {
471 /* Immediate data comes after BTH */
472 wc.imm_data = ohdr->u.imm_data;
473 }
474 hdrsize += 4;
475 wc.wc_flags = IB_WC_WITH_IMM;
476
488 /* Get the number of bytes the message was padded by. */ 477 /* Get the number of bytes the message was padded by. */
489 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; 478 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
490 /* Check for invalid length. */ 479 /* Check for invalid length. */
@@ -505,16 +494,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
505 dev->n_pkt_drops++; 494 dev->n_pkt_drops++;
506 goto done; 495 goto done;
507 } 496 }
508 if (header_in_data) { 497 wc.byte_len = qp->r_len;
509 wc.imm_data = *(__be32 *) data;
510 data += sizeof(__be32);
511 } else {
512 /* Immediate data comes after BTH */
513 wc.imm_data = ohdr->u.imm_data;
514 }
515 hdrsize += 4;
516 wc.wc_flags = IB_WC_WITH_IMM;
517 wc.byte_len = 0;
518 goto last_imm; 498 goto last_imm;
519 499
520 case OP(RDMA_WRITE_LAST): 500 case OP(RDMA_WRITE_LAST):
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index f9a3338a5fb7..16a2a938b520 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -36,68 +36,17 @@
36#include "ipath_verbs.h" 36#include "ipath_verbs.h"
37#include "ipath_kernel.h" 37#include "ipath_kernel.h"
38 38
39static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
40 u32 *lengthp, struct ipath_sge_state *ss)
41{
42 int user = to_ipd(qp->ibqp.pd)->user;
43 int i, j, ret;
44 struct ib_wc wc;
45
46 *lengthp = 0;
47 for (i = j = 0; i < wqe->num_sge; i++) {
48 if (wqe->sg_list[i].length == 0)
49 continue;
50 /* Check LKEY */
51 if ((user && wqe->sg_list[i].lkey == 0) ||
52 !ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
53 &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
54 goto bad_lkey;
55 *lengthp += wqe->sg_list[i].length;
56 j++;
57 }
58 ss->num_sge = j;
59 ret = 1;
60 goto bail;
61
62bad_lkey:
63 wc.wr_id = wqe->wr_id;
64 wc.status = IB_WC_LOC_PROT_ERR;
65 wc.opcode = IB_WC_RECV;
66 wc.vendor_err = 0;
67 wc.byte_len = 0;
68 wc.imm_data = 0;
69 wc.qp = &qp->ibqp;
70 wc.src_qp = 0;
71 wc.wc_flags = 0;
72 wc.pkey_index = 0;
73 wc.slid = 0;
74 wc.sl = 0;
75 wc.dlid_path_bits = 0;
76 wc.port_num = 0;
77 /* Signal solicited completion event. */
78 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
79 ret = 0;
80bail:
81 return ret;
82}
83
84/** 39/**
85 * ipath_ud_loopback - handle send on loopback QPs 40 * ipath_ud_loopback - handle send on loopback QPs
86 * @sqp: the QP 41 * @sqp: the sending QP
87 * @ss: the SGE state 42 * @swqe: the send work request
88 * @length: the length of the data to send
89 * @wr: the work request
90 * @wc: the work completion entry
91 * 43 *
92 * This is called from ipath_post_ud_send() to forward a WQE addressed 44 * This is called from ipath_make_ud_req() to forward a WQE addressed
93 * to the same HCA. 45 * to the same HCA.
94 * Note that the receive interrupt handler may be calling ipath_ud_rcv() 46 * Note that the receive interrupt handler may be calling ipath_ud_rcv()
95 * while this is being called. 47 * while this is being called.
96 */ 48 */
97static void ipath_ud_loopback(struct ipath_qp *sqp, 49static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
98 struct ipath_sge_state *ss,
99 u32 length, struct ib_send_wr *wr,
100 struct ib_wc *wc)
101{ 50{
102 struct ipath_ibdev *dev = to_idev(sqp->ibqp.device); 51 struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
103 struct ipath_qp *qp; 52 struct ipath_qp *qp;
@@ -110,12 +59,18 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
110 struct ipath_rwq *wq; 59 struct ipath_rwq *wq;
111 struct ipath_rwqe *wqe; 60 struct ipath_rwqe *wqe;
112 void (*handler)(struct ib_event *, void *); 61 void (*handler)(struct ib_event *, void *);
62 struct ib_wc wc;
113 u32 tail; 63 u32 tail;
114 u32 rlen; 64 u32 rlen;
65 u32 length;
115 66
116 qp = ipath_lookup_qpn(&dev->qp_table, wr->wr.ud.remote_qpn); 67 qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn);
117 if (!qp) 68 if (!qp) {
118 return; 69 dev->n_pkt_drops++;
70 goto send_comp;
71 }
72
73 rsge.sg_list = NULL;
119 74
120 /* 75 /*
121 * Check that the qkey matches (except for QP0, see 9.6.1.4.1). 76 * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
@@ -123,39 +78,34 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
123 * qkey from the QP context instead of the WR (see 10.2.5). 78 * qkey from the QP context instead of the WR (see 10.2.5).
124 */ 79 */
125 if (unlikely(qp->ibqp.qp_num && 80 if (unlikely(qp->ibqp.qp_num &&
126 ((int) wr->wr.ud.remote_qkey < 0 81 ((int) swqe->wr.wr.ud.remote_qkey < 0 ?
127 ? qp->qkey : wr->wr.ud.remote_qkey) != qp->qkey)) { 82 sqp->qkey : swqe->wr.wr.ud.remote_qkey) != qp->qkey)) {
128 /* XXX OK to lose a count once in a while. */ 83 /* XXX OK to lose a count once in a while. */
129 dev->qkey_violations++; 84 dev->qkey_violations++;
130 dev->n_pkt_drops++; 85 dev->n_pkt_drops++;
131 goto done; 86 goto drop;
132 } 87 }
133 88
134 /* 89 /*
135 * A GRH is expected to preceed the data even if not 90 * A GRH is expected to preceed the data even if not
136 * present on the wire. 91 * present on the wire.
137 */ 92 */
138 wc->byte_len = length + sizeof(struct ib_grh); 93 length = swqe->length;
94 wc.byte_len = length + sizeof(struct ib_grh);
139 95
140 if (wr->opcode == IB_WR_SEND_WITH_IMM) { 96 if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
141 wc->wc_flags = IB_WC_WITH_IMM; 97 wc.wc_flags = IB_WC_WITH_IMM;
142 wc->imm_data = wr->imm_data; 98 wc.imm_data = swqe->wr.imm_data;
143 } else { 99 } else {
144 wc->wc_flags = 0; 100 wc.wc_flags = 0;
145 wc->imm_data = 0; 101 wc.imm_data = 0;
146 } 102 }
147 103
148 if (wr->num_sge > 1) {
149 rsge.sg_list = kmalloc((wr->num_sge - 1) *
150 sizeof(struct ipath_sge),
151 GFP_ATOMIC);
152 } else
153 rsge.sg_list = NULL;
154
155 /* 104 /*
156 * Get the next work request entry to find where to put the data. 105 * This would be a lot simpler if we could call ipath_get_rwqe()
157 * Note that it is safe to drop the lock after changing rq->tail 106 * but that uses state that the receive interrupt handler uses
158 * since ipath_post_receive() won't fill the empty slot. 107 * so we would need to lock out receive interrupts while doing
108 * local loopback.
159 */ 109 */
160 if (qp->ibqp.srq) { 110 if (qp->ibqp.srq) {
161 srq = to_isrq(qp->ibqp.srq); 111 srq = to_isrq(qp->ibqp.srq);
@@ -167,32 +117,53 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
167 rq = &qp->r_rq; 117 rq = &qp->r_rq;
168 } 118 }
169 119
120 if (rq->max_sge > 1) {
121 /*
122 * XXX We could use GFP_KERNEL if ipath_do_send()
123 * was always called from the tasklet instead of
124 * from ipath_post_send().
125 */
126 rsge.sg_list = kmalloc((rq->max_sge - 1) *
127 sizeof(struct ipath_sge),
128 GFP_ATOMIC);
129 if (!rsge.sg_list) {
130 dev->n_pkt_drops++;
131 goto drop;
132 }
133 }
134
135 /*
136 * Get the next work request entry to find where to put the data.
137 * Note that it is safe to drop the lock after changing rq->tail
138 * since ipath_post_receive() won't fill the empty slot.
139 */
170 spin_lock_irqsave(&rq->lock, flags); 140 spin_lock_irqsave(&rq->lock, flags);
171 wq = rq->wq; 141 wq = rq->wq;
172 tail = wq->tail; 142 tail = wq->tail;
173 while (1) { 143 /* Validate tail before using it since it is user writable. */
174 if (unlikely(tail == wq->head)) { 144 if (tail >= rq->size)
175 spin_unlock_irqrestore(&rq->lock, flags); 145 tail = 0;
176 dev->n_pkt_drops++; 146 if (unlikely(tail == wq->head)) {
177 goto bail_sge; 147 spin_unlock_irqrestore(&rq->lock, flags);
178 } 148 dev->n_pkt_drops++;
179 /* Make sure entry is read after head index is read. */ 149 goto drop;
180 smp_rmb(); 150 }
181 wqe = get_rwqe_ptr(rq, tail); 151 wqe = get_rwqe_ptr(rq, tail);
182 if (++tail >= rq->size) 152 if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) {
183 tail = 0; 153 spin_unlock_irqrestore(&rq->lock, flags);
184 if (init_sge(qp, wqe, &rlen, &rsge)) 154 dev->n_pkt_drops++;
185 break; 155 goto drop;
186 wq->tail = tail;
187 } 156 }
188 /* Silently drop packets which are too big. */ 157 /* Silently drop packets which are too big. */
189 if (wc->byte_len > rlen) { 158 if (wc.byte_len > rlen) {
190 spin_unlock_irqrestore(&rq->lock, flags); 159 spin_unlock_irqrestore(&rq->lock, flags);
191 dev->n_pkt_drops++; 160 dev->n_pkt_drops++;
192 goto bail_sge; 161 goto drop;
193 } 162 }
163 if (++tail >= rq->size)
164 tail = 0;
194 wq->tail = tail; 165 wq->tail = tail;
195 wc->wr_id = wqe->wr_id; 166 wc.wr_id = wqe->wr_id;
196 if (handler) { 167 if (handler) {
197 u32 n; 168 u32 n;
198 169
@@ -221,13 +192,13 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
221 } else 192 } else
222 spin_unlock_irqrestore(&rq->lock, flags); 193 spin_unlock_irqrestore(&rq->lock, flags);
223 194
224 ah_attr = &to_iah(wr->wr.ud.ah)->attr; 195 ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr;
225 if (ah_attr->ah_flags & IB_AH_GRH) { 196 if (ah_attr->ah_flags & IB_AH_GRH) {
226 ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh)); 197 ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
227 wc->wc_flags |= IB_WC_GRH; 198 wc.wc_flags |= IB_WC_GRH;
228 } else 199 } else
229 ipath_skip_sge(&rsge, sizeof(struct ib_grh)); 200 ipath_skip_sge(&rsge, sizeof(struct ib_grh));
230 sge = &ss->sge; 201 sge = swqe->sg_list;
231 while (length) { 202 while (length) {
232 u32 len = sge->length; 203 u32 len = sge->length;
233 204
@@ -241,8 +212,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
241 sge->length -= len; 212 sge->length -= len;
242 sge->sge_length -= len; 213 sge->sge_length -= len;
243 if (sge->sge_length == 0) { 214 if (sge->sge_length == 0) {
244 if (--ss->num_sge) 215 if (--swqe->wr.num_sge)
245 *sge = *ss->sg_list++; 216 sge++;
246 } else if (sge->length == 0 && sge->mr != NULL) { 217 } else if (sge->length == 0 && sge->mr != NULL) {
247 if (++sge->n >= IPATH_SEGSZ) { 218 if (++sge->n >= IPATH_SEGSZ) {
248 if (++sge->m >= sge->mr->mapsz) 219 if (++sge->m >= sge->mr->mapsz)
@@ -256,123 +227,60 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
256 } 227 }
257 length -= len; 228 length -= len;
258 } 229 }
259 wc->status = IB_WC_SUCCESS; 230 wc.status = IB_WC_SUCCESS;
260 wc->opcode = IB_WC_RECV; 231 wc.opcode = IB_WC_RECV;
261 wc->vendor_err = 0; 232 wc.vendor_err = 0;
262 wc->qp = &qp->ibqp; 233 wc.qp = &qp->ibqp;
263 wc->src_qp = sqp->ibqp.qp_num; 234 wc.src_qp = sqp->ibqp.qp_num;
264 /* XXX do we know which pkey matched? Only needed for GSI. */ 235 /* XXX do we know which pkey matched? Only needed for GSI. */
265 wc->pkey_index = 0; 236 wc.pkey_index = 0;
266 wc->slid = dev->dd->ipath_lid | 237 wc.slid = dev->dd->ipath_lid |
267 (ah_attr->src_path_bits & 238 (ah_attr->src_path_bits &
268 ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1)); 239 ((1 << dev->dd->ipath_lmc) - 1));
269 wc->sl = ah_attr->sl; 240 wc.sl = ah_attr->sl;
270 wc->dlid_path_bits = 241 wc.dlid_path_bits =
271 ah_attr->dlid & ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); 242 ah_attr->dlid & ((1 << dev->dd->ipath_lmc) - 1);
243 wc.port_num = 1;
272 /* Signal completion event if the solicited bit is set. */ 244 /* Signal completion event if the solicited bit is set. */
273 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc, 245 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
274 wr->send_flags & IB_SEND_SOLICITED); 246 swqe->wr.send_flags & IB_SEND_SOLICITED);
275 247drop:
276bail_sge:
277 kfree(rsge.sg_list); 248 kfree(rsge.sg_list);
278done:
279 if (atomic_dec_and_test(&qp->refcount)) 249 if (atomic_dec_and_test(&qp->refcount))
280 wake_up(&qp->wait); 250 wake_up(&qp->wait);
251send_comp:
252 ipath_send_complete(sqp, swqe, IB_WC_SUCCESS);
281} 253}
282 254
283/** 255/**
284 * ipath_post_ud_send - post a UD send on QP 256 * ipath_make_ud_req - construct a UD request packet
285 * @qp: the QP 257 * @qp: the QP
286 * @wr: the work request
287 * 258 *
288 * Note that we actually send the data as it is posted instead of putting 259 * Return 1 if constructed; otherwise, return 0.
289 * the request into a ring buffer. If we wanted to use a ring buffer,
290 * we would need to save a reference to the destination address in the SWQE.
291 */ 260 */
292int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) 261int ipath_make_ud_req(struct ipath_qp *qp)
293{ 262{
294 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 263 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
295 struct ipath_other_headers *ohdr; 264 struct ipath_other_headers *ohdr;
296 struct ib_ah_attr *ah_attr; 265 struct ib_ah_attr *ah_attr;
297 struct ipath_sge_state ss; 266 struct ipath_swqe *wqe;
298 struct ipath_sge *sg_list;
299 struct ib_wc wc;
300 u32 hwords;
301 u32 nwords; 267 u32 nwords;
302 u32 len;
303 u32 extra_bytes; 268 u32 extra_bytes;
304 u32 bth0; 269 u32 bth0;
305 u16 lrh0; 270 u16 lrh0;
306 u16 lid; 271 u16 lid;
307 int i; 272 int ret = 0;
308 int ret;
309 273
310 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) { 274 if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)))
311 ret = 0;
312 goto bail; 275 goto bail;
313 }
314 276
315 if (wr->wr.ud.ah->pd != qp->ibqp.pd) { 277 if (qp->s_cur == qp->s_head)
316 ret = -EPERM;
317 goto bail; 278 goto bail;
318 }
319 279
320 /* IB spec says that num_sge == 0 is OK. */ 280 wqe = get_swqe_ptr(qp, qp->s_cur);
321 if (wr->num_sge > qp->s_max_sge) {
322 ret = -EINVAL;
323 goto bail;
324 }
325
326 if (wr->num_sge > 1) {
327 sg_list = kmalloc((qp->s_max_sge - 1) * sizeof(*sg_list),
328 GFP_ATOMIC);
329 if (!sg_list) {
330 ret = -ENOMEM;
331 goto bail;
332 }
333 } else
334 sg_list = NULL;
335
336 /* Check the buffer to send. */
337 ss.sg_list = sg_list;
338 ss.sge.mr = NULL;
339 ss.sge.vaddr = NULL;
340 ss.sge.length = 0;
341 ss.sge.sge_length = 0;
342 ss.num_sge = 0;
343 len = 0;
344 for (i = 0; i < wr->num_sge; i++) {
345 /* Check LKEY */
346 if (to_ipd(qp->ibqp.pd)->user && wr->sg_list[i].lkey == 0) {
347 ret = -EINVAL;
348 goto bail;
349 }
350
351 if (wr->sg_list[i].length == 0)
352 continue;
353 if (!ipath_lkey_ok(qp, ss.num_sge ?
354 sg_list + ss.num_sge - 1 : &ss.sge,
355 &wr->sg_list[i], 0)) {
356 ret = -EINVAL;
357 goto bail;
358 }
359 len += wr->sg_list[i].length;
360 ss.num_sge++;
361 }
362 /* Check for invalid packet size. */
363 if (len > dev->dd->ipath_ibmtu) {
364 ret = -EINVAL;
365 goto bail;
366 }
367 extra_bytes = (4 - len) & 3;
368 nwords = (len + extra_bytes) >> 2;
369 281
370 /* Construct the header. */ 282 /* Construct the header. */
371 ah_attr = &to_iah(wr->wr.ud.ah)->attr; 283 ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
372 if (ah_attr->dlid == 0) {
373 ret = -EINVAL;
374 goto bail;
375 }
376 if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) { 284 if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) {
377 if (ah_attr->dlid != IPATH_PERMISSIVE_LID) 285 if (ah_attr->dlid != IPATH_PERMISSIVE_LID)
378 dev->n_multicast_xmit++; 286 dev->n_multicast_xmit++;
@@ -381,74 +289,63 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
381 } else { 289 } else {
382 dev->n_unicast_xmit++; 290 dev->n_unicast_xmit++;
383 lid = ah_attr->dlid & 291 lid = ah_attr->dlid &
384 ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); 292 ~((1 << dev->dd->ipath_lmc) - 1);
385 if (unlikely(lid == dev->dd->ipath_lid)) { 293 if (unlikely(lid == dev->dd->ipath_lid)) {
386 /* 294 ipath_ud_loopback(qp, wqe);
387 * Pass in an uninitialized ib_wc to save stack
388 * space.
389 */
390 ipath_ud_loopback(qp, &ss, len, wr, &wc);
391 goto done; 295 goto done;
392 } 296 }
393 } 297 }
298
299 extra_bytes = -wqe->length & 3;
300 nwords = (wqe->length + extra_bytes) >> 2;
301
302 /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
303 qp->s_hdrwords = 7;
304 if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
305 qp->s_hdrwords++;
306 qp->s_cur_size = wqe->length;
307 qp->s_cur_sge = &qp->s_sge;
308 qp->s_wqe = wqe;
309 qp->s_sge.sge = wqe->sg_list[0];
310 qp->s_sge.sg_list = wqe->sg_list + 1;
311 qp->s_sge.num_sge = wqe->wr.num_sge;
312
394 if (ah_attr->ah_flags & IB_AH_GRH) { 313 if (ah_attr->ah_flags & IB_AH_GRH) {
395 /* Header size in 32-bit words. */ 314 /* Header size in 32-bit words. */
396 hwords = 17; 315 qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
316 &ah_attr->grh,
317 qp->s_hdrwords, nwords);
397 lrh0 = IPATH_LRH_GRH; 318 lrh0 = IPATH_LRH_GRH;
398 ohdr = &qp->s_hdr.u.l.oth; 319 ohdr = &qp->s_hdr.u.l.oth;
399 qp->s_hdr.u.l.grh.version_tclass_flow =
400 cpu_to_be32((6 << 28) |
401 (ah_attr->grh.traffic_class << 20) |
402 ah_attr->grh.flow_label);
403 qp->s_hdr.u.l.grh.paylen =
404 cpu_to_be16(((wr->opcode ==
405 IB_WR_SEND_WITH_IMM ? 6 : 5) +
406 nwords + SIZE_OF_CRC) << 2);
407 /* next_hdr is defined by C8-7 in ch. 8.4.1 */
408 qp->s_hdr.u.l.grh.next_hdr = 0x1B;
409 qp->s_hdr.u.l.grh.hop_limit = ah_attr->grh.hop_limit;
410 /* The SGID is 32-bit aligned. */
411 qp->s_hdr.u.l.grh.sgid.global.subnet_prefix =
412 dev->gid_prefix;
413 qp->s_hdr.u.l.grh.sgid.global.interface_id =
414 dev->dd->ipath_guid;
415 qp->s_hdr.u.l.grh.dgid = ah_attr->grh.dgid;
416 /* 320 /*
417 * Don't worry about sending to locally attached multicast 321 * Don't worry about sending to locally attached multicast
418 * QPs. It is unspecified by the spec. what happens. 322 * QPs. It is unspecified by the spec. what happens.
419 */ 323 */
420 } else { 324 } else {
421 /* Header size in 32-bit words. */ 325 /* Header size in 32-bit words. */
422 hwords = 7;
423 lrh0 = IPATH_LRH_BTH; 326 lrh0 = IPATH_LRH_BTH;
424 ohdr = &qp->s_hdr.u.oth; 327 ohdr = &qp->s_hdr.u.oth;
425 } 328 }
426 if (wr->opcode == IB_WR_SEND_WITH_IMM) { 329 if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
427 ohdr->u.ud.imm_data = wr->imm_data; 330 ohdr->u.ud.imm_data = wqe->wr.imm_data;
428 wc.imm_data = wr->imm_data;
429 hwords += 1;
430 bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24; 331 bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
431 } else if (wr->opcode == IB_WR_SEND) { 332 } else
432 wc.imm_data = 0;
433 bth0 = IB_OPCODE_UD_SEND_ONLY << 24; 333 bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
434 } else {
435 ret = -EINVAL;
436 goto bail;
437 }
438 lrh0 |= ah_attr->sl << 4; 334 lrh0 |= ah_attr->sl << 4;
439 if (qp->ibqp.qp_type == IB_QPT_SMI) 335 if (qp->ibqp.qp_type == IB_QPT_SMI)
440 lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */ 336 lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
441 qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); 337 qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
442 qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ 338 qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
443 qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC); 339 qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
340 SIZE_OF_CRC);
444 lid = dev->dd->ipath_lid; 341 lid = dev->dd->ipath_lid;
445 if (lid) { 342 if (lid) {
446 lid |= ah_attr->src_path_bits & 343 lid |= ah_attr->src_path_bits &
447 ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); 344 ((1 << dev->dd->ipath_lmc) - 1);
448 qp->s_hdr.lrh[3] = cpu_to_be16(lid); 345 qp->s_hdr.lrh[3] = cpu_to_be16(lid);
449 } else 346 } else
450 qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE; 347 qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
451 if (wr->send_flags & IB_SEND_SOLICITED) 348 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
452 bth0 |= 1 << 23; 349 bth0 |= 1 << 23;
453 bth0 |= extra_bytes << 20; 350 bth0 |= extra_bytes << 20;
454 bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY : 351 bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
@@ -460,38 +357,20 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
460 ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE && 357 ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
461 ah_attr->dlid != IPATH_PERMISSIVE_LID ? 358 ah_attr->dlid != IPATH_PERMISSIVE_LID ?
462 __constant_cpu_to_be32(IPATH_MULTICAST_QPN) : 359 __constant_cpu_to_be32(IPATH_MULTICAST_QPN) :
463 cpu_to_be32(wr->wr.ud.remote_qpn); 360 cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
464 /* XXX Could lose a PSN count but not worth locking */
465 ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK); 361 ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK);
466 /* 362 /*
467 * Qkeys with the high order bit set mean use the 363 * Qkeys with the high order bit set mean use the
468 * qkey from the QP context instead of the WR (see 10.2.5). 364 * qkey from the QP context instead of the WR (see 10.2.5).
469 */ 365 */
470 ohdr->u.ud.deth[0] = cpu_to_be32((int)wr->wr.ud.remote_qkey < 0 ? 366 ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ?
471 qp->qkey : wr->wr.ud.remote_qkey); 367 qp->qkey : wqe->wr.wr.ud.remote_qkey);
472 ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); 368 ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
473 if (ipath_verbs_send(dev->dd, hwords, (u32 *) &qp->s_hdr,
474 len, &ss))
475 dev->n_no_piobuf++;
476 369
477done: 370done:
478 /* Queue the completion status entry. */ 371 if (++qp->s_cur >= qp->s_size)
479 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || 372 qp->s_cur = 0;
480 (wr->send_flags & IB_SEND_SIGNALED)) { 373 ret = 1;
481 wc.wr_id = wr->wr_id;
482 wc.status = IB_WC_SUCCESS;
483 wc.vendor_err = 0;
484 wc.opcode = IB_WC_SEND;
485 wc.byte_len = len;
486 wc.qp = &qp->ibqp;
487 wc.src_qp = 0;
488 wc.wc_flags = 0;
489 /* XXX initialize other fields? */
490 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
491 }
492 kfree(sg_list);
493
494 ret = 0;
495 374
496bail: 375bail:
497 return ret; 376 return ret;
@@ -672,7 +551,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
672 * Save the LMC lower bits if the destination LID is a unicast LID. 551 * Save the LMC lower bits if the destination LID is a unicast LID.
673 */ 552 */
674 wc.dlid_path_bits = dlid >= IPATH_MULTICAST_LID_BASE ? 0 : 553 wc.dlid_path_bits = dlid >= IPATH_MULTICAST_LID_BASE ? 0 :
675 dlid & ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); 554 dlid & ((1 << dev->dd->ipath_lmc) - 1);
555 wc.port_num = 1;
676 /* Signal completion event if the solicited bit is set. */ 556 /* Signal completion event if the solicited bit is set. */
677 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 557 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
678 (ohdr->bth[0] & 558 (ohdr->bth[0] &
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 16aa61fd8085..74f77e7c2c1b 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -230,6 +230,121 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
230 } 230 }
231} 231}
232 232
233static void ipath_flush_wqe(struct ipath_qp *qp, struct ib_send_wr *wr)
234{
235 struct ib_wc wc;
236
237 memset(&wc, 0, sizeof(wc));
238 wc.wr_id = wr->wr_id;
239 wc.status = IB_WC_WR_FLUSH_ERR;
240 wc.opcode = ib_ipath_wc_opcode[wr->opcode];
241 wc.qp = &qp->ibqp;
242 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
243}
244
245/**
246 * ipath_post_one_send - post one RC, UC, or UD send work request
247 * @qp: the QP to post on
248 * @wr: the work request to send
249 */
250static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
251{
252 struct ipath_swqe *wqe;
253 u32 next;
254 int i;
255 int j;
256 int acc;
257 int ret;
258 unsigned long flags;
259
260 spin_lock_irqsave(&qp->s_lock, flags);
261
262 /* Check that state is OK to post send. */
263 if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) {
264 if (qp->state != IB_QPS_SQE && qp->state != IB_QPS_ERR)
265 goto bail_inval;
266 /* C10-96 says generate a flushed completion entry. */
267 ipath_flush_wqe(qp, wr);
268 ret = 0;
269 goto bail;
270 }
271
272 /* IB spec says that num_sge == 0 is OK. */
273 if (wr->num_sge > qp->s_max_sge)
274 goto bail_inval;
275
276 /*
277 * Don't allow RDMA reads or atomic operations on UC or
278 * undefined operations.
279 * Make sure buffer is large enough to hold the result for atomics.
280 */
281 if (qp->ibqp.qp_type == IB_QPT_UC) {
282 if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
283 goto bail_inval;
284 } else if (qp->ibqp.qp_type == IB_QPT_UD) {
285 /* Check UD opcode */
286 if (wr->opcode != IB_WR_SEND &&
287 wr->opcode != IB_WR_SEND_WITH_IMM)
288 goto bail_inval;
289 /* Check UD destination address PD */
290 if (qp->ibqp.pd != wr->wr.ud.ah->pd)
291 goto bail_inval;
292 } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
293 goto bail_inval;
294 else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
295 (wr->num_sge == 0 ||
296 wr->sg_list[0].length < sizeof(u64) ||
297 wr->sg_list[0].addr & (sizeof(u64) - 1)))
298 goto bail_inval;
299 else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
300 goto bail_inval;
301
302 next = qp->s_head + 1;
303 if (next >= qp->s_size)
304 next = 0;
305 if (next == qp->s_last)
306 goto bail_inval;
307
308 wqe = get_swqe_ptr(qp, qp->s_head);
309 wqe->wr = *wr;
310 wqe->ssn = qp->s_ssn++;
311 wqe->length = 0;
312 if (wr->num_sge) {
313 acc = wr->opcode >= IB_WR_RDMA_READ ?
314 IB_ACCESS_LOCAL_WRITE : 0;
315 for (i = 0, j = 0; i < wr->num_sge; i++) {
316 u32 length = wr->sg_list[i].length;
317 int ok;
318
319 if (length == 0)
320 continue;
321 ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
322 &wr->sg_list[i], acc);
323 if (!ok)
324 goto bail_inval;
325 wqe->length += length;
326 j++;
327 }
328 wqe->wr.num_sge = j;
329 }
330 if (qp->ibqp.qp_type == IB_QPT_UC ||
331 qp->ibqp.qp_type == IB_QPT_RC) {
332 if (wqe->length > 0x80000000U)
333 goto bail_inval;
334 } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
335 goto bail_inval;
336 qp->s_head = next;
337
338 ret = 0;
339 goto bail;
340
341bail_inval:
342 ret = -EINVAL;
343bail:
344 spin_unlock_irqrestore(&qp->s_lock, flags);
345 return ret;
346}
347
233/** 348/**
234 * ipath_post_send - post a send on a QP 349 * ipath_post_send - post a send on a QP
235 * @ibqp: the QP to post the send on 350 * @ibqp: the QP to post the send on
@@ -244,35 +359,17 @@ static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
244 struct ipath_qp *qp = to_iqp(ibqp); 359 struct ipath_qp *qp = to_iqp(ibqp);
245 int err = 0; 360 int err = 0;
246 361
247 /* Check that state is OK to post send. */
248 if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)) {
249 *bad_wr = wr;
250 err = -EINVAL;
251 goto bail;
252 }
253
254 for (; wr; wr = wr->next) { 362 for (; wr; wr = wr->next) {
255 switch (qp->ibqp.qp_type) { 363 err = ipath_post_one_send(qp, wr);
256 case IB_QPT_UC:
257 case IB_QPT_RC:
258 err = ipath_post_ruc_send(qp, wr);
259 break;
260
261 case IB_QPT_SMI:
262 case IB_QPT_GSI:
263 case IB_QPT_UD:
264 err = ipath_post_ud_send(qp, wr);
265 break;
266
267 default:
268 err = -EINVAL;
269 }
270 if (err) { 364 if (err) {
271 *bad_wr = wr; 365 *bad_wr = wr;
272 break; 366 goto bail;
273 } 367 }
274 } 368 }
275 369
370 /* Try to do the send work in the caller's context. */
371 ipath_do_send((unsigned long) qp);
372
276bail: 373bail:
277 return err; 374 return err;
278} 375}
@@ -416,7 +513,7 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
416 /* Check for a valid destination LID (see ch. 7.11.1). */ 513 /* Check for a valid destination LID (see ch. 7.11.1). */
417 lid = be16_to_cpu(hdr->lrh[1]); 514 lid = be16_to_cpu(hdr->lrh[1]);
418 if (lid < IPATH_MULTICAST_LID_BASE) { 515 if (lid < IPATH_MULTICAST_LID_BASE) {
419 lid &= ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); 516 lid &= ~((1 << dev->dd->ipath_lmc) - 1);
420 if (unlikely(lid != dev->dd->ipath_lid)) { 517 if (unlikely(lid != dev->dd->ipath_lid)) {
421 dev->rcv_errors++; 518 dev->rcv_errors++;
422 goto bail; 519 goto bail;
@@ -631,7 +728,7 @@ static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
631#endif 728#endif
632 729
633static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss, 730static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
634 u32 length) 731 u32 length, unsigned flush_wc)
635{ 732{
636 u32 extra = 0; 733 u32 extra = 0;
637 u32 data = 0; 734 u32 data = 0;
@@ -641,11 +738,11 @@ static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
641 u32 len = ss->sge.length; 738 u32 len = ss->sge.length;
642 u32 off; 739 u32 off;
643 740
644 BUG_ON(len == 0);
645 if (len > length) 741 if (len > length)
646 len = length; 742 len = length;
647 if (len > ss->sge.sge_length) 743 if (len > ss->sge.sge_length)
648 len = ss->sge.sge_length; 744 len = ss->sge.sge_length;
745 BUG_ON(len == 0);
649 /* If the source address is not aligned, try to align it. */ 746 /* If the source address is not aligned, try to align it. */
650 off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1); 747 off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
651 if (off) { 748 if (off) {
@@ -757,36 +854,25 @@ static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
757 } 854 }
758 /* Update address before sending packet. */ 855 /* Update address before sending packet. */
759 update_sge(ss, length); 856 update_sge(ss, length);
760 /* must flush early everything before trigger word */ 857 if (flush_wc) {
761 ipath_flush_wc(); 858 /* must flush early everything before trigger word */
762 __raw_writel(last, piobuf); 859 ipath_flush_wc();
763 /* be sure trigger word is written */ 860 __raw_writel(last, piobuf);
764 ipath_flush_wc(); 861 /* be sure trigger word is written */
862 ipath_flush_wc();
863 } else
864 __raw_writel(last, piobuf);
765} 865}
766 866
767/** 867static int ipath_verbs_send_pio(struct ipath_qp *qp, u32 *hdr, u32 hdrwords,
768 * ipath_verbs_send - send a packet 868 struct ipath_sge_state *ss, u32 len,
769 * @dd: the infinipath device 869 u32 plen, u32 dwords)
770 * @hdrwords: the number of words in the header
771 * @hdr: the packet header
772 * @len: the length of the packet in bytes
773 * @ss: the SGE to send
774 */
775int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
776 u32 *hdr, u32 len, struct ipath_sge_state *ss)
777{ 870{
871 struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
778 u32 __iomem *piobuf; 872 u32 __iomem *piobuf;
779 u32 plen; 873 unsigned flush_wc;
780 int ret; 874 int ret;
781 875
782 /* +1 is for the qword padding of pbc */
783 plen = hdrwords + ((len + 3) >> 2) + 1;
784 if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
785 ret = -EINVAL;
786 goto bail;
787 }
788
789 /* Get a PIO buffer to use. */
790 piobuf = ipath_getpiobuf(dd, NULL); 876 piobuf = ipath_getpiobuf(dd, NULL);
791 if (unlikely(piobuf == NULL)) { 877 if (unlikely(piobuf == NULL)) {
792 ret = -EBUSY; 878 ret = -EBUSY;
@@ -799,51 +885,90 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
799 * or WC buffer can be written out of order. 885 * or WC buffer can be written out of order.
800 */ 886 */
801 writeq(plen, piobuf); 887 writeq(plen, piobuf);
802 ipath_flush_wc();
803 piobuf += 2; 888 piobuf += 2;
889
890 flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
804 if (len == 0) { 891 if (len == 0) {
805 /* 892 /*
806 * If there is just the header portion, must flush before 893 * If there is just the header portion, must flush before
807 * writing last word of header for correctness, and after 894 * writing last word of header for correctness, and after
808 * the last header word (trigger word). 895 * the last header word (trigger word).
809 */ 896 */
810 __iowrite32_copy(piobuf, hdr, hdrwords - 1); 897 if (flush_wc) {
811 ipath_flush_wc(); 898 ipath_flush_wc();
812 __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1); 899 __iowrite32_copy(piobuf, hdr, hdrwords - 1);
813 ipath_flush_wc(); 900 ipath_flush_wc();
814 ret = 0; 901 __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
815 goto bail; 902 ipath_flush_wc();
903 } else
904 __iowrite32_copy(piobuf, hdr, hdrwords);
905 goto done;
816 } 906 }
817 907
908 if (flush_wc)
909 ipath_flush_wc();
818 __iowrite32_copy(piobuf, hdr, hdrwords); 910 __iowrite32_copy(piobuf, hdr, hdrwords);
819 piobuf += hdrwords; 911 piobuf += hdrwords;
820 912
821 /* The common case is aligned and contained in one segment. */ 913 /* The common case is aligned and contained in one segment. */
822 if (likely(ss->num_sge == 1 && len <= ss->sge.length && 914 if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
823 !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) { 915 !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
824 u32 w;
825 u32 *addr = (u32 *) ss->sge.vaddr; 916 u32 *addr = (u32 *) ss->sge.vaddr;
826 917
827 /* Update address before sending packet. */ 918 /* Update address before sending packet. */
828 update_sge(ss, len); 919 update_sge(ss, len);
829 /* Need to round up for the last dword in the packet. */ 920 if (flush_wc) {
830 w = (len + 3) >> 2; 921 __iowrite32_copy(piobuf, addr, dwords - 1);
831 __iowrite32_copy(piobuf, addr, w - 1); 922 /* must flush early everything before trigger word */
832 /* must flush early everything before trigger word */ 923 ipath_flush_wc();
833 ipath_flush_wc(); 924 __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
834 __raw_writel(addr[w - 1], piobuf + w - 1); 925 /* be sure trigger word is written */
835 /* be sure trigger word is written */ 926 ipath_flush_wc();
836 ipath_flush_wc(); 927 } else
837 ret = 0; 928 __iowrite32_copy(piobuf, addr, dwords);
838 goto bail; 929 goto done;
839 } 930 }
840 copy_io(piobuf, ss, len); 931 copy_io(piobuf, ss, len, flush_wc);
932done:
933 if (qp->s_wqe)
934 ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
841 ret = 0; 935 ret = 0;
842
843bail: 936bail:
844 return ret; 937 return ret;
845} 938}
846 939
940/**
941 * ipath_verbs_send - send a packet
942 * @qp: the QP to send on
943 * @hdr: the packet header
944 * @hdrwords: the number of words in the header
945 * @ss: the SGE to send
946 * @len: the length of the packet in bytes
947 */
948int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
949 u32 hdrwords, struct ipath_sge_state *ss, u32 len)
950{
951 struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
952 u32 plen;
953 int ret;
954 u32 dwords = (len + 3) >> 2;
955
956 /* +1 is for the qword padding of pbc */
957 plen = hdrwords + dwords + 1;
958
959 /* Drop non-VL15 packets if we are not in the active state */
960 if (!(dd->ipath_flags & IPATH_LINKACTIVE) &&
961 qp->ibqp.qp_type != IB_QPT_SMI) {
962 if (qp->s_wqe)
963 ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
964 ret = 0;
965 } else
966 ret = ipath_verbs_send_pio(qp, (u32 *) hdr, hdrwords,
967 ss, len, plen, dwords);
968
969 return ret;
970}
971
847int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords, 972int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
848 u64 *rwords, u64 *spkts, u64 *rpkts, 973 u64 *rwords, u64 *spkts, u64 *rpkts,
849 u64 *xmit_wait) 974 u64 *xmit_wait)
@@ -852,7 +977,6 @@ int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
852 977
853 if (!(dd->ipath_flags & IPATH_INITTED)) { 978 if (!(dd->ipath_flags & IPATH_INITTED)) {
854 /* no hardware, freeze, etc. */ 979 /* no hardware, freeze, etc. */
855 ipath_dbg("unit %u not usable\n", dd->ipath_unit);
856 ret = -EINVAL; 980 ret = -EINVAL;
857 goto bail; 981 goto bail;
858 } 982 }
@@ -878,48 +1002,44 @@ bail:
878int ipath_get_counters(struct ipath_devdata *dd, 1002int ipath_get_counters(struct ipath_devdata *dd,
879 struct ipath_verbs_counters *cntrs) 1003 struct ipath_verbs_counters *cntrs)
880{ 1004{
1005 struct ipath_cregs const *crp = dd->ipath_cregs;
881 int ret; 1006 int ret;
882 1007
883 if (!(dd->ipath_flags & IPATH_INITTED)) { 1008 if (!(dd->ipath_flags & IPATH_INITTED)) {
884 /* no hardware, freeze, etc. */ 1009 /* no hardware, freeze, etc. */
885 ipath_dbg("unit %u not usable\n", dd->ipath_unit);
886 ret = -EINVAL; 1010 ret = -EINVAL;
887 goto bail; 1011 goto bail;
888 } 1012 }
889 cntrs->symbol_error_counter = 1013 cntrs->symbol_error_counter =
890 ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt); 1014 ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);
891 cntrs->link_error_recovery_counter = 1015 cntrs->link_error_recovery_counter =
892 ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt); 1016 ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);
893 /* 1017 /*
894 * The link downed counter counts when the other side downs the 1018 * The link downed counter counts when the other side downs the
895 * connection. We add in the number of times we downed the link 1019 * connection. We add in the number of times we downed the link
896 * due to local link integrity errors to compensate. 1020 * due to local link integrity errors to compensate.
897 */ 1021 */
898 cntrs->link_downed_counter = 1022 cntrs->link_downed_counter =
899 ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt); 1023 ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);
900 cntrs->port_rcv_errors = 1024 cntrs->port_rcv_errors =
901 ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) + 1025 ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +
902 ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) + 1026 ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +
903 ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) + 1027 ipath_snap_cntr(dd, crp->cr_portovflcnt) +
904 ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) + 1028 ipath_snap_cntr(dd, crp->cr_err_rlencnt) +
905 ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) + 1029 ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +
906 ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) + 1030 ipath_snap_cntr(dd, crp->cr_errlinkcnt) +
907 ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) + 1031 ipath_snap_cntr(dd, crp->cr_erricrccnt) +
908 ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) + 1032 ipath_snap_cntr(dd, crp->cr_errvcrccnt) +
909 ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt) + 1033 ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
1034 ipath_snap_cntr(dd, crp->cr_badformatcnt) +
910 dd->ipath_rxfc_unsupvl_errs; 1035 dd->ipath_rxfc_unsupvl_errs;
911 cntrs->port_rcv_remphys_errors = 1036 cntrs->port_rcv_remphys_errors =
912 ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt); 1037 ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
913 cntrs->port_xmit_discards = 1038 cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
914 ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt); 1039 cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);
915 cntrs->port_xmit_data = 1040 cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);
916 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); 1041 cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
917 cntrs->port_rcv_data = 1042 cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
918 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
919 cntrs->port_xmit_packets =
920 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
921 cntrs->port_rcv_packets =
922 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
923 cntrs->local_link_integrity_errors = 1043 cntrs->local_link_integrity_errors =
924 (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ? 1044 (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
925 dd->ipath_lli_errs : dd->ipath_lli_errors; 1045 dd->ipath_lli_errs : dd->ipath_lli_errors;
@@ -1033,25 +1153,26 @@ static int ipath_query_port(struct ib_device *ibdev,
1033 u8 port, struct ib_port_attr *props) 1153 u8 port, struct ib_port_attr *props)
1034{ 1154{
1035 struct ipath_ibdev *dev = to_idev(ibdev); 1155 struct ipath_ibdev *dev = to_idev(ibdev);
1156 struct ipath_devdata *dd = dev->dd;
1036 enum ib_mtu mtu; 1157 enum ib_mtu mtu;
1037 u16 lid = dev->dd->ipath_lid; 1158 u16 lid = dd->ipath_lid;
1038 u64 ibcstat; 1159 u64 ibcstat;
1039 1160
1040 memset(props, 0, sizeof(*props)); 1161 memset(props, 0, sizeof(*props));
1041 props->lid = lid ? lid : __constant_be16_to_cpu(IB_LID_PERMISSIVE); 1162 props->lid = lid ? lid : __constant_be16_to_cpu(IB_LID_PERMISSIVE);
1042 props->lmc = dev->mkeyprot_resv_lmc & 7; 1163 props->lmc = dd->ipath_lmc;
1043 props->sm_lid = dev->sm_lid; 1164 props->sm_lid = dev->sm_lid;
1044 props->sm_sl = dev->sm_sl; 1165 props->sm_sl = dev->sm_sl;
1045 ibcstat = dev->dd->ipath_lastibcstat; 1166 ibcstat = dd->ipath_lastibcstat;
1046 props->state = ((ibcstat >> 4) & 0x3) + 1; 1167 props->state = ((ibcstat >> 4) & 0x3) + 1;
1047 /* See phys_state_show() */ 1168 /* See phys_state_show() */
1048 props->phys_state = ipath_cvt_physportstate[ 1169 props->phys_state = ipath_cvt_physportstate[
1049 dev->dd->ipath_lastibcstat & 0xf]; 1170 dd->ipath_lastibcstat & 0xf];
1050 props->port_cap_flags = dev->port_cap_flags; 1171 props->port_cap_flags = dev->port_cap_flags;
1051 props->gid_tbl_len = 1; 1172 props->gid_tbl_len = 1;
1052 props->max_msg_sz = 0x80000000; 1173 props->max_msg_sz = 0x80000000;
1053 props->pkey_tbl_len = ipath_get_npkeys(dev->dd); 1174 props->pkey_tbl_len = ipath_get_npkeys(dd);
1054 props->bad_pkey_cntr = ipath_get_cr_errpkey(dev->dd) - 1175 props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
1055 dev->z_pkey_violations; 1176 dev->z_pkey_violations;
1056 props->qkey_viol_cntr = dev->qkey_violations; 1177 props->qkey_viol_cntr = dev->qkey_violations;
1057 props->active_width = IB_WIDTH_4X; 1178 props->active_width = IB_WIDTH_4X;
@@ -1061,12 +1182,12 @@ static int ipath_query_port(struct ib_device *ibdev,
1061 props->init_type_reply = 0; 1182 props->init_type_reply = 0;
1062 1183
1063 /* 1184 /*
1064 * Note: the chips support a maximum MTU of 4096, but the driver 1185 * Note: the chip supports a maximum MTU of 4096, but the driver
1065 * hasn't implemented this feature yet, so set the maximum value 1186 * hasn't implemented this feature yet, so set the maximum value
1066 * to 2048. 1187 * to 2048.
1067 */ 1188 */
1068 props->max_mtu = IB_MTU_2048; 1189 props->max_mtu = IB_MTU_2048;
1069 switch (dev->dd->ipath_ibmtu) { 1190 switch (dd->ipath_ibmtu) {
1070 case 4096: 1191 case 4096:
1071 mtu = IB_MTU_4096; 1192 mtu = IB_MTU_4096;
1072 break; 1193 break;
@@ -1415,9 +1536,7 @@ static int disable_timer(struct ipath_devdata *dd)
1415{ 1536{
1416 /* Disable GPIO bit 2 interrupt */ 1537 /* Disable GPIO bit 2 interrupt */
1417 if (dd->ipath_flags & IPATH_GPIO_INTR) { 1538 if (dd->ipath_flags & IPATH_GPIO_INTR) {
1418 u64 val;
1419 /* Disable GPIO bit 2 interrupt */ 1539 /* Disable GPIO bit 2 interrupt */
1420 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
1421 dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT)); 1540 dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
1422 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 1541 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1423 dd->ipath_gpio_mask); 1542 dd->ipath_gpio_mask);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 1a24c6a4a814..6ccb54f104a3 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -42,6 +42,8 @@
42#include <rdma/ib_pack.h> 42#include <rdma/ib_pack.h>
43#include <rdma/ib_user_verbs.h> 43#include <rdma/ib_user_verbs.h>
44 44
45#include "ipath_kernel.h"
46
45#define IPATH_MAX_RDMA_ATOMIC 4 47#define IPATH_MAX_RDMA_ATOMIC 4
46 48
47#define QPN_MAX (1 << 24) 49#define QPN_MAX (1 << 24)
@@ -59,6 +61,7 @@
59 */ 61 */
60#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1) 62#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1)
61 63
64/* AETH NAK opcode values */
62#define IB_RNR_NAK 0x20 65#define IB_RNR_NAK 0x20
63#define IB_NAK_PSN_ERROR 0x60 66#define IB_NAK_PSN_ERROR 0x60
64#define IB_NAK_INVALID_REQUEST 0x61 67#define IB_NAK_INVALID_REQUEST 0x61
@@ -66,6 +69,7 @@
66#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63 69#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
67#define IB_NAK_INVALID_RD_REQUEST 0x64 70#define IB_NAK_INVALID_RD_REQUEST 0x64
68 71
72/* Flags for checking QP state (see ib_ipath_state_ops[]) */
69#define IPATH_POST_SEND_OK 0x01 73#define IPATH_POST_SEND_OK 0x01
70#define IPATH_POST_RECV_OK 0x02 74#define IPATH_POST_RECV_OK 0x02
71#define IPATH_PROCESS_RECV_OK 0x04 75#define IPATH_PROCESS_RECV_OK 0x04
@@ -187,7 +191,11 @@ struct ipath_mmap_info {
187struct ipath_cq_wc { 191struct ipath_cq_wc {
188 u32 head; /* index of next entry to fill */ 192 u32 head; /* index of next entry to fill */
189 u32 tail; /* index of next ib_poll_cq() entry */ 193 u32 tail; /* index of next ib_poll_cq() entry */
190 struct ib_uverbs_wc queue[1]; /* this is actually size ibcq.cqe + 1 */ 194 union {
195 /* these are actually size ibcq.cqe + 1 */
196 struct ib_uverbs_wc uqueue[0];
197 struct ib_wc kqueue[0];
198 };
191}; 199};
192 200
193/* 201/*
@@ -239,7 +247,7 @@ struct ipath_mregion {
239 */ 247 */
240struct ipath_sge { 248struct ipath_sge {
241 struct ipath_mregion *mr; 249 struct ipath_mregion *mr;
242 void *vaddr; /* current pointer into the segment */ 250 void *vaddr; /* kernel virtual address of segment */
243 u32 sge_length; /* length of the SGE */ 251 u32 sge_length; /* length of the SGE */
244 u32 length; /* remaining length of the segment */ 252 u32 length; /* remaining length of the segment */
245 u16 m; /* current index: mr->map[m] */ 253 u16 m; /* current index: mr->map[m] */
@@ -407,6 +415,7 @@ struct ipath_qp {
407 u32 s_ssn; /* SSN of tail entry */ 415 u32 s_ssn; /* SSN of tail entry */
408 u32 s_lsn; /* limit sequence number (credit) */ 416 u32 s_lsn; /* limit sequence number (credit) */
409 struct ipath_swqe *s_wq; /* send work queue */ 417 struct ipath_swqe *s_wq; /* send work queue */
418 struct ipath_swqe *s_wqe;
410 struct ipath_rq r_rq; /* receive work queue */ 419 struct ipath_rq r_rq; /* receive work queue */
411 struct ipath_sge r_sg_list[0]; /* verified SGEs */ 420 struct ipath_sge r_sg_list[0]; /* verified SGEs */
412}; 421};
@@ -492,7 +501,7 @@ struct ipath_ibdev {
492 int ib_unit; /* This is the device number */ 501 int ib_unit; /* This is the device number */
493 u16 sm_lid; /* in host order */ 502 u16 sm_lid; /* in host order */
494 u8 sm_sl; 503 u8 sm_sl;
495 u8 mkeyprot_resv_lmc; 504 u8 mkeyprot;
496 /* non-zero when timer is set */ 505 /* non-zero when timer is set */
497 unsigned long mkey_lease_timeout; 506 unsigned long mkey_lease_timeout;
498 507
@@ -667,7 +676,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
667 676
668int ipath_destroy_qp(struct ib_qp *ibqp); 677int ipath_destroy_qp(struct ib_qp *ibqp);
669 678
670void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err); 679int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
671 680
672int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 681int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
673 int attr_mask, struct ib_udata *udata); 682 int attr_mask, struct ib_udata *udata);
@@ -683,8 +692,8 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc);
683 692
684void ipath_get_credit(struct ipath_qp *qp, u32 aeth); 693void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
685 694
686int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, 695int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
687 u32 *hdr, u32 len, struct ipath_sge_state *ss); 696 u32 hdrwords, struct ipath_sge_state *ss, u32 len);
688 697
689void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig); 698void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
690 699
@@ -692,8 +701,6 @@ void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
692 701
693void ipath_skip_sge(struct ipath_sge_state *ss, u32 length); 702void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
694 703
695int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr);
696
697void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, 704void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
698 int has_grh, void *data, u32 tlen, struct ipath_qp *qp); 705 int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
699 706
@@ -733,6 +740,8 @@ int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
733 740
734int ipath_destroy_srq(struct ib_srq *ibsrq); 741int ipath_destroy_srq(struct ib_srq *ibsrq);
735 742
743void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
744
736int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); 745int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
737 746
738struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector, 747struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector,
@@ -782,18 +791,28 @@ int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
782 791
783void ipath_insert_rnr_queue(struct ipath_qp *qp); 792void ipath_insert_rnr_queue(struct ipath_qp *qp);
784 793
794int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
795 u32 *lengthp, struct ipath_sge_state *ss);
796
785int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only); 797int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only);
786 798
787u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr, 799u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
788 struct ib_global_route *grh, u32 hwords, u32 nwords); 800 struct ib_global_route *grh, u32 hwords, u32 nwords);
789 801
790void ipath_do_ruc_send(unsigned long data); 802void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
803 struct ipath_other_headers *ohdr,
804 u32 bth0, u32 bth2);
805
806void ipath_do_send(unsigned long data);
807
808void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
809 enum ib_wc_status status);
810
811int ipath_make_rc_req(struct ipath_qp *qp);
791 812
792int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr, 813int ipath_make_uc_req(struct ipath_qp *qp);
793 u32 pmtu, u32 *bth0p, u32 *bth2p);
794 814
795int ipath_make_uc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr, 815int ipath_make_ud_req(struct ipath_qp *qp);
796 u32 pmtu, u32 *bth0p, u32 *bth2p);
797 816
798int ipath_register_ib_device(struct ipath_devdata *); 817int ipath_register_ib_device(struct ipath_devdata *);
799 818
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index dde8fe9af47e..d8287d9db41e 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -476,9 +476,48 @@ out:
476 return err; 476 return err;
477} 477}
478 478
479static ssize_t show_hca(struct class_device *cdev, char *buf)
480{
481 struct mlx4_ib_dev *dev = container_of(cdev, struct mlx4_ib_dev, ib_dev.class_dev);
482 return sprintf(buf, "MT%d\n", dev->dev->pdev->device);
483}
484
485static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
486{
487 struct mlx4_ib_dev *dev = container_of(cdev, struct mlx4_ib_dev, ib_dev.class_dev);
488 return sprintf(buf, "%d.%d.%d\n", (int) (dev->dev->caps.fw_ver >> 32),
489 (int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
490 (int) dev->dev->caps.fw_ver & 0xffff);
491}
492
493static ssize_t show_rev(struct class_device *cdev, char *buf)
494{
495 struct mlx4_ib_dev *dev = container_of(cdev, struct mlx4_ib_dev, ib_dev.class_dev);
496 return sprintf(buf, "%x\n", dev->dev->rev_id);
497}
498
499static ssize_t show_board(struct class_device *cdev, char *buf)
500{
501 struct mlx4_ib_dev *dev = container_of(cdev, struct mlx4_ib_dev, ib_dev.class_dev);
502 return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, dev->dev->board_id);
503}
504
505static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
506static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
507static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
508static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
509
510static struct class_device_attribute *mlx4_class_attributes[] = {
511 &class_device_attr_hw_rev,
512 &class_device_attr_fw_ver,
513 &class_device_attr_hca_type,
514 &class_device_attr_board_id
515};
516
479static void *mlx4_ib_add(struct mlx4_dev *dev) 517static void *mlx4_ib_add(struct mlx4_dev *dev)
480{ 518{
481 struct mlx4_ib_dev *ibdev; 519 struct mlx4_ib_dev *ibdev;
520 int i;
482 521
483 ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev); 522 ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
484 if (!ibdev) { 523 if (!ibdev) {
@@ -568,6 +607,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
568 ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; 607 ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
569 ibdev->ib_dev.process_mad = mlx4_ib_process_mad; 608 ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
570 609
610 ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
611 ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
612 ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
613 ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
614
571 if (init_node_data(ibdev)) 615 if (init_node_data(ibdev))
572 goto err_map; 616 goto err_map;
573 617
@@ -580,6 +624,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
580 if (mlx4_ib_mad_init(ibdev)) 624 if (mlx4_ib_mad_init(ibdev))
581 goto err_reg; 625 goto err_reg;
582 626
627 for (i = 0; i < ARRAY_SIZE(mlx4_class_attributes); ++i) {
628 if (class_device_create_file(&ibdev->ib_dev.class_dev,
629 mlx4_class_attributes[i]))
630 goto err_reg;
631 }
632
583 return ibdev; 633 return ibdev;
584 634
585err_reg: 635err_reg:
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 705ff2fa237e..28697653a370 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -93,6 +93,11 @@ struct mlx4_ib_mr {
93 struct ib_umem *umem; 93 struct ib_umem *umem;
94}; 94};
95 95
96struct mlx4_ib_fmr {
97 struct ib_fmr ibfmr;
98 struct mlx4_fmr mfmr;
99};
100
96struct mlx4_ib_wq { 101struct mlx4_ib_wq {
97 u64 *wrid; 102 u64 *wrid;
98 spinlock_t lock; 103 spinlock_t lock;
@@ -199,6 +204,10 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr)
199 return container_of(ibmr, struct mlx4_ib_mr, ibmr); 204 return container_of(ibmr, struct mlx4_ib_mr, ibmr);
200} 205}
201 206
207static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
208{
209 return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
210}
202static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp) 211static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp)
203{ 212{
204 return container_of(ibqp, struct mlx4_ib_qp, ibqp); 213 return container_of(ibqp, struct mlx4_ib_qp, ibqp);
@@ -284,6 +293,13 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
284int mlx4_ib_mad_init(struct mlx4_ib_dev *dev); 293int mlx4_ib_mad_init(struct mlx4_ib_dev *dev);
285void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev); 294void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev);
286 295
296struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int mr_access_flags,
297 struct ib_fmr_attr *fmr_attr);
298int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages,
299 u64 iova);
300int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
301int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
302
287static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) 303static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
288{ 304{
289 return !!(ah->av.g_slid & 0x80); 305 return !!(ah->av.g_slid & 0x80);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 85ae906f1d12..7dc91a3e712d 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -96,11 +96,10 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
96 pages[i++] = sg_dma_address(&chunk->page_list[j]) + 96 pages[i++] = sg_dma_address(&chunk->page_list[j]) +
97 umem->page_size * k; 97 umem->page_size * k;
98 /* 98 /*
99 * Be friendly to WRITE_MTT firmware 99 * Be friendly to mlx4_write_mtt() and
100 * command, and pass it chunks of 100 * pass it chunks of appropriate size.
101 * appropriate size.
102 */ 101 */
103 if (i == PAGE_SIZE / sizeof (u64) - 2) { 102 if (i == PAGE_SIZE / sizeof (u64)) {
104 err = mlx4_write_mtt(dev->dev, mtt, n, 103 err = mlx4_write_mtt(dev->dev, mtt, n,
105 i, pages); 104 i, pages);
106 if (err) 105 if (err)
@@ -182,3 +181,96 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
182 181
183 return 0; 182 return 0;
184} 183}
184
185struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
186 struct ib_fmr_attr *fmr_attr)
187{
188 struct mlx4_ib_dev *dev = to_mdev(pd->device);
189 struct mlx4_ib_fmr *fmr;
190 int err = -ENOMEM;
191
192 fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
193 if (!fmr)
194 return ERR_PTR(-ENOMEM);
195
196 err = mlx4_fmr_alloc(dev->dev, to_mpd(pd)->pdn, convert_access(acc),
197 fmr_attr->max_pages, fmr_attr->max_maps,
198 fmr_attr->page_shift, &fmr->mfmr);
199 if (err)
200 goto err_free;
201
202 err = mlx4_mr_enable(to_mdev(pd->device)->dev, &fmr->mfmr.mr);
203 if (err)
204 goto err_mr;
205
206 fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mfmr.mr.key;
207
208 return &fmr->ibfmr;
209
210err_mr:
211 mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);
212
213err_free:
214 kfree(fmr);
215
216 return ERR_PTR(err);
217}
218
219int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
220 int npages, u64 iova)
221{
222 struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
223 struct mlx4_ib_dev *dev = to_mdev(ifmr->ibfmr.device);
224
225 return mlx4_map_phys_fmr(dev->dev, &ifmr->mfmr, page_list, npages, iova,
226 &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
227}
228
229int mlx4_ib_unmap_fmr(struct list_head *fmr_list)
230{
231 struct ib_fmr *ibfmr;
232 int err;
233 struct mlx4_dev *mdev = NULL;
234
235 list_for_each_entry(ibfmr, fmr_list, list) {
236 if (mdev && to_mdev(ibfmr->device)->dev != mdev)
237 return -EINVAL;
238 mdev = to_mdev(ibfmr->device)->dev;
239 }
240
241 if (!mdev)
242 return 0;
243
244 list_for_each_entry(ibfmr, fmr_list, list) {
245 struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
246
247 mlx4_fmr_unmap(mdev, &ifmr->mfmr, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
248 }
249
250 /*
251 * Make sure all MPT status updates are visible before issuing
252 * SYNC_TPT firmware command.
253 */
254 wmb();
255
256 err = mlx4_SYNC_TPT(mdev);
257 if (err)
258 printk(KERN_WARNING "mlx4_ib: SYNC_TPT error %d when "
259 "unmapping FMRs\n", err);
260
261 return 0;
262}
263
264int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
265{
266 struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
267 struct mlx4_ib_dev *dev = to_mdev(ibfmr->device);
268 int err;
269
270 err = mlx4_fmr_free(dev->dev, &ifmr->mfmr);
271
272 if (!err)
273 kfree(ifmr);
274
275 return err;
276}
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 85c51bdc36f1..31a480e5b0d0 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1249,6 +1249,13 @@ static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
1249 dseg->byte_count = cpu_to_be32(sg->length); 1249 dseg->byte_count = cpu_to_be32(sg->length);
1250} 1250}
1251 1251
1252static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
1253{
1254 dseg->byte_count = cpu_to_be32(sg->length);
1255 dseg->lkey = cpu_to_be32(sg->lkey);
1256 dseg->addr = cpu_to_be64(sg->addr);
1257}
1258
1252int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 1259int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1253 struct ib_send_wr **bad_wr) 1260 struct ib_send_wr **bad_wr)
1254{ 1261{
@@ -1464,11 +1471,8 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1464 1471
1465 scat = get_recv_wqe(qp, ind); 1472 scat = get_recv_wqe(qp, ind);
1466 1473
1467 for (i = 0; i < wr->num_sge; ++i) { 1474 for (i = 0; i < wr->num_sge; ++i)
1468 scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length); 1475 __set_data_seg(scat + i, wr->sg_list + i);
1469 scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey);
1470 scat[i].addr = cpu_to_be64(wr->sg_list[i].addr);
1471 }
1472 1476
1473 if (i < qp->rq.max_gs) { 1477 if (i < qp->rq.max_gs) {
1474 scat[i].byte_count = 0; 1478 scat[i].byte_count = 0;
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 408748fb5285..e7e9a3d0dac3 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -251,7 +251,7 @@ int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
251 if (ret) 251 if (ret)
252 return ret; 252 return ret;
253 253
254 srq_attr->srq_limit = be16_to_cpu(limit_watermark); 254 srq_attr->srq_limit = limit_watermark;
255 srq_attr->max_wr = srq->msrq.max - 1; 255 srq_attr->max_wr = srq->msrq.max - 1;
256 srq_attr->max_sge = srq->msrq.max_gs; 256 srq_attr->max_sge = srq->msrq.max_gs;
257 257
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index acc95892713a..6966f943f440 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -290,6 +290,12 @@ static int mthca_cmd_post(struct mthca_dev *dev,
290 err = mthca_cmd_post_hcr(dev, in_param, out_param, in_modifier, 290 err = mthca_cmd_post_hcr(dev, in_param, out_param, in_modifier,
291 op_modifier, op, token, event); 291 op_modifier, op, token, event);
292 292
293 /*
294 * Make sure that our HCR writes don't get mixed in with
295 * writes from another CPU starting a FW command.
296 */
297 mmiowb();
298
293 mutex_unlock(&dev->cmd.hcr_mutex); 299 mutex_unlock(&dev->cmd.hcr_mutex);
294 return err; 300 return err;
295} 301}
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 9bae3cc60603..15aa32eb78b6 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -83,7 +83,7 @@ enum {
83 MTHCA_QP_CONTEXT_SIZE = 0x200, 83 MTHCA_QP_CONTEXT_SIZE = 0x200,
84 MTHCA_RDB_ENTRY_SIZE = 0x20, 84 MTHCA_RDB_ENTRY_SIZE = 0x20,
85 MTHCA_AV_SIZE = 0x20, 85 MTHCA_AV_SIZE = 0x20,
86 MTHCA_MGM_ENTRY_SIZE = 0x40, 86 MTHCA_MGM_ENTRY_SIZE = 0x100,
87 87
88 /* Arbel FW gives us these, but we need them for Tavor */ 88 /* Arbel FW gives us these, but we need them for Tavor */
89 MTHCA_MPT_ENTRY_SIZE = 0x40, 89 MTHCA_MPT_ENTRY_SIZE = 0x40,
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 76fed7545c53..60de6f93869e 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -61,7 +61,7 @@ MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
61 61
62#ifdef CONFIG_PCI_MSI 62#ifdef CONFIG_PCI_MSI
63 63
64static int msi_x = 0; 64static int msi_x = 1;
65module_param(msi_x, int, 0444); 65module_param(msi_x, int, 0444);
66MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero"); 66MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
67 67
@@ -137,40 +137,23 @@ static const char mthca_version[] __devinitdata =
137 137
138static int mthca_tune_pci(struct mthca_dev *mdev) 138static int mthca_tune_pci(struct mthca_dev *mdev)
139{ 139{
140 int cap;
141 u16 val;
142
143 if (!tune_pci) 140 if (!tune_pci)
144 return 0; 141 return 0;
145 142
146 /* First try to max out Read Byte Count */ 143 /* First try to max out Read Byte Count */
147 cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX); 144 if (pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX)) {
148 if (cap) { 145 if (pcix_set_mmrbc(mdev->pdev, pcix_get_max_mmrbc(mdev->pdev))) {
149 if (pci_read_config_word(mdev->pdev, cap + PCI_X_CMD, &val)) { 146 mthca_err(mdev, "Couldn't set PCI-X max read count, "
150 mthca_err(mdev, "Couldn't read PCI-X command register, " 147 "aborting.\n");
151 "aborting.\n");
152 return -ENODEV;
153 }
154 val = (val & ~PCI_X_CMD_MAX_READ) | (3 << 2);
155 if (pci_write_config_word(mdev->pdev, cap + PCI_X_CMD, val)) {
156 mthca_err(mdev, "Couldn't write PCI-X command register, "
157 "aborting.\n");
158 return -ENODEV; 148 return -ENODEV;
159 } 149 }
160 } else if (!(mdev->mthca_flags & MTHCA_FLAG_PCIE)) 150 } else if (!(mdev->mthca_flags & MTHCA_FLAG_PCIE))
161 mthca_info(mdev, "No PCI-X capability, not setting RBC.\n"); 151 mthca_info(mdev, "No PCI-X capability, not setting RBC.\n");
162 152
163 cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP); 153 if (pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP)) {
164 if (cap) { 154 if (pcie_set_readrq(mdev->pdev, 4096)) {
165 if (pci_read_config_word(mdev->pdev, cap + PCI_EXP_DEVCTL, &val)) { 155 mthca_err(mdev, "Couldn't write PCI Express read request, "
166 mthca_err(mdev, "Couldn't read PCI Express device control " 156 "aborting.\n");
167 "register, aborting.\n");
168 return -ENODEV;
169 }
170 val = (val & ~PCI_EXP_DEVCTL_READRQ) | (5 << 12);
171 if (pci_write_config_word(mdev->pdev, cap + PCI_EXP_DEVCTL, val)) {
172 mthca_err(mdev, "Couldn't write PCI Express device control "
173 "register, aborting.\n");
174 return -ENODEV; 157 return -ENODEV;
175 } 158 }
176 } else if (mdev->mthca_flags & MTHCA_FLAG_PCIE) 159 } else if (mdev->mthca_flags & MTHCA_FLAG_PCIE)
@@ -833,14 +816,19 @@ static int mthca_setup_hca(struct mthca_dev *dev)
833 816
834 err = mthca_NOP(dev, &status); 817 err = mthca_NOP(dev, &status);
835 if (err || status) { 818 if (err || status) {
836 mthca_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting.\n", 819 if (dev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X)) {
837 dev->mthca_flags & MTHCA_FLAG_MSI_X ? 820 mthca_warn(dev, "NOP command failed to generate interrupt "
838 dev->eq_table.eq[MTHCA_EQ_CMD].msi_x_vector : 821 "(IRQ %d).\n",
839 dev->pdev->irq); 822 dev->mthca_flags & MTHCA_FLAG_MSI_X ?
840 if (dev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X)) 823 dev->eq_table.eq[MTHCA_EQ_CMD].msi_x_vector :
841 mthca_err(dev, "Try again with MSI/MSI-X disabled.\n"); 824 dev->pdev->irq);
842 else 825 mthca_warn(dev, "Trying again with MSI/MSI-X disabled.\n");
826 } else {
827 mthca_err(dev, "NOP command failed to generate interrupt "
828 "(IRQ %d), aborting.\n",
829 dev->pdev->irq);
843 mthca_err(dev, "BIOS or ACPI interrupt routing problem?\n"); 830 mthca_err(dev, "BIOS or ACPI interrupt routing problem?\n");
831 }
844 832
845 goto err_cmd_poll; 833 goto err_cmd_poll;
846 } 834 }
@@ -1115,24 +1103,6 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
1115 goto err_free_dev; 1103 goto err_free_dev;
1116 } 1104 }
1117 1105
1118 if (msi_x && !mthca_enable_msi_x(mdev))
1119 mdev->mthca_flags |= MTHCA_FLAG_MSI_X;
1120 else if (msi) {
1121 static int warned;
1122
1123 if (!warned) {
1124 printk(KERN_WARNING PFX "WARNING: MSI support will be "
1125 "removed from the ib_mthca driver in January 2008.\n");
1126 printk(KERN_WARNING " If you are using MSI and cannot "
1127 "switch to MSI-X, please tell "
1128 "<general@lists.openfabrics.org>.\n");
1129 ++warned;
1130 }
1131
1132 if (!pci_enable_msi(pdev))
1133 mdev->mthca_flags |= MTHCA_FLAG_MSI;
1134 }
1135
1136 if (mthca_cmd_init(mdev)) { 1106 if (mthca_cmd_init(mdev)) {
1137 mthca_err(mdev, "Failed to init command interface, aborting.\n"); 1107 mthca_err(mdev, "Failed to init command interface, aborting.\n");
1138 goto err_free_dev; 1108 goto err_free_dev;
@@ -1156,7 +1126,35 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
1156 mthca_warn(mdev, "If you have problems, try updating your HCA FW.\n"); 1126 mthca_warn(mdev, "If you have problems, try updating your HCA FW.\n");
1157 } 1127 }
1158 1128
1129 if (msi_x && !mthca_enable_msi_x(mdev))
1130 mdev->mthca_flags |= MTHCA_FLAG_MSI_X;
1131 else if (msi) {
1132 static int warned;
1133
1134 if (!warned) {
1135 printk(KERN_WARNING PFX "WARNING: MSI support will be "
1136 "removed from the ib_mthca driver in January 2008.\n");
1137 printk(KERN_WARNING " If you are using MSI and cannot "
1138 "switch to MSI-X, please tell "
1139 "<general@lists.openfabrics.org>.\n");
1140 ++warned;
1141 }
1142
1143 if (!pci_enable_msi(pdev))
1144 mdev->mthca_flags |= MTHCA_FLAG_MSI;
1145 }
1146
1159 err = mthca_setup_hca(mdev); 1147 err = mthca_setup_hca(mdev);
1148 if (err == -EBUSY && (mdev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X))) {
1149 if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
1150 pci_disable_msix(pdev);
1151 if (mdev->mthca_flags & MTHCA_FLAG_MSI)
1152 pci_disable_msi(pdev);
1153 mdev->mthca_flags &= ~(MTHCA_FLAG_MSI_X | MTHCA_FLAG_MSI);
1154
1155 err = mthca_setup_hca(mdev);
1156 }
1157
1160 if (err) 1158 if (err)
1161 goto err_close; 1159 goto err_close;
1162 1160
@@ -1192,17 +1190,17 @@ err_cleanup:
1192 mthca_cleanup_uar_table(mdev); 1190 mthca_cleanup_uar_table(mdev);
1193 1191
1194err_close: 1192err_close:
1193 if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
1194 pci_disable_msix(pdev);
1195 if (mdev->mthca_flags & MTHCA_FLAG_MSI)
1196 pci_disable_msi(pdev);
1197
1195 mthca_close_hca(mdev); 1198 mthca_close_hca(mdev);
1196 1199
1197err_cmd: 1200err_cmd:
1198 mthca_cmd_cleanup(mdev); 1201 mthca_cmd_cleanup(mdev);
1199 1202
1200err_free_dev: 1203err_free_dev:
1201 if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
1202 pci_disable_msix(pdev);
1203 if (mdev->mthca_flags & MTHCA_FLAG_MSI)
1204 pci_disable_msi(pdev);
1205
1206 ib_dealloc_device(&mdev->ib_dev); 1204 ib_dealloc_device(&mdev->ib_dev);
1207 1205
1208err_free_res: 1206err_free_res:
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 88d219e730ad..3f58c11a62b7 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -509,7 +509,7 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
509 for (nreq = 0; wr; wr = wr->next) { 509 for (nreq = 0; wr; wr = wr->next) {
510 ind = srq->first_free; 510 ind = srq->first_free;
511 511
512 if (ind < 0) { 512 if (unlikely(ind < 0)) {
513 mthca_err(dev, "SRQ %06x full\n", srq->srqn); 513 mthca_err(dev, "SRQ %06x full\n", srq->srqn);
514 err = -ENOMEM; 514 err = -ENOMEM;
515 *bad_wr = wr; 515 *bad_wr = wr;
@@ -519,7 +519,7 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
519 wqe = get_wqe(srq, ind); 519 wqe = get_wqe(srq, ind);
520 next_ind = *wqe_to_link(wqe); 520 next_ind = *wqe_to_link(wqe);
521 521
522 if (next_ind < 0) { 522 if (unlikely(next_ind < 0)) {
523 mthca_err(dev, "SRQ %06x full\n", srq->srqn); 523 mthca_err(dev, "SRQ %06x full\n", srq->srqn);
524 err = -ENOMEM; 524 err = -ENOMEM;
525 *bad_wr = wr; 525 *bad_wr = wr;
@@ -623,7 +623,7 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
623 for (nreq = 0; wr; ++nreq, wr = wr->next) { 623 for (nreq = 0; wr; ++nreq, wr = wr->next) {
624 ind = srq->first_free; 624 ind = srq->first_free;
625 625
626 if (ind < 0) { 626 if (unlikely(ind < 0)) {
627 mthca_err(dev, "SRQ %06x full\n", srq->srqn); 627 mthca_err(dev, "SRQ %06x full\n", srq->srqn);
628 err = -ENOMEM; 628 err = -ENOMEM;
629 *bad_wr = wr; 629 *bad_wr = wr;
@@ -633,7 +633,7 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
633 wqe = get_wqe(srq, ind); 633 wqe = get_wqe(srq, ind);
634 next_ind = *wqe_to_link(wqe); 634 next_ind = *wqe_to_link(wqe);
635 635
636 if (next_ind < 0) { 636 if (unlikely(next_ind < 0)) {
637 mthca_err(dev, "SRQ %06x full\n", srq->srqn); 637 mthca_err(dev, "SRQ %06x full\n", srq->srqn);
638 err = -ENOMEM; 638 err = -ENOMEM;
639 *bad_wr = wr; 639 *bad_wr = wr;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 34c6128d2a34..6545fa798b12 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -86,6 +86,7 @@ enum {
86 IPOIB_MCAST_STARTED = 8, 86 IPOIB_MCAST_STARTED = 8,
87 IPOIB_FLAG_NETIF_STOPPED = 9, 87 IPOIB_FLAG_NETIF_STOPPED = 9,
88 IPOIB_FLAG_ADMIN_CM = 10, 88 IPOIB_FLAG_ADMIN_CM = 10,
89 IPOIB_FLAG_UMCAST = 11,
89 90
90 IPOIB_MAX_BACKOFF_SECONDS = 16, 91 IPOIB_MAX_BACKOFF_SECONDS = 16,
91 92
@@ -113,7 +114,27 @@ struct ipoib_pseudoheader {
113 u8 hwaddr[INFINIBAND_ALEN]; 114 u8 hwaddr[INFINIBAND_ALEN];
114}; 115};
115 116
116struct ipoib_mcast; 117/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
118struct ipoib_mcast {
119 struct ib_sa_mcmember_rec mcmember;
120 struct ib_sa_multicast *mc;
121 struct ipoib_ah *ah;
122
123 struct rb_node rb_node;
124 struct list_head list;
125
126 unsigned long created;
127 unsigned long backoff;
128
129 unsigned long flags;
130 unsigned char logcount;
131
132 struct list_head neigh_list;
133
134 struct sk_buff_head pkt_queue;
135
136 struct net_device *dev;
137};
117 138
118struct ipoib_rx_buf { 139struct ipoib_rx_buf {
119 struct sk_buff *skb; 140 struct sk_buff *skb;
@@ -364,6 +385,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah)
364 385
365int ipoib_open(struct net_device *dev); 386int ipoib_open(struct net_device *dev);
366int ipoib_add_pkey_attr(struct net_device *dev); 387int ipoib_add_pkey_attr(struct net_device *dev);
388int ipoib_add_umcast_attr(struct net_device *dev);
367 389
368void ipoib_send(struct net_device *dev, struct sk_buff *skb, 390void ipoib_send(struct net_device *dev, struct sk_buff *skb,
369 struct ipoib_ah *address, u32 qpn); 391 struct ipoib_ah *address, u32 qpn);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 1afd93cdd6bb..0a0dcb8fdfd1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -810,14 +810,16 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
810static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ib_cq *cq) 810static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ib_cq *cq)
811{ 811{
812 struct ipoib_dev_priv *priv = netdev_priv(dev); 812 struct ipoib_dev_priv *priv = netdev_priv(dev);
813 struct ib_qp_init_attr attr = {}; 813 struct ib_qp_init_attr attr = {
814 attr.recv_cq = priv->cq; 814 .send_cq = cq,
815 attr.srq = priv->cm.srq; 815 .recv_cq = priv->cq,
816 attr.cap.max_send_wr = ipoib_sendq_size; 816 .srq = priv->cm.srq,
817 attr.cap.max_send_sge = 1; 817 .cap.max_send_wr = ipoib_sendq_size,
818 attr.sq_sig_type = IB_SIGNAL_ALL_WR; 818 .cap.max_send_sge = 1,
819 attr.qp_type = IB_QPT_RC; 819 .sq_sig_type = IB_SIGNAL_ALL_WR,
820 attr.send_cq = cq; 820 .qp_type = IB_QPT_RC,
821 };
822
821 return ib_create_qp(priv->pd, &attr); 823 return ib_create_qp(priv->pd, &attr);
822} 824}
823 825
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 0ec28c302fbf..1a77e79f6b43 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -553,6 +553,14 @@ void ipoib_drain_cq(struct net_device *dev)
553 do { 553 do {
554 n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc); 554 n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc);
555 for (i = 0; i < n; ++i) { 555 for (i = 0; i < n; ++i) {
556 /*
557 * Convert any successful completions to flush
558 * errors to avoid passing packets up the
559 * stack after bringing the device down.
560 */
561 if (priv->ibwc[i].status == IB_WC_SUCCESS)
562 priv->ibwc[i].status = IB_WC_WR_FLUSH_ERR;
563
556 if (priv->ibwc[i].wr_id & IPOIB_CM_OP_SRQ) 564 if (priv->ibwc[i].wr_id & IPOIB_CM_OP_SRQ)
557 ipoib_cm_handle_rx_wc(dev, priv->ibwc + i); 565 ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
558 else if (priv->ibwc[i].wr_id & IPOIB_OP_RECV) 566 else if (priv->ibwc[i].wr_id & IPOIB_OP_RECV)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 855c9deca8b7..e072f3c32ce6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -473,9 +473,10 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
473 INIT_LIST_HEAD(&path->neigh_list); 473 INIT_LIST_HEAD(&path->neigh_list);
474 474
475 memcpy(path->pathrec.dgid.raw, gid, sizeof (union ib_gid)); 475 memcpy(path->pathrec.dgid.raw, gid, sizeof (union ib_gid));
476 path->pathrec.sgid = priv->local_gid; 476 path->pathrec.sgid = priv->local_gid;
477 path->pathrec.pkey = cpu_to_be16(priv->pkey); 477 path->pathrec.pkey = cpu_to_be16(priv->pkey);
478 path->pathrec.numb_path = 1; 478 path->pathrec.numb_path = 1;
479 path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class;
479 480
480 return path; 481 return path;
481} 482}
@@ -496,6 +497,7 @@ static int path_rec_start(struct net_device *dev,
496 IB_SA_PATH_REC_DGID | 497 IB_SA_PATH_REC_DGID |
497 IB_SA_PATH_REC_SGID | 498 IB_SA_PATH_REC_SGID |
498 IB_SA_PATH_REC_NUMB_PATH | 499 IB_SA_PATH_REC_NUMB_PATH |
500 IB_SA_PATH_REC_TRAFFIC_CLASS |
499 IB_SA_PATH_REC_PKEY, 501 IB_SA_PATH_REC_PKEY,
500 1000, GFP_ATOMIC, 502 1000, GFP_ATOMIC,
501 path_rec_completion, 503 path_rec_completion,
@@ -1015,6 +1017,37 @@ static ssize_t show_pkey(struct device *dev,
1015} 1017}
1016static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); 1018static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
1017 1019
1020static ssize_t show_umcast(struct device *dev,
1021 struct device_attribute *attr, char *buf)
1022{
1023 struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
1024
1025 return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags));
1026}
1027
1028static ssize_t set_umcast(struct device *dev,
1029 struct device_attribute *attr,
1030 const char *buf, size_t count)
1031{
1032 struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
1033 unsigned long umcast_val = simple_strtoul(buf, NULL, 0);
1034
1035 if (umcast_val > 0) {
1036 set_bit(IPOIB_FLAG_UMCAST, &priv->flags);
1037 ipoib_warn(priv, "ignoring multicast groups joined directly "
1038 "by userspace\n");
1039 } else
1040 clear_bit(IPOIB_FLAG_UMCAST, &priv->flags);
1041
1042 return count;
1043}
1044static DEVICE_ATTR(umcast, S_IWUSR | S_IRUGO, show_umcast, set_umcast);
1045
1046int ipoib_add_umcast_attr(struct net_device *dev)
1047{
1048 return device_create_file(&dev->dev, &dev_attr_umcast);
1049}
1050
1018static ssize_t create_child(struct device *dev, 1051static ssize_t create_child(struct device *dev,
1019 struct device_attribute *attr, 1052 struct device_attribute *attr,
1020 const char *buf, size_t count) 1053 const char *buf, size_t count)
@@ -1081,7 +1114,7 @@ static struct net_device *ipoib_add_port(const char *format,
1081 if (result) { 1114 if (result) {
1082 printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", 1115 printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
1083 hca->name, port, result); 1116 hca->name, port, result);
1084 goto alloc_mem_failed; 1117 goto device_init_failed;
1085 } 1118 }
1086 1119
1087 /* 1120 /*
@@ -1097,7 +1130,7 @@ static struct net_device *ipoib_add_port(const char *format,
1097 if (result) { 1130 if (result) {
1098 printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n", 1131 printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n",
1099 hca->name, port, result); 1132 hca->name, port, result);
1100 goto alloc_mem_failed; 1133 goto device_init_failed;
1101 } else 1134 } else
1102 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); 1135 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
1103 1136
@@ -1132,6 +1165,8 @@ static struct net_device *ipoib_add_port(const char *format,
1132 goto sysfs_failed; 1165 goto sysfs_failed;
1133 if (ipoib_add_pkey_attr(priv->dev)) 1166 if (ipoib_add_pkey_attr(priv->dev))
1134 goto sysfs_failed; 1167 goto sysfs_failed;
1168 if (ipoib_add_umcast_attr(priv->dev))
1169 goto sysfs_failed;
1135 if (device_create_file(&priv->dev->dev, &dev_attr_create_child)) 1170 if (device_create_file(&priv->dev->dev, &dev_attr_create_child))
1136 goto sysfs_failed; 1171 goto sysfs_failed;
1137 if (device_create_file(&priv->dev->dev, &dev_attr_delete_child)) 1172 if (device_create_file(&priv->dev->dev, &dev_attr_delete_child))
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 98e904a7f3e8..827820ec66d1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -57,28 +57,6 @@ MODULE_PARM_DESC(mcast_debug_level,
57 57
58static DEFINE_MUTEX(mcast_mutex); 58static DEFINE_MUTEX(mcast_mutex);
59 59
60/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
61struct ipoib_mcast {
62 struct ib_sa_mcmember_rec mcmember;
63 struct ib_sa_multicast *mc;
64 struct ipoib_ah *ah;
65
66 struct rb_node rb_node;
67 struct list_head list;
68
69 unsigned long created;
70 unsigned long backoff;
71
72 unsigned long flags;
73 unsigned char logcount;
74
75 struct list_head neigh_list;
76
77 struct sk_buff_head pkt_queue;
78
79 struct net_device *dev;
80};
81
82struct ipoib_mcast_iter { 60struct ipoib_mcast_iter {
83 struct net_device *dev; 61 struct net_device *dev;
84 union ib_gid mgid; 62 union ib_gid mgid;
@@ -783,6 +761,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
783 struct ipoib_mcast *mcast, *tmcast; 761 struct ipoib_mcast *mcast, *tmcast;
784 LIST_HEAD(remove_list); 762 LIST_HEAD(remove_list);
785 unsigned long flags; 763 unsigned long flags;
764 struct ib_sa_mcmember_rec rec;
786 765
787 ipoib_dbg_mcast(priv, "restarting multicast task\n"); 766 ipoib_dbg_mcast(priv, "restarting multicast task\n");
788 767
@@ -816,6 +795,14 @@ void ipoib_mcast_restart_task(struct work_struct *work)
816 if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 795 if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
817 struct ipoib_mcast *nmcast; 796 struct ipoib_mcast *nmcast;
818 797
798 /* ignore group which is directly joined by userspace */
799 if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) &&
800 !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) {
801 ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid "
802 IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid));
803 continue;
804 }
805
819 /* Not found or send-only group, let's add a new entry */ 806 /* Not found or send-only group, let's add a new entry */
820 ipoib_dbg_mcast(priv, "adding multicast entry for mgid " 807 ipoib_dbg_mcast(priv, "adding multicast entry for mgid "
821 IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid)); 808 IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid));
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 563aeacf9e14..3c6e45db0ab5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -185,7 +185,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
185 goto out_free_cq; 185 goto out_free_cq;
186 186
187 init_attr.send_cq = priv->cq; 187 init_attr.send_cq = priv->cq;
188 init_attr.recv_cq = priv->cq, 188 init_attr.recv_cq = priv->cq;
189 189
190 priv->qp = ib_create_qp(priv->pd, &init_attr); 190 priv->qp = ib_create_qp(priv->pd, &init_attr);
191 if (IS_ERR(priv->qp)) { 191 if (IS_ERR(priv->qp)) {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 6762988439d1..293f5b892e3f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -119,6 +119,8 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
119 goto sysfs_failed; 119 goto sysfs_failed;
120 if (ipoib_add_pkey_attr(priv->dev)) 120 if (ipoib_add_pkey_attr(priv->dev))
121 goto sysfs_failed; 121 goto sysfs_failed;
122 if (ipoib_add_umcast_attr(priv->dev))
123 goto sysfs_failed;
122 124
123 if (device_create_file(&priv->dev->dev, &dev_attr_parent)) 125 if (device_create_file(&priv->dev->dev, &dev_attr_parent))
124 goto sysfs_failed; 126 goto sysfs_failed;
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 9ea5b9aaba7c..a6f2303ed14a 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -34,8 +34,6 @@
34#include <linux/kernel.h> 34#include <linux/kernel.h>
35#include <linux/slab.h> 35#include <linux/slab.h>
36#include <linux/mm.h> 36#include <linux/mm.h>
37#include <asm/io.h>
38#include <asm/scatterlist.h>
39#include <linux/scatterlist.h> 37#include <linux/scatterlist.h>
40#include <linux/kfifo.h> 38#include <linux/kfifo.h>
41#include <scsi/scsi_cmnd.h> 39#include <scsi/scsi_cmnd.h>
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 36cdf77ae92a..e05690e3592f 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -36,8 +36,6 @@
36#include <linux/slab.h> 36#include <linux/slab.h>
37#include <linux/mm.h> 37#include <linux/mm.h>
38#include <linux/highmem.h> 38#include <linux/highmem.h>
39#include <asm/io.h>
40#include <asm/scatterlist.h>
41#include <linux/scatterlist.h> 39#include <linux/scatterlist.h>
42 40
43#include "iscsi_iser.h" 41#include "iscsi_iser.h"
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index d42ec0156eec..654a4dce0236 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -32,7 +32,6 @@
32 * 32 *
33 * $Id: iser_verbs.c 7051 2006-05-10 12:29:11Z ogerlitz $ 33 * $Id: iser_verbs.c 7051 2006-05-10 12:29:11Z ogerlitz $
34 */ 34 */
35#include <asm/io.h>
36#include <linux/kernel.h> 35#include <linux/kernel.h>
37#include <linux/module.h> 36#include <linux/module.h>
38#include <linux/delay.h> 37#include <linux/delay.h>
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index f6a051428144..9ccc63886d92 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -285,6 +285,7 @@ static int srp_lookup_path(struct srp_target_port *target)
285 target->srp_host->dev->dev, 285 target->srp_host->dev->dev,
286 target->srp_host->port, 286 target->srp_host->port,
287 &target->path, 287 &target->path,
288 IB_SA_PATH_REC_SERVICE_ID |
288 IB_SA_PATH_REC_DGID | 289 IB_SA_PATH_REC_DGID |
289 IB_SA_PATH_REC_SGID | 290 IB_SA_PATH_REC_SGID |
290 IB_SA_PATH_REC_NUMB_PATH | 291 IB_SA_PATH_REC_NUMB_PATH |
@@ -1692,6 +1693,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
1692 goto out; 1693 goto out;
1693 } 1694 }
1694 target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16)); 1695 target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
1696 target->path.service_id = target->service_id;
1695 kfree(p); 1697 kfree(p);
1696 break; 1698 break;
1697 1699
diff --git a/drivers/net/mlx4/cmd.c b/drivers/net/mlx4/cmd.c
index a9f31753661a..db49051b97b1 100644
--- a/drivers/net/mlx4/cmd.c
+++ b/drivers/net/mlx4/cmd.c
@@ -95,7 +95,7 @@ enum {
95}; 95};
96 96
97enum { 97enum {
98 GO_BIT_TIMEOUT = 10000 98 GO_BIT_TIMEOUT_MSECS = 10000
99}; 99};
100 100
101struct mlx4_cmd_context { 101struct mlx4_cmd_context {
@@ -155,7 +155,7 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param,
155 155
156 end = jiffies; 156 end = jiffies;
157 if (event) 157 if (event)
158 end += HZ * 10; 158 end += msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS);
159 159
160 while (cmd_pending(dev)) { 160 while (cmd_pending(dev)) {
161 if (time_after_eq(jiffies, end)) 161 if (time_after_eq(jiffies, end))
@@ -184,6 +184,13 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param,
184 (event ? (1 << HCR_E_BIT) : 0) | 184 (event ? (1 << HCR_E_BIT) : 0) |
185 (op_modifier << HCR_OPMOD_SHIFT) | 185 (op_modifier << HCR_OPMOD_SHIFT) |
186 op), hcr + 6); 186 op), hcr + 6);
187
188 /*
189 * Make sure that our HCR writes don't get mixed in with
190 * writes from another CPU starting a FW command.
191 */
192 mmiowb();
193
187 cmd->toggle = cmd->toggle ^ 1; 194 cmd->toggle = cmd->toggle ^ 1;
188 195
189 ret = 0; 196 ret = 0;
diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c
index 39253d0c1590..d4441fee3d80 100644
--- a/drivers/net/mlx4/cq.c
+++ b/drivers/net/mlx4/cq.c
@@ -231,7 +231,7 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
231} 231}
232EXPORT_SYMBOL_GPL(mlx4_cq_free); 232EXPORT_SYMBOL_GPL(mlx4_cq_free);
233 233
234int __devinit mlx4_init_cq_table(struct mlx4_dev *dev) 234int mlx4_init_cq_table(struct mlx4_dev *dev)
235{ 235{
236 struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table; 236 struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
237 int err; 237 int err;
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index 2095c843fa15..9c36c2034030 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -300,8 +300,7 @@ static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
300 MLX4_CMD_TIME_CLASS_A); 300 MLX4_CMD_TIME_CLASS_A);
301} 301}
302 302
303static void __devinit __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, 303static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq)
304 struct mlx4_eq *eq)
305{ 304{
306 struct mlx4_priv *priv = mlx4_priv(dev); 305 struct mlx4_priv *priv = mlx4_priv(dev);
307 int index; 306 int index;
@@ -323,8 +322,8 @@ static void __devinit __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev,
323 return priv->eq_table.uar_map[index] + 0x800 + 8 * (eq->eqn % 4); 322 return priv->eq_table.uar_map[index] + 0x800 + 8 * (eq->eqn % 4);
324} 323}
325 324
326static int __devinit mlx4_create_eq(struct mlx4_dev *dev, int nent, 325static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
327 u8 intr, struct mlx4_eq *eq) 326 u8 intr, struct mlx4_eq *eq)
328{ 327{
329 struct mlx4_priv *priv = mlx4_priv(dev); 328 struct mlx4_priv *priv = mlx4_priv(dev);
330 struct mlx4_cmd_mailbox *mailbox; 329 struct mlx4_cmd_mailbox *mailbox;
@@ -485,7 +484,7 @@ static void mlx4_free_irqs(struct mlx4_dev *dev)
485 free_irq(eq_table->eq[i].irq, eq_table->eq + i); 484 free_irq(eq_table->eq[i].irq, eq_table->eq + i);
486} 485}
487 486
488static int __devinit mlx4_map_clr_int(struct mlx4_dev *dev) 487static int mlx4_map_clr_int(struct mlx4_dev *dev)
489{ 488{
490 struct mlx4_priv *priv = mlx4_priv(dev); 489 struct mlx4_priv *priv = mlx4_priv(dev);
491 490
@@ -506,7 +505,7 @@ static void mlx4_unmap_clr_int(struct mlx4_dev *dev)
506 iounmap(priv->clr_base); 505 iounmap(priv->clr_base);
507} 506}
508 507
509int __devinit mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt) 508int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt)
510{ 509{
511 struct mlx4_priv *priv = mlx4_priv(dev); 510 struct mlx4_priv *priv = mlx4_priv(dev);
512 int ret; 511 int ret;
@@ -548,7 +547,7 @@ void mlx4_unmap_eq_icm(struct mlx4_dev *dev)
548 __free_page(priv->eq_table.icm_page); 547 __free_page(priv->eq_table.icm_page);
549} 548}
550 549
551int __devinit mlx4_init_eq_table(struct mlx4_dev *dev) 550int mlx4_init_eq_table(struct mlx4_dev *dev)
552{ 551{
553 struct mlx4_priv *priv = mlx4_priv(dev); 552 struct mlx4_priv *priv = mlx4_priv(dev);
554 int err; 553 int err;
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index c45cbe43a0c4..6471d33afb7d 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -76,7 +76,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags)
76 [ 0] = "RC transport", 76 [ 0] = "RC transport",
77 [ 1] = "UC transport", 77 [ 1] = "UC transport",
78 [ 2] = "UD transport", 78 [ 2] = "UD transport",
79 [ 3] = "SRC transport", 79 [ 3] = "XRC transport",
80 [ 4] = "reliable multicast", 80 [ 4] = "reliable multicast",
81 [ 5] = "FCoIB support", 81 [ 5] = "FCoIB support",
82 [ 6] = "SRQ support", 82 [ 6] = "SRQ support",
diff --git a/drivers/net/mlx4/icm.c b/drivers/net/mlx4/icm.c
index b7a4aa8476fb..4b3c109d5eae 100644
--- a/drivers/net/mlx4/icm.c
+++ b/drivers/net/mlx4/icm.c
@@ -34,6 +34,7 @@
34#include <linux/init.h> 34#include <linux/init.h>
35#include <linux/errno.h> 35#include <linux/errno.h>
36#include <linux/mm.h> 36#include <linux/mm.h>
37#include <linux/scatterlist.h>
37 38
38#include <linux/mlx4/cmd.h> 39#include <linux/mlx4/cmd.h>
39 40
@@ -50,19 +51,41 @@ enum {
50 MLX4_TABLE_CHUNK_SIZE = 1 << 18 51 MLX4_TABLE_CHUNK_SIZE = 1 << 18
51}; 52};
52 53
53void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm) 54static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk)
54{ 55{
55 struct mlx4_icm_chunk *chunk, *tmp;
56 int i; 56 int i;
57 57
58 list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) { 58 if (chunk->nsg > 0)
59 if (chunk->nsg > 0) 59 pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
60 pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages, 60 PCI_DMA_BIDIRECTIONAL);
61 PCI_DMA_BIDIRECTIONAL); 61
62 for (i = 0; i < chunk->npages; ++i)
63 __free_pages(chunk->mem[i].page,
64 get_order(chunk->mem[i].length));
65}
66
67static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk)
68{
69 int i;
70
71 for (i = 0; i < chunk->npages; ++i)
72 dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length,
73 lowmem_page_address(chunk->mem[i].page),
74 sg_dma_address(&chunk->mem[i]));
75}
76
77void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent)
78{
79 struct mlx4_icm_chunk *chunk, *tmp;
62 80
63 for (i = 0; i < chunk->npages; ++i) 81 if (!icm)
64 __free_pages(chunk->mem[i].page, 82 return;
65 get_order(chunk->mem[i].length)); 83
84 list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {
85 if (coherent)
86 mlx4_free_icm_coherent(dev, chunk);
87 else
88 mlx4_free_icm_pages(dev, chunk);
66 89
67 kfree(chunk); 90 kfree(chunk);
68 } 91 }
@@ -70,16 +93,45 @@ void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm)
70 kfree(icm); 93 kfree(icm);
71} 94}
72 95
96static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
97{
98 mem->page = alloc_pages(gfp_mask, order);
99 if (!mem->page)
100 return -ENOMEM;
101
102 mem->length = PAGE_SIZE << order;
103 mem->offset = 0;
104 return 0;
105}
106
107static int mlx4_alloc_icm_coherent(struct device *dev, struct scatterlist *mem,
108 int order, gfp_t gfp_mask)
109{
110 void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order,
111 &sg_dma_address(mem), gfp_mask);
112 if (!buf)
113 return -ENOMEM;
114
115 sg_set_buf(mem, buf, PAGE_SIZE << order);
116 BUG_ON(mem->offset);
117 sg_dma_len(mem) = PAGE_SIZE << order;
118 return 0;
119}
120
73struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, 121struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
74 gfp_t gfp_mask) 122 gfp_t gfp_mask, int coherent)
75{ 123{
76 struct mlx4_icm *icm; 124 struct mlx4_icm *icm;
77 struct mlx4_icm_chunk *chunk = NULL; 125 struct mlx4_icm_chunk *chunk = NULL;
78 int cur_order; 126 int cur_order;
127 int ret;
128
129 /* We use sg_set_buf for coherent allocs, which assumes low memory */
130 BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM));
79 131
80 icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); 132 icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
81 if (!icm) 133 if (!icm)
82 return icm; 134 return NULL;
83 135
84 icm->refcount = 0; 136 icm->refcount = 0;
85 INIT_LIST_HEAD(&icm->chunk_list); 137 INIT_LIST_HEAD(&icm->chunk_list);
@@ -101,12 +153,20 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
101 while (1 << cur_order > npages) 153 while (1 << cur_order > npages)
102 --cur_order; 154 --cur_order;
103 155
104 chunk->mem[chunk->npages].page = alloc_pages(gfp_mask, cur_order); 156 if (coherent)
105 if (chunk->mem[chunk->npages].page) { 157 ret = mlx4_alloc_icm_coherent(&dev->pdev->dev,
106 chunk->mem[chunk->npages].length = PAGE_SIZE << cur_order; 158 &chunk->mem[chunk->npages],
107 chunk->mem[chunk->npages].offset = 0; 159 cur_order, gfp_mask);
160 else
161 ret = mlx4_alloc_icm_pages(&chunk->mem[chunk->npages],
162 cur_order, gfp_mask);
163
164 if (!ret) {
165 ++chunk->npages;
108 166
109 if (++chunk->npages == MLX4_ICM_CHUNK_LEN) { 167 if (coherent)
168 ++chunk->nsg;
169 else if (chunk->npages == MLX4_ICM_CHUNK_LEN) {
110 chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, 170 chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
111 chunk->npages, 171 chunk->npages,
112 PCI_DMA_BIDIRECTIONAL); 172 PCI_DMA_BIDIRECTIONAL);
@@ -125,7 +185,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
125 } 185 }
126 } 186 }
127 187
128 if (chunk) { 188 if (!coherent && chunk) {
129 chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, 189 chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
130 chunk->npages, 190 chunk->npages,
131 PCI_DMA_BIDIRECTIONAL); 191 PCI_DMA_BIDIRECTIONAL);
@@ -137,7 +197,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
137 return icm; 197 return icm;
138 198
139fail: 199fail:
140 mlx4_free_icm(dev, icm); 200 mlx4_free_icm(dev, icm, coherent);
141 return NULL; 201 return NULL;
142} 202}
143 203
@@ -202,7 +262,7 @@ int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
202 262
203 table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT, 263 table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
204 (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) | 264 (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
205 __GFP_NOWARN); 265 __GFP_NOWARN, table->coherent);
206 if (!table->icm[i]) { 266 if (!table->icm[i]) {
207 ret = -ENOMEM; 267 ret = -ENOMEM;
208 goto out; 268 goto out;
@@ -210,7 +270,7 @@ int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
210 270
211 if (mlx4_MAP_ICM(dev, table->icm[i], table->virt + 271 if (mlx4_MAP_ICM(dev, table->icm[i], table->virt +
212 (u64) i * MLX4_TABLE_CHUNK_SIZE)) { 272 (u64) i * MLX4_TABLE_CHUNK_SIZE)) {
213 mlx4_free_icm(dev, table->icm[i]); 273 mlx4_free_icm(dev, table->icm[i], table->coherent);
214 table->icm[i] = NULL; 274 table->icm[i] = NULL;
215 ret = -ENOMEM; 275 ret = -ENOMEM;
216 goto out; 276 goto out;
@@ -234,16 +294,16 @@ void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
234 if (--table->icm[i]->refcount == 0) { 294 if (--table->icm[i]->refcount == 0) {
235 mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE, 295 mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE,
236 MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE); 296 MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
237 mlx4_free_icm(dev, table->icm[i]); 297 mlx4_free_icm(dev, table->icm[i], table->coherent);
238 table->icm[i] = NULL; 298 table->icm[i] = NULL;
239 } 299 }
240 300
241 mutex_unlock(&table->mutex); 301 mutex_unlock(&table->mutex);
242} 302}
243 303
244void *mlx4_table_find(struct mlx4_icm_table *table, int obj) 304void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle)
245{ 305{
246 int idx, offset, i; 306 int idx, offset, dma_offset, i;
247 struct mlx4_icm_chunk *chunk; 307 struct mlx4_icm_chunk *chunk;
248 struct mlx4_icm *icm; 308 struct mlx4_icm *icm;
249 struct page *page = NULL; 309 struct page *page = NULL;
@@ -253,15 +313,26 @@ void *mlx4_table_find(struct mlx4_icm_table *table, int obj)
253 313
254 mutex_lock(&table->mutex); 314 mutex_lock(&table->mutex);
255 315
256 idx = obj & (table->num_obj - 1); 316 idx = (obj & (table->num_obj - 1)) * table->obj_size;
257 icm = table->icm[idx / (MLX4_TABLE_CHUNK_SIZE / table->obj_size)]; 317 icm = table->icm[idx / MLX4_TABLE_CHUNK_SIZE];
258 offset = idx % (MLX4_TABLE_CHUNK_SIZE / table->obj_size); 318 dma_offset = offset = idx % MLX4_TABLE_CHUNK_SIZE;
259 319
260 if (!icm) 320 if (!icm)
261 goto out; 321 goto out;
262 322
263 list_for_each_entry(chunk, &icm->chunk_list, list) { 323 list_for_each_entry(chunk, &icm->chunk_list, list) {
264 for (i = 0; i < chunk->npages; ++i) { 324 for (i = 0; i < chunk->npages; ++i) {
325 if (dma_handle && dma_offset >= 0) {
326 if (sg_dma_len(&chunk->mem[i]) > dma_offset)
327 *dma_handle = sg_dma_address(&chunk->mem[i]) +
328 dma_offset;
329 dma_offset -= sg_dma_len(&chunk->mem[i]);
330 }
331 /*
332 * DMA mapping can merge pages but not split them,
333 * so if we found the page, dma_handle has already
334 * been assigned to.
335 */
265 if (chunk->mem[i].length > offset) { 336 if (chunk->mem[i].length > offset) {
266 page = chunk->mem[i].page; 337 page = chunk->mem[i].page;
267 goto out; 338 goto out;
@@ -309,7 +380,7 @@ void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
309 380
310int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, 381int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
311 u64 virt, int obj_size, int nobj, int reserved, 382 u64 virt, int obj_size, int nobj, int reserved,
312 int use_lowmem) 383 int use_lowmem, int use_coherent)
313{ 384{
314 int obj_per_chunk; 385 int obj_per_chunk;
315 int num_icm; 386 int num_icm;
@@ -327,6 +398,7 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
327 table->num_obj = nobj; 398 table->num_obj = nobj;
328 table->obj_size = obj_size; 399 table->obj_size = obj_size;
329 table->lowmem = use_lowmem; 400 table->lowmem = use_lowmem;
401 table->coherent = use_coherent;
330 mutex_init(&table->mutex); 402 mutex_init(&table->mutex);
331 403
332 for (i = 0; i * MLX4_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) { 404 for (i = 0; i * MLX4_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) {
@@ -336,11 +408,11 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
336 408
337 table->icm[i] = mlx4_alloc_icm(dev, chunk_size >> PAGE_SHIFT, 409 table->icm[i] = mlx4_alloc_icm(dev, chunk_size >> PAGE_SHIFT,
338 (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) | 410 (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
339 __GFP_NOWARN); 411 __GFP_NOWARN, use_coherent);
340 if (!table->icm[i]) 412 if (!table->icm[i])
341 goto err; 413 goto err;
342 if (mlx4_MAP_ICM(dev, table->icm[i], virt + i * MLX4_TABLE_CHUNK_SIZE)) { 414 if (mlx4_MAP_ICM(dev, table->icm[i], virt + i * MLX4_TABLE_CHUNK_SIZE)) {
343 mlx4_free_icm(dev, table->icm[i]); 415 mlx4_free_icm(dev, table->icm[i], use_coherent);
344 table->icm[i] = NULL; 416 table->icm[i] = NULL;
345 goto err; 417 goto err;
346 } 418 }
@@ -359,7 +431,7 @@ err:
359 if (table->icm[i]) { 431 if (table->icm[i]) {
360 mlx4_UNMAP_ICM(dev, virt + i * MLX4_TABLE_CHUNK_SIZE, 432 mlx4_UNMAP_ICM(dev, virt + i * MLX4_TABLE_CHUNK_SIZE,
361 MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE); 433 MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
362 mlx4_free_icm(dev, table->icm[i]); 434 mlx4_free_icm(dev, table->icm[i], use_coherent);
363 } 435 }
364 436
365 return -ENOMEM; 437 return -ENOMEM;
@@ -373,7 +445,7 @@ void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table)
373 if (table->icm[i]) { 445 if (table->icm[i]) {
374 mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE, 446 mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE,
375 MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE); 447 MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
376 mlx4_free_icm(dev, table->icm[i]); 448 mlx4_free_icm(dev, table->icm[i], table->coherent);
377 } 449 }
378 450
379 kfree(table->icm); 451 kfree(table->icm);
diff --git a/drivers/net/mlx4/icm.h b/drivers/net/mlx4/icm.h
index bea223d879a5..6c44edf35847 100644
--- a/drivers/net/mlx4/icm.h
+++ b/drivers/net/mlx4/icm.h
@@ -67,8 +67,9 @@ struct mlx4_icm_iter {
67 67
68struct mlx4_dev; 68struct mlx4_dev;
69 69
70struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, gfp_t gfp_mask); 70struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
71void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm); 71 gfp_t gfp_mask, int coherent);
72void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent);
72 73
73int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj); 74int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
74void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj); 75void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
@@ -78,11 +79,11 @@ void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
78 int start, int end); 79 int start, int end);
79int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, 80int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
80 u64 virt, int obj_size, int nobj, int reserved, 81 u64 virt, int obj_size, int nobj, int reserved,
81 int use_lowmem); 82 int use_lowmem, int use_coherent);
82void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table); 83void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table);
83int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj); 84int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
84void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj); 85void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
85void *mlx4_table_find(struct mlx4_icm_table *table, int obj); 86void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle);
86int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, 87int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
87 int start, int end); 88 int start, int end);
88void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, 89void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index 4dc9dc19b716..e029b8afbd37 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -61,7 +61,7 @@ MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
61 61
62#ifdef CONFIG_PCI_MSI 62#ifdef CONFIG_PCI_MSI
63 63
64static int msi_x; 64static int msi_x = 1;
65module_param(msi_x, int, 0444); 65module_param(msi_x, int, 0444);
66MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero"); 66MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
67 67
@@ -85,7 +85,7 @@ static struct mlx4_profile default_profile = {
85 .num_mtt = 1 << 20, 85 .num_mtt = 1 << 20,
86}; 86};
87 87
88static int __devinit mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) 88static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
89{ 89{
90 int err; 90 int err;
91 int i; 91 int i;
@@ -149,7 +149,8 @@ static int __devinit mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev
149 dev->caps.max_cqes = dev_cap->max_cq_sz - 1; 149 dev->caps.max_cqes = dev_cap->max_cq_sz - 1;
150 dev->caps.reserved_cqs = dev_cap->reserved_cqs; 150 dev->caps.reserved_cqs = dev_cap->reserved_cqs;
151 dev->caps.reserved_eqs = dev_cap->reserved_eqs; 151 dev->caps.reserved_eqs = dev_cap->reserved_eqs;
152 dev->caps.reserved_mtts = dev_cap->reserved_mtts; 152 dev->caps.reserved_mtts = DIV_ROUND_UP(dev_cap->reserved_mtts,
153 MLX4_MTT_ENTRY_PER_SEG);
153 dev->caps.reserved_mrws = dev_cap->reserved_mrws; 154 dev->caps.reserved_mrws = dev_cap->reserved_mrws;
154 dev->caps.reserved_uars = dev_cap->reserved_uars; 155 dev->caps.reserved_uars = dev_cap->reserved_uars;
155 dev->caps.reserved_pds = dev_cap->reserved_pds; 156 dev->caps.reserved_pds = dev_cap->reserved_pds;
@@ -168,7 +169,7 @@ static int __devinit mlx4_load_fw(struct mlx4_dev *dev)
168 int err; 169 int err;
169 170
170 priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages, 171 priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
171 GFP_HIGHUSER | __GFP_NOWARN); 172 GFP_HIGHUSER | __GFP_NOWARN, 0);
172 if (!priv->fw.fw_icm) { 173 if (!priv->fw.fw_icm) {
173 mlx4_err(dev, "Couldn't allocate FW area, aborting.\n"); 174 mlx4_err(dev, "Couldn't allocate FW area, aborting.\n");
174 return -ENOMEM; 175 return -ENOMEM;
@@ -192,7 +193,7 @@ err_unmap_fa:
192 mlx4_UNMAP_FA(dev); 193 mlx4_UNMAP_FA(dev);
193 194
194err_free: 195err_free:
195 mlx4_free_icm(dev, priv->fw.fw_icm); 196 mlx4_free_icm(dev, priv->fw.fw_icm, 0);
196 return err; 197 return err;
197} 198}
198 199
@@ -207,7 +208,7 @@ static int __devinit mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
207 ((u64) (MLX4_CMPT_TYPE_QP * 208 ((u64) (MLX4_CMPT_TYPE_QP *
208 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 209 cmpt_entry_sz) << MLX4_CMPT_SHIFT),
209 cmpt_entry_sz, dev->caps.num_qps, 210 cmpt_entry_sz, dev->caps.num_qps,
210 dev->caps.reserved_qps, 0); 211 dev->caps.reserved_qps, 0, 0);
211 if (err) 212 if (err)
212 goto err; 213 goto err;
213 214
@@ -216,7 +217,7 @@ static int __devinit mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
216 ((u64) (MLX4_CMPT_TYPE_SRQ * 217 ((u64) (MLX4_CMPT_TYPE_SRQ *
217 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 218 cmpt_entry_sz) << MLX4_CMPT_SHIFT),
218 cmpt_entry_sz, dev->caps.num_srqs, 219 cmpt_entry_sz, dev->caps.num_srqs,
219 dev->caps.reserved_srqs, 0); 220 dev->caps.reserved_srqs, 0, 0);
220 if (err) 221 if (err)
221 goto err_qp; 222 goto err_qp;
222 223
@@ -225,7 +226,7 @@ static int __devinit mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
225 ((u64) (MLX4_CMPT_TYPE_CQ * 226 ((u64) (MLX4_CMPT_TYPE_CQ *
226 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 227 cmpt_entry_sz) << MLX4_CMPT_SHIFT),
227 cmpt_entry_sz, dev->caps.num_cqs, 228 cmpt_entry_sz, dev->caps.num_cqs,
228 dev->caps.reserved_cqs, 0); 229 dev->caps.reserved_cqs, 0, 0);
229 if (err) 230 if (err)
230 goto err_srq; 231 goto err_srq;
231 232
@@ -236,7 +237,7 @@ static int __devinit mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
236 cmpt_entry_sz, 237 cmpt_entry_sz,
237 roundup_pow_of_two(MLX4_NUM_EQ + 238 roundup_pow_of_two(MLX4_NUM_EQ +
238 dev->caps.reserved_eqs), 239 dev->caps.reserved_eqs),
239 MLX4_NUM_EQ + dev->caps.reserved_eqs, 0); 240 MLX4_NUM_EQ + dev->caps.reserved_eqs, 0, 0);
240 if (err) 241 if (err)
241 goto err_cq; 242 goto err_cq;
242 243
@@ -255,10 +256,8 @@ err:
255 return err; 256 return err;
256} 257}
257 258
258static int __devinit mlx4_init_icm(struct mlx4_dev *dev, 259static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
259 struct mlx4_dev_cap *dev_cap, 260 struct mlx4_init_hca_param *init_hca, u64 icm_size)
260 struct mlx4_init_hca_param *init_hca,
261 u64 icm_size)
262{ 261{
263 struct mlx4_priv *priv = mlx4_priv(dev); 262 struct mlx4_priv *priv = mlx4_priv(dev);
264 u64 aux_pages; 263 u64 aux_pages;
@@ -275,7 +274,7 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
275 (unsigned long long) aux_pages << 2); 274 (unsigned long long) aux_pages << 2);
276 275
277 priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages, 276 priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
278 GFP_HIGHUSER | __GFP_NOWARN); 277 GFP_HIGHUSER | __GFP_NOWARN, 0);
279 if (!priv->fw.aux_icm) { 278 if (!priv->fw.aux_icm) {
280 mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n"); 279 mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n");
281 return -ENOMEM; 280 return -ENOMEM;
@@ -299,11 +298,22 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
299 goto err_unmap_cmpt; 298 goto err_unmap_cmpt;
300 } 299 }
301 300
301 /*
302 * Reserved MTT entries must be aligned up to a cacheline
303 * boundary, since the FW will write to them, while the driver
304 * writes to all other MTT entries. (The variable
305 * dev->caps.mtt_entry_sz below is really the MTT segment
306 * size, not the raw entry size)
307 */
308 dev->caps.reserved_mtts =
309 ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
310 dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;
311
302 err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table, 312 err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
303 init_hca->mtt_base, 313 init_hca->mtt_base,
304 dev->caps.mtt_entry_sz, 314 dev->caps.mtt_entry_sz,
305 dev->caps.num_mtt_segs, 315 dev->caps.num_mtt_segs,
306 dev->caps.reserved_mtts, 1); 316 dev->caps.reserved_mtts, 1, 0);
307 if (err) { 317 if (err) {
308 mlx4_err(dev, "Failed to map MTT context memory, aborting.\n"); 318 mlx4_err(dev, "Failed to map MTT context memory, aborting.\n");
309 goto err_unmap_eq; 319 goto err_unmap_eq;
@@ -313,7 +323,7 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
313 init_hca->dmpt_base, 323 init_hca->dmpt_base,
314 dev_cap->dmpt_entry_sz, 324 dev_cap->dmpt_entry_sz,
315 dev->caps.num_mpts, 325 dev->caps.num_mpts,
316 dev->caps.reserved_mrws, 1); 326 dev->caps.reserved_mrws, 1, 1);
317 if (err) { 327 if (err) {
318 mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n"); 328 mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n");
319 goto err_unmap_mtt; 329 goto err_unmap_mtt;
@@ -323,7 +333,7 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
323 init_hca->qpc_base, 333 init_hca->qpc_base,
324 dev_cap->qpc_entry_sz, 334 dev_cap->qpc_entry_sz,
325 dev->caps.num_qps, 335 dev->caps.num_qps,
326 dev->caps.reserved_qps, 0); 336 dev->caps.reserved_qps, 0, 0);
327 if (err) { 337 if (err) {
328 mlx4_err(dev, "Failed to map QP context memory, aborting.\n"); 338 mlx4_err(dev, "Failed to map QP context memory, aborting.\n");
329 goto err_unmap_dmpt; 339 goto err_unmap_dmpt;
@@ -333,7 +343,7 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
333 init_hca->auxc_base, 343 init_hca->auxc_base,
334 dev_cap->aux_entry_sz, 344 dev_cap->aux_entry_sz,
335 dev->caps.num_qps, 345 dev->caps.num_qps,
336 dev->caps.reserved_qps, 0); 346 dev->caps.reserved_qps, 0, 0);
337 if (err) { 347 if (err) {
338 mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n"); 348 mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n");
339 goto err_unmap_qp; 349 goto err_unmap_qp;
@@ -343,7 +353,7 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
343 init_hca->altc_base, 353 init_hca->altc_base,
344 dev_cap->altc_entry_sz, 354 dev_cap->altc_entry_sz,
345 dev->caps.num_qps, 355 dev->caps.num_qps,
346 dev->caps.reserved_qps, 0); 356 dev->caps.reserved_qps, 0, 0);
347 if (err) { 357 if (err) {
348 mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n"); 358 mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n");
349 goto err_unmap_auxc; 359 goto err_unmap_auxc;
@@ -353,7 +363,7 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
353 init_hca->rdmarc_base, 363 init_hca->rdmarc_base,
354 dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift, 364 dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
355 dev->caps.num_qps, 365 dev->caps.num_qps,
356 dev->caps.reserved_qps, 0); 366 dev->caps.reserved_qps, 0, 0);
357 if (err) { 367 if (err) {
358 mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n"); 368 mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
359 goto err_unmap_altc; 369 goto err_unmap_altc;
@@ -363,7 +373,7 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
363 init_hca->cqc_base, 373 init_hca->cqc_base,
364 dev_cap->cqc_entry_sz, 374 dev_cap->cqc_entry_sz,
365 dev->caps.num_cqs, 375 dev->caps.num_cqs,
366 dev->caps.reserved_cqs, 0); 376 dev->caps.reserved_cqs, 0, 0);
367 if (err) { 377 if (err) {
368 mlx4_err(dev, "Failed to map CQ context memory, aborting.\n"); 378 mlx4_err(dev, "Failed to map CQ context memory, aborting.\n");
369 goto err_unmap_rdmarc; 379 goto err_unmap_rdmarc;
@@ -373,7 +383,7 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
373 init_hca->srqc_base, 383 init_hca->srqc_base,
374 dev_cap->srq_entry_sz, 384 dev_cap->srq_entry_sz,
375 dev->caps.num_srqs, 385 dev->caps.num_srqs,
376 dev->caps.reserved_srqs, 0); 386 dev->caps.reserved_srqs, 0, 0);
377 if (err) { 387 if (err) {
378 mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n"); 388 mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n");
379 goto err_unmap_cq; 389 goto err_unmap_cq;
@@ -388,7 +398,7 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
388 init_hca->mc_base, MLX4_MGM_ENTRY_SIZE, 398 init_hca->mc_base, MLX4_MGM_ENTRY_SIZE,
389 dev->caps.num_mgms + dev->caps.num_amgms, 399 dev->caps.num_mgms + dev->caps.num_amgms,
390 dev->caps.num_mgms + dev->caps.num_amgms, 400 dev->caps.num_mgms + dev->caps.num_amgms,
391 0); 401 0, 0);
392 if (err) { 402 if (err) {
393 mlx4_err(dev, "Failed to map MCG context memory, aborting.\n"); 403 mlx4_err(dev, "Failed to map MCG context memory, aborting.\n");
394 goto err_unmap_srq; 404 goto err_unmap_srq;
@@ -433,7 +443,7 @@ err_unmap_aux:
433 mlx4_UNMAP_ICM_AUX(dev); 443 mlx4_UNMAP_ICM_AUX(dev);
434 444
435err_free_aux: 445err_free_aux:
436 mlx4_free_icm(dev, priv->fw.aux_icm); 446 mlx4_free_icm(dev, priv->fw.aux_icm, 0);
437 447
438 return err; 448 return err;
439} 449}
@@ -458,7 +468,7 @@ static void mlx4_free_icms(struct mlx4_dev *dev)
458 mlx4_unmap_eq_icm(dev); 468 mlx4_unmap_eq_icm(dev);
459 469
460 mlx4_UNMAP_ICM_AUX(dev); 470 mlx4_UNMAP_ICM_AUX(dev);
461 mlx4_free_icm(dev, priv->fw.aux_icm); 471 mlx4_free_icm(dev, priv->fw.aux_icm, 0);
462} 472}
463 473
464static void mlx4_close_hca(struct mlx4_dev *dev) 474static void mlx4_close_hca(struct mlx4_dev *dev)
@@ -466,10 +476,10 @@ static void mlx4_close_hca(struct mlx4_dev *dev)
466 mlx4_CLOSE_HCA(dev, 0); 476 mlx4_CLOSE_HCA(dev, 0);
467 mlx4_free_icms(dev); 477 mlx4_free_icms(dev);
468 mlx4_UNMAP_FA(dev); 478 mlx4_UNMAP_FA(dev);
469 mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm); 479 mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
470} 480}
471 481
472static int __devinit mlx4_init_hca(struct mlx4_dev *dev) 482static int mlx4_init_hca(struct mlx4_dev *dev)
473{ 483{
474 struct mlx4_priv *priv = mlx4_priv(dev); 484 struct mlx4_priv *priv = mlx4_priv(dev);
475 struct mlx4_adapter adapter; 485 struct mlx4_adapter adapter;
@@ -524,8 +534,8 @@ static int __devinit mlx4_init_hca(struct mlx4_dev *dev)
524 } 534 }
525 535
526 priv->eq_table.inta_pin = adapter.inta_pin; 536 priv->eq_table.inta_pin = adapter.inta_pin;
527 priv->rev_id = adapter.revision_id; 537 dev->rev_id = adapter.revision_id;
528 memcpy(priv->board_id, adapter.board_id, sizeof priv->board_id); 538 memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);
529 539
530 return 0; 540 return 0;
531 541
@@ -537,12 +547,12 @@ err_free_icm:
537 547
538err_stop_fw: 548err_stop_fw:
539 mlx4_UNMAP_FA(dev); 549 mlx4_UNMAP_FA(dev);
540 mlx4_free_icm(dev, priv->fw.fw_icm); 550 mlx4_free_icm(dev, priv->fw.fw_icm, 0);
541 551
542 return err; 552 return err;
543} 553}
544 554
545static int __devinit mlx4_setup_hca(struct mlx4_dev *dev) 555static int mlx4_setup_hca(struct mlx4_dev *dev)
546{ 556{
547 struct mlx4_priv *priv = mlx4_priv(dev); 557 struct mlx4_priv *priv = mlx4_priv(dev);
548 int err; 558 int err;
@@ -599,13 +609,17 @@ static int __devinit mlx4_setup_hca(struct mlx4_dev *dev)
599 609
600 err = mlx4_NOP(dev); 610 err = mlx4_NOP(dev);
601 if (err) { 611 if (err) {
602 mlx4_err(dev, "NOP command failed to generate interrupt " 612 if (dev->flags & MLX4_FLAG_MSI_X) {
603 "(IRQ %d), aborting.\n", 613 mlx4_warn(dev, "NOP command failed to generate MSI-X "
604 priv->eq_table.eq[MLX4_EQ_ASYNC].irq); 614 "interrupt IRQ %d).\n",
605 if (dev->flags & MLX4_FLAG_MSI_X) 615 priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
606 mlx4_err(dev, "Try again with MSI-X disabled.\n"); 616 mlx4_warn(dev, "Trying again without MSI-X.\n");
607 else 617 } else {
618 mlx4_err(dev, "NOP command failed to generate interrupt "
619 "(IRQ %d), aborting.\n",
620 priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
608 mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n"); 621 mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
622 }
609 623
610 goto err_cmd_poll; 624 goto err_cmd_poll;
611 } 625 }
@@ -705,19 +719,12 @@ no_msi:
705 priv->eq_table.eq[i].irq = dev->pdev->irq; 719 priv->eq_table.eq[i].irq = dev->pdev->irq;
706} 720}
707 721
708static int __devinit mlx4_init_one(struct pci_dev *pdev, 722static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
709 const struct pci_device_id *id)
710{ 723{
711 static int mlx4_version_printed;
712 struct mlx4_priv *priv; 724 struct mlx4_priv *priv;
713 struct mlx4_dev *dev; 725 struct mlx4_dev *dev;
714 int err; 726 int err;
715 727
716 if (!mlx4_version_printed) {
717 printk(KERN_INFO "%s", mlx4_version);
718 ++mlx4_version_printed;
719 }
720
721 printk(KERN_INFO PFX "Initializing %s\n", 728 printk(KERN_INFO PFX "Initializing %s\n",
722 pci_name(pdev)); 729 pci_name(pdev));
723 730
@@ -803,8 +810,6 @@ static int __devinit mlx4_init_one(struct pci_dev *pdev,
803 goto err_free_dev; 810 goto err_free_dev;
804 } 811 }
805 812
806 mlx4_enable_msi_x(dev);
807
808 if (mlx4_cmd_init(dev)) { 813 if (mlx4_cmd_init(dev)) {
809 mlx4_err(dev, "Failed to init command interface, aborting.\n"); 814 mlx4_err(dev, "Failed to init command interface, aborting.\n");
810 goto err_free_dev; 815 goto err_free_dev;
@@ -814,7 +819,15 @@ static int __devinit mlx4_init_one(struct pci_dev *pdev,
814 if (err) 819 if (err)
815 goto err_cmd; 820 goto err_cmd;
816 821
822 mlx4_enable_msi_x(dev);
823
817 err = mlx4_setup_hca(dev); 824 err = mlx4_setup_hca(dev);
825 if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X)) {
826 dev->flags &= ~MLX4_FLAG_MSI_X;
827 pci_disable_msix(pdev);
828 err = mlx4_setup_hca(dev);
829 }
830
818 if (err) 831 if (err)
819 goto err_close; 832 goto err_close;
820 833
@@ -838,15 +851,15 @@ err_cleanup:
838 mlx4_cleanup_uar_table(dev); 851 mlx4_cleanup_uar_table(dev);
839 852
840err_close: 853err_close:
854 if (dev->flags & MLX4_FLAG_MSI_X)
855 pci_disable_msix(pdev);
856
841 mlx4_close_hca(dev); 857 mlx4_close_hca(dev);
842 858
843err_cmd: 859err_cmd:
844 mlx4_cmd_cleanup(dev); 860 mlx4_cmd_cleanup(dev);
845 861
846err_free_dev: 862err_free_dev:
847 if (dev->flags & MLX4_FLAG_MSI_X)
848 pci_disable_msix(pdev);
849
850 kfree(priv); 863 kfree(priv);
851 864
852err_release_bar2: 865err_release_bar2:
@@ -861,7 +874,20 @@ err_disable_pdev:
861 return err; 874 return err;
862} 875}
863 876
864static void __devexit mlx4_remove_one(struct pci_dev *pdev) 877static int __devinit mlx4_init_one(struct pci_dev *pdev,
878 const struct pci_device_id *id)
879{
880 static int mlx4_version_printed;
881
882 if (!mlx4_version_printed) {
883 printk(KERN_INFO "%s", mlx4_version);
884 ++mlx4_version_printed;
885 }
886
887 return __mlx4_init_one(pdev, id);
888}
889
890static void mlx4_remove_one(struct pci_dev *pdev)
865{ 891{
866 struct mlx4_dev *dev = pci_get_drvdata(pdev); 892 struct mlx4_dev *dev = pci_get_drvdata(pdev);
867 struct mlx4_priv *priv = mlx4_priv(dev); 893 struct mlx4_priv *priv = mlx4_priv(dev);
@@ -902,7 +928,7 @@ static void __devexit mlx4_remove_one(struct pci_dev *pdev)
902int mlx4_restart_one(struct pci_dev *pdev) 928int mlx4_restart_one(struct pci_dev *pdev)
903{ 929{
904 mlx4_remove_one(pdev); 930 mlx4_remove_one(pdev);
905 return mlx4_init_one(pdev, NULL); 931 return __mlx4_init_one(pdev, NULL);
906} 932}
907 933
908static struct pci_device_id mlx4_pci_table[] = { 934static struct pci_device_id mlx4_pci_table[] = {
diff --git a/drivers/net/mlx4/mcg.c b/drivers/net/mlx4/mcg.c
index 672024a0ee71..a99e7729d333 100644
--- a/drivers/net/mlx4/mcg.c
+++ b/drivers/net/mlx4/mcg.c
@@ -359,7 +359,7 @@ out:
359} 359}
360EXPORT_SYMBOL_GPL(mlx4_multicast_detach); 360EXPORT_SYMBOL_GPL(mlx4_multicast_detach);
361 361
362int __devinit mlx4_init_mcg_table(struct mlx4_dev *dev) 362int mlx4_init_mcg_table(struct mlx4_dev *dev)
363{ 363{
364 struct mlx4_priv *priv = mlx4_priv(dev); 364 struct mlx4_priv *priv = mlx4_priv(dev);
365 int err; 365 int err;
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index be304a7c2c91..53a1cdddfc13 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -56,11 +56,7 @@ enum {
56}; 56};
57 57
58enum { 58enum {
59 MLX4_BOARD_ID_LEN = 64 59 MLX4_MGM_ENTRY_SIZE = 0x100,
60};
61
62enum {
63 MLX4_MGM_ENTRY_SIZE = 0x40,
64 MLX4_QP_PER_MGM = 4 * (MLX4_MGM_ENTRY_SIZE / 16 - 2), 60 MLX4_QP_PER_MGM = 4 * (MLX4_MGM_ENTRY_SIZE / 16 - 2),
65 MLX4_MTT_ENTRY_PER_SEG = 8 61 MLX4_MTT_ENTRY_PER_SEG = 8
66}; 62};
@@ -133,6 +129,7 @@ struct mlx4_icm_table {
133 int num_obj; 129 int num_obj;
134 int obj_size; 130 int obj_size;
135 int lowmem; 131 int lowmem;
132 int coherent;
136 struct mutex mutex; 133 struct mutex mutex;
137 struct mlx4_icm **icm; 134 struct mlx4_icm **icm;
138}; 135};
@@ -277,9 +274,6 @@ struct mlx4_priv {
277 274
278 struct mlx4_uar driver_uar; 275 struct mlx4_uar driver_uar;
279 void __iomem *kar; 276 void __iomem *kar;
280
281 u32 rev_id;
282 char board_id[MLX4_BOARD_ID_LEN];
283}; 277};
284 278
285static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev) 279static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index 5b87183e62ce..0c05a10bae3b 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -68,6 +68,9 @@ struct mlx4_mpt_entry {
68 68
69#define MLX4_MTT_FLAG_PRESENT 1 69#define MLX4_MTT_FLAG_PRESENT 1
70 70
71#define MLX4_MPT_STATUS_SW 0xF0
72#define MLX4_MPT_STATUS_HW 0x00
73
71static u32 mlx4_buddy_alloc(struct mlx4_buddy *buddy, int order) 74static u32 mlx4_buddy_alloc(struct mlx4_buddy *buddy, int order)
72{ 75{
73 int o; 76 int o;
@@ -349,58 +352,57 @@ err_table:
349} 352}
350EXPORT_SYMBOL_GPL(mlx4_mr_enable); 353EXPORT_SYMBOL_GPL(mlx4_mr_enable);
351 354
352static int mlx4_WRITE_MTT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, 355static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
353 int num_mtt) 356 int start_index, int npages, u64 *page_list)
354{ 357{
355 return mlx4_cmd(dev, mailbox->dma, num_mtt, 0, MLX4_CMD_WRITE_MTT, 358 struct mlx4_priv *priv = mlx4_priv(dev);
356 MLX4_CMD_TIME_CLASS_B); 359 __be64 *mtts;
360 dma_addr_t dma_handle;
361 int i;
362 int s = start_index * sizeof (u64);
363
364 /* All MTTs must fit in the same page */
365 if (start_index / (PAGE_SIZE / sizeof (u64)) !=
366 (start_index + npages - 1) / (PAGE_SIZE / sizeof (u64)))
367 return -EINVAL;
368
369 if (start_index & (MLX4_MTT_ENTRY_PER_SEG - 1))
370 return -EINVAL;
371
372 mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->first_seg +
373 s / dev->caps.mtt_entry_sz, &dma_handle);
374 if (!mtts)
375 return -ENOMEM;
376
377 for (i = 0; i < npages; ++i)
378 mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
379
380 dma_sync_single(&dev->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE);
381
382 return 0;
357} 383}
358 384
359int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, 385int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
360 int start_index, int npages, u64 *page_list) 386 int start_index, int npages, u64 *page_list)
361{ 387{
362 struct mlx4_cmd_mailbox *mailbox; 388 int chunk;
363 __be64 *mtt_entry; 389 int err;
364 int i;
365 int err = 0;
366 390
367 if (mtt->order < 0) 391 if (mtt->order < 0)
368 return -EINVAL; 392 return -EINVAL;
369 393
370 mailbox = mlx4_alloc_cmd_mailbox(dev);
371 if (IS_ERR(mailbox))
372 return PTR_ERR(mailbox);
373
374 mtt_entry = mailbox->buf;
375
376 while (npages > 0) { 394 while (npages > 0) {
377 mtt_entry[0] = cpu_to_be64(mlx4_mtt_addr(dev, mtt) + start_index * 8); 395 chunk = min_t(int, PAGE_SIZE / sizeof(u64), npages);
378 mtt_entry[1] = 0; 396 err = mlx4_write_mtt_chunk(dev, mtt, start_index, chunk, page_list);
379
380 for (i = 0; i < npages && i < MLX4_MAILBOX_SIZE / 8 - 2; ++i)
381 mtt_entry[i + 2] = cpu_to_be64(page_list[i] |
382 MLX4_MTT_FLAG_PRESENT);
383
384 /*
385 * If we have an odd number of entries to write, add
386 * one more dummy entry for firmware efficiency.
387 */
388 if (i & 1)
389 mtt_entry[i + 2] = 0;
390
391 err = mlx4_WRITE_MTT(dev, mailbox, (i + 1) & ~1);
392 if (err) 397 if (err)
393 goto out; 398 return err;
394 399
395 npages -= i; 400 npages -= chunk;
396 start_index += i; 401 start_index += chunk;
397 page_list += i; 402 page_list += chunk;
398 } 403 }
399 404
400out: 405 return 0;
401 mlx4_free_cmd_mailbox(dev, mailbox);
402
403 return err;
404} 406}
405EXPORT_SYMBOL_GPL(mlx4_write_mtt); 407EXPORT_SYMBOL_GPL(mlx4_write_mtt);
406 408
@@ -428,7 +430,7 @@ int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
428} 430}
429EXPORT_SYMBOL_GPL(mlx4_buf_write_mtt); 431EXPORT_SYMBOL_GPL(mlx4_buf_write_mtt);
430 432
431int __devinit mlx4_init_mr_table(struct mlx4_dev *dev) 433int mlx4_init_mr_table(struct mlx4_dev *dev)
432{ 434{
433 struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; 435 struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
434 int err; 436 int err;
@@ -444,7 +446,7 @@ int __devinit mlx4_init_mr_table(struct mlx4_dev *dev)
444 goto err_buddy; 446 goto err_buddy;
445 447
446 if (dev->caps.reserved_mtts) { 448 if (dev->caps.reserved_mtts) {
447 if (mlx4_alloc_mtt_range(dev, ilog2(dev->caps.reserved_mtts)) == -1) { 449 if (mlx4_alloc_mtt_range(dev, fls(dev->caps.reserved_mtts - 1)) == -1) {
448 mlx4_warn(dev, "MTT table of order %d is too small.\n", 450 mlx4_warn(dev, "MTT table of order %d is too small.\n",
449 mr_table->mtt_buddy.max_order); 451 mr_table->mtt_buddy.max_order);
450 err = -ENOMEM; 452 err = -ENOMEM;
@@ -470,3 +472,165 @@ void mlx4_cleanup_mr_table(struct mlx4_dev *dev)
470 mlx4_buddy_cleanup(&mr_table->mtt_buddy); 472 mlx4_buddy_cleanup(&mr_table->mtt_buddy);
471 mlx4_bitmap_cleanup(&mr_table->mpt_bitmap); 473 mlx4_bitmap_cleanup(&mr_table->mpt_bitmap);
472} 474}
475
476static inline int mlx4_check_fmr(struct mlx4_fmr *fmr, u64 *page_list,
477 int npages, u64 iova)
478{
479 int i, page_mask;
480
481 if (npages > fmr->max_pages)
482 return -EINVAL;
483
484 page_mask = (1 << fmr->page_shift) - 1;
485
486 /* We are getting page lists, so va must be page aligned. */
487 if (iova & page_mask)
488 return -EINVAL;
489
490 /* Trust the user not to pass misaligned data in page_list */
491 if (0)
492 for (i = 0; i < npages; ++i) {
493 if (page_list[i] & ~page_mask)
494 return -EINVAL;
495 }
496
497 if (fmr->maps >= fmr->max_maps)
498 return -EINVAL;
499
500 return 0;
501}
502
503int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
504 int npages, u64 iova, u32 *lkey, u32 *rkey)
505{
506 u32 key;
507 int i, err;
508
509 err = mlx4_check_fmr(fmr, page_list, npages, iova);
510 if (err)
511 return err;
512
513 ++fmr->maps;
514
515 key = key_to_hw_index(fmr->mr.key);
516 key += dev->caps.num_mpts;
517 *lkey = *rkey = fmr->mr.key = hw_index_to_key(key);
518
519 *(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW;
520
521 /* Make sure MPT status is visible before writing MTT entries */
522 wmb();
523
524 for (i = 0; i < npages; ++i)
525 fmr->mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
526
527 dma_sync_single(&dev->pdev->dev, fmr->dma_handle,
528 npages * sizeof(u64), DMA_TO_DEVICE);
529
530 fmr->mpt->key = cpu_to_be32(key);
531 fmr->mpt->lkey = cpu_to_be32(key);
532 fmr->mpt->length = cpu_to_be64(npages * (1ull << fmr->page_shift));
533 fmr->mpt->start = cpu_to_be64(iova);
534
535 /* Make sure MTT entries are visible before setting MPT status */
536 wmb();
537
538 *(u8 *) fmr->mpt = MLX4_MPT_STATUS_HW;
539
540 /* Make sure MPT status is visible before consumer can use FMR */
541 wmb();
542
543 return 0;
544}
545EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr);
546
547int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
548 int max_maps, u8 page_shift, struct mlx4_fmr *fmr)
549{
550 struct mlx4_priv *priv = mlx4_priv(dev);
551 u64 mtt_seg;
552 int err = -ENOMEM;
553
554 if (page_shift < 12 || page_shift >= 32)
555 return -EINVAL;
556
557 /* All MTTs must fit in the same page */
558 if (max_pages * sizeof *fmr->mtts > PAGE_SIZE)
559 return -EINVAL;
560
561 fmr->page_shift = page_shift;
562 fmr->max_pages = max_pages;
563 fmr->max_maps = max_maps;
564 fmr->maps = 0;
565
566 err = mlx4_mr_alloc(dev, pd, 0, 0, access, max_pages,
567 page_shift, &fmr->mr);
568 if (err)
569 return err;
570
571 mtt_seg = fmr->mr.mtt.first_seg * dev->caps.mtt_entry_sz;
572
573 fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table,
574 fmr->mr.mtt.first_seg,
575 &fmr->dma_handle);
576 if (!fmr->mtts) {
577 err = -ENOMEM;
578 goto err_free;
579 }
580
581 fmr->mpt = mlx4_table_find(&priv->mr_table.dmpt_table,
582 key_to_hw_index(fmr->mr.key), NULL);
583 if (!fmr->mpt) {
584 err = -ENOMEM;
585 goto err_free;
586 }
587
588 return 0;
589
590err_free:
591 mlx4_mr_free(dev, &fmr->mr);
592 return err;
593}
594EXPORT_SYMBOL_GPL(mlx4_fmr_alloc);
595
596int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
597{
598 return mlx4_mr_enable(dev, &fmr->mr);
599}
600EXPORT_SYMBOL_GPL(mlx4_fmr_enable);
601
602void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
603 u32 *lkey, u32 *rkey)
604{
605 u32 key;
606
607 if (!fmr->maps)
608 return;
609
610 key = key_to_hw_index(fmr->mr.key);
611 key &= dev->caps.num_mpts - 1;
612 *lkey = *rkey = fmr->mr.key = hw_index_to_key(key);
613
614 fmr->maps = 0;
615
616 *(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW;
617}
618EXPORT_SYMBOL_GPL(mlx4_fmr_unmap);
619
620int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
621{
622 if (fmr->maps)
623 return -EBUSY;
624
625 fmr->mr.enabled = 0;
626 mlx4_mr_free(dev, &fmr->mr);
627
628 return 0;
629}
630EXPORT_SYMBOL_GPL(mlx4_fmr_free);
631
632int mlx4_SYNC_TPT(struct mlx4_dev *dev)
633{
634 return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000);
635}
636EXPORT_SYMBOL_GPL(mlx4_SYNC_TPT);
diff --git a/drivers/net/mlx4/pd.c b/drivers/net/mlx4/pd.c
index 23dea1ee7750..3a93c5f0f7ab 100644
--- a/drivers/net/mlx4/pd.c
+++ b/drivers/net/mlx4/pd.c
@@ -57,7 +57,7 @@ void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn)
57} 57}
58EXPORT_SYMBOL_GPL(mlx4_pd_free); 58EXPORT_SYMBOL_GPL(mlx4_pd_free);
59 59
60int __devinit mlx4_init_pd_table(struct mlx4_dev *dev) 60int mlx4_init_pd_table(struct mlx4_dev *dev)
61{ 61{
62 struct mlx4_priv *priv = mlx4_priv(dev); 62 struct mlx4_priv *priv = mlx4_priv(dev);
63 63
diff --git a/drivers/net/mlx4/qp.c b/drivers/net/mlx4/qp.c
index 19b48c71cf7f..cc4b1be18219 100644
--- a/drivers/net/mlx4/qp.c
+++ b/drivers/net/mlx4/qp.c
@@ -240,7 +240,8 @@ void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp)
240 mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn); 240 mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn);
241 mlx4_table_put(dev, &qp_table->qp_table, qp->qpn); 241 mlx4_table_put(dev, &qp_table->qp_table, qp->qpn);
242 242
243 mlx4_bitmap_free(&qp_table->bitmap, qp->qpn); 243 if (qp->qpn < dev->caps.sqp_start + 8)
244 mlx4_bitmap_free(&qp_table->bitmap, qp->qpn);
244} 245}
245EXPORT_SYMBOL_GPL(mlx4_qp_free); 246EXPORT_SYMBOL_GPL(mlx4_qp_free);
246 247
@@ -250,7 +251,7 @@ static int mlx4_CONF_SPECIAL_QP(struct mlx4_dev *dev, u32 base_qpn)
250 MLX4_CMD_TIME_CLASS_B); 251 MLX4_CMD_TIME_CLASS_B);
251} 252}
252 253
253int __devinit mlx4_init_qp_table(struct mlx4_dev *dev) 254int mlx4_init_qp_table(struct mlx4_dev *dev)
254{ 255{
255 struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; 256 struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
256 int err; 257 int err;
diff --git a/drivers/net/mlx4/srq.c b/drivers/net/mlx4/srq.c
index b061c86d6839..d23f46d692ef 100644
--- a/drivers/net/mlx4/srq.c
+++ b/drivers/net/mlx4/srq.c
@@ -227,7 +227,7 @@ int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_waterm
227 err = mlx4_QUERY_SRQ(dev, mailbox, srq->srqn); 227 err = mlx4_QUERY_SRQ(dev, mailbox, srq->srqn);
228 if (err) 228 if (err)
229 goto err_out; 229 goto err_out;
230 *limit_watermark = srq_context->limit_watermark; 230 *limit_watermark = be16_to_cpu(srq_context->limit_watermark);
231 231
232err_out: 232err_out:
233 mlx4_free_cmd_mailbox(dev, mailbox); 233 mlx4_free_cmd_mailbox(dev, mailbox);
@@ -235,7 +235,7 @@ err_out:
235} 235}
236EXPORT_SYMBOL_GPL(mlx4_srq_query); 236EXPORT_SYMBOL_GPL(mlx4_srq_query);
237 237
238int __devinit mlx4_init_srq_table(struct mlx4_dev *dev) 238int mlx4_init_srq_table(struct mlx4_dev *dev)
239{ 239{
240 struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; 240 struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
241 int err; 241 int err;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index cfb78fb2c046..222815d91c40 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -49,6 +49,10 @@ enum {
49}; 49};
50 50
51enum { 51enum {
52 MLX4_BOARD_ID_LEN = 64
53};
54
55enum {
52 MLX4_DEV_CAP_FLAG_RC = 1 << 0, 56 MLX4_DEV_CAP_FLAG_RC = 1 << 0,
53 MLX4_DEV_CAP_FLAG_UC = 1 << 1, 57 MLX4_DEV_CAP_FLAG_UC = 1 << 1,
54 MLX4_DEV_CAP_FLAG_UD = 1 << 2, 58 MLX4_DEV_CAP_FLAG_UD = 1 << 2,
@@ -210,6 +214,17 @@ struct mlx4_mr {
210 int enabled; 214 int enabled;
211}; 215};
212 216
217struct mlx4_fmr {
218 struct mlx4_mr mr;
219 struct mlx4_mpt_entry *mpt;
220 __be64 *mtts;
221 dma_addr_t dma_handle;
222 int max_pages;
223 int max_maps;
224 int maps;
225 u8 page_shift;
226};
227
213struct mlx4_uar { 228struct mlx4_uar {
214 unsigned long pfn; 229 unsigned long pfn;
215 int index; 230 int index;
@@ -272,6 +287,8 @@ struct mlx4_dev {
272 unsigned long flags; 287 unsigned long flags;
273 struct mlx4_caps caps; 288 struct mlx4_caps caps;
274 struct radix_tree_root qp_table_tree; 289 struct radix_tree_root qp_table_tree;
290 u32 rev_id;
291 char board_id[MLX4_BOARD_ID_LEN];
275}; 292};
276 293
277struct mlx4_init_port_param { 294struct mlx4_init_port_param {
@@ -331,4 +348,14 @@ int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port);
331int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); 348int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]);
332int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); 349int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]);
333 350
351int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
352 int npages, u64 iova, u32 *lkey, u32 *rkey);
353int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
354 int max_maps, u8 page_shift, struct mlx4_fmr *fmr);
355int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
356void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
357 u32 *lkey, u32 *rkey);
358int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
359int mlx4_SYNC_TPT(struct mlx4_dev *dev);
360
334#endif /* MLX4_DEVICE_H */ 361#endif /* MLX4_DEVICE_H */
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 12243e80c706..a627c8682d2f 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -477,12 +477,15 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
477 const void *private_data, 477 const void *private_data,
478 u8 private_data_len); 478 u8 private_data_len);
479 479
480#define IB_CM_MRA_FLAG_DELAY 0x80 /* Send MRA only after a duplicate msg */
481
480/** 482/**
481 * ib_send_cm_mra - Sends a message receipt acknowledgement to a connection 483 * ib_send_cm_mra - Sends a message receipt acknowledgement to a connection
482 * message. 484 * message.
483 * @cm_id: Connection identifier associated with the connection message. 485 * @cm_id: Connection identifier associated with the connection message.
484 * @service_timeout: The maximum time required for the sender to reply to 486 * @service_timeout: The lower 5-bits specify the maximum time required for
485 * to the connection message. 487 * the sender to reply to the connection message. The upper 3-bits
488 * specify additional control flags.
486 * @private_data: Optional user-defined private data sent with the 489 * @private_data: Optional user-defined private data sent with the
487 * message receipt acknowledgement. 490 * message receipt acknowledgement.
488 * @private_data_len: Size of the private data buffer, in bytes. 491 * @private_data_len: Size of the private data buffer, in bytes.
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index 5e26b2f53f86..942692b0b92e 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -109,8 +109,8 @@ enum ib_sa_selector {
109 * Reserved rows are indicated with comments to help maintainability. 109 * Reserved rows are indicated with comments to help maintainability.
110 */ 110 */
111 111
112/* reserved: 0 */ 112#define IB_SA_PATH_REC_SERVICE_ID (IB_SA_COMP_MASK( 0) |\
113/* reserved: 1 */ 113 IB_SA_COMP_MASK( 1))
114#define IB_SA_PATH_REC_DGID IB_SA_COMP_MASK( 2) 114#define IB_SA_PATH_REC_DGID IB_SA_COMP_MASK( 2)
115#define IB_SA_PATH_REC_SGID IB_SA_COMP_MASK( 3) 115#define IB_SA_PATH_REC_SGID IB_SA_COMP_MASK( 3)
116#define IB_SA_PATH_REC_DLID IB_SA_COMP_MASK( 4) 116#define IB_SA_PATH_REC_DLID IB_SA_COMP_MASK( 4)
@@ -123,7 +123,7 @@ enum ib_sa_selector {
123#define IB_SA_PATH_REC_REVERSIBLE IB_SA_COMP_MASK(11) 123#define IB_SA_PATH_REC_REVERSIBLE IB_SA_COMP_MASK(11)
124#define IB_SA_PATH_REC_NUMB_PATH IB_SA_COMP_MASK(12) 124#define IB_SA_PATH_REC_NUMB_PATH IB_SA_COMP_MASK(12)
125#define IB_SA_PATH_REC_PKEY IB_SA_COMP_MASK(13) 125#define IB_SA_PATH_REC_PKEY IB_SA_COMP_MASK(13)
126/* reserved: 14 */ 126#define IB_SA_PATH_REC_QOS_CLASS IB_SA_COMP_MASK(14)
127#define IB_SA_PATH_REC_SL IB_SA_COMP_MASK(15) 127#define IB_SA_PATH_REC_SL IB_SA_COMP_MASK(15)
128#define IB_SA_PATH_REC_MTU_SELECTOR IB_SA_COMP_MASK(16) 128#define IB_SA_PATH_REC_MTU_SELECTOR IB_SA_COMP_MASK(16)
129#define IB_SA_PATH_REC_MTU IB_SA_COMP_MASK(17) 129#define IB_SA_PATH_REC_MTU IB_SA_COMP_MASK(17)
@@ -134,8 +134,7 @@ enum ib_sa_selector {
134#define IB_SA_PATH_REC_PREFERENCE IB_SA_COMP_MASK(22) 134#define IB_SA_PATH_REC_PREFERENCE IB_SA_COMP_MASK(22)
135 135
136struct ib_sa_path_rec { 136struct ib_sa_path_rec {
137 /* reserved */ 137 __be64 service_id;
138 /* reserved */
139 union ib_gid dgid; 138 union ib_gid dgid;
140 union ib_gid sgid; 139 union ib_gid sgid;
141 __be16 dlid; 140 __be16 dlid;
@@ -148,7 +147,7 @@ struct ib_sa_path_rec {
148 int reversible; 147 int reversible;
149 u8 numb_path; 148 u8 numb_path;
150 __be16 pkey; 149 __be16 pkey;
151 /* reserved */ 150 __be16 qos_class;
152 u8 sl; 151 u8 sl;
153 u8 mtu_selector; 152 u8 mtu_selector;
154 u8 mtu; 153 u8 mtu;
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index c533d6c7903f..22298423cf0b 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -45,6 +45,7 @@ struct ib_umem {
45 int offset; 45 int offset;
46 int page_size; 46 int page_size;
47 int writable; 47 int writable;
48 int hugetlb;
48 struct list_head chunk_list; 49 struct list_head chunk_list;
49 struct work_struct work; 50 struct work_struct work;
50 struct mm_struct *mm; 51 struct mm_struct *mm;
diff --git a/include/rdma/ib_user_mad.h b/include/rdma/ib_user_mad.h
index d66b15ea82c4..29d2c7205a90 100644
--- a/include/rdma/ib_user_mad.h
+++ b/include/rdma/ib_user_mad.h
@@ -52,7 +52,50 @@
52 */ 52 */
53 53
54/** 54/**
55 * ib_user_mad_hdr_old - Old version of MAD packet header without pkey_index
56 * @id - ID of agent MAD received with/to be sent with
57 * @status - 0 on successful receive, ETIMEDOUT if no response
58 * received (transaction ID in data[] will be set to TID of original
59 * request) (ignored on send)
60 * @timeout_ms - Milliseconds to wait for response (unset on receive)
61 * @retries - Number of automatic retries to attempt
62 * @qpn - Remote QP number received from/to be sent to
63 * @qkey - Remote Q_Key to be sent with (unset on receive)
64 * @lid - Remote lid received from/to be sent to
65 * @sl - Service level received with/to be sent with
66 * @path_bits - Local path bits received with/to be sent with
67 * @grh_present - If set, GRH was received/should be sent
68 * @gid_index - Local GID index to send with (unset on receive)
69 * @hop_limit - Hop limit in GRH
70 * @traffic_class - Traffic class in GRH
71 * @gid - Remote GID in GRH
72 * @flow_label - Flow label in GRH
73 */
74struct ib_user_mad_hdr_old {
75 __u32 id;
76 __u32 status;
77 __u32 timeout_ms;
78 __u32 retries;
79 __u32 length;
80 __be32 qpn;
81 __be32 qkey;
82 __be16 lid;
83 __u8 sl;
84 __u8 path_bits;
85 __u8 grh_present;
86 __u8 gid_index;
87 __u8 hop_limit;
88 __u8 traffic_class;
89 __u8 gid[16];
90 __be32 flow_label;
91};
92
93/**
55 * ib_user_mad_hdr - MAD packet header 94 * ib_user_mad_hdr - MAD packet header
95 * This layout allows specifying/receiving the P_Key index. To use
96 * this capability, an application must call the
97 * IB_USER_MAD_ENABLE_PKEY ioctl on the user MAD file handle before
98 * any other actions with the file handle.
56 * @id - ID of agent MAD received with/to be sent with 99 * @id - ID of agent MAD received with/to be sent with
57 * @status - 0 on successful receive, ETIMEDOUT if no response 100 * @status - 0 on successful receive, ETIMEDOUT if no response
58 * received (transaction ID in data[] will be set to TID of original 101 * received (transaction ID in data[] will be set to TID of original
@@ -70,6 +113,7 @@
70 * @traffic_class - Traffic class in GRH 113 * @traffic_class - Traffic class in GRH
71 * @gid - Remote GID in GRH 114 * @gid - Remote GID in GRH
72 * @flow_label - Flow label in GRH 115 * @flow_label - Flow label in GRH
116 * @pkey_index - P_Key index
73 */ 117 */
74struct ib_user_mad_hdr { 118struct ib_user_mad_hdr {
75 __u32 id; 119 __u32 id;
@@ -88,6 +132,8 @@ struct ib_user_mad_hdr {
88 __u8 traffic_class; 132 __u8 traffic_class;
89 __u8 gid[16]; 133 __u8 gid[16];
90 __be32 flow_label; 134 __be32 flow_label;
135 __u16 pkey_index;
136 __u8 reserved[6];
91}; 137};
92 138
93/** 139/**
@@ -101,6 +147,26 @@ struct ib_user_mad {
101 __u64 data[0]; 147 __u64 data[0];
102}; 148};
103 149
150/*
151 * Earlier versions of this interface definition declared the
152 * method_mask[] member as an array of __u32 but treated it as a
153 * bitmap made up of longs in the kernel. This ambiguity meant that
154 * 32-bit big-endian applications that can run on both 32-bit and
155 * 64-bit kernels had no consistent ABI to rely on, and 64-bit
156 * big-endian applications that treated method_mask as being made up
157 * of 32-bit words would have their bitmap misinterpreted.
158 *
159 * To clear up this confusion, we change the declaration of
160 * method_mask[] to use unsigned long and handle the conversion from
161 * 32-bit userspace to 64-bit kernel for big-endian systems in the
162 * compat_ioctl method. Unfortunately, to keep the structure layout
163 * the same, we need the method_mask[] array to be aligned only to 4
164 * bytes even when long is 64 bits, which forces us into this ugly
165 * typedef.
166 */
167typedef unsigned long __attribute__((aligned(4))) packed_ulong;
168#define IB_USER_MAD_LONGS_PER_METHOD_MASK (128 / (8 * sizeof (long)))
169
104/** 170/**
105 * ib_user_mad_reg_req - MAD registration request 171 * ib_user_mad_reg_req - MAD registration request
106 * @id - Set by the kernel; used to identify agent in future requests. 172 * @id - Set by the kernel; used to identify agent in future requests.
@@ -119,7 +185,7 @@ struct ib_user_mad {
119 */ 185 */
120struct ib_user_mad_reg_req { 186struct ib_user_mad_reg_req {
121 __u32 id; 187 __u32 id;
122 __u32 method_mask[4]; 188 packed_ulong method_mask[IB_USER_MAD_LONGS_PER_METHOD_MASK];
123 __u8 qpn; 189 __u8 qpn;
124 __u8 mgmt_class; 190 __u8 mgmt_class;
125 __u8 mgmt_class_version; 191 __u8 mgmt_class_version;
@@ -134,4 +200,6 @@ struct ib_user_mad_reg_req {
134 200
135#define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, __u32) 201#define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, __u32)
136 202
203#define IB_USER_MAD_ENABLE_PKEY _IO(IB_IOCTL_MAGIC, 3)
204
137#endif /* IB_USER_MAD_H */ 205#endif /* IB_USER_MAD_H */
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 2d6a7705eae7..010f876f41d8 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -314,4 +314,18 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
314 */ 314 */
315void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr); 315void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr);
316 316
317/**
318 * rdma_set_service_type - Set the type of service associated with a
319 * connection identifier.
320 * @id: Communication identifier to associate with service type.
321 * @tos: Type of service.
322 *
323 * The type of service is interpreted as a differentiated service
324 * field (RFC 2474). The service type should be specified before
325 * performing route resolution, as existing communication on the
326 * connection identifier may be unaffected. The type of service
327 * requested may not be supported by the network to all destinations.
328 */
329void rdma_set_service_type(struct rdma_cm_id *id, int tos);
330
317#endif /* RDMA_CM_H */ 331#endif /* RDMA_CM_H */
diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h
index f632b0c007c9..9749c1b34d00 100644
--- a/include/rdma/rdma_user_cm.h
+++ b/include/rdma/rdma_user_cm.h
@@ -212,4 +212,22 @@ struct rdma_ucm_event_resp {
212 } param; 212 } param;
213}; 213};
214 214
215/* Option levels */
216enum {
217 RDMA_OPTION_ID = 0
218};
219
220/* Option details */
221enum {
222 RDMA_OPTION_ID_TOS = 0
223};
224
225struct rdma_ucm_set_option {
226 __u64 optval;
227 __u32 id;
228 __u32 level;
229 __u32 optname;
230 __u32 optlen;
231};
232
215#endif /* RDMA_USER_CM_H */ 233#endif /* RDMA_USER_CM_H */