diff options
| -rw-r--r-- | drivers/infiniband/hw/mlx4/qp.c | 62 |
1 files changed, 49 insertions, 13 deletions
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index ba0428d872aa..85c51bdc36f1 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c | |||
| @@ -1211,12 +1211,42 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, | |||
| 1211 | dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); | 1211 | dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); |
| 1212 | } | 1212 | } |
| 1213 | 1213 | ||
| 1214 | static void set_data_seg(struct mlx4_wqe_data_seg *dseg, | 1214 | static void set_mlx_icrc_seg(void *dseg) |
| 1215 | struct ib_sge *sg) | 1215 | { |
| 1216 | u32 *t = dseg; | ||
| 1217 | struct mlx4_wqe_inline_seg *iseg = dseg; | ||
| 1218 | |||
| 1219 | t[1] = 0; | ||
| 1220 | |||
| 1221 | /* | ||
| 1222 | * Need a barrier here before writing the byte_count field to | ||
| 1223 | * make sure that all the data is visible before the | ||
| 1224 | * byte_count field is set. Otherwise, if the segment begins | ||
| 1225 | * a new cacheline, the HCA prefetcher could grab the 64-byte | ||
| 1226 | * chunk and get a valid (!= 0xffffffff) byte count but | ||
| 1227 | * stale data, and end up sending the wrong data. | ||
| 1228 | */ | ||
| 1229 | wmb(); | ||
| 1230 | |||
| 1231 | iseg->byte_count = cpu_to_be32((1 << 31) | 4); | ||
| 1232 | } | ||
| 1233 | |||
| 1234 | static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) | ||
| 1216 | { | 1235 | { |
| 1217 | dseg->byte_count = cpu_to_be32(sg->length); | ||
| 1218 | dseg->lkey = cpu_to_be32(sg->lkey); | 1236 | dseg->lkey = cpu_to_be32(sg->lkey); |
| 1219 | dseg->addr = cpu_to_be64(sg->addr); | 1237 | dseg->addr = cpu_to_be64(sg->addr); |
| 1238 | |||
| 1239 | /* | ||
| 1240 | * Need a barrier here before writing the byte_count field to | ||
| 1241 | * make sure that all the data is visible before the | ||
| 1242 | * byte_count field is set. Otherwise, if the segment begins | ||
| 1243 | * a new cacheline, the HCA prefetcher could grab the 64-byte | ||
| 1244 | * chunk and get a valid (!= 0xffffffff) byte count but | ||
| 1245 | * stale data, and end up sending the wrong data. | ||
| 1246 | */ | ||
| 1247 | wmb(); | ||
| 1248 | |||
| 1249 | dseg->byte_count = cpu_to_be32(sg->length); | ||
| 1220 | } | 1250 | } |
| 1221 | 1251 | ||
| 1222 | int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | 1252 | int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, |
| @@ -1225,6 +1255,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | |||
| 1225 | struct mlx4_ib_qp *qp = to_mqp(ibqp); | 1255 | struct mlx4_ib_qp *qp = to_mqp(ibqp); |
| 1226 | void *wqe; | 1256 | void *wqe; |
| 1227 | struct mlx4_wqe_ctrl_seg *ctrl; | 1257 | struct mlx4_wqe_ctrl_seg *ctrl; |
| 1258 | struct mlx4_wqe_data_seg *dseg; | ||
| 1228 | unsigned long flags; | 1259 | unsigned long flags; |
| 1229 | int nreq; | 1260 | int nreq; |
| 1230 | int err = 0; | 1261 | int err = 0; |
| @@ -1324,22 +1355,27 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | |||
| 1324 | break; | 1355 | break; |
| 1325 | } | 1356 | } |
| 1326 | 1357 | ||
| 1327 | for (i = 0; i < wr->num_sge; ++i) { | 1358 | /* |
| 1328 | set_data_seg(wqe, wr->sg_list + i); | 1359 | * Write data segments in reverse order, so as to |
| 1360 | * overwrite cacheline stamp last within each | ||
| 1361 | * cacheline. This avoids issues with WQE | ||
| 1362 | * prefetching. | ||
| 1363 | */ | ||
| 1329 | 1364 | ||
| 1330 | wqe += sizeof (struct mlx4_wqe_data_seg); | 1365 | dseg = wqe; |
| 1331 | size += sizeof (struct mlx4_wqe_data_seg) / 16; | 1366 | dseg += wr->num_sge - 1; |
| 1332 | } | 1367 | size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16); |
| 1333 | 1368 | ||
| 1334 | /* Add one more inline data segment for ICRC for MLX sends */ | 1369 | /* Add one more inline data segment for ICRC for MLX sends */ |
| 1335 | if (qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) { | 1370 | if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI || |
| 1336 | ((struct mlx4_wqe_inline_seg *) wqe)->byte_count = | 1371 | qp->ibqp.qp_type == IB_QPT_GSI)) { |
| 1337 | cpu_to_be32((1 << 31) | 4); | 1372 | set_mlx_icrc_seg(dseg + 1); |
| 1338 | ((u32 *) wqe)[1] = 0; | ||
| 1339 | wqe += sizeof (struct mlx4_wqe_data_seg); | ||
| 1340 | size += sizeof (struct mlx4_wqe_data_seg) / 16; | 1373 | size += sizeof (struct mlx4_wqe_data_seg) / 16; |
| 1341 | } | 1374 | } |
| 1342 | 1375 | ||
| 1376 | for (i = wr->num_sge - 1; i >= 0; --i, --dseg) | ||
| 1377 | set_data_seg(dseg, wr->sg_list + i); | ||
| 1378 | |||
| 1343 | ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ? | 1379 | ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ? |
| 1344 | MLX4_WQE_CTRL_FENCE : 0) | size; | 1380 | MLX4_WQE_CTRL_FENCE : 0) | size; |
| 1345 | 1381 | ||
